diff options
Diffstat (limited to 'net/9p')
| -rw-r--r-- | net/9p/Kconfig | 10 | ||||
| -rw-r--r-- | net/9p/Makefile | 1 | ||||
| -rw-r--r-- | net/9p/client.c | 977 | ||||
| -rw-r--r-- | net/9p/error.c | 10 | ||||
| -rw-r--r-- | net/9p/mod.c | 39 | ||||
| -rw-r--r-- | net/9p/protocol.c | 154 | ||||
| -rw-r--r-- | net/9p/protocol.h | 4 | ||||
| -rw-r--r-- | net/9p/trans_common.c | 69 | ||||
| -rw-r--r-- | net/9p/trans_common.h | 17 | ||||
| -rw-r--r-- | net/9p/trans_fd.c | 386 | ||||
| -rw-r--r-- | net/9p/trans_rdma.c | 182 | ||||
| -rw-r--r-- | net/9p/trans_virtio.c | 354 | ||||
| -rw-r--r-- | net/9p/util.c | 23 | 
13 files changed, 1506 insertions, 720 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig index 7ed75c7bd5d..a75174a3372 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -3,8 +3,8 @@  #  menuconfig NET_9P -	depends on NET && EXPERIMENTAL -	tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)" +	depends on NET +	tristate "Plan 9 Resource Sharing Support (9P2000)"  	help  	  If you say Y here, you will get experimental support for  	  Plan 9 resource sharing via the 9P2000 protocol. @@ -16,14 +16,14 @@ menuconfig NET_9P  if NET_9P  config NET_9P_VIRTIO -	depends on EXPERIMENTAL && VIRTIO -	tristate "9P Virtio Transport (Experimental)" +	depends on VIRTIO +	tristate "9P Virtio Transport"  	help  	  This builds support for a transports between  	  guest partitions and a host partition.  config NET_9P_RDMA -	depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL +	depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS  	tristate "9P RDMA Transport (Experimental)"  	help  	  This builds support for an RDMA transport. 
diff --git a/net/9p/Makefile b/net/9p/Makefile index 198a640d53a..a0874cc1f71 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -9,6 +9,7 @@ obj-$(CONFIG_NET_9P_RDMA) += 9pnet_rdma.o  	util.o \  	protocol.o \  	trans_fd.o \ +	trans_common.o \  9pnet_virtio-objs := \  	trans_virtio.o \ diff --git a/net/9p/client.c b/net/9p/client.c index a848bca9fbf..0004cbaac4a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -23,6 +23,8 @@   *   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/module.h>  #include <linux/errno.h>  #include <linux/fs.h> @@ -38,6 +40,9 @@  #include <net/9p/transport.h>  #include "protocol.h" +#define CREATE_TRACE_POINTS +#include <trace/events/9p.h> +  /*    * Client Option Parsing (code inspired by NFS code)    *  - a little lazy - parse all client options @@ -71,30 +76,40 @@ inline int p9_is_proto_dotu(struct p9_client *clnt)  }  EXPORT_SYMBOL(p9_is_proto_dotu); +/* + * Some error codes are taken directly from the server replies, + * make sure they are valid. 
+ */ +static int safe_errno(int err) +{ +	if ((err > 0) || (err < -MAX_ERRNO)) { +		p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err); +		return -EPROTO; +	} +	return err; +} + +  /* Interpret mount option for protocol version */ -static int get_protocol_version(const substring_t *name) +static int get_protocol_version(char *s)  {  	int version = -EINVAL; -	if (!strncmp("9p2000", name->from, name->to-name->from)) { +	if (!strcmp(s, "9p2000")) {  		version = p9_proto_legacy; -		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: Legacy\n"); -	} else if (!strncmp("9p2000.u", name->from, name->to-name->from)) { +		p9_debug(P9_DEBUG_9P, "Protocol version: Legacy\n"); +	} else if (!strcmp(s, "9p2000.u")) {  		version = p9_proto_2000u; -		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); -	} else if (!strncmp("9p2000.L", name->from, name->to-name->from)) { +		p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.u\n"); +	} else if (!strcmp(s, "9p2000.L")) {  		version = p9_proto_2000L; -		P9_DPRINTK(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); -	} else { -		P9_DPRINTK(P9_DEBUG_ERROR, "Unknown protocol version %s. 
", -							name->from); -	} +		p9_debug(P9_DEBUG_9P, "Protocol version: 9P2000.L\n"); +	} else +		pr_info("Unknown protocol version %s\n", s); +  	return version;  } -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...); -  /**   * parse_options - parse mount options into client structure   * @opts: options string passed from mount @@ -109,9 +124,10 @@ static int parse_opts(char *opts, struct p9_client *clnt)  	char *p;  	substring_t args[MAX_OPT_ARGS];  	int option; +	char *s;  	int ret = 0; -	clnt->proto_version = p9_proto_2000u; +	clnt->proto_version = p9_proto_2000L;  	clnt->msize = 8192;  	if (!opts) @@ -119,47 +135,63 @@ static int parse_opts(char *opts, struct p9_client *clnt)  	tmp_options = kstrdup(opts, GFP_KERNEL);  	if (!tmp_options) { -		P9_DPRINTK(P9_DEBUG_ERROR, -				"failed to allocate copy of option string\n"); +		p9_debug(P9_DEBUG_ERROR, +			 "failed to allocate copy of option string\n");  		return -ENOMEM;  	}  	options = tmp_options;  	while ((p = strsep(&options, ",")) != NULL) { -		int token; +		int token, r;  		if (!*p)  			continue;  		token = match_token(p, tokens, args); -		if (token < Opt_trans) { -			int r = match_int(&args[0], &option); +		switch (token) { +		case Opt_msize: +			r = match_int(&args[0], &option);  			if (r < 0) { -				P9_DPRINTK(P9_DEBUG_ERROR, -					"integer field, but no integer?\n"); +				p9_debug(P9_DEBUG_ERROR, +					 "integer field, but no integer?\n");  				ret = r;  				continue;  			} -		} -		switch (token) { -		case Opt_msize:  			clnt->msize = option;  			break;  		case Opt_trans: -			clnt->trans_mod = v9fs_get_trans_by_name(&args[0]); -			if(clnt->trans_mod == NULL) { -				P9_DPRINTK(P9_DEBUG_ERROR, -				   "Could not find request transport: %s\n", -				   (char *) &args[0]); +			s = match_strdup(&args[0]); +			if (!s) { +				ret = -ENOMEM; +				p9_debug(P9_DEBUG_ERROR, +					 "problem allocating copy of trans arg\n"); +				goto free_and_return; +			 } +			
clnt->trans_mod = v9fs_get_trans_by_name(s); +			if (clnt->trans_mod == NULL) { +				pr_info("Could not find request transport: %s\n", +					s);  				ret = -EINVAL; +				kfree(s);  				goto free_and_return;  			} +			kfree(s);  			break;  		case Opt_legacy:  			clnt->proto_version = p9_proto_legacy;  			break;  		case Opt_version: -			ret = get_protocol_version(&args[0]); -			if (ret == -EINVAL) +			s = match_strdup(&args[0]); +			if (!s) { +				ret = -ENOMEM; +				p9_debug(P9_DEBUG_ERROR, +					 "problem allocating copy of version arg\n");  				goto free_and_return; +			} +			ret = get_protocol_version(s); +			if (ret == -EINVAL) { +				kfree(s); +				goto free_and_return; +			} +			kfree(s);  			clnt->proto_version = ret;  			break;  		default: @@ -172,13 +204,24 @@ free_and_return:  	return ret;  } +static struct p9_fcall *p9_fcall_alloc(int alloc_msize) +{ +	struct p9_fcall *fc; +	fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); +	if (!fc) +		return NULL; +	fc->capacity = alloc_msize; +	fc->sdata = (char *) fc + sizeof(struct p9_fcall); +	return fc; +} +  /**   * p9_tag_alloc - lookup/allocate a request by tag   * @c: client session to lookup tag within   * @tag: numeric id for transaction   *   * this is a simple array lookup, but will grow the - * request_slots as necessary to accomodate transaction + * request_slots as necessary to accommodate transaction   * ids which did not previously have a slot.   
*   * this code relies on the client spinlock to manage locks, its @@ -187,11 +230,13 @@ free_and_return:   *   */ -static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag) +static struct p9_req_t * +p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)  {  	unsigned long flags;  	int row, col;  	struct p9_req_t *req; +	int alloc_msize = min(c->msize, max_size);  	/* This looks up the original request by tag so we know which  	 * buffer to read the data into */ @@ -206,7 +251,7 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)  					sizeof(struct p9_req_t), GFP_ATOMIC);  			if (!c->reqs[row]) { -				printk(KERN_ERR "Couldn't grow tag array\n"); +				pr_err("Couldn't grow tag array\n");  				spin_unlock_irqrestore(&c->lock, flags);  				return ERR_PTR(-ENOMEM);  			} @@ -222,39 +267,36 @@ static struct p9_req_t *p9_tag_alloc(struct p9_client *c, u16 tag)  	col = tag % P9_ROW_MAXTAG;  	req = &c->reqs[row][col]; -	if (!req->tc) { -		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_KERNEL); -		if (!req->wq) { -			printk(KERN_ERR "Couldn't grow tag array\n"); -			return ERR_PTR(-ENOMEM); -		} +	if (!req->wq) { +		req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); +		if (!req->wq) +			goto grow_failed;  		init_waitqueue_head(req->wq); -		req->tc = kmalloc(sizeof(struct p9_fcall)+c->msize, -								GFP_KERNEL); -		req->rc = kmalloc(sizeof(struct p9_fcall)+c->msize, -								GFP_KERNEL); -		if ((!req->tc) || (!req->rc)) { -			printk(KERN_ERR "Couldn't grow tag array\n"); -			kfree(req->tc); -			kfree(req->rc); -			kfree(req->wq); -			req->tc = req->rc = NULL; -			req->wq = NULL; -			return ERR_PTR(-ENOMEM); -		} -		req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); -		req->tc->capacity = c->msize; -		req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); -		req->rc->capacity = c->msize;  	} +	if (!req->tc) +		req->tc = p9_fcall_alloc(alloc_msize); +	if (!req->rc) +		req->rc = p9_fcall_alloc(alloc_msize); +	if 
(!req->tc || !req->rc) +		goto grow_failed; +  	p9pdu_reset(req->tc);  	p9pdu_reset(req->rc);  	req->tc->tag = tag-1;  	req->status = REQ_STATUS_ALLOC; -	return &c->reqs[row][col]; +	return req; + +grow_failed: +	pr_err("Couldn't grow tag array\n"); +	kfree(req->tc); +	kfree(req->rc); +	kfree(req->wq); +	req->tc = req->rc = NULL; +	req->wq = NULL; +	return ERR_PTR(-ENOMEM);  }  /** @@ -272,7 +314,8 @@ struct p9_req_t *p9_tag_lookup(struct p9_client *c, u16 tag)  	 * buffer to read the data into */  	tag++; -	BUG_ON(tag >= c->max_tag); +	if(tag >= c->max_tag)  +		return NULL;  	row = tag / P9_ROW_MAXTAG;  	col = tag % P9_ROW_MAXTAG; @@ -296,12 +339,13 @@ static int p9_tag_init(struct p9_client *c)  	c->tagpool = p9_idpool_create();  	if (IS_ERR(c->tagpool)) {  		err = PTR_ERR(c->tagpool); -		c->tagpool = NULL;  		goto error;  	} - -	p9_idpool_get(c->tagpool); /* reserve tag 0 */ - +	err = p9_idpool_get(c->tagpool); /* reserve tag 0 */ +	if (err < 0) { +		p9_idpool_destroy(c->tagpool); +		goto error; +	}  	c->max_tag = 0;  error:  	return err; @@ -322,9 +366,9 @@ static void p9_tag_cleanup(struct p9_client *c)  	for (row = 0; row < (c->max_tag/P9_ROW_MAXTAG); row++) {  		for (col = 0; col < P9_ROW_MAXTAG; col++) {  			if (c->reqs[row][col].status != REQ_STATUS_IDLE) { -				P9_DPRINTK(P9_DEBUG_MUX, -				  "Attempting to cleanup non-free tag %d,%d\n", -				  row, col); +				p9_debug(P9_DEBUG_MUX, +					 "Attempting to cleanup non-free tag %d,%d\n", +					 row, col);  				/* TODO: delay execution of cleanup */  				return;  			} @@ -358,7 +402,7 @@ static void p9_tag_cleanup(struct p9_client *c)  static void p9_free_req(struct p9_client *c, struct p9_req_t *r)  {  	int tag = r->tc->tag; -	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag); +	p9_debug(P9_DEBUG_MUX, "clnt %p req %p tag: %d\n", c, r, tag);  	r->status = REQ_STATUS_IDLE;  	if (tag != P9_NOTAG && p9_idpool_check(tag, c->tagpool)) @@ -371,11 +415,19 @@ static void p9_free_req(struct p9_client *c, 
struct p9_req_t *r)   * req: request received   *   */ -void p9_client_cb(struct p9_client *c, struct p9_req_t *req) +void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)  { -	P9_DPRINTK(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); +	p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + +	/* +	 * This barrier is needed to make sure any change made to req before +	 * the other thread wakes up will indeed be seen by the waiting side. +	 */ +	smp_wmb(); +	req->status = status; +  	wake_up(req->wq); -	P9_DPRINTK(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); +	p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);  }  EXPORT_SYMBOL(p9_client_cb); @@ -410,8 +462,8 @@ p9_parse_header(struct p9_fcall *pdu, int32_t *size, int8_t *type, int16_t *tag,  	pdu->id = r_type;  	pdu->tag = r_tag; -	P9_DPRINTK(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", pdu->size, -							pdu->id, pdu->tag); +	p9_debug(P9_DEBUG_9P, "<<< size=%d type: %d tag: %d\n", +		 pdu->size, pdu->id, pdu->tag);  	if (type)  		*type = r_type; @@ -443,58 +495,152 @@ static int p9_check_errors(struct p9_client *c, struct p9_req_t *req)  {  	int8_t type;  	int err; +	int ecode;  	err = p9_parse_header(req->rc, NULL, &type, NULL, 0); +	/* +	 * dump the response from server +	 * This should be after check errors which poplulate pdu_fcall. 
+	 */ +	trace_9p_protocol_dump(c, req->rc);  	if (err) { -		P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); +		p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err);  		return err;  	} +	if (type != P9_RERROR && type != P9_RLERROR) +		return 0; + +	if (!p9_is_proto_dotl(c)) { +		char *ename; +		err = p9pdu_readf(req->rc, c->proto_version, "s?d", +				  &ename, &ecode); +		if (err) +			goto out_err; -	if (type == P9_RERROR || type == P9_RLERROR) { -		int ecode; +		if (p9_is_proto_dotu(c)) +			err = -ecode; + +		if (!err || !IS_ERR_VALUE(err)) { +			err = p9_errstr2errno(ename, strlen(ename)); + +			p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", +				 -ecode, ename); +		} +		kfree(ename); +	} else { +		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); +		err = -ecode; + +		p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); +	} + +	return err; + +out_err: +	p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); + +	return err; +} + +/** + * p9_check_zc_errors - check 9p packet for error return and process it + * @c: current client instance + * @req: request to parse and check for error conditions + * @in_hdrlen: Size of response protocol buffer. + * + * returns error code if one is discovered, otherwise returns 0 + * + * this will have to be more complicated if we have multiple + * error packet types + */ -		if (!p9_is_proto_dotl(c)) { -			char *ename; +static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, +			      char *uidata, int in_hdrlen, int kern_buf) +{ +	int err; +	int ecode; +	int8_t type; +	char *ename = NULL; -			err = p9pdu_readf(req->rc, c->proto_version, "s?d", -								&ename, &ecode); -			if (err) -				goto out_err; +	err = p9_parse_header(req->rc, NULL, &type, NULL, 0); +	/* +	 * dump the response from server +	 * This should be after parse_header which poplulate pdu_fcall. 
+	 */ +	trace_9p_protocol_dump(c, req->rc); +	if (err) { +		p9_debug(P9_DEBUG_ERROR, "couldn't parse header %d\n", err); +		return err; +	} -			if (p9_is_proto_dotu(c)) -				err = -ecode; +	if (type != P9_RERROR && type != P9_RLERROR) +		return 0; -			if (!err || !IS_ERR_VALUE(err)) { -				err = p9_errstr2errno(ename, strlen(ename)); +	if (!p9_is_proto_dotl(c)) { +		/* Error is reported in string format */ +		int len; +		/* 7 = header size for RERROR; */ +		int inline_len = in_hdrlen - 7; -				P9_DPRINTK(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", -ecode, ename); +		len =  req->rc->size - req->rc->offset; +		if (len > (P9_ZC_HDR_SZ - 7)) { +			err = -EFAULT; +			goto out_err; +		} -				kfree(ename); +		ename = &req->rc->sdata[req->rc->offset]; +		if (len > inline_len) { +			/* We have error in external buffer */ +			if (kern_buf) { +				memcpy(ename + inline_len, uidata, +				       len - inline_len); +			} else { +				err = copy_from_user(ename + inline_len, +						     uidata, len - inline_len); +				if (err) { +					err = -EFAULT; +					goto out_err; +				}  			} -		} else { -			err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); +		} +		ename = NULL; +		err = p9pdu_readf(req->rc, c->proto_version, "s?d", +				  &ename, &ecode); +		if (err) +			goto out_err; + +		if (p9_is_proto_dotu(c))  			err = -ecode; -			P9_DPRINTK(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); -		} +		if (!err || !IS_ERR_VALUE(err)) { +			err = p9_errstr2errno(ename, strlen(ename)); -	} else -		err = 0; +			p9_debug(P9_DEBUG_9P, "<<< RERROR (%d) %s\n", +				 -ecode, ename); +		} +		kfree(ename); +	} else { +		err = p9pdu_readf(req->rc, c->proto_version, "d", &ecode); +		err = -ecode; +		p9_debug(P9_DEBUG_9P, "<<< RLERROR (%d)\n", -ecode); +	}  	return err;  out_err: -	P9_DPRINTK(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); - +	p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);  	return err;  } +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char 
*fmt, ...); +  /**   * p9_client_flush - flush (cancel) a request   * @c: client state   * @oldreq: request to cancel   * - * This sents a flush for a particular requests and links + * This sents a flush for a particular request and links   * the flush request to the original request.  The current   * code only supports a single flush request although the protocol   * allows for multiple flush requests to be sent for a single request. @@ -511,43 +657,32 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)  	if (err)  		return err; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag); +	p9_debug(P9_DEBUG_9P, ">>> TFLUSH tag %d\n", oldtag);  	req = p9_client_rpc(c, P9_TFLUSH, "w", oldtag);  	if (IS_ERR(req))  		return PTR_ERR(req); - -	/* if we haven't received a response for oldreq, -	   remove it from the list. */ -	spin_lock(&c->lock); -	if (oldreq->status == REQ_STATUS_FLSH) -		list_del(&oldreq->req_list); -	spin_unlock(&c->lock); +	/* +	 * if we haven't received a response for oldreq, +	 * remove it from the list +	 */ +	if (oldreq->status == REQ_STATUS_SENT) +		if (c->trans_mod->cancelled) +			c->trans_mod->cancelled(c, oldreq);  	p9_free_req(c, req);  	return 0;  } -/** - * p9_client_rpc - issue a request and wait for a response - * @c: client session - * @type: type of request - * @fmt: protocol format string (see protocol.c) - * - * Returns request structure (which client must free using p9_free_req) - */ - -static struct p9_req_t * -p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
+static struct p9_req_t *p9_client_prepare_req(struct p9_client *c, +					      int8_t type, int req_size, +					      const char *fmt, va_list ap)  { -	va_list ap;  	int tag, err;  	struct p9_req_t *req; -	unsigned long flags; -	int sigpending; -	P9_DPRINTK(P9_DEBUG_MUX, "client %p op %d\n", c, type); +	p9_debug(P9_DEBUG_MUX, "client %p op %d\n", c, type);  	/* we allow for any status other than disconnected */  	if (c->status == Disconnected) @@ -557,12 +692,6 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)  	if ((c->status == BeginDisconnect) && (type != P9_TCLUNK))  		return ERR_PTR(-EIO); -	if (signal_pending(current)) { -		sigpending = 1; -		clear_thread_flag(TIF_SIGPENDING); -	} else -		sigpending = 0; -  	tag = P9_NOTAG;  	if (type != P9_TVERSION) {  		tag = p9_idpool_get(c->tagpool); @@ -570,39 +699,82 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)  			return ERR_PTR(-ENOMEM);  	} -	req = p9_tag_alloc(c, tag); +	req = p9_tag_alloc(c, tag, req_size);  	if (IS_ERR(req))  		return req;  	/* marshall the data */  	p9pdu_prepare(req->tc, tag, type); -	va_start(ap, fmt);  	err = p9pdu_vwritef(req->tc, c->proto_version, fmt, ap); -	va_end(ap);  	if (err)  		goto reterr; -	p9pdu_finalize(req->tc); +	p9pdu_finalize(c, req->tc); +	trace_9p_client_req(c, type, tag); +	return req; +reterr: +	p9_free_req(c, req); +	return ERR_PTR(err); +} + +/** + * p9_client_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ + +static struct p9_req_t * +p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) 
+{ +	va_list ap; +	int sigpending, err; +	unsigned long flags; +	struct p9_req_t *req; + +	va_start(ap, fmt); +	req = p9_client_prepare_req(c, type, c->msize, fmt, ap); +	va_end(ap); +	if (IS_ERR(req)) +		return req; + +	if (signal_pending(current)) { +		sigpending = 1; +		clear_thread_flag(TIF_SIGPENDING); +	} else +		sigpending = 0;  	err = c->trans_mod->request(c, req);  	if (err < 0) { -		if (err != -ERESTARTSYS) +		if (err != -ERESTARTSYS && err != -EFAULT)  			c->status = Disconnected;  		goto reterr;  	} - -	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d\n", req->wq, tag); +again: +	/* Wait for the response */  	err = wait_event_interruptible(*req->wq, -						req->status >= REQ_STATUS_RCVD); -	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n", -						req->wq, tag, err); +				       req->status >= REQ_STATUS_RCVD); + +	/* +	 * Make sure our req is coherent with regard to updates in other +	 * threads - echoes to wmb() in the callback +	 */ +	smp_rmb(); + +	if ((err == -ERESTARTSYS) && (c->status == Connected) +				  && (type == P9_TFLUSH)) { +		sigpending = 1; +		clear_thread_flag(TIF_SIGPENDING); +		goto again; +	}  	if (req->status == REQ_STATUS_ERROR) { -		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); +		p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);  		err = req->t_err;  	} -  	if ((err == -ERESTARTSYS) && (c->status == Connected)) { -		P9_DPRINTK(P9_DEBUG_MUX, "flushing\n"); +		p9_debug(P9_DEBUG_MUX, "flushing\n");  		sigpending = 1;  		clear_thread_flag(TIF_SIGPENDING); @@ -613,27 +785,104 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)  		
if (req->status == REQ_STATUS_RCVD)  			err = 0;  	} -  	if (sigpending) {  		spin_lock_irqsave(¤t->sighand->siglock, flags);  		recalc_sigpending();  		spin_unlock_irqrestore(¤t->sighand->siglock, flags);  	} -  	if (err < 0)  		goto reterr;  	err = p9_check_errors(c, req); -	if (!err) { -		P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d\n", c, type); +	trace_9p_client_res(c, type, req->rc->tag, err); +	if (!err) +		return req; +reterr: +	p9_free_req(c, req); +	return ERR_PTR(safe_errno(err)); +} + +/** + * p9_client_zc_rpc - issue a request and wait for a response + * @c: client session + * @type: type of request + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * @fmt: protocol format string (see protocol.c) + * + * Returns request structure (which client must free using p9_free_req) + */ +static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, +					 char *uidata, char *uodata, +					 int inlen, int olen, int in_hdrlen, +					 int kern_buf, const char *fmt, ...) +{ +	va_list ap; +	int sigpending, err; +	unsigned long flags; +	struct p9_req_t *req; + +	va_start(ap, fmt); +	/* +	 * We allocate a inline protocol data of only 4k bytes. +	 * The actual content is passed in zero-copy fashion. 
+	 */ +	req = p9_client_prepare_req(c, type, P9_ZC_HDR_SZ, fmt, ap); +	va_end(ap); +	if (IS_ERR(req))  		return req; + +	if (signal_pending(current)) { +		sigpending = 1; +		clear_thread_flag(TIF_SIGPENDING); +	} else +		sigpending = 0; + +	/* If we are called with KERNEL_DS force kern_buf */ +	if (segment_eq(get_fs(), KERNEL_DS)) +		kern_buf = 1; + +	err = c->trans_mod->zc_request(c, req, uidata, uodata, +				       inlen, olen, in_hdrlen, kern_buf); +	if (err < 0) { +		if (err == -EIO) +			c->status = Disconnected; +		goto reterr;  	} +	if (req->status == REQ_STATUS_ERROR) { +		p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); +		err = req->t_err; +	} +	if ((err == -ERESTARTSYS) && (c->status == Connected)) { +		p9_debug(P9_DEBUG_MUX, "flushing\n"); +		sigpending = 1; +		clear_thread_flag(TIF_SIGPENDING); +		if (c->trans_mod->cancel(c, req)) +			p9_client_flush(c, req); + +		/* if we received the response anyway, don't signal error */ +		if (req->status == REQ_STATUS_RCVD) +			err = 0; +	} +	if (sigpending) { +		spin_lock_irqsave(¤t->sighand->siglock, flags); +		recalc_sigpending(); +		spin_unlock_irqrestore(¤t->sighand->siglock, flags); +	} +	if (err < 0) +		goto reterr; + +	err = p9_check_zc_errors(c, req, uidata, in_hdrlen, kern_buf); +	trace_9p_client_res(c, type, req->rc->tag, err); +	if (!err) +		return req;  reterr: -	P9_DPRINTK(P9_DEBUG_MUX, "exit: client %p op %d error: %d\n", c, type, -									err);  	p9_free_req(c, req); -	return ERR_PTR(err); +	return ERR_PTR(safe_errno(err));  }  static struct p9_fid *p9_fid_create(struct p9_client *clnt) @@ -642,7 +891,7 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)  	struct p9_fid *fid;  	unsigned long flags; -	P9_DPRINTK(P9_DEBUG_FID, "clnt %p\n", clnt); +	p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);  	fid = kmalloc(sizeof(struct p9_fid), GFP_KERNEL);  	if (!fid)  		return ERR_PTR(-ENOMEM); @@ -675,7 +924,7 @@ static void p9_fid_destroy(struct p9_fid *fid)  	struct p9_client *clnt;  	
unsigned long flags; -	P9_DPRINTK(P9_DEBUG_FID, "fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_FID, "fid %d\n", fid->fid);  	clnt = fid->clnt;  	p9_idpool_put(fid->fid, clnt->fidpool);  	spin_lock_irqsave(&clnt->lock, flags); @@ -692,8 +941,8 @@ static int p9_client_version(struct p9_client *c)  	char *version;  	int msize; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", -						c->msize, c->proto_version); +	p9_debug(P9_DEBUG_9P, ">>> TVERSION msize %d protocol %d\n", +		 c->msize, c->proto_version);  	switch (c->proto_version) {  	case p9_proto_2000L: @@ -718,12 +967,12 @@ static int p9_client_version(struct p9_client *c)  	err = p9pdu_readf(req->rc, c->proto_version, "ds", &msize, &version);  	if (err) { -		P9_DPRINTK(P9_DEBUG_9P, "version error %d\n", err); -		p9pdu_dump(1, req->rc); +		p9_debug(P9_DEBUG_9P, "version error %d\n", err); +		trace_9p_protocol_dump(c, req->rc);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version); +	p9_debug(P9_DEBUG_9P, "<<< RVERSION msize %d %s\n", msize, version);  	if (!strncmp(version, "9P2000.L", 8))  		c->proto_version = p9_proto_2000L;  	else if (!strncmp(version, "9P2000.u", 8)) @@ -749,6 +998,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)  {  	int err;  	struct p9_client *clnt; +	char *client_id;  	err = 0;  	clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); @@ -757,41 +1007,46 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)  	clnt->trans_mod = NULL;  	clnt->trans = NULL; + +	client_id = utsname()->nodename; +	memcpy(clnt->name, client_id, strlen(client_id) + 1); +  	spin_lock_init(&clnt->lock);  	INIT_LIST_HEAD(&clnt->fidlist); -	p9_tag_init(clnt); +	err = p9_tag_init(clnt); +	if (err < 0) +		goto free_client;  	err = parse_opts(options, clnt);  	if (err < 0) -		goto free_client; +		goto destroy_tagpool;  	if (!clnt->trans_mod)  		clnt->trans_mod = v9fs_get_default_trans();  	if (clnt->trans_mod == NULL) {  		
err = -EPROTONOSUPPORT; -		P9_DPRINTK(P9_DEBUG_ERROR, -				"No transport defined or default transport\n"); -		goto free_client; +		p9_debug(P9_DEBUG_ERROR, +			 "No transport defined or default transport\n"); +		goto destroy_tagpool;  	}  	clnt->fidpool = p9_idpool_create();  	if (IS_ERR(clnt->fidpool)) {  		err = PTR_ERR(clnt->fidpool); -		clnt->fidpool = NULL;  		goto put_trans;  	} -	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", -		clnt, clnt->trans_mod, clnt->msize, clnt->proto_version); +	p9_debug(P9_DEBUG_MUX, "clnt %p trans %p msize %d protocol %d\n", +		 clnt, clnt->trans_mod, clnt->msize, clnt->proto_version);  	err = clnt->trans_mod->create(clnt, dev_name, options);  	if (err)  		goto destroy_fidpool; -	if ((clnt->msize+P9_IOHDRSZ) > clnt->trans_mod->maxsize) -		clnt->msize = clnt->trans_mod->maxsize-P9_IOHDRSZ; +	if (clnt->msize > clnt->trans_mod->maxsize) +		clnt->msize = clnt->trans_mod->maxsize;  	err = p9_client_version(clnt);  	if (err) @@ -805,6 +1060,8 @@ destroy_fidpool:  	p9_idpool_destroy(clnt->fidpool);  put_trans:  	v9fs_put_trans(clnt->trans_mod); +destroy_tagpool: +	p9_idpool_destroy(clnt->tagpool);  free_client:  	kfree(clnt);  	return ERR_PTR(err); @@ -815,7 +1072,7 @@ void p9_client_destroy(struct p9_client *clnt)  {  	struct p9_fid *fid, *fidptr; -	P9_DPRINTK(P9_DEBUG_MUX, "clnt %p\n", clnt); +	p9_debug(P9_DEBUG_MUX, "clnt %p\n", clnt);  	if (clnt->trans_mod)  		clnt->trans_mod->close(clnt); @@ -823,7 +1080,7 @@ void p9_client_destroy(struct p9_client *clnt)  	v9fs_put_trans(clnt->trans_mod);  	list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) { -		printk(KERN_INFO "Found fid %d not clunked\n", fid->fid); +		pr_info("Found fid %d not clunked\n", fid->fid);  		p9_fid_destroy(fid);  	} @@ -838,30 +1095,29 @@ EXPORT_SYMBOL(p9_client_destroy);  void p9_client_disconnect(struct p9_client *clnt)  { -	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); +	p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt);  	clnt->status = 
Disconnected;  }  EXPORT_SYMBOL(p9_client_disconnect);  void p9_client_begin_disconnect(struct p9_client *clnt)  { -	P9_DPRINTK(P9_DEBUG_9P, "clnt %p\n", clnt); +	p9_debug(P9_DEBUG_9P, "clnt %p\n", clnt);  	clnt->status = BeginDisconnect;  }  EXPORT_SYMBOL(p9_client_begin_disconnect);  struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, -	char *uname, u32 n_uname, char *aname) +	char *uname, kuid_t n_uname, char *aname)  { -	int err; +	int err = 0;  	struct p9_req_t *req;  	struct p9_fid *fid;  	struct p9_qid qid; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", -					afid ? afid->fid : -1, uname, aname); -	err = 0; +	p9_debug(P9_DEBUG_9P, ">>> TATTACH afid %d uname %s aname %s\n", +		 afid ? afid->fid : -1, uname, aname);  	fid = p9_fid_create(clnt);  	if (IS_ERR(fid)) {  		err = PTR_ERR(fid); @@ -869,7 +1125,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,  		goto error;  	} -	req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid, +	req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid,  			afid ? 
afid->fid : P9_NOFID, uname, aname, n_uname);  	if (IS_ERR(req)) {  		err = PTR_ERR(req); @@ -878,15 +1134,13 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,  	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		p9_free_req(clnt, req);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", -					qid.type, -					(unsigned long long)qid.path, -					qid.version); +	p9_debug(P9_DEBUG_9P, "<<< RATTACH qid %x.%llx.%x\n", +		 qid.type, (unsigned long long)qid.path, qid.version);  	memmove(&fid->qid, &qid, sizeof(struct p9_qid)); @@ -900,15 +1154,15 @@ error:  }  EXPORT_SYMBOL(p9_client_attach); -struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, -	int clone) +struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, +		char **wnames, int clone)  {  	int err;  	struct p9_client *clnt;  	struct p9_fid *fid;  	struct p9_qid *wqids;  	struct p9_req_t *req; -	int16_t nwqids, count; +	uint16_t nwqids, count;  	err = 0;  	wqids = NULL; @@ -926,8 +1180,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,  		fid = oldfid; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %d wname[0] %s\n", -		oldfid->fid, fid->fid, nwname, wnames ? wnames[0] : NULL); +	p9_debug(P9_DEBUG_9P, ">>> TWALK fids %d,%d nwname %ud wname[0] %s\n", +		 oldfid->fid, fid->fid, nwname, wnames ? 
wnames[0] : NULL);  	req = p9_client_rpc(clnt, P9_TWALK, "ddT", oldfid->fid, fid->fid,  								nwname, wnames); @@ -938,13 +1192,13 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,  	err = p9pdu_readf(req->rc, clnt->proto_version, "R", &nwqids, &wqids);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		p9_free_req(clnt, req);  		goto clunk_fid;  	}  	p9_free_req(clnt, req); -	P9_DPRINTK(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids); +	p9_debug(P9_DEBUG_9P, "<<< RWALK nwqid %d:\n", nwqids);  	if (nwqids != nwname) {  		err = -ENOENT; @@ -952,7 +1206,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames,  	}  	for (count = 0; count < nwqids; count++) -		P9_DPRINTK(P9_DEBUG_9P, "<<<     [%d] %x.%llx.%x\n", +		p9_debug(P9_DEBUG_9P, "<<<     [%d] %x.%llx.%x\n",  			count, wqids[count].type,  			(unsigned long long)wqids[count].path,  			wqids[count].version); @@ -987,7 +1241,7 @@ int p9_client_open(struct p9_fid *fid, int mode)  	int iounit;  	clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> %s fid %d mode %d\n", +	p9_debug(P9_DEBUG_9P, ">>> %s fid %d mode %d\n",  		p9_is_proto_dotl(clnt) ? "TLOPEN" : "TOPEN", fid->fid, mode);  	err = 0; @@ -1005,11 +1259,11 @@ int p9_client_open(struct p9_fid *fid, int mode)  	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto free_and_error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n", +	p9_debug(P9_DEBUG_9P, "<<< %s qid %x.%llx.%x iounit %x\n",  		p9_is_proto_dotl(clnt) ? 
"RLOPEN" : "ROPEN",  qid.type,  		(unsigned long long)qid.path, qid.version, iounit); @@ -1024,22 +1278,23 @@ error:  EXPORT_SYMBOL(p9_client_open);  int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, -		gid_t gid, struct p9_qid *qid) +		kgid_t gid, struct p9_qid *qid)  {  	int err = 0;  	struct p9_client *clnt;  	struct p9_req_t *req;  	int iounit; -	P9_DPRINTK(P9_DEBUG_9P, +	p9_debug(P9_DEBUG_9P,  			">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", -			ofid->fid, name, flags, mode, gid); +			ofid->fid, name, flags, mode, +		 	from_kgid(&init_user_ns, gid));  	clnt = ofid->clnt;  	if (ofid->mode != -1)  		return -EINVAL; -	req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags, +	req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags,  			mode, gid);  	if (IS_ERR(req)) {  		err = PTR_ERR(req); @@ -1048,11 +1303,11 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,  	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", qid, &iounit);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto free_and_error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n", +	p9_debug(P9_DEBUG_9P, "<<< RLCREATE qid %x.%llx.%x iounit %x\n",  			qid->type,  			(unsigned long long)qid->path,  			qid->version, iounit); @@ -1076,7 +1331,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,  	struct p9_qid qid;  	int iounit; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n", +	p9_debug(P9_DEBUG_9P, ">>> TCREATE fid %d name %s perm %d mode %d\n",  						fid->fid, name, perm, mode);  	err = 0;  	clnt = fid->clnt; @@ -1093,11 +1348,11 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,  	err = p9pdu_readf(req->rc, clnt->proto_version, "Qd", &qid, &iounit);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto free_and_error;  	} -	
P9_DPRINTK(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n", +	p9_debug(P9_DEBUG_9P, "<<< RCREATE qid %x.%llx.%x iounit %x\n",  				qid.type,  				(unsigned long long)qid.path,  				qid.version, iounit); @@ -1112,18 +1367,18 @@ error:  }  EXPORT_SYMBOL(p9_client_fcreate); -int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid,  		struct p9_qid *qid)  {  	int err = 0;  	struct p9_client *clnt;  	struct p9_req_t *req; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s  symtgt %s\n", +	p9_debug(P9_DEBUG_9P, ">>> TSYMLINK dfid %d name %s  symtgt %s\n",  			dfid->fid, name, symtgt);  	clnt = dfid->clnt; -	req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt, +	req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt,  			gid);  	if (IS_ERR(req)) {  		err = PTR_ERR(req); @@ -1132,11 +1387,11 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid,  	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto free_and_error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n", +	p9_debug(P9_DEBUG_9P, "<<< RSYMLINK qid %x.%llx.%x\n",  			qid->type, (unsigned long long)qid->path, qid->version);  free_and_error: @@ -1151,7 +1406,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)  	struct p9_client *clnt;  	struct p9_req_t *req; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n", +	p9_debug(P9_DEBUG_9P, ">>> TLINK dfid %d oldfid %d newname %s\n",  			dfid->fid, oldfid->fid, newname);  	clnt = dfid->clnt;  	req = p9_client_rpc(clnt, P9_TLINK, "dds", dfid->fid, oldfid->fid, @@ -1159,7 +1414,7 @@ int p9_client_link(struct p9_fid *dfid, struct p9_fid *oldfid, char *newname)  	if (IS_ERR(req))  		return PTR_ERR(req); -	P9_DPRINTK(P9_DEBUG_9P, "<<< RLINK\n"); +	
p9_debug(P9_DEBUG_9P, "<<< RLINK\n");  	p9_free_req(clnt, req);  	return 0;  } @@ -1171,7 +1426,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)  	struct p9_client *clnt;  	struct p9_req_t *req; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n", +	p9_debug(P9_DEBUG_9P, ">>> TFSYNC fid %d datasync:%d\n",  			fid->fid, datasync);  	err = 0;  	clnt = fid->clnt; @@ -1182,7 +1437,7 @@ int p9_client_fsync(struct p9_fid *fid, int datasync)  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, "<<< RFSYNC fid %d\n", fid->fid);  	p9_free_req(clnt, req); @@ -1196,14 +1451,18 @@ int p9_client_clunk(struct p9_fid *fid)  	int err;  	struct p9_client *clnt;  	struct p9_req_t *req; +	int retries = 0;  	if (!fid) { -		P9_EPRINTK(KERN_WARNING, "Trying to clunk with NULL fid\n"); +		pr_warn("%s (%d): Trying to clunk with NULL fid\n", +			__func__, task_pid_nr(current));  		dump_stack();  		return 0;  	} -	P9_DPRINTK(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); +again: +	p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, +								retries);  	err = 0;  	clnt = fid->clnt; @@ -1213,12 +1472,20 @@ int p9_client_clunk(struct p9_fid *fid)  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, "<<< RCLUNK fid %d\n", fid->fid);  	p9_free_req(clnt, req); -	p9_fid_destroy(fid); -  error: +	/* +	 * Fid is not valid even after a failed clunk +	 * If interrupted, retry once then give up and +	 * leak fid until umount. 
+	 */ +	if (err == -ERESTARTSYS) { +		if (retries++ == 0) +			goto again; +	} else +		p9_fid_destroy(fid);  	return err;  }  EXPORT_SYMBOL(p9_client_clunk); @@ -1229,7 +1496,7 @@ int p9_client_remove(struct p9_fid *fid)  	struct p9_client *clnt;  	struct p9_req_t *req; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, ">>> TREMOVE fid %d\n", fid->fid);  	err = 0;  	clnt = fid->clnt; @@ -1239,29 +1506,56 @@ int p9_client_remove(struct p9_fid *fid)  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, "<<< RREMOVE fid %d\n", fid->fid);  	p9_free_req(clnt, req);  error: -	p9_fid_destroy(fid); +	if (err == -ERESTARTSYS) +		p9_client_clunk(fid); +	else +		p9_fid_destroy(fid);  	return err;  }  EXPORT_SYMBOL(p9_client_remove); +int p9_client_unlinkat(struct p9_fid *dfid, const char *name, int flags) +{ +	int err = 0; +	struct p9_req_t *req; +	struct p9_client *clnt; + +	p9_debug(P9_DEBUG_9P, ">>> TUNLINKAT fid %d %s %d\n", +		   dfid->fid, name, flags); + +	clnt = dfid->clnt; +	req = p9_client_rpc(clnt, P9_TUNLINKAT, "dsd", dfid->fid, name, flags); +	if (IS_ERR(req)) { +		err = PTR_ERR(req); +		goto error; +	} +	p9_debug(P9_DEBUG_9P, "<<< RUNLINKAT fid %d %s\n", dfid->fid, name); + +	p9_free_req(clnt, req); +error: +	return err; +} +EXPORT_SYMBOL(p9_client_unlinkat); +  int  p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,  								u32 count)  { -	int err, rsize, total; -	struct p9_client *clnt; -	struct p9_req_t *req;  	char *dataptr; +	int kernel_buf = 0; +	struct p9_req_t *req; +	struct p9_client *clnt; +	int err, rsize, non_zc = 0; + -	P9_DPRINTK(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", fid->fid, -					(long long unsigned) offset, count); +	p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", +		   fid->fid, (unsigned long long) offset, count);  	err = 0;  	clnt = fid->clnt; -	total = 0;  	rsize = fid->iounit;  	if (!rsize || rsize > 
clnt->msize-P9_IOHDRSZ) @@ -1270,7 +1564,26 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,  	if (count < rsize)  		rsize = count; -	req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, rsize); +	/* Don't bother zerocopy for small IO (< 1024) */ +	if (clnt->trans_mod->zc_request && rsize > 1024) { +		char *indata; +		if (data) { +			kernel_buf = 1; +			indata = data; +		} else +			indata = (__force char *)udata; +		/* +		 * response header len is 11 +		 * PDU Header(7) + IO Size (4) +		 */ +		req = p9_client_zc_rpc(clnt, P9_TREAD, indata, NULL, rsize, 0, +				       11, kernel_buf, "dqd", fid->fid, +				       offset, rsize); +	} else { +		non_zc = 1; +		req = p9_client_rpc(clnt, P9_TREAD, "dqd", fid->fid, offset, +				    rsize); +	}  	if (IS_ERR(req)) {  		err = PTR_ERR(req);  		goto error; @@ -1278,19 +1591,21 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,  	err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto free_and_error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RREAD count %d\n", count); +	p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count); -	if (data) { -		memmove(data, dataptr, count); -	} else { -		err = copy_to_user(udata, dataptr, count); -		if (err) { -			err = -EFAULT; -			goto free_and_error; +	if (non_zc) { +		if (data) { +			memmove(data, dataptr, count); +		} else { +			err = copy_to_user(udata, dataptr, count); +			if (err) { +				err = -EFAULT; +				goto free_and_error; +			}  		}  	}  	p9_free_req(clnt, req); @@ -1307,15 +1622,15 @@ int  p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,  							u64 offset, u32 count)  { -	int err, rsize, total; +	int err, rsize; +	int kernel_buf = 0;  	struct p9_client *clnt;  	struct p9_req_t *req; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", -				fid->fid, (long long unsigned) 
offset, count); +	p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", +				fid->fid, (unsigned long long) offset, count);  	err = 0;  	clnt = fid->clnt; -	total = 0;  	rsize = fid->iounit;  	if (!rsize || rsize > clnt->msize-P9_IOHDRSZ) @@ -1323,12 +1638,26 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,  	if (count < rsize)  		rsize = count; -	if (data) -		req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset, -								rsize, data); -	else -		req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset, -								rsize, udata); + +	/* Don't bother zerocopy for small IO (< 1024) */ +	if (clnt->trans_mod->zc_request && rsize > 1024) { +		char *odata; +		if (data) { +			kernel_buf = 1; +			odata = data; +		} else +			odata = (char *)udata; +		req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, odata, 0, rsize, +				       P9_ZC_HDR_SZ, kernel_buf, "dqd", +				       fid->fid, offset, rsize); +	} else { +		if (data) +			req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, +					    offset, rsize, data); +		else +			req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, +					    offset, rsize, udata); +	}  	if (IS_ERR(req)) {  		err = PTR_ERR(req);  		goto error; @@ -1336,11 +1665,11 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,  	err = p9pdu_readf(req->rc, clnt->proto_version, "d", &count);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto free_and_error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RWRITE count %d\n", count); +	p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", count);  	p9_free_req(clnt, req);  	return count; @@ -1360,7 +1689,7 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)  	struct p9_req_t *req;  	u16 ignored; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, ">>> TSTAT fid %d\n", fid->fid);  	if (!ret)  		return ERR_PTR(-ENOMEM); @@ -1376,12 +1705,12 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)  	
err = p9pdu_readf(req->rc, clnt->proto_version, "wS", &ignored, ret);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		p9_free_req(clnt, req);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, +	p9_debug(P9_DEBUG_9P,  		"<<< RSTAT sz=%x type=%x dev=%x qid=%x.%llx.%x\n"  		"<<<    mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"  		"<<<    name=%s uid=%s gid=%s muid=%s extension=(%s)\n" @@ -1390,7 +1719,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)  		(unsigned long long)ret->qid.path, ret->qid.version, ret->mode,  		ret->atime, ret->mtime, (unsigned long long)ret->length,  		ret->name, ret->uid, ret->gid, ret->muid, ret->extension, -		ret->n_uid, ret->n_gid, ret->n_muid); +		from_kuid(&init_user_ns, ret->n_uid), +		from_kgid(&init_user_ns, ret->n_gid), +		from_kuid(&init_user_ns, ret->n_muid));  	p9_free_req(clnt, req);  	return ret; @@ -1410,7 +1741,7 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,  								GFP_KERNEL);  	struct p9_req_t *req; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n", +	p9_debug(P9_DEBUG_9P, ">>> TGETATTR fid %d, request_mask %lld\n",  							fid->fid, request_mask);  	if (!ret) @@ -1427,12 +1758,12 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,  	err = p9pdu_readf(req->rc, clnt->proto_version, "A", ret);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		p9_free_req(clnt, req);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, +	p9_debug(P9_DEBUG_9P,  		"<<< RGETATTR st_result_mask=%lld\n"  		"<<< qid=%x.%llx.%x\n"  		"<<< st_mode=%8.8x st_nlink=%llu\n" @@ -1444,8 +1775,10 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,  		"<<< st_btime_sec=%lld st_btime_nsec=%lld\n"  		"<<< st_gen=%lld st_data_version=%lld",  		ret->st_result_mask, ret->qid.type, ret->qid.path, -		ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid, -		ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize, +		
ret->qid.version, ret->st_mode, ret->st_nlink, +		from_kuid(&init_user_ns, ret->st_uid), +		from_kgid(&init_user_ns, ret->st_gid), +		ret->st_rdev, ret->st_size, ret->st_blksize,  		ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec,  		ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec,  		ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, @@ -1498,8 +1831,8 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)  	err = 0;  	clnt = fid->clnt;  	wst->size = p9_client_statsize(wst, clnt->proto_version); -	P9_DPRINTK(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); -	P9_DPRINTK(P9_DEBUG_9P, +	p9_debug(P9_DEBUG_9P, ">>> TWSTAT fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P,  		"     sz=%x type=%x dev=%x qid=%x.%llx.%x\n"  		"     mode=%8.8x atime=%8.8x mtime=%8.8x length=%llx\n"  		"     name=%s uid=%s gid=%s muid=%s extension=(%s)\n" @@ -1508,7 +1841,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)  		(unsigned long long)wst->qid.path, wst->qid.version, wst->mode,  		wst->atime, wst->mtime, (unsigned long long)wst->length,  		wst->name, wst->uid, wst->gid, wst->muid, wst->extension, -		wst->n_uid, wst->n_gid, wst->n_muid); +		from_kuid(&init_user_ns, wst->n_uid), +		from_kgid(&init_user_ns, wst->n_gid), +		from_kuid(&init_user_ns, wst->n_muid));  	req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst);  	if (IS_ERR(req)) { @@ -1516,7 +1851,7 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, "<<< RWSTAT fid %d\n", fid->fid);  	p9_free_req(clnt, req);  error: @@ -1532,12 +1867,14 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)  	err = 0;  	clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); -	P9_DPRINTK(P9_DEBUG_9P, +	p9_debug(P9_DEBUG_9P, ">>> TSETATTR fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P,  		"    valid=%x mode=%x uid=%d gid=%d 
size=%lld\n"  		"    atime_sec=%lld atime_nsec=%lld\n"  		"    mtime_sec=%lld mtime_nsec=%lld\n", -		p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, +		p9attr->valid, p9attr->mode, +		from_kuid(&init_user_ns, p9attr->uid), +		from_kgid(&init_user_ns, p9attr->gid),  		p9attr->size, p9attr->atime_sec, p9attr->atime_nsec,  		p9attr->mtime_sec, p9attr->mtime_nsec); @@ -1547,7 +1884,7 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)  		err = PTR_ERR(req);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, "<<< RSETATTR fid %d\n", fid->fid);  	p9_free_req(clnt, req);  error:  	return err; @@ -1563,7 +1900,7 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)  	err = 0;  	clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, ">>> TSTATFS fid %d\n", fid->fid);  	req = p9_client_rpc(clnt, P9_TSTATFS, "d", fid->fid);  	if (IS_ERR(req)) { @@ -1575,12 +1912,12 @@ int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb)  		&sb->bsize, &sb->blocks, &sb->bfree, &sb->bavail,  		&sb->files, &sb->ffree, &sb->fsid, &sb->namelen);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		p9_free_req(clnt, req);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld " +	p9_debug(P9_DEBUG_9P, "<<< RSTATFS fid %d type 0x%lx bsize %ld "  		"blocks %llu bfree %llu bavail %llu files %llu ffree %llu "  		"fsid %llu namelen %ld\n",  		fid->fid, (long unsigned int)sb->type, (long int)sb->bsize, @@ -1593,7 +1930,8 @@ error:  }  EXPORT_SYMBOL(p9_client_statfs); -int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name) +int p9_client_rename(struct p9_fid *fid, +		     struct p9_fid *newdirfid, const char *name)  {  	int err;  	struct p9_req_t *req; @@ -1602,7 +1940,7 @@ int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)  	err = 0;  	
clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n", +	p9_debug(P9_DEBUG_9P, ">>> TRENAME fid %d newdirfid %d name %s\n",  			fid->fid, newdirfid->fid, name);  	req = p9_client_rpc(clnt, P9_TRENAME, "dds", fid->fid, @@ -1612,7 +1950,7 @@ int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name)  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, "<<< RRENAME fid %d\n", fid->fid);  	p9_free_req(clnt, req);  error: @@ -1620,6 +1958,36 @@ error:  }  EXPORT_SYMBOL(p9_client_rename); +int p9_client_renameat(struct p9_fid *olddirfid, const char *old_name, +		       struct p9_fid *newdirfid, const char *new_name) +{ +	int err; +	struct p9_req_t *req; +	struct p9_client *clnt; + +	err = 0; +	clnt = olddirfid->clnt; + +	p9_debug(P9_DEBUG_9P, ">>> TRENAMEAT olddirfid %d old name %s" +		   " newdirfid %d new name %s\n", olddirfid->fid, old_name, +		   newdirfid->fid, new_name); + +	req = p9_client_rpc(clnt, P9_TRENAMEAT, "dsds", olddirfid->fid, +			    old_name, newdirfid->fid, new_name); +	if (IS_ERR(req)) { +		err = PTR_ERR(req); +		goto error; +	} + +	p9_debug(P9_DEBUG_9P, "<<< RRENAMEAT newdirfid %d new name %s\n", +		   newdirfid->fid, new_name); + +	p9_free_req(clnt, req); +error: +	return err; +} +EXPORT_SYMBOL(p9_client_renameat); +  /*   * An xattrwalk without @attr_name gives the fid for the lisxattr namespace   */ @@ -1639,7 +2007,7 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,  		attr_fid = NULL;  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, +	p9_debug(P9_DEBUG_9P,  		">>> TXATTRWALK file_fid %d, attr_fid %d name %s\n",  		file_fid->fid, attr_fid->fid, attr_name); @@ -1651,12 +2019,12 @@ struct p9_fid *p9_client_xattrwalk(struct p9_fid *file_fid,  	}  	err = p9pdu_readf(req->rc, clnt->proto_version, "q", attr_size);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		p9_free_req(clnt, req);  		goto clunk_fid; 
 	}  	p9_free_req(clnt, req); -	P9_DPRINTK(P9_DEBUG_9P, "<<<  RXATTRWALK fid %d size %llu\n", +	p9_debug(P9_DEBUG_9P, "<<<  RXATTRWALK fid %d size %llu\n",  		attr_fid->fid, *attr_size);  	return attr_fid;  clunk_fid: @@ -1677,7 +2045,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,  	struct p9_req_t *req;  	struct p9_client *clnt; -	P9_DPRINTK(P9_DEBUG_9P, +	p9_debug(P9_DEBUG_9P,  		">>> TXATTRCREATE fid %d name  %s size %lld flag %d\n",  		fid->fid, name, (long long)attr_size, flags);  	err = 0; @@ -1688,7 +2056,7 @@ int p9_client_xattrcreate(struct p9_fid *fid, const char *name,  		err = PTR_ERR(req);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, "<<< RXATTRCREATE fid %d\n", fid->fid);  	p9_free_req(clnt, req);  error:  	return err; @@ -1697,17 +2065,16 @@ EXPORT_SYMBOL_GPL(p9_client_xattrcreate);  int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)  { -	int err, rsize, total; +	int err, rsize, non_zc = 0;  	struct p9_client *clnt;  	struct p9_req_t *req;  	char *dataptr; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", -				fid->fid, (long long unsigned) offset, count); +	p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", +				fid->fid, (unsigned long long) offset, count);  	err = 0;  	clnt = fid->clnt; -	total = 0;  	rsize = fid->iounit;  	if (!rsize || rsize > clnt->msize-P9_READDIRHDRSZ) @@ -1716,7 +2083,19 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)  	if (count < rsize)  		rsize = count; -	req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, offset, rsize); +	/* Don't bother zerocopy for small IO (< 1024) */ +	if (clnt->trans_mod->zc_request && rsize > 1024) { +		/* +		 * response header len is 11 +		 * PDU Header(7) + IO Size (4) +		 */ +		req = p9_client_zc_rpc(clnt, P9_TREADDIR, data, NULL, rsize, 0, +				       11, 1, "dqd", fid->fid, offset, rsize); +	} else { +		
non_zc = 1; +		req = p9_client_rpc(clnt, P9_TREADDIR, "dqd", fid->fid, +				    offset, rsize); +	}  	if (IS_ERR(req)) {  		err = PTR_ERR(req);  		goto error; @@ -1724,13 +2103,13 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)  	err = p9pdu_readf(req->rc, clnt->proto_version, "D", &count, &dataptr);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto free_and_error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); +	p9_debug(P9_DEBUG_9P, "<<< RREADDIR count %d\n", count); -	if (data) +	if (non_zc)  		memmove(data, dataptr, count);  	p9_free_req(clnt, req); @@ -1744,7 +2123,7 @@ error:  EXPORT_SYMBOL(p9_client_readdir);  int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, -			dev_t rdev, gid_t gid, struct p9_qid *qid) +			dev_t rdev, kgid_t gid, struct p9_qid *qid)  {  	int err;  	struct p9_client *clnt; @@ -1752,19 +2131,19 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,  	err = 0;  	clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " +	p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d "  		"minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); -	req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, +	req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode,  		MAJOR(rdev), MINOR(rdev), gid);  	if (IS_ERR(req))  		return PTR_ERR(req);  	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type, +	p9_debug(P9_DEBUG_9P, "<<< RMKNOD qid %x.%llx.%x\n", qid->type,  				(unsigned long long)qid->path, qid->version);  error: @@ -1775,7 +2154,7 @@ error:  EXPORT_SYMBOL(p9_client_mknod_dotl);  int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, -				gid_t gid, struct p9_qid *qid) +				
kgid_t gid, struct p9_qid *qid)  {  	int err;  	struct p9_client *clnt; @@ -1783,19 +2162,19 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,  	err = 0;  	clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", -		 fid->fid, name, mode, gid); -	req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, +	p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", +		 fid->fid, name, mode, from_kgid(&init_user_ns, gid)); +	req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode,  		gid);  	if (IS_ERR(req))  		return PTR_ERR(req);  	err = p9pdu_readf(req->rc, clnt->proto_version, "Q", qid);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type, +	p9_debug(P9_DEBUG_9P, "<<< RMKDIR qid %x.%llx.%x\n", qid->type,  				(unsigned long long)qid->path, qid->version);  error: @@ -1813,7 +2192,7 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)  	err = 0;  	clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d " +	p9_debug(P9_DEBUG_9P, ">>> TLOCK fid %d type %i flags %d "  			"start %lld length %lld proc_id %d client_id %s\n",  			fid->fid, flock->type, flock->flags, flock->start,  			flock->length, flock->proc_id, flock->client_id); @@ -1827,10 +2206,10 @@ int p9_client_lock_dotl(struct p9_fid *fid, struct p9_flock *flock, u8 *status)  	err = p9pdu_readf(req->rc, clnt->proto_version, "b", status);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status); +	p9_debug(P9_DEBUG_9P, "<<< RLOCK status %i\n", *status);  error:  	p9_free_req(clnt, req);  	return err; @@ -1846,7 +2225,7 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)  	err = 0;  	clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TGETLOCK fid 
%d, type %i start %lld " +	p9_debug(P9_DEBUG_9P, ">>> TGETLOCK fid %d, type %i start %lld "  		"length %lld proc_id %d client_id %s\n", fid->fid, glock->type,  		glock->start, glock->length, glock->proc_id, glock->client_id); @@ -1860,10 +2239,10 @@ int p9_client_getlock_dotl(struct p9_fid *fid, struct p9_getlock *glock)  			&glock->start, &glock->length, &glock->proc_id,  			&glock->client_id);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld " +	p9_debug(P9_DEBUG_9P, "<<< RGETLOCK type %i start %lld length %lld "  		"proc_id %d client_id %s\n", glock->type, glock->start,  		glock->length, glock->proc_id, glock->client_id);  error: @@ -1880,7 +2259,7 @@ int p9_client_readlink(struct p9_fid *fid, char **target)  	err = 0;  	clnt = fid->clnt; -	P9_DPRINTK(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid); +	p9_debug(P9_DEBUG_9P, ">>> TREADLINK fid %d\n", fid->fid);  	req = p9_client_rpc(clnt, P9_TREADLINK, "d", fid->fid);  	if (IS_ERR(req)) @@ -1888,10 +2267,10 @@ int p9_client_readlink(struct p9_fid *fid, char **target)  	err = p9pdu_readf(req->rc, clnt->proto_version, "s", target);  	if (err) { -		p9pdu_dump(1, req->rc); +		trace_9p_protocol_dump(clnt, req->rc);  		goto error;  	} -	P9_DPRINTK(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target); +	p9_debug(P9_DEBUG_9P, "<<< RREADLINK target %s\n", *target);  error:  	p9_free_req(clnt, req);  	return err; diff --git a/net/9p/error.c b/net/9p/error.c index 52518512a93..126fd0dceea 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -27,6 +27,8 @@   *   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/module.h>  #include <linux/list.h>  #include <linux/jhash.h> @@ -219,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init);  int p9_errstr2errno(char *errstr, int len)  {  	int errno; -	struct hlist_node *p;  	struct errormap *c;  	int bucket;  	errno = 0; -	p = NULL;  	c = NULL;  	bucket = 
jhash(errstr, len, 0) % ERRHASHSZ; -	hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { +	hlist_for_each_entry(c, &hash_errmap[bucket], list) {  		if (c->namelen == len && !memcmp(c->name, errstr, len)) {  			errno = c->val;  			break; @@ -237,8 +237,8 @@ int p9_errstr2errno(char *errstr, int len)  	if (errno == 0) {  		/* TODO: if error isn't found, add it dynamically */  		errstr[len] = 0; -		printk(KERN_ERR "%s: server reported unknown error %s\n", -			__func__, errstr); +		pr_err("%s: server reported unknown error %s\n", +		       __func__, errstr);  		errno = ESERVERFAULT;  	} diff --git a/net/9p/mod.c b/net/9p/mod.c index cf8a4128cd5..6ab36aea772 100644 --- a/net/9p/mod.c +++ b/net/9p/mod.c @@ -24,7 +24,11 @@   *   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/module.h> +#include <linux/errno.h> +#include <linux/sched.h>  #include <linux/moduleparam.h>  #include <net/9p/9p.h>  #include <linux/fs.h> @@ -39,6 +43,29 @@ unsigned int p9_debug_level = 0;	/* feature-rific global debug level  */  EXPORT_SYMBOL(p9_debug_level);  module_param_named(debug, p9_debug_level, uint, 0);  MODULE_PARM_DESC(debug, "9P debugging level"); + +void _p9_debug(enum p9_debug_flags level, const char *func, +		const char *fmt, ...) 
+{ +	struct va_format vaf; +	va_list args; + +	if ((p9_debug_level & level) != level) +		return; + +	va_start(args, fmt); + +	vaf.fmt = fmt; +	vaf.va = &args; + +	if (level == P9_DEBUG_9P) +		pr_notice("(%8.8d) %pV", task_pid_nr(current), &vaf); +	else +		pr_notice("-- %s (%d): %pV", func, task_pid_nr(current), &vaf); + +	va_end(args); +} +EXPORT_SYMBOL(_p9_debug);  #endif  /* @@ -80,14 +107,14 @@ EXPORT_SYMBOL(v9fs_unregister_trans);   * @name: string identifying transport   *   */ -struct p9_trans_module *v9fs_get_trans_by_name(const substring_t *name) +struct p9_trans_module *v9fs_get_trans_by_name(char *s)  {  	struct p9_trans_module *t, *found = NULL;  	spin_lock(&v9fs_trans_lock);  	list_for_each_entry(t, &v9fs_trans_list, list) -		if (strncmp(t->name, name->from, name->to-name->from) == 0 && +		if (strcmp(t->name, s) == 0 &&  		    try_module_get(t->owner)) {  			found = t;  			break; @@ -139,7 +166,7 @@ void v9fs_put_trans(struct p9_trans_module *m)  }  /** - * v9fs_init - Initialize module + * init_p9 - Initialize module   *   */  static int __init init_p9(void) @@ -147,20 +174,20 @@ static int __init init_p9(void)  	int ret = 0;  	p9_error_init(); -	printk(KERN_INFO "Installing 9P2000 support\n"); +	pr_info("Installing 9P2000 support\n");  	p9_trans_fd_init();  	return ret;  }  /** - * v9fs_init - shutdown module + * exit_p9 - shutdown module   *   */  static void __exit exit_p9(void)  { -	printk(KERN_INFO "Unloading 9P2000 support\n"); +	pr_info("Unloading 9P2000 support\n");  	p9_trans_fd_exit();  } diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 798beac7f10..ab9127ec5b7 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -37,46 +37,11 @@  #include <net/9p/client.h>  #include "protocol.h" +#include <trace/events/9p.h> +  static int  p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...); -#ifdef CONFIG_NET_9P_DEBUG -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ -	int i, n; -	u8 *data = pdu->sdata; -	int datalen = 
pdu->size; -	char buf[255]; -	int buflen = 255; - -	i = n = 0; -	if (datalen > (buflen-16)) -		datalen = buflen-16; -	while (i < datalen) { -		n += scnprintf(buf + n, buflen - n, "%02x ", data[i]); -		if (i%4 == 3) -			n += scnprintf(buf + n, buflen - n, " "); -		if (i%32 == 31) -			n += scnprintf(buf + n, buflen - n, "\n"); - -		i++; -	} -	n += scnprintf(buf + n, buflen - n, "\n"); - -	if (way) -		P9_DPRINTK(P9_DEBUG_PKT, "[[[(%d) %s\n", datalen, buf); -	else -		P9_DPRINTK(P9_DEBUG_PKT, "]]](%d) %s\n", datalen, buf); -} -#else -void -p9pdu_dump(int way, struct p9_fcall *pdu) -{ -} -#endif -EXPORT_SYMBOL(p9pdu_dump); -  void p9stat_free(struct p9_wstat *stbuf)  {  	kfree(stbuf->name); @@ -87,7 +52,7 @@ void p9stat_free(struct p9_wstat *stbuf)  }  EXPORT_SYMBOL(p9stat_free); -static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size) +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)  {  	size_t len = min(pdu->size - pdu->offset, size);  	memcpy(data, &pdu->sdata[pdu->offset], len); @@ -120,6 +85,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)  	d - int32_t  	q - int64_t  	s - string +	u - numeric uid +	g - numeric gid  	S - stat  	Q - qid  	D - data blob (int32_t size followed by void *, results are not freed) @@ -178,29 +145,46 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,  			break;  		case 's':{  				char **sptr = va_arg(ap, char **); -				int16_t len; -				int size; +				uint16_t len;  				errcode = p9pdu_readf(pdu, proto_version,  								"w", &len);  				if (errcode)  					break; -				size = max_t(int16_t, len, 0); - -				*sptr = kmalloc(size + 1, GFP_KERNEL); +				*sptr = kmalloc(len + 1, GFP_NOFS);  				if (*sptr == NULL) {  					errcode = -EFAULT;  					break;  				} -				if (pdu_read(pdu, *sptr, size)) { +				if (pdu_read(pdu, *sptr, len)) {  					errcode = -EFAULT;  					kfree(*sptr);  					*sptr = NULL;  				} else -					(*sptr)[size] = 0; +					(*sptr)[len] = 0;  			}  			
break; +		case 'u': { +				kuid_t *uid = va_arg(ap, kuid_t *); +				__le32 le_val; +				if (pdu_read(pdu, &le_val, sizeof(le_val))) { +					errcode = -EFAULT; +					break; +				} +				*uid = make_kuid(&init_user_ns, +						 le32_to_cpu(le_val)); +			} break; +		case 'g': { +				kgid_t *gid = va_arg(ap, kgid_t *); +				__le32 le_val; +				if (pdu_read(pdu, &le_val, sizeof(le_val))) { +					errcode = -EFAULT; +					break; +				} +				*gid = make_kgid(&init_user_ns, +						 le32_to_cpu(le_val)); +			} break;  		case 'Q':{  				struct p9_qid *qid =  				    va_arg(ap, struct p9_qid *); @@ -215,11 +199,12 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,  				    va_arg(ap, struct p9_wstat *);  				memset(stbuf, 0, sizeof(struct p9_wstat)); -				stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = -									-1; +				stbuf->n_uid = stbuf->n_muid = INVALID_UID; +				stbuf->n_gid = INVALID_GID; +  				errcode =  				    p9pdu_readf(pdu, proto_version, -						"wwdQdddqssss?sddd", +						"wwdQdddqssss?sugu",  						&stbuf->size, &stbuf->type,  						&stbuf->dev, &stbuf->qid,  						&stbuf->mode, &stbuf->atime, @@ -234,21 +219,21 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,  			}  			break;  		case 'D':{ -				int32_t *count = va_arg(ap, int32_t *); +				uint32_t *count = va_arg(ap, uint32_t *);  				void **data = va_arg(ap, void **);  				errcode =  				    p9pdu_readf(pdu, proto_version, "d", count);  				if (!errcode) {  					*count = -					    min_t(int32_t, *count, +					    min_t(uint32_t, *count,  						  pdu->size - pdu->offset);  					*data = &pdu->sdata[pdu->offset];  				}  			}  			break;  		case 'T':{ -				int16_t *nwname = va_arg(ap, int16_t *); +				uint16_t *nwname = va_arg(ap, uint16_t *);  				char ***wnames = va_arg(ap, char ***);  				errcode = p9pdu_readf(pdu, proto_version, @@ -256,7 +241,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,  				if (!errcode) {  					*wnames =  					    
kmalloc(sizeof(char *) * *nwname, -						    GFP_KERNEL); +						    GFP_NOFS);  					if (!*wnames)  						errcode = -ENOMEM;  				} @@ -300,7 +285,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,  					*wqids =  					    kmalloc(*nwqid *  						    sizeof(struct p9_qid), -						    GFP_KERNEL); +						    GFP_NOFS);  					if (*wqids == NULL)  						errcode = -ENOMEM;  				} @@ -332,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,  				memset(stbuf, 0, sizeof(struct p9_stat_dotl));  				errcode =  				    p9pdu_readf(pdu, proto_version, -					"qQdddqqqqqqqqqqqqqqq", +					"qQdugqqqqqqqqqqqqqqq",  					&stbuf->st_result_mask,  					&stbuf->qid,  					&stbuf->st_mode, @@ -404,9 +389,10 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,  			break;  		case 's':{  				const char *sptr = va_arg(ap, const char *); -				int16_t len = 0; +				uint16_t len = 0;  				if (sptr) -					len = min_t(int16_t, strlen(sptr), USHRT_MAX); +					len = min_t(size_t, strlen(sptr), +								USHRT_MAX);  				errcode = p9pdu_writef(pdu, proto_version,  								"w", len); @@ -414,6 +400,20 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,  					errcode = -EFAULT;  			}  			break; +		case 'u': { +				kuid_t uid = va_arg(ap, kuid_t); +				__le32 val = cpu_to_le32( +						from_kuid(&init_user_ns, uid)); +				if (pdu_write(pdu, &val, sizeof(val))) +					errcode = -EFAULT; +			} break; +		case 'g': { +				kgid_t gid = va_arg(ap, kgid_t); +				__le32 val = cpu_to_le32( +						from_kgid(&init_user_ns, gid)); +				if (pdu_write(pdu, &val, sizeof(val))) +					errcode = -EFAULT; +			} break;  		case 'Q':{  				const struct p9_qid *qid =  				    va_arg(ap, const struct p9_qid *); @@ -427,7 +427,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,  				    va_arg(ap, const struct p9_wstat *);  				errcode =  				    p9pdu_writef(pdu, proto_version, -						 
"wwdQdddqssss?sddd", +						 "wwdQdddqssss?sugu",  						 stbuf->size, stbuf->type,  						 stbuf->dev, &stbuf->qid,  						 stbuf->mode, stbuf->atime, @@ -438,7 +438,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,  						 stbuf->n_gid, stbuf->n_muid);  			} break;  		case 'D':{ -				int32_t count = va_arg(ap, int32_t); +				uint32_t count = va_arg(ap, uint32_t);  				const void *data = va_arg(ap, const void *);  				errcode = p9pdu_writef(pdu, proto_version, "d", @@ -458,7 +458,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,  			}  			break;  		case 'T':{ -				int16_t nwname = va_arg(ap, int); +				uint16_t nwname = va_arg(ap, int);  				const char **wnames = va_arg(ap, const char **);  				errcode = p9pdu_writef(pdu, proto_version, "w", @@ -505,7 +505,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,  							struct p9_iattr_dotl *);  				errcode = p9pdu_writef(pdu, proto_version, -							"ddddqqqqq", +							"ddugqqqqq",  							p9attr->valid,  							p9attr->mode,  							p9attr->uid, @@ -559,7 +559,7 @@ p9pdu_writef(struct p9_fcall *pdu, int proto_version, const char *fmt, ...)  	
return ret;  } -int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version) +int p9stat_read(struct p9_client *clnt, char *buf, int len, struct p9_wstat *st)  {  	struct p9_fcall fake_pdu;  	int ret; @@ -569,10 +569,10 @@ int p9stat_read(char *buf, int len, struct p9_wstat *st, int proto_version)  	fake_pdu.sdata = buf;  	fake_pdu.offset = 0; -	ret = p9pdu_readf(&fake_pdu, proto_version, "S", st); +	ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "S", st);  	if (ret) { -		P9_DPRINTK(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); -		p9pdu_dump(1, &fake_pdu); +		p9_debug(P9_DEBUG_9P, "<<< p9stat_read failed: %d\n", ret); +		trace_9p_protocol_dump(clnt, &fake_pdu);  	}  	return ret; @@ -581,10 +581,11 @@ EXPORT_SYMBOL(p9stat_read);  int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type)  { +	pdu->id = type;  	return p9pdu_writef(pdu, 0, "dbw", 0, type, tag);  } -int p9pdu_finalize(struct p9_fcall *pdu) +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu)  {  	int size = pdu->size;  	int err; @@ -593,13 +594,9 @@ int p9pdu_finalize(struct p9_fcall *pdu)  	err = p9pdu_writef(pdu, 0, "d", size);  	pdu->size = size; -#ifdef CONFIG_NET_9P_DEBUG -	if ((p9_debug_level & P9_DEBUG_PKT) == P9_DEBUG_PKT) -		p9pdu_dump(0, pdu); -#endif - -	P9_DPRINTK(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", pdu->size, -							pdu->id, pdu->tag); +	trace_9p_protocol_dump(clnt, pdu); +	p9_debug(P9_DEBUG_9P, ">>> size=%d type: %d tag: %d\n", +		 pdu->size, pdu->id, pdu->tag);  	return err;  } @@ -610,8 +607,8 @@ void p9pdu_reset(struct p9_fcall *pdu)  	pdu->size = 0;  } -int p9dirent_read(char *buf, int len, struct p9_dirent *dirent, -						int proto_version) +int p9dirent_read(struct p9_client *clnt, char *buf, int len, +		  struct p9_dirent *dirent)  {  	struct p9_fcall fake_pdu;  	int ret; @@ -622,15 +619,16 @@ int p9dirent_read(char *buf, int len, struct p9_dirent *dirent,  	fake_pdu.sdata = buf;  	fake_pdu.offset = 0; -	ret = 
p9pdu_readf(&fake_pdu, proto_version, "Qqbs", &dirent->qid, -			&dirent->d_off, &dirent->d_type, &nameptr); +	ret = p9pdu_readf(&fake_pdu, clnt->proto_version, "Qqbs", &dirent->qid, +			  &dirent->d_off, &dirent->d_type, &nameptr);  	if (ret) { -		P9_DPRINTK(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); -		p9pdu_dump(1, &fake_pdu); +		p9_debug(P9_DEBUG_9P, "<<< p9dirent_read failed: %d\n", ret); +		trace_9p_protocol_dump(clnt, &fake_pdu);  		goto out;  	}  	strcpy(dirent->d_name, nameptr); +	kfree(nameptr);  out:  	return fake_pdu.offset; diff --git a/net/9p/protocol.h b/net/9p/protocol.h index 2431c0f38d5..2cc525fa49f 100644 --- a/net/9p/protocol.h +++ b/net/9p/protocol.h @@ -29,6 +29,6 @@ int p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,  								va_list ap);  int p9pdu_readf(struct p9_fcall *pdu, int proto_version, const char *fmt, ...);  int p9pdu_prepare(struct p9_fcall *pdu, int16_t tag, int8_t type); -int p9pdu_finalize(struct p9_fcall *pdu); -void p9pdu_dump(int, struct p9_fcall *); +int p9pdu_finalize(struct p9_client *clnt, struct p9_fcall *pdu);  void p9pdu_reset(struct p9_fcall *pdu); +size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size); diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c new file mode 100644 index 00000000000..2ee3879161b --- /dev/null +++ b/net/9p/trans_common.c @@ -0,0 +1,69 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + */ + +#include <linux/slab.h> +#include <linux/module.h> +#include <net/9p/9p.h> +#include <net/9p/client.h> +#include <linux/scatterlist.h> +#include "trans_common.h" + +/** + * p9_release_pages - Release pages after the transaction. + */ +void p9_release_pages(struct page **pages, int nr_pages) +{ +	int i; + +	for (i = 0; i < nr_pages; i++) +		if (pages[i]) +			put_page(pages[i]); +} +EXPORT_SYMBOL(p9_release_pages); + +/** + * p9_nr_pages - Return number of pages needed to accommodate the payload. + */ +int p9_nr_pages(char *data, int len) +{ +	unsigned long start_page, end_page; +	start_page =  (unsigned long)data >> PAGE_SHIFT; +	end_page = ((unsigned long)data + len + PAGE_SIZE - 1) >> PAGE_SHIFT; +	return end_page - start_page; +} +EXPORT_SYMBOL(p9_nr_pages); + +/** + * p9_payload_gup - Translates user buffer into kernel pages and + * pins them either for read/write through get_user_pages_fast(). + * @data: start address of the user buffer to pin. + * @nr_pages: in: number of pages requested; out (on success): number of + * pages actually pinned. gup may not return requested # of pages. + * @pages: array that receives the pinned pages. + * @write: Indicates if the pages are for read or write. 
+ */ + +int p9_payload_gup(char *data, int *nr_pages, struct page **pages, int write) +{ +	int nr_mapped_pages; + +	nr_mapped_pages = get_user_pages_fast((unsigned long)data, +					      *nr_pages, write, pages); +	if (nr_mapped_pages <= 0) +		return nr_mapped_pages; + +	*nr_pages = nr_mapped_pages; +	return 0; +} +EXPORT_SYMBOL(p9_payload_gup); diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h new file mode 100644 index 00000000000..173bb550a9e --- /dev/null +++ b/net/9p/trans_common.h @@ -0,0 +1,17 @@ +/* + * Copyright IBM Corporation, 2010 + * Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
+ * + */ + +void p9_release_pages(struct page **, int); +int p9_payload_gup(char *, int *, struct page **, int); +int p9_nr_pages(char *, int); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index 078eb162d9b..80d08f6664c 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -25,6 +25,8 @@   *   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/in.h>  #include <linux/module.h>  #include <linux/net.h> @@ -61,20 +63,7 @@ struct p9_fd_opts {  	int rfd;  	int wfd;  	u16 port; -}; - -/** - * struct p9_trans_fd - transport state - * @rd: reference to file to read from - * @wr: reference of file to write to - * @conn: connection state reference - * - */ - -struct p9_trans_fd { -	struct file *rd; -	struct file *wr; -	struct p9_conn *conn; +	int privport;  };  /* @@ -85,12 +74,15 @@ struct p9_trans_fd {  enum {  	/* Options that take integer arguments */  	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, +	/* Options that take no arguments */ +	Opt_privport,  };  static const match_table_t tokens = {  	{Opt_port, "port=%u"},  	{Opt_rfdno, "rfdno=%u"},  	{Opt_wfdno, "wfdno=%u"}, +	{Opt_privport, "privport"},  	{Opt_err, NULL},  }; @@ -153,10 +145,28 @@ struct p9_conn {  	unsigned long wsched;  }; +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ + +struct p9_trans_fd { +	struct file *rd; +	struct file *wr; +	struct p9_conn conn; +}; + +static void p9_poll_workfn(struct work_struct *work); +  static DEFINE_SPINLOCK(p9_poll_lock);  static LIST_HEAD(p9_poll_pending_list); -static struct workqueue_struct *p9_mux_wq; -static struct task_struct *p9_poll_task; +static DECLARE_WORK(p9_poll_work, p9_poll_workfn); + +static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; +static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;  static void p9_mux_poll_stop(struct p9_conn *m)  { @@ -190,7 +200,7 @@ static void p9_conn_cancel(struct 
p9_conn *m, int err)  	unsigned long flags;  	LIST_HEAD(cancel_list); -	P9_DPRINTK(P9_DEBUG_ERROR, "mux %p err %d\n", m, err); +	p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n", m, err);  	spin_lock_irqsave(&m->client->lock, flags); @@ -202,23 +212,19 @@ static void p9_conn_cancel(struct p9_conn *m, int err)  	m->err = err;  	list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { -		req->status = REQ_STATUS_ERROR; -		if (!req->t_err) -			req->t_err = err;  		list_move(&req->req_list, &cancel_list);  	}  	list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { -		req->status = REQ_STATUS_ERROR; -		if (!req->t_err) -			req->t_err = err;  		list_move(&req->req_list, &cancel_list);  	}  	spin_unlock_irqrestore(&m->client->lock, flags);  	list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { -		P9_DPRINTK(P9_DEBUG_ERROR, "call back req %p\n", req); +		p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);  		list_del(&req->req_list); -		p9_client_cb(m->client, req); +		if (!req->t_err) +			req->t_err = err; +		p9_client_cb(m->client, req, REQ_STATUS_ERROR);  	}  } @@ -234,10 +240,10 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)  	if (!ts)  		return -EREMOTEIO; -	if (!ts->rd->f_op || !ts->rd->f_op->poll) +	if (!ts->rd->f_op->poll)  		return -EIO; -	if (!ts->wr->f_op || !ts->wr->f_op->poll) +	if (!ts->wr->f_op->poll)  		return -EIO;  	ret = ts->rd->f_op->poll(ts->rd, pt); @@ -274,7 +280,7 @@ static int p9_fd_read(struct p9_client *client, void *v, int len)  		return -EREMOTEIO;  	if (!(ts->rd->f_flags & O_NONBLOCK)) -		P9_DPRINTK(P9_DEBUG_ERROR, "blocking read ...\n"); +		p9_debug(P9_DEBUG_ERROR, "blocking read ...\n");  	ret = kernel_read(ts->rd, ts->rd->f_pos, v, len);  	if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) @@ -292,13 +298,14 @@ static void p9_read_work(struct work_struct *work)  {  	int n, err;  	struct p9_conn *m; +	int status = REQ_STATUS_ERROR;  	m = container_of(work, struct p9_conn, rq);  	if (m->err < 
0)  		return; -	P9_DPRINTK(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos); +	p9_debug(P9_DEBUG_TRANS, "start mux %p pos %d\n", m, m->rpos);  	if (!m->rbuf) {  		m->rbuf = m->tmp_buf; @@ -307,14 +314,13 @@ static void p9_read_work(struct work_struct *work)  	}  	clear_bit(Rpending, &m->wsched); -	P9_DPRINTK(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", m, -					m->rpos, m->rsize, m->rsize-m->rpos); +	p9_debug(P9_DEBUG_TRANS, "read mux %p pos %d size: %d = %d\n", +		 m, m->rpos, m->rsize, m->rsize-m->rpos);  	err = p9_fd_read(m->client, m->rbuf + m->rpos,  						m->rsize - m->rpos); -	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); +	p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);  	if (err == -EAGAIN) { -		clear_bit(Rworksched, &m->wsched); -		return; +		goto end_clear;  	}  	if (err <= 0) @@ -324,32 +330,31 @@ static void p9_read_work(struct work_struct *work)  	if ((!m->req) && (m->rpos == m->rsize)) { /* header read in */  		u16 tag; -		P9_DPRINTK(P9_DEBUG_TRANS, "got new header\n"); +		p9_debug(P9_DEBUG_TRANS, "got new header\n");  		n = le32_to_cpu(*(__le32 *) m->rbuf); /* read packet size */  		if (n >= m->client->msize) { -			P9_DPRINTK(P9_DEBUG_ERROR, -				"requested packet size too big: %d\n", n); +			p9_debug(P9_DEBUG_ERROR, +				 "requested packet size too big: %d\n", n);  			err = -EIO;  			goto error;  		}  		tag = le16_to_cpu(*(__le16 *) (m->rbuf+5)); /* read tag */ -		P9_DPRINTK(P9_DEBUG_TRANS, -			"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); +		p9_debug(P9_DEBUG_TRANS, +			 "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);  		m->req = p9_tag_lookup(m->client, tag); -		if (!m->req || (m->req->status != REQ_STATUS_SENT && -					m->req->status != REQ_STATUS_FLSH)) { -			P9_DPRINTK(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", -								 tag); +		if (!m->req || (m->req->status != REQ_STATUS_SENT)) { +			p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", +				 tag);  			err = -EIO;  			goto error;  		}  		
if (m->req->rc == NULL) {  			m->req->rc = kmalloc(sizeof(struct p9_fcall) + -						m->client->msize, GFP_KERNEL); +						m->client->msize, GFP_NOFS);  			if (!m->req->rc) {  				m->req = NULL;  				err = -ENOMEM; @@ -363,32 +368,33 @@ static void p9_read_work(struct work_struct *work)  	/* not an else because some packets (like clunk) have no payload */  	if ((m->req) && (m->rpos == m->rsize)) { /* packet is read in */ -		P9_DPRINTK(P9_DEBUG_TRANS, "got new packet\n"); +		p9_debug(P9_DEBUG_TRANS, "got new packet\n");  		spin_lock(&m->client->lock);  		if (m->req->status != REQ_STATUS_ERROR) -			m->req->status = REQ_STATUS_RCVD; +			status = REQ_STATUS_RCVD;  		list_del(&m->req->req_list);  		spin_unlock(&m->client->lock); -		p9_client_cb(m->client, m->req); +		p9_client_cb(m->client, m->req, status);  		m->rbuf = NULL;  		m->rpos = 0;  		m->rsize = 0;  		m->req = NULL;  	} +end_clear: +	clear_bit(Rworksched, &m->wsched); +  	if (!list_empty(&m->req_list)) {  		if (test_and_clear_bit(Rpending, &m->wsched))  			n = POLLIN;  		else  			n = p9_fd_poll(m->client, NULL); -		if (n & POLLIN) { -			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); -			queue_work(p9_mux_wq, &m->rq); -		} else -			clear_bit(Rworksched, &m->wsched); -	} else -		clear_bit(Rworksched, &m->wsched); +		if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { +			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); +			schedule_work(&m->rq); +		} +	}  	return;  error: @@ -417,7 +423,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len)  		return -EREMOTEIO;  	if (!(ts->wr->f_flags & O_NONBLOCK)) -		P9_DPRINTK(P9_DEBUG_ERROR, "blocking write ...\n"); +		p9_debug(P9_DEBUG_ERROR, "blocking write ...\n");  	oldfs = get_fs();  	set_fs(get_ds()); @@ -450,16 +456,17 @@ static void p9_write_work(struct work_struct *work)  	}  	if (!m->wsize) { +		spin_lock(&m->client->lock);  		if (list_empty(&m->unsent_req_list)) {  			clear_bit(Wworksched, &m->wsched); +			
spin_unlock(&m->client->lock);  			return;  		} -		spin_lock(&m->client->lock);  		req = list_entry(m->unsent_req_list.next, struct p9_req_t,  			       req_list);  		req->status = REQ_STATUS_SENT; -		P9_DPRINTK(P9_DEBUG_TRANS, "move req %p\n", req); +		p9_debug(P9_DEBUG_TRANS, "move req %p\n", req);  		list_move_tail(&req->req_list, &m->req_list);  		m->wbuf = req->tc->sdata; @@ -468,15 +475,14 @@ static void p9_write_work(struct work_struct *work)  		spin_unlock(&m->client->lock);  	} -	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", m, m->wpos, -								m->wsize); +	p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n", +		 m, m->wpos, m->wsize);  	clear_bit(Wpending, &m->wsched);  	err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); -	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); -	if (err == -EAGAIN) { -		clear_bit(Wworksched, &m->wsched); -		return; -	} +	p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); +	if (err == -EAGAIN) +		goto end_clear; +  	if (err < 0)  		goto error; @@ -489,19 +495,21 @@ static void p9_write_work(struct work_struct *work)  	if (m->wpos == m->wsize)  		m->wpos = m->wsize = 0; -	if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { +end_clear: +	clear_bit(Wworksched, &m->wsched); + +	if (m->wsize || !list_empty(&m->unsent_req_list)) {  		if (test_and_clear_bit(Wpending, &m->wsched))  			n = POLLOUT;  		else  			n = p9_fd_poll(m->client, NULL); -		if (n & POLLOUT) { -			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); -			queue_work(p9_mux_wq, &m->wq); -		} else -			clear_bit(Wworksched, &m->wsched); -	} else -		clear_bit(Wworksched, &m->wsched); +		if ((n & POLLOUT) && +		   !test_and_set_bit(Wworksched, &m->wsched)) { +			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); +			schedule_work(&m->wq); +		} +	}  	return; @@ -510,21 +518,20 @@ error:  	clear_bit(Wworksched, &m->wsched);  } -static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int 
p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key)  {  	struct p9_poll_wait *pwait =  		container_of(wait, struct p9_poll_wait, wait);  	struct p9_conn *m = pwait->conn;  	unsigned long flags; -	DECLARE_WAITQUEUE(dummy_wait, p9_poll_task);  	spin_lock_irqsave(&p9_poll_lock, flags);  	if (list_empty(&m->poll_pending_link))  		list_add_tail(&m->poll_pending_link, &p9_poll_pending_list);  	spin_unlock_irqrestore(&p9_poll_lock, flags); -	/* perform the default wake up operation */ -	return default_wake_function(&dummy_wait, mode, sync, key); +	schedule_work(&p9_poll_work); +	return 1;  }  /** @@ -551,7 +558,7 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)  	}  	if (!pwait) { -		P9_DPRINTK(P9_DEBUG_ERROR, "not enough wait_address slots\n"); +		p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n");  		return;  	} @@ -562,22 +569,19 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)  }  /** - * p9_conn_create - allocate and initialize the per-session mux data + * p9_conn_create - initialize the per-session mux data   * @client: client instance   *   * Note: Creates the polling task if this is the first session.   
*/ -static struct p9_conn *p9_conn_create(struct p9_client *client) +static void p9_conn_create(struct p9_client *client)  {  	int n; -	struct p9_conn *m; +	struct p9_trans_fd *ts = client->trans; +	struct p9_conn *m = &ts->conn; -	P9_DPRINTK(P9_DEBUG_TRANS, "client %p msize %d\n", client, -								client->msize); -	m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); -	if (!m) -		return ERR_PTR(-ENOMEM); +	p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);  	INIT_LIST_HEAD(&m->mux_list);  	m->client = client; @@ -591,16 +595,14 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)  	n = p9_fd_poll(client, &m->pt);  	if (n & POLLIN) { -		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); +		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);  		set_bit(Rpending, &m->wsched);  	}  	if (n & POLLOUT) { -		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); +		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);  		set_bit(Wpending, &m->wsched);  	} - -	return m;  }  /** @@ -618,7 +620,7 @@ static void p9_poll_mux(struct p9_conn *m)  	n = p9_fd_poll(m->client, NULL);  	if (n < 0 || n & (POLLERR | POLLHUP | POLLNVAL)) { -		P9_DPRINTK(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n); +		p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n", m, n);  		if (n >= 0)  			n = -ECONNRESET;  		p9_conn_cancel(m, n); @@ -626,20 +628,20 @@ static void p9_poll_mux(struct p9_conn *m)  	if (n & POLLIN) {  		set_bit(Rpending, &m->wsched); -		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can read\n", m); +		p9_debug(P9_DEBUG_TRANS, "mux %p can read\n", m);  		if (!test_and_set_bit(Rworksched, &m->wsched)) { -			P9_DPRINTK(P9_DEBUG_TRANS, "sched read work %p\n", m); -			queue_work(p9_mux_wq, &m->rq); +			p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); +			schedule_work(&m->rq);  		}  	}  	if (n & POLLOUT) {  		set_bit(Wpending, &m->wsched); -		P9_DPRINTK(P9_DEBUG_TRANS, "mux %p can write\n", m); +		p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);  		if ((m->wsize || 
!list_empty(&m->unsent_req_list)) &&  		    !test_and_set_bit(Wworksched, &m->wsched)) { -			P9_DPRINTK(P9_DEBUG_TRANS, "sched write work %p\n", m); -			queue_work(p9_mux_wq, &m->wq); +			p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); +			schedule_work(&m->wq);  		}  	}  } @@ -659,10 +661,10 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)  {  	int n;  	struct p9_trans_fd *ts = client->trans; -	struct p9_conn *m = ts->conn; +	struct p9_conn *m = &ts->conn; -	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, -						current, req->tc, req->tc->id); +	p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", +		 m, current, req->tc, req->tc->id);  	if (m->err < 0)  		return m->err; @@ -677,7 +679,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)  		n = p9_fd_poll(m->client, NULL);  	if (n & POLLOUT && !test_and_set_bit(Wworksched, &m->wsched)) -		queue_work(p9_mux_wq, &m->wq); +		schedule_work(&m->wq);  	return 0;  } @@ -686,7 +688,7 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)  {  	int ret = 1; -	P9_DPRINTK(P9_DEBUG_TRANS, "client %p req %p\n", client, req); +	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);  	spin_lock(&client->lock); @@ -694,14 +696,26 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)  		list_del(&req->req_list);  		req->status = REQ_STATUS_FLSHD;  		ret = 0; -	} else if (req->status == REQ_STATUS_SENT) -		req->status = REQ_STATUS_FLSH; - +	}  	spin_unlock(&client->lock);  	return ret;  } +static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) +{ +	p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + +	/* we haven't received a response for oldreq, +	 * remove it from the list. 
+	 */ +	spin_lock(&client->lock); +	list_del(&req->req_list); +	spin_unlock(&client->lock); + +	return 0; +} +  /**   * parse_opts - parse mount options into p9_fd_opts structure   * @params: options string passed from mount @@ -716,7 +730,6 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)  	substring_t args[MAX_OPT_ARGS];  	int option;  	char *options, *tmp_options; -	int ret;  	opts->port = P9_PORT;  	opts->rfd = ~0; @@ -727,8 +740,8 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)  	tmp_options = kstrdup(params, GFP_KERNEL);  	if (!tmp_options) { -		P9_DPRINTK(P9_DEBUG_ERROR, -				"failed to allocate copy of option string\n"); +		p9_debug(P9_DEBUG_ERROR, +			 "failed to allocate copy of option string\n");  		return -ENOMEM;  	}  	options = tmp_options; @@ -739,12 +752,11 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)  		if (!*p)  			continue;  		token = match_token(p, tokens, args); -		if (token != Opt_err) { +		if ((token != Opt_err) && (token != Opt_privport)) {  			r = match_int(&args[0], &option);  			if (r < 0) { -				P9_DPRINTK(P9_DEBUG_ERROR, -				"integer field, but no integer?\n"); -				ret = r; +				p9_debug(P9_DEBUG_ERROR, +					 "integer field, but no integer?\n");  				continue;  			}  		} @@ -758,6 +770,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)  		case Opt_wfdno:  			opts->wfd = option;  			break; +		case Opt_privport: +			opts->privport = 1; +			break;  		default:  			continue;  		} @@ -769,7 +784,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)  static int p9_fd_open(struct p9_client *client, int rfd, int wfd)  { -	struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), +	struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),  					   GFP_KERNEL);  	if (!ts)  		return -ENOMEM; @@ -794,53 +809,43 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)  static int p9_socket_open(struct p9_client *client, struct socket *csocket)  {  	
struct p9_trans_fd *p; -	int ret, fd; +	struct file *file; -	p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); +	p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);  	if (!p)  		return -ENOMEM;  	csocket->sk->sk_allocation = GFP_NOIO; -	fd = sock_map_fd(csocket, 0); -	if (fd < 0) { -		P9_EPRINTK(KERN_ERR, "p9_socket_open: failed to map fd\n"); +	file = sock_alloc_file(csocket, 0, NULL); +	if (IS_ERR(file)) { +		pr_err("%s (%d): failed to map fd\n", +		       __func__, task_pid_nr(current));  		sock_release(csocket);  		kfree(p); -		return fd; +		return PTR_ERR(file);  	} -	get_file(csocket->file); -	get_file(csocket->file); -	p->wr = p->rd = csocket->file; +	get_file(file); +	p->wr = p->rd = file;  	client->trans = p;  	client->status = Connected; -	sys_close(fd);	/* still racy */ -  	p->rd->f_flags |= O_NONBLOCK; -	p->conn = p9_conn_create(client); -	if (IS_ERR(p->conn)) { -		ret = PTR_ERR(p->conn); -		p->conn = NULL; -		kfree(p); -		sockfd_put(csocket); -		sockfd_put(csocket); -		return ret; -	} +	p9_conn_create(client);  	return 0;  }  /** - * p9_mux_destroy - cancels all pending requests and frees mux resources + * p9_mux_destroy - cancels all pending requests of mux   * @m: mux to destroy   *   */  static void p9_conn_destroy(struct p9_conn *m)  { -	P9_DPRINTK(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", m, -		m->mux_list.prev, m->mux_list.next); +	p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n", +		 m, m->mux_list.prev, m->mux_list.next);  	p9_mux_poll_stop(m);  	cancel_work_sync(&m->rq); @@ -849,7 +854,6 @@ static void p9_conn_destroy(struct p9_conn *m)  	p9_conn_cancel(m, -ECONNRESET);  	m->client = NULL; -	kfree(m);  }  /** @@ -871,7 +875,7 @@ static void p9_fd_close(struct p9_client *client)  	client->status = Disconnected; -	p9_conn_destroy(ts->conn); +	p9_conn_destroy(&ts->conn);  	if (ts->rd)  		fput(ts->rd); @@ -898,6 +902,24 @@ static inline int valid_ipaddr4(const char *buf)  	return 0;  } +static int p9_bind_privport(struct socket *sock) +{ 
+	struct sockaddr_in cl; +	int port, err = -EINVAL; + +	memset(&cl, 0, sizeof(cl)); +	cl.sin_family = AF_INET; +	cl.sin_addr.s_addr = INADDR_ANY; +	for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { +		cl.sin_port = htons((ushort)port); +		err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); +		if (err != -EADDRINUSE) +			break; +	} +	return err; +} + +  static int  p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)  { @@ -918,20 +940,30 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)  	sin_server.sin_family = AF_INET;  	sin_server.sin_addr.s_addr = in_aton(addr);  	sin_server.sin_port = htons(opts.port); -	err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket); - +	err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_INET, +			    SOCK_STREAM, IPPROTO_TCP, &csocket, 1);  	if (err) { -		P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n"); +		pr_err("%s (%d): problem creating socket\n", +		       __func__, task_pid_nr(current));  		return err;  	} +	if (opts.privport) { +		err = p9_bind_privport(csocket); +		if (err < 0) { +			pr_err("%s (%d): problem binding to privport\n", +			       __func__, task_pid_nr(current)); +			sock_release(csocket); +			return err; +		} +	} +  	err = csocket->ops->connect(csocket,  				    (struct sockaddr *)&sin_server,  				    sizeof(struct sockaddr_in), 0);  	if (err < 0) { -		P9_EPRINTK(KERN_ERR, -			"p9_trans_tcp: problem connecting socket to %s\n", -			addr); +		pr_err("%s (%d): problem connecting socket to %s\n", +		       __func__, task_pid_nr(current), addr);  		sock_release(csocket);  		return err;  	} @@ -949,24 +981,26 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)  	csocket = NULL;  	if (strlen(addr) >= UNIX_PATH_MAX) { -		P9_EPRINTK(KERN_ERR, "p9_trans_unix: address too long: %s\n", -			addr); +		pr_err("%s (%d): address too long: %s\n", +		       __func__, task_pid_nr(current), addr);  		
return -ENAMETOOLONG;  	}  	sun_server.sun_family = PF_UNIX;  	strcpy(sun_server.sun_path, addr); -	err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket); +	err = __sock_create(read_pnet(¤t->nsproxy->net_ns), PF_UNIX, +			    SOCK_STREAM, 0, &csocket, 1);  	if (err < 0) { -		P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n"); +		pr_err("%s (%d): problem creating socket\n", +		       __func__, task_pid_nr(current)); +  		return err;  	}  	err = csocket->ops->connect(csocket, (struct sockaddr *)&sun_server,  			sizeof(struct sockaddr_un) - 1, 0);  	if (err < 0) { -		P9_EPRINTK(KERN_ERR, -			"p9_trans_unix: problem connecting socket: %s: %d\n", -			addr, err); +		pr_err("%s (%d): problem connecting socket: %s: %d\n", +		       __func__, task_pid_nr(current), addr, err);  		sock_release(csocket);  		return err;  	} @@ -984,7 +1018,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)  	parse_opts(args, &opts);  	if (opts.rfd == ~0 || opts.wfd == ~0) { -		printk(KERN_ERR "v9fs: Insufficient options for proto=fd\n"); +		pr_err("Insufficient options for proto=fd\n");  		return -ENOPROTOOPT;  	} @@ -993,14 +1027,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)  		return err;  	p = (struct p9_trans_fd *) client->trans; -	p->conn = p9_conn_create(client); -	if (IS_ERR(p->conn)) { -		err = PTR_ERR(p->conn); -		p->conn = NULL; -		fput(p->rd); -		fput(p->wr); -		return err; -	} +	p9_conn_create(client);  	return 0;  } @@ -1008,11 +1035,12 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)  static struct p9_trans_module p9_tcp_trans = {  	.name = "tcp",  	.maxsize = MAX_SOCK_BUF, -	.def = 1, +	.def = 0,  	.create = p9_fd_create_tcp,  	.close = p9_fd_close,  	.request = p9_fd_request,  	.cancel = p9_fd_cancel, +	.cancelled = p9_fd_cancelled,  	.owner = THIS_MODULE,  }; @@ -1024,6 +1052,7 @@ static struct p9_trans_module p9_unix_trans = {  	.close = p9_fd_close,  	.request = p9_fd_request,  	
.cancel = p9_fd_cancel, +	.cancelled = p9_fd_cancelled,  	.owner = THIS_MODULE,  }; @@ -1035,6 +1064,7 @@ static struct p9_trans_module p9_fd_trans = {  	.close = p9_fd_close,  	.request = p9_fd_request,  	.cancel = p9_fd_cancel, +	.cancelled = p9_fd_cancelled,  	.owner = THIS_MODULE,  }; @@ -1047,12 +1077,12 @@ static struct p9_trans_module p9_fd_trans = {   *   */ -static int p9_poll_proc(void *a) +static void p9_poll_workfn(struct work_struct *work)  {  	unsigned long flags; -	P9_DPRINTK(P9_DEBUG_TRANS, "start %p\n", current); - repeat: +	p9_debug(P9_DEBUG_TRANS, "start %p\n", current); +  	spin_lock_irqsave(&p9_poll_lock, flags);  	while (!list_empty(&p9_poll_pending_list)) {  		struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, @@ -1067,35 +1097,11 @@ static int p9_poll_proc(void *a)  	}  	spin_unlock_irqrestore(&p9_poll_lock, flags); -	set_current_state(TASK_INTERRUPTIBLE); -	if (list_empty(&p9_poll_pending_list)) { -		P9_DPRINTK(P9_DEBUG_TRANS, "sleeping...\n"); -		schedule(); -	} -	__set_current_state(TASK_RUNNING); - -	if (!kthread_should_stop()) -		goto repeat; - -	P9_DPRINTK(P9_DEBUG_TRANS, "finish\n"); -	return 0; +	p9_debug(P9_DEBUG_TRANS, "finish\n");  }  int p9_trans_fd_init(void)  { -	p9_mux_wq = create_workqueue("v9fs"); -	if (!p9_mux_wq) { -		printk(KERN_WARNING "v9fs: mux: creating workqueue failed\n"); -		return -ENOMEM; -	} - -	p9_poll_task = kthread_run(p9_poll_proc, NULL, "v9fs-poll"); -	if (IS_ERR(p9_poll_task)) { -		destroy_workqueue(p9_mux_wq); -		printk(KERN_WARNING "v9fs: mux: creating poll task failed\n"); -		return PTR_ERR(p9_poll_task); -	} -  	v9fs_register_trans(&p9_tcp_trans);  	v9fs_register_trans(&p9_unix_trans);  	v9fs_register_trans(&p9_fd_trans); @@ -1105,10 +1111,8 @@ int p9_trans_fd_init(void)  void p9_trans_fd_exit(void)  { -	kthread_stop(p9_poll_task); +	flush_work(&p9_poll_work);  	v9fs_unregister_trans(&p9_tcp_trans);  	v9fs_unregister_trans(&p9_unix_trans);  	v9fs_unregister_trans(&p9_fd_trans); - -	
destroy_workqueue(p9_mux_wq);  } diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 17c5ba7551a..14ad43b5cf8 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -26,6 +26,8 @@   *   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/in.h>  #include <linux/module.h>  #include <linux/net.h> @@ -55,11 +57,8 @@  #define P9_RDMA_IRD		0  #define P9_RDMA_ORD		0  #define P9_RDMA_TIMEOUT		30000		/* 30 seconds */ -#define P9_RDMA_MAXSIZE		(4*4096)	/* Min SGE is 4, so we can -						 * safely advertise a maxsize -						 * of 64k */ +#define P9_RDMA_MAXSIZE		(1024*1024)	/* 1MB */ -#define P9_RDMA_MAX_SGE (P9_RDMA_MAXSIZE >> PAGE_SHIFT)  /**   * struct p9_trans_rdma - RDMA transport instance   * @@ -74,7 +73,9 @@   * @sq_depth: The depth of the Send Queue   * @sq_sem: Semaphore for the SQ   * @rq_depth: The depth of the Receive Queue. - * @rq_count: Count of requests in the Receive Queue. + * @rq_sem: Semaphore for the RQ + * @excess_rc : Amount of posted Receive Contexts without a pending request. 
+ *		See rdma_request()   * @addr: The remote peer's address   * @req_lock: Protects the active request list   * @cm_done: Completion event for connection management tracking @@ -99,7 +100,8 @@ struct p9_trans_rdma {  	int sq_depth;  	struct semaphore sq_sem;  	int rq_depth; -	atomic_t rq_count; +	struct semaphore rq_sem; +	atomic_t excess_rc;  	struct sockaddr_in addr;  	spinlock_t req_lock; @@ -168,7 +170,6 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)  	substring_t args[MAX_OPT_ARGS];  	int option;  	char *options, *tmp_options; -	int ret;  	opts->port = P9_PORT;  	opts->sq_depth = P9_RDMA_SQ_DEPTH; @@ -180,8 +181,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)  	tmp_options = kstrdup(params, GFP_KERNEL);  	if (!tmp_options) { -		P9_DPRINTK(P9_DEBUG_ERROR, -			   "failed to allocate copy of option string\n"); +		p9_debug(P9_DEBUG_ERROR, +			 "failed to allocate copy of option string\n");  		return -ENOMEM;  	}  	options = tmp_options; @@ -192,11 +193,12 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)  		if (!*p)  			continue;  		token = match_token(p, tokens, args); +		if (token == Opt_err) +			continue;  		r = match_int(&args[0], &option);  		if (r < 0) { -			P9_DPRINTK(P9_DEBUG_ERROR, -				   "integer field, but no integer?\n"); -			ret = r; +			p9_debug(P9_DEBUG_ERROR, +				 "integer field, but no integer?\n");  			continue;  		}  		switch (token) { @@ -297,15 +299,20 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,  	if (!req)  		goto err_out; +	/* Check that we have not yet received a reply for this request. 
+	 */ +	if (unlikely(req->rc)) { +		pr_err("Duplicate reply for request %d", tag); +		goto err_out; +	} +  	req->rc = c->rc; -	req->status = REQ_STATUS_RCVD; -	p9_client_cb(client, req); +	p9_client_cb(client, req, REQ_STATUS_RCVD);  	return;   err_out: -	P9_DPRINTK(P9_DEBUG_ERROR, "req %p err %d status %d\n", -		   req, err, status); +	p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, status);  	rdma->state = P9_RDMA_FLUSHING;  	client->status = Disconnected;  } @@ -321,8 +328,8 @@ handle_send(struct p9_client *client, struct p9_trans_rdma *rdma,  static void qp_event_handler(struct ib_event *event, void *context)  { -	P9_DPRINTK(P9_DEBUG_ERROR, "QP event %d context %p\n", event->event, -								context); +	p9_debug(P9_DEBUG_ERROR, "QP event %d context %p\n", +		 event->event, context);  }  static void cq_comp_handler(struct ib_cq *cq, void *cq_context) @@ -338,8 +345,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)  		switch (c->wc_op) {  		case IB_WC_RECV: -			atomic_dec(&rdma->rq_count);  			handle_recv(client, rdma, c, wc.status, wc.byte_len); +			up(&rdma->rq_sem);  			break;  		case IB_WC_SEND: @@ -348,8 +355,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)  			break;  		default: -			printk(KERN_ERR "9prdma: unexpected completion type, " -			       "c->wc_op=%d, wc.opcode=%d, status=%d\n", +			pr_err("unexpected completion type, c->wc_op=%d, wc.opcode=%d, status=%d\n",  			       c->wc_op, wc.opcode, wc.status);  			break;  		} @@ -359,7 +365,7 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)  static void cq_event_handler(struct ib_event *e, void *v)  { -	P9_DPRINTK(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v); +	p9_debug(P9_DEBUG_ERROR, "CQ event %d context %p\n", e->event, v);  }  static void rdma_destroy_trans(struct p9_trans_rdma *rdma) @@ -410,7 +416,7 @@ post_recv(struct p9_client *client, struct p9_rdma_context *c)  	return ib_post_recv(rdma->qp, &wr, &bad_wr);   
error: -	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); +	p9_debug(P9_DEBUG_ERROR, "EIO\n");  	return -EIO;  } @@ -424,32 +430,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)  	struct p9_rdma_context *c = NULL;  	struct p9_rdma_context *rpl_context = NULL; +	/* When an error occurs between posting the recv and the send, +	 * there will be a receive context posted without a pending request. +	 * Since there is no way to "un-post" it, we remember it and skip +	 * post_recv() for the next request. +	 * So here, +	 * see if we are this `next request' and need to absorb an excess rc. +	 * If yes, then drop and free our own, and do not recv_post(). +	 **/ +	if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { +		if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { +			/* Got one ! */ +			kfree(req->rc); +			req->rc = NULL; +			goto dont_need_post_recv; +		} else { +			/* We raced and lost. */ +			atomic_inc(&rdma->excess_rc); +		} +	} +  	/* Allocate an fcall for the reply */ -	rpl_context = kmalloc(sizeof *rpl_context, GFP_KERNEL); +	rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);  	if (!rpl_context) {  		err = -ENOMEM; -		goto err_close; -	} - -	/* -	 * If the request has a buffer, steal it, otherwise -	 * allocate a new one.  Typically, requests should already -	 * have receive buffers allocated and just swap them around -	 */ -	if (!req->rc) { -		req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, -								GFP_KERNEL); -		if (req->rc) { -			req->rc->sdata = (char *) req->rc + -						sizeof(struct p9_fcall); -			req->rc->capacity = client->msize; -		} +		goto recv_error;  	}  	rpl_context->rc = req->rc; -	if (!rpl_context->rc) { -		err = -ENOMEM; -		goto err_free2; -	}  	/*  	 * Post a receive buffer for this request. We need to ensure @@ -458,29 +465,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)  	 * outstanding request, so we must keep a count to avoid  	 * overflowing the RQ.  	 
*/ -	if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { -		err = post_recv(client, rpl_context); -		if (err) -			goto err_free1; -	} else -		atomic_dec(&rdma->rq_count); +	if (down_interruptible(&rdma->rq_sem)) { +		err = -EINTR; +		goto recv_error; +	} +	err = post_recv(client, rpl_context); +	if (err) { +		p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); +		goto recv_error; +	}  	/* remove posted receive buffer from request structure */  	req->rc = NULL; +dont_need_post_recv:  	/* Post the request */ -	c = kmalloc(sizeof *c, GFP_KERNEL); +	c = kmalloc(sizeof *c, GFP_NOFS);  	if (!c) {  		err = -ENOMEM; -		goto err_free1; +		goto send_error;  	}  	c->req = req;  	c->busa = ib_dma_map_single(rdma->cm_id->device,  				    c->req->tc->sdata, c->req->tc->size,  				    DMA_TO_DEVICE); -	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) -		goto error; +	if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { +		err = -EIO; +		goto send_error; +	}  	sge.addr = c->busa;  	sge.length = c->req->tc->size; @@ -494,22 +507,38 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)  	wr.sg_list = &sge;  	wr.num_sge = 1; -	if (down_interruptible(&rdma->sq_sem)) -		goto error; +	if (down_interruptible(&rdma->sq_sem)) { +		err = -EINTR; +		goto send_error; +	} -	return ib_post_send(rdma->qp, &wr, &bad_wr); +	/* Mark request as `sent' *before* we actually send it, +	 * because doing if after could erase the REQ_STATUS_RCVD +	 * status in case of a very fast reply. 
+	 */ +	req->status = REQ_STATUS_SENT; +	err = ib_post_send(rdma->qp, &wr, &bad_wr); +	if (err) +		goto send_error; - error: +	/* Success */ +	return 0; + + /* Handle errors that happened during or while preparing the send: */ + send_error: +	req->status = REQ_STATUS_ERROR;  	kfree(c); -	kfree(rpl_context->rc); -	kfree(rpl_context); -	P9_DPRINTK(P9_DEBUG_ERROR, "EIO\n"); -	return -EIO; - err_free1: -	kfree(rpl_context->rc); - err_free2: +	p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + +	/* Ach. +	 *  We did recv_post(), but not send. We have one recv_post in excess. +	 */ +	atomic_inc(&rdma->excess_rc); +	return err; + + /* Handle errors that happened during or while preparing post_recv(): */ + recv_error:  	kfree(rpl_context); - err_close:  	spin_lock_irqsave(&rdma->req_lock, flags);  	if (rdma->state < P9_RDMA_CLOSING) {  		rdma->state = P9_RDMA_CLOSING; @@ -554,17 +583,30 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)  	spin_lock_init(&rdma->req_lock);  	init_completion(&rdma->cm_done);  	sema_init(&rdma->sq_sem, rdma->sq_depth); -	atomic_set(&rdma->rq_count, 0); +	sema_init(&rdma->rq_sem, rdma->rq_depth); +	atomic_set(&rdma->excess_rc, 0);  	return rdma;  } -/* its not clear to me we can do anything after send has been posted */  static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)  { +	/* Nothing to do here. +	 * We will take care of it (if we have to) in rdma_cancelled() +	 */  	return 1;  } +/* A request has been fully flushed without a reply. + * That means we have posted one buffer in excess. 
+ */ +static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) +{ +	struct p9_trans_rdma *rdma = client->trans; +	atomic_inc(&rdma->excess_rc); +	return 0; +} +  /**   * trans_create_rdma - Transport method for creating atransport instance   * @client: client instance @@ -592,7 +634,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)  		return -ENOMEM;  	/* Create the RDMA CM ID */ -	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP); +	rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP, +				     IB_QPT_RC);  	if (IS_ERR(rdma->cm_id))  		goto error; @@ -697,6 +740,7 @@ static struct p9_trans_module p9_rdma_trans = {  	.close = rdma_close,  	.request = rdma_request,  	.cancel = rdma_cancel, +	.cancelled = rdma_cancelled,  };  /** diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index c8f3f72ab20..6940d8fe897 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -26,6 +26,8 @@   *   */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +  #include <linux/in.h>  #include <linux/module.h>  #include <linux/net.h> @@ -37,19 +39,24 @@  #include <linux/inet.h>  #include <linux/idr.h>  #include <linux/file.h> +#include <linux/highmem.h>  #include <linux/slab.h>  #include <net/9p/9p.h>  #include <linux/parser.h>  #include <net/9p/client.h>  #include <net/9p/transport.h>  #include <linux/scatterlist.h> +#include <linux/swap.h>  #include <linux/virtio.h>  #include <linux/virtio_9p.h> +#include "trans_common.h"  #define VIRTQUEUE_NUM	128  /* a single mutex to manage channel initialization and attachment */  static DEFINE_MUTEX(virtio_9p_lock); +static DECLARE_WAIT_QUEUE_HEAD(vp_wq); +static atomic_t vp_pinned = ATOMIC_INIT(0);  /**   * struct virtio_chan - per-instance transport information @@ -77,7 +84,10 @@ struct virtio_chan {  	struct virtqueue *vq;  	int ring_bufs_avail;  	wait_queue_head_t *vc_wq; - +	/* This is global limit. 
Since we don't have a global structure, +	 * will be placing it in each channel. +	 */ +	unsigned long p9_max_pages;  	/* Scatterlist: can be too big for stack. */  	struct scatterlist sg[VIRTQUEUE_NUM]; @@ -138,28 +148,24 @@ static void req_done(struct virtqueue *vq)  	struct p9_req_t *req;  	unsigned long flags; -	P9_DPRINTK(P9_DEBUG_TRANS, ": request done\n"); +	p9_debug(P9_DEBUG_TRANS, ": request done\n"); -	do { +	while (1) {  		spin_lock_irqsave(&chan->lock, flags);  		rc = virtqueue_get_buf(chan->vq, &len); - -		if (rc != NULL) { -			if (!chan->ring_bufs_avail) { -				chan->ring_bufs_avail = 1; -				wake_up(chan->vc_wq); -			} -			spin_unlock_irqrestore(&chan->lock, flags); -			P9_DPRINTK(P9_DEBUG_TRANS, ": rc %p\n", rc); -			P9_DPRINTK(P9_DEBUG_TRANS, ": lookup tag %d\n", -					rc->tag); -			req = p9_tag_lookup(chan->client, rc->tag); -			req->status = REQ_STATUS_RCVD; -			p9_client_cb(chan->client, req); -		} else { +		if (rc == NULL) {  			spin_unlock_irqrestore(&chan->lock, flags); +			break;  		} -	} while (rc != NULL); +		chan->ring_bufs_avail = 1; +		spin_unlock_irqrestore(&chan->lock, flags); +		/* Wakeup if anyone waiting for VirtIO ring space. */ +		wake_up(chan->vc_wq); +		p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); +		p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); +		req = p9_tag_lookup(chan->client, rc->tag); +		p9_client_cb(chan->client, req, REQ_STATUS_RCVD); +	}  }  /** @@ -176,9 +182,8 @@ static void req_done(struct virtqueue *vq)   *   */ -static int -pack_sg_list(struct scatterlist *sg, int start, int limit, char *data, -								int count) +static int pack_sg_list(struct scatterlist *sg, int start, +			int limit, char *data, int count)  {  	int s;  	int index = start; @@ -187,12 +192,15 @@ pack_sg_list(struct scatterlist *sg, int start, int limit, char *data,  		s = rest_of_page(data);  		if (s > count)  			s = count; +		BUG_ON(index > limit); +		/* Make sure we don't terminate early. 
*/ +		sg_unmark_end(&sg[index]);  		sg_set_buf(&sg[index++], data, s);  		count -= s;  		data += s; -		BUG_ON(index > limit);  	} - +	if (index-start) +		sg_mark_end(&sg[index - 1]);  	return index-start;  } @@ -203,6 +211,48 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)  }  /** + * pack_sg_list_p - Just like pack_sg_list. Instead of taking a buffer, + * this takes a list of pages. + * @sg: scatter/gather list to pack into + * @start: which segment of the sg_list to start at + * @pdata: a list of pages to add into sg. + * @nr_pages: number of pages to pack into the scatter/gather list + * @data: data to pack into scatter/gather list + * @count: amount of data to pack into the scatter/gather list + */ +static int +pack_sg_list_p(struct scatterlist *sg, int start, int limit, +	       struct page **pdata, int nr_pages, char *data, int count) +{ +	int i = 0, s; +	int data_off; +	int index = start; + +	BUG_ON(nr_pages > (limit - start)); +	/* +	 * if the first page doesn't start at +	 * page boundary find the offset +	 */ +	data_off = offset_in_page(data); +	while (nr_pages) { +		s = rest_of_page(data); +		if (s > count) +			s = count; +		/* Make sure we don't terminate early. 
*/ +		sg_unmark_end(&sg[index]); +		sg_set_page(&sg[index++], pdata[i++], s, data_off); +		data_off = 0; +		data += s; +		count -= s; +		nr_pages--; +	} + +	if (index-start) +		sg_mark_end(&sg[index - 1]); +	return index - start; +} + +/**   * p9_virtio_request - issue a request   * @client: client instance issuing the request   * @req: request to be issued @@ -212,24 +262,32 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)  static int  p9_virtio_request(struct p9_client *client, struct p9_req_t *req)  { -	int in, out; -	struct virtio_chan *chan = client->trans; -	char *rdata = (char *)req->rc+sizeof(struct p9_fcall); -	unsigned long flags;  	int err; +	int in, out, out_sgs, in_sgs; +	unsigned long flags; +	struct virtio_chan *chan = client->trans; +	struct scatterlist *sgs[2]; -	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request\n"); +	p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); -req_retry:  	req->status = REQ_STATUS_SENT; - +req_retry:  	spin_lock_irqsave(&chan->lock, flags); -	out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, -								req->tc->size); -	in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM-out, rdata, -								client->msize); -	err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc); +	out_sgs = in_sgs = 0; +	/* Handle out VirtIO ring buffers */ +	out = pack_sg_list(chan->sg, 0, +			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); +	if (out) +		sgs[out_sgs++] = chan->sg; + +	in = pack_sg_list(chan->sg, out, +			  VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); +	if (in) +		sgs[out_sgs + in_sgs++] = chan->sg + out; + +	err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, +				GFP_ATOMIC);  	if (err < 0) {  		if (err == -ENOSPC) {  			chan->ring_bufs_avail = 0; @@ -239,24 +297,204 @@ req_retry:  			if (err  == -ERESTARTSYS)  				return err; -			P9_DPRINTK(P9_DEBUG_TRANS, "9p:Retry virtio request\n"); +			p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n");  			goto req_retry;  		
} else {  			spin_unlock_irqrestore(&chan->lock, flags); -			P9_DPRINTK(P9_DEBUG_TRANS, -					"9p debug: " -					"virtio rpc add_buf returned failure"); +			p9_debug(P9_DEBUG_TRANS, +				 "virtio rpc add_sgs returned failure\n");  			return -EIO;  		}  	} -  	virtqueue_kick(chan->vq);  	spin_unlock_irqrestore(&chan->lock, flags); -	P9_DPRINTK(P9_DEBUG_TRANS, "9p debug: virtio request kicked\n"); +	p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n");  	return 0;  } +static int p9_get_mapped_pages(struct virtio_chan *chan, +			       struct page **pages, char *data, +			       int nr_pages, int write, int kern_buf) +{ +	int err; +	if (!kern_buf) { +		/* +		 * We allow only p9_max_pages pinned. We wait for the +		 * Other zc request to finish here +		 */ +		if (atomic_read(&vp_pinned) >= chan->p9_max_pages) { +			err = wait_event_interruptible(vp_wq, +			      (atomic_read(&vp_pinned) < chan->p9_max_pages)); +			if (err == -ERESTARTSYS) +				return err; +		} +		err = p9_payload_gup(data, &nr_pages, pages, write); +		if (err < 0) +			return err; +		atomic_add(nr_pages, &vp_pinned); +	} else { +		/* kernel buffer, no need to pin pages */ +		int s, index = 0; +		int count = nr_pages; +		while (nr_pages) { +			s = rest_of_page(data); +			if (is_vmalloc_addr(data)) +				pages[index++] = vmalloc_to_page(data); +			else +				pages[index++] = kmap_to_page(data); +			data += s; +			nr_pages--; +		} +		nr_pages = count; +	} +	return nr_pages; +} + +/** + * p9_virtio_zc_request - issue a zero copy request + * @client: client instance issuing the request + * @req: request to be issued + * @uidata: user bffer that should be ued for zero copy read + * @uodata: user buffer that shoud be user for zero copy write + * @inlen: read buffer size + * @olen: write buffer size + * @hdrlen: reader header size, This is the size of response protocol data + * + */ +static int +p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, +		     char *uidata, char *uodata, int inlen, +		  
   int outlen, int in_hdr_len, int kern_buf) +{ +	int in, out, err, out_sgs, in_sgs; +	unsigned long flags; +	int in_nr_pages = 0, out_nr_pages = 0; +	struct page **in_pages = NULL, **out_pages = NULL; +	struct virtio_chan *chan = client->trans; +	struct scatterlist *sgs[4]; + +	p9_debug(P9_DEBUG_TRANS, "virtio request\n"); + +	if (uodata) { +		out_nr_pages = p9_nr_pages(uodata, outlen); +		out_pages = kmalloc(sizeof(struct page *) * out_nr_pages, +				    GFP_NOFS); +		if (!out_pages) { +			err = -ENOMEM; +			goto err_out; +		} +		out_nr_pages = p9_get_mapped_pages(chan, out_pages, uodata, +						   out_nr_pages, 0, kern_buf); +		if (out_nr_pages < 0) { +			err = out_nr_pages; +			kfree(out_pages); +			out_pages = NULL; +			goto err_out; +		} +	} +	if (uidata) { +		in_nr_pages = p9_nr_pages(uidata, inlen); +		in_pages = kmalloc(sizeof(struct page *) * in_nr_pages, +				   GFP_NOFS); +		if (!in_pages) { +			err = -ENOMEM; +			goto err_out; +		} +		in_nr_pages = p9_get_mapped_pages(chan, in_pages, uidata, +						  in_nr_pages, 1, kern_buf); +		if (in_nr_pages < 0) { +			err = in_nr_pages; +			kfree(in_pages); +			in_pages = NULL; +			goto err_out; +		} +	} +	req->status = REQ_STATUS_SENT; +req_retry_pinned: +	spin_lock_irqsave(&chan->lock, flags); + +	out_sgs = in_sgs = 0; + +	/* out data */ +	out = pack_sg_list(chan->sg, 0, +			   VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + +	if (out) +		sgs[out_sgs++] = chan->sg; + +	if (out_pages) { +		sgs[out_sgs++] = chan->sg + out; +		out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, +				      out_pages, out_nr_pages, uodata, outlen); +	} +		 +	/* +	 * Take care of in data +	 * For example TREAD have 11. +	 * 11 is the read/write header = PDU Header(7) + IO Size (4). +	 * Arrange in such a way that server places header in the +	 * alloced memory and payload onto the user buffer. 
+	 */ +	in = pack_sg_list(chan->sg, out, +			  VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); +	if (in) +		sgs[out_sgs + in_sgs++] = chan->sg + out; + +	if (in_pages) { +		sgs[out_sgs + in_sgs++] = chan->sg + out + in; +		in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, +				     in_pages, in_nr_pages, uidata, inlen); +	} + +	BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs)); +	err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, +				GFP_ATOMIC); +	if (err < 0) { +		if (err == -ENOSPC) { +			chan->ring_bufs_avail = 0; +			spin_unlock_irqrestore(&chan->lock, flags); +			err = wait_event_interruptible(*chan->vc_wq, +						       chan->ring_bufs_avail); +			if (err  == -ERESTARTSYS) +				goto err_out; + +			p9_debug(P9_DEBUG_TRANS, "Retry virtio request\n"); +			goto req_retry_pinned; +		} else { +			spin_unlock_irqrestore(&chan->lock, flags); +			p9_debug(P9_DEBUG_TRANS, +				 "virtio rpc add_sgs returned failure\n"); +			err = -EIO; +			goto err_out; +		} +	} +	virtqueue_kick(chan->vq); +	spin_unlock_irqrestore(&chan->lock, flags); +	p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); +	err = wait_event_interruptible(*req->wq, +				       req->status >= REQ_STATUS_RCVD); +	/* +	 * Non kernel buffers are pinned, unpin them +	 */ +err_out: +	if (!kern_buf) { +		if (in_pages) { +			p9_release_pages(in_pages, in_nr_pages); +			atomic_sub(in_nr_pages, &vp_pinned); +		} +		if (out_pages) { +			p9_release_pages(out_pages, out_nr_pages); +			atomic_sub(out_nr_pages, &vp_pinned); +		} +		/* wakeup anybody waiting for slots to pin pages */ +		wake_up(&vp_wq); +	} +	kfree(in_pages); +	kfree(out_pages); +	return err; +} +  static ssize_t p9_mount_tag_show(struct device *dev,  				struct device_attribute *attr, char *buf)  { @@ -288,7 +526,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)  	chan = kmalloc(sizeof(struct virtio_chan), GFP_KERNEL);  	if (!chan) { -		printk(KERN_ERR "9p: Failed to allocate virtio 9P channel\n"); +		pr_err("Failed to allocate 
virtio 9P channel\n");  		err = -ENOMEM;  		goto fail;  	} @@ -308,9 +546,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)  	chan->inuse = false;  	if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { -		vdev->config->get(vdev, -				offsetof(struct virtio_9p_config, tag_len), -				&tag_len, sizeof(tag_len)); +		virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len);  	} else {  		err = -EINVAL;  		goto out_free_vq; @@ -320,8 +556,9 @@ static int p9_virtio_probe(struct virtio_device *vdev)  		err = -ENOMEM;  		goto out_free_vq;  	} -	vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), -			tag, tag_len); + +	virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag), +			   tag, tag_len);  	chan->tag = tag;  	chan->tag_len = tag_len;  	err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); @@ -335,10 +572,16 @@ static int p9_virtio_probe(struct virtio_device *vdev)  	}  	init_waitqueue_head(chan->vc_wq);  	chan->ring_bufs_avail = 1; +	/* Ceiling limit to avoid denial of service attacks */ +	chan->p9_max_pages = nr_free_buffer_pages()/4;  	mutex_lock(&virtio_9p_lock);  	list_add_tail(&chan->chan_list, &virtio_chan_list);  	mutex_unlock(&virtio_9p_lock); + +	/* Let udev rules use the new mount_tag attribute. 
*/ +	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); +  	return 0;  out_free_tag: @@ -387,7 +630,7 @@ p9_virtio_create(struct p9_client *client, const char *devname, char *args)  	mutex_unlock(&virtio_9p_lock);  	if (!found) { -		printk(KERN_ERR "9p: no channels available\n"); +		pr_err("no channels available\n");  		return ret;  	} @@ -408,13 +651,15 @@ static void p9_virtio_remove(struct virtio_device *vdev)  {  	struct virtio_chan *chan = vdev->priv; -	BUG_ON(chan->inuse); +	if (chan->inuse) +		p9_virtio_close(chan->client);  	vdev->config->del_vqs(vdev);  	mutex_lock(&virtio_9p_lock);  	list_del(&chan->chan_list);  	mutex_unlock(&virtio_9p_lock);  	sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); +	kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);  	kfree(chan->tag);  	kfree(chan->vc_wq);  	kfree(chan); @@ -446,9 +691,16 @@ static struct p9_trans_module p9_virtio_trans = {  	.create = p9_virtio_create,  	.close = p9_virtio_close,  	.request = p9_virtio_request, +	.zc_request = p9_virtio_zc_request,  	.cancel = p9_virtio_cancel, -	.maxsize = PAGE_SIZE*16, -	.def = 0, +	/* +	 * We leave one entry for input and one entry for response +	 * headers. We also skip one more entry to accomodate, address +	 * that are not at page boundary, that can result in an extra +	 * page in zero copy. 
+	 */ +	.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), +	.def = 1,  	.owner = THIS_MODULE,  }; diff --git a/net/9p/util.c b/net/9p/util.c index e048701a72d..59f278e64f5 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -67,7 +67,7 @@ EXPORT_SYMBOL(p9_idpool_create);  /**   * p9_idpool_destroy - create a new per-connection id pool - * @p: idpool to destory + * @p: idpool to destroy   */  void p9_idpool_destroy(struct p9_idpool *p) @@ -87,26 +87,21 @@ EXPORT_SYMBOL(p9_idpool_destroy);  int p9_idpool_get(struct p9_idpool *p)  { -	int i = 0; -	int error; +	int i;  	unsigned long flags; -retry: -	if (idr_pre_get(&p->pool, GFP_KERNEL) == 0) -		return 0; - +	idr_preload(GFP_NOFS);  	spin_lock_irqsave(&p->lock, flags);  	/* no need to store exactly p, we just need something non-null */ -	error = idr_get_new(&p->pool, p, &i); -	spin_unlock_irqrestore(&p->lock, flags); +	i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT); -	if (error == -EAGAIN) -		goto retry; -	else if (error) +	spin_unlock_irqrestore(&p->lock, flags); +	idr_preload_end(); +	if (i < 0)  		return -1; -	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", i, p); +	p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p);  	return i;  }  EXPORT_SYMBOL(p9_idpool_get); @@ -124,7 +119,7 @@ void p9_idpool_put(int id, struct p9_idpool *p)  {  	unsigned long flags; -	P9_DPRINTK(P9_DEBUG_MUX, " id %d pool %p\n", id, p); +	p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", id, p);  	spin_lock_irqsave(&p->lock, flags);  	idr_remove(&p->pool, id);  | 
