aboutsummaryrefslogtreecommitdiff
path: root/net/9p
diff options
context:
space:
mode:
Diffstat (limited to 'net/9p')
-rw-r--r--net/9p/Kconfig2
-rw-r--r--net/9p/client.c241
-rw-r--r--net/9p/error.c4
-rw-r--r--net/9p/protocol.c51
-rw-r--r--net/9p/trans_common.c10
-rw-r--r--net/9p/trans_fd.c212
-rw-r--r--net/9p/trans_rdma.c148
-rw-r--r--net/9p/trans_virtio.c82
-rw-r--r--net/9p/util.c17
9 files changed, 480 insertions, 287 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index d9ea09b11cf..a75174a3372 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -23,7 +23,7 @@ config NET_9P_VIRTIO
guest partitions and a host partition.
config NET_9P_RDMA
- depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
+ depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS
tristate "9P RDMA Transport (Experimental)"
help
This builds support for an RDMA transport.
diff --git a/net/9p/client.c b/net/9p/client.c
index 776618cd2be..0004cbaac4a 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -76,6 +76,20 @@ inline int p9_is_proto_dotu(struct p9_client *clnt)
}
EXPORT_SYMBOL(p9_is_proto_dotu);
+/*
+ * Some error codes are taken directly from the server replies,
+ * make sure they are valid.
+ */
+static int safe_errno(int err)
+{
+ if ((err > 0) || (err < -MAX_ERRNO)) {
+ p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err);
+ return -EPROTO;
+ }
+ return err;
+}
+
+
/* Interpret mount option for protocol version */
static int get_protocol_version(char *s)
{
@@ -113,7 +127,7 @@ static int parse_opts(char *opts, struct p9_client *clnt)
char *s;
int ret = 0;
- clnt->proto_version = p9_proto_2000u;
+ clnt->proto_version = p9_proto_2000L;
clnt->msize = 8192;
if (!opts)
@@ -190,6 +204,17 @@ free_and_return:
return ret;
}
+static struct p9_fcall *p9_fcall_alloc(int alloc_msize)
+{
+ struct p9_fcall *fc;
+ fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS);
+ if (!fc)
+ return NULL;
+ fc->capacity = alloc_msize;
+ fc->sdata = (char *) fc + sizeof(struct p9_fcall);
+ return fc;
+}
+
/**
* p9_tag_alloc - lookup/allocate a request by tag
* @c: client session to lookup tag within
@@ -242,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size)
col = tag % P9_ROW_MAXTAG;
req = &c->reqs[row][col];
- if (!req->tc) {
+ if (!req->wq) {
req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS);
- if (!req->wq) {
- pr_err("Couldn't grow tag array\n");
- return ERR_PTR(-ENOMEM);
- }
+ if (!req->wq)
+ goto grow_failed;
init_waitqueue_head(req->wq);
- req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
- GFP_NOFS);
- req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize,
- GFP_NOFS);
- if ((!req->tc) || (!req->rc)) {
- pr_err("Couldn't grow tag array\n");
- kfree(req->tc);
- kfree(req->rc);
- kfree(req->wq);
- req->tc = req->rc = NULL;
- req->wq = NULL;
- return ERR_PTR(-ENOMEM);
- }
- req->tc->capacity = alloc_msize;
- req->rc->capacity = alloc_msize;
- req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall);
- req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall);
}
+ if (!req->tc)
+ req->tc = p9_fcall_alloc(alloc_msize);
+ if (!req->rc)
+ req->rc = p9_fcall_alloc(alloc_msize);
+ if (!req->tc || !req->rc)
+ goto grow_failed;
+
p9pdu_reset(req->tc);
p9pdu_reset(req->rc);
req->tc->tag = tag-1;
req->status = REQ_STATUS_ALLOC;
- return &c->reqs[row][col];
+ return req;
+
+grow_failed:
+ pr_err("Couldn't grow tag array\n");
+ kfree(req->tc);
+ kfree(req->rc);
+ kfree(req->wq);
+ req->tc = req->rc = NULL;
+ req->wq = NULL;
+ return ERR_PTR(-ENOMEM);
}
/**
@@ -393,9 +415,17 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r)
* req: request received
*
*/
-void p9_client_cb(struct p9_client *c, struct p9_req_t *req)
+void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status)
{
p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag);
+
+ /*
+ * This barrier is needed to make sure any change made to req before
+ * the other thread wakes up will indeed be seen by the waiting side.
+ */
+ smp_wmb();
+ req->status = status;
+
wake_up(req->wq);
p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag);
}
@@ -548,36 +578,19 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
if (!p9_is_proto_dotl(c)) {
/* Error is reported in string format */
- uint16_t len;
- /* 7 = header size for RERROR, 2 is the size of string len; */
- int inline_len = in_hdrlen - (7 + 2);
-
- /* Read the size of error string */
- err = p9pdu_readf(req->rc, c->proto_version, "w", &len);
- if (err)
- goto out_err;
+ int len;
+ /* 7 = header size for RERROR; */
+ int inline_len = in_hdrlen - 7;
- ename = kmalloc(len + 1, GFP_NOFS);
- if (!ename) {
- err = -ENOMEM;
+ len = req->rc->size - req->rc->offset;
+ if (len > (P9_ZC_HDR_SZ - 7)) {
+ err = -EFAULT;
goto out_err;
}
- if (len <= inline_len) {
- /* We have error in protocol buffer itself */
- if (pdu_read(req->rc, ename, len)) {
- err = -EFAULT;
- goto out_free;
- }
- } else {
- /*
- * Part of the data is in user space buffer.
- */
- if (pdu_read(req->rc, ename, inline_len)) {
- err = -EFAULT;
- goto out_free;
-
- }
+ ename = &req->rc->sdata[req->rc->offset];
+ if (len > inline_len) {
+ /* We have error in external buffer */
if (kern_buf) {
memcpy(ename + inline_len, uidata,
len - inline_len);
@@ -586,19 +599,19 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
uidata, len - inline_len);
if (err) {
err = -EFAULT;
- goto out_free;
+ goto out_err;
}
}
}
- ename[len] = 0;
- if (p9_is_proto_dotu(c)) {
- /* For dotu we also have error code */
- err = p9pdu_readf(req->rc,
- c->proto_version, "d", &ecode);
- if (err)
- goto out_free;
+ ename = NULL;
+ err = p9pdu_readf(req->rc, c->proto_version, "s?d",
+ &ename, &ecode);
+ if (err)
+ goto out_err;
+
+ if (p9_is_proto_dotu(c))
err = -ecode;
- }
+
if (!err || !IS_ERR_VALUE(err)) {
err = p9_errstr2errno(ename, strlen(ename));
@@ -614,8 +627,6 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req,
}
return err;
-out_free:
- kfree(ename);
out_err:
p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err);
return err;
@@ -652,13 +663,13 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
if (IS_ERR(req))
return PTR_ERR(req);
-
- /* if we haven't received a response for oldreq,
- remove it from the list. */
- spin_lock(&c->lock);
- if (oldreq->status == REQ_STATUS_FLSH)
- list_del(&oldreq->req_list);
- spin_unlock(&c->lock);
+ /*
+ * if we haven't received a response for oldreq,
+ * remove it from the list
+ */
+ if (oldreq->status == REQ_STATUS_SENT)
+ if (c->trans_mod->cancelled)
+ c->trans_mod->cancelled(c, oldreq);
p9_free_req(c, req);
return 0;
@@ -740,10 +751,24 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
c->status = Disconnected;
goto reterr;
}
+again:
/* Wait for the response */
err = wait_event_interruptible(*req->wq,
req->status >= REQ_STATUS_RCVD);
+ /*
+ * Make sure our req is coherent with regard to updates in other
+ * threads - echoes to wmb() in the callback
+ */
+ smp_rmb();
+
+ if ((err == -ERESTARTSYS) && (c->status == Connected)
+ && (type == P9_TFLUSH)) {
+ sigpending = 1;
+ clear_thread_flag(TIF_SIGPENDING);
+ goto again;
+ }
+
if (req->status == REQ_STATUS_ERROR) {
p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
err = req->t_err;
@@ -774,7 +799,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
return req;
reterr:
p9_free_req(c, req);
- return ERR_PTR(err);
+ return ERR_PTR(safe_errno(err));
}
/**
@@ -857,7 +882,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type,
return req;
reterr:
p9_free_req(c, req);
- return ERR_PTR(err);
+ return ERR_PTR(safe_errno(err));
}
static struct p9_fid *p9_fid_create(struct p9_client *clnt)
@@ -973,6 +998,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
{
int err;
struct p9_client *clnt;
+ char *client_id;
err = 0;
clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL);
@@ -981,6 +1007,10 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
clnt->trans_mod = NULL;
clnt->trans = NULL;
+
+ client_id = utsname()->nodename;
+ memcpy(clnt->name, client_id, strlen(client_id) + 1);
+
spin_lock_init(&clnt->lock);
INIT_LIST_HEAD(&clnt->fidlist);
@@ -1078,7 +1108,7 @@ void p9_client_begin_disconnect(struct p9_client *clnt)
EXPORT_SYMBOL(p9_client_begin_disconnect);
struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
- char *uname, u32 n_uname, char *aname)
+ char *uname, kuid_t n_uname, char *aname)
{
int err = 0;
struct p9_req_t *req;
@@ -1095,7 +1125,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid,
goto error;
}
- req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid,
+ req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid,
afid ? afid->fid : P9_NOFID, uname, aname, n_uname);
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1248,7 +1278,7 @@ error:
EXPORT_SYMBOL(p9_client_open);
int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
- gid_t gid, struct p9_qid *qid)
+ kgid_t gid, struct p9_qid *qid)
{
int err = 0;
struct p9_client *clnt;
@@ -1257,13 +1287,14 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
p9_debug(P9_DEBUG_9P,
">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n",
- ofid->fid, name, flags, mode, gid);
+ ofid->fid, name, flags, mode,
+ from_kgid(&init_user_ns, gid));
clnt = ofid->clnt;
if (ofid->mode != -1)
return -EINVAL;
- req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags,
+ req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags,
mode, gid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1336,7 +1367,7 @@ error:
}
EXPORT_SYMBOL(p9_client_fcreate);
-int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid,
+int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid,
struct p9_qid *qid)
{
int err = 0;
@@ -1347,7 +1378,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid,
dfid->fid, name, symtgt);
clnt = dfid->clnt;
- req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt,
+ req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt,
gid);
if (IS_ERR(req)) {
err = PTR_ERR(req);
@@ -1420,6 +1451,7 @@ int p9_client_clunk(struct p9_fid *fid)
int err;
struct p9_client *clnt;
struct p9_req_t *req;
+ int retries = 0;
if (!fid) {
pr_warn("%s (%d): Trying to clunk with NULL fid\n",
@@ -1428,7 +1460,9 @@ int p9_client_clunk(struct p9_fid *fid)
return 0;
}
- p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid);
+again:
+ p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid,
+ retries);
err = 0;
clnt = fid->clnt;
@@ -1444,8 +1478,14 @@ int p9_client_clunk(struct p9_fid *fid)
error:
/*
* Fid is not valid even after a failed clunk
+ * If interrupted, retry once then give up and
+ * leak fid until umount.
*/
- p9_fid_destroy(fid);
+ if (err == -ERESTARTSYS) {
+ if (retries++ == 0)
+ goto again;
+ } else
+ p9_fid_destroy(fid);
return err;
}
EXPORT_SYMBOL(p9_client_clunk);
@@ -1470,7 +1510,10 @@ int p9_client_remove(struct p9_fid *fid)
p9_free_req(clnt, req);
error:
- p9_fid_destroy(fid);
+ if (err == -ERESTARTSYS)
+ p9_client_clunk(fid);
+ else
+ p9_fid_destroy(fid);
return err;
}
EXPORT_SYMBOL(p9_client_remove);
@@ -1510,7 +1553,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n",
- fid->fid, (long long unsigned) offset, count);
+ fid->fid, (unsigned long long) offset, count);
err = 0;
clnt = fid->clnt;
@@ -1528,7 +1571,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
kernel_buf = 1;
indata = data;
} else
- indata = (char *)udata;
+ indata = (__force char *)udata;
/*
* response header len is 11
* PDU Header(7) + IO Size (4)
@@ -1585,7 +1628,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
struct p9_req_t *req;
p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n",
- fid->fid, (long long unsigned) offset, count);
+ fid->fid, (unsigned long long) offset, count);
err = 0;
clnt = fid->clnt;
@@ -1676,7 +1719,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid)
(unsigned long long)ret->qid.path, ret->qid.version, ret->mode,
ret->atime, ret->mtime, (unsigned long long)ret->length,
ret->name, ret->uid, ret->gid, ret->muid, ret->extension,
- ret->n_uid, ret->n_gid, ret->n_muid);
+ from_kuid(&init_user_ns, ret->n_uid),
+ from_kgid(&init_user_ns, ret->n_gid),
+ from_kuid(&init_user_ns, ret->n_muid));
p9_free_req(clnt, req);
return ret;
@@ -1730,8 +1775,10 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid,
"<<< st_btime_sec=%lld st_btime_nsec=%lld\n"
"<<< st_gen=%lld st_data_version=%lld",
ret->st_result_mask, ret->qid.type, ret->qid.path,
- ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid,
- ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize,
+ ret->qid.version, ret->st_mode, ret->st_nlink,
+ from_kuid(&init_user_ns, ret->st_uid),
+ from_kgid(&init_user_ns, ret->st_gid),
+ ret->st_rdev, ret->st_size, ret->st_blksize,
ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec,
ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec,
ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec,
@@ -1794,7 +1841,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst)
(unsigned long long)wst->qid.path, wst->qid.version, wst->mode,
wst->atime, wst->mtime, (unsigned long long)wst->length,
wst->name, wst->uid, wst->gid, wst->muid, wst->extension,
- wst->n_uid, wst->n_gid, wst->n_muid);
+ from_kuid(&init_user_ns, wst->n_uid),
+ from_kgid(&init_user_ns, wst->n_gid),
+ from_kuid(&init_user_ns, wst->n_muid));
req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst);
if (IS_ERR(req)) {
@@ -1823,7 +1872,9 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr)
" valid=%x mode=%x uid=%d gid=%d size=%lld\n"
" atime_sec=%lld atime_nsec=%lld\n"
" mtime_sec=%lld mtime_nsec=%lld\n",
- p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid,
+ p9attr->valid, p9attr->mode,
+ from_kuid(&init_user_ns, p9attr->uid),
+ from_kgid(&init_user_ns, p9attr->gid),
p9attr->size, p9attr->atime_sec, p9attr->atime_nsec,
p9attr->mtime_sec, p9attr->mtime_nsec);
@@ -2020,7 +2071,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset)
char *dataptr;
p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n",
- fid->fid, (long long unsigned) offset, count);
+ fid->fid, (unsigned long long) offset, count);
err = 0;
clnt = fid->clnt;
@@ -2072,7 +2123,7 @@ error:
EXPORT_SYMBOL(p9_client_readdir);
int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,
- dev_t rdev, gid_t gid, struct p9_qid *qid)
+ dev_t rdev, kgid_t gid, struct p9_qid *qid)
{
int err;
struct p9_client *clnt;
@@ -2082,7 +2133,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode,
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d "
"minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev));
- req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode,
+ req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode,
MAJOR(rdev), MINOR(rdev), gid);
if (IS_ERR(req))
return PTR_ERR(req);
@@ -2103,7 +2154,7 @@ error:
EXPORT_SYMBOL(p9_client_mknod_dotl);
int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
- gid_t gid, struct p9_qid *qid)
+ kgid_t gid, struct p9_qid *qid)
{
int err;
struct p9_client *clnt;
@@ -2112,8 +2163,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode,
err = 0;
clnt = fid->clnt;
p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n",
- fid->fid, name, mode, gid);
- req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode,
+ fid->fid, name, mode, from_kgid(&init_user_ns, gid));
+ req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode,
gid);
if (IS_ERR(req))
return PTR_ERR(req);
diff --git a/net/9p/error.c b/net/9p/error.c
index 2ab2de76010..126fd0dceea 100644
--- a/net/9p/error.c
+++ b/net/9p/error.c
@@ -221,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init);
int p9_errstr2errno(char *errstr, int len)
{
int errno;
- struct hlist_node *p;
struct errormap *c;
int bucket;
errno = 0;
- p = NULL;
c = NULL;
bucket = jhash(errstr, len, 0) % ERRHASHSZ;
- hlist_for_each_entry(c, p, &hash_errmap[bucket], list) {
+ hlist_for_each_entry(c, &hash_errmap[bucket], list) {
if (c->namelen == len && !memcmp(c->name, errstr, len)) {
errno = c->val;
break;
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index 9ee48cb3017..ab9127ec5b7 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -85,6 +85,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
d - int32_t
q - int64_t
s - string
+ u - numeric uid
+ g - numeric gid
S - stat
Q - qid
D - data blob (int32_t size followed by void *, results are not freed)
@@ -163,6 +165,26 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
(*sptr)[len] = 0;
}
break;
+ case 'u': {
+ kuid_t *uid = va_arg(ap, kuid_t *);
+ __le32 le_val;
+ if (pdu_read(pdu, &le_val, sizeof(le_val))) {
+ errcode = -EFAULT;
+ break;
+ }
+ *uid = make_kuid(&init_user_ns,
+ le32_to_cpu(le_val));
+ } break;
+ case 'g': {
+ kgid_t *gid = va_arg(ap, kgid_t *);
+ __le32 le_val;
+ if (pdu_read(pdu, &le_val, sizeof(le_val))) {
+ errcode = -EFAULT;
+ break;
+ }
+ *gid = make_kgid(&init_user_ns,
+ le32_to_cpu(le_val));
+ } break;
case 'Q':{
struct p9_qid *qid =
va_arg(ap, struct p9_qid *);
@@ -177,11 +199,12 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_arg(ap, struct p9_wstat *);
memset(stbuf, 0, sizeof(struct p9_wstat));
- stbuf->n_uid = stbuf->n_gid = stbuf->n_muid =
- -1;
+ stbuf->n_uid = stbuf->n_muid = INVALID_UID;
+ stbuf->n_gid = INVALID_GID;
+
errcode =
p9pdu_readf(pdu, proto_version,
- "wwdQdddqssss?sddd",
+ "wwdQdddqssss?sugu",
&stbuf->size, &stbuf->type,
&stbuf->dev, &stbuf->qid,
&stbuf->mode, &stbuf->atime,
@@ -294,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt,
memset(stbuf, 0, sizeof(struct p9_stat_dotl));
errcode =
p9pdu_readf(pdu, proto_version,
- "qQdddqqqqqqqqqqqqqqq",
+ "qQdugqqqqqqqqqqqqqqq",
&stbuf->st_result_mask,
&stbuf->qid,
&stbuf->st_mode,
@@ -368,7 +391,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
const char *sptr = va_arg(ap, const char *);
uint16_t len = 0;
if (sptr)
- len = min_t(uint16_t, strlen(sptr),
+ len = min_t(size_t, strlen(sptr),
USHRT_MAX);
errcode = p9pdu_writef(pdu, proto_version,
@@ -377,6 +400,20 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
errcode = -EFAULT;
}
break;
+ case 'u': {
+ kuid_t uid = va_arg(ap, kuid_t);
+ __le32 val = cpu_to_le32(
+ from_kuid(&init_user_ns, uid));
+ if (pdu_write(pdu, &val, sizeof(val)))
+ errcode = -EFAULT;
+ } break;
+ case 'g': {
+ kgid_t gid = va_arg(ap, kgid_t);
+ __le32 val = cpu_to_le32(
+ from_kgid(&init_user_ns, gid));
+ if (pdu_write(pdu, &val, sizeof(val)))
+ errcode = -EFAULT;
+ } break;
case 'Q':{
const struct p9_qid *qid =
va_arg(ap, const struct p9_qid *);
@@ -390,7 +427,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
va_arg(ap, const struct p9_wstat *);
errcode =
p9pdu_writef(pdu, proto_version,
- "wwdQdddqssss?sddd",
+ "wwdQdddqssss?sugu",
stbuf->size, stbuf->type,
stbuf->dev, &stbuf->qid,
stbuf->mode, stbuf->atime,
@@ -468,7 +505,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt,
struct p9_iattr_dotl *);
errcode = p9pdu_writef(pdu, proto_version,
- "ddddqqqqq",
+ "ddugqqqqq",
p9attr->valid,
p9attr->mode,
p9attr->uid,
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index de8df957867..2ee3879161b 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -24,11 +24,11 @@
*/
void p9_release_pages(struct page **pages, int nr_pages)
{
- int i = 0;
- while (pages[i] && nr_pages--) {
- put_page(pages[i]);
- i++;
- }
+ int i;
+
+ for (i = 0; i < nr_pages; i++)
+ if (pages[i])
+ put_page(pages[i]);
}
EXPORT_SYMBOL(p9_release_pages);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index fccae26fa67..80d08f6664c 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -63,20 +63,7 @@ struct p9_fd_opts {
int rfd;
int wfd;
u16 port;
-};
-
-/**
- * struct p9_trans_fd - transport state
- * @rd: reference to file to read from
- * @wr: reference of file to write to
- * @conn: connection state reference
- *
- */
-
-struct p9_trans_fd {
- struct file *rd;
- struct file *wr;
- struct p9_conn *conn;
+ int privport;
};
/*
@@ -87,12 +74,15 @@ struct p9_trans_fd {
enum {
/* Options that take integer arguments */
Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
+ /* Options that take no arguments */
+ Opt_privport,
};
static const match_table_t tokens = {
{Opt_port, "port=%u"},
{Opt_rfdno, "rfdno=%u"},
{Opt_wfdno, "wfdno=%u"},
+ {Opt_privport, "privport"},
{Opt_err, NULL},
};
@@ -155,12 +145,29 @@ struct p9_conn {
unsigned long wsched;
};
+/**
+ * struct p9_trans_fd - transport state
+ * @rd: reference to file to read from
+ * @wr: reference of file to write to
+ * @conn: connection state reference
+ *
+ */
+
+struct p9_trans_fd {
+ struct file *rd;
+ struct file *wr;
+ struct p9_conn conn;
+};
+
static void p9_poll_workfn(struct work_struct *work);
static DEFINE_SPINLOCK(p9_poll_lock);
static LIST_HEAD(p9_poll_pending_list);
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);
+static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
+static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
+
static void p9_mux_poll_stop(struct p9_conn *m)
{
unsigned long flags;
@@ -205,15 +212,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
m->err = err;
list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) {
- req->status = REQ_STATUS_ERROR;
- if (!req->t_err)
- req->t_err = err;
list_move(&req->req_list, &cancel_list);
}
list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) {
- req->status = REQ_STATUS_ERROR;
- if (!req->t_err)
- req->t_err = err;
list_move(&req->req_list, &cancel_list);
}
spin_unlock_irqrestore(&m->client->lock, flags);
@@ -221,7 +222,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) {
p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req);
list_del(&req->req_list);
- p9_client_cb(m->client, req);
+ if (!req->t_err)
+ req->t_err = err;
+ p9_client_cb(m->client, req, REQ_STATUS_ERROR);
}
}
@@ -237,10 +240,10 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt)
if (!ts)
return -EREMOTEIO;
- if (!ts->rd->f_op || !ts->rd->f_op->poll)
+ if (!ts->rd->f_op->poll)
return -EIO;
- if (!ts->wr->f_op || !ts->wr->f_op->poll)
+ if (!ts->wr->f_op->poll)
return -EIO;
ret = ts->rd->f_op->poll(ts->rd, pt);
@@ -295,6 +298,7 @@ static void p9_read_work(struct work_struct *work)
{
int n, err;
struct p9_conn *m;
+ int status = REQ_STATUS_ERROR;
m = container_of(work, struct p9_conn, rq);
@@ -316,8 +320,7 @@ static void p9_read_work(struct work_struct *work)
m->rsize - m->rpos);
p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err);
if (err == -EAGAIN) {
- clear_bit(Rworksched, &m->wsched);
- return;
+ goto end_clear;
}
if (err <= 0)
@@ -342,8 +345,7 @@ static void p9_read_work(struct work_struct *work)
"mux %p pkt: size: %d bytes tag: %d\n", m, n, tag);
m->req = p9_tag_lookup(m->client, tag);
- if (!m->req || (m->req->status != REQ_STATUS_SENT &&
- m->req->status != REQ_STATUS_FLSH)) {
+ if (!m->req || (m->req->status != REQ_STATUS_SENT)) {
p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n",
tag);
err = -EIO;
@@ -369,29 +371,30 @@ static void p9_read_work(struct work_struct *work)
p9_debug(P9_DEBUG_TRANS, "got new packet\n");
spin_lock(&m->client->lock);
if (m->req->status != REQ_STATUS_ERROR)
- m->req->status = REQ_STATUS_RCVD;
+ status = REQ_STATUS_RCVD;
list_del(&m->req->req_list);
spin_unlock(&m->client->lock);
- p9_client_cb(m->client, m->req);
+ p9_client_cb(m->client, m->req, status);
m->rbuf = NULL;
m->rpos = 0;
m->rsize = 0;
m->req = NULL;
}
+end_clear:
+ clear_bit(Rworksched, &m->wsched);
+
if (!list_empty(&m->req_list)) {
if (test_and_clear_bit(Rpending, &m->wsched))
n = POLLIN;
else
n = p9_fd_poll(m->client, NULL);
- if (n & POLLIN) {
+ if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m);
schedule_work(&m->rq);
- } else
- clear_bit(Rworksched, &m->wsched);
- } else
- clear_bit(Rworksched, &m->wsched);
+ }
+ }
return;
error:
@@ -453,12 +456,13 @@ static void p9_write_work(struct work_struct *work)
}
if (!m->wsize) {
+ spin_lock(&m->client->lock);
if (list_empty(&m->unsent_req_list)) {
clear_bit(Wworksched, &m->wsched);
+ spin_unlock(&m->client->lock);
return;
}
- spin_lock(&m->client->lock);
req = list_entry(m->unsent_req_list.next, struct p9_req_t,
req_list);
req->status = REQ_STATUS_SENT;
@@ -476,10 +480,9 @@ static void p9_write_work(struct work_struct *work)
clear_bit(Wpending, &m->wsched);
err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos);
p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err);
- if (err == -EAGAIN) {
- clear_bit(Wworksched, &m->wsched);
- return;
- }
+ if (err == -EAGAIN)
+ goto end_clear;
+
if (err < 0)
goto error;
@@ -492,19 +495,21 @@ static void p9_write_work(struct work_struct *work)
if (m->wpos == m->wsize)
m->wpos = m->wsize = 0;
- if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) {
+end_clear:
+ clear_bit(Wworksched, &m->wsched);
+
+ if (m->wsize || !list_empty(&m->unsent_req_list)) {
if (test_and_clear_bit(Wpending, &m->wsched))
n = POLLOUT;
else
n = p9_fd_poll(m->client, NULL);
- if (n & POLLOUT) {
+ if ((n & POLLOUT) &&
+ !test_and_set_bit(Wworksched, &m->wsched)) {
p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m);
schedule_work(&m->wq);
- } else
- clear_bit(Wworksched, &m->wsched);
- } else
- clear_bit(Wworksched, &m->wsched);
+ }
+ }
return;
@@ -513,7 +518,7 @@ error:
clear_bit(Wworksched, &m->wsched);
}
-static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key)
+static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key)
{
struct p9_poll_wait *pwait =
container_of(wait, struct p9_poll_wait, wait);
@@ -564,21 +569,19 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p)
}
/**
- * p9_conn_create - allocate and initialize the per-session mux data
+ * p9_conn_create - initialize the per-session mux data
* @client: client instance
*
* Note: Creates the polling task if this is the first session.
*/
-static struct p9_conn *p9_conn_create(struct p9_client *client)
+static void p9_conn_create(struct p9_client *client)
{
int n;
- struct p9_conn *m;
+ struct p9_trans_fd *ts = client->trans;
+ struct p9_conn *m = &ts->conn;
p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize);
- m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL);
- if (!m)
- return ERR_PTR(-ENOMEM);
INIT_LIST_HEAD(&m->mux_list);
m->client = client;
@@ -600,8 +603,6 @@ static struct p9_conn *p9_conn_create(struct p9_client *client)
p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m);
set_bit(Wpending, &m->wsched);
}
-
- return m;
}
/**
@@ -660,7 +661,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req)
{
int n;
struct p9_trans_fd *ts = client->trans;
- struct p9_conn *m = ts->conn;
+ struct p9_conn *m = &ts->conn;
p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n",
m, current, req->tc, req->tc->id);
@@ -695,14 +696,26 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req)
list_del(&req->req_list);
req->status = REQ_STATUS_FLSHD;
ret = 0;
- } else if (req->status == REQ_STATUS_SENT)
- req->status = REQ_STATUS_FLSH;
-
+ }
spin_unlock(&client->lock);
return ret;
}
+static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+ p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req);
+
+ /* we haven't received a response for oldreq,
+ * remove it from the list.
+ */
+ spin_lock(&client->lock);
+ list_del(&req->req_list);
+ spin_unlock(&client->lock);
+
+ return 0;
+}
+
/**
* parse_opts - parse mount options into p9_fd_opts structure
* @params: options string passed from mount
@@ -739,7 +752,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
if (!*p)
continue;
token = match_token(p, tokens, args);
- if (token != Opt_err) {
+ if ((token != Opt_err) && (token != Opt_privport)) {
r = match_int(&args[0], &option);
if (r < 0) {
p9_debug(P9_DEBUG_ERROR,
@@ -757,6 +770,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
case Opt_wfdno:
opts->wfd = option;
break;
+ case Opt_privport:
+ opts->privport = 1;
+ break;
default:
continue;
}
@@ -768,7 +784,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts)
static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
{
- struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd),
+ struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd),
GFP_KERNEL);
if (!ts)
return -ENOMEM;
@@ -793,46 +809,35 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd)
static int p9_socket_open(struct p9_client *client, struct socket *csocket)
{
struct p9_trans_fd *p;
- int ret, fd;
+ struct file *file;
- p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
+ p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL);
if (!p)
return -ENOMEM;
csocket->sk->sk_allocation = GFP_NOIO;
- fd = sock_map_fd(csocket, 0);
- if (fd < 0) {
+ file = sock_alloc_file(csocket, 0, NULL);
+ if (IS_ERR(file)) {
pr_err("%s (%d): failed to map fd\n",
__func__, task_pid_nr(current));
sock_release(csocket);
kfree(p);
- return fd;
+ return PTR_ERR(file);
}
- get_file(csocket->file);
- get_file(csocket->file);
- p->wr = p->rd = csocket->file;
+ get_file(file);
+ p->wr = p->rd = file;
client->trans = p;
client->status = Connected;
- sys_close(fd); /* still racy */
-
p->rd->f_flags |= O_NONBLOCK;
- p->conn = p9_conn_create(client);
- if (IS_ERR(p->conn)) {
- ret = PTR_ERR(p->conn);
- p->conn = NULL;
- kfree(p);
- sockfd_put(csocket);
- sockfd_put(csocket);
- return ret;
- }
+ p9_conn_create(client);
return 0;
}
/**
- * p9_mux_destroy - cancels all pending requests and frees mux resources
+ * p9_mux_destroy - cancels all pending requests of mux
* @m: mux to destroy
*
*/
@@ -849,7 +854,6 @@ static void p9_conn_destroy(struct p9_conn *m)
p9_conn_cancel(m, -ECONNRESET);
m->client = NULL;
- kfree(m);
}
/**
@@ -871,7 +875,7 @@ static void p9_fd_close(struct p9_client *client)
client->status = Disconnected;
- p9_conn_destroy(ts->conn);
+ p9_conn_destroy(&ts->conn);
if (ts->rd)
fput(ts->rd);
@@ -898,6 +902,24 @@ static inline int valid_ipaddr4(const char *buf)
return 0;
}
+static int p9_bind_privport(struct socket *sock)
+{
+ struct sockaddr_in cl;
+ int port, err = -EINVAL;
+
+ memset(&cl, 0, sizeof(cl));
+ cl.sin_family = AF_INET;
+ cl.sin_addr.s_addr = INADDR_ANY;
+ for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) {
+ cl.sin_port = htons((ushort)port);
+ err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl));
+ if (err != -EADDRINUSE)
+ break;
+ }
+ return err;
+}
+
+
static int
p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
{
@@ -926,6 +948,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
return err;
}
+ if (opts.privport) {
+ err = p9_bind_privport(csocket);
+ if (err < 0) {
+ pr_err("%s (%d): problem binding to privport\n",
+ __func__, task_pid_nr(current));
+ sock_release(csocket);
+ return err;
+ }
+ }
+
err = csocket->ops->connect(csocket,
(struct sockaddr *)&sin_server,
sizeof(struct sockaddr_in), 0);
@@ -995,14 +1027,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
return err;
p = (struct p9_trans_fd *) client->trans;
- p->conn = p9_conn_create(client);
- if (IS_ERR(p->conn)) {
- err = PTR_ERR(p->conn);
- p->conn = NULL;
- fput(p->rd);
- fput(p->wr);
- return err;
- }
+ p9_conn_create(client);
return 0;
}
@@ -1010,11 +1035,12 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args)
static struct p9_trans_module p9_tcp_trans = {
.name = "tcp",
.maxsize = MAX_SOCK_BUF,
- .def = 1,
+ .def = 0,
.create = p9_fd_create_tcp,
.close = p9_fd_close,
.request = p9_fd_request,
.cancel = p9_fd_cancel,
+ .cancelled = p9_fd_cancelled,
.owner = THIS_MODULE,
};
@@ -1026,6 +1052,7 @@ static struct p9_trans_module p9_unix_trans = {
.close = p9_fd_close,
.request = p9_fd_request,
.cancel = p9_fd_cancel,
+ .cancelled = p9_fd_cancelled,
.owner = THIS_MODULE,
};
@@ -1037,6 +1064,7 @@ static struct p9_trans_module p9_fd_trans = {
.close = p9_fd_close,
.request = p9_fd_request,
.cancel = p9_fd_cancel,
+ .cancelled = p9_fd_cancelled,
.owner = THIS_MODULE,
};
@@ -1083,7 +1111,7 @@ int p9_trans_fd_init(void)
void p9_trans_fd_exit(void)
{
- flush_work_sync(&p9_poll_work);
+ flush_work(&p9_poll_work);
v9fs_unregister_trans(&p9_tcp_trans);
v9fs_unregister_trans(&p9_unix_trans);
v9fs_unregister_trans(&p9_fd_trans);
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 2c69ddd691a..14ad43b5cf8 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -57,9 +57,7 @@
#define P9_RDMA_IRD 0
#define P9_RDMA_ORD 0
#define P9_RDMA_TIMEOUT 30000 /* 30 seconds */
-#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can
- * safely advertise a maxsize
- * of 64k */
+#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */
/**
* struct p9_trans_rdma - RDMA transport instance
@@ -75,7 +73,9 @@
* @sq_depth: The depth of the Send Queue
* @sq_sem: Semaphore for the SQ
* @rq_depth: The depth of the Receive Queue.
- * @rq_count: Count of requests in the Receive Queue.
+ * @rq_sem: Semaphore for the RQ
+ * @excess_rc : Amount of posted Receive Contexts without a pending request.
+ * See rdma_request()
* @addr: The remote peer's address
* @req_lock: Protects the active request list
* @cm_done: Completion event for connection management tracking
@@ -100,7 +100,8 @@ struct p9_trans_rdma {
int sq_depth;
struct semaphore sq_sem;
int rq_depth;
- atomic_t rq_count;
+ struct semaphore rq_sem;
+ atomic_t excess_rc;
struct sockaddr_in addr;
spinlock_t req_lock;
@@ -192,6 +193,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts)
if (!*p)
continue;
token = match_token(p, tokens, args);
+ if (token == Opt_err)
+ continue;
r = match_int(&args[0], &option);
if (r < 0) {
p9_debug(P9_DEBUG_ERROR,
@@ -296,9 +299,15 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma,
if (!req)
goto err_out;
+ /* Check that we have not yet received a reply for this request.
+ */
+ if (unlikely(req->rc)) {
+ pr_err("Duplicate reply for request %d", tag);
+ goto err_out;
+ }
+
req->rc = c->rc;
- req->status = REQ_STATUS_RCVD;
- p9_client_cb(client, req);
+ p9_client_cb(client, req, REQ_STATUS_RCVD);
return;
@@ -336,8 +345,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context)
switch (c->wc_op) {
case IB_WC_RECV:
- atomic_dec(&rdma->rq_count);
handle_recv(client, rdma, c, wc.status, wc.byte_len);
+ up(&rdma->rq_sem);
break;
case IB_WC_SEND:
@@ -421,32 +430,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
struct p9_rdma_context *c = NULL;
struct p9_rdma_context *rpl_context = NULL;
+ /* When an error occurs between posting the recv and the send,
+ * there will be a receive context posted without a pending request.
+ * Since there is no way to "un-post" it, we remember it and skip
+ * post_recv() for the next request.
+ * So here,
+ * see if we are this `next request' and need to absorb an excess rc.
+ * If yes, then drop and free our own, and do not recv_post().
+ **/
+ if (unlikely(atomic_read(&rdma->excess_rc) > 0)) {
+ if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) {
+ /* Got one ! */
+ kfree(req->rc);
+ req->rc = NULL;
+ goto dont_need_post_recv;
+ } else {
+ /* We raced and lost. */
+ atomic_inc(&rdma->excess_rc);
+ }
+ }
+
/* Allocate an fcall for the reply */
rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS);
if (!rpl_context) {
err = -ENOMEM;
- goto err_close;
- }
-
- /*
- * If the request has a buffer, steal it, otherwise
- * allocate a new one. Typically, requests should already
- * have receive buffers allocated and just swap them around
- */
- if (!req->rc) {
- req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize,
- GFP_NOFS);
- if (req->rc) {
- req->rc->sdata = (char *) req->rc +
- sizeof(struct p9_fcall);
- req->rc->capacity = client->msize;
- }
+ goto recv_error;
}
rpl_context->rc = req->rc;
- if (!rpl_context->rc) {
- err = -ENOMEM;
- goto err_free2;
- }
/*
* Post a receive buffer for this request. We need to ensure
@@ -455,29 +465,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
* outstanding request, so we must keep a count to avoid
* overflowing the RQ.
*/
- if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) {
- err = post_recv(client, rpl_context);
- if (err)
- goto err_free1;
- } else
- atomic_dec(&rdma->rq_count);
+ if (down_interruptible(&rdma->rq_sem)) {
+ err = -EINTR;
+ goto recv_error;
+ }
+ err = post_recv(client, rpl_context);
+ if (err) {
+ p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n");
+ goto recv_error;
+ }
/* remove posted receive buffer from request structure */
req->rc = NULL;
+dont_need_post_recv:
/* Post the request */
c = kmalloc(sizeof *c, GFP_NOFS);
if (!c) {
err = -ENOMEM;
- goto err_free1;
+ goto send_error;
}
c->req = req;
c->busa = ib_dma_map_single(rdma->cm_id->device,
c->req->tc->sdata, c->req->tc->size,
DMA_TO_DEVICE);
- if (ib_dma_mapping_error(rdma->cm_id->device, c->busa))
- goto error;
+ if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) {
+ err = -EIO;
+ goto send_error;
+ }
sge.addr = c->busa;
sge.length = c->req->tc->size;
@@ -491,22 +507,38 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req)
wr.sg_list = &sge;
wr.num_sge = 1;
- if (down_interruptible(&rdma->sq_sem))
- goto error;
+ if (down_interruptible(&rdma->sq_sem)) {
+ err = -EINTR;
+ goto send_error;
+ }
- return ib_post_send(rdma->qp, &wr, &bad_wr);
+ /* Mark request as `sent' *before* we actually send it,
+ * because doing if after could erase the REQ_STATUS_RCVD
+ * status in case of a very fast reply.
+ */
+ req->status = REQ_STATUS_SENT;
+ err = ib_post_send(rdma->qp, &wr, &bad_wr);
+ if (err)
+ goto send_error;
- error:
+ /* Success */
+ return 0;
+
+ /* Handle errors that happened during or while preparing the send: */
+ send_error:
+ req->status = REQ_STATUS_ERROR;
kfree(c);
- kfree(rpl_context->rc);
- kfree(rpl_context);
- p9_debug(P9_DEBUG_ERROR, "EIO\n");
- return -EIO;
- err_free1:
- kfree(rpl_context->rc);
- err_free2:
+ p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err);
+
+ /* Ach.
+ * We did recv_post(), but not send. We have one recv_post in excess.
+ */
+ atomic_inc(&rdma->excess_rc);
+ return err;
+
+ /* Handle errors that happened during or while preparing post_recv(): */
+ recv_error:
kfree(rpl_context);
- err_close:
spin_lock_irqsave(&rdma->req_lock, flags);
if (rdma->state < P9_RDMA_CLOSING) {
rdma->state = P9_RDMA_CLOSING;
@@ -551,17 +583,30 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts)
spin_lock_init(&rdma->req_lock);
init_completion(&rdma->cm_done);
sema_init(&rdma->sq_sem, rdma->sq_depth);
- atomic_set(&rdma->rq_count, 0);
+ sema_init(&rdma->rq_sem, rdma->rq_depth);
+ atomic_set(&rdma->excess_rc, 0);
return rdma;
}
-/* its not clear to me we can do anything after send has been posted */
static int rdma_cancel(struct p9_client *client, struct p9_req_t *req)
{
+ /* Nothing to do here.
+ * We will take care of it (if we have to) in rdma_cancelled()
+ */
return 1;
}
+/* A request has been fully flushed without a reply.
+ * That means we have posted one buffer in excess.
+ */
+static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req)
+{
+ struct p9_trans_rdma *rdma = client->trans;
+ atomic_inc(&rdma->excess_rc);
+ return 0;
+}
+
/**
* trans_create_rdma - Transport method for creating atransport instance
* @client: client instance
@@ -695,6 +740,7 @@ static struct p9_trans_module p9_rdma_trans = {
.close = rdma_close,
.request = rdma_request,
.cancel = rdma_cancel,
+ .cancelled = rdma_cancelled,
};
/**
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3d432068f62..6940d8fe897 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -39,6 +39,7 @@
#include <linux/inet.h>
#include <linux/idr.h>
#include <linux/file.h>
+#include <linux/highmem.h>
#include <linux/slab.h>
#include <net/9p/9p.h>
#include <linux/parser.h>
@@ -86,7 +87,7 @@ struct virtio_chan {
/* This is global limit. Since we don't have a global structure,
* will be placing it in each channel.
*/
- int p9_max_pages;
+ unsigned long p9_max_pages;
/* Scatterlist: can be too big for stack. */
struct scatterlist sg[VIRTQUEUE_NUM];
@@ -163,8 +164,7 @@ static void req_done(struct virtqueue *vq)
p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc);
p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag);
req = p9_tag_lookup(chan->client, rc->tag);
- req->status = REQ_STATUS_RCVD;
- p9_client_cb(chan->client, req);
+ p9_client_cb(chan->client, req, REQ_STATUS_RCVD);
}
}
@@ -192,12 +192,15 @@ static int pack_sg_list(struct scatterlist *sg, int start,
s = rest_of_page(data);
if (s > count)
s = count;
+ BUG_ON(index > limit);
+ /* Make sure we don't terminate early. */
+ sg_unmark_end(&sg[index]);
sg_set_buf(&sg[index++], data, s);
count -= s;
data += s;
- BUG_ON(index > limit);
}
-
+ if (index-start)
+ sg_mark_end(&sg[index - 1]);
return index-start;
}
@@ -212,7 +215,7 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req)
* this takes a list of pages.
* @sg: scatter/gather list to pack into
* @start: which segment of the sg_list to start at
- * @**pdata: a list of pages to add into sg.
+ * @pdata: a list of pages to add into sg.
* @nr_pages: number of pages to pack into the scatter/gather list
* @data: data to pack into scatter/gather list
* @count: amount of data to pack into the scatter/gather list
@@ -235,12 +238,17 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit,
s = rest_of_page(data);
if (s > count)
s = count;
+ /* Make sure we don't terminate early. */
+ sg_unmark_end(&sg[index]);
sg_set_page(&sg[index++], pdata[i++], s, data_off);
data_off = 0;
data += s;
count -= s;
nr_pages--;
}
+
+ if (index-start)
+ sg_mark_end(&sg[index - 1]);
return index - start;
}
@@ -255,9 +263,10 @@ static int
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
{
int err;
- int in, out;
+ int in, out, out_sgs, in_sgs;
unsigned long flags;
struct virtio_chan *chan = client->trans;
+ struct scatterlist *sgs[2];
p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
@@ -265,14 +274,19 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
req_retry:
spin_lock_irqsave(&chan->lock, flags);
+ out_sgs = in_sgs = 0;
/* Handle out VirtIO ring buffers */
out = pack_sg_list(chan->sg, 0,
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
+ if (out)
+ sgs[out_sgs++] = chan->sg;
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
+ if (in)
+ sgs[out_sgs + in_sgs++] = chan->sg + out;
- err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+ err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
GFP_ATOMIC);
if (err < 0) {
if (err == -ENOSPC) {
@@ -288,7 +302,7 @@ req_retry:
} else {
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS,
- "virtio rpc add_buf returned failure\n");
+ "virtio rpc add_sgs returned failure\n");
return -EIO;
}
}
@@ -325,7 +339,10 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
int count = nr_pages;
while (nr_pages) {
s = rest_of_page(data);
- pages[index++] = virt_to_page(data);
+ if (is_vmalloc_addr(data))
+ pages[index++] = vmalloc_to_page(data);
+ else
+ pages[index++] = kmap_to_page(data);
data += s;
nr_pages--;
}
@@ -350,11 +367,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
char *uidata, char *uodata, int inlen,
int outlen, int in_hdr_len, int kern_buf)
{
- int in, out, err;
+ int in, out, err, out_sgs, in_sgs;
unsigned long flags;
int in_nr_pages = 0, out_nr_pages = 0;
struct page **in_pages = NULL, **out_pages = NULL;
struct virtio_chan *chan = client->trans;
+ struct scatterlist *sgs[4];
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
@@ -395,13 +413,22 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
req->status = REQ_STATUS_SENT;
req_retry_pinned:
spin_lock_irqsave(&chan->lock, flags);
+
+ out_sgs = in_sgs = 0;
+
/* out data */
out = pack_sg_list(chan->sg, 0,
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
- if (out_pages)
+ if (out)
+ sgs[out_sgs++] = chan->sg;
+
+ if (out_pages) {
+ sgs[out_sgs++] = chan->sg + out;
out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
out_pages, out_nr_pages, uodata, outlen);
+ }
+
/*
* Take care of in data
* For example TREAD have 11.
@@ -411,11 +438,17 @@ req_retry_pinned:
*/
in = pack_sg_list(chan->sg, out,
VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
- if (in_pages)
+ if (in)
+ sgs[out_sgs + in_sgs++] = chan->sg + out;
+
+ if (in_pages) {
+ sgs[out_sgs + in_sgs++] = chan->sg + out + in;
in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
in_pages, in_nr_pages, uidata, inlen);
+ }
- err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
+ BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
+ err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
GFP_ATOMIC);
if (err < 0) {
if (err == -ENOSPC) {
@@ -431,7 +464,7 @@ req_retry_pinned:
} else {
spin_unlock_irqrestore(&chan->lock, flags);
p9_debug(P9_DEBUG_TRANS,
- "virtio rpc add_buf returned failure\n");
+ "virtio rpc add_sgs returned failure\n");
err = -EIO;
goto err_out;
}
@@ -513,9 +546,7 @@ static int p9_virtio_probe(struct virtio_device *vdev)
chan->inuse = false;
if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) {
- vdev->config->get(vdev,
- offsetof(struct virtio_9p_config, tag_len),
- &tag_len, sizeof(tag_len));
+ virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len);
} else {
err = -EINVAL;
goto out_free_vq;
@@ -525,8 +556,9 @@ static int p9_virtio_probe(struct virtio_device *vdev)
err = -ENOMEM;
goto out_free_vq;
}
- vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag),
- tag, tag_len);
+
+ virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag),
+ tag, tag_len);
chan->tag = tag;
chan->tag_len = tag_len;
err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
@@ -546,6 +578,10 @@ static int p9_virtio_probe(struct virtio_device *vdev)
mutex_lock(&virtio_9p_lock);
list_add_tail(&chan->chan_list, &virtio_chan_list);
mutex_unlock(&virtio_9p_lock);
+
+ /* Let udev rules use the new mount_tag attribute. */
+ kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
+
return 0;
out_free_tag:
@@ -615,13 +651,15 @@ static void p9_virtio_remove(struct virtio_device *vdev)
{
struct virtio_chan *chan = vdev->priv;
- BUG_ON(chan->inuse);
+ if (chan->inuse)
+ p9_virtio_close(chan->client);
vdev->config->del_vqs(vdev);
mutex_lock(&virtio_9p_lock);
list_del(&chan->chan_list);
mutex_unlock(&virtio_9p_lock);
sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr);
+ kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE);
kfree(chan->tag);
kfree(chan->vc_wq);
kfree(chan);
@@ -662,7 +700,7 @@ static struct p9_trans_module p9_virtio_trans = {
* page in zero copy.
*/
.maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3),
- .def = 0,
+ .def = 1,
.owner = THIS_MODULE,
};
diff --git a/net/9p/util.c b/net/9p/util.c
index 6ceeeb384de..59f278e64f5 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -87,23 +87,18 @@ EXPORT_SYMBOL(p9_idpool_destroy);
int p9_idpool_get(struct p9_idpool *p)
{
- int i = 0;
- int error;
+ int i;
unsigned long flags;
-retry:
- if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
- return -1;
-
+ idr_preload(GFP_NOFS);
spin_lock_irqsave(&p->lock, flags);
/* no need to store exactly p, we just need something non-null */
- error = idr_get_new(&p->pool, p, &i);
- spin_unlock_irqrestore(&p->lock, flags);
+ i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT);
- if (error == -EAGAIN)
- goto retry;
- else if (error)
+ spin_unlock_irqrestore(&p->lock, flags);
+ idr_preload_end();
+ if (i < 0)
return -1;
p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p);