diff options
Diffstat (limited to 'net/9p')
| -rw-r--r-- | net/9p/Kconfig | 2 | ||||
| -rw-r--r-- | net/9p/client.c | 241 | ||||
| -rw-r--r-- | net/9p/error.c | 4 | ||||
| -rw-r--r-- | net/9p/protocol.c | 51 | ||||
| -rw-r--r-- | net/9p/trans_common.c | 10 | ||||
| -rw-r--r-- | net/9p/trans_fd.c | 212 | ||||
| -rw-r--r-- | net/9p/trans_rdma.c | 148 | ||||
| -rw-r--r-- | net/9p/trans_virtio.c | 82 | ||||
| -rw-r--r-- | net/9p/util.c | 17 |
9 files changed, 480 insertions, 287 deletions
diff --git a/net/9p/Kconfig b/net/9p/Kconfig index d9ea09b11cf..a75174a3372 100644 --- a/net/9p/Kconfig +++ b/net/9p/Kconfig @@ -23,7 +23,7 @@ config NET_9P_VIRTIO guest partitions and a host partition. config NET_9P_RDMA - depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL + depends on INET && INFINIBAND && INFINIBAND_ADDR_TRANS tristate "9P RDMA Transport (Experimental)" help This builds support for an RDMA transport. diff --git a/net/9p/client.c b/net/9p/client.c index 776618cd2be..0004cbaac4a 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -76,6 +76,20 @@ inline int p9_is_proto_dotu(struct p9_client *clnt) } EXPORT_SYMBOL(p9_is_proto_dotu); +/* + * Some error codes are taken directly from the server replies, + * make sure they are valid. + */ +static int safe_errno(int err) +{ + if ((err > 0) || (err < -MAX_ERRNO)) { + p9_debug(P9_DEBUG_ERROR, "Invalid error code %d\n", err); + return -EPROTO; + } + return err; +} + + /* Interpret mount option for protocol version */ static int get_protocol_version(char *s) { @@ -113,7 +127,7 @@ static int parse_opts(char *opts, struct p9_client *clnt) char *s; int ret = 0; - clnt->proto_version = p9_proto_2000u; + clnt->proto_version = p9_proto_2000L; clnt->msize = 8192; if (!opts) @@ -190,6 +204,17 @@ free_and_return: return ret; } +static struct p9_fcall *p9_fcall_alloc(int alloc_msize) +{ + struct p9_fcall *fc; + fc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, GFP_NOFS); + if (!fc) + return NULL; + fc->capacity = alloc_msize; + fc->sdata = (char *) fc + sizeof(struct p9_fcall); + return fc; +} + /** * p9_tag_alloc - lookup/allocate a request by tag * @c: client session to lookup tag within @@ -242,39 +267,36 @@ p9_tag_alloc(struct p9_client *c, u16 tag, unsigned int max_size) col = tag % P9_ROW_MAXTAG; req = &c->reqs[row][col]; - if (!req->tc) { + if (!req->wq) { req->wq = kmalloc(sizeof(wait_queue_head_t), GFP_NOFS); - if (!req->wq) { - pr_err("Couldn't grow tag array\n"); - return ERR_PTR(-ENOMEM); - } + if (!req->wq) + goto grow_failed; init_waitqueue_head(req->wq); - req->tc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, - GFP_NOFS); - req->rc = kmalloc(sizeof(struct p9_fcall) + alloc_msize, - GFP_NOFS); - if ((!req->tc) || (!req->rc)) { - pr_err("Couldn't grow tag array\n"); - kfree(req->tc); - kfree(req->rc); - kfree(req->wq); - req->tc = req->rc = NULL; - req->wq = NULL; - return ERR_PTR(-ENOMEM); - } - req->tc->capacity = alloc_msize; - req->rc->capacity = alloc_msize; - req->tc->sdata = (char *) req->tc + sizeof(struct p9_fcall); - req->rc->sdata = (char *) req->rc + sizeof(struct p9_fcall); } + if (!req->tc) + req->tc = p9_fcall_alloc(alloc_msize); + if (!req->rc) + req->rc = p9_fcall_alloc(alloc_msize); + if (!req->tc || !req->rc) + goto grow_failed; + p9pdu_reset(req->tc); p9pdu_reset(req->rc); req->tc->tag = tag-1; req->status = REQ_STATUS_ALLOC; - return &c->reqs[row][col]; + return req; + +grow_failed: + pr_err("Couldn't grow tag array\n"); + kfree(req->tc); + kfree(req->rc); + kfree(req->wq); + req->tc = req->rc = NULL; + req->wq = NULL; + return ERR_PTR(-ENOMEM); } /** @@ -393,9 +415,17 @@ static void p9_free_req(struct p9_client *c, struct p9_req_t *r) * req: request received * */ -void p9_client_cb(struct p9_client *c, struct p9_req_t *req) +void p9_client_cb(struct p9_client *c, struct p9_req_t *req, int status) { p9_debug(P9_DEBUG_MUX, " tag %d\n", req->tc->tag); + + /* + * This barrier is needed to make sure any change made to req before + * the other thread wakes up will indeed be seen by the waiting side. + */ + smp_wmb(); + req->status = status; + wake_up(req->wq); p9_debug(P9_DEBUG_MUX, "wakeup: %d\n", req->tc->tag); } @@ -548,36 +578,19 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, if (!p9_is_proto_dotl(c)) { /* Error is reported in string format */ - uint16_t len; - /* 7 = header size for RERROR, 2 is the size of string len; */ - int inline_len = in_hdrlen - (7 + 2); - - /* Read the size of error string */ - err = p9pdu_readf(req->rc, c->proto_version, "w", &len); - if (err) - goto out_err; + int len; + /* 7 = header size for RERROR; */ + int inline_len = in_hdrlen - 7; - ename = kmalloc(len + 1, GFP_NOFS); - if (!ename) { - err = -ENOMEM; + len = req->rc->size - req->rc->offset; + if (len > (P9_ZC_HDR_SZ - 7)) { + err = -EFAULT; goto out_err; } - if (len <= inline_len) { - /* We have error in protocol buffer itself */ - if (pdu_read(req->rc, ename, len)) { - err = -EFAULT; - goto out_free; - } - } else { - /* - * Part of the data is in user space buffer. - */ - if (pdu_read(req->rc, ename, inline_len)) { - err = -EFAULT; - goto out_free; - - } + ename = &req->rc->sdata[req->rc->offset]; + if (len > inline_len) { + /* We have error in external buffer */ if (kern_buf) { memcpy(ename + inline_len, uidata, len - inline_len); @@ -586,19 +599,19 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, uidata, len - inline_len); if (err) { err = -EFAULT; - goto out_free; + goto out_err; } } } - ename[len] = 0; - if (p9_is_proto_dotu(c)) { - /* For dotu we also have error code */ - err = p9pdu_readf(req->rc, - c->proto_version, "d", &ecode); - if (err) - goto out_free; + ename = NULL; + err = p9pdu_readf(req->rc, c->proto_version, "s?d", + &ename, &ecode); + if (err) + goto out_err; + + if (p9_is_proto_dotu(c)) err = -ecode; - } + if (!err || !IS_ERR_VALUE(err)) { err = p9_errstr2errno(ename, strlen(ename)); @@ -614,8 +627,6 @@ static int p9_check_zc_errors(struct p9_client *c, struct p9_req_t *req, } return err; -out_free: - kfree(ename); out_err: p9_debug(P9_DEBUG_ERROR, "couldn't parse error%d\n", err); return err; @@ -652,13 +663,13 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq) if (IS_ERR(req)) return PTR_ERR(req); - - /* if we haven't received a response for oldreq, - remove it from the list. */ - spin_lock(&c->lock); - if (oldreq->status == REQ_STATUS_FLSH) - list_del(&oldreq->req_list); - spin_unlock(&c->lock); + /* + * if we haven't received a response for oldreq, + * remove it from the list + */ + if (oldreq->status == REQ_STATUS_SENT) + if (c->trans_mod->cancelled) + c->trans_mod->cancelled(c, oldreq); p9_free_req(c, req); return 0; @@ -740,10 +751,24 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) c->status = Disconnected; goto reterr; } +again: /* Wait for the response */ err = wait_event_interruptible(*req->wq, req->status >= REQ_STATUS_RCVD); + /* + * Make sure our req is coherent with regard to updates in other + * threads - echoes to wmb() in the callback + */ + smp_rmb(); + + if ((err == -ERESTARTSYS) && (c->status == Connected) + && (type == P9_TFLUSH)) { + sigpending = 1; + clear_thread_flag(TIF_SIGPENDING); + goto again; + } + if (req->status == REQ_STATUS_ERROR) { p9_debug(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err); err = req->t_err; @@ -774,7 +799,7 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...) return req; reterr: p9_free_req(c, req); - return ERR_PTR(err); + return ERR_PTR(safe_errno(err)); } /** @@ -857,7 +882,7 @@ static struct p9_req_t *p9_client_zc_rpc(struct p9_client *c, int8_t type, return req; reterr: p9_free_req(c, req); - return ERR_PTR(err); + return ERR_PTR(safe_errno(err)); } static struct p9_fid *p9_fid_create(struct p9_client *clnt) @@ -973,6 +998,7 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) { int err; struct p9_client *clnt; + char *client_id; err = 0; clnt = kmalloc(sizeof(struct p9_client), GFP_KERNEL); @@ -981,6 +1007,10 @@ struct p9_client *p9_client_create(const char *dev_name, char *options) clnt->trans_mod = NULL; clnt->trans = NULL; + + client_id = utsname()->nodename; + memcpy(clnt->name, client_id, strlen(client_id) + 1); + spin_lock_init(&clnt->lock); INIT_LIST_HEAD(&clnt->fidlist); @@ -1078,7 +1108,7 @@ void p9_client_begin_disconnect(struct p9_client *clnt) EXPORT_SYMBOL(p9_client_begin_disconnect); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, - char *uname, u32 n_uname, char *aname) + char *uname, kuid_t n_uname, char *aname) { int err = 0; struct p9_req_t *req; @@ -1095,7 +1125,7 @@ struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, goto error; } - req = p9_client_rpc(clnt, P9_TATTACH, "ddss?d", fid->fid, + req = p9_client_rpc(clnt, P9_TATTACH, "ddss?u", fid->fid, afid ? afid->fid : P9_NOFID, uname, aname, n_uname); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1248,7 +1278,7 @@ error: EXPORT_SYMBOL(p9_client_open); int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, - gid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err = 0; struct p9_client *clnt; @@ -1257,13 +1287,14 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode, p9_debug(P9_DEBUG_9P, ">>> TLCREATE fid %d name %s flags %d mode %d gid %d\n", - ofid->fid, name, flags, mode, gid); + ofid->fid, name, flags, mode, + from_kgid(&init_user_ns, gid)); clnt = ofid->clnt; if (ofid->mode != -1) return -EINVAL; - req = p9_client_rpc(clnt, P9_TLCREATE, "dsddd", ofid->fid, name, flags, + req = p9_client_rpc(clnt, P9_TLCREATE, "dsddg", ofid->fid, name, flags, mode, gid); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1336,7 +1367,7 @@ error: } EXPORT_SYMBOL(p9_client_fcreate); -int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, +int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, kgid_t gid, struct p9_qid *qid) { int err = 0; @@ -1347,7 +1378,7 @@ int p9_client_symlink(struct p9_fid *dfid, char *name, char *symtgt, gid_t gid, dfid->fid, name, symtgt); clnt = dfid->clnt; - req = p9_client_rpc(clnt, P9_TSYMLINK, "dssd", dfid->fid, name, symtgt, + req = p9_client_rpc(clnt, P9_TSYMLINK, "dssg", dfid->fid, name, symtgt, gid); if (IS_ERR(req)) { err = PTR_ERR(req); @@ -1420,6 +1451,7 @@ int p9_client_clunk(struct p9_fid *fid) int err; struct p9_client *clnt; struct p9_req_t *req; + int retries = 0; if (!fid) { pr_warn("%s (%d): Trying to clunk with NULL fid\n", @@ -1428,7 +1460,9 @@ int p9_client_clunk(struct p9_fid *fid) return 0; } - p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d\n", fid->fid); +again: + p9_debug(P9_DEBUG_9P, ">>> TCLUNK fid %d (try %d)\n", fid->fid, + retries); err = 0; clnt = fid->clnt; @@ -1444,8 +1478,14 @@ int p9_client_clunk(struct p9_fid *fid) error: /* * Fid is not valid even after a failed clunk + * If interrupted, retry once then give up and + * leak fid until umount. */ - p9_fid_destroy(fid); + if (err == -ERESTARTSYS) { + if (retries++ == 0) + goto again; + } else + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_clunk); @@ -1470,7 +1510,10 @@ int p9_client_remove(struct p9_fid *fid) p9_free_req(clnt, req); error: - p9_fid_destroy(fid); + if (err == -ERESTARTSYS) + p9_client_clunk(fid); + else + p9_fid_destroy(fid); return err; } EXPORT_SYMBOL(p9_client_remove); @@ -1510,7 +1553,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, p9_debug(P9_DEBUG_9P, ">>> TREAD fid %d offset %llu %d\n", - fid->fid, (long long unsigned) offset, count); + fid->fid, (unsigned long long) offset, count); err = 0; clnt = fid->clnt; @@ -1528,7 +1571,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset, kernel_buf = 1; indata = data; } else - indata = (char *)udata; + indata = (__force char *)udata; /* * response header len is 11 * PDU Header(7) + IO Size (4) @@ -1585,7 +1628,7 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata, struct p9_req_t *req; p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu count %d\n", - fid->fid, (long long unsigned) offset, count); + fid->fid, (unsigned long long) offset, count); err = 0; clnt = fid->clnt; @@ -1676,7 +1719,9 @@ struct p9_wstat *p9_client_stat(struct p9_fid *fid) (unsigned long long)ret->qid.path, ret->qid.version, ret->mode, ret->atime, ret->mtime, (unsigned long long)ret->length, ret->name, ret->uid, ret->gid, ret->muid, ret->extension, - ret->n_uid, ret->n_gid, ret->n_muid); + from_kuid(&init_user_ns, ret->n_uid), + from_kgid(&init_user_ns, ret->n_gid), + from_kuid(&init_user_ns, ret->n_muid)); p9_free_req(clnt, req); return ret; @@ -1730,8 +1775,10 @@ struct p9_stat_dotl *p9_client_getattr_dotl(struct p9_fid *fid, "<<< st_btime_sec=%lld st_btime_nsec=%lld\n" "<<< st_gen=%lld st_data_version=%lld", ret->st_result_mask, ret->qid.type, ret->qid.path, - ret->qid.version, ret->st_mode, ret->st_nlink, ret->st_uid, - ret->st_gid, ret->st_rdev, ret->st_size, ret->st_blksize, + ret->qid.version, ret->st_mode, ret->st_nlink, + from_kuid(&init_user_ns, ret->st_uid), + from_kgid(&init_user_ns, ret->st_gid), + ret->st_rdev, ret->st_size, ret->st_blksize, ret->st_blocks, ret->st_atime_sec, ret->st_atime_nsec, ret->st_mtime_sec, ret->st_mtime_nsec, ret->st_ctime_sec, ret->st_ctime_nsec, ret->st_btime_sec, ret->st_btime_nsec, @@ -1794,7 +1841,9 @@ int p9_client_wstat(struct p9_fid *fid, struct p9_wstat *wst) (unsigned long long)wst->qid.path, wst->qid.version, wst->mode, wst->atime, wst->mtime, (unsigned long long)wst->length, wst->name, wst->uid, wst->gid, wst->muid, wst->extension, - wst->n_uid, wst->n_gid, wst->n_muid); + from_kuid(&init_user_ns, wst->n_uid), + from_kgid(&init_user_ns, wst->n_gid), + from_kuid(&init_user_ns, wst->n_muid)); req = p9_client_rpc(clnt, P9_TWSTAT, "dwS", fid->fid, wst->size+2, wst); if (IS_ERR(req)) { @@ -1823,7 +1872,9 @@ int p9_client_setattr(struct p9_fid *fid, struct p9_iattr_dotl *p9attr) " valid=%x mode=%x uid=%d gid=%d size=%lld\n" " atime_sec=%lld atime_nsec=%lld\n" " mtime_sec=%lld mtime_nsec=%lld\n", - p9attr->valid, p9attr->mode, p9attr->uid, p9attr->gid, + p9attr->valid, p9attr->mode, + from_kuid(&init_user_ns, p9attr->uid), + from_kgid(&init_user_ns, p9attr->gid), p9attr->size, p9attr->atime_sec, p9attr->atime_nsec, p9attr->mtime_sec, p9attr->mtime_nsec); @@ -2020,7 +2071,7 @@ int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset) char *dataptr; p9_debug(P9_DEBUG_9P, ">>> TREADDIR fid %d offset %llu count %d\n", - fid->fid, (long long unsigned) offset, count); + fid->fid, (unsigned long long) offset, count); err = 0; clnt = fid->clnt; @@ -2072,7 +2123,7 @@ error: EXPORT_SYMBOL(p9_client_readdir); int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, - dev_t rdev, gid_t gid, struct p9_qid *qid) + dev_t rdev, kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -2082,7 +2133,7 @@ int p9_client_mknod_dotl(struct p9_fid *fid, char *name, int mode, clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKNOD fid %d name %s mode %d major %d " "minor %d\n", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev)); - req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddd", fid->fid, name, mode, + req = p9_client_rpc(clnt, P9_TMKNOD, "dsdddg", fid->fid, name, mode, MAJOR(rdev), MINOR(rdev), gid); if (IS_ERR(req)) return PTR_ERR(req); @@ -2103,7 +2154,7 @@ error: EXPORT_SYMBOL(p9_client_mknod_dotl); int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, - gid_t gid, struct p9_qid *qid) + kgid_t gid, struct p9_qid *qid) { int err; struct p9_client *clnt; @@ -2112,8 +2163,8 @@ int p9_client_mkdir_dotl(struct p9_fid *fid, char *name, int mode, err = 0; clnt = fid->clnt; p9_debug(P9_DEBUG_9P, ">>> TMKDIR fid %d name %s mode %d gid %d\n", - fid->fid, name, mode, gid); - req = p9_client_rpc(clnt, P9_TMKDIR, "dsdd", fid->fid, name, mode, + fid->fid, name, mode, from_kgid(&init_user_ns, gid)); + req = p9_client_rpc(clnt, P9_TMKDIR, "dsdg", fid->fid, name, mode, gid); if (IS_ERR(req)) return PTR_ERR(req); diff --git a/net/9p/error.c b/net/9p/error.c index 2ab2de76010..126fd0dceea 100644 --- a/net/9p/error.c +++ b/net/9p/error.c @@ -221,15 +221,13 @@ EXPORT_SYMBOL(p9_error_init); int p9_errstr2errno(char *errstr, int len) { int errno; - struct hlist_node *p; struct errormap *c; int bucket; errno = 0; - p = NULL; c = NULL; bucket = jhash(errstr, len, 0) % ERRHASHSZ; - hlist_for_each_entry(c, p, &hash_errmap[bucket], list) { + hlist_for_each_entry(c, &hash_errmap[bucket], list) { if (c->namelen == len && !memcmp(c->name, errstr, len)) { errno = c->val; break; diff --git a/net/9p/protocol.c b/net/9p/protocol.c index 9ee48cb3017..ab9127ec5b7 100644 --- a/net/9p/protocol.c +++ b/net/9p/protocol.c @@ -85,6 +85,8 @@ pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size) d - int32_t q - int64_t s - string + u - numeric uid + g - numeric gid S - stat Q - qid D - data blob (int32_t size followed by void *, results are not freed) @@ -163,6 +165,26 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, (*sptr)[len] = 0; } break; + case 'u': { + kuid_t *uid = va_arg(ap, kuid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *uid = make_kuid(&init_user_ns, + le32_to_cpu(le_val)); + } break; + case 'g': { + kgid_t *gid = va_arg(ap, kgid_t *); + __le32 le_val; + if (pdu_read(pdu, &le_val, sizeof(le_val))) { + errcode = -EFAULT; + break; + } + *gid = make_kgid(&init_user_ns, + le32_to_cpu(le_val)); + } break; case 'Q':{ struct p9_qid *qid = va_arg(ap, struct p9_qid *); @@ -177,11 +199,12 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, va_arg(ap, struct p9_wstat *); memset(stbuf, 0, sizeof(struct p9_wstat)); - stbuf->n_uid = stbuf->n_gid = stbuf->n_muid = - -1; + stbuf->n_uid = stbuf->n_muid = INVALID_UID; + stbuf->n_gid = INVALID_GID; + errcode = p9pdu_readf(pdu, proto_version, - "wwdQdddqssss?sddd", + "wwdQdddqssss?sugu", &stbuf->size, &stbuf->type, &stbuf->dev, &stbuf->qid, &stbuf->mode, &stbuf->atime, @@ -294,7 +317,7 @@ p9pdu_vreadf(struct p9_fcall *pdu, int proto_version, const char *fmt, memset(stbuf, 0, sizeof(struct p9_stat_dotl)); errcode = p9pdu_readf(pdu, proto_version, - "qQdddqqqqqqqqqqqqqqq", + "qQdugqqqqqqqqqqqqqqq", &stbuf->st_result_mask, &stbuf->qid, &stbuf->st_mode, @@ -368,7 +391,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, const char *sptr = va_arg(ap, const char *); uint16_t len = 0; if (sptr) - len = min_t(uint16_t, strlen(sptr), + len = min_t(size_t, strlen(sptr), USHRT_MAX); errcode = p9pdu_writef(pdu, proto_version, @@ -377,6 +400,20 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, errcode = -EFAULT; } break; + case 'u': { + kuid_t uid = va_arg(ap, kuid_t); + __le32 val = cpu_to_le32( + from_kuid(&init_user_ns, uid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; + case 'g': { + kgid_t gid = va_arg(ap, kgid_t); + __le32 val = cpu_to_le32( + from_kgid(&init_user_ns, gid)); + if (pdu_write(pdu, &val, sizeof(val))) + errcode = -EFAULT; + } break; case 'Q':{ const struct p9_qid *qid = va_arg(ap, const struct p9_qid *); @@ -390,7 +427,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, va_arg(ap, const struct p9_wstat *); errcode = p9pdu_writef(pdu, proto_version, - "wwdQdddqssss?sddd", + "wwdQdddqssss?sugu", stbuf->size, stbuf->type, stbuf->dev, &stbuf->qid, stbuf->mode, stbuf->atime, @@ -468,7 +505,7 @@ p9pdu_vwritef(struct p9_fcall *pdu, int proto_version, const char *fmt, struct p9_iattr_dotl *); errcode = p9pdu_writef(pdu, proto_version, - "ddddqqqqq", + "ddugqqqqq", p9attr->valid, p9attr->mode, p9attr->uid, diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c index de8df957867..2ee3879161b 100644 --- a/net/9p/trans_common.c +++ b/net/9p/trans_common.c @@ -24,11 +24,11 @@ */ void p9_release_pages(struct page **pages, int nr_pages) { - int i = 0; - while (pages[i] && nr_pages--) { - put_page(pages[i]); - i++; - } + int i; + + for (i = 0; i < nr_pages; i++) + if (pages[i]) + put_page(pages[i]); } EXPORT_SYMBOL(p9_release_pages); diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index fccae26fa67..80d08f6664c 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -63,20 +63,7 @@ struct p9_fd_opts { int rfd; int wfd; u16 port; -}; - -/** - * struct p9_trans_fd - transport state - * @rd: reference to file to read from - * @wr: reference of file to write to - * @conn: connection state reference - * - */ - -struct p9_trans_fd { - struct file *rd; - struct file *wr; - struct p9_conn *conn; + int privport; }; /* @@ -87,12 +74,15 @@ struct p9_trans_fd { enum { /* Options that take integer arguments */ Opt_port, Opt_rfdno, Opt_wfdno, Opt_err, + /* Options that take no arguments */ + Opt_privport, }; static const match_table_t tokens = { {Opt_port, "port=%u"}, {Opt_rfdno, "rfdno=%u"}, {Opt_wfdno, "wfdno=%u"}, + {Opt_privport, "privport"}, {Opt_err, NULL}, }; @@ -155,12 +145,29 @@ struct p9_conn { unsigned long wsched; }; +/** + * struct p9_trans_fd - transport state + * @rd: reference to file to read from + * @wr: reference of file to write to + * @conn: connection state reference + * + */ + +struct p9_trans_fd { + struct file *rd; + struct file *wr; + struct p9_conn conn; +}; + static void p9_poll_workfn(struct work_struct *work); static DEFINE_SPINLOCK(p9_poll_lock); static LIST_HEAD(p9_poll_pending_list); static DECLARE_WORK(p9_poll_work, p9_poll_workfn); +static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT; +static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT; + static void p9_mux_poll_stop(struct p9_conn *m) { unsigned long flags; @@ -205,15 +212,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err) m->err = err; list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { - req->status = REQ_STATUS_ERROR; - if (!req->t_err) - req->t_err = err; list_move(&req->req_list, &cancel_list); } list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { - req->status = REQ_STATUS_ERROR; - if (!req->t_err) - req->t_err = err; list_move(&req->req_list, &cancel_list); } spin_unlock_irqrestore(&m->client->lock, flags); @@ -221,7 +222,9 @@ static void p9_conn_cancel(struct p9_conn *m, int err) list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { p9_debug(P9_DEBUG_ERROR, "call back req %p\n", req); list_del(&req->req_list); - p9_client_cb(m->client, req); + if (!req->t_err) + req->t_err = err; + p9_client_cb(m->client, req, REQ_STATUS_ERROR); } } @@ -237,10 +240,10 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt) if (!ts) return -EREMOTEIO; - if (!ts->rd->f_op || !ts->rd->f_op->poll) + if (!ts->rd->f_op->poll) return -EIO; - if (!ts->wr->f_op || !ts->wr->f_op->poll) + if (!ts->wr->f_op->poll) return -EIO; ret = ts->rd->f_op->poll(ts->rd, pt); @@ -295,6 +298,7 @@ static void p9_read_work(struct work_struct *work) { int n, err; struct p9_conn *m; + int status = REQ_STATUS_ERROR; m = container_of(work, struct p9_conn, rq); @@ -316,8 +320,7 @@ static void p9_read_work(struct work_struct *work) m->rsize - m->rpos); p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n", m, err); if (err == -EAGAIN) { - clear_bit(Rworksched, &m->wsched); - return; + goto end_clear; } if (err <= 0) @@ -342,8 +345,7 @@ static void p9_read_work(struct work_struct *work) "mux %p pkt: size: %d bytes tag: %d\n", m, n, tag); m->req = p9_tag_lookup(m->client, tag); - if (!m->req || (m->req->status != REQ_STATUS_SENT && - m->req->status != REQ_STATUS_FLSH)) { + if (!m->req || (m->req->status != REQ_STATUS_SENT)) { p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n", tag); err = -EIO; @@ -369,29 +371,30 @@ static void p9_read_work(struct work_struct *work) p9_debug(P9_DEBUG_TRANS, "got new packet\n"); spin_lock(&m->client->lock); if (m->req->status != REQ_STATUS_ERROR) - m->req->status = REQ_STATUS_RCVD; + status = REQ_STATUS_RCVD; list_del(&m->req->req_list); spin_unlock(&m->client->lock); - p9_client_cb(m->client, m->req); + p9_client_cb(m->client, m->req, status); m->rbuf = NULL; m->rpos = 0; m->rsize = 0; m->req = NULL; } +end_clear: + clear_bit(Rworksched, &m->wsched); + if (!list_empty(&m->req_list)) { if (test_and_clear_bit(Rpending, &m->wsched)) n = POLLIN; else n = p9_fd_poll(m->client, NULL); - if (n & POLLIN) { + if ((n & POLLIN) && !test_and_set_bit(Rworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched read work %p\n", m); schedule_work(&m->rq); - } else - clear_bit(Rworksched, &m->wsched); - } else - clear_bit(Rworksched, &m->wsched); + } + } return; error: @@ -453,12 +456,13 @@ static void p9_write_work(struct work_struct *work) } if (!m->wsize) { + spin_lock(&m->client->lock); if (list_empty(&m->unsent_req_list)) { clear_bit(Wworksched, &m->wsched); + spin_unlock(&m->client->lock); return; } - spin_lock(&m->client->lock); req = list_entry(m->unsent_req_list.next, struct p9_req_t, req_list); req->status = REQ_STATUS_SENT; @@ -476,10 +480,9 @@ static void p9_write_work(struct work_struct *work) clear_bit(Wpending, &m->wsched); err = p9_fd_write(m->client, m->wbuf + m->wpos, m->wsize - m->wpos); p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n", m, err); - if (err == -EAGAIN) { - clear_bit(Wworksched, &m->wsched); - return; - } + if (err == -EAGAIN) + goto end_clear; + if (err < 0) goto error; @@ -492,19 +495,21 @@ static void p9_write_work(struct work_struct *work) if (m->wpos == m->wsize) m->wpos = m->wsize = 0; - if (m->wsize == 0 && !list_empty(&m->unsent_req_list)) { +end_clear: + clear_bit(Wworksched, &m->wsched); + + if (m->wsize || !list_empty(&m->unsent_req_list)) { if (test_and_clear_bit(Wpending, &m->wsched)) n = POLLOUT; else n = p9_fd_poll(m->client, NULL); - if (n & POLLOUT) { + if ((n & POLLOUT) && + !test_and_set_bit(Wworksched, &m->wsched)) { p9_debug(P9_DEBUG_TRANS, "sched write work %p\n", m); schedule_work(&m->wq); - } else - clear_bit(Wworksched, &m->wsched); - } else - clear_bit(Wworksched, &m->wsched); + } + } return; @@ -513,7 +518,7 @@ error: clear_bit(Wworksched, &m->wsched); } -static int p9_pollwake(wait_queue_t *wait, unsigned mode, int sync, void *key) +static int p9_pollwake(wait_queue_t *wait, unsigned int mode, int sync, void *key) { struct p9_poll_wait *pwait = container_of(wait, struct p9_poll_wait, wait); @@ -564,21 +569,19 @@ p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) } /** - * p9_conn_create - allocate and initialize the per-session mux data + * p9_conn_create - initialize the per-session mux data * @client: client instance * * Note: Creates the polling task if this is the first session. */ -static struct p9_conn *p9_conn_create(struct p9_client *client) +static void p9_conn_create(struct p9_client *client) { int n; - struct p9_conn *m; + struct p9_trans_fd *ts = client->trans; + struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n", client, client->msize); - m = kzalloc(sizeof(struct p9_conn), GFP_KERNEL); - if (!m) - return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&m->mux_list); m->client = client; @@ -600,8 +603,6 @@ static struct p9_conn *p9_conn_create(struct p9_client *client) p9_debug(P9_DEBUG_TRANS, "mux %p can write\n", m); set_bit(Wpending, &m->wsched); } - - return m; } /** @@ -660,7 +661,7 @@ static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) { int n; struct p9_trans_fd *ts = client->trans; - struct p9_conn *m = ts->conn; + struct p9_conn *m = &ts->conn; p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n", m, current, req->tc, req->tc->id); @@ -695,14 +696,26 @@ static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) list_del(&req->req_list); req->status = REQ_STATUS_FLSHD; ret = 0; - } else if (req->status == REQ_STATUS_SENT) - req->status = REQ_STATUS_FLSH; - + } spin_unlock(&client->lock); return ret; } +static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + p9_debug(P9_DEBUG_TRANS, "client %p req %p\n", client, req); + + /* we haven't received a response for oldreq, + * remove it from the list. + */ + spin_lock(&client->lock); + list_del(&req->req_list); + spin_unlock(&client->lock); + + return 0; +} + /** * parse_opts - parse mount options into p9_fd_opts structure * @params: options string passed from mount @@ -739,7 +752,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); - if (token != Opt_err) { + if ((token != Opt_err) && (token != Opt_privport)) { r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, @@ -757,6 +770,9 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) case Opt_wfdno: opts->wfd = option; break; + case Opt_privport: + opts->privport = 1; + break; default: continue; } @@ -768,7 +784,7 @@ static int parse_opts(char *params, struct p9_fd_opts *opts) static int p9_fd_open(struct p9_client *client, int rfd, int wfd) { - struct p9_trans_fd *ts = kmalloc(sizeof(struct p9_trans_fd), + struct p9_trans_fd *ts = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!ts) return -ENOMEM; @@ -793,46 +809,35 @@ static int p9_fd_open(struct p9_client *client, int rfd, int wfd) static int p9_socket_open(struct p9_client *client, struct socket *csocket) { struct p9_trans_fd *p; - int ret, fd; + struct file *file; - p = kmalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); + p = kzalloc(sizeof(struct p9_trans_fd), GFP_KERNEL); if (!p) return -ENOMEM; csocket->sk->sk_allocation = GFP_NOIO; - fd = sock_map_fd(csocket, 0); - if (fd < 0) { + file = sock_alloc_file(csocket, 0, NULL); + if (IS_ERR(file)) { pr_err("%s (%d): failed to map fd\n", __func__, task_pid_nr(current)); sock_release(csocket); kfree(p); - return fd; + return PTR_ERR(file); } - get_file(csocket->file); - get_file(csocket->file); - p->wr = p->rd = csocket->file; + get_file(file); + p->wr = p->rd = file; client->trans = p; client->status = Connected; - sys_close(fd); /* still racy */ - p->rd->f_flags |= O_NONBLOCK; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - ret = PTR_ERR(p->conn); - p->conn = NULL; - kfree(p); - sockfd_put(csocket); - sockfd_put(csocket); - return ret; - } + p9_conn_create(client); return 0; } /** - * p9_mux_destroy - cancels all pending requests and frees mux resources + * p9_mux_destroy - cancels all pending requests of mux * @m: mux to destroy * */ @@ -849,7 +854,6 @@ static void p9_conn_destroy(struct p9_conn *m) p9_conn_cancel(m, -ECONNRESET); m->client = NULL; - kfree(m); } /** @@ -871,7 +875,7 @@ static void p9_fd_close(struct p9_client *client) client->status = Disconnected; - p9_conn_destroy(ts->conn); + p9_conn_destroy(&ts->conn); if (ts->rd) fput(ts->rd); @@ -898,6 +902,24 @@ static inline int valid_ipaddr4(const char *buf) return 0; } +static int p9_bind_privport(struct socket *sock) +{ + struct sockaddr_in cl; + int port, err = -EINVAL; + + memset(&cl, 0, sizeof(cl)); + cl.sin_family = AF_INET; + cl.sin_addr.s_addr = INADDR_ANY; + for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { + cl.sin_port = htons((ushort)port); + err = kernel_bind(sock, (struct sockaddr *)&cl, sizeof(cl)); + if (err != -EADDRINUSE) + break; + } + return err; +} + + static int p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) { @@ -926,6 +948,16 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) return err; } + if (opts.privport) { + err = p9_bind_privport(csocket); + if (err < 0) { + pr_err("%s (%d): problem binding to privport\n", + __func__, task_pid_nr(current)); + sock_release(csocket); + return err; + } + } + err = csocket->ops->connect(csocket, (struct sockaddr *)&sin_server, sizeof(struct sockaddr_in), 0); @@ -995,14 +1027,7 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) return err; p = (struct p9_trans_fd *) client->trans; - p->conn = p9_conn_create(client); - if (IS_ERR(p->conn)) { - err = PTR_ERR(p->conn); - p->conn = NULL; - fput(p->rd); - fput(p->wr); - return err; - } + p9_conn_create(client); return 0; } @@ -1010,11 +1035,12 @@ p9_fd_create(struct p9_client *client, const char *addr, char *args) static struct p9_trans_module p9_tcp_trans = { .name = "tcp", .maxsize = MAX_SOCK_BUF, - .def = 1, + .def = 0, .create = p9_fd_create_tcp, .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; @@ -1026,6 +1052,7 @@ static struct p9_trans_module p9_unix_trans = { .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; @@ -1037,6 +1064,7 @@ static struct p9_trans_module p9_fd_trans = { .close = p9_fd_close, .request = p9_fd_request, .cancel = p9_fd_cancel, + .cancelled = p9_fd_cancelled, .owner = THIS_MODULE, }; @@ -1083,7 +1111,7 @@ int p9_trans_fd_init(void) void p9_trans_fd_exit(void) { - flush_work_sync(&p9_poll_work); + flush_work(&p9_poll_work); v9fs_unregister_trans(&p9_tcp_trans); v9fs_unregister_trans(&p9_unix_trans); v9fs_unregister_trans(&p9_fd_trans); diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index 2c69ddd691a..14ad43b5cf8 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -57,9 +57,7 @@ #define P9_RDMA_IRD 0 #define P9_RDMA_ORD 0 #define P9_RDMA_TIMEOUT 30000 /* 30 seconds */ -#define P9_RDMA_MAXSIZE (4*4096) /* Min SGE is 4, so we can - * safely advertise a maxsize - * of 64k */ +#define P9_RDMA_MAXSIZE (1024*1024) /* 1MB */ /** * struct p9_trans_rdma - RDMA transport instance @@ -75,7 +73,9 @@ * @sq_depth: The depth of the Send Queue * @sq_sem: Semaphore for the SQ * @rq_depth: The depth of the Receive Queue. - * @rq_count: Count of requests in the Receive Queue. + * @rq_sem: Semaphore for the RQ + * @excess_rc : Amount of posted Receive Contexts without a pending request. + * See rdma_request() * @addr: The remote peer's address * @req_lock: Protects the active request list * @cm_done: Completion event for connection management tracking @@ -100,7 +100,8 @@ struct p9_trans_rdma { int sq_depth; struct semaphore sq_sem; int rq_depth; - atomic_t rq_count; + struct semaphore rq_sem; + atomic_t excess_rc; struct sockaddr_in addr; spinlock_t req_lock; @@ -192,6 +193,8 @@ static int parse_opts(char *params, struct p9_rdma_opts *opts) if (!*p) continue; token = match_token(p, tokens, args); + if (token == Opt_err) + continue; r = match_int(&args[0], &option); if (r < 0) { p9_debug(P9_DEBUG_ERROR, @@ -296,9 +299,15 @@ handle_recv(struct p9_client *client, struct p9_trans_rdma *rdma, if (!req) goto err_out; + /* Check that we have not yet received a reply for this request. + */ + if (unlikely(req->rc)) { + pr_err("Duplicate reply for request %d", tag); + goto err_out; + } + req->rc = c->rc; - req->status = REQ_STATUS_RCVD; - p9_client_cb(client, req); + p9_client_cb(client, req, REQ_STATUS_RCVD); return; @@ -336,8 +345,8 @@ static void cq_comp_handler(struct ib_cq *cq, void *cq_context) switch (c->wc_op) { case IB_WC_RECV: - atomic_dec(&rdma->rq_count); handle_recv(client, rdma, c, wc.status, wc.byte_len); + up(&rdma->rq_sem); break; case IB_WC_SEND: @@ -421,32 +430,33 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) struct p9_rdma_context *c = NULL; struct p9_rdma_context *rpl_context = NULL; + /* When an error occurs between posting the recv and the send, + * there will be a receive context posted without a pending request. + * Since there is no way to "un-post" it, we remember it and skip + * post_recv() for the next request. + * So here, + * see if we are this `next request' and need to absorb an excess rc. + * If yes, then drop and free our own, and do not recv_post(). + **/ + if (unlikely(atomic_read(&rdma->excess_rc) > 0)) { + if ((atomic_sub_return(1, &rdma->excess_rc) >= 0)) { + /* Got one ! */ + kfree(req->rc); + req->rc = NULL; + goto dont_need_post_recv; + } else { + /* We raced and lost. */ + atomic_inc(&rdma->excess_rc); + } + } + /* Allocate an fcall for the reply */ rpl_context = kmalloc(sizeof *rpl_context, GFP_NOFS); if (!rpl_context) { err = -ENOMEM; - goto err_close; - } - - /* - * If the request has a buffer, steal it, otherwise - * allocate a new one. Typically, requests should already - * have receive buffers allocated and just swap them around - */ - if (!req->rc) { - req->rc = kmalloc(sizeof(struct p9_fcall)+client->msize, - GFP_NOFS); - if (req->rc) { - req->rc->sdata = (char *) req->rc + - sizeof(struct p9_fcall); - req->rc->capacity = client->msize; - } + goto recv_error; } rpl_context->rc = req->rc; - if (!rpl_context->rc) { - err = -ENOMEM; - goto err_free2; - } /* * Post a receive buffer for this request. We need to ensure @@ -455,29 +465,35 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) * outstanding request, so we must keep a count to avoid * overflowing the RQ. */ - if (atomic_inc_return(&rdma->rq_count) <= rdma->rq_depth) { - err = post_recv(client, rpl_context); - if (err) - goto err_free1; - } else - atomic_dec(&rdma->rq_count); + if (down_interruptible(&rdma->rq_sem)) { + err = -EINTR; + goto recv_error; + } + err = post_recv(client, rpl_context); + if (err) { + p9_debug(P9_DEBUG_FCALL, "POST RECV failed\n"); + goto recv_error; + } /* remove posted receive buffer from request structure */ req->rc = NULL; +dont_need_post_recv: /* Post the request */ c = kmalloc(sizeof *c, GFP_NOFS); if (!c) { err = -ENOMEM; - goto err_free1; + goto send_error; } c->req = req; c->busa = ib_dma_map_single(rdma->cm_id->device, c->req->tc->sdata, c->req->tc->size, DMA_TO_DEVICE); - if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) - goto error; + if (ib_dma_mapping_error(rdma->cm_id->device, c->busa)) { + err = -EIO; + goto send_error; + } sge.addr = c->busa; sge.length = c->req->tc->size; @@ -491,22 +507,38 @@ static int rdma_request(struct p9_client *client, struct p9_req_t *req) wr.sg_list = &sge; wr.num_sge = 1; - if (down_interruptible(&rdma->sq_sem)) - goto error; + if (down_interruptible(&rdma->sq_sem)) { + err = -EINTR; + goto send_error; + } - return ib_post_send(rdma->qp, &wr, &bad_wr); + /* Mark request as `sent' *before* we actually send it, + * because doing if after could erase the REQ_STATUS_RCVD + * status in case of a very fast reply. + */ + req->status = REQ_STATUS_SENT; + err = ib_post_send(rdma->qp, &wr, &bad_wr); + if (err) + goto send_error; - error: + /* Success */ + return 0; + + /* Handle errors that happened during or while preparing the send: */ + send_error: + req->status = REQ_STATUS_ERROR; kfree(c); - kfree(rpl_context->rc); - kfree(rpl_context); - p9_debug(P9_DEBUG_ERROR, "EIO\n"); - return -EIO; - err_free1: - kfree(rpl_context->rc); - err_free2: + p9_debug(P9_DEBUG_ERROR, "Error %d in rdma_request()\n", err); + + /* Ach. + * We did recv_post(), but not send. We have one recv_post in excess. + */ + atomic_inc(&rdma->excess_rc); + return err; + + /* Handle errors that happened during or while preparing post_recv(): */ + recv_error: kfree(rpl_context); - err_close: spin_lock_irqsave(&rdma->req_lock, flags); if (rdma->state < P9_RDMA_CLOSING) { rdma->state = P9_RDMA_CLOSING; @@ -551,17 +583,30 @@ static struct p9_trans_rdma *alloc_rdma(struct p9_rdma_opts *opts) spin_lock_init(&rdma->req_lock); init_completion(&rdma->cm_done); sema_init(&rdma->sq_sem, rdma->sq_depth); - atomic_set(&rdma->rq_count, 0); + sema_init(&rdma->rq_sem, rdma->rq_depth); + atomic_set(&rdma->excess_rc, 0); return rdma; } -/* its not clear to me we can do anything after send has been posted */ static int rdma_cancel(struct p9_client *client, struct p9_req_t *req) { + /* Nothing to do here. + * We will take care of it (if we have to) in rdma_cancelled() + */ return 1; } +/* A request has been fully flushed without a reply. + * That means we have posted one buffer in excess. + */ +static int rdma_cancelled(struct p9_client *client, struct p9_req_t *req) +{ + struct p9_trans_rdma *rdma = client->trans; + atomic_inc(&rdma->excess_rc); + return 0; +} + /** * trans_create_rdma - Transport method for creating atransport instance * @client: client instance @@ -695,6 +740,7 @@ static struct p9_trans_module p9_rdma_trans = { .close = rdma_close, .request = rdma_request, .cancel = rdma_cancel, + .cancelled = rdma_cancelled, }; /** diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 3d432068f62..6940d8fe897 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -39,6 +39,7 @@ #include <linux/inet.h> #include <linux/idr.h> #include <linux/file.h> +#include <linux/highmem.h> #include <linux/slab.h> #include <net/9p/9p.h> #include <linux/parser.h> @@ -86,7 +87,7 @@ struct virtio_chan { /* This is global limit. Since we don't have a global structure, * will be placing it in each channel. */ - int p9_max_pages; + unsigned long p9_max_pages; /* Scatterlist: can be too big for stack. */ struct scatterlist sg[VIRTQUEUE_NUM]; @@ -163,8 +164,7 @@ static void req_done(struct virtqueue *vq) p9_debug(P9_DEBUG_TRANS, ": rc %p\n", rc); p9_debug(P9_DEBUG_TRANS, ": lookup tag %d\n", rc->tag); req = p9_tag_lookup(chan->client, rc->tag); - req->status = REQ_STATUS_RCVD; - p9_client_cb(chan->client, req); + p9_client_cb(chan->client, req, REQ_STATUS_RCVD); } } @@ -192,12 +192,15 @@ static int pack_sg_list(struct scatterlist *sg, int start, s = rest_of_page(data); if (s > count) s = count; + BUG_ON(index > limit); + /* Make sure we don't terminate early. */ + sg_unmark_end(&sg[index]); sg_set_buf(&sg[index++], data, s); count -= s; data += s; - BUG_ON(index > limit); } - + if (index-start) + sg_mark_end(&sg[index - 1]); return index-start; } @@ -212,7 +215,7 @@ static int p9_virtio_cancel(struct p9_client *client, struct p9_req_t *req) * this takes a list of pages. * @sg: scatter/gather list to pack into * @start: which segment of the sg_list to start at - * @**pdata: a list of pages to add into sg. + * @pdata: a list of pages to add into sg. * @nr_pages: number of pages to pack into the scatter/gather list * @data: data to pack into scatter/gather list * @count: amount of data to pack into the scatter/gather list @@ -235,12 +238,17 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit, s = rest_of_page(data); if (s > count) s = count; + /* Make sure we don't terminate early. */ + sg_unmark_end(&sg[index]); sg_set_page(&sg[index++], pdata[i++], s, data_off); data_off = 0; data += s; count -= s; nr_pages--; } + + if (index-start) + sg_mark_end(&sg[index - 1]); return index - start; } @@ -255,9 +263,10 @@ static int p9_virtio_request(struct p9_client *client, struct p9_req_t *req) { int err; - int in, out; + int in, out, out_sgs, in_sgs; unsigned long flags; struct virtio_chan *chan = client->trans; + struct scatterlist *sgs[2]; p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n"); @@ -265,14 +274,19 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req) req_retry: spin_lock_irqsave(&chan->lock, flags); + out_sgs = in_sgs = 0; /* Handle out VirtIO ring buffers */ out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); + if (out) + sgs[out_sgs++] = chan->sg; in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity); + if (in) + sgs[out_sgs + in_sgs++] = chan->sg + out; - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { @@ -288,7 +302,7 @@ req_retry: } else { spin_unlock_irqrestore(&chan->lock, flags); p9_debug(P9_DEBUG_TRANS, - "virtio rpc add_buf returned failure\n"); + "virtio rpc add_sgs returned failure\n"); return -EIO; } } @@ -325,7 +339,10 @@ static int p9_get_mapped_pages(struct virtio_chan *chan, int count = nr_pages; while (nr_pages) { s = rest_of_page(data); - pages[index++] = virt_to_page(data); + if (is_vmalloc_addr(data)) + pages[index++] = vmalloc_to_page(data); + else + pages[index++] = kmap_to_page(data); data += s; nr_pages--; } @@ -350,11 +367,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, char *uidata, char *uodata, int inlen, int outlen, int in_hdr_len, int kern_buf) { - int in, out, err; + int in, out, err, out_sgs, in_sgs; unsigned long flags; int in_nr_pages = 0, out_nr_pages = 0; struct page **in_pages = NULL, **out_pages = NULL; struct virtio_chan *chan = client->trans; + struct scatterlist *sgs[4]; p9_debug(P9_DEBUG_TRANS, "virtio request\n"); @@ -395,13 +413,22 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req, req->status = REQ_STATUS_SENT; req_retry_pinned: spin_lock_irqsave(&chan->lock, flags); + + out_sgs = in_sgs = 0; + /* out data */ out = pack_sg_list(chan->sg, 0, VIRTQUEUE_NUM, req->tc->sdata, req->tc->size); - if (out_pages) + if (out) + sgs[out_sgs++] = chan->sg; + + if (out_pages) { + sgs[out_sgs++] = chan->sg + out; out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM, out_pages, out_nr_pages, uodata, outlen); + } + /* * Take care of in data * For example TREAD have 11. @@ -411,11 +438,17 @@ req_retry_pinned: */ in = pack_sg_list(chan->sg, out, VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len); - if (in_pages) + if (in) + sgs[out_sgs + in_sgs++] = chan->sg + out; + + if (in_pages) { + sgs[out_sgs + in_sgs++] = chan->sg + out + in; in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM, in_pages, in_nr_pages, uidata, inlen); + } - err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc, + BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs)); + err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc, GFP_ATOMIC); if (err < 0) { if (err == -ENOSPC) { @@ -431,7 +464,7 @@ req_retry_pinned: } else { spin_unlock_irqrestore(&chan->lock, flags); p9_debug(P9_DEBUG_TRANS, - "virtio rpc add_buf returned failure\n"); + "virtio rpc add_sgs returned failure\n"); err = -EIO; goto err_out; } @@ -513,9 +546,7 @@ static int p9_virtio_probe(struct virtio_device *vdev) chan->inuse = false; if (virtio_has_feature(vdev, VIRTIO_9P_MOUNT_TAG)) { - vdev->config->get(vdev, - offsetof(struct virtio_9p_config, tag_len), - &tag_len, sizeof(tag_len)); + virtio_cread(vdev, struct virtio_9p_config, tag_len, &tag_len); } else { err = -EINVAL; goto out_free_vq; @@ -525,8 +556,9 @@ static int p9_virtio_probe(struct virtio_device *vdev) err = -ENOMEM; goto out_free_vq; } - vdev->config->get(vdev, offsetof(struct virtio_9p_config, tag), - tag, tag_len); + + virtio_cread_bytes(vdev, offsetof(struct virtio_9p_config, tag), + tag, tag_len); chan->tag = tag; chan->tag_len = tag_len; err = sysfs_create_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); @@ -546,6 +578,10 @@ static int p9_virtio_probe(struct virtio_device *vdev) mutex_lock(&virtio_9p_lock); list_add_tail(&chan->chan_list, &virtio_chan_list); mutex_unlock(&virtio_9p_lock); + + /* Let udev rules use the new mount_tag attribute. */ + kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); + return 0; out_free_tag: @@ -615,13 +651,15 @@ static void p9_virtio_remove(struct virtio_device *vdev) { struct virtio_chan *chan = vdev->priv; - BUG_ON(chan->inuse); + if (chan->inuse) + p9_virtio_close(chan->client); vdev->config->del_vqs(vdev); mutex_lock(&virtio_9p_lock); list_del(&chan->chan_list); mutex_unlock(&virtio_9p_lock); sysfs_remove_file(&(vdev->dev.kobj), &dev_attr_mount_tag.attr); + kobject_uevent(&(vdev->dev.kobj), KOBJ_CHANGE); kfree(chan->tag); kfree(chan->vc_wq); kfree(chan); @@ -662,7 +700,7 @@ static struct p9_trans_module p9_virtio_trans = { * page in zero copy. */ .maxsize = PAGE_SIZE * (VIRTQUEUE_NUM - 3), - .def = 0, + .def = 1, .owner = THIS_MODULE, }; diff --git a/net/9p/util.c b/net/9p/util.c index 6ceeeb384de..59f278e64f5 100644 --- a/net/9p/util.c +++ b/net/9p/util.c @@ -87,23 +87,18 @@ EXPORT_SYMBOL(p9_idpool_destroy); int p9_idpool_get(struct p9_idpool *p) { - int i = 0; - int error; + int i; unsigned long flags; -retry: - if (idr_pre_get(&p->pool, GFP_NOFS) == 0) - return -1; - + idr_preload(GFP_NOFS); spin_lock_irqsave(&p->lock, flags); /* no need to store exactly p, we just need something non-null */ - error = idr_get_new(&p->pool, p, &i); - spin_unlock_irqrestore(&p->lock, flags); + i = idr_alloc(&p->pool, p, 0, 0, GFP_NOWAIT); - if (error == -EAGAIN) - goto retry; - else if (error) + spin_unlock_irqrestore(&p->lock, flags); + idr_preload_end(); + if (i < 0) return -1; p9_debug(P9_DEBUG_MUX, " id %d pool %p\n", i, p); |
