aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_dev.c2
-rw-r--r--net/9p/Kconfig8
-rw-r--r--net/9p/client.c30
-rw-r--r--net/9p/mod.c4
-rw-r--r--net/9p/trans_fd.c7
-rw-r--r--net/9p/trans_rdma.c3
-rw-r--r--net/9p/util.c2
-rw-r--r--net/atm/lec.c2
-rw-r--r--net/atm/mpc.c2
-rw-r--r--net/caif/chnl_net.c9
-rw-r--r--net/ceph/messenger.c82
-rw-r--r--net/ceph/osd_client.c19
-rw-r--r--net/ceph/osdmap.c13
-rw-r--r--net/core/dev.c7
-rw-r--r--net/core/net_namespace.c63
-rw-r--r--net/core/rtnetlink.c5
-rw-r--r--net/dns_resolver/dns_key.c10
-rw-r--r--net/ipv4/af_inet.c3
-rw-r--r--net/ipv4/inet_connection_sock.c2
-rw-r--r--net/ipv4/ip_options.c15
-rw-r--r--net/iucv/iucv.c2
-rw-r--r--net/l2tp/l2tp_debugfs.c2
-rw-r--r--net/packet/af_packet.c15
-rw-r--r--net/rds/ib.c2
-rw-r--r--net/rds/ib_cm.c2
-rw-r--r--net/rds/iw.c2
-rw-r--r--net/rds/iw_cm.c2
-rw-r--r--net/rds/rdma_transport.c3
-rw-r--r--net/sctp/associola.c23
-rw-r--r--net/sctp/sm_sideeffect.c3
-rw-r--r--net/sctp/sm_statefuns.c14
-rw-r--r--net/sunrpc/auth.c4
-rw-r--r--net/sunrpc/clnt.c29
-rw-r--r--net/sunrpc/rpcb_clnt.c97
-rw-r--r--net/sunrpc/svc.c2
-rw-r--r--net/sunrpc/svcsock.c336
-rw-r--r--net/sunrpc/xdr.c19
-rw-r--r--net/sunrpc/xprtrdma/svc_rdma_transport.c3
-rw-r--r--net/sunrpc/xprtrdma/verbs.c2
-rw-r--r--net/sunrpc/xprtsock.c435
40 files changed, 995 insertions, 290 deletions
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index f247f5bff88..7ea5cf9ea08 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -165,7 +165,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
u64_stats_update_begin(&stats->syncp);
stats->tx_packets++;
stats->tx_bytes += len;
- u64_stats_update_begin(&stats->syncp);
+ u64_stats_update_end(&stats->syncp);
} else {
this_cpu_inc(vlan_dev_info(dev)->vlan_pcpu_stats->tx_dropped);
}
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 7ed75c7bd5d..d9ea09b11cf 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -3,8 +3,8 @@
#
menuconfig NET_9P
- depends on NET && EXPERIMENTAL
- tristate "Plan 9 Resource Sharing Support (9P2000) (Experimental)"
+ depends on NET
+ tristate "Plan 9 Resource Sharing Support (9P2000)"
help
If you say Y here, you will get experimental support for
Plan 9 resource sharing via the 9P2000 protocol.
@@ -16,8 +16,8 @@ menuconfig NET_9P
if NET_9P
config NET_9P_VIRTIO
- depends on EXPERIMENTAL && VIRTIO
- tristate "9P Virtio Transport (Experimental)"
+ depends on VIRTIO
+ tristate "9P Virtio Transport"
help
This builds support for a transports between
guest partitions and a host partition.
diff --git a/net/9p/client.c b/net/9p/client.c
index ceab943dfc4..9e3b0e640da 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -92,9 +92,6 @@ static int get_protocol_version(const substring_t *name)
return version;
}
-static struct p9_req_t *
-p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
-
/**
* parse_options - parse mount options into client structure
* @opts: options string passed from mount
@@ -307,12 +304,13 @@ static int p9_tag_init(struct p9_client *c)
c->tagpool = p9_idpool_create();
if (IS_ERR(c->tagpool)) {
err = PTR_ERR(c->tagpool);
- c->tagpool = NULL;
goto error;
}
-
- p9_idpool_get(c->tagpool); /* reserve tag 0 */
-
+ err = p9_idpool_get(c->tagpool); /* reserve tag 0 */
+ if (err < 0) {
+ p9_idpool_destroy(c->tagpool);
+ goto error;
+ }
c->max_tag = 0;
error:
return err;
@@ -518,12 +516,15 @@ out_err:
return err;
}
+static struct p9_req_t *
+p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...);
+
/**
* p9_client_flush - flush (cancel) a request
* @c: client state
* @oldreq: request to cancel
*
- * This sents a flush for a particular requests and links
+ * This sents a flush for a particular request and links
* the flush request to the original request. The current
* code only supports a single flush request although the protocol
* allows for multiple flush requests to be sent for a single request.
@@ -789,11 +790,13 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
spin_lock_init(&clnt->lock);
INIT_LIST_HEAD(&clnt->fidlist);
- p9_tag_init(clnt);
+ err = p9_tag_init(clnt);
+ if (err < 0)
+ goto free_client;
err = parse_opts(options, clnt);
if (err < 0)
- goto free_client;
+ goto destroy_tagpool;
if (!clnt->trans_mod)
clnt->trans_mod = v9fs_get_default_trans();
@@ -802,13 +805,12 @@ struct p9_client *p9_client_create(const char *dev_name, char *options)
err = -EPROTONOSUPPORT;
P9_DPRINTK(P9_DEBUG_ERROR,
"No transport defined or default transport\n");
- goto free_client;
+ goto destroy_tagpool;
}
clnt->fidpool = p9_idpool_create();
if (IS_ERR(clnt->fidpool)) {
err = PTR_ERR(clnt->fidpool);
- clnt->fidpool = NULL;
goto put_trans;
}
@@ -834,6 +836,8 @@ destroy_fidpool:
p9_idpool_destroy(clnt->fidpool);
put_trans:
v9fs_put_trans(clnt->trans_mod);
+destroy_tagpool:
+ p9_idpool_destroy(clnt->tagpool);
free_client:
kfree(clnt);
return ERR_PTR(err);
@@ -1298,7 +1302,7 @@ p9_client_read(struct p9_fid *fid, char *data, char __user *udata, u64 offset,
if (count < rsize)
rsize = count;
- /* Don't bother zerocopy form small IO (< 1024) */
+ /* Don't bother zerocopy for small IO (< 1024) */
if (((clnt->trans_mod->pref & P9_TRANS_PREF_PAYLOAD_MASK) ==
P9_TRANS_PREF_PAYLOAD_SEP) && (rsize > 1024)) {
req = p9_client_rpc(clnt, P9_TREAD, "dqE", fid->fid, offset,
diff --git a/net/9p/mod.c b/net/9p/mod.c
index cf8a4128cd5..72c39827505 100644
--- a/net/9p/mod.c
+++ b/net/9p/mod.c
@@ -139,7 +139,7 @@ void v9fs_put_trans(struct p9_trans_module *m)
}
/**
- * v9fs_init - Initialize module
+ * init_p9 - Initialize module
*
*/
static int __init init_p9(void)
@@ -154,7 +154,7 @@ static int __init init_p9(void)
}
/**
- * v9fs_init - shutdown module
+ * exit_p9 - shutdown module
*
*/
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 4a9084395d3..fdfdb5747f6 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -916,8 +916,8 @@ p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args)
sin_server.sin_family = AF_INET;
sin_server.sin_addr.s_addr = in_aton(addr);
sin_server.sin_port = htons(opts.port);
- err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &csocket);
-
+ err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_INET,
+ SOCK_STREAM, IPPROTO_TCP, &csocket, 1);
if (err) {
P9_EPRINTK(KERN_ERR, "p9_trans_tcp: problem creating socket\n");
return err;
@@ -954,7 +954,8 @@ p9_fd_create_unix(struct p9_client *client, const char *addr, char *args)
sun_server.sun_family = PF_UNIX;
strcpy(sun_server.sun_path, addr);
- err = sock_create_kern(PF_UNIX, SOCK_STREAM, 0, &csocket);
+ err = __sock_create(read_pnet(&current->nsproxy->net_ns), PF_UNIX,
+ SOCK_STREAM, 0, &csocket, 1);
if (err < 0) {
P9_EPRINTK(KERN_ERR, "p9_trans_unix: problem creating socket\n");
return err;
diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c
index 844a7a5607e..159c50f1c6b 100644
--- a/net/9p/trans_rdma.c
+++ b/net/9p/trans_rdma.c
@@ -589,7 +589,8 @@ rdma_create_trans(struct p9_client *client, const char *addr, char *args)
return -ENOMEM;
/* Create the RDMA CM ID */
- rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP);
+ rdma->cm_id = rdma_create_id(p9_cm_event_handler, client, RDMA_PS_TCP,
+ IB_QPT_RC);
if (IS_ERR(rdma->cm_id))
goto error;
diff --git a/net/9p/util.c b/net/9p/util.c
index da6af81e59d..9c1c9348ac3 100644
--- a/net/9p/util.c
+++ b/net/9p/util.c
@@ -93,7 +93,7 @@ int p9_idpool_get(struct p9_idpool *p)
retry:
if (idr_pre_get(&p->pool, GFP_NOFS) == 0)
- return 0;
+ return -1;
spin_lock_irqsave(&p->lock, flags);
diff --git a/net/atm/lec.c b/net/atm/lec.c
index 25073b6ef47..ba48daa68c1 100644
--- a/net/atm/lec.c
+++ b/net/atm/lec.c
@@ -1171,7 +1171,7 @@ static int __init lane_module_init(void)
#endif
register_atm_ioctl(&lane_ioctl_ops);
- pr_info("lec.c: " __DATE__ " " __TIME__ " initialized\n");
+ pr_info("lec.c: initialized\n");
return 0;
}
diff --git a/net/atm/mpc.c b/net/atm/mpc.c
index 644cdf07164..3ccca42e6f9 100644
--- a/net/atm/mpc.c
+++ b/net/atm/mpc.c
@@ -1482,7 +1482,7 @@ static __init int atm_mpoa_init(void)
if (mpc_proc_init() != 0)
pr_info("failed to initialize /proc/mpoa\n");
- pr_info("mpc.c: " __DATE__ " " __TIME__ " initialized\n");
+ pr_info("mpc.c: initialized\n");
return 0;
}
diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index 649ebacaf6b..adbb424403d 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -139,17 +139,14 @@ static void close_work(struct work_struct *work)
struct chnl_net *dev = NULL;
struct list_head *list_node;
struct list_head *_tmp;
- /* May be called with or without RTNL lock held */
- int islocked = rtnl_is_locked();
- if (!islocked)
- rtnl_lock();
+
+ rtnl_lock();
list_for_each_safe(list_node, _tmp, &chnl_net_list) {
dev = list_entry(list_node, struct chnl_net, list_field);
if (dev->state == CAIF_SHUTDOWN)
dev_close(dev->netdev);
}
- if (!islocked)
- rtnl_unlock();
+ rtnl_unlock();
}
static DECLARE_WORK(close_worker, close_work);
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index e15a82ccc05..78b55f49de7 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -76,7 +76,8 @@ const char *ceph_pr_addr(const struct sockaddr_storage *ss)
break;
default:
- sprintf(s, "(unknown sockaddr family %d)", (int)ss->ss_family);
+ snprintf(s, MAX_ADDR_STR_LEN, "(unknown sockaddr family %d)",
+ (int)ss->ss_family);
}
return s;
@@ -598,7 +599,7 @@ static void prepare_write_keepalive(struct ceph_connection *con)
* Connection negotiation.
*/
-static void prepare_connect_authorizer(struct ceph_connection *con)
+static int prepare_connect_authorizer(struct ceph_connection *con)
{
void *auth_buf;
int auth_len = 0;
@@ -612,13 +613,20 @@ static void prepare_connect_authorizer(struct ceph_connection *con)
con->auth_retry);
mutex_lock(&con->mutex);
+ if (test_bit(CLOSED, &con->state) ||
+ test_bit(OPENING, &con->state))
+ return -EAGAIN;
+
con->out_connect.authorizer_protocol = cpu_to_le32(auth_protocol);
con->out_connect.authorizer_len = cpu_to_le32(auth_len);
- con->out_kvec[con->out_kvec_left].iov_base = auth_buf;
- con->out_kvec[con->out_kvec_left].iov_len = auth_len;
- con->out_kvec_left++;
- con->out_kvec_bytes += auth_len;
+ if (auth_len) {
+ con->out_kvec[con->out_kvec_left].iov_base = auth_buf;
+ con->out_kvec[con->out_kvec_left].iov_len = auth_len;
+ con->out_kvec_left++;
+ con->out_kvec_bytes += auth_len;
+ }
+ return 0;
}
/*
@@ -640,9 +648,9 @@ static void prepare_write_banner(struct ceph_messenger *msgr,
set_bit(WRITE_PENDING, &con->state);
}
-static void prepare_write_connect(struct ceph_messenger *msgr,
- struct ceph_connection *con,
- int after_banner)
+static int prepare_write_connect(struct ceph_messenger *msgr,
+ struct ceph_connection *con,
+ int after_banner)
{
unsigned global_seq = get_global_seq(con->msgr, 0);
int proto;
@@ -683,7 +691,7 @@ static void prepare_write_connect(struct ceph_messenger *msgr,
con->out_more = 0;
set_bit(WRITE_PENDING, &con->state);
- prepare_connect_authorizer(con);
+ return prepare_connect_authorizer(con);
}
@@ -1065,8 +1073,10 @@ static void addr_set_port(struct sockaddr_storage *ss, int p)
switch (ss->ss_family) {
case AF_INET:
((struct sockaddr_in *)ss)->sin_port = htons(p);
+ break;
case AF_INET6:
((struct sockaddr_in6 *)ss)->sin6_port = htons(p);
+ break;
}
}
@@ -1216,6 +1226,7 @@ static int process_connect(struct ceph_connection *con)
u64 sup_feat = con->msgr->supported_features;
u64 req_feat = con->msgr->required_features;
u64 server_feat = le64_to_cpu(con->in_reply.features);
+ int ret;
dout("process_connect on %p tag %d\n", con, (int)con->in_tag);
@@ -1250,7 +1261,9 @@ static int process_connect(struct ceph_connection *con)
return -1;
}
con->auth_retry = 1;
- prepare_write_connect(con->msgr, con, 0);
+ ret = prepare_write_connect(con->msgr, con, 0);
+ if (ret < 0)
+ return ret;
prepare_read_connect(con);
break;
@@ -1277,6 +1290,9 @@ static int process_connect(struct ceph_connection *con)
if (con->ops->peer_reset)
con->ops->peer_reset(con);
mutex_lock(&con->mutex);
+ if (test_bit(CLOSED, &con->state) ||
+ test_bit(OPENING, &con->state))
+ return -EAGAIN;
break;
case CEPH_MSGR_TAG_RETRY_SESSION:
@@ -1341,7 +1357,9 @@ static int process_connect(struct ceph_connection *con)
* to WAIT. This shouldn't happen if we are the
* client.
*/
- pr_err("process_connect peer connecting WAIT\n");
+ pr_err("process_connect got WAIT as client\n");
+ con->error_msg = "protocol error, got WAIT as client";
+ return -1;
default:
pr_err("connect protocol error, will retry\n");
@@ -1810,6 +1828,17 @@ static int try_read(struct ceph_connection *con)
more:
dout("try_read tag %d in_base_pos %d\n", (int)con->in_tag,
con->in_base_pos);
+
+ /*
+ * process_connect and process_message drop and re-take
+ * con->mutex. make sure we handle a racing close or reopen.
+ */
+ if (test_bit(CLOSED, &con->state) ||
+ test_bit(OPENING, &con->state)) {
+ ret = -EAGAIN;
+ goto out;
+ }
+
if (test_bit(CONNECTING, &con->state)) {
if (!test_bit(NEGOTIATING, &con->state)) {
dout("try_read connecting\n");
@@ -1938,8 +1967,10 @@ static void con_work(struct work_struct *work)
{
struct ceph_connection *con = container_of(work, struct ceph_connection,
work.work);
+ int ret;
mutex_lock(&con->mutex);
+restart:
if (test_and_clear_bit(BACKOFF, &con->state)) {
dout("con_work %p backing off\n", con);
if (queue_delayed_work(ceph_msgr_wq, &con->work,
@@ -1969,18 +2000,31 @@ static void con_work(struct work_struct *work)
con_close_socket(con);
}
- if (test_and_clear_bit(SOCK_CLOSED, &con->state) ||
- try_read(con) < 0 ||
- try_write(con) < 0) {
- mutex_unlock(&con->mutex);
- ceph_fault(con); /* error/fault path */
- goto done_unlocked;
- }
+ if (test_and_clear_bit(SOCK_CLOSED, &con->state))
+ goto fault;
+
+ ret = try_read(con);
+ if (ret == -EAGAIN)
+ goto restart;
+ if (ret < 0)
+ goto fault;
+
+ ret = try_write(con);
+ if (ret == -EAGAIN)
+ goto restart;
+ if (ret < 0)
+ goto fault;
done:
mutex_unlock(&con->mutex);
done_unlocked:
con->ops->put(con);
+ return;
+
+fault:
+ mutex_unlock(&con->mutex);
+ ceph_fault(con); /* error/fault path */
+ goto done_unlocked;
}
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 6b5dda1cb5d..6ea2b892f44 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -124,7 +124,7 @@ static void calc_layout(struct ceph_osd_client *osdc,
ceph_calc_raw_layout(osdc, layout, vino.snap, off,
plen, &bno, req, op);
- sprintf(req->r_oid, "%llx.%08llx", vino.ino, bno);
+ snprintf(req->r_oid, sizeof(req->r_oid), "%llx.%08llx", vino.ino, bno);
req->r_oid_len = strlen(req->r_oid);
}
@@ -1421,6 +1421,15 @@ void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg)
done:
downgrade_write(&osdc->map_sem);
ceph_monc_got_osdmap(&osdc->client->monc, osdc->osdmap->epoch);
+
+ /*
+ * subscribe to subsequent osdmap updates if full to ensure
+ * we find out when we are no longer full and stop returning
+ * ENOSPC.
+ */
+ if (ceph_osdmap_flag(osdc->osdmap, CEPH_OSDMAP_FULL))
+ ceph_monc_request_next_osdmap(&osdc->client->monc);
+
send_queued(osdc);
up_read(&osdc->map_sem);
wake_up_all(&osdc->client->auth_wq);
@@ -1677,8 +1686,14 @@ int ceph_osdc_start_request(struct ceph_osd_client *osdc,
*/
if (req->r_sent == 0) {
rc = __map_request(osdc, req);
- if (rc < 0)
+ if (rc < 0) {
+ if (nofail) {
+ dout("osdc_start_request failed map, "
+ " will retry %lld\n", req->r_tid);
+ rc = 0;
+ }
goto out_unlock;
+ }
if (req->r_osd == NULL) {
dout("send_request %p no up osds in pg\n", req);
ceph_monc_request_next_osdmap(&osdc->client->monc);
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 71603ac3dff..e97c3588c3e 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -765,7 +765,7 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
}
map->epoch++;
- map->modified = map->modified;
+ map->modified = modified;
if (newcrush) {
if (map->crush)
crush_destroy(map->crush);
@@ -830,15 +830,20 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
map->osd_addr[osd] = addr;
}
- /* new_down */
+ /* new_state */
ceph_decode_32_safe(p, end, len, bad);
while (len--) {
u32 osd;
+ u8 xorstate;
ceph_decode_32_safe(p, end, osd, bad);
+ xorstate = **(u8 **)p;
(*p)++; /* clean flag */
- pr_info("osd%d down\n", osd);
+ if (xorstate == 0)
+ xorstate = CEPH_OSD_UP;
+ if (xorstate & CEPH_OSD_UP)
+ pr_info("osd%d down\n", osd);
if (osd < map->max_osd)
- map->osd_state[osd] &= ~CEPH_OSD_UP;
+ map->osd_state[osd] ^= xorstate;
}
/* new_weight */
diff --git a/net/core/dev.c b/net/core/dev.c
index c7e305d13b7..939307891e7 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2096,6 +2096,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
{
const struct net_device_ops *ops = dev->netdev_ops;
int rc = NETDEV_TX_OK;
+ unsigned int skb_len;
if (likely(!skb->next)) {
u32 features;
@@ -2146,8 +2147,9 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
}
}
+ skb_len = skb->len;
rc = ops->ndo_start_xmit(skb, dev);
- trace_net_dev_xmit(skb, rc);
+ trace_net_dev_xmit(skb, rc, dev, skb_len);
if (rc == NETDEV_TX_OK)
txq_trans_update(txq);
return rc;
@@ -2167,8 +2169,9 @@ gso:
if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
skb_dst_drop(nskb);
+ skb_len = nskb->len;
rc = ops->ndo_start_xmit(nskb, dev);
- trace_net_dev_xmit(nskb, rc);
+ trace_net_dev_xmit(nskb, rc, dev, skb_len);
if (unlikely(rc != NETDEV_TX_OK)) {
if (rc & ~NETDEV_TX_MASK)
goto out_kfree_gso_skb;
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 2e2dce6583e..e41e5110c65 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -8,6 +8,8 @@
#include <linux/idr.h>
#include <linux/rculist.h>
#include <linux/nsproxy.h>
+#include <linux/proc_fs.h>
+#include <linux/file.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
@@ -302,6 +304,26 @@ void __put_net(struct net *net)
}
EXPORT_SYMBOL_GPL(__put_net);
+struct net *get_net_ns_by_fd(int fd)
+{
+ struct proc_inode *ei;
+ struct file *file;
+ struct net *net;
+
+ file = proc_ns_fget(fd);
+ if (IS_ERR(file))
+ return ERR_CAST(file);
+
+ ei = PROC_I(file->f_dentry->d_inode);
+ if (ei->ns_ops == &netns_operations)
+ net = get_net(ei->ns);
+ else
+ net = ERR_PTR(-EINVAL);
+
+ fput(file);
+ return net;
+}
+
#else
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
@@ -309,6 +331,11 @@ struct net *copy_net_ns(unsigned long flags, struct net *old_net)
return ERR_PTR(-EINVAL);
return old_net;
}
+
+struct net *get_net_ns_by_fd(int fd)
+{
+ return ERR_PTR(-EINVAL);
+}
#endif
struct net *get_net_ns_by_pid(pid_t pid)
@@ -561,3 +588,39 @@ void unregister_pernet_device(struct pernet_operations *ops)
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
+
+#ifdef CONFIG_NET_NS
+static void *netns_get(struct task_struct *task)
+{
+ struct net *net = NULL;
+ struct nsproxy *nsproxy;
+
+ rcu_read_lock();
+ nsproxy = task_nsproxy(task);
+ if (nsproxy)
+ net = get_net(nsproxy->net_ns);
+ rcu_read_unlock();
+
+ return net;
+}
+
+static void netns_put(void *ns)
+{
+ put_net(ns);
+}
+
+static int netns_install(struct nsproxy *nsproxy, void *ns)
+{
+ put_net(nsproxy->net_ns);
+ nsproxy->net_ns = get_net(ns);
+ return 0;
+}
+
+const struct proc_ns_operations netns_operations = {
+ .name = "net",
+ .type = CLONE_NEWNET,
+ .get = netns_get,
+ .put = netns_put,
+ .install = netns_install,
+};
+#endif
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 2d56cb9b0b9..abd936d8a71 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -1046,6 +1046,7 @@ const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_LINKMODE] = { .type = NLA_U8 },
[IFLA_LINKINFO] = { .type = NLA_NESTED },
[IFLA_NET_NS_PID] = { .type = NLA_U32 },
+ [IFLA_NET_NS_FD] = { .type = NLA_U32 },
[IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 },
[IFLA_VFINFO_LIST] = {. type = NLA_NESTED },
[IFLA_VF_PORTS] = { .type = NLA_NESTED },
@@ -1094,6 +1095,8 @@ struct net *rtnl_link_get_net(struct net *src_net, struct nlattr *tb[])
*/
if (tb[IFLA_NET_NS_PID])
net = get_net_ns_by_pid(nla_get_u32(tb[IFLA_NET_NS_PID]));
+ else if (tb[IFLA_NET_NS_FD])
+ net = get_net_ns_by_fd(nla_get_u32(tb[IFLA_NET_NS_FD]));
else
net = get_net(src_net);
return net;
@@ -1224,7 +1227,7 @@ static int do_setlink(struct net_device *dev, struct ifinfomsg *ifm,
int send_addr_notify = 0;
int err;
- if (tb[IFLA_NET_NS_PID]) {
+ if (tb[IFLA_NET_NS_PID] || tb[IFLA_NET_NS_FD]) {
struct net *net = rtnl_link_get_net(dev_net(dev), tb);
if (IS_ERR(net)) {
err = PTR_ERR(net);
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index cfa7a5e1c5c..fa000d26dc6 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -212,10 +212,12 @@ static void dns_resolver_describe(const struct key *key, struct seq_file *m)
int err = key->type_data.x[0];
seq_puts(m, key->description);
- if (err)
- seq_printf(m, ": %d", err);
- else
- seq_printf(m, ": %u", key->datalen);
+ if (key_is_instantiated(key)) {
+ if (err)
+ seq_printf(m, ": %d", err);
+ else
+ seq_printf(m, ": %u", key->datalen);
+ }
}
/*
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cc1463156cd..9c1926027a2 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -465,6 +465,9 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
if (addr_len < sizeof(struct sockaddr_in))
goto out;
+ if (addr->sin_family != AF_INET)
+ goto out;
+
chk_addr_ret = inet_addr_type(sock_net(sk), addr->sin_addr.s_addr);
/* Not specified by any standard per-se, however it breaks too
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 61fac4cabc7..c14d88ad348 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg);
* This struct holds the first and last local port number.
*/
struct local_ports sysctl_local_ports __read_mostly = {
- .lock = SEQLOCK_UNLOCKED,
+ .lock = __SEQLOCK_UNLOCKED(sysctl_local_ports.lock),
.range = { 32768, 61000 },
};
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index c3118e1cd3b..ec93335901d 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -14,6 +14,7 @@
#include <linux/slab.h>
#include <linux/types.h>
#include <asm/uaccess.h>
+#include <asm/unaligned.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/icmp.h>
@@ -350,7 +351,7 @@ int ip_options_compile(struct net *net,
goto error;
}
if (optptr[2] <= optlen) {
- __be32 *timeptr = NULL;
+ unsigned char *timeptr = NULL;
if (optptr[2]+3 > optptr[1]) {
pp_ptr = optptr + 2;
goto error;
@@ -359,7 +360,7 @@ int ip_options_compile(struct net *net,
case IPOPT_TS_TSONLY:
opt->ts = optptr - iph;
if (skb)
- timeptr = (__be32*)&optptr[optptr[2]-1];
+ timeptr = &optptr[optptr[2]-1];
opt->ts_needtime = 1;
optptr[2] += 4;
break;
@@ -371,7 +372,7 @@ int ip_options_compile(struct net *net,
opt->ts = optptr - iph;
if (rt) {
memcpy(&optptr[optptr[2]-1], &rt->rt_spec_dst, 4);
- timeptr = (__be32*)&optptr[optptr[2]+3];
+ timeptr = &optptr[optptr[2]+3];
}
opt->ts_needaddr = 1;
opt->ts_needtime = 1;
@@ -389,7 +390,7 @@ int ip_options_compile(struct net *net,
if (inet_addr_type(net, addr) == RTN_UNICAST)
break;
if (skb)
- timeptr = (__be32*)&optptr[optptr[2]+3];
+ timeptr = &optptr[optptr[2]+3];
}
opt->ts_needtime = 1;
optptr[2] += 8;
@@ -403,10 +404,10 @@ int ip_options_compile(struct net *net,
}
if (timeptr) {
struct timespec tv;
- __be32 midtime;
+ u32 midtime;
getnstimeofday(&tv);
- midtime = htonl((tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC);
- memcpy(timeptr, &midtime, sizeof(__be32));
+ midtime = (tv.tv_sec % 86400) * MSEC_PER_SEC + tv.tv_nsec / NSEC_PER_MSEC;
+ put_unaligned_be32(midtime, timeptr);
opt->is_changed = 1;
}
} else {
diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index a15c0152495..7f9124914b1 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -54,7 +54,7 @@
#include <asm/atomic.h>
#include <asm/ebcdic.h>
#include <asm/io.h>
-#include <asm/s390_ext.h>
+#include <asm/irq.h>
#include <asm/smp.h>
/*
diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c
index b8dbae82fab..76130134bfa 100644
--- a/net/l2tp/l2tp_debugfs.c
+++ b/net/l2tp/l2tp_debugfs.c
@@ -258,7 +258,7 @@ static int l2tp_dfs_seq_open(struct inode *inode, struct file *file)
*/
pd->net = get_net_ns_by_pid(current->pid);
if (IS_ERR(pd->net)) {
- rc = -PTR_ERR(pd->net);
+ rc = PTR_ERR(pd->net);
goto err_free_pd;
}
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 925f715686a..ba248d93399 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -798,7 +798,12 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
getnstimeofday(&ts);
h.h2->tp_sec = ts.tv_sec;
h.h2->tp_nsec = ts.tv_nsec;
- h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+ if (vlan_tx_tag_present(skb)) {
+ h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
+ status |= TP_STATUS_VLAN_VALID;
+ } else {
+ h.h2->tp_vlan_tci = 0;
+ }
hdrlen = sizeof(*h.h2);
break;
default:
@@ -1725,8 +1730,12 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
aux.tp_net = skb_network_offset(skb);
- aux.tp_vlan_tci = vlan_tx_tag_get(skb);
-
+ if (vlan_tx_tag_present(skb)) {
+ aux.tp_vlan_tci = vlan_tx_tag_get(skb);
+ aux.tp_status |= TP_STATUS_VLAN_VALID;
+ } else {
+ aux.tp_vlan_tci = 0;
+ }
put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
}
diff --git a/net/rds/ib.c b/net/rds/ib.c
index cce19f95c62..3b83086bcc3 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -325,7 +325,7 @@ static int rds_ib_laddr_check(__be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
- cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
+ cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index ee369d201a6..fd453dd5124 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -587,7 +587,7 @@ int rds_ib_conn_connect(struct rds_connection *conn)
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
- RDMA_PS_TCP);
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
ic->i_cm_id = NULL;
diff --git a/net/rds/iw.c b/net/rds/iw.c
index 5a9676fe594..f7474844f09 100644
--- a/net/rds/iw.c
+++ b/net/rds/iw.c
@@ -226,7 +226,7 @@ static int rds_iw_laddr_check(__be32 addr)
/* Create a CMA ID and try to bind it. This catches both
* IB and iWARP capable NICs.
*/
- cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP);
+ cm_id = rdma_create_id(NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(cm_id))
return PTR_ERR(cm_id);
diff --git a/net/rds/iw_cm.c b/net/rds/iw_cm.c
index 3a60a15d1b4..c12db66f24c 100644
--- a/net/rds/iw_cm.c
+++ b/net/rds/iw_cm.c
@@ -522,7 +522,7 @@ int rds_iw_conn_connect(struct rds_connection *conn)
/* XXX I wonder what affect the port space has */
/* delegate cm event handler to rdma_transport */
ic->i_cm_id = rdma_create_id(rds_rdma_cm_event_handler, conn,
- RDMA_PS_TCP);
+ RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(ic->i_cm_id)) {
ret = PTR_ERR(ic->i_cm_id);
ic->i_cm_id = NULL;
diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c
index 4195a053982..f8760e1b668 100644
--- a/net/rds/rdma_transport.c
+++ b/net/rds/rdma_transport.c
@@ -158,7 +158,8 @@ static int rds_rdma_listen_init(void)
struct rdma_cm_id *cm_id;
int ret;
- cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP);
+ cm_id = rdma_create_id(rds_rdma_cm_event_handler, NULL, RDMA_PS_TCP,
+ IB_QPT_RC);
if (IS_ERR(cm_id)) {
ret = PTR_ERR(cm_id);
printk(KERN_ERR "RDS/RDMA: failed to setup listener, "
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 525f97c467e..4a62888f2e4 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -444,15 +444,7 @@ void sctp_association_free(struct sctp_association *asoc)
asoc->peer.transport_count = 0;
- /* Free any cached ASCONF_ACK chunk. */
- sctp_assoc_free_asconf_acks(asoc);
-
- /* Free the ASCONF queue. */
- sctp_assoc_free_asconf_queue(asoc);
-
- /* Free any cached ASCONF chunk. */
- if (asoc->addip_last_asconf)
- sctp_chunk_free(asoc->addip_last_asconf);
+ sctp_asconf_queue_teardown(asoc);
/* AUTH - Free the endpoint shared keys */
sctp_auth_destroy_keys(&asoc->endpoint_shared_keys);
@@ -1646,3 +1638,16 @@ struct sctp_chunk *sctp_assoc_lookup_asconf_ack(
return NULL;
}
+
+void sctp_asconf_queue_teardown(struct sctp_association *asoc)
+{
+ /* Free any cached ASCONF_ACK chunk. */
+ sctp_assoc_free_asconf_acks(asoc);
+
+ /* Free the ASCONF queue. */
+ sctp_assoc_free_asconf_queue(asoc);
+
+ /* Free any cached ASCONF chunk. */
+ if (asoc->addip_last_asconf)
+ sctp_chunk_free(asoc->addip_last_asconf);
+}
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index d612ca1ca6c..534c2e5feb0 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -1670,6 +1670,9 @@ static int sctp_cmd_interpreter(sctp_event_t event_type,
case SCTP_CMD_SEND_NEXT_ASCONF:
sctp_cmd_send_asconf(asoc);
break;
+ case SCTP_CMD_PURGE_ASCONF_QUEUE:
+ sctp_asconf_queue_teardown(asoc);
+ break;
default:
pr_warn("Impossible command: %u, %p\n",
cmd->verb, cmd->obj.ptr);
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c
index 7f4a4f8368e..a297283154d 100644
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -1718,11 +1718,21 @@ static sctp_disposition_t sctp_sf_do_dupcook_a(const struct sctp_endpoint *ep,
return SCTP_DISPOSITION_CONSUME;
}
- /* For now, fail any unsent/unacked data. Consider the optional
- * choice of resending of this data.
+ /* For now, stop pending T3-rtx and SACK timers, fail any unsent/unacked
+ * data. Consider the optional choice of resending of this data.
*/
+ sctp_add_cmd_sf(commands, SCTP_CMD_T3_RTX_TIMERS_STOP, SCTP_NULL());
+ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+ SCTP_TO(SCTP_EVENT_TIMEOUT_SACK));
sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_OUTQUEUE, SCTP_NULL());
+ /* Stop pending T4-rto timer, teardown ASCONF queue, ASCONF-ACK queue
+ * and ASCONF-ACK cache.
+ */
+ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP,
+ SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
+ sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL());
+
repl = sctp_make_cookie_ack(new_asoc, chunk);
if (!repl)
goto nomem;
diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c
index 67e31276682..cd6e4aa19db 100644
--- a/net/sunrpc/auth.c
+++ b/net/sunrpc/auth.c
@@ -326,10 +326,12 @@ rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
* Run memory cache shrinker.
*/
static int
-rpcauth_cache_shrinker(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
+rpcauth_cache_shrinker(struct shrinker *shrink, struct shrink_control *sc)
{
LIST_HEAD(free);
int res;
+ int nr_to_scan = sc->nr_to_scan;
+ gfp_t gfp_mask = sc->gfp_mask;
if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
return (nr_to_scan == 0) ? 0 : -1;
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 8d83f9d4871..b84d7395535 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -13,10 +13,6 @@
* and need to be refreshed, or when a packet was damaged in transit.
* This may be have to be moved to the VFS layer.
*
- * NB: BSD uses a more intelligent approach to guessing when a request
- * or reply has been lost by keeping the RTO estimate for each procedure.
- * We currently make do with a constant timeout value.
- *
* Copyright (C) 1992,1993 Rick Sladkey <jrs@world.std.com>
* Copyright (C) 1995,1996 Olaf Kirch <okir@monad.swb.de>
*/
@@ -32,7 +28,9 @@
#include <linux/slab.h>
#include <linux/utsname.h>
#include <linux/workqueue.h>
+#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/un.h>
#include <linux/sunrpc/clnt.h>
#include <linux/sunrpc/rpc_pipe_fs.h>
@@ -298,22 +296,27 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
* up a string representation of the passed-in address.
*/
if (args->servername == NULL) {
+ struct sockaddr_un *sun =
+ (struct sockaddr_un *)args->address;
+ struct sockaddr_in *sin =
+ (struct sockaddr_in *)args->address;
+ struct sockaddr_in6 *sin6 =
+ (struct sockaddr_in6 *)args->address;
+
servername[0] = '\0';
switch (args->address->sa_family) {
- case AF_INET: {
- struct sockaddr_in *sin =
- (struct sockaddr_in *)args->address;
+ case AF_LOCAL:
+ snprintf(servername, sizeof(servername), "%s",
+ sun->sun_path);
+ break;
+ case AF_INET:
snprintf(servername, sizeof(servername), "%pI4",
&sin->sin_addr.s_addr);
break;
- }
- case AF_INET6: {
- struct sockaddr_in6 *sin =
- (struct sockaddr_in6 *)args->address;
+ case AF_INET6:
snprintf(servername, sizeof(servername), "%pI6",
- &sin->sin6_addr);
+ &sin6->sin6_addr);
break;
- }
default:
/* caller wants default server name, but
* address family isn't recognized. */
diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c
index c652e4cc9fe..9a80a922c52 100644
--- a/net/sunrpc/rpcb_clnt.c
+++ b/net/sunrpc/rpcb_clnt.c
@@ -16,6 +16,7 @@
#include <linux/types.h>
#include <linux/socket.h>
+#include <linux/un.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/kernel.h>
@@ -32,6 +33,8 @@
# define RPCDBG_FACILITY RPCDBG_BIND
#endif
+#define RPCBIND_SOCK_PATHNAME "/var/run/rpcbind.sock"
+
#define RPCBIND_PROGRAM (100000u)
#define RPCBIND_PORT (111u)
@@ -158,20 +161,69 @@ static void rpcb_map_release(void *data)
kfree(map);
}
-static const struct sockaddr_in rpcb_inaddr_loopback = {
- .sin_family = AF_INET,
- .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
- .sin_port = htons(RPCBIND_PORT),
-};
+/*
+ * Returns zero on success, otherwise a negative errno value
+ * is returned.
+ */
+static int rpcb_create_local_unix(void)
+{
+ static const struct sockaddr_un rpcb_localaddr_rpcbind = {
+ .sun_family = AF_LOCAL,
+ .sun_path = RPCBIND_SOCK_PATHNAME,
+ };
+ struct rpc_create_args args = {
+ .net = &init_net,
+ .protocol = XPRT_TRANSPORT_LOCAL,
+ .address = (struct sockaddr *)&rpcb_localaddr_rpcbind,
+ .addrsize = sizeof(rpcb_localaddr_rpcbind),
+ .servername = "localhost",
+ .program = &rpcb_program,
+ .version = RPCBVERS_2,
+ .authflavor = RPC_AUTH_NULL,
+ };
+ struct rpc_clnt *clnt, *clnt4;
+ int result = 0;
+
+ /*
+ * Because we requested an RPC PING at transport creation time,
+ * this works only if the user space portmapper is rpcbind, and
+ * it's listening on AF_LOCAL on the named socket.
+ */
+ clnt = rpc_create(&args);
+ if (IS_ERR(clnt)) {
+ dprintk("RPC: failed to create AF_LOCAL rpcbind "
+ "client (errno %ld).\n", PTR_ERR(clnt));
+ result = -PTR_ERR(clnt);
+ goto out;
+ }
+
+ clnt4 = rpc_bind_new_program(clnt, &rpcb_program, RPCBVERS_4);
+ if (IS_ERR(clnt4)) {
+ dprintk("RPC: failed to bind second program to "
+ "rpcbind v4 client (errno %ld).\n",
+ PTR_ERR(clnt4));
+ clnt4 = NULL;
+ }
+
+ /* Protected by rpcb_create_local_mutex */
+ rpcb_local_clnt = clnt;
+ rpcb_local_clnt4 = clnt4;
-static DEFINE_MUTEX(rpcb_create_local_mutex);
+out:
+ return result;
+}
/*
* Returns zero on success, otherwise a negative errno value
* is returned.
*/
-static int rpcb_create_local(void)
+static int rpcb_create_local_net(void)
{
+ static const struct sockaddr_in rpcb_inaddr_loopback = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ .sin_port = htons(RPCBIND_PORT),
+ };
struct rpc_create_args args = {
.net = &init_net,
.protocol = XPRT_TRANSPORT_TCP,
@@ -186,13 +238,6 @@ static int rpcb_create_local(void)
struct rpc_clnt *clnt, *clnt4;
int result = 0;
- if (rpcb_local_clnt)
- return result;
-
- mutex_lock(&rpcb_create_local_mutex);
- if (rpcb_local_clnt)
- goto out;
-
clnt = rpc_create(&args);
if (IS_ERR(clnt)) {
dprintk("RPC: failed to create local rpcbind "
@@ -214,10 +259,34 @@ static int rpcb_create_local(void)
clnt4 = NULL;
}
+ /* Protected by rpcb_create_local_mutex */
rpcb_local_clnt = clnt;
rpcb_local_clnt4 = clnt4;
out:
+ return result;
+}
+
+/*
+ * Returns zero on success, otherwise a negative errno value
+ * is returned.
+ */
+static int rpcb_create_local(void)
+{
+ static DEFINE_MUTEX(rpcb_create_local_mutex);
+ int result = 0;
+
+ if (rpcb_local_clnt)
+ return result;
+
+ mutex_lock(&rpcb_create_local_mutex);
+ if (rpcb_local_clnt)
+ goto out;
+
+ if (rpcb_create_local_unix() != 0)
+ result = rpcb_create_local_net();
+
+out:
mutex_unlock(&rpcb_create_local_mutex);
return result;
}
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 08e05a8ce02..2b90292e950 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -942,6 +942,8 @@ static void svc_unregister(const struct svc_serv *serv)
if (progp->pg_vers[i]->vs_hidden)
continue;
+ dprintk("svc: attempting to unregister %sv%u\n",
+ progp->pg_name, i);
__svc_unregister(progp->pg_prog, i, progp->pg_name);
}
}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b7d435c3f19..af04f779ce9 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -387,6 +387,33 @@ static int svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, int nr,
return len;
}
+static int svc_partial_recvfrom(struct svc_rqst *rqstp,
+ struct kvec *iov, int nr,
+ int buflen, unsigned int base)
+{
+ size_t save_iovlen;
+ void __user *save_iovbase;
+ unsigned int i;
+ int ret;
+
+ if (base == 0)
+ return svc_recvfrom(rqstp, iov, nr, buflen);
+
+ for (i = 0; i < nr; i++) {
+ if (iov[i].iov_len > base)
+ break;
+ base -= iov[i].iov_len;
+ }
+ save_iovlen = iov[i].iov_len;
+ save_iovbase = iov[i].iov_base;
+ iov[i].iov_len -= base;
+ iov[i].iov_base += base;
+ ret = svc_recvfrom(rqstp, &iov[i], nr - i, buflen);
+ iov[i].iov_len = save_iovlen;
+ iov[i].iov_base = save_iovbase;
+ return ret;
+}
+
/*
* Set socket snd and rcv buffer lengths
*/
@@ -409,7 +436,6 @@ static void svc_sock_setbufsize(struct socket *sock, unsigned int snd,
lock_sock(sock->sk);
sock->sk->sk_sndbuf = snd * 2;
sock->sk->sk_rcvbuf = rcv * 2;
- sock->sk->sk_userlocks |= SOCK_SNDBUF_LOCK|SOCK_RCVBUF_LOCK;
sock->sk->sk_write_space(sock->sk);
release_sock(sock->sk);
#endif
@@ -884,6 +910,56 @@ failed:
return NULL;
}
+static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
+{
+ unsigned int i, len, npages;
+
+ if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+ return 0;
+ len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+ npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ for (i = 0; i < npages; i++) {
+ if (rqstp->rq_pages[i] != NULL)
+ put_page(rqstp->rq_pages[i]);
+ BUG_ON(svsk->sk_pages[i] == NULL);
+ rqstp->rq_pages[i] = svsk->sk_pages[i];
+ svsk->sk_pages[i] = NULL;
+ }
+ rqstp->rq_arg.head[0].iov_base = page_address(rqstp->rq_pages[0]);
+ return len;
+}
+
+static void svc_tcp_save_pages(struct svc_sock *svsk, struct svc_rqst *rqstp)
+{
+ unsigned int i, len, npages;
+
+ if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+ return;
+ len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+ npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ for (i = 0; i < npages; i++) {
+ svsk->sk_pages[i] = rqstp->rq_pages[i];
+ rqstp->rq_pages[i] = NULL;
+ }
+}
+
+static void svc_tcp_clear_pages(struct svc_sock *svsk)
+{
+ unsigned int i, len, npages;
+
+ if (svsk->sk_tcplen <= sizeof(rpc_fraghdr))
+ goto out;
+ len = svsk->sk_tcplen - sizeof(rpc_fraghdr);
+ npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ for (i = 0; i < npages; i++) {
+ BUG_ON(svsk->sk_pages[i] == NULL);
+ put_page(svsk->sk_pages[i]);
+ svsk->sk_pages[i] = NULL;
+ }
+out:
+ svsk->sk_tcplen = 0;
+}
+
/*
* Receive data.
* If we haven't gotten the record length yet, get the next four bytes.
@@ -893,31 +969,15 @@ failed:
static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
+ unsigned int want;
int len;
- if (test_and_clear_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags))
- /* sndbuf needs to have room for one request
- * per thread, otherwise we can stall even when the
- * network isn't a bottleneck.
- *
- * We count all threads rather than threads in a
- * particular pool, which provides an upper bound
- * on the number of threads which will access the socket.
- *
- * rcvbuf just needs to be able to hold a few requests.
- * Normally they will be removed from the queue
- * as soon a a complete request arrives.
- */
- svc_sock_setbufsize(svsk->sk_sock,
- (serv->sv_nrthreads+3) * serv->sv_max_mesg,
- 3 * serv->sv_max_mesg);
-
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) {
- int want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
struct kvec iov;
+ want = sizeof(rpc_fraghdr) - svsk->sk_tcplen;
iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen;
iov.iov_len = want;
if ((len = svc_recvfrom(rqstp, &iov, 1, want)) < 0)
@@ -927,7 +987,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
if (len < want) {
dprintk("svc: short recvfrom while reading record "
"length (%d of %d)\n", len, want);
- goto err_again; /* record header not complete */
+ return -EAGAIN;
}
svsk->sk_reclen = ntohl(svsk->sk_reclen);
@@ -954,83 +1014,75 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp)
}
}
- /* Check whether enough data is available */
- len = svc_recv_available(svsk);
- if (len < 0)
- goto error;
+ if (svsk->sk_reclen < 8)
+ goto err_delete; /* client is nuts. */
- if (len < svsk->sk_reclen) {
- dprintk("svc: incomplete TCP record (%d of %d)\n",
- len, svsk->sk_reclen);
- goto err_again; /* record not complete */
- }
len = svsk->sk_reclen;
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
return len;
- error:
- if (len == -EAGAIN)
- dprintk("RPC: TCP recv_record got EAGAIN\n");
+error:
+ dprintk("RPC: TCP recv_record got %d\n", len);
return len;
- err_delete:
+err_delete:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
- err_again:
return -EAGAIN;
}
-static int svc_process_calldir(struct svc_sock *svsk, struct svc_rqst *rqstp,
- struct rpc_rqst **reqpp, struct kvec *vec)
+static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp)
{
+ struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
struct rpc_rqst *req = NULL;
- u32 *p;
- u32 xid;
- u32 calldir;
- int len;
-
- len = svc_recvfrom(rqstp, vec, 1, 8);
- if (len < 0)
- goto error;
+ struct kvec *src, *dst;
+ __be32 *p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
+ __be32 xid;
+ __be32 calldir;
- p = (u32 *)rqstp->rq_arg.head[0].iov_base;
xid = *p++;
calldir = *p;
- if (calldir == 0) {
- /* REQUEST is the most common case */
- vec[0] = rqstp->rq_arg.head[0];
- } else {
- /* REPLY */
- struct rpc_xprt *bc_xprt = svsk->sk_xprt.xpt_bc_xprt;
-
- if (bc_xprt)
- req = xprt_lookup_rqst(bc_xprt, xid);
-
- if (!req) {
- printk(KERN_NOTICE
- "%s: Got unrecognized reply: "
- "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
- __func__, ntohl(calldir),
- bc_xprt, xid);
- vec[0] = rqstp->rq_arg.head[0];
- goto out;
- }
+ if (bc_xprt)
+ req = xprt_lookup_rqst(bc_xprt, xid);
- memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
- sizeof(struct xdr_buf));
- /* copy the xid and call direction */
- memcpy(req->rq_private_buf.head[0].iov_base,
- rqstp->rq_arg.head[0].iov_base, 8);
- vec[0] = req->rq_private_buf.head[0];
+ if (!req) {
+ printk(KERN_NOTICE
+ "%s: Got unrecognized reply: "
+ "calldir 0x%x xpt_bc_xprt %p xid %08x\n",
+ __func__, ntohl(calldir),
+ bc_xprt, xid);
+ return -EAGAIN;
}
- out:
- vec[0].iov_base += 8;
- vec[0].iov_len -= 8;
- len = svsk->sk_reclen - 8;
- error:
- *reqpp = req;
- return len;
+
+ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf));
+ /*
+ * XXX!: cheating for now! Only copying HEAD.
+ * But we know this is good enough for now (in fact, for any
+ * callback reply in the forseeable future).
+ */
+ dst = &req->rq_private_buf.head[0];
+ src = &rqstp->rq_arg.head[0];
+ if (dst->iov_len < src->iov_len)
+ return -EAGAIN; /* whatever; just giving up. */
+ memcpy(dst->iov_base, src->iov_base, src->iov_len);
+ xprt_complete_rqst(req->rq_task, svsk->sk_reclen);
+ rqstp->rq_arg.len = 0;
+ return 0;
}
+static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len)
+{
+ int i = 0;
+ int t = 0;
+
+ while (t < len) {
+ vec[i].iov_base = page_address(pages[i]);
+ vec[i].iov_len = PAGE_SIZE;
+ i++;
+ t += PAGE_SIZE;
+ }
+ return i;
+}
+
+
/*
* Receive data from a TCP socket.
*/
@@ -1041,8 +1093,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
struct svc_serv *serv = svsk->sk_xprt.xpt_server;
int len;
struct kvec *vec;
- int pnum, vlen;
- struct rpc_rqst *req = NULL;
+ unsigned int want, base;
+ __be32 *p;
+ __be32 calldir;
+ int pnum;
dprintk("svc: tcp_recv %p data %d conn %d close %d\n",
svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags),
@@ -1053,87 +1107,73 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (len < 0)
goto error;
+ base = svc_tcp_restore_pages(svsk, rqstp);
+ want = svsk->sk_reclen - base;
+
vec = rqstp->rq_vec;
- vec[0] = rqstp->rq_arg.head[0];
- vlen = PAGE_SIZE;
- /*
- * We have enough data for the whole tcp record. Let's try and read the
- * first 8 bytes to get the xid and the call direction. We can use this
- * to figure out if this is a call or a reply to a callback. If
- * sk_reclen is < 8 (xid and calldir), then this is a malformed packet.
- * In that case, don't bother with the calldir and just read the data.
- * It will be rejected in svc_process.
- */
- if (len >= 8) {
- len = svc_process_calldir(svsk, rqstp, &req, vec);
- if (len < 0)
- goto err_again;
- vlen -= 8;
- }
+ pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0],
+ svsk->sk_reclen);
- pnum = 1;
- while (vlen < len) {
- vec[pnum].iov_base = (req) ?
- page_address(req->rq_private_buf.pages[pnum - 1]) :
- page_address(rqstp->rq_pages[pnum]);
- vec[pnum].iov_len = PAGE_SIZE;
- pnum++;
- vlen += PAGE_SIZE;
- }
rqstp->rq_respages = &rqstp->rq_pages[pnum];
/* Now receive data */
- len = svc_recvfrom(rqstp, vec, pnum, len);
- if (len < 0)
- goto err_again;
-
- /*
- * Account for the 8 bytes we read earlier
- */
- len += 8;
-
- if (req) {
- xprt_complete_rqst(req->rq_task, len);
- len = 0;
- goto out;
+ len = svc_partial_recvfrom(rqstp, vec, pnum, want, base);
+ if (len >= 0)
+ svsk->sk_tcplen += len;
+ if (len != want) {
+ if (len < 0 && len != -EAGAIN)
+ goto err_other;
+ svc_tcp_save_pages(svsk, rqstp);
+ dprintk("svc: incomplete TCP record (%d of %d)\n",
+ svsk->sk_tcplen, svsk->sk_reclen);
+ goto err_noclose;
}
- dprintk("svc: TCP complete record (%d bytes)\n", len);
- rqstp->rq_arg.len = len;
+
+ rqstp->rq_arg.len = svsk->sk_reclen;
rqstp->rq_arg.page_base = 0;
- if (len <= rqstp->rq_arg.head[0].iov_len) {
- rqstp->rq_arg.head[0].iov_len = len;
+ if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
+ rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
rqstp->rq_arg.page_len = 0;
- } else {
- rqstp->rq_arg.page_len = len - rqstp->rq_arg.head[0].iov_len;
- }
+ } else
+ rqstp->rq_arg.page_len = rqstp->rq_arg.len - rqstp->rq_arg.head[0].iov_len;
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
-out:
+ p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
+ calldir = p[1];
+ if (calldir)
+ len = receive_cb_reply(svsk, rqstp);
+
/* Reset TCP read info */
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
+ /* If we have more data, signal svc_xprt_enqueue() to try again */
+ if (svc_recv_available(svsk) > sizeof(rpc_fraghdr))
+ set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
+
+ if (len < 0)
+ goto error;
svc_xprt_copy_addrs(rqstp, &svsk->sk_xprt);
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
- return len;
+ dprintk("svc: TCP complete record (%d bytes)\n", rqstp->rq_arg.len);
+ return rqstp->rq_arg.len;
-err_again:
- if (len == -EAGAIN) {
- dprintk("RPC: TCP recvfrom got EAGAIN\n");
- return len;
- }
error:
- if (len != -EAGAIN) {
- printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
- svsk->sk_xprt.xpt_server->sv_name, -len);
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
- }
+ if (len != -EAGAIN)
+ goto err_other;
+ dprintk("RPC: TCP recvfrom got EAGAIN\n");
return -EAGAIN;
+err_other:
+ printk(KERN_NOTICE "%s: recvfrom returned errno %d\n",
+ svsk->sk_xprt.xpt_server->sv_name, -len);
+ set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+err_noclose:
+ return -EAGAIN; /* record not complete */
}
/*
@@ -1304,18 +1344,10 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
+ memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages));
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
- /* initialise setting must have enough space to
- * receive and respond to one request.
- * svc_tcp_recvfrom will re-adjust if necessary
- */
- svc_sock_setbufsize(svsk->sk_sock,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg,
- 3 * svsk->sk_xprt.xpt_server->sv_max_mesg);
-
- set_bit(XPT_CHNGBUF, &svsk->sk_xprt.xpt_flags);
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
if (sk->sk_state != TCP_ESTABLISHED)
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1379,8 +1411,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
svc_udp_init(svsk, serv);
- else
+ else {
+ /* initialise setting must have enough space to
+ * receive and respond to one request.
+ */
+ svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
+ 4 * serv->sv_max_mesg);
svc_tcp_init(svsk, serv);
+ }
dprintk("svc: svc_setup_socket created %p (inet %p)\n",
svsk, svsk->sk_sk);
@@ -1562,8 +1600,10 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt)
svc_sock_detach(xprt);
- if (!test_bit(XPT_LISTENER, &xprt->xpt_flags))
+ if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) {
+ svc_tcp_clear_pages(svsk);
kernel_sock_shutdown(svsk->sk_sock, SHUT_RDWR);
+ }
}
/*
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 679cd674b81..f008c14ad34 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -638,6 +638,25 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
}
EXPORT_SYMBOL_GPL(xdr_init_decode);
+/**
+ * xdr_init_decode - Initialize an xdr_stream for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to XDR buffer from which to decode data
+ * @pages: list of pages to decode into
+ * @len: length in bytes of buffer in pages
+ */
+void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
+ struct page **pages, unsigned int len)
+{
+ memset(buf, 0, sizeof(*buf));
+ buf->pages = pages;
+ buf->page_len = len;
+ buf->buflen = len;
+ buf->len = len;
+ xdr_init_decode(xdr, buf, NULL);
+}
+EXPORT_SYMBOL_GPL(xdr_init_decode_pages);
+
static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p = xdr->p;
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 6c014dd3a20..c3c232a88d9 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -695,7 +695,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv,
return ERR_PTR(-ENOMEM);
xprt = &cma_xprt->sc_xprt;
- listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP);
+ listen_id = rdma_create_id(rdma_listen_handler, cma_xprt, RDMA_PS_TCP,
+ IB_QPT_RC);
if (IS_ERR(listen_id)) {
ret = PTR_ERR(listen_id);
dprintk("svcrdma: rdma_create_id failed = %d\n", ret);
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index d4297dc43dc..80f8da344df 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -387,7 +387,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
init_completion(&ia->ri_done);
- id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP);
+ id = rdma_create_id(rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
dprintk("RPC: %s: rdma_create_id() failed %i\n",
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index bf005d3c65e..72abb735893 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -19,6 +19,7 @@
*/
#include <linux/types.h>
+#include <linux/string.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/capability.h>
@@ -28,6 +29,7 @@
#include <linux/in.h>
#include <linux/net.h>
#include <linux/mm.h>
+#include <linux/un.h>
#include <linux/udp.h>
#include <linux/tcp.h>
#include <linux/sunrpc/clnt.h>
@@ -45,6 +47,9 @@
#include <net/tcp.h>
#include "sunrpc.h"
+
+static void xs_close(struct rpc_xprt *xprt);
+
/*
* xprtsock tunables
*/
@@ -261,6 +266,11 @@ static inline struct sockaddr *xs_addr(struct rpc_xprt *xprt)
return (struct sockaddr *) &xprt->addr;
}
+static inline struct sockaddr_un *xs_addr_un(struct rpc_xprt *xprt)
+{
+ return (struct sockaddr_un *) &xprt->addr;
+}
+
static inline struct sockaddr_in *xs_addr_in(struct rpc_xprt *xprt)
{
return (struct sockaddr_in *) &xprt->addr;
@@ -276,23 +286,34 @@ static void xs_format_common_peer_addresses(struct rpc_xprt *xprt)
struct sockaddr *sap = xs_addr(xprt);
struct sockaddr_in6 *sin6;
struct sockaddr_in *sin;
+ struct sockaddr_un *sun;
char buf[128];
- (void)rpc_ntop(sap, buf, sizeof(buf));
- xprt->address_strings[RPC_DISPLAY_ADDR] = kstrdup(buf, GFP_KERNEL);
-
switch (sap->sa_family) {
+ case AF_LOCAL:
+ sun = xs_addr_un(xprt);
+ strlcpy(buf, sun->sun_path, sizeof(buf));
+ xprt->address_strings[RPC_DISPLAY_ADDR] =
+ kstrdup(buf, GFP_KERNEL);
+ break;
case AF_INET:
+ (void)rpc_ntop(sap, buf, sizeof(buf));
+ xprt->address_strings[RPC_DISPLAY_ADDR] =
+ kstrdup(buf, GFP_KERNEL);
sin = xs_addr_in(xprt);
snprintf(buf, sizeof(buf), "%08x", ntohl(sin->sin_addr.s_addr));
break;
case AF_INET6:
+ (void)rpc_ntop(sap, buf, sizeof(buf));
+ xprt->address_strings[RPC_DISPLAY_ADDR] =
+ kstrdup(buf, GFP_KERNEL);
sin6 = xs_addr_in6(xprt);
snprintf(buf, sizeof(buf), "%pi6", &sin6->sin6_addr);
break;
default:
BUG();
}
+
xprt->address_strings[RPC_DISPLAY_HEX_ADDR] = kstrdup(buf, GFP_KERNEL);
}
@@ -495,6 +516,70 @@ static int xs_nospace(struct rpc_task *task)
return ret;
}
+/*
+ * Construct a stream transport record marker in @buf.
+ */
+static inline void xs_encode_stream_record_marker(struct xdr_buf *buf)
+{
+ u32 reclen = buf->len - sizeof(rpc_fraghdr);
+ rpc_fraghdr *base = buf->head[0].iov_base;
+ *base = cpu_to_be32(RPC_LAST_STREAM_FRAGMENT | reclen);
+}
+
+/**
+ * xs_local_send_request - write an RPC request to an AF_LOCAL socket
+ * @task: RPC task that manages the state of an RPC request
+ *
+ * Return values:
+ * 0: The request has been sent
+ * EAGAIN: The socket was blocked, please call again later to
+ * complete the request
+ * ENOTCONN: Caller needs to invoke connect logic then call again
+ * other: Some other error occured, the request was not sent
+ */
+static int xs_local_send_request(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+ struct sock_xprt *transport =
+ container_of(xprt, struct sock_xprt, xprt);
+ struct xdr_buf *xdr = &req->rq_snd_buf;
+ int status;
+
+ xs_encode_stream_record_marker(&req->rq_snd_buf);
+
+ xs_pktdump("packet data:",
+ req->rq_svec->iov_base, req->rq_svec->iov_len);
+
+ status = xs_sendpages(transport->sock, NULL, 0,
+ xdr, req->rq_bytes_sent);
+ dprintk("RPC: %s(%u) = %d\n",
+ __func__, xdr->len - req->rq_bytes_sent, status);
+ if (likely(status >= 0)) {
+ req->rq_bytes_sent += status;
+ req->rq_xmit_bytes_sent += status;
+ if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ req->rq_bytes_sent = 0;
+ return 0;
+ }
+ status = -EAGAIN;
+ }
+
+ switch (status) {
+ case -EAGAIN:
+ status = xs_nospace(task);
+ break;
+ default:
+ dprintk("RPC: sendmsg returned unrecognized error %d\n",
+ -status);
+ case -EPIPE:
+ xs_close(xprt);
+ status = -ENOTCONN;
+ }
+
+ return status;
+}
+
/**
* xs_udp_send_request - write an RPC request to a UDP socket
* @task: address of RPC task that manages the state of an RPC request
@@ -574,13 +659,6 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
kernel_sock_shutdown(sock, SHUT_WR);
}
-static inline void xs_encode_tcp_record_marker(struct xdr_buf *buf)
-{
- u32 reclen = buf->len - sizeof(rpc_fraghdr);
- rpc_fraghdr *base = buf->head[0].iov_base;
- *base = htonl(RPC_LAST_STREAM_FRAGMENT | reclen);
-}
-
/**
* xs_tcp_send_request - write an RPC request to a TCP socket
* @task: address of RPC task that manages the state of an RPC request
@@ -603,7 +681,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
struct xdr_buf *xdr = &req->rq_snd_buf;
int status;
- xs_encode_tcp_record_marker(&req->rq_snd_buf);
+ xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
req->rq_svec->iov_base,
@@ -785,6 +863,88 @@ static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
return (struct rpc_xprt *) sk->sk_user_data;
}
+static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
+{
+ struct xdr_skb_reader desc = {
+ .skb = skb,
+ .offset = sizeof(rpc_fraghdr),
+ .count = skb->len - sizeof(rpc_fraghdr),
+ };
+
+ if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
+ return -1;
+ if (desc.count)
+ return -1;
+ return 0;
+}
+
+/**
+ * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets
+ * @sk: socket with data to read
+ * @len: how much data to read
+ *
+ * Currently this assumes we can read the whole reply in a single gulp.
+ */
+static void xs_local_data_ready(struct sock *sk, int len)
+{
+ struct rpc_task *task;
+ struct rpc_xprt *xprt;
+ struct rpc_rqst *rovr;
+ struct sk_buff *skb;
+ int err, repsize, copied;
+ u32 _xid;
+ __be32 *xp;
+
+ read_lock_bh(&sk->sk_callback_lock);
+ dprintk("RPC: %s...\n", __func__);
+ xprt = xprt_from_sock(sk);
+ if (xprt == NULL)
+ goto out;
+
+ skb = skb_recv_datagram(sk, 0, 1, &err);
+ if (skb == NULL)
+ goto out;
+
+ if (xprt->shutdown)
+ goto dropit;
+
+ repsize = skb->len - sizeof(rpc_fraghdr);
+ if (repsize < 4) {
+ dprintk("RPC: impossible RPC reply size %d\n", repsize);
+ goto dropit;
+ }
+
+ /* Copy the XID from the skb... */
+ xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
+ if (xp == NULL)
+ goto dropit;
+
+ /* Look up and lock the request corresponding to the given XID */
+ spin_lock(&xprt->transport_lock);
+ rovr = xprt_lookup_rqst(xprt, *xp);
+ if (!rovr)
+ goto out_unlock;
+ task = rovr->rq_task;
+
+ copied = rovr->rq_private_buf.buflen;
+ if (copied > repsize)
+ copied = repsize;
+
+ if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
+ dprintk("RPC: sk_buff copy failed\n");
+ goto out_unlock;
+ }
+
+ xprt_complete_rqst(task, copied);
+
+ out_unlock:
+ spin_unlock(&xprt->transport_lock);
+ dropit:
+ skb_free_datagram(sk, skb);
+ out:
+ read_unlock_bh(&sk->sk_callback_lock);
+}
+
/**
* xs_udp_data_ready - "data ready" callback for UDP sockets
* @sk: socket with data to read
@@ -1344,7 +1504,6 @@ static void xs_tcp_state_change(struct sock *sk)
case TCP_CLOSE_WAIT:
/* The server initiated a shutdown of the socket */
xprt_force_disconnect(xprt);
- case TCP_SYN_SENT:
xprt->connect_cookie++;
case TCP_CLOSING:
/*
@@ -1571,11 +1730,31 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
return err;
}
+/*
+ * We don't support autobind on AF_LOCAL sockets
+ */
+static void xs_local_rpcbind(struct rpc_task *task)
+{
+ xprt_set_bound(task->tk_xprt);
+}
+
+static void xs_local_set_port(struct rpc_xprt *xprt, unsigned short port)
+{
+}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key xs_key[2];
static struct lock_class_key xs_slock_key[2];
+static inline void xs_reclassify_socketu(struct socket *sock)
+{
+ struct sock *sk = sock->sk;
+
+ BUG_ON(sock_owned_by_user(sk));
+ sock_lock_init_class_and_name(sk, "slock-AF_LOCAL-RPC",
+ &xs_slock_key[1], "sk_lock-AF_LOCAL-RPC", &xs_key[1]);
+}
+
static inline void xs_reclassify_socket4(struct socket *sock)
{
struct sock *sk = sock->sk;
@@ -1597,6 +1776,9 @@ static inline void xs_reclassify_socket6(struct socket *sock)
static inline void xs_reclassify_socket(int family, struct socket *sock)
{
switch (family) {
+ case AF_LOCAL:
+ xs_reclassify_socketu(sock);
+ break;
case AF_INET:
xs_reclassify_socket4(sock);
break;
@@ -1606,6 +1788,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
}
}
#else
+static inline void xs_reclassify_socketu(struct socket *sock)
+{
+}
+
static inline void xs_reclassify_socket4(struct socket *sock)
{
}
@@ -1644,6 +1830,94 @@ out:
return ERR_PTR(err);
}
+static int xs_local_finish_connecting(struct rpc_xprt *xprt,
+ struct socket *sock)
+{
+ struct sock_xprt *transport = container_of(xprt, struct sock_xprt,
+ xprt);
+
+ if (!transport->inet) {
+ struct sock *sk = sock->sk;
+
+ write_lock_bh(&sk->sk_callback_lock);
+
+ xs_save_old_callbacks(transport, sk);
+
+ sk->sk_user_data = xprt;
+ sk->sk_data_ready = xs_local_data_ready;
+ sk->sk_write_space = xs_udp_write_space;
+ sk->sk_error_report = xs_error_report;
+ sk->sk_allocation = GFP_ATOMIC;
+
+ xprt_clear_connected(xprt);
+
+ /* Reset to new socket */
+ transport->sock = sock;
+ transport->inet = sk;
+
+ write_unlock_bh(&sk->sk_callback_lock);
+ }
+
+ /* Tell the socket layer to start connecting... */
+ xprt->stat.connect_count++;
+ xprt->stat.connect_start = jiffies;
+ return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
+}
+
+/**
+ * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint
+ * @xprt: RPC transport to connect
+ * @transport: socket transport to connect
+ * @create_sock: function to create a socket of the correct type
+ *
+ * Invoked by a work queue tasklet.
+ */
+static void xs_local_setup_socket(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, connect_worker.work);
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct socket *sock;
+ int status = -EIO;
+
+ if (xprt->shutdown)
+ goto out;
+
+ clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
+ status = __sock_create(xprt->xprt_net, AF_LOCAL,
+ SOCK_STREAM, 0, &sock, 1);
+ if (status < 0) {
+ dprintk("RPC: can't create AF_LOCAL "
+ "transport socket (%d).\n", -status);
+ goto out;
+ }
+ xs_reclassify_socketu(sock);
+
+ dprintk("RPC: worker connecting xprt %p via AF_LOCAL to %s\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+
+ status = xs_local_finish_connecting(xprt, sock);
+ switch (status) {
+ case 0:
+ dprintk("RPC: xprt %p connected to %s\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ xprt_set_connected(xprt);
+ break;
+ case -ENOENT:
+ dprintk("RPC: xprt %p: socket %s does not exist\n",
+ xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ break;
+ default:
+ printk(KERN_ERR "%s: unhandled error (%d) connecting to %s\n",
+ __func__, -status,
+ xprt->address_strings[RPC_DISPLAY_ADDR]);
+ }
+
+out:
+ xprt_clear_connecting(xprt);
+ xprt_wake_pending_tasks(xprt, status);
+}
+
static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1758,6 +2032,7 @@ static void xs_tcp_reuse_connection(struct sock_xprt *transport)
static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+ int ret = -ENOTCONN;
if (!transport->inet) {
struct sock *sk = sock->sk;
@@ -1789,12 +2064,22 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
}
if (!xprt_bound(xprt))
- return -ENOTCONN;
+ goto out;
/* Tell the socket layer to start connecting... */
xprt->stat.connect_count++;
xprt->stat.connect_start = jiffies;
- return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+ ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
+ switch (ret) {
+ case 0:
+ case -EINPROGRESS:
+ /* SYN_SENT! */
+ xprt->connect_cookie++;
+ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ }
+out:
+ return ret;
}
/**
@@ -1917,6 +2202,32 @@ static void xs_connect(struct rpc_task *task)
}
/**
+ * xs_local_print_stats - display AF_LOCAL socket-specifc stats
+ * @xprt: rpc_xprt struct containing statistics
+ * @seq: output file
+ *
+ */
+static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
+{
+ long idle_time = 0;
+
+ if (xprt_connected(xprt))
+ idle_time = (long)(jiffies - xprt->last_used) / HZ;
+
+ seq_printf(seq, "\txprt:\tlocal %lu %lu %lu %ld %lu %lu %lu "
+ "%llu %llu\n",
+ xprt->stat.bind_count,
+ xprt->stat.connect_count,
+ xprt->stat.connect_time,
+ idle_time,
+ xprt->stat.sends,
+ xprt->stat.recvs,
+ xprt->stat.bad_xids,
+ xprt->stat.req_u,
+ xprt->stat.bklog_u);
+}
+
+/**
* xs_udp_print_stats - display UDP socket-specifc stats
* @xprt: rpc_xprt struct containing statistics
* @seq: output file
@@ -2014,10 +2325,7 @@ static int bc_sendto(struct rpc_rqst *req)
unsigned long headoff;
unsigned long tailoff;
- /*
- * Set up the rpc header and record marker stuff
- */
- xs_encode_tcp_record_marker(xbufp);
+ xs_encode_stream_record_marker(xbufp);
tailoff = (unsigned long)xbufp->tail[0].iov_base & ~PAGE_MASK;
headoff = (unsigned long)xbufp->head[0].iov_base & ~PAGE_MASK;
@@ -2089,6 +2397,21 @@ static void bc_destroy(struct rpc_xprt *xprt)
{
}
+static struct rpc_xprt_ops xs_local_ops = {
+ .reserve_xprt = xprt_reserve_xprt,
+ .release_xprt = xs_tcp_release_xprt,
+ .rpcbind = xs_local_rpcbind,
+ .set_port = xs_local_set_port,
+ .connect = xs_connect,
+ .buf_alloc = rpc_malloc,
+ .buf_free = rpc_free,
+ .send_request = xs_local_send_request,
+ .set_retrans_timeout = xprt_set_retrans_timeout_def,
+ .close = xs_close,
+ .destroy = xs_destroy,
+ .print_stats = xs_local_print_stats,
+};
+
static struct rpc_xprt_ops xs_udp_ops = {
.set_buffer_size = xs_udp_set_buffer_size,
.reserve_xprt = xprt_reserve_xprt_cong,
@@ -2150,6 +2473,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap)
};
switch (family) {
+ case AF_LOCAL:
+ break;
case AF_INET:
memcpy(sap, &sin, sizeof(sin));
break;
@@ -2197,6 +2522,70 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
return xprt;
}
+static const struct rpc_timeout xs_local_default_timeout = {
+ .to_initval = 10 * HZ,
+ .to_maxval = 10 * HZ,
+ .to_retries = 2,
+};
+
+/**
+ * xs_setup_local - Set up transport to use an AF_LOCAL socket
+ * @args: rpc transport creation arguments
+ *
+ * AF_LOCAL is a "tpi_cots_ord" transport, just like TCP
+ */
+static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
+{
+ struct sockaddr_un *sun = (struct sockaddr_un *)args->dstaddr;
+ struct sock_xprt *transport;
+ struct rpc_xprt *xprt;
+ struct rpc_xprt *ret;
+
+ xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries);
+ if (IS_ERR(xprt))
+ return xprt;
+ transport = container_of(xprt, struct sock_xprt, xprt);
+
+ xprt->prot = 0;
+ xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
+ xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
+
+ xprt->bind_timeout = XS_BIND_TO;
+ xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
+ xprt->idle_timeout = XS_IDLE_DISC_TO;
+
+ xprt->ops = &xs_local_ops;
+ xprt->timeout = &xs_local_default_timeout;
+
+ switch (sun->sun_family) {
+ case AF_LOCAL:
+ if (sun->sun_path[0] != '/') {
+ dprintk("RPC: bad AF_LOCAL address: %s\n",
+ sun->sun_path);
+ ret = ERR_PTR(-EINVAL);
+ goto out_err;
+ }
+ xprt_set_bound(xprt);
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_local_setup_socket);
+ xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
+ break;
+ default:
+ ret = ERR_PTR(-EAFNOSUPPORT);
+ goto out_err;
+ }
+
+ dprintk("RPC: set up xprt to %s via AF_LOCAL\n",
+ xprt->address_strings[RPC_DISPLAY_ADDR]);
+
+ if (try_module_get(THIS_MODULE))
+ return xprt;
+ ret = ERR_PTR(-EINVAL);
+out_err:
+ xprt_free(xprt);
+ return ret;
+}
+
static const struct rpc_timeout xs_udp_default_timeout = {
.to_initval = 5 * HZ,
.to_maxval = 30 * HZ,
@@ -2438,6 +2827,14 @@ out_err:
return ret;
}
+static struct xprt_class xs_local_transport = {
+ .list = LIST_HEAD_INIT(xs_local_transport.list),
+ .name = "named UNIX socket",
+ .owner = THIS_MODULE,
+ .ident = XPRT_TRANSPORT_LOCAL,
+ .setup = xs_setup_local,
+};
+
static struct xprt_class xs_udp_transport = {
.list = LIST_HEAD_INIT(xs_udp_transport.list),
.name = "udp",
@@ -2473,6 +2870,7 @@ int init_socket_xprt(void)
sunrpc_table_header = register_sysctl_table(sunrpc_table);
#endif
+ xprt_register_transport(&xs_local_transport);
xprt_register_transport(&xs_udp_transport);
xprt_register_transport(&xs_tcp_transport);
xprt_register_transport(&xs_bc_tcp_transport);
@@ -2493,6 +2891,7 @@ void cleanup_socket_xprt(void)
}
#endif
+ xprt_unregister_transport(&xs_local_transport);
xprt_unregister_transport(&xs_udp_transport);
xprt_unregister_transport(&xs_tcp_transport);
xprt_unregister_transport(&xs_bc_tcp_transport);