aboutsummaryrefslogtreecommitdiff
path: root/net/sunrpc/xprt.c
diff options
context:
space:
mode:
author: Tony Luck <tony.luck@intel.com> 2005-10-31 10:51:57 -0800
committer: Tony Luck <tony.luck@intel.com> 2005-10-31 10:51:57 -0800
commit: c7fb577e2a6cb04732541f2dc402bd46747f7558 (patch)
tree: df3b1a1922ed13bfbcc45d08650c38beeb1a7bd1 /net/sunrpc/xprt.c
parent: 9cec58dc138d6fcad9f447a19c8ff69f6540e667 (diff)
parent: 581c1b14394aee60aff46ea67d05483261ed6527 (diff)
manual update from upstream:
Applied Al's change 06a544971fad0992fe8b92c5647538d573089dd4 to new location of swiotlb.c

Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'net/sunrpc/xprt.c')
-rw-r--r-- net/sunrpc/xprt.c 1613
1 files changed, 447 insertions, 1166 deletions
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 3c654e06b08..6dda3860351 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -10,12 +10,12 @@
* one is available. Otherwise, it sleeps on the backlog queue
* (xprt_reserve).
* - Next, the caller puts together the RPC message, stuffs it into
- * the request struct, and calls xprt_call().
- * - xprt_call transmits the message and installs the caller on the
- * socket's wait list. At the same time, it installs a timer that
+ * the request struct, and calls xprt_transmit().
+ * - xprt_transmit sends the message and installs the caller on the
+ * transport's wait list. At the same time, it installs a timer that
* is run after the packet's timeout has expired.
* - When a packet arrives, the data_ready handler walks the list of
- * pending requests for that socket. If a matching XID is found, the
+ * pending requests for that transport. If a matching XID is found, the
* caller is woken up, and the timer removed.
* - When no reply arrives within the timeout interval, the timer is
* fired by the kernel and runs xprt_timer(). It either adjusts the
@@ -33,36 +33,17 @@
*
* Copyright (C) 1995-1997, Olaf Kirch <okir@monad.swb.de>
*
- * TCP callback races fixes (C) 1998 Red Hat Software <alan@redhat.com>
- * TCP send fixes (C) 1998 Red Hat Software <alan@redhat.com>
- * TCP NFS related read + write fixes
- * (C) 1999 Dave Airlie, University of Limerick, Ireland <airlied@linux.ie>
- *
- * Rewrite of larges part of the code in order to stabilize TCP stuff.
- * Fix behaviour when socket buffer is full.
- * (C) 1999 Trond Myklebust <trond.myklebust@fys.uio.no>
+ * Transport switch API copyright (C) 2005, Chuck Lever <cel@netapp.com>
*/
+#include <linux/module.h>
+
#include <linux/types.h>
-#include <linux/slab.h>
-#include <linux/capability.h>
-#include <linux/sched.h>
-#include <linux/errno.h>
-#include <linux/socket.h>
-#include <linux/in.h>
-#include <linux/net.h>
-#include <linux/mm.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/file.h>
+#include <linux/interrupt.h>
#include <linux/workqueue.h>
#include <linux/random.h>
-#include <net/sock.h>
-#include <net/checksum.h>
-#include <net/udp.h>
-#include <net/tcp.h>
+#include <linux/sunrpc/clnt.h>
/*
* Local variables
@@ -73,81 +54,90 @@
# define RPCDBG_FACILITY RPCDBG_XPRT
#endif
-#define XPRT_MAX_BACKOFF (8)
-#define XPRT_IDLE_TIMEOUT (5*60*HZ)
-#define XPRT_MAX_RESVPORT (800)
-
/*
* Local functions
*/
static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
static inline void do_xprt_reserve(struct rpc_task *);
-static void xprt_disconnect(struct rpc_xprt *);
static void xprt_connect_status(struct rpc_task *task);
-static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
- struct rpc_timeout *to);
-static struct socket *xprt_create_socket(struct rpc_xprt *, int, int);
-static void xprt_bind_socket(struct rpc_xprt *, struct socket *);
static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
-static int xprt_clear_backlog(struct rpc_xprt *xprt);
-
-#ifdef RPC_DEBUG_DATA
/*
- * Print the buffer contents (first 128 bytes only--just enough for
- * diropres return).
+ * The transport code maintains an estimate on the maximum number of out-
+ * standing RPC requests, using a smoothed version of the congestion
+ * avoidance implemented in 4.4BSD. This is basically the Van Jacobson
+ * congestion algorithm: If a retransmit occurs, the congestion window is
+ * halved; otherwise, it is incremented by 1/cwnd when
+ *
+ * - a reply is received and
+ * - a full number of requests are outstanding and
+ * - the congestion window hasn't been updated recently.
*/
-static void
-xprt_pktdump(char *msg, u32 *packet, unsigned int count)
-{
- u8 *buf = (u8 *) packet;
- int j;
-
- dprintk("RPC: %s\n", msg);
- for (j = 0; j < count && j < 128; j += 4) {
- if (!(j & 31)) {
- if (j)
- dprintk("\n");
- dprintk("0x%04x ", j);
- }
- dprintk("%02x%02x%02x%02x ",
- buf[j], buf[j+1], buf[j+2], buf[j+3]);
- }
- dprintk("\n");
-}
-#else
-static inline void
-xprt_pktdump(char *msg, u32 *packet, unsigned int count)
-{
- /* NOP */
-}
-#endif
+#define RPC_CWNDSHIFT (8U)
+#define RPC_CWNDSCALE (1U << RPC_CWNDSHIFT)
+#define RPC_INITCWND RPC_CWNDSCALE
+#define RPC_MAXCWND(xprt) ((xprt)->max_reqs << RPC_CWNDSHIFT)
-/*
- * Look up RPC transport given an INET socket
+#define RPCXPRT_CONGESTED(xprt) ((xprt)->cong >= (xprt)->cwnd)
+
+/**
+ * xprt_reserve_xprt - serialize write access to transports
+ * @task: task that is requesting access to the transport
+ *
+ * This prevents mixing the payload of separate requests, and prevents
+ * transport connects from colliding with writes. No congestion control
+ * is provided.
*/
-static inline struct rpc_xprt *
-xprt_from_sock(struct sock *sk)
+int xprt_reserve_xprt(struct rpc_task *task)
{
- return (struct rpc_xprt *) sk->sk_user_data;
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_rqst *req = task->tk_rqstp;
+
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
+ if (task == xprt->snd_task)
+ return 1;
+ if (task == NULL)
+ return 0;
+ goto out_sleep;
+ }
+ xprt->snd_task = task;
+ if (req) {
+ req->rq_bytes_sent = 0;
+ req->rq_ntrans++;
+ }
+ return 1;
+
+out_sleep:
+ dprintk("RPC: %4d failed to lock transport %p\n",
+ task->tk_pid, xprt);
+ task->tk_timeout = 0;
+ task->tk_status = -EAGAIN;
+ if (req && req->rq_ntrans)
+ rpc_sleep_on(&xprt->resend, task, NULL, NULL);
+ else
+ rpc_sleep_on(&xprt->sending, task, NULL, NULL);
+ return 0;
}
/*
- * Serialize write access to sockets, in order to prevent different
- * requests from interfering with each other.
- * Also prevents TCP socket connects from colliding with writes.
+ * xprt_reserve_xprt_cong - serialize write access to transports
+ * @task: task that is requesting access to the transport
+ *
+ * Same as xprt_reserve_xprt, but Van Jacobson congestion control is
+ * integrated into the decision of whether a request is allowed to be
+ * woken up and given access to the transport.
*/
-static int
-__xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
+int xprt_reserve_xprt_cong(struct rpc_task *task)
{
+ struct rpc_xprt *xprt = task->tk_xprt;
struct rpc_rqst *req = task->tk_rqstp;
- if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate)) {
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
return 1;
goto out_sleep;
}
- if (xprt->nocong || __xprt_get_cong(xprt, task)) {
+ if (__xprt_get_cong(xprt, task)) {
xprt->snd_task = task;
if (req) {
req->rq_bytes_sent = 0;
@@ -156,10 +146,10 @@ __xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
return 1;
}
smp_mb__before_clear_bit();
- clear_bit(XPRT_LOCKED, &xprt->sockstate);
+ clear_bit(XPRT_LOCKED, &xprt->state);
smp_mb__after_clear_bit();
out_sleep:
- dprintk("RPC: %4d failed to lock socket %p\n", task->tk_pid, xprt);
+ dprintk("RPC: %4d failed to lock transport %p\n", task->tk_pid, xprt);
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (req && req->rq_ntrans)
@@ -169,26 +159,52 @@ out_sleep:
return 0;
}
-static inline int
-xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
+static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
int retval;
- spin_lock_bh(&xprt->sock_lock);
- retval = __xprt_lock_write(xprt, task);
- spin_unlock_bh(&xprt->sock_lock);
+ spin_lock_bh(&xprt->transport_lock);
+ retval = xprt->ops->reserve_xprt(task);
+ spin_unlock_bh(&xprt->transport_lock);
return retval;
}
+static void __xprt_lock_write_next(struct rpc_xprt *xprt)
+{
+ struct rpc_task *task;
+ struct rpc_rqst *req;
-static void
-__xprt_lock_write_next(struct rpc_xprt *xprt)
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
+ return;
+
+ task = rpc_wake_up_next(&xprt->resend);
+ if (!task) {
+ task = rpc_wake_up_next(&xprt->sending);
+ if (!task)
+ goto out_unlock;
+ }
+
+ req = task->tk_rqstp;
+ xprt->snd_task = task;
+ if (req) {
+ req->rq_bytes_sent = 0;
+ req->rq_ntrans++;
+ }
+ return;
+
+out_unlock:
+ smp_mb__before_clear_bit();
+ clear_bit(XPRT_LOCKED, &xprt->state);
+ smp_mb__after_clear_bit();
+}
+
+static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
{
struct rpc_task *task;
- if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate))
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
- if (!xprt->nocong && RPCXPRT_CONGESTED(xprt))
+ if (RPCXPRT_CONGESTED(xprt))
goto out_unlock;
task = rpc_wake_up_next(&xprt->resend);
if (!task) {
@@ -196,7 +212,7 @@ __xprt_lock_write_next(struct rpc_xprt *xprt)
if (!task)
goto out_unlock;
}
- if (xprt->nocong || __xprt_get_cong(xprt, task)) {
+ if (__xprt_get_cong(xprt, task)) {
struct rpc_rqst *req = task->tk_rqstp;
xprt->snd_task = task;
if (req) {
@@ -207,87 +223,52 @@ __xprt_lock_write_next(struct rpc_xprt *xprt)
}
out_unlock:
smp_mb__before_clear_bit();
- clear_bit(XPRT_LOCKED, &xprt->sockstate);
+ clear_bit(XPRT_LOCKED, &xprt->state);
smp_mb__after_clear_bit();
}
-/*
- * Releases the socket for use by other requests.
+/**
+ * xprt_release_xprt - allow other requests to use a transport
+ * @xprt: transport with other tasks potentially waiting
+ * @task: task that is releasing access to the transport
+ *
+ * Note that "task" can be NULL. No congestion control is provided.
*/
-static void
-__xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
+void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task == task) {
xprt->snd_task = NULL;
smp_mb__before_clear_bit();
- clear_bit(XPRT_LOCKED, &xprt->sockstate);
+ clear_bit(XPRT_LOCKED, &xprt->state);
smp_mb__after_clear_bit();
__xprt_lock_write_next(xprt);
}
}
-static inline void
-xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
-{
- spin_lock_bh(&xprt->sock_lock);
- __xprt_release_write(xprt, task);
- spin_unlock_bh(&xprt->sock_lock);
-}
-
-/*
- * Write data to socket.
+/**
+ * xprt_release_xprt_cong - allow other requests to use a transport
+ * @xprt: transport with other tasks potentially waiting
+ * @task: task that is releasing access to the transport
+ *
+ * Note that "task" can be NULL. Another task is awoken to use the
+ * transport if the transport's congestion window allows it.
*/
-static inline int
-xprt_sendmsg(struct rpc_xprt *xprt, struct rpc_rqst *req)
+void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
- struct socket *sock = xprt->sock;
- struct xdr_buf *xdr = &req->rq_snd_buf;
- struct sockaddr *addr = NULL;
- int addrlen = 0;
- unsigned int skip;
- int result;
-
- if (!sock)
- return -ENOTCONN;
-
- xprt_pktdump("packet data:",
- req->rq_svec->iov_base,
- req->rq_svec->iov_len);
-
- /* For UDP, we need to provide an address */
- if (!xprt->stream) {
- addr = (struct sockaddr *) &xprt->addr;
- addrlen = sizeof(xprt->addr);
+ if (xprt->snd_task == task) {
+ xprt->snd_task = NULL;
+ smp_mb__before_clear_bit();
+ clear_bit(XPRT_LOCKED, &xprt->state);
+ smp_mb__after_clear_bit();
+ __xprt_lock_write_next_cong(xprt);
}
- /* Dont repeat bytes */
- skip = req->rq_bytes_sent;
-
- clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
- result = xdr_sendpages(sock, addr, addrlen, xdr, skip, MSG_DONTWAIT);
-
- dprintk("RPC: xprt_sendmsg(%d) = %d\n", xdr->len - skip, result);
-
- if (result >= 0)
- return result;
+}
- switch (result) {
- case -ECONNREFUSED:
- /* When the server has died, an ICMP port unreachable message
- * prompts ECONNREFUSED.
- */
- case -EAGAIN:
- break;
- case -ECONNRESET:
- case -ENOTCONN:
- case -EPIPE:
- /* connection broken */
- if (xprt->stream)
- result = -ENOTCONN;
- break;
- default:
- printk(KERN_NOTICE "RPC: sendmsg returned error %d\n", -result);
- }
- return result;
+static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ spin_lock_bh(&xprt->transport_lock);
+ xprt->ops->release_xprt(xprt, task);
+ spin_unlock_bh(&xprt->transport_lock);
}
/*
@@ -321,26 +302,40 @@ __xprt_put_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
return;
req->rq_cong = 0;
xprt->cong -= RPC_CWNDSCALE;
- __xprt_lock_write_next(xprt);
+ __xprt_lock_write_next_cong(xprt);
}
-/*
- * Adjust RPC congestion window
+/**
+ * xprt_release_rqst_cong - housekeeping when request is complete
+ * @task: RPC request that recently completed
+ *
+ * Useful for transports that require congestion control.
+ */
+void xprt_release_rqst_cong(struct rpc_task *task)
+{
+ __xprt_put_cong(task->tk_xprt, task->tk_rqstp);
+}
+
+/**
+ * xprt_adjust_cwnd - adjust transport congestion window
+ * @task: recently completed RPC request used to adjust window
+ * @result: result code of completed RPC request
+ *
* We use a time-smoothed congestion estimator to avoid heavy oscillation.
*/
-static void
-xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
+void xprt_adjust_cwnd(struct rpc_task *task, int result)
{
- unsigned long cwnd;
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = task->tk_xprt;
+ unsigned long cwnd = xprt->cwnd;
- cwnd = xprt->cwnd;
if (result >= 0 && cwnd <= xprt->cong) {
/* The (cwnd >> 1) term makes sure
* the result gets rounded properly. */
cwnd += (RPC_CWNDSCALE * RPC_CWNDSCALE + (cwnd >> 1)) / cwnd;
if (cwnd > RPC_MAXCWND(xprt))
cwnd = RPC_MAXCWND(xprt);
- __xprt_lock_write_next(xprt);
+ __xprt_lock_write_next_cong(xprt);
} else if (result == -ETIMEDOUT) {
cwnd >>= 1;
if (cwnd < RPC_CWNDSCALE)
@@ -349,11 +344,89 @@ xprt_adjust_cwnd(struct rpc_xprt *xprt, int result)
dprintk("RPC: cong %ld, cwnd was %ld, now %ld\n",
xprt->cong, xprt->cwnd, cwnd);
xprt->cwnd = cwnd;
+ __xprt_put_cong(xprt, req);
+}
+
+/**
+ * xprt_wake_pending_tasks - wake all tasks on a transport's pending queue
+ * @xprt: transport with waiting tasks
+ * @status: result code to plant in each task before waking it
+ *
+ */
+void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status)
+{
+ if (status < 0)
+ rpc_wake_up_status(&xprt->pending, status);
+ else
+ rpc_wake_up(&xprt->pending);
+}
+
+/**
+ * xprt_wait_for_buffer_space - wait for transport output buffer to clear
+ * @task: task to be put to sleep
+ *
+ */
+void xprt_wait_for_buffer_space(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ task->tk_timeout = req->rq_timeout;
+ rpc_sleep_on(&xprt->pending, task, NULL, NULL);
+}
+
+/**
+ * xprt_write_space - wake the task waiting for transport output buffer space
+ * @xprt: transport with waiting tasks
+ *
+ * Can be called in a soft IRQ context, so xprt_write_space never sleeps.
+ */
+void xprt_write_space(struct rpc_xprt *xprt)
+{
+ if (unlikely(xprt->shutdown))
+ return;
+
+ spin_lock_bh(&xprt->transport_lock);
+ if (xprt->snd_task) {
+ dprintk("RPC: write space: waking waiting task on xprt %p\n",
+ xprt);
+ rpc_wake_up_task(xprt->snd_task);
+ }
+ spin_unlock_bh(&xprt->transport_lock);
+}
+
+/**
+ * xprt_set_retrans_timeout_def - set a request's retransmit timeout
+ * @task: task whose timeout is to be set
+ *
+ * Set a request's retransmit timeout based on the transport's
+ * default timeout parameters. Used by transports that don't adjust
+ * the retransmit timeout based on round-trip time estimation.
+ */
+void xprt_set_retrans_timeout_def(struct rpc_task *task)
+{
+ task->tk_timeout = task->tk_rqstp->rq_timeout;
}
/*
- * Reset the major timeout value
+ * xprt_set_retrans_timeout_rtt - set a request's retransmit timeout
+ * @task: task whose timeout is to be set
+ *
+ * Set a request's retransmit timeout using the RTT estimator.
*/
+void xprt_set_retrans_timeout_rtt(struct rpc_task *task)
+{
+ int timer = task->tk_msg.rpc_proc->p_timer;
+ struct rpc_rtt *rtt = task->tk_client->cl_rtt;
+ struct rpc_rqst *req = task->tk_rqstp;
+ unsigned long max_timeout = req->rq_xprt->timeout.to_maxval;
+
+ task->tk_timeout = rpc_calc_rto(rtt, timer);
+ task->tk_timeout <<= rpc_ntimeo(rtt, timer) + req->rq_retries;
+ if (task->tk_timeout > max_timeout || task->tk_timeout == 0)
+ task->tk_timeout = max_timeout;
+}
+
static void xprt_reset_majortimeo(struct rpc_rqst *req)
{
struct rpc_timeout *to = &req->rq_xprt->timeout;
@@ -368,8 +441,10 @@ static void xprt_reset_majortimeo(struct rpc_rqst *req)
req->rq_majortimeo += jiffies;
}
-/*
- * Adjust timeout values etc for next retransmit
+/**
+ * xprt_adjust_timeout - adjust timeout values for next retransmit
+ * @req: RPC request containing parameters to use for the adjustment
+ *
*/
int xprt_adjust_timeout(struct rpc_rqst *req)
{
@@ -391,9 +466,9 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
req->rq_retries = 0;
xprt_reset_majortimeo(req);
/* Reset the RTT counters == "slow start" */
- spin_lock_bh(&xprt->sock_lock);
+ spin_lock_bh(&xprt->transport_lock);
rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
- spin_unlock_bh(&xprt->sock_lock);
+ spin_unlock_bh(&xprt->transport_lock);
pprintk("RPC: %lu timeout\n", jiffies);
status = -ETIMEDOUT;
}
@@ -405,133 +480,52 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
return status;
}
-/*
- * Close down a transport socket
- */
-static void
-xprt_close(struct rpc_xprt *xprt)
-{
- struct socket *sock = xprt->sock;
- struct sock *sk = xprt->inet;
-
- if (!sk)
- return;
-
- write_lock_bh(&sk->sk_callback_lock);
- xprt->inet = NULL;
- xprt->sock = NULL;
-
- sk->sk_user_data = NULL;
- sk->sk_data_ready = xprt->old_data_ready;
- sk->sk_state_change = xprt->old_state_change;
- sk->sk_write_space = xprt->old_write_space;
- write_unlock_bh(&sk->sk_callback_lock);
-
- sk->sk_no_check = 0;
-
- sock_release(sock);
-}
-
-static void
-xprt_socket_autoclose(void *args)
+static void xprt_autoclose(void *args)
{
struct rpc_xprt *xprt = (struct rpc_xprt *)args;
xprt_disconnect(xprt);
- xprt_close(xprt);
+ xprt->ops->close(xprt);
xprt_release_write(xprt, NULL);
}
-/*
- * Mark a transport as disconnected
+/**
+ * xprt_disconnect - mark a transport as disconnected
+ * @xprt: transport to flag for disconnect
+ *
*/
-static void
-xprt_disconnect(struct rpc_xprt *xprt)
+void xprt_disconnect(struct rpc_xprt *xprt)
{
dprintk("RPC: disconnected transport %p\n", xprt);
- spin_lock_bh(&xprt->sock_lock);
+ spin_lock_bh(&xprt->transport_lock);
xprt_clear_connected(xprt);
- rpc_wake_up_status(&xprt->pending, -ENOTCONN);
- spin_unlock_bh(&xprt->sock_lock);
+ xprt_wake_pending_tasks(xprt, -ENOTCONN);
+ spin_unlock_bh(&xprt->transport_lock);
}
-/*
- * Used to allow disconnection when we've been idle
- */
static void
xprt_init_autodisconnect(unsigned long data)
{
struct rpc_xprt *xprt = (struct rpc_xprt *)data;
- spin_lock(&xprt->sock_lock);
+ spin_lock(&xprt->transport_lock);
if (!list_empty(&xprt->recv) || xprt->shutdown)
goto out_abort;
- if (test_and_set_bit(XPRT_LOCKED, &xprt->sockstate))
+ if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
goto out_abort;
- spin_unlock(&xprt->sock_lock);
- /* Let keventd close the socket */
- if (test_bit(XPRT_CONNECTING, &xprt->sockstate) != 0)
+ spin_unlock(&xprt->transport_lock);
+ if (xprt_connecting(xprt))
xprt_release_write(xprt, NULL);
else
schedule_work(&xprt->task_cleanup);
return;
out_abort:
- spin_unlock(&xprt->sock_lock);
-}
-
-static void xprt_socket_connect(void *args)
-{
- struct rpc_xprt *xprt = (struct rpc_xprt *)args;
- struct socket *sock = xprt->sock;
- int status = -EIO;
-
- if (xprt->shutdown || xprt->addr.sin_port == 0)
- goto out;
-
- /*
- * Start by resetting any existing state
- */
- xprt_close(xprt);
- sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport);
- if (sock == NULL) {
- /* couldn't create socket or bind to reserved port;
- * this is likely a permanent error, so cause an abort */
- goto out;
- }
- xprt_bind_socket(xprt, sock);
- xprt_sock_setbufsize(xprt);
-
- status = 0;
- if (!xprt->stream)
- goto out;
-
- /*
- * Tell the socket layer to start connecting...
- */
- status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
- sizeof(xprt->addr), O_NONBLOCK);
- dprintk("RPC: %p connect status %d connected %d sock state %d\n",
- xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
- if (status < 0) {
- switch (status) {
- case -EINPROGRESS:
- case -EALREADY:
- goto out_clear;
- }
- }
-out:
- if (status < 0)
- rpc_wake_up_status(&xprt->pending, status);
- else
- rpc_wake_up(&xprt->pending);
-out_clear:
- smp_mb__before_clear_bit();
- clear_bit(XPRT_CONNECTING, &xprt->sockstate);
- smp_mb__after_clear_bit();
+ spin_unlock(&xprt->transport_lock);
}
-/*
- * Attempt to connect a TCP socket.
+/**
+ * xprt_connect - schedule a transport connect operation
+ * @task: RPC task that is requesting the connect
*
*/
void xprt_connect(struct rpc_task *task)
@@ -552,37 +546,19 @@ void xprt_connect(struct rpc_task *task)
if (!xprt_lock_write(xprt, task))
return;
if (xprt_connected(xprt))
- goto out_write;
+ xprt_release_write(xprt, task);
+ else {
+ if (task->tk_rqstp)
+ task->tk_rqstp->rq_bytes_sent = 0;
- if (task->tk_rqstp)
- task->tk_rqstp->rq_bytes_sent = 0;
-
- task->tk_timeout = RPC_CONNECT_TIMEOUT;
- rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
- if (!test_and_set_bit(XPRT_CONNECTING, &xprt->sockstate)) {
- /* Note: if we are here due to a dropped connection
- * we delay reconnecting by RPC_REESTABLISH_TIMEOUT/HZ
- * seconds
- */
- if (xprt->sock != NULL)
- schedule_delayed_work(&xprt->sock_connect,
- RPC_REESTABLISH_TIMEOUT);
- else {
- schedule_work(&xprt->sock_connect);
- if (!RPC_IS_ASYNC(task))
- flush_scheduled_work();
- }
+ task->tk_timeout = xprt->connect_timeout;
+ rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
+ xprt->ops->connect(task);
}
return;
- out_write:
- xprt_release_write(xprt, task);
}
-/*
- * We arrive here when awoken from waiting on connection establishment.
- */
-static void
-xprt_connect_status(struct rpc_task *task)
+static void xprt_connect_status(struct rpc_task *task)
{
struct rpc_xprt *xprt = task->tk_xprt;
@@ -592,31 +568,42 @@ xprt_connect_status(struct rpc_task *task)
return;
}
- /* if soft mounted, just cause this RPC to fail */
- if (RPC_IS_SOFT(task))
- task->tk_status = -EIO;
-
switch (task->tk_status) {
case -ECONNREFUSED:
case -ECONNRESET:
+ dprintk("RPC: %4d xprt_connect_status: server %s refused connection\n",
+ task->tk_pid, task->tk_client->cl_server);
+ break;
case -ENOTCONN:
- return;
+ dprintk("RPC: %4d xprt_connect_status: connection broken\n",
+ task->tk_pid);
+ break;
case -ETIMEDOUT:
- dprintk("RPC: %4d xprt_connect_status: timed out\n",
+ dprintk("RPC: %4d xprt_connect_status: connect attempt timed out\n",
task->tk_pid);
break;
default:
- printk(KERN_ERR "RPC: error %d connecting to server %s\n",
- -task->tk_status, task->tk_client->cl_server);
+ dprintk("RPC: %4d xprt_connect_status: error %d connecting to server %s\n",
+ task->tk_pid, -task->tk_status, task->tk_client->cl_server);
+ xprt_release_write(xprt, task);
+ task->tk_status = -EIO;
+ return;
+ }
+
+ /* if soft mounted, just cause this RPC to fail */
+ if (RPC_IS_SOFT(task)) {
+ xprt_release_write(xprt, task);
+ task->tk_status = -EIO;
}
- xprt_release_write(xprt, task);
}
-/*
- * Look up the RPC request corresponding to a reply, and then lock it.
+/**
+ * xprt_lookup_rqst - find an RPC request corresponding to an XID
+ * @xprt: transport on which the original request was transmitted
+ * @xid: RPC XID of incoming reply
+ *
*/
-static inline struct rpc_rqst *
-xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
+struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
{
struct list_head *pos;
struct rpc_rqst *req = NULL;
@@ -631,556 +618,68 @@ xprt_lookup_rqst(struct rpc_xprt *xprt, u32 xid)
return req;
}
-/*
- * Complete reply received.
- * The TCP code relies on us to remove the request from xprt->pending.
- */
-static void
-xprt_complete_rqst(struct rpc_xprt *xprt, struct rpc_rqst *req, int copied)
-{
- struct rpc_task *task = req->rq_task;
- struct rpc_clnt *clnt = task->tk_client;
-
- /* Adjust congestion window */
- if (!xprt->nocong) {
- unsigned timer = task->tk_msg.rpc_proc->p_timer;
- xprt_adjust_cwnd(xprt, copied);
- __xprt_put_cong(xprt, req);
- if (timer) {
- if (req->rq_ntrans == 1)
- rpc_update_rtt(clnt->cl_rtt, timer,
- (long)jiffies - req->rq_xtime);
- rpc_set_timeo(clnt->cl_rtt, timer, req->rq_ntrans - 1);
- }
- }
-
-#ifdef RPC_PROFILE
- /* Profile only reads for now */
- if (copied > 1024) {
- static unsigned long nextstat;
- static unsigned long pkt_rtt, pkt_len, pkt_cnt;
-
- pkt_cnt++;
- pkt_len += req->rq_slen + copied;
- pkt_rtt += jiffies - req->rq_xtime;
- if (time_before(nextstat, jiffies)) {
- printk("RPC: %lu %ld cwnd\n", jiffies, xprt->cwnd);
- printk("RPC: %ld %ld %ld %ld stat\n",
- jiffies, pkt_cnt, pkt_len, pkt_rtt);
- pkt_rtt = pkt_len = pkt_cnt = 0;
- nextstat = jiffies + 5 * HZ;
- }
- }
-#endif
-
- dprintk("RPC: %4d has input (%d bytes)\n", task->tk_pid, copied);
- list_del_init(&req->rq_list);
- req->rq_received = req->rq_private_buf.len = copied;
-
- /* ... and wake up the process. */
- rpc_wake_up_task(task);
- return;
-}
-
-static size_t
-skb_read_bits(skb_reader_t *desc, void *to, size_t len)
-{
- if (len > desc->count)
- len = desc->count;
- if (skb_copy_bits(desc->skb, desc->offset, to, len))
- return 0;
- desc->count -= len;
- desc->offset += len;
- return len;
-}
-
-static size_t
-skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
-{
- unsigned int csum2, pos;
-
- if (len > desc->count)
- len = desc->count;
- pos = desc->offset;
- csum2 = skb_copy_and_csum_bits(desc->skb, pos, to, len, 0);
- desc->csum = csum_block_add(desc->csum, csum2, pos);
- desc->count -= len;
- desc->offset += len;
- return len;
-}
-
-/*
- * We have set things up such that we perform the checksum of the UDP
- * packet in parallel with the copies into the RPC client iovec. -DaveM
- */
-int
-csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
-{
- skb_reader_t desc;
-
- desc.skb = skb;
- desc.offset = sizeof(struct udphdr);
- desc.count = skb->len - desc.offset;
-
- if (skb->ip_summed == CHECKSUM_UNNECESSARY)
- goto no_checksum;
-
- desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0)
- return -1;
- if (desc.offset != skb->len) {
- unsigned int csum2;
- csum2 = skb_checksum(skb, desc.offset, skb->len - desc.offset, 0);
- desc.csum = csum_block_add(desc.csum, csum2, desc.offset);
- }
- if (desc.count)
- return -1;
- if ((unsigned short)csum_fold(desc.csum))
- return -1;
- return 0;
-no_checksum:
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0)
- return -1;
- if (desc.count)
- return -1;
- return 0;
-}
-
-/*
- * Input handler for RPC replies. Called from a bottom half and hence
- * atomic.
- */
-static void
-udp_data_ready(struct sock *sk, int len)
-{
- struct rpc_task *task;
- struct rpc_xprt *xprt;
- struct rpc_rqst *rovr;
- struct sk_buff *skb;
- int err, repsize, copied;
- u32 _xid, *xp;
-
- read_lock(&sk->sk_callback_lock);
- dprintk("RPC: udp_data_ready...\n");
- if (!(xprt = xprt_from_sock(sk))) {
- printk("RPC: udp_data_ready request not found!\n");
- goto out;
- }
-
- dprintk("RPC: udp_data_ready client %p\n", xprt);
-
- if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
- goto out;
-
- if (xprt->shutdown)
- goto dropit;
-
- repsize = skb->len - sizeof(struct udphdr);
- if (repsize < 4) {
- printk("RPC: impossible RPC reply size %d!\n", repsize);
- goto dropit;
- }
-
- /* Copy the XID from the skb... */
- xp = skb_header_pointer(skb, sizeof(struct udphdr),
- sizeof(_xid), &_xid);
- if (xp == NULL)
- goto dropit;
-
- /* Look up and lock the request corresponding to the given XID */
- spin_lock(&xprt->sock_lock);
- rovr = xprt_lookup_rqst(xprt, *xp);
- if (!rovr)
- goto out_unlock;
- task = rovr->rq_task;
-
- dprintk("RPC: %4d received reply\n", task->tk_pid);
-
- if ((copied = rovr->rq_private_buf.buflen) > repsize)
- copied = repsize;
-
- /* Suck it into the iovec, verify checksum if not done by hw. */
- if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb))
- goto out_unlock;
-
- /* Something worked... */
- dst_confirm(skb->dst);
-
- xprt_complete_rqst(xprt, rovr, copied);
-
- out_unlock:
- spin_unlock(&xprt->sock_lock);
- dropit:
- skb_free_datagram(sk, skb);
- out:
- read_unlock(&sk->sk_callback_lock);
-}
-
-/*
- * Copy from an skb into memory and shrink the skb.
- */
-static inline size_t
-tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
-{
- if (len > desc->count)
- len = desc->count;
- if (skb_copy_bits(desc->skb, desc->offset, p, len)) {
- dprintk("RPC: failed to copy %zu bytes from skb. %zu bytes remain\n",
- len, desc->count);
- return 0;
- }
- desc->offset += len;
- desc->count -= len;
- dprintk("RPC: copied %zu bytes from skb. %zu bytes remain\n",
- len, desc->count);
- return len;
-}
-
-/*
- * TCP read fragment marker
- */
-static inline void
-tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
-{
- size_t len, used;
- char *p;
-
- p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset;
- len = sizeof(xprt->tcp_recm) - xprt->tcp_offset;
- used = tcp_copy_data(desc, p, len);
- xprt->tcp_offset += used;
- if (used != len)
- return;
- xprt->tcp_reclen = ntohl(xprt->tcp_recm);
- if (xprt->tcp_reclen & 0x80000000)
- xprt->tcp_flags |= XPRT_LAST_FRAG;
- else
- xprt->tcp_flags &= ~XPRT_LAST_FRAG;
- xprt->tcp_reclen &= 0x7fffffff;
- xprt->tcp_flags &= ~XPRT_COPY_RECM;
- xprt->tcp_offset = 0;
- /* Sanity check of the record length */
- if (xprt->tcp_reclen < 4) {
- printk(KERN_ERR "RPC: Invalid TCP record fragment length\n");
- xprt_disconnect(xprt);
- }
- dprintk("RPC: reading TCP record fragment of length %d\n",
- xprt->tcp_reclen);
-}
-
-static void
-tcp_check_recm(struct rpc_xprt *xprt)
-{
- dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n",
- xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags);
- if (xprt->tcp_offset == xprt->tcp_reclen) {
- xprt->tcp_flags |= XPRT_COPY_RECM;
- xprt->tcp_offset = 0;
- if (xprt->tcp_flags & XPRT_LAST_FRAG) {
- xprt->tcp_flags &= ~XPRT_COPY_DATA;
- xprt->tcp_flags |= XPRT_COPY_XID;
- xprt->tcp_copied = 0;
- }
- }
-}
-
-/*
- * TCP read xid
- */
-static inline void
-tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
-{
- size_t len, used;
- char *p;
-
- len = sizeof(xprt->tcp_xid) - xprt->tcp_offset;
- dprintk("RPC: reading XID (%Zu bytes)\n", len);
- p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset;
- used = tcp_copy_data(desc, p, len);
- xprt->tcp_offset += used;
- if (used != len)
- return;
- xprt->tcp_flags &= ~XPRT_COPY_XID;
- xprt->tcp_flags |= XPRT_COPY_DATA;
- xprt->tcp_copied = 4;
- dprintk("RPC: reading reply for XID %08x\n",
- ntohl(xprt->tcp_xid));
- tcp_check_recm(xprt);
-}
-
-/*
- * TCP read and complete request
- */
-static inline void
-tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
-{
- struct rpc_rqst *req;
- struct xdr_buf *rcvbuf;
- size_t len;
- ssize_t r;
-
- /* Find and lock the request corresponding to this xid */
- spin_lock(&xprt->sock_lock);
- req = xprt_lookup_rqst(xprt, xprt->tcp_xid);
- if (!req) {
- xprt->tcp_flags &= ~XPRT_COPY_DATA;
- dprintk("RPC: XID %08x request not found!\n",
- ntohl(xprt->tcp_xid));
- spin_unlock(&xprt->sock_lock);
- return;
- }
-
- rcvbuf = &req->rq_private_buf;
- len = desc->coun