diff options
Diffstat (limited to 'fs/ocfs2/cluster/tcp.c')
| -rw-r--r-- | fs/ocfs2/cluster/tcp.c | 498 |
1 files changed, 290 insertions, 208 deletions
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 9aa426e4212..681691bc233 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -59,6 +59,7 @@ #include <linux/idr.h> #include <linux/kref.h> #include <linux/net.h> +#include <linux/export.h> #include <net/tcp.h> #include <asm/uaccess.h> @@ -107,7 +108,7 @@ static struct rb_root o2net_handler_tree = RB_ROOT; static struct o2net_node o2net_nodes[O2NM_MAX_NODES]; /* XXX someday we'll need better accounting */ -static struct socket *o2net_listen_sock = NULL; +static struct socket *o2net_listen_sock; /* * listen work is only queued by the listening socket callbacks on the @@ -136,7 +137,7 @@ static int o2net_sys_err_translations[O2NET_ERR_MAX] = static void o2net_sc_connect_completed(struct work_struct *work); static void o2net_rx_until_empty(struct work_struct *work); static void o2net_shutdown_sc(struct work_struct *work); -static void o2net_listen_data_ready(struct sock *sk, int bytes); +static void o2net_listen_data_ready(struct sock *sk); static void o2net_sc_send_keep_req(struct work_struct *work); static void o2net_idle_timer(unsigned long data); static void o2net_sc_postpone_idle(struct o2net_sock_container *sc); @@ -153,74 +154,125 @@ static void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, nst->st_node = node; } -static void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) { - do_gettimeofday(&nst->st_sock_time); + nst->st_sock_time = ktime_get(); } -static void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) { - do_gettimeofday(&nst->st_send_time); + nst->st_send_time = ktime_get(); } -static void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) { - do_gettimeofday(&nst->st_status_time); + nst->st_status_time = ktime_get(); } -static void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, - struct o2net_sock_container *sc) +static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) { nst->st_sc = sc; } -static void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, u32 msg_id) +static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, + u32 msg_id) { nst->st_id = msg_id; } -#else /* CONFIG_DEBUG_FS */ - -static inline void o2net_init_nst(struct o2net_send_tracking *nst, u32 msgtype, - u32 msgkey, struct task_struct *task, u8 node) +static inline void o2net_set_sock_timer(struct o2net_sock_container *sc) { + sc->sc_tv_timer = ktime_get(); } -static inline void o2net_set_nst_sock_time(struct o2net_send_tracking *nst) +static inline void o2net_set_data_ready_time(struct o2net_sock_container *sc) { + sc->sc_tv_data_ready = ktime_get(); } -static inline void o2net_set_nst_send_time(struct o2net_send_tracking *nst) +static inline void o2net_set_advance_start_time(struct o2net_sock_container *sc) { + sc->sc_tv_advance_start = ktime_get(); } -static inline void o2net_set_nst_status_time(struct o2net_send_tracking *nst) +static inline void o2net_set_advance_stop_time(struct o2net_sock_container *sc) { + sc->sc_tv_advance_stop = ktime_get(); } -static inline void o2net_set_nst_sock_container(struct o2net_send_tracking *nst, - struct o2net_sock_container *sc) +static inline void o2net_set_func_start_time(struct o2net_sock_container *sc) { + sc->sc_tv_func_start = ktime_get(); } -static inline void o2net_set_nst_msg_id(struct o2net_send_tracking *nst, - u32 msg_id) +static inline void o2net_set_func_stop_time(struct o2net_sock_container *sc) { + sc->sc_tv_func_stop = ktime_get(); } +#else /* CONFIG_DEBUG_FS */ +# define o2net_init_nst(a, b, c, d, e) +# define o2net_set_nst_sock_time(a) +# define o2net_set_nst_send_time(a) +# define o2net_set_nst_status_time(a) +# define o2net_set_nst_sock_container(a, b) +# define o2net_set_nst_msg_id(a, b) +# define o2net_set_sock_timer(a) +# define o2net_set_data_ready_time(a) +# define o2net_set_advance_start_time(a) +# define o2net_set_advance_stop_time(a) +# define o2net_set_func_start_time(a) +# define o2net_set_func_stop_time(a) #endif /* CONFIG_DEBUG_FS */ -static inline int o2net_reconnect_delay(void) +#ifdef CONFIG_OCFS2_FS_STATS +static ktime_t o2net_get_func_run_time(struct o2net_sock_container *sc) +{ + return ktime_sub(sc->sc_tv_func_stop, sc->sc_tv_func_start); +} + +static void o2net_update_send_stats(struct o2net_send_tracking *nst, + struct o2net_sock_container *sc) +{ + sc->sc_tv_status_total = ktime_add(sc->sc_tv_status_total, + ktime_sub(ktime_get(), + nst->st_status_time)); + sc->sc_tv_send_total = ktime_add(sc->sc_tv_send_total, + ktime_sub(nst->st_status_time, + nst->st_send_time)); + sc->sc_tv_acquiry_total = ktime_add(sc->sc_tv_acquiry_total, + ktime_sub(nst->st_send_time, + nst->st_sock_time)); + sc->sc_send_count++; +} + +static void o2net_update_recv_stats(struct o2net_sock_container *sc) +{ + sc->sc_tv_process_total = ktime_add(sc->sc_tv_process_total, + o2net_get_func_run_time(sc)); + sc->sc_recv_count++; +} + +#else + +# define o2net_update_send_stats(a, b) + +# define o2net_update_recv_stats(sc) + +#endif /* CONFIG_OCFS2_FS_STATS */ + +static inline unsigned int o2net_reconnect_delay(void) { return o2nm_single_cluster->cl_reconnect_delay_ms; } -static inline int o2net_keepalive_delay(void) +static inline unsigned int o2net_keepalive_delay(void) { return o2nm_single_cluster->cl_keepalive_delay_ms; } -static inline int o2net_idle_timeout(void) +static inline unsigned int o2net_idle_timeout(void) { return o2nm_single_cluster->cl_idle_timeout_ms; } @@ -252,28 +304,22 @@ static u8 o2net_num_from_nn(struct o2net_node *nn) static int o2net_prep_nsw(struct o2net_node *nn, struct o2net_status_wait *nsw) { - int ret = 0; - - do { - if (!idr_pre_get(&nn->nn_status_idr, GFP_ATOMIC)) { - ret = -EAGAIN; - break; - } - spin_lock(&nn->nn_lock); - ret = idr_get_new(&nn->nn_status_idr, nsw, &nsw->ns_id); - if (ret == 0) - list_add_tail(&nsw->ns_node_item, - &nn->nn_status_list); - spin_unlock(&nn->nn_lock); - } while (ret == -EAGAIN); + int ret; - if (ret == 0) { - init_waitqueue_head(&nsw->ns_wq); - nsw->ns_sys_status = O2NET_ERR_NONE; - nsw->ns_status = 0; + spin_lock(&nn->nn_lock); + ret = idr_alloc(&nn->nn_status_idr, nsw, 0, 0, GFP_ATOMIC); + if (ret >= 0) { + nsw->ns_id = ret; + list_add_tail(&nsw->ns_node_item, &nn->nn_status_list); } + spin_unlock(&nn->nn_lock); + if (ret < 0) + return ret; - return ret; + init_waitqueue_head(&nsw->ns_wq); + nsw->ns_sys_status = O2NET_ERR_NONE; + nsw->ns_status = 0; + return 0; } static void o2net_complete_nsw_locked(struct o2net_node *nn, @@ -355,10 +401,14 @@ static void sc_kref_release(struct kref *kref) sc->sc_sock = NULL; } + o2nm_undepend_item(&sc->sc_node->nd_item); o2nm_node_put(sc->sc_node); sc->sc_node = NULL; o2net_debug_del_sc(sc); + + if (sc->sc_page) + __free_page(sc->sc_page); kfree(sc); } @@ -376,6 +426,7 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node) { struct o2net_sock_container *sc, *ret = NULL; struct page *page = NULL; + int status = 0; page = alloc_page(GFP_NOFS); sc = kzalloc(sizeof(*sc), GFP_NOFS); @@ -386,6 +437,13 @@ static struct o2net_sock_container *sc_alloc(struct o2nm_node *node) o2nm_node_get(node); sc->sc_node = node; + /* pin the node item of the remote node */ + status = o2nm_depend_item(&node->nd_item); + if (status) { + mlog_errno(status); + o2nm_node_put(node); + goto out; + } INIT_WORK(&sc->sc_connect_work, o2net_sc_connect_completed); INIT_WORK(&sc->sc_rx_work, o2net_rx_until_empty); INIT_WORK(&sc->sc_shutdown_work, o2net_shutdown_sc); @@ -485,8 +543,9 @@ static void o2net_set_nn_state(struct o2net_node *nn, } if (was_valid && !valid) { - printk(KERN_NOTICE "o2net: no longer connected to " - SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); + if (old_sc) + printk(KERN_NOTICE "o2net: No longer connected to " + SC_NODEF_FMT "\n", SC_NODEF_ARGS(old_sc)); o2net_complete_nodes_nsw(nn); } @@ -495,7 +554,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, cancel_delayed_work(&nn->nn_connect_expired); printk(KERN_NOTICE "o2net: %s " SC_NODEF_FMT "\n", o2nm_this_node() > sc->sc_node->nd_num ? - "connected to" : "accepted connection from", + "Connected to" : "Accepted connection from", SC_NODEF_ARGS(sc)); } @@ -505,7 +564,7 @@ static void o2net_set_nn_state(struct o2net_node *nn, * the work queue actually being up. */ if (!valid && o2net_wq) { unsigned long delay; - /* delay if we're withing a RECONNECT_DELAY of the + /* delay if we're within a RECONNECT_DELAY of the * last attempt */ delay = (nn->nn_last_connect_attempt + msecs_to_jiffies(o2net_reconnect_delay())) @@ -538,15 +597,15 @@ static void o2net_set_nn_state(struct o2net_node *nn, } /* see o2net_register_callbacks() */ -static void o2net_data_ready(struct sock *sk, int bytes) +static void o2net_data_ready(struct sock *sk) { - void (*ready)(struct sock *sk, int bytes); + void (*ready)(struct sock *sk); read_lock(&sk->sk_callback_lock); if (sk->sk_user_data) { struct o2net_sock_container *sc = sk->sk_user_data; sclog(sc, "data_ready hit\n"); - do_gettimeofday(&sc->sc_tv_data_ready); + o2net_set_data_ready_time(sc); o2net_sc_queue_work(sc, &sc->sc_rx_work); ready = sc->sc_data_ready; } else { @@ -554,7 +613,7 @@ static void o2net_data_ready(struct sock *sk, int bytes) } read_unlock(&sk->sk_callback_lock); - ready(sk, bytes); + ready(sk); } /* see o2net_register_callbacks() */ @@ -575,19 +634,19 @@ static void o2net_state_change(struct sock *sk) state_change = sc->sc_state_change; switch(sk->sk_state) { - /* ignore connecting sockets as they make progress */ - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - case TCP_ESTABLISHED: - o2net_sc_queue_work(sc, &sc->sc_connect_work); - break; - default: - printk(KERN_INFO "o2net: connection to " SC_NODEF_FMT - " shutdown, state %d\n", - SC_NODEF_ARGS(sc), sk->sk_state); - o2net_sc_queue_work(sc, &sc->sc_shutdown_work); - break; + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + o2net_sc_queue_work(sc, &sc->sc_connect_work); + break; + default: + printk(KERN_INFO "o2net: Connection to " SC_NODEF_FMT + " shutdown, state %d\n", + SC_NODEF_ARGS(sc), sk->sk_state); + o2net_sc_queue_work(sc, &sc->sc_shutdown_work); + break; } out: read_unlock(&sk->sk_callback_lock); @@ -707,32 +766,32 @@ static struct o2net_msg_handler * o2net_handler_tree_lookup(u32 msg_type, u32 key, struct rb_node ***ret_p, struct rb_node **ret_parent) { - struct rb_node **p = &o2net_handler_tree.rb_node; - struct rb_node *parent = NULL; + struct rb_node **p = &o2net_handler_tree.rb_node; + struct rb_node *parent = NULL; struct o2net_msg_handler *nmh, *ret = NULL; int cmp; - while (*p) { - parent = *p; - nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); + while (*p) { + parent = *p; + nmh = rb_entry(parent, struct o2net_msg_handler, nh_node); cmp = o2net_handler_cmp(nmh, msg_type, key); - if (cmp < 0) - p = &(*p)->rb_left; - else if (cmp > 0) - p = &(*p)->rb_right; - else { + if (cmp < 0) + p = &(*p)->rb_left; + else if (cmp > 0) + p = &(*p)->rb_right; + else { ret = nmh; - break; + break; } - } + } - if (ret_p != NULL) - *ret_p = p; - if (ret_parent != NULL) - *ret_parent = parent; + if (ret_p != NULL) + *ret_p = p; + if (ret_parent != NULL) + *ret_parent = parent; - return ret; + return ret; } static void o2net_handler_kref_release(struct kref *kref) @@ -809,7 +868,7 @@ int o2net_register_handler(u32 msg_type, u32 key, u32 max_len, /* we've had some trouble with handlers seemingly vanishing. */ mlog_bug_on_msg(o2net_handler_tree_lookup(msg_type, key, &p, &parent) == NULL, - "couldn't find handler we *just* registerd " + "couldn't find handler we *just* registered " "for type %u key %08x\n", msg_type, key); } write_unlock(&o2net_handler_lock); @@ -857,57 +916,30 @@ static struct o2net_msg_handler *o2net_handler_get(u32 msg_type, u32 key) static int o2net_recv_tcp_msg(struct socket *sock, void *data, size_t len) { - int ret; - mm_segment_t oldfs; - struct kvec vec = { - .iov_len = len, - .iov_base = data, - }; - struct msghdr msg = { - .msg_iovlen = 1, - .msg_iov = (struct iovec *)&vec, - .msg_flags = MSG_DONTWAIT, - }; - - oldfs = get_fs(); - set_fs(get_ds()); - ret = sock_recvmsg(sock, &msg, len, msg.msg_flags); - set_fs(oldfs); - - return ret; + struct kvec vec = { .iov_len = len, .iov_base = data, }; + struct msghdr msg = { .msg_flags = MSG_DONTWAIT, }; + return kernel_recvmsg(sock, &msg, &vec, 1, len, msg.msg_flags); } static int o2net_send_tcp_msg(struct socket *sock, struct kvec *vec, size_t veclen, size_t total) { int ret; - mm_segment_t oldfs; - struct msghdr msg = { - .msg_iov = (struct iovec *)vec, - .msg_iovlen = veclen, - }; + struct msghdr msg; if (sock == NULL) { ret = -EINVAL; goto out; } - oldfs = get_fs(); - set_fs(get_ds()); - ret = sock_sendmsg(sock, &msg, total); - set_fs(oldfs); - if (ret != total) { - mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, - total); - if (ret >= 0) - ret = -EPIPE; /* should be smarter, I bet */ - goto out; - } - - ret = 0; + ret = kernel_sendmsg(sock, &msg, vec, veclen, total); + if (likely(ret == total)) + return 0; + mlog(ML_ERROR, "sendmsg returned %d instead of %zu\n", ret, total); + if (ret >= 0) + ret = -EPIPE; /* should be smarter, I bet */ out: - if (ret < 0) - mlog(0, "returning error: %d\n", ret); + mlog(0, "returning error: %d\n", ret); return ret; } @@ -974,6 +1006,25 @@ static int o2net_tx_can_proceed(struct o2net_node *nn, return ret; } +/* Get a map of all nodes to which this node is currently connected to */ +void o2net_fill_node_map(unsigned long *map, unsigned bytes) +{ + struct o2net_sock_container *sc; + int node, ret; + + BUG_ON(bytes < (BITS_TO_LONGS(O2NM_MAX_NODES) * sizeof(unsigned long))); + + memset(map, 0, bytes); + for (node = 0; node < O2NM_MAX_NODES; ++node) { + o2net_tx_can_proceed(o2net_nn_from_num(node), &sc, &ret); + if (!ret) { + set_bit(node, map); + sc_put(sc); + } + } +} +EXPORT_SYMBOL_GPL(o2net_fill_node_map); + int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, size_t caller_veclen, u8 target_node, int *status) { @@ -1070,6 +1121,8 @@ int o2net_send_message_vec(u32 msg_type, u32 key, struct kvec *caller_vec, o2net_set_nst_status_time(&nst); wait_event(nsw.ns_wq, o2net_nsw_completed(nn, &nsw)); + o2net_update_send_stats(&nst, sc); + /* Note that we avoid overwriting the callers status return * variable if a system error was reported on the other * side. Callers beware. */ @@ -1083,10 +1136,8 @@ out: o2net_debug_del_nst(&nst); /* must be before dropping sc and node */ if (sc) sc_put(sc); - if (vec) - kfree(vec); - if (msg) - kfree(msg); + kfree(vec); + kfree(msg); o2net_complete_nsw(nn, &nsw, 0, 0, 0); return ret; } @@ -1183,13 +1234,15 @@ static int o2net_process_message(struct o2net_sock_container *sc, if (syserr != O2NET_ERR_NONE) goto out_respond; - do_gettimeofday(&sc->sc_tv_func_start); + o2net_set_func_start_time(sc); sc->sc_msg_key = be32_to_cpu(hdr->key); sc->sc_msg_type = be16_to_cpu(hdr->msg_type); handler_status = (nmh->nh_func)(hdr, sizeof(struct o2net_msg) + be16_to_cpu(hdr->data_len), nmh->nh_func_data, &ret_data); - do_gettimeofday(&sc->sc_tv_func_stop); + o2net_set_func_stop_time(sc); + + o2net_update_recv_stats(sc); out_respond: /* this destroys the hdr, so don't use it after this */ @@ -1220,11 +1273,11 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); if (hand->protocol_version != cpu_to_be64(O2NET_PROTOCOL_VERSION)) { - mlog(ML_NOTICE, SC_NODEF_FMT " advertised net protocol " - "version %llu but %llu is required, disconnecting\n", - SC_NODEF_ARGS(sc), - (unsigned long long)be64_to_cpu(hand->protocol_version), - O2NET_PROTOCOL_VERSION); + printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " Advertised net " + "protocol version %llu but %llu is required. " + "Disconnecting.\n", SC_NODEF_ARGS(sc), + (unsigned long long)be64_to_cpu(hand->protocol_version), + O2NET_PROTOCOL_VERSION); /* don't bother reconnecting if its the wrong version. */ o2net_ensure_shutdown(nn, sc, -ENOTCONN); @@ -1238,33 +1291,33 @@ static int o2net_check_handshake(struct o2net_sock_container *sc) */ if (be32_to_cpu(hand->o2net_idle_timeout_ms) != o2net_idle_timeout()) { - mlog(ML_NOTICE, SC_NODEF_FMT " uses a network idle timeout of " - "%u ms, but we use %u ms locally. disconnecting\n", - SC_NODEF_ARGS(sc), - be32_to_cpu(hand->o2net_idle_timeout_ms), - o2net_idle_timeout()); + printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a network " + "idle timeout of %u ms, but we use %u ms locally. " + "Disconnecting.\n", SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_idle_timeout_ms), + o2net_idle_timeout()); o2net_ensure_shutdown(nn, sc, -ENOTCONN); return -1; } if (be32_to_cpu(hand->o2net_keepalive_delay_ms) != o2net_keepalive_delay()) { - mlog(ML_NOTICE, SC_NODEF_FMT " uses a keepalive delay of " - "%u ms, but we use %u ms locally. disconnecting\n", - SC_NODEF_ARGS(sc), - be32_to_cpu(hand->o2net_keepalive_delay_ms), - o2net_keepalive_delay()); + printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a keepalive " + "delay of %u ms, but we use %u ms locally. " + "Disconnecting.\n", SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2net_keepalive_delay_ms), + o2net_keepalive_delay()); o2net_ensure_shutdown(nn, sc, -ENOTCONN); return -1; } if (be32_to_cpu(hand->o2hb_heartbeat_timeout_ms) != O2HB_MAX_WRITE_TIMEOUT_MS) { - mlog(ML_NOTICE, SC_NODEF_FMT " uses a heartbeat timeout of " - "%u ms, but we use %u ms locally. disconnecting\n", - SC_NODEF_ARGS(sc), - be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), - O2HB_MAX_WRITE_TIMEOUT_MS); + printk(KERN_NOTICE "o2net: " SC_NODEF_FMT " uses a heartbeat " + "timeout of %u ms, but we use %u ms locally. " + "Disconnecting.\n", SC_NODEF_ARGS(sc), + be32_to_cpu(hand->o2hb_heartbeat_timeout_ms), + O2HB_MAX_WRITE_TIMEOUT_MS); o2net_ensure_shutdown(nn, sc, -ENOTCONN); return -1; } @@ -1300,7 +1353,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) size_t datalen; sclog(sc, "receiving\n"); - do_gettimeofday(&sc->sc_tv_advance_start); + o2net_set_advance_start_time(sc); if (unlikely(sc->sc_handshake_ok == 0)) { if(sc->sc_page_off < sizeof(struct o2net_handshake)) { @@ -1375,7 +1428,7 @@ static int o2net_advance_rx(struct o2net_sock_container *sc) out: sclog(sc, "ret = %d\n", ret); - do_gettimeofday(&sc->sc_tv_advance_stop); + o2net_set_advance_stop_time(sc); return ret; } @@ -1475,27 +1528,16 @@ static void o2net_idle_timer(unsigned long data) { struct o2net_sock_container *sc = (struct o2net_sock_container *)data; struct o2net_node *nn = o2net_nn_from_num(sc->sc_node->nd_num); - struct timeval now; - - do_gettimeofday(&now); +#ifdef CONFIG_DEBUG_FS + unsigned long msecs = ktime_to_ms(ktime_get()) - + ktime_to_ms(sc->sc_tv_timer); +#else + unsigned long msecs = o2net_idle_timeout(); +#endif - printk(KERN_NOTICE "o2net: connection to " SC_NODEF_FMT " has been idle for %u.%u " - "seconds, shutting it down.\n", SC_NODEF_ARGS(sc), - o2net_idle_timeout() / 1000, - o2net_idle_timeout() % 1000); - mlog(ML_NOTICE, "here are some times that might help debug the " - "situation: (tmr %ld.%ld now %ld.%ld dr %ld.%ld adv " - "%ld.%ld:%ld.%ld func (%08x:%u) %ld.%ld:%ld.%ld)\n", - sc->sc_tv_timer.tv_sec, (long) sc->sc_tv_timer.tv_usec, - now.tv_sec, (long) now.tv_usec, - sc->sc_tv_data_ready.tv_sec, (long) sc->sc_tv_data_ready.tv_usec, - sc->sc_tv_advance_start.tv_sec, - (long) sc->sc_tv_advance_start.tv_usec, - sc->sc_tv_advance_stop.tv_sec, - (long) sc->sc_tv_advance_stop.tv_usec, - sc->sc_msg_key, sc->sc_msg_type, - sc->sc_tv_func_start.tv_sec, (long) sc->sc_tv_func_start.tv_usec, - sc->sc_tv_func_stop.tv_sec, (long) sc->sc_tv_func_stop.tv_usec); + printk(KERN_NOTICE "o2net: Connection to " SC_NODEF_FMT " has been " + "idle for %lu.%lu secs, shutting it down.\n", SC_NODEF_ARGS(sc), + msecs / 1000, msecs % 1000); /* * Initialize the nn_timeout so that the next connection attempt @@ -1511,7 +1553,7 @@ static void o2net_sc_reset_idle_timer(struct o2net_sock_container *sc) o2net_sc_cancel_delayed_work(sc, &sc->sc_keepalive_work); o2net_sc_queue_delayed_work(sc, &sc->sc_keepalive_work, msecs_to_jiffies(o2net_keepalive_delay())); - do_gettimeofday(&sc->sc_tv_timer); + o2net_set_sock_timer(sc); mod_timer(&sc->sc_idle_timeout, jiffies + msecs_to_jiffies(o2net_idle_timeout())); } @@ -1627,13 +1669,12 @@ static void o2net_start_connect(struct work_struct *work) ret = 0; out: - if (ret) { - mlog(ML_NOTICE, "connect attempt to " SC_NODEF_FMT " failed " - "with errno %d\n", SC_NODEF_ARGS(sc), ret); + if (ret && sc) { + printk(KERN_NOTICE "o2net: Connect attempt to " SC_NODEF_FMT + " failed with errno %d\n", SC_NODEF_ARGS(sc), ret); /* 0 err so that another will be queued and attempted * from set_nn_state */ - if (sc) - o2net_ensure_shutdown(nn, sc, 0); + o2net_ensure_shutdown(nn, sc, 0); } if (sc) sc_put(sc); @@ -1652,8 +1693,8 @@ static void o2net_connect_expired(struct work_struct *work) spin_lock(&nn->nn_lock); if (!nn->nn_sc_valid) { - mlog(ML_ERROR, "no connection established with node %u after " - "%u.%u seconds, giving up and returning errors.\n", + printk(KERN_NOTICE "o2net: No connection established with " + "node %u after %u.%u seconds, giving up.\n", o2net_num_from_nn(nn), o2net_idle_timeout() / 1000, o2net_idle_timeout() % 1000); @@ -1758,7 +1799,7 @@ int o2net_register_hb_callbacks(void) /* ------------------------------------------------------------ */ -static int o2net_accept_one(struct socket *sock) +static int o2net_accept_one(struct socket *sock, int *more) { int ret, slen; struct sockaddr_in sin; @@ -1769,6 +1810,7 @@ static int o2net_accept_one(struct socket *sock) struct o2net_node *nn; BUG_ON(sock == NULL); + *more = 0; ret = sock_create_lite(sock->sk->sk_family, sock->sk->sk_type, sock->sk->sk_protocol, &new_sock); if (ret) @@ -1780,6 +1822,7 @@ static int o2net_accept_one(struct socket *sock) if (ret < 0) goto out; + *more = 1; new_sock->sk->sk_allocation = GFP_ATOMIC; ret = o2net_set_nodelay(new_sock); @@ -1796,21 +1839,25 @@ static int o2net_accept_one(struct socket *sock) node = o2nm_get_node_by_ip(sin.sin_addr.s_addr); if (node == NULL) { - mlog(ML_NOTICE, "attempt to connect from unknown node at %pI4:%d\n", - &sin.sin_addr.s_addr, ntohs(sin.sin_port)); + printk(KERN_NOTICE "o2net: Attempt to connect from unknown " + "node at %pI4:%d\n", &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); ret = -EINVAL; goto out; } if (o2nm_this_node() >= node->nd_num) { local_node = o2nm_get_node_by_num(o2nm_this_node()); - mlog(ML_NOTICE, "unexpected connect attempt seen at node '%s' (" - "%u, %pI4:%d) from node '%s' (%u, %pI4:%d)\n", - local_node->nd_name, local_node->nd_num, - &(local_node->nd_ipv4_address), - ntohs(local_node->nd_ipv4_port), - node->nd_name, node->nd_num, &sin.sin_addr.s_addr, - ntohs(sin.sin_port)); + if (local_node) + printk(KERN_NOTICE "o2net: Unexpected connect attempt " + "seen at node '%s' (%u, %pI4:%d) from " + "node '%s' (%u, %pI4:%d)\n", + local_node->nd_name, local_node->nd_num, + &(local_node->nd_ipv4_address), + ntohs(local_node->nd_ipv4_port), + node->nd_name, + node->nd_num, &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); ret = -EINVAL; goto out; } @@ -1835,10 +1882,10 @@ static int o2net_accept_one(struct socket *sock) ret = 0; spin_unlock(&nn->nn_lock); if (ret) { - mlog(ML_NOTICE, "attempt to connect from node '%s' at " - "%pI4:%d but it already has an open connection\n", - node->nd_name, &sin.sin_addr.s_addr, - ntohs(sin.sin_port)); + printk(KERN_NOTICE "o2net: Attempt to connect from node '%s' " + "at %pI4:%d but it already has an open connection\n", + node->nd_name, &sin.sin_addr.s_addr, + ntohs(sin.sin_port)); goto out; } @@ -1874,16 +1921,41 @@ out: return ret; } +/* + * This function is invoked in response to one or more + * pending accepts at softIRQ level. We must drain the + * entire que before returning. + */ + static void o2net_accept_many(struct work_struct *work) { struct socket *sock = o2net_listen_sock; - while (o2net_accept_one(sock) == 0) + int more; + int err; + + /* + * It is critical to note that due to interrupt moderation + * at the network driver level, we can't assume to get a + * softIRQ for every single conn since tcp SYN packets + * can arrive back-to-back, and therefore many pending + * accepts may result in just 1 softIRQ. If we terminate + * the o2net_accept_one() loop upon seeing an err, what happens + * to the rest of the conns in the queue? If no new SYN + * arrives for hours, no softIRQ will be delivered, + * and the connections will just sit in the queue. + */ + + for (;;) { + err = o2net_accept_one(sock, &more); + if (!more) + break; cond_resched(); + } } -static void o2net_listen_data_ready(struct sock *sk, int bytes) +static void o2net_listen_data_ready(struct sock *sk) { - void (*ready)(struct sock *sk, int bytes); + void (*ready)(struct sock *sk); read_lock(&sk->sk_callback_lock); ready = sk->sk_user_data; @@ -1892,18 +1964,29 @@ static void o2net_listen_data_ready(struct sock *sk, int bytes) goto out; } - /* ->sk_data_ready is also called for a newly established child socket - * before it has been accepted and the acceptor has set up their - * data_ready.. we only want to queue listen work for our listening - * socket */ + /* This callback may called twice when a new connection + * is being established as a child socket inherits everything + * from a parent LISTEN socket, including the data_ready cb of + * the parent. This leads to a hazard. In o2net_accept_one() + * we are still initializing the child socket but have not + * changed the inherited data_ready callback yet when + * data starts arriving. + * We avoid this hazard by checking the state. + * For the listening socket, the state will be TCP_LISTEN; for the new + * socket, will be TCP_ESTABLISHED. Also, in this case, + * sk->sk_user_data is not a valid function pointer. + */ + if (sk->sk_state == TCP_LISTEN) { - mlog(ML_TCP, "bytes: %d\n", bytes); queue_work(o2net_wq, &o2net_listen_work); + } else { + ready = NULL; } out: read_unlock(&sk->sk_callback_lock); - ready(sk, bytes); + if (ready != NULL) + ready(sk); } static int o2net_open_listening_sock(__be32 addr, __be16 port) @@ -1918,7 +2001,7 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) ret = sock_create(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock); if (ret < 0) { - mlog(ML_ERROR, "unable to create socket, ret=%d\n", ret); + printk(KERN_ERR "o2net: Error %d while creating socket\n", ret); goto out; } @@ -1932,19 +2015,18 @@ static int o2net_open_listening_sock(__be32 addr, __be16 port) o2net_listen_sock = sock; INIT_WORK(&o2net_listen_work, o2net_accept_many); - sock->sk->sk_reuse = 1; + sock->sk->sk_reuse = SK_CAN_REUSE; ret = sock->ops->bind(sock, (struct sockaddr *)&sin, sizeof(sin)); if (ret < 0) { - mlog(ML_ERROR, "unable to bind socket at %pI4:%u, " - "ret=%d\n", &addr, ntohs(port), ret); + printk(KERN_ERR "o2net: Error %d while binding socket at " + "%pI4:%u\n", ret, &addr, ntohs(port)); goto out; } ret = sock->ops->listen(sock, 64); - if (ret < 0) { - mlog(ML_ERROR, "unable to listen on %pI4:%u, ret=%d\n", - &addr, ntohs(port), ret); - } + if (ret < 0) + printk(KERN_ERR "o2net: Error %d while listening on %pI4:%u\n", + ret, &addr, ntohs(port)); out: if (ret) { |
