diff options
Diffstat (limited to 'fs/dlm/lowcomms.c')
| -rw-r--r-- | fs/dlm/lowcomms.c | 533 |
1 files changed, 414 insertions, 119 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 37a34c2c622..d08e079ea5d 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -52,7 +52,7 @@ #include <linux/mutex.h> #include <linux/sctp.h> #include <linux/slab.h> -#include <net/sctp/user.h> +#include <net/sctp/sctp.h> #include <net/ipv6.h> #include "dlm_internal.h" @@ -63,6 +63,9 @@ #define NEEDED_RMEM (4*1024*1024) #define CONN_HASH_SIZE 32 +/* Number of messages to send before rescheduling */ +#define MAX_SEND_MSG_COUNT 25 + struct cbuf { unsigned int base; unsigned int len; @@ -108,6 +111,7 @@ struct connection { #define CF_INIT_PENDING 4 #define CF_IS_OTHERCON 5 #define CF_CLOSE 6 +#define CF_APP_LIMITED 7 struct list_head writequeue; /* List of outgoing writequeue_entries */ spinlock_t writequeue_lock; int (*rx_action) (struct connection *); /* What to do when active */ @@ -121,6 +125,7 @@ struct connection { struct connection *othercon; struct work_struct rwork; /* Receive workqueue */ struct work_struct swork; /* Send workqueue */ + bool try_new_addr; }; #define sock2con(x) ((struct connection *)(x)->sk_user_data) @@ -135,8 +140,20 @@ struct writequeue_entry { struct connection *con; }; +struct dlm_node_addr { + struct list_head list; + int nodeid; + int addr_count; + int curr_addr_index; + struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; +}; + +static LIST_HEAD(dlm_node_addrs); +static DEFINE_SPINLOCK(dlm_node_addrs_spin); + static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; static int dlm_local_count; +static int dlm_allow_conn; /* Work queues */ static struct workqueue_struct *recv_workqueue; @@ -161,12 +178,11 @@ static inline int nodeid_hash(int nodeid) static struct connection *__find_con(int nodeid) { int r; - struct hlist_node *h; struct connection *con; r = nodeid_hash(nodeid); - hlist_for_each_entry(con, h, &connection_hash[r], list) { + hlist_for_each_entry(con, &connection_hash[r], list) { if (con->nodeid == nodeid) return con; } @@ -216,13 +232,12 @@ static struct connection *__nodeid2con(int nodeid, gfp_t alloc) static void foreach_conn(void (*conn_func)(struct connection *c)) { int i; - struct hlist_node *h, *n; + struct hlist_node *n; struct connection *con; for (i = 0; i < CONN_HASH_SIZE; i++) { - hlist_for_each_entry_safe(con, h, n, &connection_hash[i], list){ + hlist_for_each_entry_safe(con, n, &connection_hash[i], list) conn_func(con); - } } } @@ -241,13 +256,12 @@ static struct connection *nodeid2con(int nodeid, gfp_t allocation) static struct connection *assoc2con(int assoc_id) { int i; - struct hlist_node *h; struct connection *con; mutex_lock(&connections_lock); for (i = 0 ; i < CONN_HASH_SIZE; i++) { - hlist_for_each_entry(con, h, &connection_hash[i], list) { + hlist_for_each_entry(con, &connection_hash[i], list) { if (con->sctp_assoc == assoc_id) { mutex_unlock(&connections_lock); return con; @@ -258,33 +272,159 @@ static struct connection *assoc2con(int assoc_id) return NULL; } -static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) +static struct dlm_node_addr *find_node_addr(int nodeid) +{ + struct dlm_node_addr *na; + + list_for_each_entry(na, &dlm_node_addrs, list) { + if (na->nodeid == nodeid) + return na; + } + return NULL; +} + +static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) +{ + switch (x->ss_family) { + case AF_INET: { + struct sockaddr_in *sinx = (struct sockaddr_in *)x; + struct sockaddr_in *siny = (struct sockaddr_in *)y; + if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) + return 0; + if (sinx->sin_port != siny->sin_port) + return 0; + break; + } + case AF_INET6: { + struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; + struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; + if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) + return 0; + if (sinx->sin6_port != siny->sin6_port) + return 0; + break; + } + default: + return 0; + } + return 1; +} + +static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, + struct sockaddr *sa_out, bool try_new_addr) { - struct sockaddr_storage addr; - int error; + struct sockaddr_storage sas; + struct dlm_node_addr *na; if (!dlm_local_count) return -1; - error = dlm_nodeid_to_addr(nodeid, &addr); - if (error) - return error; + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (na && na->addr_count) { + if (try_new_addr) { + na->curr_addr_index++; + if (na->curr_addr_index == na->addr_count) + na->curr_addr_index = 0; + } + + memcpy(&sas, na->addr[na->curr_addr_index ], + sizeof(struct sockaddr_storage)); + } + spin_unlock(&dlm_node_addrs_spin); + + if (!na) + return -EEXIST; + + if (!na->addr_count) + return -ENOENT; + + if (sas_out) + memcpy(sas_out, &sas, sizeof(struct sockaddr_storage)); + + if (!sa_out) + return 0; if (dlm_local_addr[0]->ss_family == AF_INET) { - struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; - struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; + struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; + struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; ret4->sin_addr.s_addr = in4->sin_addr.s_addr; } else { - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; - struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; - ipv6_addr_copy(&ret6->sin6_addr, &in6->sin6_addr); + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas; + struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out; + ret6->sin6_addr = in6->sin6_addr; + } + + return 0; +} + +static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) +{ + struct dlm_node_addr *na; + int rv = -EEXIST; + int addr_i; + + spin_lock(&dlm_node_addrs_spin); + list_for_each_entry(na, &dlm_node_addrs, list) { + if (!na->addr_count) + continue; + + for (addr_i = 0; addr_i < na->addr_count; addr_i++) { + if (addr_compare(na->addr[addr_i], addr)) { + *nodeid = na->nodeid; + rv = 0; + goto unlock; + } + } + } +unlock: + spin_unlock(&dlm_node_addrs_spin); + return rv; +} + +int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) +{ + struct sockaddr_storage *new_addr; + struct dlm_node_addr *new_node, *na; + + new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); + if (!new_node) + return -ENOMEM; + + new_addr = kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS); + if (!new_addr) { + kfree(new_node); + return -ENOMEM; + } + + memcpy(new_addr, addr, len); + + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (!na) { + new_node->nodeid = nodeid; + new_node->addr[0] = new_addr; + new_node->addr_count = 1; + list_add(&new_node->list, &dlm_node_addrs); + spin_unlock(&dlm_node_addrs_spin); + return 0; + } + + if (na->addr_count >= DLM_MAX_ADDR_COUNT) { + spin_unlock(&dlm_node_addrs_spin); + kfree(new_addr); + kfree(new_node); + return -ENOSPC; } + na->addr[na->addr_count++] = new_addr; + spin_unlock(&dlm_node_addrs_spin); + kfree(new_node); return 0; } /* Data available on socket or listen socket received a connect */ -static void lowcomms_data_ready(struct sock *sk, int count_unused) +static void lowcomms_data_ready(struct sock *sk) { struct connection *con = sock2con(sk); if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags)) @@ -295,7 +435,17 @@ static void lowcomms_write_space(struct sock *sk) { struct connection *con = sock2con(sk); - if (con && !test_and_set_bit(CF_WRITE_PENDING, &con->flags)) + if (!con) + return; + + clear_bit(SOCK_NOSPACE, &con->sock->flags); + + if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { + con->sock->sk->sk_write_pending--; + clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags); + } + + if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) queue_work(send_workqueue, &con->swork); } @@ -332,7 +482,7 @@ int dlm_lowcomms_connect_node(int nodeid) } /* Make a socket active */ -static int add_sock(struct socket *sock, struct connection *con) +static void add_sock(struct socket *sock, struct connection *con) { con->sock = sock; @@ -342,7 +492,6 @@ static int add_sock(struct socket *sock, struct connection *con) con->sock->sk->sk_state_change = lowcomms_state_change; con->sock->sk->sk_user_data = con; con->sock->sk->sk_allocation = GFP_NOFS; - return 0; } /* Add the port number to an IPv6 or 4 sockaddr and return the address @@ -424,8 +573,23 @@ static void sctp_send_shutdown(sctp_assoc_t associd) static void sctp_init_failed_foreach(struct connection *con) { + + /* + * Don't try to recover base con and handle race where the + * other node's assoc init creates a assoc and we get that + * notification, then we get a notification that our attempt + * failed due. This happens when we are still trying the primary + * address, but the other node has already tried secondary addrs + * and found one that worked. + */ + if (!con->nodeid || con->sctp_assoc) + return; + + log_print("Retrying SCTP association init for node %d\n", con->nodeid); + + con->try_new_addr = true; con->sctp_assoc = 0; - if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { + if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) { if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) queue_work(send_workqueue, &con->swork); } @@ -442,15 +606,62 @@ static void sctp_init_failed(void) mutex_unlock(&connections_lock); } +static void retry_failed_sctp_send(struct connection *recv_con, + struct sctp_send_failed *sn_send_failed, + char *buf) +{ + int len = sn_send_failed->ssf_length - sizeof(struct sctp_send_failed); + struct dlm_mhandle *mh; + struct connection *con; + char *retry_buf; + int nodeid = sn_send_failed->ssf_info.sinfo_ppid; + + log_print("Retry sending %d bytes to node id %d", len, nodeid); + + if (!nodeid) { + log_print("Shouldn't resend data via listening connection."); + return; + } + + con = nodeid2con(nodeid, 0); + if (!con) { + log_print("Could not look up con for nodeid %d\n", + nodeid); + return; + } + + mh = dlm_lowcomms_get_buffer(nodeid, len, GFP_NOFS, &retry_buf); + if (!mh) { + log_print("Could not allocate buf for retry."); + return; + } + memcpy(retry_buf, buf + sizeof(struct sctp_send_failed), len); + dlm_lowcomms_commit_buffer(mh); + + /* + * If we got a assoc changed event before the send failed event then + * we only need to retry the send. + */ + if (con->sctp_assoc) { + if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) + queue_work(send_workqueue, &con->swork); + } else + sctp_init_failed_foreach(con); +} + /* Something happened to an association */ static void process_sctp_notification(struct connection *con, struct msghdr *msg, char *buf) { union sctp_notification *sn = (union sctp_notification *)buf; + struct linger linger; - if (sn->sn_header.sn_type == SCTP_ASSOC_CHANGE) { + switch (sn->sn_header.sn_type) { + case SCTP_SEND_FAILED: + retry_failed_sctp_send(con, &sn->sn_send_failed, buf); + break; + case SCTP_ASSOC_CHANGE: switch (sn->sn_assoc_change.sac_state) { - case SCTP_COMM_UP: case SCTP_RESTART: { @@ -460,9 +671,6 @@ static void process_sctp_notification(struct connection *con, int prim_len, ret; int addr_len; struct connection *new_con; - sctp_peeloff_arg_t parg; - int parglen = sizeof(parg); - int err; /* * We get this before any data for an association. @@ -497,13 +705,11 @@ static void process_sctp_notification(struct connection *con, return; } make_sockaddr(&prim.ssp_addr, 0, &addr_len); - if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { - int i; + if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) { unsigned char *b=(unsigned char *)&prim.ssp_addr; log_print("reject connect from unknown addr"); - for (i=0; i<sizeof(struct sockaddr_storage);i++) - printk("%02x ", b[i]); - printk("\n"); + print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, + b, sizeof(struct sockaddr_storage)); sctp_send_shutdown(prim.ssp_assoc_id); return; } @@ -513,30 +719,35 @@ static void process_sctp_notification(struct connection *con, return; /* Peel off a new sock */ - parg.associd = sn->sn_assoc_change.sac_assoc_id; - ret = kernel_getsockopt(con->sock, IPPROTO_SCTP, - SCTP_SOCKOPT_PEELOFF, - (void *)&parg, &parglen); + lock_sock(con->sock->sk); + ret = sctp_do_peeloff(con->sock->sk, + sn->sn_assoc_change.sac_assoc_id, + &new_con->sock); + release_sock(con->sock->sk); if (ret < 0) { log_print("Can't peel off a socket for " "connection %d to node %d: err=%d", - parg.associd, nodeid, ret); - return; - } - new_con->sock = sockfd_lookup(parg.sd, &err); - if (!new_con->sock) { - log_print("sockfd_lookup error %d", err); + (int)sn->sn_assoc_change.sac_assoc_id, + nodeid, ret); return; } add_sock(new_con->sock, new_con); - sockfd_put(new_con->sock); + + linger.l_onoff = 1; + linger.l_linger = 0; + ret = kernel_setsockopt(new_con->sock, SOL_SOCKET, SO_LINGER, + (char *)&linger, sizeof(linger)); + if (ret < 0) + log_print("set socket option SO_LINGER failed"); log_print("connecting to %d sctp association %d", nodeid, (int)sn->sn_assoc_change.sac_assoc_id); + new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id; + new_con->try_new_addr = false; /* Send any pending writes */ clear_bit(CF_CONNECT_PENDING, &new_con->flags); - clear_bit(CF_INIT_PENDING, &con->flags); + clear_bit(CF_INIT_PENDING, &new_con->flags); if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { queue_work(send_workqueue, &new_con->swork); } @@ -555,14 +766,10 @@ static void process_sctp_notification(struct connection *con, } break; - /* We don't know which INIT failed, so clear the PENDING flags - * on them all. if assoc_id is zero then it will then try - * again */ - case SCTP_CANT_STR_ASSOC: { + /* Will retry init when we get the send failed notification */ log_print("Can't start SCTP association - retrying"); - sctp_init_failed(); } break; @@ -571,6 +778,8 @@ static void process_sctp_notification(struct connection *con, (int)sn->sn_assoc_change.sac_assoc_id, sn->sn_assoc_change.sac_state); } + default: + ; /* fall through */ } } @@ -704,6 +913,13 @@ static int tcp_accept_from_sock(struct connection *con) struct connection *newcon; struct connection *addcon; + mutex_lock(&connections_lock); + if (!dlm_allow_conn) { + mutex_unlock(&connections_lock); + return -1; + } + mutex_unlock(&connections_lock); + memset(&peeraddr, 0, sizeof(peeraddr)); result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, IPPROTO_TCP, &newsock); @@ -733,8 +949,11 @@ static int tcp_accept_from_sock(struct connection *con) /* Get the new node's NODEID */ make_sockaddr(&peeraddr, 0, &len); - if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { + if (addr_to_nodeid(&peeraddr, &nodeid)) { + unsigned char *b=(unsigned char *)&peeraddr; log_print("connect from non cluster node"); + print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, + b, sizeof(struct sockaddr_storage)); sock_release(newsock); mutex_unlock(&con->sock_mutex); return -1; @@ -796,7 +1015,7 @@ static int tcp_accept_from_sock(struct connection *con) /* * Add it to the active queue in case we got data - * beween processing the accept adding the socket + * between processing the accept adding the socket * to the read_sockets list */ if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) @@ -820,6 +1039,24 @@ static void free_entry(struct writequeue_entry *e) kfree(e); } +/* + * writequeue_entry_complete - try to delete and free write queue entry + * @e: write queue entry to try to delete + * @completed: bytes completed + * + * writequeue_lock must be held. + */ +static void writequeue_entry_complete(struct writequeue_entry *e, int completed) +{ + e->offset += completed; + e->len -= completed; + + if (e->len == 0 && e->users == 0) { + list_del(&e->list); + free_entry(e); + } +} + /* Initiate an SCTP association. This is a special case of send_to_sock() in that we don't yet have a peeled-off socket for this association, so we use the listening socket @@ -839,15 +1076,14 @@ static void sctp_init_assoc(struct connection *con) int addrlen; struct kvec iov[1]; + mutex_lock(&con->sock_mutex); if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) - return; - - if (con->retries++ > MAX_CONNECT_RETRIES) - return; + goto unlock; - if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { + if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr, + con->try_new_addr)) { log_print("no address for nodeid %d", con->nodeid); - return; + goto unlock; } base_con = nodeid2con(0, 0); BUG_ON(base_con == NULL); @@ -865,17 +1101,25 @@ static void sctp_init_assoc(struct connection *con) if (list_empty(&con->writequeue)) { spin_unlock(&con->writequeue_lock); log_print("writequeue empty for nodeid %d", con->nodeid); - return; + goto unlock; } e = list_first_entry(&con->writequeue, struct writequeue_entry, list); len = e->len; offset = e->offset; - spin_unlock(&con->writequeue_lock); /* Send the first block off the write queue */ iov[0].iov_base = page_address(e->page)+offset; iov[0].iov_len = len; + spin_unlock(&con->writequeue_lock); + + if (rem_addr.ss_family == AF_INET) { + struct sockaddr_in *sin = (struct sockaddr_in *)&rem_addr; + log_print("Trying to connect to %pI4", &sin->sin_addr.s_addr); + } else { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&rem_addr; + log_print("Trying to connect to %pI6", &sin6->sin6_addr); + } cmsg = CMSG_FIRSTHDR(&outmessage); cmsg->cmsg_level = IPPROTO_SCTP; @@ -883,8 +1127,9 @@ static void sctp_init_assoc(struct connection *con) cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); sinfo = CMSG_DATA(cmsg); memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); - sinfo->sinfo_ppid = cpu_to_le32(dlm_our_nodeid()); + sinfo->sinfo_ppid = cpu_to_le32(con->nodeid); outmessage.msg_controllen = cmsg->cmsg_len; + sinfo->sinfo_flags |= SCTP_ADDR_OVER; ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); if (ret < 0) { @@ -897,24 +1142,22 @@ static void sctp_init_assoc(struct connection *con) } else { spin_lock(&con->writequeue_lock); - e->offset += ret; - e->len -= ret; - - if (e->len == 0 && e->users == 0) { - list_del(&e->list); - free_entry(e); - } + writequeue_entry_complete(e, ret); spin_unlock(&con->writequeue_lock); } + +unlock: + mutex_unlock(&con->sock_mutex); } /* Connect a new socket to its peer */ static void tcp_connect_to_sock(struct connection *con) { - int result = -EHOSTUNREACH; struct sockaddr_storage saddr, src_addr; int addr_len; struct socket *sock = NULL; + int one = 1; + int result; if (con->nodeid == 0) { log_print("attempt to connect sock 0 foiled"); @@ -926,10 +1169,8 @@ static void tcp_connect_to_sock(struct connection *con) goto out; /* Some odd races can cause double-connects, ignore them */ - if (con->sock) { - result = 0; + if (con->sock) goto out; - } /* Create a socket to communicate with */ result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, @@ -938,8 +1179,11 @@ static void tcp_connect_to_sock(struct connection *con) goto out_err; memset(&saddr, 0, sizeof(saddr)); - if (dlm_nodeid_to_addr(con->nodeid, &saddr)) + result = nodeid_to_addr(con->nodeid, &saddr, NULL, false); + if (result < 0) { + log_print("no address for nodeid %d", con->nodeid); goto out_err; + } sock->sk->sk_user_data = con; con->rx_action = receive_from_sock; @@ -960,8 +1204,12 @@ static void tcp_connect_to_sock(struct connection *con) make_sockaddr(&saddr, dlm_config.ci_tcp_port, &addr_len); log_print("connecting to %d", con->nodeid); - result = - sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, + + /* Turn off Nagle's algorithm */ + kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, + sizeof(one)); + + result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, O_NONBLOCK); if (result == -EINPROGRESS) result = 0; @@ -979,11 +1227,17 @@ out_err: * Some errors are fatal and this list might need adjusting. For other * errors we try again until the max number of retries is reached. */ - if (result != -EHOSTUNREACH && result != -ENETUNREACH && - result != -ENETDOWN && result != -EINVAL - && result != -EPROTONOSUPPORT) { + if (result != -EHOSTUNREACH && + result != -ENETUNREACH && + result != -ENETDOWN && + result != -EINVAL && + result != -EPROTONOSUPPORT) { + log_print("connect %d try %d error %d", con->nodeid, + con->retries, result); + mutex_unlock(&con->sock_mutex); + msleep(1000); lowcomms_connect_sock(con); - result = 0; + return; } out: mutex_unlock(&con->sock_mutex); @@ -1011,16 +1265,18 @@ static struct socket *tcp_create_listen_sock(struct connection *con, goto create_out; } + /* Turn off Nagle's algorithm */ + kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, + sizeof(one)); + result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, (char *)&one, sizeof(one)); if (result < 0) { log_print("Failed to set SO_REUSEADDR on socket: %d", result); } - sock->sk->sk_user_data = con; con->rx_action = tcp_accept_from_sock; con->connect_action = tcp_connect_to_sock; - con->sock = sock; /* Bind to our port */ make_sockaddr(saddr, dlm_config.ci_tcp_port, &addr_len); @@ -1057,7 +1313,7 @@ static void init_local(void) int i; dlm_local_count = 0; - for (i = 0; i < DLM_MAX_ADDR_COUNT - 1; i++) { + for (i = 0; i < DLM_MAX_ADDR_COUNT; i++) { if (dlm_our_addr(&sas, i)) break; @@ -1102,6 +1358,7 @@ static int sctp_listen_for_all(void) int result = -EINVAL, num = 1, i, addr_len; struct connection *con = nodeid2con(0, GFP_NOFS); int bufsize = NEEDED_RMEM; + int one = 1; if (!con) return -ENOMEM; @@ -1136,6 +1393,11 @@ static int sctp_listen_for_all(void) goto create_delsock; } + result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, + sizeof(one)); + if (result < 0) + log_print("Could not set SCTP NODELAY error %d\n", result); + /* Init con struct */ sock->sk->sk_user_data = con; con->sock = sock; @@ -1230,7 +1492,6 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) struct connection *con; struct writequeue_entry *e; int offset = 0; - int users = 0; con = nodeid2con(nodeid, allocation); if (!con) @@ -1244,7 +1505,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) } else { offset = e->end; e->end += len; - users = e->users++; + e->users++; } spin_unlock(&con->writequeue_lock); @@ -1259,7 +1520,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc) spin_lock(&con->writequeue_lock); offset = e->end; e->end += len; - users = e->users++; + e->users++; list_add_tail(&e->list, &con->writequeue); spin_unlock(&con->writequeue_lock); goto got_one; @@ -1297,6 +1558,7 @@ static void send_to_sock(struct connection *con) const int msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; struct writequeue_entry *e; int len, offset; + int count = 0; mutex_lock(&con->sock_mutex); if (con->sock == NULL) @@ -1319,24 +1581,29 @@ static void send_to_sock(struct connection *con) ret = kernel_sendpage(con->sock, e->page, offset, len, msg_flags); if (ret == -EAGAIN || ret == 0) { + if (ret == -EAGAIN && + test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) && + !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { + /* Notify TCP that we're limited by the + * application window size. + */ + set_bit(SOCK_NOSPACE, &con->sock->flags); + con->sock->sk->sk_write_pending++; + } cond_resched(); goto out; - } - if (ret <= 0) + } else if (ret < 0) goto send_error; } - /* Don't starve people filling buffers */ + + /* Don't starve people filling buffers */ + if (++count >= MAX_SEND_MSG_COUNT) { cond_resched(); + count = 0; + } spin_lock(&con->writequeue_lock); - e->offset += ret; - e->len -= ret; - - if (e->len == 0 && e->users == 0) { - list_del(&e->list); - free_entry(e); - continue; - } + writequeue_entry_complete(e, ret); } spin_unlock(&con->writequeue_lock); out: @@ -1353,7 +1620,6 @@ out_connect: mutex_unlock(&con->sock_mutex); if (!test_bit(CF_INIT_PENDING, &con->flags)) lowcomms_connect_sock(con); - return; } static void clean_one_writequeue(struct connection *con) @@ -1373,6 +1639,7 @@ static void clean_one_writequeue(struct connection *con) int dlm_lowcomms_close(int nodeid) { struct connection *con; + struct dlm_node_addr *na; log_print("closing connection to node %d", nodeid); con = nodeid2con(nodeid, 0); @@ -1387,6 +1654,17 @@ int dlm_lowcomms_close(int nodeid) clean_one_writequeue(con); close_connection(con, true); } + + spin_lock(&dlm_node_addrs_spin); + na = find_node_addr(nodeid); + if (na) { + list_del(&na->list); + while (na->addr_count--) + kfree(na->addr[na->addr_count]); + kfree(na); + } + spin_unlock(&dlm_node_addrs_spin); + return 0; } @@ -1430,20 +1708,19 @@ static void work_stop(void) static int work_start(void) { - int error; - recv_workqueue = create_workqueue("dlm_recv"); - error = IS_ERR(recv_workqueue); - if (error) { - log_print("can't start dlm_recv %d", error); - return error; - } - - send_workqueue = create_singlethread_workqueue("dlm_send"); - error = IS_ERR(send_workqueue); - if (error) { - log_print("can't start dlm_send %d", error); + recv_workqueue = alloc_workqueue("dlm_recv", + WQ_UNBOUND | WQ_MEM_RECLAIM, 1); + if (!recv_workqueue) { + log_print("can't start dlm_recv"); + return -ENOMEM; + } + + send_workqueue = alloc_workqueue("dlm_send", + WQ_UNBOUND | WQ_MEM_RECLAIM, 1); + if (!send_workqueue) { + log_print("can't start dlm_send"); destroy_workqueue(recv_workqueue); - return error; + return -ENOMEM; } return 0; @@ -1471,6 +1748,7 @@ void dlm_lowcomms_stop(void) socket activity. */ mutex_lock(&connections_lock); + dlm_allow_conn = 0; foreach_conn(stop_conn); mutex_unlock(&connections_lock); @@ -1498,7 +1776,7 @@ int dlm_lowcomms_start(void) if (!dlm_local_count) { error = -ENOTCONN; log_print("no local IP address has been set"); - goto out; + goto fail; } error = -ENOMEM; @@ -1506,7 +1784,13 @@ int dlm_lowcomms_start(void) __alignof__(struct connection), 0, NULL); if (!con_cache) - goto out; + goto fail; + + error = work_start(); + if (error) + goto fail_destroy; + + dlm_allow_conn = 1; /* Start listening */ if (dlm_config.ci_protocol == 0) @@ -1516,20 +1800,31 @@ int dlm_lowcomms_start(void) if (error) goto fail_unlisten; - error = work_start(); - if (error) - goto fail_unlisten; - return 0; fail_unlisten: + dlm_allow_conn = 0; con = nodeid2con(0,0); if (con) { close_connection(con, false); kmem_cache_free(con_cache, con); } +fail_destroy: kmem_cache_destroy(con_cache); - -out: +fail: return error; } + +void dlm_lowcomms_exit(void) +{ + struct dlm_node_addr *na, *safe; + + spin_lock(&dlm_node_addrs_spin); + list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) { + list_del(&na->list); + while (na->addr_count--) + kfree(na->addr[na->addr_count]); + kfree(na); + } + spin_unlock(&dlm_node_addrs_spin); +} |
