diff options
Diffstat (limited to 'net/dccp/proto.c')
| -rw-r--r-- | net/dccp/proto.c | 476 |
1 files changed, 275 insertions, 201 deletions
diff --git a/net/dccp/proto.c b/net/dccp/proto.c index e3f5d37b84b..de2c1e71930 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -20,6 +20,7 @@ #include <linux/if_arp.h> #include <linux/init.h> #include <linux/random.h> +#include <linux/slab.h> #include <net/checksum.h> #include <net/inet_sock.h> @@ -27,7 +28,6 @@ #include <net/xfrm.h> #include <asm/ioctls.h> -#include <asm/semaphore.h> #include <linux/spinlock.h> #include <linux/timer.h> #include <linux/delay.h> @@ -41,21 +41,39 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly; EXPORT_SYMBOL_GPL(dccp_statistics); -atomic_t dccp_orphan_count = ATOMIC_INIT(0); - +struct percpu_counter dccp_orphan_count; EXPORT_SYMBOL_GPL(dccp_orphan_count); -struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), -}; - +struct inet_hashinfo dccp_hashinfo; EXPORT_SYMBOL_GPL(dccp_hashinfo); /* the maximum queue length for tx in packets. 0 is no limit */ int sysctl_dccp_tx_qlen __read_mostly = 5; +#ifdef CONFIG_IP_DCCP_DEBUG +static const char *dccp_state_name(const int state) +{ + static const char *const dccp_state_names[] = { + [DCCP_OPEN] = "OPEN", + [DCCP_REQUESTING] = "REQUESTING", + [DCCP_PARTOPEN] = "PARTOPEN", + [DCCP_LISTEN] = "LISTEN", + [DCCP_RESPOND] = "RESPOND", + [DCCP_CLOSING] = "CLOSING", + [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", + [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", + [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", + [DCCP_TIME_WAIT] = "TIME_WAIT", + [DCCP_CLOSED] = "CLOSED", + }; + + if (state >= DCCP_MAX_STATES) + return "INVALID STATE!"; + else + return dccp_state_names[state]; +} +#endif + void dccp_set_state(struct sock *sk, const int state) { const int oldstate = sk->sk_state; @@ -68,6 +86,9 @@ void dccp_set_state(struct sock *sk, const int state) case DCCP_OPEN: if (oldstate != DCCP_OPEN) DCCP_INC_STATS(DCCP_MIB_CURRESTAB); + /* Client retransmits all Confirm options until entering OPEN */ + if (oldstate == DCCP_PARTOPEN) + dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg); break; case DCCP_CLOSED: @@ -128,7 +149,7 @@ EXPORT_SYMBOL_GPL(dccp_done); const char *dccp_packet_name(const int type) { - static const char *dccp_packet_names[] = { + static const char *const dccp_packet_names[] = { [DCCP_PKT_REQUEST] = "REQUEST", [DCCP_PKT_RESPONSE] = "RESPONSE", [DCCP_PKT_DATA] = "DATA", @@ -149,38 +170,11 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); -const char *dccp_state_name(const int state) -{ - static char *dccp_state_names[] = { - [DCCP_OPEN] = "OPEN", - [DCCP_REQUESTING] = "REQUESTING", - [DCCP_PARTOPEN] = "PARTOPEN", - [DCCP_LISTEN] = "LISTEN", - [DCCP_RESPOND] = "RESPOND", - [DCCP_CLOSING] = "CLOSING", - [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ", - [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE", - [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ", - [DCCP_TIME_WAIT] = "TIME_WAIT", - [DCCP_CLOSED] = "CLOSED", - }; - - if (state >= DCCP_MAX_STATES) - return "INVALID STATE!"; - else - return dccp_state_names[state]; -} - -EXPORT_SYMBOL_GPL(dccp_state_name); - int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); struct inet_connection_sock *icsk = inet_csk(sk); - dccp_minisock_init(&dp->dccps_minisock); - icsk->icsk_rto = DCCP_TIMEOUT_INIT; icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; @@ -190,58 +184,22 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) dp->dccps_rate_last = jiffies; dp->dccps_role = DCCP_ROLE_UNDEFINED; dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT; - dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1; + dp->dccps_tx_qlen = sysctl_dccp_tx_qlen; dccp_init_xmit_timers(sk); - /* - * FIXME: We're hardcoding the CCID, and doing this at this point makes - * the listening (master) sock get CCID control blocks, which is not - * necessary, but for now, to not mess with the test userspace apps, - * lets leave it here, later the real solution is to do this in a - * setsockopt(CCIDs-I-want/accept). -acme - */ - if (likely(ctl_sock_initialized)) { - int rc = dccp_feat_init(dmsk); - - if (rc) - return rc; - - if (dmsk->dccpms_send_ack_vector) { - dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL); - if (dp->dccps_hc_rx_ackvec == NULL) - return -ENOMEM; - } - dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid, - sk, GFP_KERNEL); - dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid, - sk, GFP_KERNEL); - if (unlikely(dp->dccps_hc_rx_ccid == NULL || - dp->dccps_hc_tx_ccid == NULL)) { - ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - if (dmsk->dccpms_send_ack_vector) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; - return -ENOMEM; - } - } else { - /* control socket doesn't need feat nego */ - INIT_LIST_HEAD(&dmsk->dccpms_pending); - INIT_LIST_HEAD(&dmsk->dccpms_conf); - } - + INIT_LIST_HEAD(&dp->dccps_featneg); + /* control socket doesn't need feat nego */ + if (likely(ctl_sock_initialized)) + return dccp_feat_init(sk); return 0; } EXPORT_SYMBOL_GPL(dccp_init_sock); -int dccp_destroy_sock(struct sock *sk) +void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - struct dccp_minisock *dmsk = dccp_msk(sk); /* * DCCP doesn't use sk_write_queue, just sk_send_head @@ -259,7 +217,7 @@ int dccp_destroy_sock(struct sock *sk) kfree(dp->dccps_service_list); dp->dccps_service_list = NULL; - if (dmsk->dccpms_send_ack_vector) { + if (dp->dccps_hc_rx_ackvec != NULL) { dccp_ackvec_free(dp->dccps_hc_rx_ackvec); dp->dccps_hc_rx_ackvec = NULL; } @@ -268,9 +226,7 @@ int dccp_destroy_sock(struct sock *sk) dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; /* clean up feature negotiation state */ - dccp_feat_clean(dmsk); - - return 0; + dccp_feat_list_purge(&dp->dccps_featneg); } EXPORT_SYMBOL_GPL(dccp_destroy_sock); @@ -280,6 +236,9 @@ static inline int dccp_listen_start(struct sock *sk, int backlog) struct dccp_sock *dp = dccp_sk(sk); dp->dccps_role = DCCP_ROLE_LISTEN; + /* do not start to listen if feature negotiation setup fails */ + if (dccp_feat_finalise_settings(dp)) + return -EPROTO; return inet_csk_listen_start(sk, backlog); } @@ -312,13 +271,15 @@ int dccp_disconnect(struct sock *sk, int flags) sk->sk_err = ECONNRESET; dccp_clear_xmit_timers(sk); + __skb_queue_purge(&sk->sk_receive_queue); + __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { __kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; } - inet->dport = 0; + inet->inet_dport = 0; if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK)) inet_reset_saddr(sk); @@ -330,7 +291,7 @@ int dccp_disconnect(struct sock *sk, int flags) inet_csk_delack_init(sk); __sk_dst_reset(sk); - BUG_TRAP(!inet->num || icsk->icsk_bind_hash); + WARN_ON(inet->inet_num && !icsk->icsk_bind_hash); sk->sk_error_report(sk); return err; @@ -351,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, unsigned int mask; struct sock *sk = sock->sk; - poll_wait(file, sk->sk_sleep, wait); + sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_state == DCCP_LISTEN) return inet_csk_listen_poll(sk); @@ -375,7 +336,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, mask |= POLLIN | POLLRDNORM; if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { - if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) { + if (sk_stream_is_writeable(sk)) { mask |= POLLOUT | POLLWRNORM; } else { /* send SIGIO later */ set_bit(SOCK_ASYNC_NOSPACE, @@ -386,7 +347,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock, * wspace test but before the flags are set, * IO signal will be lost. */ - if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) + if (sk_stream_is_writeable(sk)) mask |= POLLOUT | POLLWRNORM; } } @@ -433,7 +394,7 @@ out: EXPORT_SYMBOL_GPL(dccp_ioctl); static int dccp_setsockopt_service(struct sock *sk, const __be32 service, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { struct dccp_sock *dp = dccp_sk(sk); struct dccp_service_list *sl = NULL; @@ -467,46 +428,88 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service, return 0; } -/* byte 1 is feature. the rest is the preference list */ -static int dccp_setsockopt_change(struct sock *sk, int type, - struct dccp_so_feat __user *optval) +static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx) { - struct dccp_so_feat opt; - u8 *val; - int rc; + u8 *list, len; + int i, rc; - if (copy_from_user(&opt, optval, sizeof(opt))) - return -EFAULT; + if (cscov < 0 || cscov > 15) + return -EINVAL; + /* + * Populate a list of permissible values, in the range cscov...15. This + * is necessary since feature negotiation of single values only works if + * both sides incidentally choose the same value. Since the list starts + * lowest-value first, negotiation will pick the smallest shared value. + */ + if (cscov == 0) + return 0; + len = 16 - cscov; - val = kmalloc(opt.dccpsf_len, GFP_KERNEL); - if (!val) - return -ENOMEM; + list = kmalloc(len, GFP_KERNEL); + if (list == NULL) + return -ENOBUFS; - if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) { - rc = -EFAULT; - goto out_free_val; - } + for (i = 0; i < len; i++) + list[i] = cscov++; - rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat, - val, opt.dccpsf_len, GFP_KERNEL); - if (rc) - goto out_free_val; + rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len); -out: + if (rc == 0) { + if (rx) + dccp_sk(sk)->dccps_pcrlen = cscov; + else + dccp_sk(sk)->dccps_pcslen = cscov; + } + kfree(list); return rc; +} + +static int dccp_setsockopt_ccid(struct sock *sk, int type, + char __user *optval, unsigned int optlen) +{ + u8 *val; + int rc = 0; + + if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS) + return -EINVAL; + + val = memdup_user(optval, optlen); + if (IS_ERR(val)) + return PTR_ERR(val); + + lock_sock(sk); + if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID) + rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen); + + if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID)) + rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen); + release_sock(sk); -out_free_val: kfree(val); - goto out; + return rc; } static int do_dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { struct dccp_sock *dp = dccp_sk(sk); int val, err = 0; - if (optlen < sizeof(int)) + switch (optname) { + case DCCP_SOCKOPT_PACKET_SIZE: + DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); + return 0; + case DCCP_SOCKOPT_CHANGE_L: + case DCCP_SOCKOPT_CHANGE_R: + DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n"); + return 0; + case DCCP_SOCKOPT_CCID: + case DCCP_SOCKOPT_RX_CCID: + case DCCP_SOCKOPT_TX_CCID: + return dccp_setsockopt_ccid(sk, optname, optval, optlen); + } + + if (optlen < (int)sizeof(int)) return -EINVAL; if (get_user(val, (int __user *)optval)) @@ -517,58 +520,43 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { - case DCCP_SOCKOPT_PACKET_SIZE: - DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n"); - err = 0; - break; - case DCCP_SOCKOPT_CHANGE_L: - if (optlen != sizeof(struct dccp_so_feat)) - err = -EINVAL; - else - err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L, - (struct dccp_so_feat __user *) - optval); - break; - case DCCP_SOCKOPT_CHANGE_R: - if (optlen != sizeof(struct dccp_so_feat)) - err = -EINVAL; - else - err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R, - (struct dccp_so_feat __user *) - optval); - break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: if (dp->dccps_role != DCCP_ROLE_SERVER) err = -EOPNOTSUPP; else dp->dccps_server_timewait = (val != 0); break; - case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */ - if (val < 0 || val > 15) + case DCCP_SOCKOPT_SEND_CSCOV: + err = dccp_setsockopt_cscov(sk, val, false); + break; + case DCCP_SOCKOPT_RECV_CSCOV: + err = dccp_setsockopt_cscov(sk, val, true); + break; + case DCCP_SOCKOPT_QPOLICY_ID: + if (sk->sk_state != DCCP_CLOSED) + err = -EISCONN; + else if (val < 0 || val >= DCCPQ_POLICY_MAX) err = -EINVAL; else - dp->dccps_pcslen = val; + dp->dccps_qpolicy = val; break; - case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */ - if (val < 0 || val > 15) + case DCCP_SOCKOPT_QPOLICY_TXQLEN: + if (val < 0) err = -EINVAL; - else { - dp->dccps_pcrlen = val; - /* FIXME: add feature negotiation, - * ChangeL(MinimumChecksumCoverage, val) */ - } + else + dp->dccps_tx_qlen = val; break; default: err = -ENOPROTOOPT; break; } - release_sock(sk); + return err; } int dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { if (level != SOL_DCCP) return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level, @@ -581,7 +569,7 @@ EXPORT_SYMBOL_GPL(dccp_setsockopt); #ifdef CONFIG_COMPAT int compat_dccp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) + char __user *optval, unsigned int optlen) { if (level != SOL_DCCP) return inet_csk_compat_setsockopt(sk, level, optname, @@ -644,6 +632,18 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_GET_CUR_MPS: val = dp->dccps_mss_cache; break; + case DCCP_SOCKOPT_AVAILABLE_CCIDS: + return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen); + case DCCP_SOCKOPT_TX_CCID: + val = ccid_get_current_tx_ccid(dp); + if (val < 0) + return -ENOPROTOOPT; + break; + case DCCP_SOCKOPT_RX_CCID: + val = ccid_get_current_rx_ccid(dp); + if (val < 0) + return -ENOPROTOOPT; + break; case DCCP_SOCKOPT_SERVER_TIMEWAIT: val = dp->dccps_server_timewait; break; @@ -653,6 +653,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname, case DCCP_SOCKOPT_RECV_CSCOV: val = dp->dccps_pcrlen; break; + case DCCP_SOCKOPT_QPOLICY_ID: + val = dp->dccps_qpolicy; + break; + case DCCP_SOCKOPT_QPOLICY_TXQLEN: + val = dp->dccps_tx_qlen; + break; case 128 ... 191: return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname, len, (u32 __user *)optval, optlen); @@ -695,6 +701,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname, EXPORT_SYMBOL_GPL(compat_dccp_getsockopt); #endif +static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb) +{ + struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg); + + /* + * Assign an (opaque) qpolicy priority value to skb->priority. + * + * We are overloading this skb field for use with the qpolicy subystem. + * The skb->priority is normally used for the SO_PRIORITY option, which + * is initialised from sk_priority. Since the assignment of sk_priority + * to skb->priority happens later (on layer 3), we overload this field + * for use with queueing priorities as long as the skb is on layer 4. + * The default priority value (if nothing is set) is 0. + */ + skb->priority = 0; + + for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) { + + if (!CMSG_OK(msg, cmsg)) + return -EINVAL; + + if (cmsg->cmsg_level != SOL_DCCP) + continue; + + if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX && + !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type)) + return -EINVAL; + + switch (cmsg->cmsg_type) { + case DCCP_SCM_PRIORITY: + if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32))) + return -EINVAL; + skb->priority = *(__u32 *)CMSG_DATA(cmsg); + break; + default: + return -EINVAL; + } + } + return 0; +} + int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { @@ -710,8 +757,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, lock_sock(sk); - if (sysctl_dccp_tx_qlen && - (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) { + if (dccp_qpolicy_full(sk)) { rc = -EAGAIN; goto out_release; } @@ -739,8 +785,18 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (rc != 0) goto out_discard; - skb_queue_tail(&sk->sk_write_queue, skb); - dccp_write_xmit(sk,0); + rc = dccp_msghdr_parse(msg, skb); + if (rc != 0) + goto out_discard; + + dccp_qpolicy_push(sk, skb); + /* + * The xmit_timer is set if the TX CCID is rate-based and will expire + * when congestion control permits to release further packets into the + * network. Window-based CCIDs do not use this timer. + */ + if (!timer_pending(&dp->dccps_xmit_timer)) + dccp_write_xmit(sk); out_release: release_sock(sk); return rc ? : len; @@ -792,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, default: dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type)); - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); } verify_sock_status: if (sock_flag(sk, SOCK_DONE)) { @@ -845,9 +901,11 @@ verify_sock_status: len = -EFAULT; break; } + if (flags & MSG_TRUNC) + len = skb->len; found_fin_ok: if (!(flags & MSG_PEEK)) - sk_eat_skb(sk, skb, 0); + sk_eat_skb(sk, skb, false); break; } while (1); out: @@ -956,23 +1014,35 @@ void dccp_close(struct sock *sk, long timeout) if (data_was_unread) { /* Unread data was tossed, send an appropriate Reset Code */ - DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread); + DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread); dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED); dccp_set_state(sk, DCCP_CLOSED); } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) { /* Check zero linger _after_ checking for unread data. */ sk->sk_prot->disconnect(sk, 0); } else if (sk->sk_state != DCCP_CLOSED) { + /* + * Normal connection termination. May need to wait if there are + * still packets in the TX queue that are delayed by the CCID. + */ + dccp_flush_write_queue(sk, &timeout); dccp_terminate_connection(sk); } + /* + * Flush write queue. This may be necessary in several cases: + * - we have been closed by the peer but still have application data; + * - abortive termination (unread data or zero linger time), + * - normal termination but queue could not be flushed within time limit + */ + __skb_queue_purge(&sk->sk_write_queue); + sk_stream_wait_close(sk, timeout); adjudge_to_death: state = sk->sk_state; sock_hold(sk); sock_orphan(sk); - atomic_inc(sk->sk_prot->orphan_count); /* * It is the last release_sock in its life. It will remove backlog. @@ -984,7 +1054,9 @@ adjudge_to_death: */ local_bh_disable(); bh_lock_sock(sk); - BUG_TRAP(!sock_owned_by_user(sk)); + WARN_ON(sock_owned_by_user(sk)); + + percpu_counter_inc(sk->sk_prot->orphan_count); /* Have we already been destroyed by a softirq or backlog? */ if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED) @@ -1010,33 +1082,17 @@ void dccp_shutdown(struct sock *sk, int how) EXPORT_SYMBOL_GPL(dccp_shutdown); -static int __init dccp_mib_init(void) +static inline int dccp_mib_init(void) { - int rc = -ENOMEM; - - dccp_statistics[0] = alloc_percpu(struct dccp_mib); - if (dccp_statistics[0] == NULL) - goto out; - - dccp_statistics[1] = alloc_percpu(struct dccp_mib); - if (dccp_statistics[1] == NULL) - goto out_free_one; - - rc = 0; -out: - return rc; -out_free_one: - free_percpu(dccp_statistics[0]); - dccp_statistics[0] = NULL; - goto out; - + dccp_statistics = alloc_percpu(struct dccp_mib); + if (!dccp_statistics) + return -ENOMEM; + return 0; } -static void dccp_mib_exit(void) +static inline void dccp_mib_exit(void) { - free_percpu(dccp_statistics[0]); - free_percpu(dccp_statistics[1]); - dccp_statistics[0] = dccp_statistics[1] = NULL; + free_percpu(dccp_statistics); } static int thash_entries; @@ -1044,8 +1100,8 @@ module_param(thash_entries, int, 0444); MODULE_PARM_DESC(thash_entries, "Number of ehash buckets"); #ifdef CONFIG_IP_DCCP_DEBUG -int dccp_debug; -module_param(dccp_debug, bool, 0444); +bool dccp_debug; +module_param(dccp_debug, bool, 0644); MODULE_PARM_DESC(dccp_debug, "Enable debug messages"); EXPORT_SYMBOL_GPL(dccp_debug); @@ -1055,14 +1111,21 @@ static int __init dccp_init(void) { unsigned long goal; int ehash_order, bhash_order, i; - int rc = -ENOBUFS; + int rc; + BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > + FIELD_SIZEOF(struct sk_buff, cb)); + rc = percpu_counter_init(&dccp_orphan_count, 0); + if (rc) + goto out_fail; + rc = -ENOBUFS; + inet_hashinfo_init(&dccp_hashinfo); dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket", sizeof(struct inet_bind_bucket), 0, SLAB_HWCACHE_ALIGN, NULL); if (!dccp_hashinfo.bind_bucket_cachep) - goto out; + goto out_free_percpu; /* * Size and allocate the main established and bind bucket @@ -1070,10 +1133,10 @@ static int __init dccp_init(void) * * The methodology is similar to that of the buffer cache. */ - if (num_physpages >= (128 * 1024)) - goal = num_physpages >> (21 - PAGE_SHIFT); + if (totalram_pages >= (128 * 1024)) + goal = totalram_pages >> (21 - PAGE_SHIFT); else - goal = num_physpages >> (23 - PAGE_SHIFT); + goal = totalram_pages >> (23 - PAGE_SHIFT); if (thash_entries) goal = (thash_entries * @@ -1081,13 +1144,14 @@ static int __init dccp_init(void) for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++) ; do { - dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE / + unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE / sizeof(struct inet_ehash_bucket); - while (dccp_hashinfo.ehash_size & - (dccp_hashinfo.ehash_size - 1)) - dccp_hashinfo.ehash_size--; + + while (hash_size & (hash_size - 1)) + hash_size--; + dccp_hashinfo.ehash_mask = hash_size - 1; dccp_hashinfo.ehash = (struct inet_ehash_bucket *) - __get_free_pages(GFP_ATOMIC, ehash_order); + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order); } while (!dccp_hashinfo.ehash && --ehash_order > 0); if (!dccp_hashinfo.ehash) { @@ -1095,10 +1159,8 @@ static int __init dccp_init(void) goto out_free_bind_bucket_cachep; } - for (i = 0; i < dccp_hashinfo.ehash_size; i++) { - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain); - INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain); - } + for (i = 0; i <= dccp_hashinfo.ehash_mask; i++) + INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i); if (inet_ehash_locks_alloc(&dccp_hashinfo)) goto out_free_dccp_ehash; @@ -1112,7 +1174,7 @@ static int __init dccp_init(void) bhash_order > 0) continue; dccp_hashinfo.bhash = (struct inet_bind_hashbucket *) - __get_free_pages(GFP_ATOMIC, bhash_order); + __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order); } while (!dccp_hashinfo.bhash && --bhash_order >= 0); if (!dccp_hashinfo.bhash) { @@ -1137,40 +1199,52 @@ static int __init dccp_init(void) if (rc) goto out_ackvec_exit; + rc = ccid_initialize_builtins(); + if (rc) + goto out_sysctl_exit; + dccp_timestamping_init(); -out: - return rc; + + return 0; + +out_sysctl_exit: + dccp_sysctl_exit(); out_ackvec_exit: dccp_ackvec_exit(); out_free_dccp_mib: dccp_mib_exit(); out_free_dccp_bhash: free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order); - dccp_hashinfo.bhash = NULL; out_free_dccp_locks: inet_ehash_locks_free(&dccp_hashinfo); out_free_dccp_ehash: free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order); - dccp_hashinfo.ehash = NULL; out_free_bind_bucket_cachep: kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); +out_free_percpu: + percpu_counter_destroy(&dccp_orphan_count); +out_fail: + dccp_hashinfo.bhash = NULL; + dccp_hashinfo.ehash = NULL; dccp_hashinfo.bind_bucket_cachep = NULL; - goto out; + return rc; } static void __exit dccp_fini(void) { + ccid_cleanup_builtins(); dccp_mib_exit(); free_pages((unsigned long)dccp_hashinfo.bhash, get_order(dccp_hashinfo.bhash_size * sizeof(struct inet_bind_hashbucket))); free_pages((unsigned long)dccp_hashinfo.ehash, - get_order(dccp_hashinfo.ehash_size * + get_order((dccp_hashinfo.ehash_mask + 1) * sizeof(struct inet_ehash_bucket))); inet_ehash_locks_free(&dccp_hashinfo); kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep); dccp_ackvec_exit(); dccp_sysctl_exit(); + percpu_counter_destroy(&dccp_orphan_count); } module_init(dccp_init); |
