path: root/net/dccp/proto.c
Diffstat (limited to 'net/dccp/proto.c')
-rw-r--r--  net/dccp/proto.c | 476
1 file changed, 275 insertions(+), 201 deletions(-)
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index e3f5d37b84b..de2c1e71930 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -20,6 +20,7 @@
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
+#include <linux/slab.h>
#include <net/checksum.h>
#include <net/inet_sock.h>
@@ -27,7 +28,6 @@
#include <net/xfrm.h>
#include <asm/ioctls.h>
-#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
@@ -41,21 +41,39 @@ DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
EXPORT_SYMBOL_GPL(dccp_statistics);
-atomic_t dccp_orphan_count = ATOMIC_INIT(0);
-
+struct percpu_counter dccp_orphan_count;
EXPORT_SYMBOL_GPL(dccp_orphan_count);
-struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
- .lhash_lock = RW_LOCK_UNLOCKED,
- .lhash_users = ATOMIC_INIT(0),
- .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-};
-
+struct inet_hashinfo dccp_hashinfo;
EXPORT_SYMBOL_GPL(dccp_hashinfo);
/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
+#ifdef CONFIG_IP_DCCP_DEBUG
+static const char *dccp_state_name(const int state)
+{
+ static const char *const dccp_state_names[] = {
+ [DCCP_OPEN] = "OPEN",
+ [DCCP_REQUESTING] = "REQUESTING",
+ [DCCP_PARTOPEN] = "PARTOPEN",
+ [DCCP_LISTEN] = "LISTEN",
+ [DCCP_RESPOND] = "RESPOND",
+ [DCCP_CLOSING] = "CLOSING",
+ [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
+ [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
+ [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
+ [DCCP_TIME_WAIT] = "TIME_WAIT",
+ [DCCP_CLOSED] = "CLOSED",
+ };
+
+ if (state >= DCCP_MAX_STATES)
+ return "INVALID STATE!";
+ else
+ return dccp_state_names[state];
+}
+#endif
+
void dccp_set_state(struct sock *sk, const int state)
{
const int oldstate = sk->sk_state;
@@ -68,6 +86,9 @@ void dccp_set_state(struct sock *sk, const int state)
case DCCP_OPEN:
if (oldstate != DCCP_OPEN)
DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
+ /* Client retransmits all Confirm options until entering OPEN */
+ if (oldstate == DCCP_PARTOPEN)
+ dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
break;
case DCCP_CLOSED:
@@ -128,7 +149,7 @@ EXPORT_SYMBOL_GPL(dccp_done);
const char *dccp_packet_name(const int type)
{
- static const char *dccp_packet_names[] = {
+ static const char *const dccp_packet_names[] = {
[DCCP_PKT_REQUEST] = "REQUEST",
[DCCP_PKT_RESPONSE] = "RESPONSE",
[DCCP_PKT_DATA] = "DATA",
@@ -149,38 +170,11 @@ const char *dccp_packet_name(const int type)
EXPORT_SYMBOL_GPL(dccp_packet_name);
-const char *dccp_state_name(const int state)
-{
- static char *dccp_state_names[] = {
- [DCCP_OPEN] = "OPEN",
- [DCCP_REQUESTING] = "REQUESTING",
- [DCCP_PARTOPEN] = "PARTOPEN",
- [DCCP_LISTEN] = "LISTEN",
- [DCCP_RESPOND] = "RESPOND",
- [DCCP_CLOSING] = "CLOSING",
- [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
- [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
- [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
- [DCCP_TIME_WAIT] = "TIME_WAIT",
- [DCCP_CLOSED] = "CLOSED",
- };
-
- if (state >= DCCP_MAX_STATES)
- return "INVALID STATE!";
- else
- return dccp_state_names[state];
-}
-
-EXPORT_SYMBOL_GPL(dccp_state_name);
-
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
- dccp_minisock_init(&dp->dccps_minisock);
-
icsk->icsk_rto = DCCP_TIMEOUT_INIT;
icsk->icsk_syn_retries = sysctl_dccp_request_retries;
sk->sk_state = DCCP_CLOSED;
@@ -190,58 +184,22 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
dp->dccps_rate_last = jiffies;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
- dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
+ dp->dccps_tx_qlen = sysctl_dccp_tx_qlen;
dccp_init_xmit_timers(sk);
- /*
- * FIXME: We're hardcoding the CCID, and doing this at this point makes
- * the listening (master) sock get CCID control blocks, which is not
- * necessary, but for now, to not mess with the test userspace apps,
- * lets leave it here, later the real solution is to do this in a
- * setsockopt(CCIDs-I-want/accept). -acme
- */
- if (likely(ctl_sock_initialized)) {
- int rc = dccp_feat_init(dmsk);
-
- if (rc)
- return rc;
-
- if (dmsk->dccpms_send_ack_vector) {
- dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
- if (dp->dccps_hc_rx_ackvec == NULL)
- return -ENOMEM;
- }
- dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
- sk, GFP_KERNEL);
- dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
- sk, GFP_KERNEL);
- if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
- dp->dccps_hc_tx_ccid == NULL)) {
- ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- if (dmsk->dccpms_send_ack_vector) {
- dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
- dp->dccps_hc_rx_ackvec = NULL;
- }
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
- return -ENOMEM;
- }
- } else {
- /* control socket doesn't need feat nego */
- INIT_LIST_HEAD(&dmsk->dccpms_pending);
- INIT_LIST_HEAD(&dmsk->dccpms_conf);
- }
-
+ INIT_LIST_HEAD(&dp->dccps_featneg);
+ /* control socket doesn't need feat nego */
+ if (likely(ctl_sock_initialized))
+ return dccp_feat_init(sk);
return 0;
}
EXPORT_SYMBOL_GPL(dccp_init_sock);
-int dccp_destroy_sock(struct sock *sk)
+void dccp_destroy_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
- struct dccp_minisock *dmsk = dccp_msk(sk);
/*
* DCCP doesn't use sk_write_queue, just sk_send_head
@@ -259,7 +217,7 @@ int dccp_destroy_sock(struct sock *sk)
kfree(dp->dccps_service_list);
dp->dccps_service_list = NULL;
- if (dmsk->dccpms_send_ack_vector) {
+ if (dp->dccps_hc_rx_ackvec != NULL) {
dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
dp->dccps_hc_rx_ackvec = NULL;
}
@@ -268,9 +226,7 @@ int dccp_destroy_sock(struct sock *sk)
dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
/* clean up feature negotiation state */
- dccp_feat_clean(dmsk);
-
- return 0;
+ dccp_feat_list_purge(&dp->dccps_featneg);
}
EXPORT_SYMBOL_GPL(dccp_destroy_sock);
@@ -280,6 +236,9 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
+ /* do not start to listen if feature negotiation setup fails */
+ if (dccp_feat_finalise_settings(dp))
+ return -EPROTO;
return inet_csk_listen_start(sk, backlog);
}
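
For illustration, the new -EPROTO path matters to listeners: a DCCP server
must also install a service code before calling listen(). A hypothetical
userspace sketch (the service value 42 is arbitrary; bind() and error
handling are elided; SOL_DCCP is 269 per <linux/socket.h>):

    #include <arpa/inet.h>
    #include <sys/socket.h>
    #include <linux/dccp.h>

    #ifndef SOL_DCCP
    #define SOL_DCCP 269                /* from <linux/socket.h> */
    #endif

    static int dccp_listener(void)
    {
        int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
        __be32 service = htonl(42);     /* arbitrary service code */

        /* must precede listen(); cf. dccp_setsockopt_service() */
        setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
                   &service, sizeof(service));
        /* bind(fd, ...) elided */
        return listen(fd, 5);   /* errno EPROTO if the feature
                                 * settings cannot be finalised */
    }
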
@@ -312,13 +271,15 @@ int dccp_disconnect(struct sock *sk, int flags)
sk->sk_err = ECONNRESET;
dccp_clear_xmit_timers(sk);
+
__skb_queue_purge(&sk->sk_receive_queue);
+ __skb_queue_purge(&sk->sk_write_queue);
if (sk->sk_send_head != NULL) {
__kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
}
- inet->dport = 0;
+ inet->inet_dport = 0;
if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
inet_reset_saddr(sk);
@@ -330,7 +291,7 @@ int dccp_disconnect(struct sock *sk, int flags)
inet_csk_delack_init(sk);
__sk_dst_reset(sk);
- BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
+ WARN_ON(inet->inet_num && !icsk->icsk_bind_hash);
sk->sk_error_report(sk);
return err;
@@ -351,7 +312,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
unsigned int mask;
struct sock *sk = sock->sk;
- poll_wait(file, sk->sk_sleep, wait);
+ sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == DCCP_LISTEN)
return inet_csk_listen_poll(sk);
@@ -375,7 +336,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
mask |= POLLIN | POLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
set_bit(SOCK_ASYNC_NOSPACE,
@@ -386,7 +347,7 @@ unsigned int dccp_poll(struct file *file, struct socket *sock,
* wspace test but before the flags are set,
* IO signal will be lost.
*/
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ if (sk_stream_is_writeable(sk))
mask |= POLLOUT | POLLWRNORM;
}
}
@@ -433,7 +394,7 @@ out:
EXPORT_SYMBOL_GPL(dccp_ioctl);
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_service_list *sl = NULL;
@@ -467,46 +428,88 @@ static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
return 0;
}
-/* byte 1 is feature. the rest is the preference list */
-static int dccp_setsockopt_change(struct sock *sk, int type,
- struct dccp_so_feat __user *optval)
+static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
- struct dccp_so_feat opt;
- u8 *val;
- int rc;
+ u8 *list, len;
+ int i, rc;
- if (copy_from_user(&opt, optval, sizeof(opt)))
- return -EFAULT;
+ if (cscov < 0 || cscov > 15)
+ return -EINVAL;
+ /*
+ * Populate a list of permissible values, in the range cscov...15. This
+ * is necessary since feature negotiation of single values only works if
+ * both sides incidentally choose the same value. Since the list starts
+ * lowest-value first, negotiation will pick the smallest shared value.
+ */
+ if (cscov == 0)
+ return 0;
+ len = 16 - cscov;
- val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
- if (!val)
- return -ENOMEM;
+ list = kmalloc(len, GFP_KERNEL);
+ if (list == NULL)
+ return -ENOBUFS;
- if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
- rc = -EFAULT;
- goto out_free_val;
- }
+ for (i = 0; i < len; i++)
+ list[i] = cscov++;
- rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
- val, opt.dccpsf_len, GFP_KERNEL);
- if (rc)
- goto out_free_val;
+ rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
-out:
+ if (rc == 0) {
+ if (rx)
+ dccp_sk(sk)->dccps_pcrlen = cscov;
+ else
+ dccp_sk(sk)->dccps_pcslen = cscov;
+ }
+ kfree(list);
return rc;
+}
+
+static int dccp_setsockopt_ccid(struct sock *sk, int type,
+ char __user *optval, unsigned int optlen)
+{
+ u8 *val;
+ int rc = 0;
+
+ if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
+ return -EINVAL;
+
+ val = memdup_user(optval, optlen);
+ if (IS_ERR(val))
+ return PTR_ERR(val);
+
+ lock_sock(sk);
+ if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
+ rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
+
+ if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
+ rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
+ release_sock(sk);
-out_free_val:
kfree(val);
- goto out;
+ return rc;
}
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
struct dccp_sock *dp = dccp_sk(sk);
int val, err = 0;
- if (optlen < sizeof(int))
+ switch (optname) {
+ case DCCP_SOCKOPT_PACKET_SIZE:
+ DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
+ return 0;
+ case DCCP_SOCKOPT_CHANGE_L:
+ case DCCP_SOCKOPT_CHANGE_R:
+ DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
+ return 0;
+ case DCCP_SOCKOPT_CCID:
+ case DCCP_SOCKOPT_RX_CCID:
+ case DCCP_SOCKOPT_TX_CCID:
+ return dccp_setsockopt_ccid(sk, optname, optval, optlen);
+ }
+
+ if (optlen < (int)sizeof(int))
return -EINVAL;
if (get_user(val, (int __user *)optval))
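
The dccp_setsockopt_ccid() helper added above takes a preference-ordered
byte array of CCID numbers instead of the old struct dccp_so_feat. A
hypothetical userspace sketch, assuming the in-tree CCIDs 2 and 3 are
built in:

    #include <stdio.h>
    #include <sys/socket.h>
    #include <linux/dccp.h>

    #ifndef SOL_DCCP
    #define SOL_DCCP 269                /* from <linux/socket.h> */
    #endif

    static int prefer_ccid3(int fd)
    {
        unsigned char ccids[] = { 3, 2 };       /* most preferred first */

        /* DCCP_SOCKOPT_CCID registers the list for both directions;
         * use DCCP_SOCKOPT_TX_CCID/RX_CCID to set one side only */
        if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CCID,
                       ccids, sizeof(ccids)) < 0) {
            perror("DCCP_SOCKOPT_CCID");
            return -1;
        }
        return 0;
    }
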
@@ -517,58 +520,43 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
lock_sock(sk);
switch (optname) {
- case DCCP_SOCKOPT_PACKET_SIZE:
- DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
- err = 0;
- break;
- case DCCP_SOCKOPT_CHANGE_L:
- if (optlen != sizeof(struct dccp_so_feat))
- err = -EINVAL;
- else
- err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
- (struct dccp_so_feat __user *)
- optval);
- break;
- case DCCP_SOCKOPT_CHANGE_R:
- if (optlen != sizeof(struct dccp_so_feat))
- err = -EINVAL;
- else
- err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
- (struct dccp_so_feat __user *)
- optval);
- break;
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
if (dp->dccps_role != DCCP_ROLE_SERVER)
err = -EOPNOTSUPP;
else
dp->dccps_server_timewait = (val != 0);
break;
- case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
- if (val < 0 || val > 15)
+ case DCCP_SOCKOPT_SEND_CSCOV:
+ err = dccp_setsockopt_cscov(sk, val, false);
+ break;
+ case DCCP_SOCKOPT_RECV_CSCOV:
+ err = dccp_setsockopt_cscov(sk, val, true);
+ break;
+ case DCCP_SOCKOPT_QPOLICY_ID:
+ if (sk->sk_state != DCCP_CLOSED)
+ err = -EISCONN;
+ else if (val < 0 || val >= DCCPQ_POLICY_MAX)
err = -EINVAL;
else
- dp->dccps_pcslen = val;
+ dp->dccps_qpolicy = val;
break;
- case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
- if (val < 0 || val > 15)
+ case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+ if (val < 0)
err = -EINVAL;
- else {
- dp->dccps_pcrlen = val;
- /* FIXME: add feature negotiation,
- * ChangeL(MinimumChecksumCoverage, val) */
- }
+ else
+ dp->dccps_tx_qlen = val;
break;
default:
err = -ENOPROTOOPT;
break;
}
-
release_sock(sk);
+
return err;
}
int dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
if (level != SOL_DCCP)
return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
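
The reworked DCCP_SOCKOPT_SEND_CSCOV/RECV_CSCOV handlers above now feed
into dccp_setsockopt_cscov(), which negotiates MinimumChecksumCoverage
rather than setting the value unilaterally. A minimal sender-side sketch,
assuming a DCCP socket fd (RFC 4340, sec. 9.2):

    #include <sys/socket.h>
    #include <linux/dccp.h>

    #ifndef SOL_DCCP
    #define SOL_DCCP 269                /* from <linux/socket.h> */
    #endif

    static int set_tx_cscov(int fd)
    {
        int cscov = 4;  /* offer the permissible range 4..15 to the peer */

        return setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
                          &cscov, sizeof(cscov));
    }
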
@@ -581,7 +569,7 @@ EXPORT_SYMBOL_GPL(dccp_setsockopt);
#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
- char __user *optval, int optlen)
+ char __user *optval, unsigned int optlen)
{
if (level != SOL_DCCP)
return inet_csk_compat_setsockopt(sk, level, optname,
@@ -644,6 +632,18 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
case DCCP_SOCKOPT_GET_CUR_MPS:
val = dp->dccps_mss_cache;
break;
+ case DCCP_SOCKOPT_AVAILABLE_CCIDS:
+ return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
+ case DCCP_SOCKOPT_TX_CCID:
+ val = ccid_get_current_tx_ccid(dp);
+ if (val < 0)
+ return -ENOPROTOOPT;
+ break;
+ case DCCP_SOCKOPT_RX_CCID:
+ val = ccid_get_current_rx_ccid(dp);
+ if (val < 0)
+ return -ENOPROTOOPT;
+ break;
case DCCP_SOCKOPT_SERVER_TIMEWAIT:
val = dp->dccps_server_timewait;
break;
@@ -653,6 +653,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
case DCCP_SOCKOPT_RECV_CSCOV:
val = dp->dccps_pcrlen;
break;
+ case DCCP_SOCKOPT_QPOLICY_ID:
+ val = dp->dccps_qpolicy;
+ break;
+ case DCCP_SOCKOPT_QPOLICY_TXQLEN:
+ val = dp->dccps_tx_qlen;
+ break;
case 128 ... 191:
return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
len, (u32 __user *)optval, optlen);
@@ -695,6 +701,47 @@ int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
+static int dccp_msghdr_parse(struct msghdr *msg, struct sk_buff *skb)
+{
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(msg);
+
+ /*
+ * Assign an (opaque) qpolicy priority value to skb->priority.
+ *
+ * We are overloading this skb field for use with the qpolicy subsystem.
+ * The skb->priority is normally used for the SO_PRIORITY option, which
+ * is initialised from sk_priority. Since the assignment of sk_priority
+ * to skb->priority happens later (on layer 3), we overload this field
+ * for use with queueing priorities as long as the skb is on layer 4.
+ * The default priority value (if nothing is set) is 0.
+ */
+ skb->priority = 0;
+
+ for (; cmsg != NULL; cmsg = CMSG_NXTHDR(msg, cmsg)) {
+
+ if (!CMSG_OK(msg, cmsg))
+ return -EINVAL;
+
+ if (cmsg->cmsg_level != SOL_DCCP)
+ continue;
+
+ if (cmsg->cmsg_type <= DCCP_SCM_QPOLICY_MAX &&
+ !dccp_qpolicy_param_ok(skb->sk, cmsg->cmsg_type))
+ return -EINVAL;
+
+ switch (cmsg->cmsg_type) {
+ case DCCP_SCM_PRIORITY:
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(__u32)))
+ return -EINVAL;
+ skb->priority = *(__u32 *)CMSG_DATA(cmsg);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
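
A hypothetical userspace counterpart to dccp_msghdr_parse() above. Note
that the priority only takes effect once DCCPQ_POLICY_PRIO has been
selected via DCCP_SOCKOPT_QPOLICY_ID; otherwise dccp_qpolicy_param_ok()
rejects the cmsg:

    #include <string.h>
    #include <sys/socket.h>
    #include <sys/uio.h>
    #include <linux/dccp.h>

    #ifndef SOL_DCCP
    #define SOL_DCCP 269                /* from <linux/socket.h> */
    #endif

    static ssize_t dccp_send_prio(int fd, const void *buf, size_t len,
                                  __u32 prio)
    {
        char cbuf[CMSG_SPACE(sizeof(prio))];
        struct iovec iov = { .iov_base = (void *)buf, .iov_len = len };
        struct msghdr msg = {
            .msg_iov        = &iov,
            .msg_iovlen     = 1,
            .msg_control    = cbuf,
            .msg_controllen = sizeof(cbuf),
        };
        struct cmsghdr *cmsg;

        memset(cbuf, 0, sizeof(cbuf));
        cmsg = CMSG_FIRSTHDR(&msg);
        cmsg->cmsg_level = SOL_DCCP;
        cmsg->cmsg_type  = DCCP_SCM_PRIORITY;   /* lands in skb->priority */
        cmsg->cmsg_len   = CMSG_LEN(sizeof(prio));
        memcpy(CMSG_DATA(cmsg), &prio, sizeof(prio));

        return sendmsg(fd, &msg, 0);
    }
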
@@ -710,8 +757,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
lock_sock(sk);
- if (sysctl_dccp_tx_qlen &&
- (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
+ if (dccp_qpolicy_full(sk)) {
rc = -EAGAIN;
goto out_release;
}
@@ -739,8 +785,18 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
if (rc != 0)
goto out_discard;
- skb_queue_tail(&sk->sk_write_queue, skb);
- dccp_write_xmit(sk,0);
+ rc = dccp_msghdr_parse(msg, skb);
+ if (rc != 0)
+ goto out_discard;
+
+ dccp_qpolicy_push(sk, skb);
+ /*
+ * The xmit_timer is set if the TX CCID is rate-based and will expire
+ * when congestion control permits to release further packets into the
+ * network. Window-based CCIDs do not use this timer.
+ */
+ if (!timer_pending(&dp->dccps_xmit_timer))
+ dccp_write_xmit(sk);
out_release:
release_sock(sk);
return rc ? : len;
@@ -792,7 +848,7 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
default:
dccp_pr_debug("packet_type=%s\n",
dccp_packet_name(dh->dccph_type));
- sk_eat_skb(sk, skb, 0);
+ sk_eat_skb(sk, skb, false);
}
verify_sock_status:
if (sock_flag(sk, SOCK_DONE)) {
@@ -845,9 +901,11 @@ verify_sock_status:
len = -EFAULT;
break;
}
+ if (flags & MSG_TRUNC)
+ len = skb->len;
found_fin_ok:
if (!(flags & MSG_PEEK))
- sk_eat_skb(sk, skb, 0);
+ sk_eat_skb(sk, skb, false);
break;
} while (1);
out:
@@ -956,23 +1014,35 @@ void dccp_close(struct sock *sk, long timeout)
if (data_was_unread) {
/* Unread data was tossed, send an appropriate Reset Code */
- DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
+ DCCP_WARN("ABORT with %u bytes unread\n", data_was_unread);
dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
dccp_set_state(sk, DCCP_CLOSED);
} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
/* Check zero linger _after_ checking for unread data. */
sk->sk_prot->disconnect(sk, 0);
} else if (sk->sk_state != DCCP_CLOSED) {
+ /*
+ * Normal connection termination. May need to wait if there are
+ * still packets in the TX queue that are delayed by the CCID.
+ */
+ dccp_flush_write_queue(sk, &timeout);
dccp_terminate_connection(sk);
}
+ /*
+ * Flush write queue. This may be necessary in several cases:
+ * - we have been closed by the peer but still have application data;
+ * - abortive termination (unread data or zero linger time);
+ * - normal termination, but the queue could not be flushed within the time limit.
+ */
+ __skb_queue_purge(&sk->sk_write_queue);
+
sk_stream_wait_close(sk, timeout);
adjudge_to_death:
state = sk->sk_state;
sock_hold(sk);
sock_orphan(sk);
- atomic_inc(sk->sk_prot->orphan_count);
/*
* It is the last release_sock in its life. It will remove backlog.
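
The zero-linger branch above mirrors TCP's abortive close. A standard-API
sketch of how userspace reaches it:

    #include <sys/socket.h>
    #include <unistd.h>

    static void dccp_abortive_close(int fd)
    {
        struct linger zero = { .l_onoff = 1, .l_linger = 0 };

        setsockopt(fd, SOL_SOCKET, SO_LINGER, &zero, sizeof(zero));
        close(fd);      /* calls disconnect(); queued data is dropped */
    }
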
@@ -984,7 +1054,9 @@ adjudge_to_death:
*/
local_bh_disable();
bh_lock_sock(sk);
- BUG_TRAP(!sock_owned_by_user(sk));
+ WARN_ON(sock_owned_by_user(sk));
+
+ percpu_counter_inc(sk->sk_prot->orphan_count);
/* Have we already been destroyed by a softirq or backlog? */
if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
@@ -1010,33 +1082,17 @@ void dccp_shutdown(struct sock *sk, int how)
EXPORT_SYMBOL_GPL(dccp_shutdown);
-static int __init dccp_mib_init(void)
+static inline int dccp_mib_init(void)
{
- int rc = -ENOMEM;
-
- dccp_statistics[0] = alloc_percpu(struct dccp_mib);
- if (dccp_statistics[0] == NULL)
- goto out;
-
- dccp_statistics[1] = alloc_percpu(struct dccp_mib);
- if (dccp_statistics[1] == NULL)
- goto out_free_one;
-
- rc = 0;
-out:
- return rc;
-out_free_one:
- free_percpu(dccp_statistics[0]);
- dccp_statistics[0] = NULL;
- goto out;
-
+ dccp_statistics = alloc_percpu(struct dccp_mib);
+ if (!dccp_statistics)
+ return -ENOMEM;
+ return 0;
}
-static void dccp_mib_exit(void)
+static inline void dccp_mib_exit(void)
{
- free_percpu(dccp_statistics[0]);
- free_percpu(dccp_statistics[1]);
- dccp_statistics[0] = dccp_statistics[1] = NULL;
+ free_percpu(dccp_statistics);
}
static int thash_entries;
@@ -1044,8 +1100,8 @@ module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
#ifdef CONFIG_IP_DCCP_DEBUG
-int dccp_debug;
-module_param(dccp_debug, bool, 0444);
+bool dccp_debug;
+module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
EXPORT_SYMBOL_GPL(dccp_debug);
@@ -1055,14 +1111,21 @@ static int __init dccp_init(void)
{
unsigned long goal;
int ehash_order, bhash_order, i;
- int rc = -ENOBUFS;
+ int rc;
+ BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
+ FIELD_SIZEOF(struct sk_buff, cb));
+ rc = percpu_counter_init(&dccp_orphan_count, 0);
+ if (rc)
+ goto out_fail;
+ rc = -ENOBUFS;
+ inet_hashinfo_init(&dccp_hashinfo);
dccp_hashinfo.bind_bucket_cachep =
kmem_cache_create("dccp_bind_bucket",
sizeof(struct inet_bind_bucket), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
- goto out;
+ goto out_free_percpu;
/*
* Size and allocate the main established and bind bucket
@@ -1070,10 +1133,10 @@ static int __init dccp_init(void)
*
* The methodology is similar to that of the buffer cache.
*/
- if (num_physpages >= (128 * 1024))
- goal = num_physpages >> (21 - PAGE_SHIFT);
+ if (totalram_pages >= (128 * 1024))
+ goal = totalram_pages >> (21 - PAGE_SHIFT);
else
- goal = num_physpages >> (23 - PAGE_SHIFT);
+ goal = totalram_pages >> (23 - PAGE_SHIFT);
if (thash_entries)
goal = (thash_entries *
@@ -1081,13 +1144,14 @@ static int __init dccp_init(void)
for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
;
do {
- dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
+ unsigned long hash_size = (1UL << ehash_order) * PAGE_SIZE /
sizeof(struct inet_ehash_bucket);
- while (dccp_hashinfo.ehash_size &
- (dccp_hashinfo.ehash_size - 1))
- dccp_hashinfo.ehash_size--;
+
+ while (hash_size & (hash_size - 1))
+ hash_size--;
+ dccp_hashinfo.ehash_mask = hash_size - 1;
dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
- __get_free_pages(GFP_ATOMIC, ehash_order);
+ __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, ehash_order);
} while (!dccp_hashinfo.ehash && --ehash_order > 0);
if (!dccp_hashinfo.ehash) {
@@ -1095,10 +1159,8 @@ static int __init dccp_init(void)
goto out_free_bind_bucket_cachep;
}
- for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
- INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
- }
+ for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
+ INIT_HLIST_NULLS_HEAD(&dccp_hashinfo.ehash[i].chain, i);
if (inet_ehash_locks_alloc(&dccp_hashinfo))
goto out_free_dccp_ehash;
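
The sizing loop above replaces ehash_size with ehash_mask: the bucket
count is rounded down to a power of two so that "hash & ehash_mask" can
index the table directly. A standalone sketch of the rounding rule:

    /* round n down to a power of two, as in the ehash sizing loop */
    static unsigned long round_down_pow2(unsigned long n)
    {
        while (n & (n - 1))     /* nonzero while more than one bit set */
            n--;
        return n;
    }
    /* e.g. round_down_pow2(3000) == 2048, giving ehash_mask == 2047 */
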
@@ -1112,7 +1174,7 @@ static int __init dccp_init(void)
bhash_order > 0)
continue;
dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
- __get_free_pages(GFP_ATOMIC, bhash_order);
+ __get_free_pages(GFP_ATOMIC|__GFP_NOWARN, bhash_order);
} while (!dccp_hashinfo.bhash && --bhash_order >= 0);
if (!dccp_hashinfo.bhash) {
@@ -1137,40 +1199,52 @@ static int __init dccp_init(void)
if (rc)
goto out_ackvec_exit;
+ rc = ccid_initialize_builtins();
+ if (rc)
+ goto out_sysctl_exit;
+
dccp_timestamping_init();
-out:
- return rc;
+
+ return 0;
+
+out_sysctl_exit:
+ dccp_sysctl_exit();
out_ackvec_exit:
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
- dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+out_free_percpu:
+ percpu_counter_destroy(&dccp_orphan_count);
+out_fail:
+ dccp_hashinfo.bhash = NULL;
+ dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
- goto out;
+ return rc;
}
static void __exit dccp_fini(void)
{
+ ccid_cleanup_builtins();
dccp_mib_exit();
free_pages((unsigned long)dccp_hashinfo.bhash,
get_order(dccp_hashinfo.bhash_size *
sizeof(struct inet_bind_hashbucket)));
free_pages((unsigned long)dccp_hashinfo.ehash,
- get_order(dccp_hashinfo.ehash_size *
+ get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
inet_ehash_locks_free(&dccp_hashinfo);
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
dccp_ackvec_exit();
dccp_sysctl_exit();
+ percpu_counter_destroy(&dccp_orphan_count);
}
module_init(dccp_init);