diff options
Diffstat (limited to 'net/dccp/ipv4.c')
| -rw-r--r-- | net/dccp/ipv4.c | 1131 |
1 files changed, 528 insertions, 603 deletions
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 00f98322667..6ca645c4b48 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -10,66 +10,49 @@ * 2 of the License, or (at your option) any later version. */ -#include <linux/config.h> #include <linux/dccp.h> #include <linux/icmp.h> +#include <linux/slab.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/random.h> #include <net/icmp.h> +#include <net/inet_common.h> #include <net/inet_hashtables.h> #include <net/inet_sock.h> +#include <net/protocol.h> #include <net/sock.h> #include <net/timewait_sock.h> #include <net/tcp_states.h> #include <net/xfrm.h> +#include <net/secure_seq.h> #include "ackvec.h" #include "ccid.h" #include "dccp.h" +#include "feat.h" -struct inet_hashinfo __cacheline_aligned dccp_hashinfo = { - .lhash_lock = RW_LOCK_UNLOCKED, - .lhash_users = ATOMIC_INIT(0), - .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait), -}; - -EXPORT_SYMBOL_GPL(dccp_hashinfo); - -static int dccp_v4_get_port(struct sock *sk, const unsigned short snum) -{ - return inet_csk_get_port(&dccp_hashinfo, sk, snum, - inet_csk_bind_conflict); -} - -static void dccp_v4_hash(struct sock *sk) -{ - inet_hash(&dccp_hashinfo, sk); -} - -void dccp_unhash(struct sock *sk) -{ - inet_unhash(&dccp_hashinfo, sk); -} - -EXPORT_SYMBOL_GPL(dccp_unhash); +/* + * The per-net dccp.v4_ctl_sk socket is used for responding to + * the Out-of-the-blue (OOTB) packets. A control sock will be created + * for this socket at the initialization time. + */ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { + const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct inet_sock *inet = inet_sk(sk); struct dccp_sock *dp = dccp_sk(sk); - const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + __be16 orig_sport, orig_dport; + __be32 daddr, nexthop; + struct flowi4 *fl4; struct rtable *rt; - u32 daddr, nexthop; - int tmp; int err; + struct ip_options_rcu *inet_opt; dp->dccps_role = DCCP_ROLE_CLIENT; - if (dccp_service_not_initialized(sk)) - return -EPROTO; - if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -77,37 +60,43 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) return -EAFNOSUPPORT; nexthop = daddr = usin->sin_addr.s_addr; - if (inet->opt != NULL && inet->opt->srr) { + + inet_opt = rcu_dereference_protected(inet->inet_opt, + sock_owned_by_user(sk)); + if (inet_opt != NULL && inet_opt->opt.srr) { if (daddr == 0) return -EINVAL; - nexthop = inet->opt->faddr; + nexthop = inet_opt->opt.faddr; } - tmp = ip_route_connect(&rt, nexthop, inet->saddr, - RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, - IPPROTO_DCCP, - inet->sport, usin->sin_port, sk); - if (tmp < 0) - return tmp; + orig_sport = inet->inet_sport; + orig_dport = usin->sin_port; + fl4 = &inet->cork.fl.u.ip4; + rt = ip_route_connect(fl4, nexthop, inet->inet_saddr, + RT_CONN_FLAGS(sk), sk->sk_bound_dev_if, + IPPROTO_DCCP, + orig_sport, orig_dport, sk); + if (IS_ERR(rt)) + return PTR_ERR(rt); if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) { ip_rt_put(rt); return -ENETUNREACH; } - if (inet->opt == NULL || !inet->opt->srr) - daddr = rt->rt_dst; + if (inet_opt == NULL || !inet_opt->opt.srr) + daddr = fl4->daddr; - if (inet->saddr == 0) - inet->saddr = rt->rt_src; - inet->rcv_saddr = inet->saddr; + if (inet->inet_saddr == 0) + inet->inet_saddr = fl4->saddr; + inet->inet_rcv_saddr = inet->inet_saddr; - inet->dport = usin->sin_port; - inet->daddr = daddr; + inet->inet_dport = usin->sin_port; + inet->inet_daddr = daddr; inet_csk(sk)->icsk_ext_hdr_len = 0; - if (inet->opt != NULL) - inet_csk(sk)->icsk_ext_hdr_len = inet->opt->optlen; + if (inet_opt) + inet_csk(sk)->icsk_ext_hdr_len = inet_opt->opt.optlen; /* * Socket identity is still unknown (sport may be zero). * However we set state to DCCP_REQUESTING and not releasing socket @@ -119,21 +108,21 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (err != 0) goto failure; - err = ip_route_newports(&rt, inet->sport, inet->dport, sk); - if (err != 0) + rt = ip_route_newports(fl4, rt, orig_sport, orig_dport, + inet->inet_sport, inet->inet_dport, sk); + if (IS_ERR(rt)) { + err = PTR_ERR(rt); + rt = NULL; goto failure; - + } /* OK, now commit destination to socket. */ - sk_setup_caps(sk, &rt->u.dst); - - dp->dccps_gar = - dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, - inet->daddr, - inet->sport, - usin->sin_port); - dccp_update_gss(sk, dp->dccps_iss); + sk_setup_caps(sk, &rt->dst); - inet->id = dp->dccps_iss ^ jiffies; + dp->dccps_iss = secure_dccp_sequence_number(inet->inet_saddr, + inet->inet_daddr, + inet->inet_sport, + inet->inet_dport); + inet->inet_id = dp->dccps_iss ^ jiffies; err = dccp_connect(sk); rt = NULL; @@ -148,7 +137,7 @@ failure: dccp_set_state(sk, DCCP_CLOSED); ip_rt_put(rt); sk->sk_route_caps = 0; - inet->dport = 0; + inet->inet_dport = 0; goto out; } @@ -172,17 +161,10 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, if (sk->sk_state == DCCP_LISTEN) return; - /* We don't check in the destentry if pmtu discovery is forbidden - * on this route. We just assume that no packet_to_big packets - * are send back when pmtu discovery is not active. - * There is a small race when the user changes this flag in the - * route, but I think that's acceptable. - */ - if ((dst = __sk_dst_check(sk, 0)) == NULL) + dst = inet_csk_update_pmtu(sk, mtu); + if (!dst) return; - dst->ops->update_pmtu(dst, mtu); - /* Something is about to be wrong... Remember soft error * for the case, if this connection will not able to recover. */ @@ -192,11 +174,12 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, mtu = dst_mtu(dst); if (inet->pmtudisc != IP_PMTUDISC_DONT && + ip_sk_accept_pmtu(sk) && inet_csk(sk)->icsk_pmtu_cookie > mtu) { dccp_sync_mss(sk, mtu); /* - * From: draft-ietf-dccp-spec-11.txt + * From RFC 4340, sec. 14.1: * * DCCP-Sync packets are the best choice for upward * probing, since DCCP-Sync probes do not risk application @@ -206,86 +189,12 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk, } /* else let the usual retransmit timer handle it */ } -static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb) -{ - int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_ack_bits); - struct sk_buff *skb; - - if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) - return; - - skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); - if (skb == NULL) - return; - - /* Reserve space for headers. */ - skb_reserve(skb, MAX_DCCP_HEADER); - - skb->dst = dst_clone(rxskb->dst); - - skb->h.raw = skb_push(skb, dccp_hdr_ack_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_ack_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_ACK; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_ack_len / 4; - dh->dccph_x = 1; - - dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - bh_lock_sock(dccp_ctl_socket->sk); - err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, - rxskb->nh.iph->daddr, - rxskb->nh.iph->saddr, NULL); - bh_unlock_sock(dccp_ctl_socket->sk); - - if (err == NET_XMIT_CN || err == 0) { - DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); - DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); - } -} - -static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, - struct request_sock *req) -{ - dccp_v4_ctl_send_ack(skb); -} - -static int dccp_v4_send_response(struct sock *sk, struct request_sock *req, - struct dst_entry *dst) +static void dccp_do_redirect(struct sk_buff *skb, struct sock *sk) { - int err = -1; - struct sk_buff *skb; - - /* First, grab a route. */ - - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) - goto out; - - skb = dccp_make_response(sk, dst, req); - if (skb != NULL) { - const struct inet_request_sock *ireq = inet_rsk(req); - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr, - ireq->rmt_addr, - ireq->opt); - if (err == NET_XMIT_CN) - err = 0; - } + struct dst_entry *dst = __sk_dst_check(sk, 0); -out: - dst_release(dst); - return err; + if (dst) + dst->ops->redirect(dst, sk, skb); } /* @@ -300,33 +209,36 @@ out: * check at all. A more general error queue to queue errors for later handling * is probably better. */ -void dccp_v4_err(struct sk_buff *skb, u32 info) +static void dccp_v4_err(struct sk_buff *skb, u32 info) { const struct iphdr *iph = (struct iphdr *)skb->data; - const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + - (iph->ihl << 2)); + const u8 offset = iph->ihl << 2; + const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + offset); struct dccp_sock *dp; struct inet_sock *inet; - const int type = skb->h.icmph->type; - const int code = skb->h.icmph->code; + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; struct sock *sk; __u64 seq; int err; + struct net *net = dev_net(skb->dev); - if (skb->len < (iph->ihl << 2) + 8) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + if (skb->len < offset + sizeof(*dh) || + skb->len < offset + __dccp_basic_hdr_len(dh)) { + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } - sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport, - iph->saddr, dh->dccph_sport, inet_iif(skb)); + sk = inet_lookup(net, &dccp_hashinfo, + iph->daddr, dh->dccph_dport, + iph->saddr, dh->dccph_sport, inet_iif(skb)); if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + ICMP_INC_STATS_BH(net, ICMP_MIB_INERRORS); return; } if (sk->sk_state == DCCP_TIME_WAIT) { - inet_twsk_put((struct inet_timewait_sock *)sk); + inet_twsk_put(inet_twsk(sk)); return; } @@ -335,20 +247,23 @@ void dccp_v4_err(struct sk_buff *skb, u32 info) * servers this needs to be solved differently. */ if (sock_owned_by_user(sk)) - NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS); + NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS); if (sk->sk_state == DCCP_CLOSED) goto out; dp = dccp_sk(sk); - seq = dccp_hdr_seq(skb); - if (sk->sk_state != DCCP_LISTEN && - !between48(seq, dp->dccps_swl, dp->dccps_swh)) { - NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS); + seq = dccp_hdr_seq(dh); + if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_LISTEN) && + !between48(seq, dp->dccps_awl, dp->dccps_awh)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } switch (type) { + case ICMP_REDIRECT: + dccp_do_redirect(skb, sk); + goto out; case ICMP_SOURCE_QUENCH: /* Just silently ignore these. */ goto out; @@ -388,10 +303,11 @@ void dccp_v4_err(struct sk_buff *skb, u32 info) * ICMPs are not backlogged, hence we cannot get an established * socket here. */ - BUG_TRAP(!req->sk); + WARN_ON(req->sk); - if (seq != dccp_rsk(req)->dreq_iss) { - NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS); + if (!between48(seq, dccp_rsk(req)->dreq_iss, + dccp_rsk(req)->dreq_gss)) { + NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } /* @@ -444,136 +360,33 @@ out: sock_put(sk); } -/* This routine computes an IPv4 DCCP checksum. */ -void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb) +static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb, + __be32 src, __be32 dst) +{ + return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum); +} + +void dccp_v4_send_check(struct sock *sk, struct sk_buff *skb) { const struct inet_sock *inet = inet_sk(sk); struct dccp_hdr *dh = dccp_hdr(skb); - dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr); + dccp_csum_outgoing(skb); + dh->dccph_checksum = dccp_v4_csum_finish(skb, + inet->inet_saddr, + inet->inet_daddr); } EXPORT_SYMBOL_GPL(dccp_v4_send_check); -int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code) +static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb) { - struct sk_buff *skb; - /* - * FIXME: what if rebuild_header fails? - * Should we be doing a rebuild_header here? - */ - int err = inet_sk_rebuild_header(sk); - - if (err != 0) - return err; - - skb = dccp_make_reset(sk, sk->sk_dst_cache, code); - if (skb != NULL) { - const struct inet_sock *inet = inet_sk(sk); - - memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - err = ip_build_and_send_pkt(skb, sk, - inet->saddr, inet->daddr, NULL); - if (err == NET_XMIT_CN) - err = 0; - } - - return err; -} - -static inline u64 dccp_v4_init_sequence(const struct sock *sk, - const struct sk_buff *skb) -{ - return secure_dccp_sequence_number(skb->nh.iph->daddr, - skb->nh.iph->saddr, + return secure_dccp_sequence_number(ip_hdr(skb)->daddr, + ip_hdr(skb)->saddr, dccp_hdr(skb)->dccph_dport, dccp_hdr(skb)->dccph_sport); } -int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) -{ - struct inet_request_sock *ireq; - struct dccp_sock dp; - struct request_sock *req; - struct dccp_request_sock *dreq; - const __u32 saddr = skb->nh.iph->saddr; - const __u32 daddr = skb->nh.iph->daddr; - const __u32 service = dccp_hdr_request(skb)->dccph_req_service; - struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); - __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY; - - /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ - if (((struct rtable *)skb->dst)->rt_flags & - (RTCF_BROADCAST | RTCF_MULTICAST)) { - reset_code = DCCP_RESET_CODE_NO_CONNECTION; - goto drop; - } - - if (dccp_bad_service_code(sk, service)) { - reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; - goto drop; - } - /* - * TW buckets are converted to open requests without - * limitations, they conserve resources and peer is - * evidently real one. - */ - if (inet_csk_reqsk_queue_is_full(sk)) - goto drop; - - /* - * Accept backlog is full. If we have already queued enough - * of warm entries in syn queue, drop request. It is better than - * clogging syn queue with openreqs with exponentially increasing - * timeout. - */ - if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) - goto drop; - - req = reqsk_alloc(sk->sk_prot->rsk_prot); - if (req == NULL) - goto drop; - - /* FIXME: process options */ - - dccp_openreq_init(req, &dp, skb); - - ireq = inet_rsk(req); - ireq->loc_addr = daddr; - ireq->rmt_addr = saddr; - req->rcv_wnd = 100; /* Fake, option parsing will get the - right value */ - ireq->opt = NULL; - - /* - * Step 3: Process LISTEN state - * - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * - * In fact we defer setting S.GSR, S.SWL, S.SWH to - * dccp_create_openreq_child. - */ - dreq = dccp_rsk(req); - dreq->dreq_isr = dcb->dccpd_seq; - dreq->dreq_iss = dccp_v4_init_sequence(sk, skb); - dreq->dreq_service = service; - - if (dccp_v4_send_response(sk, req, NULL)) - goto drop_and_free; - - inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); - return 0; - -drop_and_free: - reqsk_free(req); -drop: - DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); - dcb->dccpd_reset_code = reset_code; - return -1; -} - -EXPORT_SYMBOL_GPL(dccp_v4_conn_request); - /* * The three way handshake has completed - we got a valid ACK or DATAACK - * now create the new socket. @@ -586,46 +399,50 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, { struct inet_request_sock *ireq; struct inet_sock *newinet; - struct dccp_sock *newdp; struct sock *newsk; if (sk_acceptq_is_full(sk)) goto exit_overflow; - if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL) - goto exit; - newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) - goto exit; + goto exit_nonewsk; - sk_setup_caps(newsk, dst); - - newdp = dccp_sk(newsk); newinet = inet_sk(newsk); ireq = inet_rsk(req); - newinet->daddr = ireq->rmt_addr; - newinet->rcv_saddr = ireq->loc_addr; - newinet->saddr = ireq->loc_addr; - newinet->opt = ireq->opt; + newinet->inet_daddr = ireq->ir_rmt_addr; + newinet->inet_rcv_saddr = ireq->ir_loc_addr; + newinet->inet_saddr = ireq->ir_loc_addr; + newinet->inet_opt = ireq->opt; ireq->opt = NULL; newinet->mc_index = inet_iif(skb); - newinet->mc_ttl = skb->nh.iph->ttl; - newinet->id = jiffies; + newinet->mc_ttl = ip_hdr(skb)->ttl; + newinet->inet_id = jiffies; + + if (dst == NULL && (dst = inet_csk_route_child_sock(sk, newsk, req)) == NULL) + goto put_and_exit; + + sk_setup_caps(newsk, dst); dccp_sync_mss(newsk, dst_mtu(dst)); - __inet_hash(&dccp_hashinfo, newsk, 0); - __inet_inherit_port(&dccp_hashinfo, sk, newsk); + if (__inet_inherit_port(sk, newsk) < 0) + goto put_and_exit; + __inet_hash_nolisten(newsk, NULL); return newsk; exit_overflow: - NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS); -exit: - NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); +exit_nonewsk: dst_release(dst); +exit: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; +put_and_exit: + inet_csk_prepare_forced_close(newsk); + dccp_done(newsk); + goto exit; } EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); @@ -633,7 +450,7 @@ EXPORT_SYMBOL_GPL(dccp_v4_request_recv_sock); static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) { const struct dccp_hdr *dh = dccp_hdr(skb); - const struct iphdr *iph = skb->nh.iph; + const struct iphdr *iph = ip_hdr(skb); struct sock *nsk; struct request_sock **prev; /* Find possible connection requests. */ @@ -643,147 +460,111 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, ntohs(dh->dccph_dport), - inet_iif(skb)); + nsk = inet_lookup_established(sock_net(sk), &dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, dh->dccph_dport, + inet_iif(skb)); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); return nsk; } - inet_twsk_put((struct inet_timewait_sock *)nsk); + inet_twsk_put(inet_twsk(nsk)); return NULL; } return sk; } -int dccp_v4_checksum(const struct sk_buff *skb, const u32 saddr, - const u32 daddr) +static struct dst_entry* dccp_v4_route_skb(struct net *net, struct sock *sk, + struct sk_buff *skb) { - const struct dccp_hdr* dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; + struct rtable *rt; + const struct iphdr *iph = ip_hdr(skb); + struct flowi4 fl4 = { + .flowi4_oif = inet_iif(skb), + .daddr = iph->saddr, + .saddr = iph->daddr, + .flowi4_tos = RT_CONN_FLAGS(sk), + .flowi4_proto = sk->sk_protocol, + .fl4_sport = dccp_hdr(skb)->dccph_dport, + .fl4_dport = dccp_hdr(skb)->dccph_sport, + }; + + security_skb_classify_flow(skb, flowi4_to_flowi(&fl4)); + rt = ip_route_output_flow(net, &fl4, sk); + if (IS_ERR(rt)) { + IP_INC_STATS_BH(net, IPSTATS_MIB_OUTNOROUTES); + return NULL; } - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp); + return &rt->dst; } -static int dccp_v4_verify_checksum(struct sk_buff *skb, - const u32 saddr, const u32 daddr) +static int dccp_v4_send_response(struct sock *sk, struct request_sock *req) { - struct dccp_hdr *dh = dccp_hdr(skb); - int checksum_len; - u32 tmp; - - if (dh->dccph_cscov == 0) - checksum_len = skb->len; - else { - checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32); - checksum_len = checksum_len < skb->len ? checksum_len : - skb->len; - } - tmp = csum_partial((unsigned char *)dh, checksum_len, 0); - return csum_tcpudp_magic(saddr, daddr, checksum_len, - IPPROTO_DCCP, tmp) == 0 ? 0 : -1; -} + int err = -1; + struct sk_buff *skb; + struct dst_entry *dst; + struct flowi4 fl4; -static struct dst_entry* dccp_v4_route_skb(struct sock *sk, - struct sk_buff *skb) -{ - struct rtable *rt; - struct flowi fl = { .oif = ((struct rtable *)skb->dst)->rt_iif, - .nl_u = { .ip4_u = - { .daddr = skb->nh.iph->saddr, - .saddr = skb->nh.iph->daddr, - .tos = RT_CONN_FLAGS(sk) } }, - .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = dccp_hdr(skb)->dccph_dport, - .dport = dccp_hdr(skb)->dccph_sport } - } - }; - - if (ip_route_output_flow(&rt, &fl, sk, 0)) { - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - return NULL; + dst = inet_csk_route_req(sk, &fl4, req); + if (dst == NULL) + goto out; + + skb = dccp_make_response(sk, dst, req); + if (skb != NULL) { + const struct inet_request_sock *ireq = inet_rsk(req); + struct dccp_hdr *dh = dccp_hdr(skb); + + dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->ir_loc_addr, + ireq->ir_rmt_addr); + err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, + ireq->ir_rmt_addr, + ireq->opt); + err = net_xmit_eval(err); } - return &rt->u.dst; +out: + dst_release(dst); + return err; } -static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb) +static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb) { int err; - struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh; - const int dccp_hdr_reset_len = sizeof(struct dccp_hdr) + - sizeof(struct dccp_hdr_ext) + - sizeof(struct dccp_hdr_reset); + const struct iphdr *rxiph; struct sk_buff *skb; struct dst_entry *dst; - u64 seqno; + struct net *net = dev_net(skb_dst(rxskb)->dev); + struct sock *ctl_sk = net->dccp.v4_ctl_sk; /* Never send a reset in response to a reset. */ - if (rxdh->dccph_type == DCCP_PKT_RESET) + if (dccp_hdr(rxskb)->dccph_type == DCCP_PKT_RESET) return; - if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL) + if (skb_rtable(rxskb)->rt_type != RTN_LOCAL) return; - dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb); + dst = dccp_v4_route_skb(net, ctl_sk, rxskb); if (dst == NULL) return; - skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC); + skb = dccp_ctl_make_reset(ctl_sk, rxskb); if (skb == NULL) goto out; - /* Reserve space for headers. */ - skb_reserve(skb, MAX_DCCP_HEADER); - skb->dst = dst_clone(dst); + rxiph = ip_hdr(rxskb); + dccp_hdr(skb)->dccph_checksum = dccp_v4_csum_finish(skb, rxiph->saddr, + rxiph->daddr); + skb_dst_set(skb, dst_clone(dst)); - skb->h.raw = skb_push(skb, dccp_hdr_reset_len); - dh = dccp_hdr(skb); - memset(dh, 0, dccp_hdr_reset_len); - - /* Build DCCP header and checksum it. */ - dh->dccph_type = DCCP_PKT_RESET; - dh->dccph_sport = rxdh->dccph_dport; - dh->dccph_dport = rxdh->dccph_sport; - dh->dccph_doff = dccp_hdr_reset_len / 4; - dh->dccph_x = 1; - dccp_hdr_reset(skb)->dccph_reset_code = - DCCP_SKB_CB(rxskb)->dccpd_reset_code; - - /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */ - seqno = 0; - if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ) - dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1); - - dccp_hdr_set_seq(dh, seqno); - dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), - DCCP_SKB_CB(rxskb)->dccpd_seq); - - dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr, - rxskb->nh.iph->daddr); - - bh_lock_sock(dccp_ctl_socket->sk); - err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk, - rxskb->nh.iph->daddr, - rxskb->nh.iph->saddr, NULL); - bh_unlock_sock(dccp_ctl_socket->sk); - - if (err == NET_XMIT_CN || err == 0) { + bh_lock_sock(ctl_sk); + err = ip_build_and_send_pkt(skb, ctl_sk, + rxiph->daddr, rxiph->saddr, NULL); + bh_unlock_sock(ctl_sk); + + if (net_xmit_eval(err) == 0) { DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS); DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS); } @@ -791,6 +572,107 @@ out: dst_release(dst); } +static void dccp_v4_reqsk_destructor(struct request_sock *req) +{ + dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); + kfree(inet_rsk(req)->opt); +} + +void dccp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +{ +} +EXPORT_SYMBOL(dccp_syn_ack_timeout); + +static struct request_sock_ops dccp_request_sock_ops __read_mostly = { + .family = PF_INET, + .obj_size = sizeof(struct dccp_request_sock), + .rtx_syn_ack = dccp_v4_send_response, + .send_ack = dccp_reqsk_send_ack, + .destructor = dccp_v4_reqsk_destructor, + .send_reset = dccp_v4_ctl_send_reset, + .syn_ack_timeout = dccp_syn_ack_timeout, +}; + +int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) +{ + struct inet_request_sock *ireq; + struct request_sock *req; + struct dccp_request_sock *dreq; + const __be32 service = dccp_hdr_request(skb)->dccph_req_service; + struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb); + + /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */ + if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) + return 0; /* discard, don't send a reset here */ + + if (dccp_bad_service_code(sk, service)) { + dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE; + goto drop; + } + /* + * TW buckets are converted to open requests without + * limitations, they conserve resources and peer is + * evidently real one. + */ + dcb->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY; + if (inet_csk_reqsk_queue_is_full(sk)) + goto drop; + + /* + * Accept backlog is full. If we have already queued enough + * of warm entries in syn queue, drop request. It is better than + * clogging syn queue with openreqs with exponentially increasing + * timeout. + */ + if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1) + goto drop; + + req = inet_reqsk_alloc(&dccp_request_sock_ops); + if (req == NULL) + goto drop; + + if (dccp_reqsk_init(req, dccp_sk(sk), skb)) + goto drop_and_free; + + dreq = dccp_rsk(req); + if (dccp_parse_options(sk, dreq, skb)) + goto drop_and_free; + + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + + ireq = inet_rsk(req); + ireq->ir_loc_addr = ip_hdr(skb)->daddr; + ireq->ir_rmt_addr = ip_hdr(skb)->saddr; + + /* + * Step 3: Process LISTEN state + * + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie + * + * Setting S.SWL/S.SWH to is deferred to dccp_create_openreq_child(). + */ + dreq->dreq_isr = dcb->dccpd_seq; + dreq->dreq_gsr = dreq->dreq_isr; + dreq->dreq_iss = dccp_v4_init_sequence(skb); + dreq->dreq_gss = dreq->dreq_iss; + dreq->dreq_service = service; + + if (dccp_v4_send_response(sk, req)) + goto drop_and_free; + + inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + return 0; + +drop_and_free: + reqsk_free(req); +drop: + DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS); + return -1; +} + +EXPORT_SYMBOL_GPL(dccp_v4_conn_request); + int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) { struct dccp_hdr *dh = dccp_hdr(skb); @@ -803,24 +685,23 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) /* * Step 3: Process LISTEN state - * If S.state == LISTEN, - * If P.type == Request or P contains a valid Init Cookie - * option, - * * Must scan the packet's options to check for an Init - * Cookie. Only the Init Cookie is processed here, - * however; other options are processed in Step 8. This - * scan need only be performed if the endpoint uses Init - * Cookies * - * * Generate a new socket and switch to that socket * - * Set S := new socket for this port pair - * S.state = RESPOND - * Choose S.ISS (initial seqno) or set from Init Cookie - * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie - * Continue with S.state == RESPOND - * * A Response packet will be generated in Step 11 * - * Otherwise, - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If P.type == Request or P contains a valid Init Cookie option, + * (* Must scan the packet's options to check for Init + * Cookies. Only Init Cookies are processed here, + * however; other options are processed in Step 8. This + * scan need only be performed if the endpoint uses Init + * Cookies *) + * (* Generate a new socket and switch to that socket *) + * Set S := new socket for this port pair + * S.state = RESPOND + * Choose S.ISS (initial seqno) or set from Init Cookies + * Initialize S.GAR := S.ISS + * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies + * Continue with S.state == RESPOND + * (* A Response packet will be generated in Step 11 *) + * Otherwise, + * Generate Reset(No Connection) unless P.type == Reset + * Drop packet and return * * NOTE: the check for the packet types is done in * dccp_rcv_state_process @@ -843,7 +724,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; reset: - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); discard: kfree_skb(skb); return 0; @@ -851,95 +732,108 @@ discard: EXPORT_SYMBOL_GPL(dccp_v4_do_rcv); +/** + * dccp_invalid_packet - check for malformed packets + * Implements RFC 4340, 8.5: Step 1: Check header basics + * Packets that fail these checks are ignored and do not receive Resets. + */ int dccp_invalid_packet(struct sk_buff *skb) { const struct dccp_hdr *dh; + unsigned int cscov; if (skb->pkt_type != PACKET_HOST) return 1; + /* If the packet is shorter than 12 bytes, drop packet and return */ if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n"); + DCCP_WARN("pskb_may_pull failed\n"); return 1; } dh = dccp_hdr(skb); - /* If the packet type is not understood, drop packet and return */ + /* If P.type is not understood, drop packet and return */ if (dh->dccph_type >= DCCP_PKT_INVALID) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n"); + DCCP_WARN("invalid packet type\n"); return 1; } /* - * If P.Data Offset is too small for packet type, or too large for - * packet, drop packet and return + * If P.Data Offset is too small for packet type, drop packet and return */ if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 1\n", - dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff); return 1; } - + /* + * If P.Data Offset is too too large for packet, drop packet and return + */ if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) " - "too small 2\n", - dh->dccph_doff); + DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff); return 1; } - dh = dccp_hdr(skb); - /* * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet * has short sequence numbers), drop packet and return */ - if (dh->dccph_x == 0 && - dh->dccph_type != DCCP_PKT_DATA && - dh->dccph_type != DCCP_PKT_ACK && - dh->dccph_type != DCCP_PKT_DATAACK) { - LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack " - "nor DataAck and P.X == 0\n", - dccp_packet_name(dh->dccph_type)); + if ((dh->dccph_type < DCCP_PKT_DATA || + dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0) { + DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n", + dccp_packet_name(dh->dccph_type)); return 1; } + /* + * If P.CsCov is too large for the packet size, drop packet and return. + * This must come _before_ checksumming (not as RFC 4340 suggests). + */ + cscov = dccp_csum_coverage(skb); + if (cscov > skb->len) { + DCCP_WARN("P.CsCov %u exceeds packet length %d\n", + dh->dccph_cscov, skb->len); + return 1; + } + + /* If header checksum is incorrect, drop packet and return. + * (This step is completed in the AF-dependent functions.) */ + skb->csum = skb_checksum(skb, 0, cscov, 0); + return 0; } EXPORT_SYMBOL_GPL(dccp_invalid_packet); /* this is called when real data arrives */ -int dccp_v4_rcv(struct sk_buff *skb) +static int dccp_v4_rcv(struct sk_buff *skb) { const struct dccp_hdr *dh; + const struct iphdr *iph; struct sock *sk; + int min_cov; - /* Step 1: Check header basics: */ + /* Step 1: Check header basics */ if (dccp_invalid_packet(skb)) goto discard_it; - /* If the header checksum is incorrect, drop packet and return */ - if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr, - skb->nh.iph->daddr) < 0) { - LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n", - __FUNCTION__); + iph = ip_hdr(skb); + /* Step 1: If header checksum is incorrect, drop packet and return */ + if (dccp_v4_csum_finish(skb, iph->saddr, iph->daddr)) { + DCCP_WARN("dropped packet with invalid checksum\n"); goto discard_it; } dh = dccp_hdr(skb); - DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(skb); + DCCP_SKB_CB(skb)->dccpd_seq = dccp_hdr_seq(dh); DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type; - dccp_pr_debug("%8.8s " - "src=%u.%u.%u.%u@%-5d " - "dst=%u.%u.%u.%u@%-5d seq=%llu", + dccp_pr_debug("%8.8s src=%pI4@%-5d dst=%pI4@%-5d seq=%llu", dccp_packet_name(dh->dccph_type), - NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport), - NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport), + &iph->saddr, ntohs(dh->dccph_sport), + &iph->daddr, ntohs(dh->dccph_dport), (unsigned long long) DCCP_SKB_CB(skb)->dccpd_seq); if (dccp_packet_without_ack(skb)) { @@ -947,23 +841,17 @@ int dccp_v4_rcv(struct sk_buff *skb) dccp_pr_debug_cat("\n"); } else { DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb); - dccp_pr_debug_cat(", ack=%llu\n", - (unsigned long long) + dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long) DCCP_SKB_CB(skb)->dccpd_ack_seq); } /* Step 2: - * Look up flow ID in table and get corresponding socket */ - sk = __inet_lookup(&dccp_hashinfo, - skb->nh.iph->saddr, dh->dccph_sport, - skb->nh.iph->daddr, ntohs(dh->dccph_dport), - inet_iif(skb)); - - /* + * Look up flow ID in table and get corresponding socket */ + sk = __inet_lookup_skb(&dccp_hashinfo, skb, + dh->dccph_sport, dh->dccph_dport); + /* * Step 2: - * If no socket ... - * Generate Reset(No Connection) unless P.type == Reset - * Drop packet and return + * If no socket ... */ if (sk == NULL) { dccp_pr_debug("failed to look up flow ID in table and " @@ -971,177 +859,100 @@ int dccp_v4_rcv(struct sk_buff *skb) goto no_dccp_socket; } - /* + /* * Step 2: - * ... or S.state == TIMEWAIT, + * ... or S.state == TIMEWAIT, * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ - if (sk->sk_state == DCCP_TIME_WAIT) { - dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: " - "do_time_wait\n"); - goto do_time_wait; + dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n"); + inet_twsk_put(inet_twsk(sk)); + goto no_dccp_socket; + } + + /* + * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage + * o if MinCsCov = 0, only packets with CsCov = 0 are accepted + * o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov + */ + min_cov = dccp_sk(sk)->dccps_pcrlen; + if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov)) { + dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n", + dh->dccph_cscov, min_cov); + /* FIXME: "Such packets SHOULD be reported using Data Dropped + * options (Section 11.7) with Drop Code 0, Protocol + * Constraints." */ + goto discard_and_relse; } if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; nf_reset(skb); - return sk_receive_skb(sk, skb); + return sk_receive_skb(sk, skb, 1); no_dccp_socket: if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard_it; /* * Step 2: + * If no socket ... * Generate Reset(No Connection) unless P.type == Reset * Drop packet and return */ if (dh->dccph_type != DCCP_PKT_RESET) { DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION; - dccp_v4_ctl_send_reset(skb); + dccp_v4_ctl_send_reset(sk, skb); } discard_it: - /* Discard frame. */ kfree_skb(skb); return 0; discard_and_relse: sock_put(sk); goto discard_it; - -do_time_wait: - inet_twsk_put((struct inet_timewait_sock *)sk); - goto no_dccp_socket; } -struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { - .queue_xmit = ip_queue_xmit, - .send_check = dccp_v4_send_check, - .rebuild_header = inet_sk_rebuild_header, - .conn_request = dccp_v4_conn_request, - .syn_recv_sock = dccp_v4_request_recv_sock, - .net_header_len = sizeof(struct iphdr), - .setsockopt = ip_setsockopt, - .getsockopt = ip_getsockopt, - .addr2sockaddr = inet_csk_addr2sockaddr, - .sockaddr_len = sizeof(struct sockaddr_in), +static const struct inet_connection_sock_af_ops dccp_ipv4_af_ops = { + .queue_xmit = ip_queue_xmit, + .send_check = dccp_v4_send_check, + .rebuild_header = inet_sk_rebuild_header, + .conn_request = dccp_v4_conn_request, + .syn_recv_sock = dccp_v4_request_recv_sock, + .net_header_len = sizeof(struct iphdr), + .setsockopt = ip_setsockopt, + .getsockopt = ip_getsockopt, + .addr2sockaddr = inet_csk_addr2sockaddr, + .sockaddr_len = sizeof(struct sockaddr_in), + .bind_conflict = inet_csk_bind_conflict, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_ip_setsockopt, + .compat_getsockopt = compat_ip_getsockopt, +#endif }; -int dccp_v4_init_sock(struct sock *sk) +static int dccp_v4_init_sock(struct sock *sk) { - struct dccp_sock *dp = dccp_sk(sk); - struct inet_connection_sock *icsk = inet_csk(sk); - static int dccp_ctl_socket_init = 1; - - dccp_options_init(&dp->dccps_options); - do_gettimeofday(&dp->dccps_epoch); + static __u8 dccp_v4_ctl_sock_initialized; + int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized); - if (dp->dccps_options.dccpo_send_ack_vector) { - dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(DCCP_MAX_ACKVEC_LEN, - GFP_KERNEL); - if (dp->dccps_hc_rx_ackvec == NULL) - return -ENOMEM; + if (err == 0) { + if (unlikely(!dccp_v4_ctl_sock_initialized)) + dccp_v4_ctl_sock_initialized = 1; + inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops; } - /* - * FIXME: We're hardcoding the CCID, and doing this at this point makes - * the listening (master) sock get CCID control blocks, which is not - * necessary, but for now, to not mess with the test userspace apps, - * lets leave it here, later the real solution is to do this in a - * setsockopt(CCIDs-I-want/accept). -acme - */ - if (likely(!dccp_ctl_socket_init)) { - dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_rx_ccid, - sk); - dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_tx_ccid, - sk); - if (dp->dccps_hc_rx_ccid == NULL || - dp->dccps_hc_tx_ccid == NULL) { - ccid_exit(dp->dccps_hc_rx_ccid, sk); - ccid_exit(dp->dccps_hc_tx_ccid, sk); - if (dp->dccps_options.dccpo_send_ack_vector) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; - return -ENOMEM; - } - } else - dccp_ctl_socket_init = 0; - - dccp_init_xmit_timers(sk); - icsk->icsk_rto = DCCP_TIMEOUT_INIT; - sk->sk_state = DCCP_CLOSED; - sk->sk_write_space = dccp_write_space; - icsk->icsk_af_ops = &dccp_ipv4_af_ops; - icsk->icsk_sync_mss = dccp_sync_mss; - dp->dccps_mss_cache = 536; - dp->dccps_role = DCCP_ROLE_UNDEFINED; - dp->dccps_service = DCCP_SERVICE_INVALID_VALUE; - - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_v4_init_sock); - -int dccp_v4_destroy_sock(struct sock *sk) -{ - struct dccp_sock *dp = dccp_sk(sk); - - /* - * DCCP doesn't use sk_write_queue, just sk_send_head - * for retransmissions - */ - if (sk->sk_send_head != NULL) { - kfree_skb(sk->sk_send_head); - sk->sk_send_head = NULL; - } - - /* Clean up a referenced DCCP bind bucket. */ - if (inet_csk(sk)->icsk_bind_hash != NULL) - inet_put_port(&dccp_hashinfo, sk); - - kfree(dp->dccps_service_list); - dp->dccps_service_list = NULL; - - ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk); - if (dp->dccps_options.dccpo_send_ack_vector) { - dccp_ackvec_free(dp->dccps_hc_rx_ackvec); - dp->dccps_hc_rx_ackvec = NULL; - } - ccid_exit(dp->dccps_hc_rx_ccid, sk); - ccid_exit(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; - - return 0; -} - -EXPORT_SYMBOL_GPL(dccp_v4_destroy_sock); - -static void dccp_v4_reqsk_destructor(struct request_sock *req) -{ - kfree(inet_rsk(req)->opt); + return err; } -static struct request_sock_ops dccp_request_sock_ops = { - .family = PF_INET, - .obj_size = sizeof(struct dccp_request_sock), - .rtx_syn_ack = dccp_v4_send_response, - .send_ack = dccp_v4_reqsk_send_ack, - .destructor = dccp_v4_reqsk_destructor, - .send_reset = dccp_v4_ctl_send_reset, -}; - static struct timewait_sock_ops dccp_timewait_sock_ops = { .twsk_obj_size = sizeof(struct inet_timewait_sock), }; -struct proto dccp_prot = { +static struct proto dccp_v4_prot = { .name = "DCCP", .owner = THIS_MODULE, .close = dccp_close, @@ -1154,17 +965,131 @@ struct proto dccp_prot = { .sendmsg = dccp_sendmsg, .recvmsg = dccp_recvmsg, .backlog_rcv = dccp_v4_do_rcv, - .hash = dccp_v4_hash, - .unhash = dccp_unhash, + .hash = inet_hash, + .unhash = inet_unhash, .accept = inet_csk_accept, - .get_port = dccp_v4_get_port, + .get_port = inet_csk_get_port, .shutdown = dccp_shutdown, - .destroy = dccp_v4_destroy_sock, + .destroy = dccp_destroy_sock, .orphan_count = &dccp_orphan_count, .max_header = MAX_DCCP_HEADER, .obj_size = sizeof(struct dccp_sock), + .slab_flags = SLAB_DESTROY_BY_RCU, .rsk_prot = &dccp_request_sock_ops, .twsk_prot = &dccp_timewait_sock_ops, + .h.hashinfo = &dccp_hashinfo, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_dccp_setsockopt, + .compat_getsockopt = compat_dccp_getsockopt, +#endif +}; + +static const struct net_protocol dccp_v4_protocol = { + .handler = dccp_v4_rcv, + .err_handler = dccp_v4_err, + .no_policy = 1, + .netns_ok = 1, + .icmp_strict_tag_validation = 1, +}; + +static const struct proto_ops inet_dccp_ops = { + .family = PF_INET, + .owner = THIS_MODULE, + .release = inet_release, + .bind = inet_bind, + .connect = inet_stream_connect, + .socketpair = sock_no_socketpair, + .accept = inet_accept, + .getname = inet_getname, + /* FIXME: work on tcp_poll to rename it to inet_csk_poll */ + .poll = dccp_poll, + .ioctl = inet_ioctl, + /* FIXME: work on inet_listen to rename it to sock_common_listen */ + .listen = inet_dccp_listen, + .shutdown = inet_shutdown, + .setsockopt = sock_common_setsockopt, + .getsockopt = sock_common_getsockopt, + .sendmsg = inet_sendmsg, + .recvmsg = sock_common_recvmsg, + .mmap = sock_no_mmap, + .sendpage = sock_no_sendpage, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_sock_common_setsockopt, + .compat_getsockopt = compat_sock_common_getsockopt, +#endif +}; + +static struct inet_protosw dccp_v4_protosw = { + .type = SOCK_DCCP, + .protocol = IPPROTO_DCCP, + .prot = &dccp_v4_prot, + .ops = &inet_dccp_ops, + .flags = INET_PROTOSW_ICSK, +}; + +static int __net_init dccp_v4_init_net(struct net *net) +{ + if (dccp_hashinfo.bhash == NULL) + return -ESOCKTNOSUPPORT; + + return inet_ctl_sock_create(&net->dccp.v4_ctl_sk, PF_INET, + SOCK_DCCP, IPPROTO_DCCP, net); +} + +static void __net_exit dccp_v4_exit_net(struct net *net) +{ + inet_ctl_sock_destroy(net->dccp.v4_ctl_sk); +} + +static struct pernet_operations dccp_v4_ops = { + .init = dccp_v4_init_net, + .exit = dccp_v4_exit_net, }; -EXPORT_SYMBOL_GPL(dccp_prot); +static int __init dccp_v4_init(void) +{ + int err = proto_register(&dccp_v4_prot, 1); + + if (err != 0) + goto out; + + err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP); + if (err != 0) + goto out_proto_unregister; + + inet_register_protosw(&dccp_v4_protosw); + + err = register_pernet_subsys(&dccp_v4_ops); + if (err) + goto out_destroy_ctl_sock; +out: + return err; +out_destroy_ctl_sock: + inet_unregister_protosw(&dccp_v4_protosw); + inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); +out_proto_unregister: + proto_unregister(&dccp_v4_prot); + goto out; +} + +static void __exit dccp_v4_exit(void) +{ + unregister_pernet_subsys(&dccp_v4_ops); + inet_unregister_protosw(&dccp_v4_protosw); + inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP); + proto_unregister(&dccp_v4_prot); +} + +module_init(dccp_v4_init); +module_exit(dccp_v4_exit); + +/* + * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33) + * values directly, Also cover the case where the protocol is not specified, + * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP + */ +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 33, 6); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_INET, 0, 6); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>"); +MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol"); |
