diff options
-rw-r--r-- | include/linux/udp.h | 10 | ||||
-rw-r--r-- | include/net/ipv6.h | 5 | ||||
-rw-r--r-- | include/net/transp_v6.h | 5 | ||||
-rw-r--r-- | include/net/udplite.h | 9 | ||||
-rw-r--r-- | net/ipv4/Kconfig | 10 | ||||
-rw-r--r-- | net/ipv4/Makefile | 3 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 7 | ||||
-rw-r--r-- | net/ipv4/proc.c | 5 | ||||
-rw-r--r-- | net/ipv4/udp.c | 1090 | ||||
-rw-r--r-- | net/ipv4/udp_ipv4.c | 1134 | ||||
-rw-r--r-- | net/ipv4/udplite.c (renamed from net/ipv4/udplite_ipv4.c) | 0 | ||||
-rw-r--r-- | net/ipv6/Makefile | 3 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 14 | ||||
-rw-r--r-- | net/ipv6/ipv6_sockglue.c | 6 | ||||
-rw-r--r-- | net/ipv6/proc.c | 6 | ||||
-rw-r--r-- | net/ipv6/udp.c (renamed from net/ipv6/udp_ipv6.c) | 16 | ||||
-rw-r--r-- | net/ipv6/udplite.c (renamed from net/ipv6/udplite_ipv6.c) | 0 |
17 files changed, 1101 insertions, 1222 deletions
diff --git a/include/linux/udp.h b/include/linux/udp.h index 4144664d69d..1e7b7cb5703 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -70,10 +70,8 @@ struct udp_sock { #define UDPLITE_BIT 0x1 /* set by udplite proto init function */ #define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */ #define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */ -#ifdef CONFIG_IP_UDPLITE __u8 pcflag; /* marks socket as UDP-Lite if > 0 */ __u8 unused[3]; -#endif /* * For encapsulation sockets. */ @@ -85,15 +83,7 @@ static inline struct udp_sock *udp_sk(const struct sock *sk) return (struct udp_sock *)sk; } -#ifdef CONFIG_IP_UDPLITE #define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) -#define IS_PROTO_UDPLITE(__proto) ((__proto) == IPPROTO_UDPLITE) -#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP || (level) == SOL_UDPLITE) -#else -#define IS_UDPLITE(__sk) 0 -#define IS_PROTO_UDPLITE(__proto) 0 -#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP) -#endif #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 5f6df50a33a..8db06af1efb 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -599,13 +599,8 @@ extern int tcp6_proc_init(void); extern void tcp6_proc_exit(void); extern int udp6_proc_init(void); extern void udp6_proc_exit(void); -#ifdef CONFIG_IP_UDPLITE extern int udplite6_proc_init(void); extern void udplite6_proc_exit(void); -#else -static inline int udplite6_proc_init(void) { return 0; } -static inline void udplite6_proc_exit(void) { } -#endif extern int ipv6_misc_proc_init(void); extern void ipv6_misc_proc_exit(void); extern int snmp6_register_dev(struct inet6_dev *idev); diff --git a/include/net/transp_v6.h b/include/net/transp_v6.h index 902e6c6bc79..27394e0447d 100644 --- a/include/net/transp_v6.h +++ b/include/net/transp_v6.h @@ -27,13 +27,8 @@ extern int rawv6_init(void); extern void rawv6_exit(void); extern int udpv6_init(void); extern void udpv6_exit(void); -#ifdef CONFIG_IP_UDPLITE extern int udplitev6_init(void); extern void udplitev6_exit(void); -#else -static inline int udplitev6_init(void) { return 0; } -static inline void udplitev6_exit(void) { } -#endif extern int tcpv6_init(void); extern void tcpv6_exit(void); diff --git a/include/net/udplite.h b/include/net/udplite.h index 01ddb2c2026..b76b2e377af 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -25,9 +25,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, /* Designate sk as UDP-Lite socket */ static inline int udplite_sk_init(struct sock *sk) { -#ifdef CONFIG_IP_UDPLITE udp_sk(sk)->pcflag = UDPLITE_BIT; -#endif return 0; } @@ -71,7 +69,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh) static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) { int cscov = up->len; -#ifdef CONFIG_IP_UDPLITE + /* * Sender has set `partial coverage' option on UDP-Lite socket */ @@ -95,15 +93,13 @@ static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh) * illegal, we fall back to the defaults here. */ } -#endif return cscov; } static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) { - __wsum csum = 0; -#ifdef CONFIG_IP_UDPLITE int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb)); + __wsum csum = 0; skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */ @@ -116,7 +112,6 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb) if ((cscov -= len) <= 0) break; } -#endif return csum; } diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 5098fd2ff4d..9c7e5ffb223 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -632,15 +632,5 @@ config TCP_MD5SIG If unsure, say N. -config IP_UDPLITE - bool "IP: UDP-Lite Protocol (RFC 3828)" - default n - ---help--- - UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length - checksum. Read <file:Documentation/networking/udplite.txt> for - details. - - If unsure, say N. - source "net/ipv4/ipvs/Kconfig" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index d5226241d5e..ad40ef3f9eb 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o \ - datagram.o raw.o udp.o udp_ipv4.o \ + datagram.o raw.o udp.o udplite.o \ arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o \ inet_fragment.o @@ -49,7 +49,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_IP_UDPLITE) += udplite_ipv4.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 25871c6c744..4cb8a138553 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1317,18 +1317,15 @@ static int __init init_ipv4_mibs(void) if (snmp_mib_init((void **)udp_statistics, sizeof(struct udp_mib)) < 0) goto err_udp_mib; -#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_statistics, sizeof(struct udp_mib)) < 0) goto err_udplite_mib; -#endif + tcp_mib_init(); return 0; -#ifdef CONFIG_IP_UDPLITE err_udplite_mib: -#endif snmp_mib_free((void **)udp_statistics); err_udp_mib: snmp_mib_free((void **)tcp_statistics); @@ -1426,10 +1423,8 @@ static int __init inet_init(void) /* Setup UDP memory threshold */ udp_init(); -#ifdef CONFIG_IP_UDPLITE /* Add UDP-Lite (RFC 3828) */ udplite4_register(); -#endif /* * Set the ICMP layer up diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index d75ddb7fa4b..d63474c6b40 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,9 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) atomic_read(&tcp_memory_allocated)); seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot), atomic_read(&udp_memory_allocated)); -#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot)); -#endif seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot)); seq_printf(seq, "FRAG: inuse %d memory %d\n", ip_frag_nqueues(&init_net), ip_frag_mem(&init_net)); @@ -351,7 +349,6 @@ static int snmp_seq_show(struct seq_file *seq, void *v) snmp_fold_field((void **)udp_statistics, snmp4_udp_list[i].entry)); -#ifdef CONFIG_IP_UDPLITE /* the UDP and UDP-Lite MIBs are the same */ seq_puts(seq, "\nUdpLite:"); for (i = 0; snmp4_udp_list[i].name != NULL; i++) @@ -362,7 +359,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, " %lu", snmp_fold_field((void **)udplite_statistics, snmp4_udp_list[i].entry)); -#endif + seq_putc(seq, '\n'); return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index c53d7673b57..7ea1b67b6de 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -246,6 +246,553 @@ int udp_get_port(struct sock *sk, unsigned short snum, return __udp_lib_get_port(sk, snum, udp_hash, scmp); } +int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) +{ + struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); + + return ( !ipv6_only_sock(sk2) && + (!inet1->rcv_saddr || !inet2->rcv_saddr || + inet1->rcv_saddr == inet2->rcv_saddr )); +} + +static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); +} + +/* UDP is nearly always wildcards out the wazoo, it makes no sense to try + * harder than this. -DaveM + */ +static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, + __be16 sport, __be32 daddr, __be16 dport, + int dif, struct hlist_head udptable[]) +{ + struct sock *sk, *result = NULL; + struct hlist_node *node; + unsigned short hnum = ntohs(dport); + int badness = -1; + + read_lock(&udp_hash_lock); + sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { + struct inet_sock *inet = inet_sk(sk); + + if (sk->sk_net == net && sk->sk_hash == hnum && + !ipv6_only_sock(sk)) { + int score = (sk->sk_family == PF_INET ? 1 : 0); + if (inet->rcv_saddr) { + if (inet->rcv_saddr != daddr) + continue; + score+=2; + } + if (inet->daddr) { + if (inet->daddr != saddr) + continue; + score+=2; + } + if (inet->dport) { + if (inet->dport != sport) + continue; + score+=2; + } + if (sk->sk_bound_dev_if) { + if (sk->sk_bound_dev_if != dif) + continue; + score+=2; + } + if (score == 9) { + result = sk; + break; + } else if (score > badness) { + result = sk; + badness = score; + } + } + } + if (result) + sock_hold(result); + read_unlock(&udp_hash_lock); + return result; +} + +static inline struct sock *udp_v4_mcast_next(struct sock *sk, + __be16 loc_port, __be32 loc_addr, + __be16 rmt_port, __be32 rmt_addr, + int dif) +{ + struct hlist_node *node; + struct sock *s = sk; + unsigned short hnum = ntohs(loc_port); + + sk_for_each_from(s, node) { + struct inet_sock *inet = inet_sk(s); + + if (s->sk_hash != hnum || + (inet->daddr && inet->daddr != rmt_addr) || + (inet->dport != rmt_port && inet->dport) || + (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || + ipv6_only_sock(s) || + (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) + continue; + if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) + continue; + goto found; + } + s = NULL; +found: + return s; +} + +/* + * This routine is called by the ICMP module when it gets some + * sort of error condition. If err < 0 then the socket should + * be closed and the error returned to the user. If err > 0 + * it's just the icmp type << 8 | icmp code. + * Header points to the ip header of the error packet. We move + * on past this. Then (as it used to claim before adjustment) + * header points to the first 8 bytes of the udp header. We need + * to find the appropriate port. + */ + +void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) +{ + struct inet_sock *inet; + struct iphdr *iph = (struct iphdr*)skb->data; + struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct sock *sk; + int harderr; + int err; + + sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, + iph->saddr, uh->source, skb->dev->ifindex, udptable); + if (sk == NULL) { + ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); + return; /* No socket for error */ + } + + err = 0; + harderr = 0; + inet = inet_sk(sk); + + switch (type) { + default: + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + case ICMP_SOURCE_QUENCH: + goto out; + case ICMP_PARAMETERPROB: + err = EPROTO; + harderr = 1; + break; + case ICMP_DEST_UNREACH: + if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ + if (inet->pmtudisc != IP_PMTUDISC_DONT) { + err = EMSGSIZE; + harderr = 1; + break; + } + goto out; + } + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + } + + /* + * RFC1122: OK. Passes ICMP errors back to application, as per + * 4.1.3.3. + */ + if (!inet->recverr) { + if (!harderr || sk->sk_state != TCP_ESTABLISHED) + goto out; + } else { + ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); + } + sk->sk_err = err; + sk->sk_error_report(sk); +out: + sock_put(sk); +} + +void udp_err(struct sk_buff *skb, u32 info) +{ + __udp4_lib_err(skb, info, udp_hash); +} + +/* + * Throw away all pending data and cancel the corking. Socket is locked. + */ +static void udp_flush_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + + if (up->pending) { + up->len = 0; + up->pending = 0; + ip_flush_pending_frames(sk); + } +} + +/** + * udp4_hwcsum_outgoing - handle outgoing HW checksumming + * @sk: socket we are sending on + * @skb: sk_buff containing the filled-in UDP header + * (checksum field must be zeroed out) + */ +static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, + __be32 src, __be32 dst, int len ) +{ + unsigned int offset; + struct udphdr *uh = udp_hdr(skb); + __wsum csum = 0; + + if (skb_queue_len(&sk->sk_write_queue) == 1) { + /* + * Only one fragment on the socket. + */ + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); + } else { + /* + * HW-checksum won't work as there are two or more + * fragments on the socket so that all csums of sk_buffs + * should be together + */ + offset = skb_transport_offset(skb); + skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + + skb->ip_summed = CHECKSUM_NONE; + + skb_queue_walk(&sk->sk_write_queue, skb) { + csum = csum_add(csum, skb->csum); + } + + uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + } +} + +/* + * Push out all pending data as one UDP datagram. Socket is locked. + */ +static int udp_push_pending_frames(struct sock *sk) +{ + struct udp_sock *up = udp_sk(sk); + struct inet_sock *inet = inet_sk(sk); + struct flowi *fl = &inet->cork.fl; + struct sk_buff *skb; + struct udphdr *uh; + int err = 0; + int is_udplite = IS_UDPLITE(sk); + __wsum csum = 0; + + /* Grab the skbuff where UDP header space exists. */ + if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) + goto out; + + /* + * Create a UDP header + */ + uh = udp_hdr(skb); + uh->source = fl->fl_ip_sport; + uh->dest = fl->fl_ip_dport; + uh->len = htons(up->len); + uh->check = 0; + + if (is_udplite) /* UDP-Lite */ + csum = udplite_csum_outgoing(sk, skb); + + else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ + + skb->ip_summed = CHECKSUM_NONE; + goto send; + + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ + + udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); + goto send; + + } else /* `normal' UDP */ + csum = udp_csum_outgoing(sk, skb); + + /* add protocol-dependent pseudo-header */ + uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, + sk->sk_protocol, csum ); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + +send: + err = ip_push_pending_frames(sk); +out: + up->len = 0; + up->pending = 0; + if (!err) + UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); + return err; +} + +int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len) +{ + struct inet_sock *inet = inet_sk(sk); + struct udp_sock *up = udp_sk(sk); + int ulen = len; + struct ipcm_cookie ipc; + struct rtable *rt = NULL; + int free = 0; + int connected = 0; + __be32 daddr, faddr, saddr; + __be16 dport; + u8 tos; + int err, is_udplite = IS_UDPLITE(sk); + int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; + int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); + + if (len > 0xFFFF) + return -EMSGSIZE; + + /* + * Check the flags. + */ + + if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ + return -EOPNOTSUPP; + + ipc.opt = NULL; + + if (up->pending) { + /* + * There are pending frames. + * The socket lock must be held while it's corked. + */ + lock_sock(sk); + if (likely(up->pending)) { + if (unlikely(up->pending != AF_INET)) { + release_sock(sk); + return -EINVAL; + } + goto do_append_data; + } + release_sock(sk); + } + ulen += sizeof(struct udphdr); + + /* + * Get and verify the address. + */ + if (msg->msg_name) { + struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; + if (msg->msg_namelen < sizeof(*usin)) + return -EINVAL; + if (usin->sin_family != AF_INET) { + if (usin->sin_family != AF_UNSPEC) + return -EAFNOSUPPORT; + } + + daddr = usin->sin_addr.s_addr; + dport = usin->sin_port; + if (dport == 0) + return -EINVAL; + } else { + if (sk->sk_state != TCP_ESTABLISHED) + return -EDESTADDRREQ; + daddr = inet->daddr; + dport = inet->dport; + /* Open fast path for connected socket. + Route will not be used, if at least one option is set. + */ + connected = 1; + } + ipc.addr = inet->saddr; + + ipc.oif = sk->sk_bound_dev_if; + if (msg->msg_controllen) { + err = ip_cmsg_send(msg, &ipc); + if (err) + return err; + if (ipc.opt) + free = 1; + connected = 0; + } + if (!ipc.opt) + ipc.opt = inet->opt; + + saddr = ipc.addr; + ipc.addr = faddr = daddr; + + if (ipc.opt && ipc.opt->srr) { + if (!daddr) + return -EINVAL; + faddr = ipc.opt->faddr; + connected = 0; + } + tos = RT_TOS(inet->tos); + if (sock_flag(sk, SOCK_LOCALROUTE) || + (msg->msg_flags & MSG_DONTROUTE) || + (ipc.opt && ipc.opt->is_strictroute)) { + tos |= RTO_ONLINK; + connected = 0; + } + + if (ipv4_is_multicast(daddr)) { + if (!ipc.oif) + ipc.oif = inet->mc_index; + if (!saddr) + saddr = inet->mc_addr; + connected = 0; + } + + if (connected) + rt = (struct rtable*)sk_dst_check(sk, 0); + + if (rt == NULL) { + struct flowi fl = { .oif = ipc.oif, + .nl_u = { .ip4_u = + { .daddr = faddr, + .saddr = saddr, + .tos = tos } }, + .proto = sk->sk_protocol, + .uli_u = { .ports = + { .sport = inet->sport, + .dport = dport } } }; + security_sk_classify_flow(sk, &fl); + err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); + if (err) { + if (err == -ENETUNREACH) + IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); + goto out; + } + + err = -EACCES; + if ((rt->rt_flags & RTCF_BROADCAST) && + !sock_flag(sk, SOCK_BROADCAST)) + goto out; + if (connected) + sk_dst_set(sk, dst_clone(&rt->u.dst)); + } + + if (msg->msg_flags&MSG_CONFIRM) + goto do_confirm; +back_from_confirm: + + saddr = rt->rt_src; + if (!ipc.addr) + daddr = ipc.addr = rt->rt_dst; + + lock_sock(sk); + if (unlikely(up->pending)) { + /* The socket is already corked while preparing it. */ + /* ... which is an evident application bug. --ANK */ + release_sock(sk); + + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); + err = -EINVAL; + goto out; + } + /* + * Now cork the socket to pend data. + */ + inet->cork.fl.fl4_dst = daddr; + inet->cork.fl.fl_ip_dport = dport; + inet->cork.fl.fl4_src = saddr; + inet->cork.fl.fl_ip_sport = inet->sport; + up->pending = AF_INET; + +do_append_data: + up->len += ulen; + getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; + err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, + sizeof(struct udphdr), &ipc, rt, + corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); + if (err) + udp_flush_pending_frames(sk); + else if (!corkreq) + err = udp_push_pending_frames(sk); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; + release_sock(sk); + +out: + ip_rt_put(rt); + if (free) + kfree(ipc.opt); + if (!err) + return len; + /* + * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting + * ENOBUFS might not be good (it's not tunable per se), but otherwise + * we don't have a good statistic (IpOutDiscards but it can be too many + * things). We could add another new stat but at least for now that + * seems like overkill. + */ + if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { + UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); + } + return err; + +do_confirm: + dst_confirm(&rt->u.dst); + if (!(msg->msg_flags&MSG_PROBE) || len) + goto back_from_confirm; + err = 0; + goto out; +} + +int udp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) +{ + struct udp_sock *up = udp_sk(sk); + int ret; + + if (!up->pending) { + struct msghdr msg = { .msg_flags = flags|MSG_MORE }; + + /* Call udp_sendmsg to specify destination address which + * sendpage interface can't pass. + * This will succeed only when the socket is connected. + */ + ret = udp_sendmsg(NULL, sk, &msg, 0); + if (ret < 0) + return ret; + } + + lock_sock(sk); + + if (unlikely(!up->pending)) { + release_sock(sk); + + LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); + return -EINVAL; + } + + ret = ip_append_page(sk, page, offset, size, flags); + if (ret == -EOPNOTSUPP) { + release_sock(sk); + return sock_no_sendpage(sk->sk_socket, page, offset, + size, flags); + } + if (ret < 0) { + udp_flush_pending_frames(sk); + goto out; + } + + up->len += size; + if (!(up->corkflag || (flags&MSG_MORE))) + ret = udp_push_pending_frames(sk); + if (!ret) + ret = size; +out: + release_sock(sk); + return ret; +} + /* * IOCTL requests applicable to the UDP protocol */ @@ -286,6 +833,107 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg) return 0; } +/* + * This should be easy, if there is something there we + * return it, otherwise we block. + */ + +int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, + size_t len, int noblock, int flags, int *addr_len) +{ + struct inet_sock *inet = inet_sk(sk); + struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; + struct sk_buff *skb; + unsigned int ulen, copied; + int peeked; + int err; + int is_udplite = IS_UDPLITE(sk); + + /* + * Check any passed addresses + */ + if (addr_len) + *addr_len=sizeof(*sin); + + if (flags & MSG_ERRQUEUE) + return ip_recv_error(sk, msg, len); + +try_again: + skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), + &peeked, &err); + if (!skb) + goto out; + + ulen = skb->len - sizeof(struct udphdr); + copied = len; + if (copied > ulen) + copied = ulen; + else if (copied < ulen) + msg->msg_flags |= MSG_TRUNC; + + /* + * If checksum is needed at all, try to do it while copying the + * data. If the data is truncated, or if we only want a partial + * coverage checksum (UDP-Lite), do it before the copy. + */ + + if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { + if (udp_lib_checksum_complete(skb)) + goto csum_copy_err; + } + + if (skb_csum_unnecessary(skb)) + err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), + msg->msg_iov, copied ); + else { + err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); + + if (err == -EINVAL) + goto csum_copy_err; + } + + if (err) + goto out_free; + + if (!peeked) + UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); + + sock_recv_timestamp(msg, sk, skb); + + /* Copy the address. */ + if (sin) + { + sin->sin_family = AF_INET; + sin->sin_port = udp_hdr(skb)->source; + sin->sin_addr.s_addr = ip_hdr(skb)->saddr; + memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); + } + if (inet->cmsg_flags) + ip_cmsg_recv(msg, skb); + + err = copied; + if (flags & MSG_TRUNC) + err = ulen; + +out_free: + lock_sock(sk); + skb_free_datagram(sk, skb); + release_sock(sk); +out: + return err; + +csum_copy_err: + lock_sock(sk); + if (!skb_kill_datagram(sk, skb, flags)) + UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); + release_sock(sk); + + if (noblock) + return -EAGAIN; + goto try_again; +} + + int udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -308,6 +956,319 @@ int udp_disconnect(struct sock *sk, int flags) return 0; } +/* returns: + * -1: error + * 0: success + * >0: "udp encap" protocol resubmission + * + * Note that in the success and error cases, the skb is assumed to + * have either been requeued or freed. + */ +int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) +{ + struct udp_sock *up = udp_sk(sk); + int rc; + int is_udplite = IS_UDPLITE(sk); + + /* + * Charge it to the socket, dropping if the queue is full. + */ + if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) + goto drop; + nf_reset(skb); + + if (up->encap_type) { + /* + * This is an encapsulation socket so pass the skb to + * the socket's udp_encap_rcv() hook. Otherwise, just + * fall through and pass this up the UDP socket. + * up->encap_rcv() returns the following value: + * =0 if skb was successfully passed to the encap + * handler or was discarded by it. + * >0 if skb should be passed on to UDP. + * <0 if skb should be resubmitted as proto -N + */ + + /* if we're overly short, let UDP handle it */ + if (skb->len > sizeof(struct udphdr) && + up->encap_rcv != NULL) { + int ret; + + ret = (*up->encap_rcv)(sk, skb); + if (ret <= 0) { + UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, + is_udplite); + return -ret; + } + } + + /* FALLTHROUGH -- it's a UDP Packet */ + } + + /* + * UDP-Lite specific tests, ignored on UDP sockets + */ + if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { + + /* + * MIB statistics other than incrementing the error count are + * disabled for the following two types of errors: these depend + * on the application settings, not on the functioning of the + * protocol stack as such. + * + * RFC 3828 here recommends (sec 3.3): "There should also be a + * way ... to ... at least let the receiving application block + * delivery of packets with coverage values less than a value + * provided by the application." + */ + if (up->pcrlen == 0) { /* full coverage was set */ + LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " + "%d while full coverage %d requested\n", + UDP_SKB_CB(skb)->cscov, skb->len); + goto drop; + } + /* The next case involves violating the min. coverage requested + * by the receiver. This is subtle: if receiver wants x and x is + * greater than the buffersize/MTU then receiver will complain + * that it wants x while sender emits packets of smaller size y. + * Therefore the above ...()->partial_cov statement is essential. + */ + if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { + LIMIT_NETDEBUG(KERN_WARNING + "UDPLITE: coverage %d too small, need min %d\n", + UDP_SKB_CB(skb)->cscov, up->pcrlen); + goto drop; + } + } + + if (sk->sk_filter) { + if (udp_lib_checksum_complete(skb)) + goto drop; + } + + if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { + /* Note that an ENOMEM error is charged twice */ + if (rc == -ENOMEM) + UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); + goto drop; + } + + return 0; + +drop: + UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); + kfree_skb(skb); + return -1; +} + +/* + * Multicasts and broadcasts go to each listener. + * + * Note: called only from the BH handler context, + * so we don't need to lock the hashes. + */ +static int __udp4_lib_mcast_deliver(struct sk_buff *skb, + struct udphdr *uh, + __be32 saddr, __be32 daddr, + struct hlist_head udptable[]) +{ + struct sock *sk; + int dif; + + read_lock(&udp_hash_lock); + sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); + dif = skb->dev->ifindex; + sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); + if (sk) { + struct sock *sknext = NULL; + + do { + struct sk_buff *skb1 = skb; + + sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, + uh->source, saddr, dif); + if (sknext) + skb1 = skb_clone(skb, GFP_ATOMIC); + + if (skb1) { + int ret = 0; + + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb1); + else + sk_add_backlog(sk, skb1); + bh_unlock_sock(sk); + + if (ret > 0) + /* we should probably re-process instead + * of dropping packets here. */ + kfree_skb(skb1); + } + sk = sknext; + } while (sknext); + } else + kfree_skb(skb); + read_unlock(&udp_hash_lock); + return 0; +} + +/* Initialize UDP checksum. If exited with zero value (success), + * CHECKSUM_UNNECESSARY means, that no more checks are required. + * Otherwise, csum completion requires chacksumming packet body, + * including udp header and folding it to skb->csum. + */ +static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, + int proto) +{ + const struct iphdr *iph; + int err; + + UDP_SKB_CB(skb)->partial_cov = 0; + UDP_SKB_CB(skb)->cscov = skb->len; + + if (proto == IPPROTO_UDPLITE) { + err = udplite_checksum_init(skb, uh); + if (err) + return err; + } + + iph = ip_hdr(skb); + if (uh->check == 0) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + } else if (skb->ip_summed == CHECKSUM_COMPLETE) { + if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, + proto, skb->csum)) + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + if (!skb_csum_unnecessary(skb)) + skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb->len, proto, 0); + /* Probably, we should checksum udp header (it should be in cache + * in any case) and data in tiny packets (< rx copybreak). + */ + + return 0; +} + +/* + * All we need to do is get the socket, and then do a checksum. + */ + +int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], + int proto) +{ + struct sock *sk; + struct udphdr *uh = udp_hdr(skb); + unsigned short ulen; + struct rtable *rt = (struct rtable*)skb->dst; + __be32 saddr = ip_hdr(skb)->saddr; + __be32 daddr = ip_hdr(skb)->daddr; + + /* + * Validate the packet. + */ + if (!pskb_may_pull(skb, sizeof(struct udphdr))) + goto drop; /* No space for header. */ + + ulen = ntohs(uh->len); + if (ulen > skb->len) + goto short_packet; + + if (proto == IPPROTO_UDP) { + /* UDP validates ulen. */ + if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) + goto short_packet; + uh = udp_hdr(skb); + } + + if (udp4_csum_init(skb, uh, proto)) + goto csum_error; + + if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) + return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); + + sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, + uh->dest, inet_iif(skb), udptable); + + if (sk != NULL) { + int ret = 0; + bh_lock_sock_nested(sk); + if (!sock_owned_by_user(sk)) + ret = udp_queue_rcv_skb(sk, skb); + else + sk_add_backlog(sk, skb); + bh_unlock_sock(sk); + sock_put(sk); + + /* a return value > 0 means to resubmit the input, but + * it wants the return to be -protocol, or 0 + */ + if (ret > 0) + return -ret; + return 0; + } + + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + nf_reset(skb); + + /* No socket. Drop packet silently, if checksum is wrong */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + + UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); + + /* + * Hmm. We got an UDP packet to a port to which we + * don't wanna listen. Ignore it. + */ + kfree_skb(skb); + return 0; + +short_packet: + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + NIPQUAD(saddr), + ntohs(uh->source), + ulen, + skb->len, + NIPQUAD(daddr), + ntohs(uh->dest)); + goto drop; + +csum_error: + /* + * RFC1122: OK. Discards the bad packet silently (as far as + * the network is concerned, anyway) as per 4.1.3.4 (MUST). + */ + LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", + proto == IPPROTO_UDPLITE ? "-Lite" : "", + NIPQUAD(saddr), + ntohs(uh->source), + NIPQUAD(daddr), + ntohs(uh->dest), + ulen); +drop: + UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); + kfree_skb(skb); + return 0; +} + +int udp_rcv(struct sk_buff *skb) +{ + return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); +} + +int udp_destroy_sock(struct sock *sk) +{ + lock_sock(sk); + udp_flush_pending_frames(sk); + release_sock(sk); + return 0; +} + /* * Socket option code for UDP */ @@ -318,9 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, struct udp_sock *up = udp_sk(sk); int val; int err = 0; -#ifdef CONFIG_IP_UDPLITE int is_udplite = IS_UDPLITE(sk); -#endif if (optlen<sizeof(int)) return -EINVAL; @@ -356,7 +1315,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, } break; -#ifdef CONFIG_IP_UDPLITE /* * UDP-Lite's partial checksum coverage (RFC 3828). */ @@ -382,7 +1340,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, up->pcrlen = val; up->pcflag |= UDPLITE_RECV_CC; break; -#endif default: err = -ENOPROTOOPT; @@ -392,6 +1349,26 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname, return err; } +int udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); + return ip_setsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +int compat_udp_setsockopt(struct sock *sk, int level, int optname, + char __user *optval, int optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_setsockopt(sk, level, optname, optval, optlen, + udp_push_pending_frames); + return compat_ip_setsockopt(sk, level, optname, optval, optlen); +} +#endif + int udp_lib_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -436,6 +1413,23 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname, return 0; } +int udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return ip_getsockopt(sk, level, optname, optval, optlen); +} + +#ifdef CONFIG_COMPAT +int compat_udp_getsockopt(struct sock *sk, int level, int optname, + char __user *optval, int __user *optlen) +{ + if (level == SOL_UDP || level == SOL_UDPLITE) + return udp_lib_getsockopt(sk, level, optname, optval, optlen); + return compat_ip_getsockopt(sk, level, optname, optval, optlen); +} +#endif /** * udp_poll - wait for a UDP event. * @file - file struct @@ -480,6 +1474,36 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait) } +DEFINE_PROTO_INUSE(udp) + +struct proto udp_prot = { + .name = "UDP", + .owner = THIS_MODULE, + .close = udp_lib_close, + .connect = ip4_datagram_connect, + .disconnect = udp_disconnect, + .ioctl = udp_ioctl, + .destroy = udp_destroy_sock, + .setsockopt = udp_setsockopt, + .getsockopt = udp_getsockopt, + .sendmsg = udp_sendmsg, + .recvmsg = udp_recvmsg, + .sendpage = udp_sendpage, + .backlog_rcv = udp_queue_rcv_skb, + .hash = udp_lib_hash, + .unhash = udp_lib_unhash, + .get_port = udp_v4_get_port, + .memory_allocated = &udp_memory_allocated, + .sysctl_mem = sysctl_udp_mem, + .sysctl_wmem = &sysctl_udp_wmem_min, + .sysctl_rmem = &sysctl_udp_rmem_min, + .obj_size = sizeof(struct udp_sock), +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_udp_setsockopt, + .compat_getsockopt = compat_udp_getsockopt, +#endif + REF_PROTO_INUSE(udp) +}; /* ------------------------------------------------------------------------ */ #ifdef CONFIG_PROC_FS @@ -612,6 +1636,62 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo) proc_net_remove(&init_net, afinfo->name); memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops)); } + +/* ------------------------------------------------------------------------ */ +static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) +{ + struct inet_sock *inet = inet_sk(sp); + __be32 dest = inet->daddr; + __be32 src = inet->rcv_saddr; + __u16 destp = ntohs(inet->dport); + __u16 srcp = ntohs(inet->sport); + + sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" + " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", + bucket, src, srcp, dest, destp, sp->sk_state, + atomic_read(&sp->sk_wmem_alloc), + atomic_read(&sp->sk_rmem_alloc), + 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + atomic_read(&sp->sk_refcnt), sp); +} + +int udp4_seq_show(struct seq_file *seq, void *v) +{ + if (v == SEQ_START_TOKEN) + seq_printf(seq, "%-127s\n", + " sl local_address rem_address st tx_queue " + "rx_queue tr tm->when retrnsmt uid timeout " + "inode"); + else { + char tmpbuf[129]; + struct udp_iter_state *state = seq->private; + + udp4_format_sock(v, tmpbuf, state->bucket); + seq_printf(seq, "%-127s\n", tmpbuf); + } + return 0; +} + +/* ------------------------------------------------------------------------ */ +static struct file_operations udp4_seq_fops; +static struct udp_seq_afinfo udp4_seq_afinfo = { + .owner = THIS_MODULE, + .name = "udp", + .family = AF_INET, + .hashtable = udp_hash, + .seq_show = udp4_seq_show, + .seq_fops = &udp4_seq_fops, +}; + +int __init udp4_proc_init(void) +{ + return udp_proc_register(&udp4_seq_afinfo); +} + +void udp4_proc_exit(void) +{ + udp_proc_unregister(&udp4_seq_afinfo); +} #endif /* CONFIG_PROC_FS */ void __init udp_init(void) @@ -638,6 +1718,8 @@ EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); EXPORT_SYMBOL(udp_get_port); +EXPORT_SYMBOL(udp_prot); +EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_lib_getsockopt); EXPORT_SYMBOL(udp_lib_setsockopt); EXPORT_SYMBOL(udp_poll); diff --git a/net/ipv4/udp_ipv4.c b/net/ipv4/udp_ipv4.c deleted file mode 100644 index fd14c2c50ed..00000000000 --- a/net/ipv4/udp_ipv4.c +++ /dev/null @@ -1,1134 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. INET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * UDP for IPv4. - * - * For full credits, see net/ipv4/udp.c. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <asm/system.h> -#include <asm/uaccess.h> -#include <asm/ioctls.h> -#include <linux/bootmem.h> -#include <linux/types.h> -#include <linux/fcntl.h> -#include <linux/module.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/igmp.h> -#include <linux/in.h> -#include <linux/errno.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/inet.h> -#include <linux/netdevice.h> -#include <net/tcp_states.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <net/net_namespace.h> -#include <net/icmp.h> -#include <net/route.h> -#include <net/checksum.h> -#include <net/xfrm.h> -#include "udp_impl.h" - -int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) -{ - struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); - - return ( !ipv6_only_sock(sk2) && - (!inet1->rcv_saddr || !inet2->rcv_saddr || - inet1->rcv_saddr == inet2->rcv_saddr )); -} - -static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) -{ - return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); -} - -/* UDP is nearly always wildcards out the wazoo, it makes no sense to try - * harder than this. -DaveM - */ -static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, - __be16 sport, __be32 daddr, __be16 dport, - int dif, struct hlist_head udptable[]) -{ - struct sock *sk, *result = NULL; - struct hlist_node *node; - unsigned short hnum = ntohs(dport); - int badness = -1; - - read_lock(&udp_hash_lock); - sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_sock *inet = inet_sk(sk); - - if (sk->sk_net == net && sk->sk_hash == hnum && - !ipv6_only_sock(sk)) { - int score = (sk->sk_family == PF_INET ? 1 : 0); - if (inet->rcv_saddr) { - if (inet->rcv_saddr != daddr) - continue; - score+=2; - } - if (inet->daddr) { - if (inet->daddr != saddr) - continue; - score+=2; - } - if (inet->dport) { - if (inet->dport != sport) - continue; - score+=2; - } - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - continue; - score+=2; - } - if (score == 9) { - result = sk; - break; - } else if (score > badness) { - result = sk; - badness = score; - } - } - } - if (result) - sock_hold(result); - read_unlock(&udp_hash_lock); - return result; -} - -static inline struct sock *udp_v4_mcast_next(struct sock *sk, - __be16 loc_port, __be32 loc_addr, - __be16 rmt_port, __be32 rmt_addr, - int dif) -{ - struct hlist_node *node; - struct sock *s = sk; - unsigned short hnum = ntohs(loc_port); - - sk_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); - - if (s->sk_hash != hnum || - (inet->daddr && inet->daddr != rmt_addr) || - (inet->dport != rmt_port && inet->dport) || - (inet->rcv_saddr && inet->rcv_saddr != loc_addr) || - ipv6_only_sock(s) || - (s->sk_bound_dev_if && s->sk_bound_dev_if != dif)) - continue; - if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif)) - continue; - goto found; - } - s = NULL; -found: - return s; -} - -/* - * This routine is called by the ICMP module when it gets some - * sort of error condition. If err < 0 then the socket should - * be closed and the error returned to the user. If err > 0 - * it's just the icmp type << 8 | icmp code. - * Header points to the ip header of the error packet. We move - * on past this. Then (as it used to claim before adjustment) - * header points to the first 8 bytes of the udp header. We need - * to find the appropriate port. - */ - -void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[]) -{ - struct inet_sock *inet; - struct iphdr *iph = (struct iphdr*)skb->data; - struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2)); - const int type = icmp_hdr(skb)->type; - const int code = icmp_hdr(skb)->code; - struct sock *sk; - int harderr; - int err; - - sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable); - if (sk == NULL) { - ICMP_INC_STATS_BH(ICMP_MIB_INERRORS); - return; /* No socket for error */ - } - - err = 0; - harderr = 0; - inet = inet_sk(sk); - - switch (type) { - default: - case ICMP_TIME_EXCEEDED: - err = EHOSTUNREACH; - break; - case ICMP_SOURCE_QUENCH: - goto out; - case ICMP_PARAMETERPROB: - err = EPROTO; - harderr = 1; - break; - case ICMP_DEST_UNREACH: - if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */ - if (inet->pmtudisc != IP_PMTUDISC_DONT) { - err = EMSGSIZE; - harderr = 1; - break; - } - goto out; - } - err = EHOSTUNREACH; - if (code <= NR_ICMP_UNREACH) { - harderr = icmp_err_convert[code].fatal; - err = icmp_err_convert[code].errno; - } - break; - } - - /* - * RFC1122: OK. Passes ICMP errors back to application, as per - * 4.1.3.3. - */ - if (!inet->recverr) { - if (!harderr || sk->sk_state != TCP_ESTABLISHED) - goto out; - } else { - ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1)); - } - sk->sk_err = err; - sk->sk_error_report(sk); -out: - sock_put(sk); -} - -void udp_err(struct sk_buff *skb, u32 info) -{ - __udp4_lib_err(skb, info, udp_hash); -} - -/* - * Throw away all pending data and cancel the corking. Socket is locked. - */ -static void udp_flush_pending_frames(struct sock *sk) -{ - struct udp_sock *up = udp_sk(sk); - - if (up->pending) { - up->len = 0; - up->pending = 0; - ip_flush_pending_frames(sk); - } -} - -/** - * udp4_hwcsum_outgoing - handle outgoing HW checksumming - * @sk: socket we are sending on - * @skb: sk_buff containing the filled-in UDP header - * (checksum field must be zeroed out) - */ -static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, - __be32 src, __be32 dst, int len ) -{ - unsigned int offset; - struct udphdr *uh = udp_hdr(skb); - __wsum csum = 0; - - if (skb_queue_len(&sk->sk_write_queue) == 1) { - /* - * Only one fragment on the socket. - */ - skb->csum_start = skb_transport_header(skb) - skb->head; - skb->csum_offset = offsetof(struct udphdr, check); - uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0); - } else { - /* - * HW-checksum won't work as there are two or more - * fragments on the socket so that all csums of sk_buffs - * should be together - */ - offset = skb_transport_offset(skb); - skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); - - skb->ip_summed = CHECKSUM_NONE; - - skb_queue_walk(&sk->sk_write_queue, skb) { - csum = csum_add(csum, skb->csum); - } - - uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - } -} - -/* - * Push out all pending data as one UDP datagram. Socket is locked. - */ -static int udp_push_pending_frames(struct sock *sk) -{ - struct udp_sock *up = udp_sk(sk); - struct inet_sock *inet = inet_sk(sk); - struct flowi *fl = &inet->cork.fl; - struct sk_buff *skb; - struct udphdr *uh; - int err = 0; - int is_udplite = IS_UDPLITE(sk); - __wsum csum = 0; - - /* Grab the skbuff where UDP header space exists. */ - if ((skb = skb_peek(&sk->sk_write_queue)) == NULL) - goto out; - - /* - * Create a UDP header - */ - uh = udp_hdr(skb); - uh->source = fl->fl_ip_sport; - uh->dest = fl->fl_ip_dport; - uh->len = htons(up->len); - uh->check = 0; - - if (is_udplite) /* UDP-Lite */ - csum = udplite_csum_outgoing(sk, skb); - - else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */ - - skb->ip_summed = CHECKSUM_NONE; - goto send; - - } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ - - udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len); - goto send; - - } else /* `normal' UDP */ - csum = udp_csum_outgoing(sk, skb); - - /* add protocol-dependent pseudo-header */ - uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len, - sk->sk_protocol, csum ); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - -send: - err = ip_push_pending_frames(sk); -out: - up->len = 0; - up->pending = 0; - if (!err) - UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite); - return err; -} - -int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) -{ - struct inet_sock *inet = inet_sk(sk); - struct udp_sock *up = udp_sk(sk); - int ulen = len; - struct ipcm_cookie ipc; - struct rtable *rt = NULL; - int free = 0; - int connected = 0; - __be32 daddr, faddr, saddr; - __be16 dport; - u8 tos; - int err, is_udplite = IS_UDPLITE(sk); - int corkreq = up->corkflag || msg->msg_flags&MSG_MORE; - int (*getfrag)(void *, char *, int, int, int, struct sk_buff *); - - if (len > 0xFFFF) - return -EMSGSIZE; - - /* - * Check the flags. - */ - - if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */ - return -EOPNOTSUPP; - - ipc.opt = NULL; - - if (up->pending) { - /* - * There are pending frames. - * The socket lock must be held while it's corked. - */ - lock_sock(sk); - if (likely(up->pending)) { - if (unlikely(up->pending != AF_INET)) { - release_sock(sk); - return -EINVAL; - } - goto do_append_data; - } - release_sock(sk); - } - ulen += sizeof(struct udphdr); - - /* - * Get and verify the address. - */ - if (msg->msg_name) { - struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name; - if (msg->msg_namelen < sizeof(*usin)) - return -EINVAL; - if (usin->sin_family != AF_INET) { - if (usin->sin_family != AF_UNSPEC) - return -EAFNOSUPPORT; - } - - daddr = usin->sin_addr.s_addr; - dport = usin->sin_port; - if (dport == 0) - return -EINVAL; - } else { - if (sk->sk_state != TCP_ESTABLISHED) - return -EDESTADDRREQ; - daddr = inet->daddr; - dport = inet->dport; - /* Open fast path for connected socket. - Route will not be used, if at least one option is set. - */ - connected = 1; - } - ipc.addr = inet->saddr; - - ipc.oif = sk->sk_bound_dev_if; - if (msg->msg_controllen) { - err = ip_cmsg_send(msg, &ipc); - if (err) - return err; - if (ipc.opt) - free = 1; - connected = 0; - } - if (!ipc.opt) - ipc.opt = inet->opt; - - saddr = ipc.addr; - ipc.addr = faddr = daddr; - - if (ipc.opt && ipc.opt->srr) { - if (!daddr) - return -EINVAL; - faddr = ipc.opt->faddr; - connected = 0; - } - tos = RT_TOS(inet->tos); - if (sock_flag(sk, SOCK_LOCALROUTE) || - (msg->msg_flags & MSG_DONTROUTE) || - (ipc.opt && ipc.opt->is_strictroute)) { - tos |= RTO_ONLINK; - connected = 0; - } - - if (ipv4_is_multicast(daddr)) { - if (!ipc.oif) - ipc.oif = inet->mc_index; - if (!saddr) - saddr = inet->mc_addr; - connected = 0; - } - - if (connected) - rt = (struct rtable*)sk_dst_check(sk, 0); - - if (rt == NULL) { - struct flowi fl = { .oif = ipc.oif, - .nl_u = { .ip4_u = - { .daddr = faddr, - .saddr = saddr, - .tos = tos } }, - .proto = sk->sk_protocol, - .uli_u = { .ports = - { .sport = inet->sport, - .dport = dport } } }; - security_sk_classify_flow(sk, &fl); - err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1); - if (err) { - if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); - goto out; - } - - err = -EACCES; - if ((rt->rt_flags & RTCF_BROADCAST) && - !sock_flag(sk, SOCK_BROADCAST)) - goto out; - if (connected) - sk_dst_set(sk, dst_clone(&rt->u.dst)); - } - - if (msg->msg_flags&MSG_CONFIRM) - goto do_confirm; -back_from_confirm: - - saddr = rt->rt_src; - if (!ipc.addr) - daddr = ipc.addr = rt->rt_dst; - - lock_sock(sk); - if (unlikely(up->pending)) { - /* The socket is already corked while preparing it. */ - /* ... which is an evident application bug. --ANK */ - release_sock(sk); - - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n"); - err = -EINVAL; - goto out; - } - /* - * Now cork the socket to pend data. - */ - inet->cork.fl.fl4_dst = daddr; - inet->cork.fl.fl_ip_dport = dport; - inet->cork.fl.fl4_src = saddr; - inet->cork.fl.fl_ip_sport = inet->sport; - up->pending = AF_INET; - -do_append_data: - up->len += ulen; - getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag; - err = ip_append_data(sk, getfrag, msg->msg_iov, ulen, - sizeof(struct udphdr), &ipc, rt, - corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags); - if (err) - udp_flush_pending_frames(sk); - else if (!corkreq) - err = udp_push_pending_frames(sk); - else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) - up->pending = 0; - release_sock(sk); - -out: - ip_rt_put(rt); - if (free) - kfree(ipc.opt); - if (!err) - return len; - /* - * ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting - * ENOBUFS might not be good (it's not tunable per se), but otherwise - * we don't have a good statistic (IpOutDiscards but it can be too many - * things). We could add another new stat but at least for now that - * seems like overkill. - */ - if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) { - UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite); - } - return err; - -do_confirm: - dst_confirm(&rt->u.dst); - if (!(msg->msg_flags&MSG_PROBE) || len) - goto back_from_confirm; - err = 0; - goto out; -} - -int udp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) -{ - struct udp_sock *up = udp_sk(sk); - int ret; - - if (!up->pending) { - struct msghdr msg = { .msg_flags = flags|MSG_MORE }; - - /* Call udp_sendmsg to specify destination address which - * sendpage interface can't pass. - * This will succeed only when the socket is connected. - */ - ret = udp_sendmsg(NULL, sk, &msg, 0); - if (ret < 0) - return ret; - } - - lock_sock(sk); - - if (unlikely(!up->pending)) { - release_sock(sk); - - LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n"); - return -EINVAL; - } - - ret = ip_append_page(sk, page, offset, size, flags); - if (ret == -EOPNOTSUPP) { - release_sock(sk); - return sock_no_sendpage(sk->sk_socket, page, offset, - size, flags); - } - if (ret < 0) { - udp_flush_pending_frames(sk); - goto out; - } - - up->len += size; - if (!(up->corkflag || (flags&MSG_MORE))) - ret = udp_push_pending_frames(sk); - if (!ret) - ret = size; -out: - release_sock(sk); - return ret; -} - -/* - * This should be easy, if there is something there we - * return it, otherwise we block. - */ - -int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len) -{ - struct inet_sock *inet = inet_sk(sk); - struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name; - struct sk_buff *skb; - unsigned int ulen, copied; - int peeked; - int err; - int is_udplite = IS_UDPLITE(sk); - - /* - * Check any passed addresses - */ - if (addr_len) - *addr_len=sizeof(*sin); - - if (flags & MSG_ERRQUEUE) - return ip_recv_error(sk, msg, len); - -try_again: - skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), - &peeked, &err); - if (!skb) - goto out; - - ulen = skb->len - sizeof(struct udphdr); - copied = len; - if (copied > ulen) - copied = ulen; - else if (copied < ulen) - msg->msg_flags |= MSG_TRUNC; - - /* - * If checksum is needed at all, try to do it while copying the - * data. If the data is truncated, or if we only want a partial - * coverage checksum (UDP-Lite), do it before the copy. - */ - - if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) { - if (udp_lib_checksum_complete(skb)) - goto csum_copy_err; - } - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr), - msg->msg_iov, copied ); - else { - err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov); - - if (err == -EINVAL) - goto csum_copy_err; - } - - if (err) - goto out_free; - - if (!peeked) - UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite); - - sock_recv_timestamp(msg, sk, skb); - - /* Copy the address. */ - if (sin) - { - sin->sin_family = AF_INET; - sin->sin_port = udp_hdr(skb)->source; - sin->sin_addr.s_addr = ip_hdr(skb)->saddr; - memset(sin->sin_zero, 0, sizeof(sin->sin_zero)); - } - if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); - - err = copied; - if (flags & MSG_TRUNC) - err = ulen; - -out_free: - lock_sock(sk); - skb_free_datagram(sk, skb); - release_sock(sk); -out: - return err; - -csum_copy_err: - lock_sock(sk); - if (!skb_kill_datagram(sk, skb, flags)) - UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite); - release_sock(sk); - - if (noblock) - return -EAGAIN; - goto try_again; -} - - -/* returns: - * -1: error - * 0: success - * >0: "udp encap" protocol resubmission - * - * Note that in the success and error cases, the skb is assumed to - * have either been requeued or freed. - */ -int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb) -{ - struct udp_sock *up = udp_sk(sk); - int rc; - int is_udplite = IS_UDPLITE(sk); - - /* - * Charge it to the socket, dropping if the queue is full. - */ - if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) - goto drop; - nf_reset(skb); - - if (up->encap_type) { - /* - * This is an encapsulation socket so pass the skb to - * the socket's udp_encap_rcv() hook. Otherwise, just - * fall through and pass this up the UDP socket. - * up->encap_rcv() returns the following value: - * =0 if skb was successfully passed to the encap - * handler or was discarded by it. - * >0 if skb should be passed on to UDP. - * <0 if skb should be resubmitted as proto -N - */ - - /* if we're overly short, let UDP handle it */ - if (skb->len > sizeof(struct udphdr) && - up->encap_rcv != NULL) { - int ret; - - ret = (*up->encap_rcv)(sk, skb); - if (ret <= 0) { - UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS, - is_udplite); - return -ret; - } - } - - /* FALLTHROUGH -- it's a UDP Packet */ - } - - /* - * UDP-Lite specific tests, ignored on UDP sockets - */ - if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) { - - /* - * MIB statistics other than incrementing the error count are - * disabled for the following two types of errors: these depend - * on the application settings, not on the functioning of the - * protocol stack as such. - * - * RFC 3828 here recommends (sec 3.3): "There should also be a - * way ... to ... at least let the receiving application block - * delivery of packets with coverage values less than a value - * provided by the application." - */ - if (up->pcrlen == 0) { /* full coverage was set */ - LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage " - "%d while full coverage %d requested\n", - UDP_SKB_CB(skb)->cscov, skb->len); - goto drop; - } - /* The next case involves violating the min. coverage requested - * by the receiver. This is subtle: if receiver wants x and x is - * greater than the buffersize/MTU then receiver will complain - * that it wants x while sender emits packets of smaller size y. - * Therefore the above ...()->partial_cov statement is essential. - */ - if (UDP_SKB_CB(skb)->cscov < up->pcrlen) { - LIMIT_NETDEBUG(KERN_WARNING - "UDPLITE: coverage %d too small, need min %d\n", - UDP_SKB_CB(skb)->cscov, up->pcrlen); - goto drop; - } - } - - if (sk->sk_filter) { - if (udp_lib_checksum_complete(skb)) - goto drop; - } - - if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) { - /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) - UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite); - goto drop; - } - - return 0; - -drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); - return -1; -} - -/* - * Multicasts and broadcasts go to each listener. - * - * Note: called only from the BH handler context, - * so we don't need to lock the hashes. - */ -static int __udp4_lib_mcast_deliver(struct sk_buff *skb, - struct udphdr *uh, - __be32 saddr, __be32 daddr, - struct hlist_head udptable[]) -{ - struct sock *sk; - int dif; - - read_lock(&udp_hash_lock); - sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]); - dif = skb->dev->ifindex; - sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif); - if (sk) { - struct sock *sknext = NULL; - - do { - struct sk_buff *skb1 = skb; - - sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr, - uh->source, saddr, dif); - if (sknext) - skb1 = skb_clone(skb, GFP_ATOMIC); - - if (skb1) { - int ret = 0; - - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb1); - else - sk_add_backlog(sk, skb1); - bh_unlock_sock(sk); - - if (ret > 0) - /* we should probably re-process instead - * of dropping packets here. */ - kfree_skb(skb1); - } - sk = sknext; - } while (sknext); - } else - kfree_skb(skb); - read_unlock(&udp_hash_lock); - return 0; -} - -/* Initialize UDP checksum. If exited with zero value (success), - * CHECKSUM_UNNECESSARY means, that no more checks are required. - * Otherwise, csum completion requires chacksumming packet body, - * including udp header and folding it to skb->csum. - */ -static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, - int proto) -{ - const struct iphdr *iph; - int err; - - UDP_SKB_CB(skb)->partial_cov = 0; - UDP_SKB_CB(skb)->cscov = skb->len; - - if (IS_PROTO_UDPLITE(proto)) { - err = udplite_checksum_init(skb, uh); - if (err) - return err; - } - - iph = ip_hdr(skb); - if (uh->check == 0) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - } else if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len, - proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; - } - if (!skb_csum_unnecessary(skb)) - skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr, - skb->len, proto, 0); - /* Probably, we should checksum udp header (it should be in cache - * in any case) and data in tiny packets (< rx copybreak). - */ - - return 0; -} - -/* - * All we need to do is get the socket, and then do a checksum. - */ - -int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], - int proto) -{ - struct sock *sk; - struct udphdr *uh = udp_hdr(skb); - unsigned short ulen; - struct rtable *rt = skb->rtable; - __be32 saddr = ip_hdr(skb)->saddr; - __be32 daddr = ip_hdr(skb)->daddr; - - /* - * Validate the packet. - */ - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto drop; /* No space for header. */ - - ulen = ntohs(uh->len); - if (ulen > skb->len) - goto short_packet; - - if (IS_PROTO_UDPLITE(proto)) { - /* UDP validates ulen. */ - if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen)) - goto short_packet; - uh = udp_hdr(skb); - } - - if (udp4_csum_init(skb, uh, proto)) - goto csum_error; - - if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST)) - return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable); - - sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr, - uh->dest, inet_iif(skb), udptable); - - if (sk != NULL) { - int ret = 0; - bh_lock_sock_nested(sk); - if (!sock_owned_by_user(sk)) - ret = udp_queue_rcv_skb(sk, skb); - else - sk_add_backlog(sk, skb); - bh_unlock_sock(sk); - sock_put(sk); - - /* a return value > 0 means to resubmit the input, but - * it wants the return to be -protocol, or 0 - */ - if (ret > 0) - return -ret; - return 0; - } - - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) - goto drop; - nf_reset(skb); - - /* No socket. Drop packet silently, if checksum is wrong */ - if (udp_lib_checksum_complete(skb)) - goto csum_error; - - UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); - icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); - - /* - * Hmm. We got an UDP packet to a port to which we - * don't wanna listen. Ignore it. - */ - kfree_skb(skb); - return 0; - -short_packet: - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", - NIPQUAD(saddr), - ntohs(uh->source), - ulen, - skb->len, - NIPQUAD(daddr), - ntohs(uh->dest)); - goto drop; - -csum_error: - /* - * RFC1122: OK. Discards the bad packet silently (as far as - * the network is concerned, anyway) as per 4.1.3.4 (MUST). - */ - LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", - NIPQUAD(saddr), - ntohs(uh->source), - NIPQUAD(daddr), - ntohs(uh->dest), - ulen); -drop: - UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); - kfree_skb(skb); - return 0; -} - -int udp_rcv(struct sk_buff *skb) -{ - return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP); -} - -int udp_destroy_sock(struct sock *sk) -{ - lock_sock(sk); - udp_flush_pending_frames(sk); - release_sock(sk); - return 0; -} - -int udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_push_pending_frames); - return ip_setsockopt(sk, level, optname, optval, optlen); -} - -#ifdef CONFIG_COMPAT -int compat_udp_setsockopt(struct sock *sk, int level, int optname, - char __user *optval, int optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_setsockopt(sk, level, optname, optval, optlen, - udp_push_pending_frames); - return compat_ip_setsockopt(sk, level, optname, optval, optlen); -} -#endif - -int udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return ip_getsockopt(sk, level, optname, optval, optlen); -} - -#ifdef CONFIG_COMPAT -int compat_udp_getsockopt(struct sock *sk, int level, int optname, - char __user *optval, int __user *optlen) -{ - if (IS_SOL_UDPFAMILY(level)) - return udp_lib_getsockopt(sk, level, optname, optval, optlen); - return compat_ip_getsockopt(sk, level, optname, optval, optlen); -} -#endif - -/* ------------------------------------------------------------------------ */ -DEFINE_PROTO_INUSE(udp) - -struct proto udp_prot = { - .name = "UDP", - .owner = THIS_MODULE, - .close = udp_lib_close, - .connect = ip4_datagram_connect, - .disconnect = udp_disconnect, - .ioctl = udp_ioctl, - .destroy = udp_destroy_sock, - .setsockopt = udp_setsockopt, - .getsockopt = udp_getsockopt, - .sendmsg = udp_sendmsg, - .recvmsg = udp_recvmsg, - .sendpage = udp_sendpage, - .backlog_rcv = udp_queue_rcv_skb, - .hash = udp_lib_hash, - .unhash = udp_lib_unhash, - .get_port = udp_v4_get_port, - .memory_allocated = &udp_memory_allocated, - .sysctl_mem = sysctl_udp_mem, - .sysctl_wmem = &sysctl_udp_wmem_min, - .sysctl_rmem = &sysctl_udp_rmem_min, - .obj_size = sizeof(struct udp_sock), -#ifdef CONFIG_COMPAT - .compat_setsockopt = compat_udp_setsockopt, - .compat_getsockopt = compat_udp_getsockopt, -#endif - REF_PROTO_INUSE(udp) -}; - -/* ------------------------------------------------------------------------ */ -static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket) -{ - struct inet_sock *inet = inet_sk(sp); - __be32 dest = inet->daddr; - __be32 src = inet->rcv_saddr; - __u16 destp = ntohs(inet->dport); - __u16 srcp = ntohs(inet->sport); - - sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X" - " %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p", - bucket, src, srcp, dest, destp, sp->sk_state, - atomic_read(&sp->sk_wmem_alloc), - atomic_read(&sp->sk_rmem_alloc), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), - atomic_read(&sp->sk_refcnt), sp); -} - -int udp4_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_printf(seq, "%-127s\n", - " sl local_address rem_address st tx_queue " - "rx_queue tr tm->when retrnsmt uid timeout " - "inode"); - else { - char tmpbuf[129]; - struct udp_iter_state *state = seq->private; - - udp4_format_sock(v, tmpbuf, state->bucket); - seq_printf(seq, "%-127s\n", tmpbuf); - } - return 0; -} - -/* ------------------------------------------------------------------------ */ -#ifdef CONFIG_PROC_FS -static struct file_operations udp4_seq_fops; -static struct udp_seq_afinfo udp4_seq_afinfo = { - .owner = THIS_MODULE, - .name = "udp", - .family = AF_INET, - .hashtable = udp_hash, - .seq_show = udp4_seq_show, - .seq_fops = &udp4_seq_fops, -}; - -int __init udp4_proc_init(void) -{ - return udp_proc_register(&udp4_seq_afinfo); -} - -void udp4_proc_exit(void) -{ - udp_proc_unregister(&udp4_seq_afinfo); -} -#endif /* CONFIG_PROC_FS */ - -EXPORT_SYMBOL(udp_prot); -EXPORT_SYMBOL(udp_sendmsg); - diff --git a/net/ipv4/udplite_ipv4.c b/net/ipv4/udplite.c index d49c6d68c8a..d49c6d68c8a 100644 --- a/net/ipv4/udplite_ipv4.c +++ b/net/ipv4/udplite.c diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 107051f7c22..ae14617e607 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_IPV6) += ipv6.o ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ addrlabel.o \ - route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp_ipv6.o \ + route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o @@ -17,7 +17,6 @@ ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o -ipv6-$(CONFIG_IP_UDPLITE) += udplite_ipv6.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index afe9276d042..730a861b8f4 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -813,16 +813,12 @@ static int __init init_ipv6_mibs(void) goto err_icmpmsg_mib; if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udp_mib; -#ifdef CONFIG_IP_UDPLITE if (snmp_mib_init((void **)udplite_stats_in6, sizeof (struct udp_mib)) < 0) goto err_udplite_mib; -#endif return 0; -#ifdef CONFIG_IP_UDPLITE err_udplite_mib: -#endif snmp_mib_free((void **)udp_stats_in6); err_udp_mib: snmp_mib_free((void **)icmpv6msg_statistics); @@ -841,9 +837,7 @@ static void cleanup_ipv6_mibs(void) snmp_mib_free((void **)icmpv6_statistics); snmp_mib_free((void **)icmpv6msg_statistics); snmp_mib_free((void **)udp_stats_in6); -#ifdef CONFIG_IP_UDPLITE snmp_mib_free((void **)udplite_stats_in6); -#endif } static int inet6_net_init(struct net *net) @@ -888,11 +882,9 @@ static int __init inet6_init(void) if (err) goto out_unregister_tcp_proto; -#ifdef CONFIG_IP_UDPLITE err = proto_register(&udplitev6_prot, 1); if (err) goto out_unregister_udp_proto; -#endif err = proto_register(&rawv6_prot, 1); if (err) @@ -1063,10 +1055,8 @@ out_sock_register_fail: out_unregister_raw_proto: proto_unregister(&rawv6_prot); out_unregister_udplite_proto: -#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); out_unregister_udp_proto: -#endif proto_unregister(&udpv6_prot); out_unregister_tcp_proto: proto_unregister(&tcpv6_prot); @@ -1085,9 +1075,7 @@ static void __exit inet6_exit(void) ipv6_sysctl_unregister(); #endif udpv6_exit(); -#ifdef CONFIG_IP_UDPLITE udplitev6_exit(); -#endif tcpv6_exit(); /* Cleanup code parts. */ @@ -1117,9 +1105,7 @@ static void __exit inet6_exit(void) unregister_pernet_subsys(&inet6_net_ops); cleanup_ipv6_mibs(); proto_unregister(&rawv6_prot); -#ifdef CONFIG_IP_UDPLITE proto_unregister(&udplitev6_prot); -#endif proto_unregister(&udpv6_prot); proto_unregister(&tcpv6_prot); } diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 3bbfdff698d..5eea6fa506e 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -127,9 +127,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct sk_buff *pktopt; if (sk->sk_protocol != IPPROTO_UDP && -#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && -#endif sk->sk_protocol != IPPROTO_TCP) break; @@ -169,7 +167,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, } else { struct proto *prot = &udp_prot; - if (IS_PROTO_UDPLITE(sk->sk_protocol)) + if (sk->sk_protocol == IPPROTO_UDPLITE) prot = &udplite_prot; local_bh_disable(); sock_prot_inuse_add(sk->sk_prot, -1); @@ -734,9 +732,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, switch (optname) { case IPV6_ADDRFORM: if (sk->sk_protocol != IPPROTO_UDP && -#ifdef CONFIG_IP_UDPLITE sk->sk_protocol != IPPROTO_UDPLITE && -#endif sk->sk_protocol != IPPROTO_TCP) return -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 2453f2229ef..8a5be290c71 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -39,10 +39,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v) sock_prot_inuse_get(&tcpv6_prot)); seq_printf(seq, "UDP6: inuse %d\n", sock_prot_inuse_get(&udpv6_prot)); -#ifdef CONFIG_IP_UDPLITE seq_printf(seq, "UDPLITE6: inuse %d\n", sock_prot_inuse_get(&udplitev6_prot)); -#endif seq_printf(seq, "RAW6: inuse %d\n", sock_prot_inuse_get(&rawv6_prot)); seq_printf(seq, "FRAG6: inuse %d memory %d\n", @@ -113,7 +111,6 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; -#ifdef CONFIG_IP_UDPLITE static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), @@ -121,7 +118,6 @@ static struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS), SNMP_MIB_SENTINEL }; -#endif static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) { @@ -180,9 +176,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics); snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list); -#ifdef CONFIG_IP_UDPLITE snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list); -#endif } return 0; } diff --git a/net/ipv6/udp_ipv6.c b/net/ipv6/udp.c index 55feac7ba71..53739de829d 100644 --- a/net/ipv6/udp_ipv6.c +++ b/net/ipv6/udp.c @@ -400,7 +400,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, UDP_SKB_CB(skb)->partial_cov = 0; UDP_SKB_CB(skb)->cscov = skb->len; - if (IS_PROTO_UDPLITE(proto)) { + if (proto == IPPROTO_UDPLITE) { err = udplite_checksum_init(skb, uh); if (err) return err; @@ -489,7 +489,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], if (udp_lib_checksum_complete(skb)) goto discard; - UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto)); + UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); @@ -510,11 +510,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[], short_packet: LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n", - IS_PROTO_UDPLITE(proto) ? "-Lite" : "", + proto == IPPROTO_UDPLITE ? "-Lite" : "", ulen, skb->len); discard: - UDP6_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto)); + UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE); kfree_skb(skb); return 0; } @@ -890,7 +890,7 @@ int udpv6_destroy_sock(struct sock *sk) int udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (IS_SOL_UDPFAMILY(level)) + if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -900,7 +900,7 @@ int udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, int optlen) { - if (IS_SOL_UDPFAMILY(level)) + if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_setsockopt(sk, level, optname, optval, optlen, udp_v6_push_pending_frames); return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -910,7 +910,7 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (IS_SOL_UDPFAMILY(level)) + if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -919,7 +919,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { - if (IS_SOL_UDPFAMILY(level)) + if (level == SOL_UDP || level == SOL_UDPLITE) return udp_lib_getsockopt(sk, level, optname, optval, optlen); return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); } diff --git a/net/ipv6/udplite_ipv6.c b/net/ipv6/udplite.c index 87d4202522e..87d4202522e 100644 --- a/net/ipv6/udplite_ipv6.c +++ b/net/ipv6/udplite.c |