diff options
Diffstat (limited to 'net/ipv4')
81 files changed, 1551 insertions, 837 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index e55136ae09f..011cca7ae02 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -456,6 +456,14 @@ config TCP_CONG_BIC increase provides TCP friendliness. See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/ +config TCP_CONG_CUBIC + tristate "CUBIC TCP" + default m + ---help--- + This is version 2.0 of BIC-TCP which uses a cubic growth function + among other techniques. + See http://www.csc.ncsu.edu/faculty/rhee/export/bitcp/cubic-paper.pdf + config TCP_CONG_WESTWOOD tristate "TCP Westwood+" default m diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index f0435d00db6..c54edd76de0 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_INET_DIAG) += inet_diag.o obj-$(CONFIG_IP_ROUTE_MULTIPATH_CACHED) += multipath.o obj-$(CONFIG_INET_TCP_DIAG) += tcp_diag.o obj-$(CONFIG_TCP_CONG_BIC) += tcp_bic.o +obj-$(CONFIG_TCP_CONG_CUBIC) += tcp_cubic.o obj-$(CONFIG_TCP_CONG_WESTWOOD) += tcp_westwood.o obj-$(CONFIG_TCP_CONG_HSTCP) += tcp_highspeed.o obj-$(CONFIG_TCP_CONG_HYBLA) += tcp_hybla.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d368cf24900..966a071a408 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -93,6 +93,7 @@ #include <linux/smp_lock.h> #include <linux/inet.h> #include <linux/igmp.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <net/ip.h> #include <net/protocol.h> @@ -302,6 +303,7 @@ lookup_protocol: sk->sk_reuse = 1; inet = inet_sk(sk); + inet->is_icsk = INET_PROTOSW_ICSK & answer_flags; if (SOCK_RAW == sock->type) { inet->num = protocol; @@ -775,16 +777,16 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) err = devinet_ioctl(cmd, (void __user *)arg); break; default: - if (!sk->sk_prot->ioctl || - (err = sk->sk_prot->ioctl(sk, cmd, arg)) == - -ENOIOCTLCMD) - err = dev_ioctl(cmd, (void __user *)arg); + if (sk->sk_prot->ioctl) + err = sk->sk_prot->ioctl(sk, cmd, arg); + else + err = -ENOIOCTLCMD; break; } return err; } -struct proto_ops inet_stream_ops = { +const struct proto_ops inet_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -805,7 +807,7 @@ struct proto_ops inet_stream_ops = { .sendpage = tcp_sendpage }; -struct proto_ops inet_dgram_ops = { +const struct proto_ops inet_dgram_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -830,7 +832,7 @@ struct proto_ops inet_dgram_ops = { * For SOCK_RAW sockets; should be the same as inet_dgram_ops but without * udp_poll */ -static struct proto_ops inet_sockraw_ops = { +static const struct proto_ops inet_sockraw_ops = { .family = PF_INET, .owner = THIS_MODULE, .release = inet_release, @@ -869,7 +871,8 @@ static struct inet_protosw inetsw_array[] = .ops = &inet_stream_ops, .capability = -1, .no_check = 0, - .flags = INET_PROTOSW_PERMANENT, + .flags = INET_PROTOSW_PERMANENT | + INET_PROTOSW_ICSK, }, { diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 035ad2c9e1b..aed537fa2c8 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -6,6 +6,7 @@ #include <linux/crypto.h> #include <linux/pfkeyv2.h> #include <net/icmp.h> +#include <net/protocol.h> #include <asm/scatterlist.h> diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index b425748f02d..37432088fe6 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -86,6 +86,7 @@ #include <linux/in.h> #include <linux/mm.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> #include <linux/fddidevice.h> diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 04a6fe3e95a..7b9bb28e2ee 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -58,6 +58,7 @@ #endif #include <linux/kmod.h> +#include <net/arp.h> #include <net/ip.h> #include <net/route.h> #include <net/ip_fib.h> diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1b18ce66e7b..73bfcae8af9 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -9,6 +9,7 @@ #include <linux/pfkeyv2.h> #include <linux/random.h> #include <net/icmp.h> +#include <net/protocol.h> #include <net/udp.h> /* decapsulation data for use when post-processing */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 19b1b984d68..18f5e509281 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -30,6 +30,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/skbuff.h> diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 7ea0209cb16..e2890ec8159 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -29,6 +29,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0b298bbc151..0dd4d06e456 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 6d2a6ac070e..ef4724de735 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -29,6 +29,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> @@ -36,6 +37,7 @@ #include <linux/netlink.h> #include <linux/init.h> +#include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 705e3ce86df..e320b32373e 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -41,6 +41,13 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. + * + * Substantial contributions to this work comes from: + * + * David S. Miller, <davem@davemloft.net> + * Stephen Hemminger <shemminger@osdl.org> + * Paul E. McKenney <paulmck@us.ibm.com> + * Patrick McHardy <kaber@trash.net> */ #define VERSION "0.404" @@ -59,6 +66,7 @@ #include <linux/errno.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/if_arp.h> #include <linux/proc_fs.h> diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 92e23b2ad4d..be5a519cd2f 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -73,6 +73,7 @@ #include <linux/socket.h> #include <linux/in.h> #include <linux/inet.h> +#include <linux/inetdevice.h> #include <linux/netdevice.h> #include <linux/string.h> #include <linux/netfilter_ipv4.h> diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 4a195c724f0..34758118c10 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -91,6 +91,8 @@ #include <linux/if_arp.h> #include <linux/rtnetlink.h> #include <linux/times.h> + +#include <net/arp.h> #include <net/ip.h> #include <net/protocol.h> #include <net/route.h> diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 3fe021f1a56..ae20281d8de 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -37,7 +37,8 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); */ int sysctl_local_port_range[2] = { 1024, 4999 }; -static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucket *tb) +int inet_csk_bind_conflict(const struct sock *sk, + const struct inet_bind_bucket *tb) { const u32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; @@ -62,11 +63,15 @@ static inline int inet_csk_bind_conflict(struct sock *sk, struct inet_bind_bucke return node != NULL; } +EXPORT_SYMBOL_GPL(inet_csk_bind_conflict); + /* Obtain a reference to a local port for the given sock, * if snum is zero it means select any available local port. */ int inet_csk_get_port(struct inet_hashinfo *hashinfo, - struct sock *sk, unsigned short snum) + struct sock *sk, unsigned short snum, + int (*bind_conflict)(const struct sock *sk, + const struct inet_bind_bucket *tb)) { struct inet_bind_hashbucket *head; struct hlist_node *node; @@ -125,7 +130,7 @@ tb_found: goto success; } else { ret = 1; - if (inet_csk_bind_conflict(sk, tb)) + if (bind_conflict(sk, tb)) goto fail_unlock; } } @@ -380,7 +385,7 @@ struct request_sock *inet_csk_search_req(const struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_search_req); void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req, - const unsigned timeout) + unsigned long timeout) { struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; @@ -631,3 +636,15 @@ void inet_csk_listen_stop(struct sock *sk) } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); + +void inet_csk_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr) +{ + struct sockaddr_in *sin = (struct sockaddr_in *)uaddr; + const struct inet_sock *inet = inet_sk(sk); + + sin->sin_family = AF_INET; + sin->sin_addr.s_addr = inet->daddr; + sin->sin_port = inet->dport; +} + +EXPORT_SYMBOL_GPL(inet_csk_addr2sockaddr); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 39061ed53cf..c4990819204 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -112,12 +112,12 @@ static int inet_diag_fill(struct sk_buff *skb, struct sock *sk, r->idiag_inode = 0; #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { - const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk); + const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6tw->tw_v6_rcv_saddr); + &tw6->tw_v6_rcv_saddr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6tw->tw_v6_daddr); + &tw6->tw_v6_daddr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -489,9 +489,9 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) if (r->idiag_family == AF_INET6) { ipv6_addr_copy((struct in6_addr *)r->id.idiag_src, - &tcp6_rsk(req)->loc_addr); + &inet6_rsk(req)->loc_addr); ipv6_addr_copy((struct in6_addr *)r->id.idiag_dst, - &tcp6_rsk(req)->rmt_addr); + &inet6_rsk(req)->rmt_addr); } #endif nlh->nlmsg_len = skb->tail - b; @@ -553,13 +553,13 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, entry.saddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? - tcp6_rsk(req)->loc_addr.s6_addr32 : + inet6_rsk(req)->loc_addr.s6_addr32 : #endif &ireq->loc_addr; entry.daddr = #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE) (entry.family == AF_INET6) ? - tcp6_rsk(req)->rmt_addr.s6_addr32 : + inet6_rsk(req)->rmt_addr.s6_addr32 : #endif &ireq->rmt_addr; entry.dport = ntohs(ireq->rmt_port); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index e8d29fe736d..33228115cda 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -15,12 +15,14 @@ #include <linux/config.h> #include <linux/module.h> +#include <linux/random.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/wait.h> #include <net/inet_connection_sock.h> #include <net/inet_hashtables.h> +#include <net/ip.h> /* * Allocate and initialize a new local port bind bucket. @@ -163,3 +165,179 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad } EXPORT_SYMBOL_GPL(__inet_lookup_listener); + +/* called with local bh disabled */ +static int __inet_check_established(struct inet_timewait_death_row *death_row, + struct sock *sk, __u16 lport, + struct inet_timewait_sock **twp) +{ + struct inet_hashinfo *hinfo = death_row->hashinfo; + struct inet_sock *inet = inet_sk(sk); + u32 daddr = inet->rcv_saddr; + u32 saddr = inet->daddr; + int dif = sk->sk_bound_dev_if; + INET_ADDR_COOKIE(acookie, saddr, daddr) + const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); + struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); + struct sock *sk2; + const struct hlist_node *node; + struct inet_timewait_sock *tw; + + prefetch(head->chain.first); + write_lock(&head->lock); + + /* Check TIME-WAIT sockets first. */ + sk_for_each(sk2, node, &(head + hinfo->ehash_size)->chain) { + tw = inet_twsk(sk2); + + if (INET_TW_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) { + if (twsk_unique(sk, sk2, twp)) + goto unique; + else + goto not_unique; + } + } + tw = NULL; + + /* And established part... */ + sk_for_each(sk2, node, &head->chain) { + if (INET_MATCH(sk2, hash, acookie, saddr, daddr, ports, dif)) + goto not_unique; + } + +unique: + /* Must record num and sport now. Otherwise we will see + * in hash table socket with a funny identity. */ < |