diff options
Diffstat (limited to 'net/ipv4')
42 files changed, 2260 insertions, 1722 deletions
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8603ca82710..37cf1a6ea3a 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -9,10 +9,7 @@ config IP_MULTICAST intend to participate in the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. More information about the MBONE is on the WWW at - <http://www.savetz.com/mbone/>. Information about the multicast - capabilities of the various network cards is contained in - <file:Documentation/networking/multicast.txt>. For most people, it's - safe to say N. + <http://www.savetz.com/mbone/>. For most people, it's safe to say N. config IP_ADVANCED_ROUTER bool "IP: advanced router" @@ -223,10 +220,8 @@ config IP_MROUTE packets that have several destination addresses. It is needed on the MBONE, a high bandwidth network on top of the Internet which carries audio and video broadcasts. In order to do that, you would most - likely run the program mrouted. Information about the multicast - capabilities of the various network cards is contained in - <file:Documentation/networking/multicast.txt>. If you haven't heard - about it, you don't need it. + likely run the program mrouted. If you haven't heard about it, you + don't need it. config IP_MROUTE_MULTIPLE_TABLES bool "IP: multicast policy routing" diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 089cb9f3638..4b81e91c80f 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -8,10 +8,10 @@ obj-y := route.o inetpeer.o protocol.o \ inet_timewait_sock.o inet_connection_sock.o \ tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \ tcp_minisocks.o tcp_cong.o tcp_metrics.o tcp_fastopen.o \ - datagram.o raw.o udp.o udplite.o \ - arp.o icmp.o devinet.o af_inet.o igmp.o \ + tcp_offload.o datagram.o raw.o udp.o udplite.o \ + udp_offload.o arp.o icmp.o devinet.o af_inet.o igmp.o \ fib_frontend.o fib_semantics.o fib_trie.o \ - inet_fragment.o ping.o + inet_fragment.o ping.o ip_tunnel_core.o obj-$(CONFIG_NET_IP_TUNNEL) += ip_tunnel.o obj-$(CONFIG_SYSCTL) += sysctl_net_ipv4.o @@ -19,6 +19,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o +gre-y := gre_demux.o gre_offload.o obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_NET_IPVTI) += ip_vti.o diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d01be2a3ae5..b4d0be2b7ce 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1295,6 +1295,7 @@ static struct sk_buff *inet_gso_segment(struct sk_buff *skb, SKB_GSO_GRE | SKB_GSO_TCPV6 | SKB_GSO_UDP_TUNNEL | + SKB_GSO_MPLS | 0))) goto out; @@ -1384,7 +1385,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, goto out_unlock; id = ntohl(*(__be32 *)&iph->id); - flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id ^ IP_DF)); + flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF)); id >>= 16; for (p = *head; p; p = p->next) { @@ -1406,6 +1407,7 @@ static struct sk_buff **inet_gro_receive(struct sk_buff **head, NAPI_GRO_CB(p)->flush |= (iph->ttl ^ iph2->ttl) | (iph->tos ^ iph2->tos) | + (__force int)((iph->frag_off ^ iph2->frag_off) & htons(IP_DF)) | ((u16)(ntohs(iph2->id) + NAPI_GRO_CB(p)->count) ^ id); NAPI_GRO_CB(p)->flush |= flush; @@ -1557,15 +1559,6 @@ static const struct net_protocol tcp_protocol = { .netns_ok = 1, }; -static const struct net_offload tcp_offload = { - .callbacks = { - .gso_send_check = tcp_v4_gso_send_check, - .gso_segment = tcp_tso_segment, - .gro_receive = tcp4_gro_receive, - .gro_complete = tcp4_gro_complete, - }, -}; - static const struct net_protocol udp_protocol = { .handler = udp_rcv, .err_handler = udp_err, @@ -1573,13 +1566,6 @@ static const struct net_protocol udp_protocol = { .netns_ok = 1, }; -static const struct net_offload udp_offload = { - .callbacks = { - .gso_send_check = udp4_ufo_send_check, - .gso_segment = udp4_ufo_fragment, - }, -}; - static const struct net_protocol icmp_protocol = { .handler = icmp_rcv, .err_handler = icmp_err, @@ -1679,10 +1665,10 @@ static int __init ipv4_offload_init(void) /* * Add offloads */ - if (inet_add_offload(&udp_offload, IPPROTO_UDP) < 0) + if (udpv4_offload_init() < 0) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); - if (inet_add_offload(&tcp_offload, IPPROTO_TCP) < 0) - pr_crit("%s: Cannot add TCP protocol offlaod\n", __func__); + if (tcpv4_offload_init() < 0) + pr_crit("%s: Cannot add TCP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); return 0; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2e7f1948216..717902669d2 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -419,12 +419,9 @@ static void ah4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_AH, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_AH, 0); xfrm_state_put(x); } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 247ec1951c3..4429b013f26 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -1234,13 +1234,19 @@ out: static int arp_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; switch (event) { case NETDEV_CHANGEADDR: neigh_changeaddr(&arp_tbl, dev); rt_cache_flush(dev_net(dev)); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&arp_tbl, dev); + break; default: break; } diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index dfc39d4d48b..34ca6d5a3a4 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -215,6 +215,7 @@ void in_dev_finish_destroy(struct in_device *idev) WARN_ON(idev->ifa_list); WARN_ON(idev->mc_list); + kfree(rcu_dereference_protected(idev->mc_hash, 1)); #ifdef NET_REFCNT_DEBUG pr_debug("%s: %p=%s\n", __func__, idev, dev ? dev->name : "NIL"); #endif @@ -771,7 +772,7 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, ci = nla_data(tb[IFA_CACHEINFO]); if (!ci->ifa_valid || ci->ifa_prefered > ci->ifa_valid) { err = -EINVAL; - goto errout; + goto errout_free; } *pvalid_lft = ci->ifa_valid; *pprefered_lft = ci->ifa_prefered; @@ -779,6 +780,8 @@ static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh, return ifa; +errout_free: + inet_free_ifa(ifa); errout: return ERR_PTR(err); } @@ -1333,7 +1336,7 @@ static void inetdev_send_gratuitous_arp(struct net_device *dev, static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); @@ -1941,7 +1944,7 @@ static void inet_forward_change(struct net *net) } } -static int devinet_conf_proc(ctl_table *ctl, int write, +static int devinet_conf_proc(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -1984,7 +1987,7 @@ static int devinet_conf_proc(ctl_table *ctl, int write, return ret; } -static int devinet_sysctl_forward(ctl_table *ctl, int write, +static int devinet_sysctl_forward(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -2027,7 +2030,7 @@ static int devinet_sysctl_forward(ctl_table *ctl, int write, return ret; } -static int ipv4_doint_and_flush(ctl_table *ctl, int write, +static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 4cfe34d4cc9..ab3d814bc80 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -502,12 +502,9 @@ static void esp4_err(struct sk_buff *skb, u32 info) if (!x) return; - if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) { - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); - + if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH) ipv4_update_pmtu(skb, net, info, 0, 0, IPPROTO_ESP, 0); - } else + else ipv4_redirect(skb, net, 0, 0, IPPROTO_ESP, 0); xfrm_state_put(x); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c7629a209f9..b3f627ac4ed 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -961,7 +961,7 @@ static void nl_fib_input(struct sk_buff *skb) nlmsg_len(nlh) < sizeof(*frn)) return; - skb = skb_clone(skb, GFP_KERNEL); + skb = netlink_skb_clone(skb, GFP_KERNEL); if (skb == NULL) return; nlh = nlmsg_hdr(skb); @@ -1038,7 +1038,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct in_device *in_dev; struct net *net = dev_net(dev); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 8f6cb7a87cd..d5dbca5ecf6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -169,7 +169,8 @@ static void free_nh_exceptions(struct fib_nh *nh) next = rcu_dereference_protected(fnhe->fnhe_next, 1); - rt_fibinfo_free(&fnhe->fnhe_rth); + rt_fibinfo_free(&fnhe->fnhe_rth_input); + rt_fibinfo_free(&fnhe->fnhe_rth_output); kfree(fnhe); diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 49616fed934..108a1e9c9ea 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2133,7 +2133,7 @@ static void trie_show_stats(struct seq_file *seq, struct trie_stat *stat) max--; pointers = 0; - for (i = 1; i <= max; i++) + for (i = 1; i < max; i++) if (stat->nodesizes[i] != 0) { seq_printf(seq, " %u: %u", i, stat->nodesizes[i]); pointers += (1<<i) * stat->nodesizes[i]; diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c deleted file mode 100644 index 7856d1651d0..00000000000 --- a/net/ipv4/gre.c +++ /dev/null @@ -1,253 +0,0 @@ -/* - * GRE over IPv4 demultiplexer driver - * - * Authors: Dmitry Kozlov (xeb@mail.ru) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/kmod.h> -#include <linux/skbuff.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/netdevice.h> -#include <linux/if_tunnel.h> -#include <linux/spinlock.h> -#include <net/protocol.h> -#include <net/gre.h> - - -static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; -static DEFINE_SPINLOCK(gre_proto_lock); - -int gre_add_protocol(const struct gre_protocol *proto, u8 version) -{ - if (version >= GREPROTO_MAX) - goto err_out; - - spin_lock(&gre_proto_lock); - if (gre_proto[version]) - goto err_out_unlock; - - RCU_INIT_POINTER(gre_proto[version], proto); - spin_unlock(&gre_proto_lock); - return 0; - -err_out_unlock: - spin_unlock(&gre_proto_lock); -err_out: - return -1; -} -EXPORT_SYMBOL_GPL(gre_add_protocol); - -int gre_del_protocol(const struct gre_protocol *proto, u8 version) -{ - if (version >= GREPROTO_MAX) - goto err_out; - - spin_lock(&gre_proto_lock); - if (rcu_dereference_protected(gre_proto[version], - lockdep_is_held(&gre_proto_lock)) != proto) - goto err_out_unlock; - RCU_INIT_POINTER(gre_proto[version], NULL); - spin_unlock(&gre_proto_lock); - synchronize_rcu(); - return 0; - -err_out_unlock: - spin_unlock(&gre_proto_lock); -err_out: - return -1; -} -EXPORT_SYMBOL_GPL(gre_del_protocol); - -static int gre_rcv(struct sk_buff *skb) -{ - const struct gre_protocol *proto; - u8 ver; - int ret; - - if (!pskb_may_pull(skb, 12)) - goto drop; - - ver = skb->data[1]&0x7f; - if (ver >= GREPROTO_MAX) - goto drop; - - rcu_read_lock(); - proto = rcu_dereference(gre_proto[ver]); - if (!proto || !proto->handler) - goto drop_unlock; - ret = proto->handler(skb); - rcu_read_unlock(); - return ret; - -drop_unlock: - rcu_read_unlock(); -drop: - kfree_skb(skb); - return NET_RX_DROP; -} - -static void gre_err(struct sk_buff *skb, u32 info) -{ - const struct gre_protocol *proto; - const struct iphdr *iph = (const struct iphdr *)skb->data; - u8 ver = skb->data[(iph->ihl<<2) + 1]&0x7f; - - if (ver >= GREPROTO_MAX) - return; - - rcu_read_lock(); - proto = rcu_dereference(gre_proto[ver]); - if (proto && proto->err_handler) - proto->err_handler(skb, info); - rcu_read_unlock(); -} - -static struct sk_buff *gre_gso_segment(struct sk_buff *skb, - netdev_features_t features) -{ - struct sk_buff *segs = ERR_PTR(-EINVAL); - netdev_features_t enc_features; - int ghl = GRE_HEADER_SECTION; - struct gre_base_hdr *greh; - int mac_len = skb->mac_len; - __be16 protocol = skb->protocol; - int tnl_hlen; - bool csum; - - if (unlikely(skb_shinfo(skb)->gso_type & - ~(SKB_GSO_TCPV4 | - SKB_GSO_TCPV6 | - SKB_GSO_UDP | - SKB_GSO_DODGY | - SKB_GSO_TCP_ECN | - SKB_GSO_GRE))) - goto out; - - if (unlikely(!pskb_may_pull(skb, sizeof(*greh)))) - goto out; - - greh = (struct gre_base_hdr *)skb_transport_header(skb); - - if (greh->flags & GRE_KEY) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_SEQ) - ghl += GRE_HEADER_SECTION; - if (greh->flags & GRE_CSUM) { - ghl += GRE_HEADER_SECTION; - csum = true; - } else - csum = false; - - /* setup inner skb. */ - skb->protocol = greh->protocol; - skb->encapsulation = 0; - - if (unlikely(!pskb_may_pull(skb, ghl))) - goto out; - __skb_pull(skb, ghl); - skb_reset_mac_header(skb); - skb_set_network_header(skb, skb_inner_network_offset(skb)); - skb->mac_len = skb_inner_network_offset(skb); - - /* segment inner packet. */ - enc_features = skb->dev->hw_enc_features & netif_skb_features(skb); - segs = skb_mac_gso_segment(skb, enc_features); - if (!segs || IS_ERR(segs)) - goto out; - - skb = segs; - tnl_hlen = skb_tnl_header_len(skb); - do { - __skb_push(skb, ghl); - if (csum) { - __be32 *pcsum; - - if (skb_has_shared_frag(skb)) { - int err; - - err = __skb_linearize(skb); - if (err) { - kfree_skb_list(segs); - segs = ERR_PTR(err); - goto out; - } - } - - greh = (struct gre_base_hdr *)(skb->data); - pcsum = (__be32 *)(greh + 1); - *pcsum = 0; - *(__sum16 *)pcsum = csum_fold(skb_checksum(skb, 0, skb->len, 0)); - } - __skb_push(skb, tnl_hlen - ghl); - - skb_reset_mac_header(skb); - skb_set_network_header(skb, mac_len); - skb->mac_len = mac_len; - skb->protocol = protocol; - } while ((skb = skb->next)); -out: - return segs; -} - -static int gre_gso_send_check(struct sk_buff *skb) -{ - if (!skb->encapsulation) - return -EINVAL; - return 0; -} - -static const struct net_protocol net_gre_protocol = { - .handler = gre_rcv, - .err_handler = gre_err, - .netns_ok = 1, -}; - -static const struct net_offload gre_offload = { - .callbacks = { - .gso_send_check = gre_gso_send_check, - .gso_segment = gre_gso_segment, - }, -}; - -static int __init gre_init(void) -{ - pr_info("GRE over IPv4 demultiplexor driver\n"); - - if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { - pr_err("can't add protocol\n"); - return -EAGAIN; - } - - if (inet_add_offload(&gre_offload, IPPROTO_GRE)) { - pr_err("can't add protocol offload\n"); - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); - return -EAGAIN; - } - - return 0; -} - -static void __exit gre_exit(void) -{ - inet_del_offload(&gre_offload, IPPROTO_GRE); - inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); -} - -module_init(gre_init); -module_exit(gre_exit); - -MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); -MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); -MODULE_LICENSE("GPL"); - diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c new file mode 100644 index 00000000000..736c9fc3ef9 --- /dev/null +++ b/net/ipv4/gre_demux.c @@ -0,0 +1,414 @@ +/* + * GRE over IPv4 demultiplexer driver + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/module.h> +#include <linux/if.h> +#include <linux/icmp.h> +#include <linux/kernel.h> +#include <linux/kmod.h> +#include <linux/skbuff.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/netdevice.h> +#include <linux/if_tunnel.h> +#include <linux/spinlock.h> +#include <net/protocol.h> +#include <net/gre.h> + +#include <net/icmp.h> +#include <net/route.h> +#include <net/xfrm.h> + +static const struct gre_protocol __rcu *gre_proto[GREPROTO_MAX] __read_mostly; +static struct gre_cisco_protocol __rcu *gre_cisco_proto_list[GRE_IP_PROTO_MAX]; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + return -EINVAL; + + return (cmpxchg((const struct gre_protocol **)&gre_proto[version], NULL, proto) == NULL) ? + 0 : -EBUSY; +} +EXPORT_SYMBOL_GPL(gre_add_protocol); + +int gre_del_protocol(const struct gre_protocol *proto, u8 version) +{ + int ret; + + if (version >= GREPROTO_MAX) + return -EINVAL; + + ret = (cmpxchg((const struct gre_protocol **)&gre_proto[version], proto, NULL) == proto) ? + 0 : -EBUSY; + + if (ret) + return ret; + + synchronize_rcu(); + return 0; +} +EXPORT_SYMBOL_GPL(gre_del_protocol); + +void gre_build_header(struct sk_buff *skb, const struct tnl_ptk_info *tpi, + int hdr_len) +{ + struct gre_base_hdr *greh; + + skb_push(skb, hdr_len); + + greh = (struct gre_base_hdr *)skb->data; + greh->flags = tnl_flags_to_gre_flags(tpi->flags); + greh->protocol = tpi->proto; + + if (tpi->flags&(TUNNEL_KEY|TUNNEL_CSUM|TUNNEL_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)greh) + hdr_len - 4); + + if (tpi->flags&TUNNEL_SEQ) { + *ptr = tpi->seq; + ptr--; + } + if (tpi->flags&TUNNEL_KEY) { + *ptr = tpi->key; + ptr--; + } + if (tpi->flags&TUNNEL_CSUM && + !(skb_shinfo(skb)->gso_type & SKB_GSO_GRE)) { + *ptr = 0; + *(__sum16 *)ptr = csum_fold(skb_checksum(skb, 0, + skb->len, 0)); + } + } +} +EXPORT_SYMBOL_GPL(gre_build_header); + +struct sk_buff *gre_handle_offloads(struct sk_buff *skb, bool gre_csum) +{ + int err; + + if (likely(!skb->encapsulation)) { + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + } + + if (skb_is_gso(skb)) { + err = skb_unclone(skb, GFP_ATOMIC); + if (unlikely(err)) + goto error; + skb_shinfo(skb)->gso_type |= SKB_GSO_GRE; + return skb; + } else if (skb->ip_summed == CHECKSUM_PARTIAL && gre_csum) { + err = skb_checksum_help(skb); + if (unlikely(err)) + goto error; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) + skb->ip_summed = CHECKSUM_NONE; + + return skb; +error: + kfree_skb(skb); + return ERR_PTR(err); +} +EXPORT_SYMBOL_GPL(gre_handle_offloads); + +static __sum16 check_checksum(struct sk_buff *skb) +{ + __sum16 csum = 0; + + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + + if (!csum) + break; + /* Fall through. */ + + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + break; + } + + return csum; +} + +static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, + bool *csum_err) +{ + unsigned int ip_hlen = ip_hdrlen(skb); + const struct gre_base_hdr *greh; + __be32 *options; + int hdr_len; + + if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) + return -EINVAL; + + tpi->flags = gre_flags_to_tnl_flags(greh->flags); + hdr_len = ip_gre_calc_hlen(tpi->flags); + + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + + greh = (struct gre_base_hdr *)(skb_network_header(skb) + ip_hlen); + tpi->proto = greh->protocol; + + options = (__be32 *)(greh + 1); + if (greh->flags & GRE_CSUM) { + if (check_checksum(skb)) { + *csum_err = true; + return -EINVAL; + } + options++; + } + + if (greh->flags & GRE_KEY) { + tpi->key = *options; + options++; + } else + tpi->key = 0; + + if (unlikely(greh->flags & GRE_SEQ)) { + tpi->seq = *options; + options++; + } else + tpi->seq = 0; + + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (greh->flags == 0 && tpi->proto == htons(ETH_P_WCCP)) { + tpi->proto = htons(ETH_P_IP); + if ((*(u8 *)options & 0xF0) != 0x40) { + hdr_len += 4; + if (!pskb_may_pull(skb, hdr_len)) + return -EINVAL; + } + } + + return iptunnel_pull_header(skb, hdr_len, tpi->proto); +} + +static int gre_cisco_rcv(struct sk_buff *skb) +{ + struct tnl_ptk_info tpi; + int i; + bool csum_err = false; + + if (parse_gre_header(skb, &tpi, &csum_err) < 0) + goto drop; + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + int ret; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + ret = proto->handler(skb, &tpi); + if (ret == PACKET_RCVD) { + rcu_read_unlock(); + return 0; + } + } + rcu_read_unlock(); + + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); +drop: + kfree_skb(skb); + return 0; +} + +static void gre_cisco_err(struct sk_buff *skb, u32 info) +{ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + * + * Moreover, Cisco "wise men" put GRE key to the third word + * in GRE header. It makes impossible maintaining even soft + * state for keyed + * GRE tunnels with enabled checksum. Tell them "thank you". + * + * Well, I wonder, rfc1812 was written by Cisco employee, + * what the hell these idiots break standards established + * by themselves??? + */ + + const int type = icmp_hdr(skb)->type; + const int code = icmp_hdr(skb)->code; + struct tnl_ptk_info tpi; + bool csum_err = false; + int i; + + if (parse_gre_header(skb, &tpi, &csum_err)) { + if (!csum_err) /* ignore csum errors. */ + return; + } + + if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { + ipv4_update_pmtu(skb, dev_net(skb->dev), info, + skb->dev->ifindex, 0, IPPROTO_GRE, 0); + return; + } + if (type == ICMP_REDIRECT) { + ipv4_redirect(skb, dev_net(skb->dev), skb->dev->ifindex, 0, + IPPROTO_GRE, 0); + return; + } + + rcu_read_lock(); + for (i = 0; i < GRE_IP_PROTO_MAX; i++) { + struct gre_cisco_protocol *proto; + + proto = rcu_dereference(gre_cisco_proto_list[i]); + if (!proto) + continue; + + if (proto->err_handler(skb, info, &tpi) == PACKET_RCVD) |