diff options
Diffstat (limited to 'net/ipv4/ip_input.c')
| -rw-r--r-- | net/ipv4/ip_input.c | 194 |
1 files changed, 105 insertions, 89 deletions
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 65631391d47..3d4da2c16b6 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -5,12 +5,10 @@ * * The Internet Protocol (IP) module. * - * Version: $Id: ip_input.c,v 1.55 2002/01/12 07:39:45 davem Exp $ - * * Authors: Ross Biro * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG> * Donald Becker, <becker@super.org> - * Alan Cox, <Alan.Cox@linux.org> + * Alan Cox, <alan@lxorguk.ukuu.org.uk> * Richard Underwood * Stefan Becker, <stefanb@yello.ping.de> * Jorge Cwik, <jorge@laser.satlink.net> @@ -115,12 +113,14 @@ * 2 of the License, or (at your option) any later version. */ -#include <asm/system.h> +#define pr_fmt(fmt) "IPv4: " fmt + #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/string.h> #include <linux/errno.h> +#include <linux/slab.h> #include <linux/net.h> #include <linux/socket.h> @@ -141,41 +141,35 @@ #include <net/icmp.h> #include <net/raw.h> #include <net/checksum.h> +#include <net/inet_ecn.h> #include <linux/netfilter_ipv4.h> #include <net/xfrm.h> #include <linux/mroute.h> #include <linux/netlink.h> /* - * SNMP management statistics + * Process Router Attention IP option (RFC 2113) */ - -DEFINE_SNMP_STAT(struct ipstats_mib, ip_statistics) __read_mostly; - -/* - * Process Router Attention IP option - */ -int ip_call_ra_chain(struct sk_buff *skb) +bool ip_call_ra_chain(struct sk_buff *skb) { struct ip_ra_chain *ra; u8 protocol = ip_hdr(skb)->protocol; struct sock *last = NULL; + struct net_device *dev = skb->dev; - read_lock(&ip_ra_lock); - for (ra = ip_ra_chain; ra; ra = ra->next) { + for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report * the packet if it came from that interface. */ - if (sk && inet_sk(sk)->num == protocol && + if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == skb->dev->ifindex)) { - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { - if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) { - read_unlock(&ip_ra_lock); - return 1; - } + sk->sk_bound_dev_if == dev->ifindex) && + net_eq(sock_net(sk), dev_net(dev))) { + if (ip_is_fragment(ip_hdr(skb))) { + if (ip_defrag(skb, IP_DEFRAG_CALL_RA_CHAIN)) + return true; } if (last) { struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); @@ -188,31 +182,28 @@ int ip_call_ra_chain(struct sk_buff *skb) if (last) { raw_rcv(last, skb); - read_unlock(&ip_ra_lock); - return 1; + return true; } - read_unlock(&ip_ra_lock); - return 0; + return false; } static int ip_local_deliver_finish(struct sk_buff *skb) { - __skb_pull(skb, ip_hdrlen(skb)); + struct net *net = dev_net(skb->dev); - /* Point into the IP datagram, just past the header. */ - skb_reset_transport_header(skb); + __skb_pull(skb, skb_network_header_len(skb)); rcu_read_lock(); { int protocol = ip_hdr(skb)->protocol; - int hash, raw; - struct net_protocol *ipprot; + const struct net_protocol *ipprot; + int raw; resubmit: raw = raw_local_deliver(skb, protocol); - hash = protocol & (MAX_INET_PROTOS - 1); - if ((ipprot = rcu_dereference(inet_protos[hash])) != NULL) { + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot != NULL) { int ret; if (!ipprot->no_policy) { @@ -227,17 +218,19 @@ static int ip_local_deliver_finish(struct sk_buff *skb) protocol = -ret; goto resubmit; } - IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); } else { if (!raw) { if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - IP_INC_STATS_BH(IPSTATS_MIB_INUNKNOWNPROTOS); + IP_INC_STATS_BH(net, IPSTATS_MIB_INUNKNOWNPROTOS); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0); } - } else - IP_INC_STATS_BH(IPSTATS_MIB_INDELIVERS); - kfree_skb(skb); + kfree_skb(skb); + } else { + IP_INC_STATS_BH(net, IPSTATS_MIB_INDELIVERS); + consume_skb(skb); + } } } out: @@ -255,19 +248,19 @@ int ip_local_deliver(struct sk_buff *skb) * Reassemble IP fragments. */ - if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) { + if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(skb, IP_DEFRAG_LOCAL_DELIVER)) return 0; } - return NF_HOOK(PF_INET, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV4, NF_INET_LOCAL_IN, skb, skb->dev, NULL, ip_local_deliver_finish); } -static inline int ip_rcv_options(struct sk_buff *skb) +static inline bool ip_rcv_options(struct sk_buff *skb) { struct ip_options *opt; - struct iphdr *iph; + const struct iphdr *iph; struct net_device *dev = skb->dev; /* It looks as overkill, because not all @@ -278,84 +271,97 @@ static inline int ip_rcv_options(struct sk_buff *skb) --ANK (980813) */ if (skb_cow(skb, skb_headroom(skb))) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } iph = ip_hdr(skb); + opt = &(IPCB(skb)->opt); + opt->optlen = iph->ihl*4 - sizeof(struct iphdr); - if (ip_options_compile(NULL, skb)) { - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + if (ip_options_compile(dev_net(dev), opt, skb)) { + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); goto drop; } - opt = &(IPCB(skb)->opt); if (unlikely(opt->srr)) { - struct in_device *in_dev = in_dev_get(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); + if (in_dev) { if (!IN_DEV_SOURCE_ROUTE(in_dev)) { - if (IN_DEV_LOG_MARTIANS(in_dev) && - net_ratelimit()) - printk(KERN_INFO "source route option " - "%u.%u.%u.%u -> %u.%u.%u.%u\n", - NIPQUAD(iph->saddr), - NIPQUAD(iph->daddr)); - in_dev_put(in_dev); + if (IN_DEV_LOG_MARTIANS(in_dev)) + net_info_ratelimited("source route option %pI4 -> %pI4\n", + &iph->saddr, + &iph->daddr); goto drop; } - - in_dev_put(in_dev); } if (ip_options_rcv_srr(skb)) goto drop; } - return 0; + return false; drop: - return -1; + return true; } +int sysctl_ip_early_demux __read_mostly = 1; +EXPORT_SYMBOL(sysctl_ip_early_demux); + static int ip_rcv_finish(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; + if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + const struct net_protocol *ipprot; + int protocol = iph->protocol; + + ipprot = rcu_dereference(inet_protos[protocol]); + if (ipprot && ipprot->early_demux) { + ipprot->early_demux(skb); + /* must reload iph, skb->head might have changed */ + iph = ip_hdr(skb); + } + } + /* * Initialise the virtual path cache for the packet. It describes * how the packet travels inside Linux networking. */ - if (skb->dst == NULL) { - int err = ip_route_input(skb, iph->daddr, iph->saddr, iph->tos, - skb->dev); + if (!skb_dst(skb)) { + int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev); if (unlikely(err)) { - if (err == -EHOSTUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); - else if (err == -ENETUNREACH) - IP_INC_STATS_BH(IPSTATS_MIB_INNOROUTES); + if (err == -EXDEV) + NET_INC_STATS_BH(dev_net(skb->dev), + LINUX_MIB_IPRPFILTER); goto drop; } } -#ifdef CONFIG_NET_CLS_ROUTE - if (unlikely(skb->dst->tclassid)) { - struct ip_rt_acct *st = per_cpu_ptr(ip_rt_acct, smp_processor_id()); - u32 idx = skb->dst->tclassid; +#ifdef CONFIG_IP_ROUTE_CLASSID + if (unlikely(skb_dst(skb)->tclassid)) { + struct ip_rt_acct *st = this_cpu_ptr(ip_rt_acct); + u32 idx = skb_dst(skb)->tclassid; st[idx&0xFF].o_packets++; - st[idx&0xFF].o_bytes+=skb->len; + st[idx&0xFF].o_bytes += skb->len; st[(idx>>16)&0xFF].i_packets++; - st[(idx>>16)&0xFF].i_bytes+=skb->len; + st[(idx>>16)&0xFF].i_bytes += skb->len; } #endif if (iph->ihl > 5 && ip_rcv_options(skb)) goto drop; - rt = (struct rtable*)skb->dst; - if (rt->rt_type == RTN_MULTICAST) - IP_INC_STATS_BH(IPSTATS_MIB_INMCASTPKTS); - else if (rt->rt_type == RTN_BROADCAST) - IP_INC_STATS_BH(IPSTATS_MIB_INBCASTPKTS); + rt = skb_rtable(skb); + if (rt->rt_type == RTN_MULTICAST) { + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INMCAST, + skb->len); + } else if (rt->rt_type == RTN_BROADCAST) + IP_UPD_PO_STATS_BH(dev_net(rt->dst.dev), IPSTATS_MIB_INBCAST, + skb->len); return dst_input(skb); @@ -369,22 +375,20 @@ drop: */ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct iphdr *iph; + const struct iphdr *iph; u32 len; - if (dev->nd_net != &init_net) - goto drop; - /* When the interface is in promisc. mode, drop all the crap * that it receives, do not try to analyse it. */ if (skb->pkt_type == PACKET_OTHERHOST) goto drop; - IP_INC_STATS_BH(IPSTATS_MIB_INRECEIVES); + + IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len); if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto out; } @@ -394,7 +398,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, iph = ip_hdr(skb); /* - * RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum. + * RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum. * * Is the datagram acceptable? * @@ -407,17 +411,24 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, if (iph->ihl < 5 || iph->version != 4) goto inhdr_error; + BUILD_BUG_ON(IPSTATS_MIB_ECT1PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_1); + BUILD_BUG_ON(IPSTATS_MIB_ECT0PKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_ECT_0); + BUILD_BUG_ON(IPSTATS_MIB_CEPKTS != IPSTATS_MIB_NOECTPKTS + INET_ECN_CE); + IP_ADD_STATS_BH(dev_net(dev), + IPSTATS_MIB_NOECTPKTS + (iph->tos & INET_ECN_MASK), + max_t(unsigned short, 1, skb_shinfo(skb)->gso_segs)); + if (!pskb_may_pull(skb, iph->ihl*4)) goto inhdr_error; iph = ip_hdr(skb); if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl))) - goto inhdr_error; + goto csum_error; len = ntohs(iph->tot_len); if (skb->len < len) { - IP_INC_STATS_BH(IPSTATS_MIB_INTRUNCATEDPKTS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INTRUNCATEDPKTS); goto drop; } else if (len < (iph->ihl*4)) goto inhdr_error; @@ -427,22 +438,27 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, * Note this now means skb->len holds ntohs(iph->tot_len). */ if (pskb_trim_rcsum(skb, len)) { - IP_INC_STATS_BH(IPSTATS_MIB_INDISCARDS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INDISCARDS); goto drop; } + skb->transport_header = skb->network_header + iph->ihl*4; + /* Remove any debris in the socket control block */ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm)); - return NF_HOOK(PF_INET, NF_INET_PRE_ROUTING, skb, dev, NULL, + /* Must drop socket now because of tproxy. */ + skb_orphan(skb); + + return NF_HOOK(NFPROTO_IPV4, NF_INET_PRE_ROUTING, skb, dev, NULL, ip_rcv_finish); +csum_error: + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_CSUMERRORS); inhdr_error: - IP_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_INHDRERRORS); drop: kfree_skb(skb); out: return NET_RX_DROP; } - -EXPORT_SYMBOL(ip_statistics); |
