diff options
Diffstat (limited to 'net/ipv6/reassembly.c')
| -rw-r--r-- | net/ipv6/reassembly.c | 348 |
1 files changed, 152 insertions, 196 deletions
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 2cddea3bd6b..cc85a9ba501 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -26,6 +26,9 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ + +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -41,6 +44,8 @@ #include <linux/random.h> #include <linux/jhash.h> #include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/export.h> #include <net/sock.h> #include <net/snmp.h> @@ -53,6 +58,7 @@ #include <net/ndisc.h> #include <net/addrconf.h> #include <net/inet_frag.h> +#include <net/inet_ecn.h> struct ip6frag_skb_cb { @@ -62,36 +68,12 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) - -/* - * Equivalent of ipv4 struct ipq - */ - -struct frag_queue -{ - struct inet_frag_queue q; - - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - - int iif; - unsigned int csum; - __u16 nhoffset; -}; - -static struct inet_frags ip6_frags; - -int ip6_frag_nqueues(struct net *net) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { - return net->ipv6.frags.nqueues; + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } -int ip6_frag_mem(struct net *net) -{ - return atomic_read(&net->ipv6.frags.mem); -} +static struct inet_frags ip6_frags; static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev); @@ -100,64 +82,39 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, * callers should be careful not to use the hash value outside the ipfrag_lock * as doing so could race with ipfrag_hash_rnd being recalculated. */ -unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, - const struct in6_addr *daddr, u32 rnd) +static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) { - u32 a, b, c; - - a = (__force u32)saddr->s6_addr32[0]; - b = (__force u32)saddr->s6_addr32[1]; - c = (__force u32)saddr->s6_addr32[2]; + u32 c; - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += rnd; - __jhash_mix(a, b, c); - - a += (__force u32)saddr->s6_addr32[3]; - b += (__force u32)daddr->s6_addr32[0]; - c += (__force u32)daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += (__force u32)daddr->s6_addr32[2]; - b += (__force u32)daddr->s6_addr32[3]; - c += (__force u32)id; - __jhash_mix(a, b, c); + net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); + c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, ip6_frags.rnd); return c & (INETFRAGS_HASHSZ - 1); } -EXPORT_SYMBOL_GPL(inet6_hash_frag); static unsigned int ip6_hashfn(struct inet_frag_queue *q) { struct frag_queue *fq; fq = container_of(q, struct frag_queue, q); - return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); + return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); } -int ip6_frag_match(struct inet_frag_queue *q, void *a) +bool ip6_frag_match(struct inet_frag_queue *q, void *a) { struct frag_queue *fq; struct ip6_create_arg *arg = a; fq = container_of(q, struct frag_queue, q); - return (fq->id == arg->id && fq->user == arg->user && - ipv6_addr_equal(&fq->saddr, arg->src) && - ipv6_addr_equal(&fq->daddr, arg->dst)); + return fq->id == arg->id && + fq->user == arg->user && + ipv6_addr_equal(&fq->saddr, arg->src) && + ipv6_addr_equal(&fq->daddr, arg->dst); } EXPORT_SYMBOL(ip6_frag_match); -/* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct netns_frags *nf, - struct sk_buff *skb, int *work) -{ - if (work) - *work -= skb->truesize; - atomic_sub(skb->truesize, &nf->mem); - kfree_skb(skb); -} - void ip6_frag_init(struct inet_frag_queue *q, void *a) { struct frag_queue *fq = container_of(q, struct frag_queue, q); @@ -165,51 +122,24 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) fq->id = arg->id; fq->user = arg->user; - ipv6_addr_copy(&fq->saddr, arg->src); - ipv6_addr_copy(&fq->daddr, arg->dst); + fq->saddr = *arg->src; + fq->daddr = *arg->dst; + fq->ecn = arg->ecn; } EXPORT_SYMBOL(ip6_frag_init); -/* Destruction primitives. */ - -static __inline__ void fq_put(struct frag_queue *fq) -{ - inet_frag_put(&fq->q, &ip6_frags); -} - -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct frag_queue *fq) -{ - inet_frag_kill(&fq->q, &ip6_frags); -} - -static void ip6_evictor(struct net *net, struct inet6_dev *idev) -{ - int evicted; - - evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); - if (evicted) - IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); -} - -static void ip6_frag_expire(unsigned long data) +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, + struct inet_frags *frags) { - struct frag_queue *fq; struct net_device *dev = NULL; - struct net *net; - - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); spin_lock(&fq->q.lock); if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; - fq_kill(fq); + inet_frag_kill(&fq->q, frags); - net = container_of(fq->q.net, struct net, ipv6.frags); rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) @@ -228,17 +158,29 @@ static void ip6_frag_expire(unsigned long data) pointer directly, device might already disappeared. */ fq->q.fragments->dev = dev; - icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); + icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); out: spin_unlock(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, frags); +} +EXPORT_SYMBOL(ip6_expire_frag_queue); + +static void ip6_frag_expire(unsigned long data) +{ + struct frag_queue *fq; + struct net *net; + + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ipv6.frags); + + ip6_expire_frag_queue(net, fq, &ip6_frags); } static __inline__ struct frag_queue * -fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, - struct inet6_dev *idev) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, + const struct in6_addr *dst, u8 ecn) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -248,19 +190,17 @@ fq_find(struct net *net, __be32 id, struct in6_addr *src, struct in6_addr *dst, arg.user = IP6_DEFRAG_LOCAL_DELIVER; arg.src = src; arg.dst = dst; + arg.ecn = ecn; read_lock(&ip6_frags.lock); - hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); + hash = inet6_hash_frag(id, src, dst); q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); - if (q == NULL) - goto oom; - + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; + } return container_of(q, struct frag_queue, q); - -oom: - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS); - return NULL; } static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, @@ -270,6 +210,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct net_device *dev; int offset, end; struct net *net = dev_net(skb_dst(skb)->dev); + u8 ecn; if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; @@ -287,6 +228,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, return -1; } + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + if (skb->ip_summed == CHECKSUM_COMPLETE) { const unsigned char *nh = skb_network_header(skb); skb->csum = csum_sub(skb->csum, @@ -340,6 +283,11 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * in the chain of fragments so far. We must know where to put * this fragment, right? */ + prev = fq->q.fragments_tail; + if (!prev || FRAG6_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; for(next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) @@ -347,63 +295,29 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, prev = next; } - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. - */ - if (prev) { - int i = (FRAG6_CB(prev)->offset + prev->len) - offset; - - if (i > 0) { - offset += i; - if (end <= offset) - goto err; - if (!pskb_pull(skb, i)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } - - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. +found: + /* RFC5722, Section 4, amended by Errata ID : 3089 + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments) MUST be silently discarded. */ - while (next && FRAG6_CB(next)->offset < end) { - int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - if (!pskb_pull(next, i)) - goto err; - FRAG6_CB(next)->offset += i; /* next fragment */ - fq->q.meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - /* Old fragment is completely overridden with - * new one drop it. - */ - next = next->next; + /* Check for overlap with preceding fragment. */ + if (prev && + (FRAG6_CB(prev)->offset + prev->len) > offset) + goto discard_fq; - if (prev) - prev->next = next; - else - fq->q.fragments = next; - - fq->q.meat -= free_it->len; - frag_kfree_skb(fq->q.net, free_it, NULL); - } - } + /* Look for overlap with succeeding segment. */ + if (next && FRAG6_CB(next)->offset < end) + goto discard_fq; FRAG6_CB(skb)->offset = offset; /* Insert this fragment in the chain of fragments. */ skb->next = next; + if (!next) + fq->q.fragments_tail = skb; if (prev) prev->next = skb; else @@ -416,7 +330,8 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; - atomic_add(skb->truesize, &fq->q.net->mem); + fq->ecn |= ecn; + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -427,14 +342,22 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, } if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && - fq->q.meat == fq->q.len) - return ip6_frag_reasm(fq, prev, dev); + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = ip6_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; + } - write_lock(&ip6_frags.lock); - list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); - write_unlock(&ip6_frags.lock); + skb_dst_drop(skb); + inet_frag_lru_move(&fq->q); return -1; +discard_fq: + inet_frag_kill(&fq->q, &ip6_frags); err: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); @@ -458,8 +381,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct sk_buff *fp, *head = fq->q.fragments; int payload_len; unsigned int nhoff; + int sum_truesize; + u8 ecn; - fq_kill(fq); + inet_frag_kill(&fq->q, &ip6_frags); + + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; /* Make the one we just received the head. */ if (prev) { @@ -470,12 +399,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oom; fp->next = head->next; + if (!fp->next) + fq->q.fragments_tail = fp; prev->next = fp; skb_morph(head, fq->q.fragments); head->next = fq->q.fragments->next; - kfree_skb(fq->q.fragments); + consume_skb(fq->q.fragments); fq->q.fragments = head; } @@ -490,13 +421,13 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_has_frags(head)) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; @@ -506,14 +437,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; skb_frag_list_init(head); - for (i=0; i<skb_shinfo(head)->nr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &fq->q.net->mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -525,27 +456,41 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->mac_header += sizeof(struct frag_hdr); head->network_header += sizeof(struct frag_hdr); - skb_shinfo(head)->frag_list = head->next; skb_reset_transport_header(head); skb_push(head, head->data - skb_network_header(head)); - atomic_sub(head->truesize, &fq->q.net->mem); - for (fp=head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; + sum_truesize = head->truesize; + for (fp = head->next; fp;) { + bool headstolen; + int delta; + struct sk_buff *next = fp->next; + + sum_truesize += fp->truesize; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - atomic_sub(fp->truesize, &fq->q.net->mem); + + if (skb_try_coalesce(head, fp, &headstolen, &delta)) { + kfree_skb_partial(fp, headstolen); + } else { + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = fp; + head->data_len += fp->len; + head->len += fp->len; + head->truesize += fp->truesize; + } + fp = next; } + sub_frag_mem_limit(&fq->q, sum_truesize); head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; + IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -557,15 +502,14 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); rcu_read_unlock(); fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; return 1; out_oversize: - if (net_ratelimit()) - printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len); + net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len); goto out_fail; out_oom: - if (net_ratelimit()) - printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); + net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: rcu_read_lock(); IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); @@ -577,8 +521,12 @@ static int ipv6_frag_rcv(struct sk_buff *skb) { struct frag_hdr *fhdr; struct frag_queue *fq; - struct ipv6hdr *hdr = ipv6_hdr(skb); + const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); + int evicted; + + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) + goto fail_hdr; IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); @@ -600,14 +548,18 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; return 1; } - if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) - ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); + evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); + if (evicted) + IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS, evicted); - if ((fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, - ip6_dst_idev(skb_dst(skb)))) != NULL) { + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); + if (fq != NULL) { int ret; spin_lock(&fq->q.lock); @@ -615,7 +567,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); spin_unlock(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, &ip6_frags); return ret; } @@ -672,7 +624,7 @@ static struct ctl_table ip6_frags_ctl_table[] = { { } }; -static int ip6_frags_ns_sysctl_register(struct net *net) +static int __net_init ip6_frags_ns_sysctl_register(struct net *net) { struct ctl_table *table; struct ctl_table_header *hdr; @@ -686,9 +638,13 @@ static int ip6_frags_ns_sysctl_register(struct net *net) table[0].data = &net->ipv6.frags.high_thresh; table[1].data = &net->ipv6.frags.low_thresh; table[2].data = &net->ipv6.frags.timeout; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; } - hdr = register_net_sysctl_table(net, net_ipv6_ctl_path, table); + hdr = register_net_sysctl(net, "net/ipv6", table); if (hdr == NULL) goto err_reg; @@ -702,7 +658,7 @@ err_alloc: return -ENOMEM; } -static void ip6_frags_ns_sysctl_unregister(struct net *net) +static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net) { struct ctl_table *table; @@ -716,7 +672,7 @@ static struct ctl_table_header *ip6_ctl_header; static int ip6_frags_sysctl_register(void) { - ip6_ctl_header = register_net_sysctl_rotable(net_ipv6_ctl_path, + ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6", ip6_frags_ctl_table); return ip6_ctl_header == NULL ? -ENOMEM : 0; } @@ -745,10 +701,10 @@ static inline void ip6_frags_sysctl_unregister(void) } #endif -static int ipv6_frags_init_net(struct net *net) +static int __net_init ipv6_frags_init_net(struct net *net) { - net->ipv6.frags.high_thresh = 256 * 1024; - net->ipv6.frags.low_thresh = 192 * 1024; + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; inet_frags_init_net(&net->ipv6.frags); @@ -756,7 +712,7 @@ static int ipv6_frags_init_net(struct net *net) return ip6_frags_ns_sysctl_register(net); } -static void ipv6_frags_exit_net(struct net *net) +static void __net_exit ipv6_frags_exit_net(struct net *net) { ip6_frags_ns_sysctl_unregister(net); inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); |
