diff options
Diffstat (limited to 'net/ipv6/reassembly.c')
| -rw-r--r-- | net/ipv6/reassembly.c | 969 |
1 files changed, 489 insertions, 480 deletions
diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 15e1456b3f1..cc85a9ba501 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -1,11 +1,9 @@ /* * IPv6 fragment reassembly - * Linux INET6 implementation + * Linux INET6 implementation * * Authors: - * Pedro Roque <roque@di.fc.ul.pt> - * - * $Id: reassembly.c,v 1.26 2001/03/07 22:00:57 davem Exp $ + * Pedro Roque <roque@di.fc.ul.pt> * * Based on: net/ipv4/ip_fragment.c * @@ -15,8 +13,8 @@ * 2 of the License, or (at your option) any later version. */ -/* - * Fixes: +/* + * Fixes: * Andi Kleen Make it work with multiple hosts. * More RFC compliance. * @@ -28,7 +26,9 @@ * YOSHIFUJI,H. @USAGI Always remove fragment header to * calculate ICV correctly. */ -#include <linux/config.h> + +#define pr_fmt(fmt) "IPv6: " fmt + #include <linux/errno.h> #include <linux/types.h> #include <linux/string.h> @@ -43,21 +43,22 @@ #include <linux/icmpv6.h> #include <linux/random.h> #include <linux/jhash.h> +#include <linux/skbuff.h> +#include <linux/slab.h> +#include <linux/export.h> #include <net/sock.h> #include <net/snmp.h> #include <net/ipv6.h> +#include <net/ip6_route.h> #include <net/protocol.h> #include <net/transp_v6.h> #include <net/rawv6.h> #include <net/ndisc.h> #include <net/addrconf.h> - -int sysctl_ip6frag_high_thresh = 256*1024; -int sysctl_ip6frag_low_thresh = 192*1024; - -int sysctl_ip6frag_time = IPV6_FRAG_TIMEOUT; +#include <net/inet_frag.h> +#include <net/inet_ecn.h> struct ip6frag_skb_cb { @@ -67,382 +68,185 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) - -/* - * Equivalent of ipv4 struct ipq - */ - -struct frag_queue -{ - struct hlist_node list; - struct list_head lru_list; /* lru list member */ - - __u32 id; /* fragment id */ - struct in6_addr saddr; - struct in6_addr daddr; - - spinlock_t lock; - atomic_t refcnt; - struct timer_list timer; /* expire timer */ - struct sk_buff *fragments; - int len; - int meat; - int iif; - struct timeval stamp; - unsigned int csum; - __u8 last_in; /* has first/last segment arrived? */ -#define COMPLETE 4 -#define FIRST_IN 2 -#define LAST_IN 1 - __u16 nhoffset; -}; - -/* Hash table. */ - -#define IP6Q_HASHSZ 64 - -static struct hlist_head ip6_frag_hash[IP6Q_HASHSZ]; -static DEFINE_RWLOCK(ip6_frag_lock); -static u32 ip6_frag_hash_rnd; -static LIST_HEAD(ip6_frag_lru_list); -int ip6_frag_nqueues = 0; - -static __inline__ void __fq_unlink(struct frag_queue *fq) -{ - hlist_del(&fq->list); - list_del(&fq->lru_list); - ip6_frag_nqueues--; -} - -static __inline__ void fq_unlink(struct frag_queue *fq) -{ - write_lock(&ip6_frag_lock); - __fq_unlink(fq); - write_unlock(&ip6_frag_lock); -} - -static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr, - struct in6_addr *daddr) -{ - u32 a, b, c; - - a = saddr->s6_addr32[0]; - b = saddr->s6_addr32[1]; - c = saddr->s6_addr32[2]; - - a += JHASH_GOLDEN_RATIO; - b += JHASH_GOLDEN_RATIO; - c += ip6_frag_hash_rnd; - __jhash_mix(a, b, c); - - a += saddr->s6_addr32[3]; - b += daddr->s6_addr32[0]; - c += daddr->s6_addr32[1]; - __jhash_mix(a, b, c); - - a += daddr->s6_addr32[2]; - b += daddr->s6_addr32[3]; - c += id; - __jhash_mix(a, b, c); - - return c & (IP6Q_HASHSZ - 1); -} - -static struct timer_list ip6_frag_secret_timer; -int sysctl_ip6frag_secret_interval = 10 * 60 * HZ; - -static void ip6_frag_secret_rebuild(unsigned long dummy) +static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h) { - unsigned long now = jiffies; - int i; - - write_lock(&ip6_frag_lock); - get_random_bytes(&ip6_frag_hash_rnd, sizeof(u32)); - for (i = 0; i < IP6Q_HASHSZ; i++) { - struct frag_queue *q; - struct hlist_node *p, *n; - - hlist_for_each_entry_safe(q, p, n, &ip6_frag_hash[i], list) { - unsigned int hval = ip6qhashfn(q->id, - &q->saddr, - &q->daddr); - - if (hval != i) { - hlist_del(&q->list); - - /* Relink to new hash chain. */ - hlist_add_head(&q->list, - &ip6_frag_hash[hval]); - - } - } - } - write_unlock(&ip6_frag_lock); - - mod_timer(&ip6_frag_secret_timer, now + sysctl_ip6frag_secret_interval); + return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK); } -atomic_t ip6_frag_mem = ATOMIC_INIT(0); +static struct inet_frags ip6_frags; -/* Memory Tracking Functions. */ -static inline void frag_kfree_skb(struct sk_buff *skb, int *work) -{ - if (work) - *work -= skb->truesize; - atomic_sub(skb->truesize, &ip6_frag_mem); - kfree_skb(skb); -} +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, + struct net_device *dev); -static inline void frag_free_queue(struct frag_queue *fq, int *work) +/* + * callers should be careful not to use the hash value outside the ipfrag_lock + * as doing so could race with ipfrag_hash_rnd being recalculated. + */ +static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, + const struct in6_addr *daddr) { - if (work) - *work -= sizeof(struct frag_queue); - atomic_sub(sizeof(struct frag_queue), &ip6_frag_mem); - kfree(fq); -} + u32 c; -static inline struct frag_queue *frag_alloc_queue(void) -{ - struct frag_queue *fq = kmalloc(sizeof(struct frag_queue), GFP_ATOMIC); + net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd)); + c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), + (__force u32)id, ip6_frags.rnd); - if(!fq) - return NULL; - atomic_add(sizeof(struct frag_queue), &ip6_frag_mem); - return fq; + return c & (INETFRAGS_HASHSZ - 1); } -/* Destruction primitives. */ - -/* Complete destruction of fq. */ -static void ip6_frag_destroy(struct frag_queue *fq, int *work) +static unsigned int ip6_hashfn(struct inet_frag_queue *q) { - struct sk_buff *fp; - - BUG_TRAP(fq->last_in&COMPLETE); - BUG_TRAP(del_timer(&fq->timer) == 0); - - /* Release all fragment data. */ - fp = fq->fragments; - while (fp) { - struct sk_buff *xp = fp->next; - - frag_kfree_skb(fp, work); - fp = xp; - } - - frag_free_queue(fq, work); -} + struct frag_queue *fq; -static __inline__ void fq_put(struct frag_queue *fq, int *work) -{ - if (atomic_dec_and_test(&fq->refcnt)) - ip6_frag_destroy(fq, work); + fq = container_of(q, struct frag_queue, q); + return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr); } -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct frag_queue *fq) +bool ip6_frag_match(struct inet_frag_queue *q, void *a) { - if (del_timer(&fq->timer)) - atomic_dec(&fq->refcnt); + struct frag_queue *fq; + struct ip6_create_arg *arg = a; - if (!(fq->last_in & COMPLETE)) { - fq_unlink(fq); - atomic_dec(&fq->refcnt); - fq->last_in |= COMPLETE; - } + fq = container_of(q, struct frag_queue, q); + return fq->id == arg->id && + fq->user == arg->user && + ipv6_addr_equal(&fq->saddr, arg->src) && + ipv6_addr_equal(&fq->daddr, arg->dst); } +EXPORT_SYMBOL(ip6_frag_match); -static void ip6_evictor(void) +void ip6_frag_init(struct inet_frag_queue *q, void *a) { - struct frag_queue *fq; - struct list_head *tmp; - int work; - - work = atomic_read(&ip6_frag_mem) - sysctl_ip6frag_low_thresh; - if (work <= 0) - return; - - while(work > 0) { - read_lock(&ip6_frag_lock); - if (list_empty(&ip6_frag_lru_list)) { - read_unlock(&ip6_frag_lock); - return; - } - tmp = ip6_frag_lru_list.next; - fq = list_entry(tmp, struct frag_queue, lru_list); - atomic_inc(&fq->refcnt); - read_unlock(&ip6_frag_lock); - - spin_lock(&fq->lock); - if (!(fq->last_in&COMPLETE)) - fq_kill(fq); - spin_unlock(&fq->lock); - - fq_put(fq, &work); - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - } + struct frag_queue *fq = container_of(q, struct frag_queue, q); + struct ip6_create_arg *arg = a; + + fq->id = arg->id; + fq->user = arg->user; + fq->saddr = *arg->src; + fq->daddr = *arg->dst; + fq->ecn = arg->ecn; } +EXPORT_SYMBOL(ip6_frag_init); -static void ip6_frag_expire(unsigned long data) +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, + struct inet_frags *frags) { - struct frag_queue *fq = (struct frag_queue *) data; + struct net_device *dev = NULL; - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - if (fq->last_in & COMPLETE) + if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; - fq_kill(fq); - - IP6_INC_STATS_BH(IPSTATS_MIB_REASMTIMEOUT); - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - - /* Send error only if the first segment arrived. */ - if (fq->last_in&FIRST_IN && fq->fragments) { - struct net_device *dev = dev_get_by_index(fq->iif); - - /* - But use as source device on which LAST ARRIVED - segment was received. And do not use fq->dev - pointer directly, device might already disappeared. - */ - if (dev) { - fq->fragments->dev = dev; - icmpv6_send(fq->fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, - dev); - dev_put(dev); - } - } -out: - spin_unlock(&fq->lock); - fq_put(fq, NULL); -} + inet_frag_kill(&fq->q, frags); -/* Creation primitives. */ + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, fq->iif); + if (!dev) + goto out_rcu_unlock; + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); -static struct frag_queue *ip6_frag_intern(unsigned int hash, - struct frag_queue *fq_in) -{ - struct frag_queue *fq; -#ifdef CONFIG_SMP - struct hlist_node *n; -#endif + /* Don't send error if the first segment did not arrive. */ + if (!(fq->q.last_in & INET_FRAG_FIRST_IN) || !fq->q.fragments) + goto out_rcu_unlock; - write_lock(&ip6_frag_lock); -#ifdef CONFIG_SMP - hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { - if (fq->id == fq_in->id && - ipv6_addr_equal(&fq_in->saddr, &fq->saddr) && - ipv6_addr_equal(&fq_in->daddr, &fq->daddr)) { - atomic_inc(&fq->refcnt); - write_unlock(&ip6_frag_lock); - fq_in->last_in |= COMPLETE; - fq_put(fq_in, NULL); - return fq; - } - } -#endif - fq = fq_in; - - if (!mod_timer(&fq->timer, jiffies + sysctl_ip6frag_time)) - atomic_inc(&fq->refcnt); - - atomic_inc(&fq->refcnt); - hlist_add_head(&fq->list, &ip6_frag_hash[hash]); - INIT_LIST_HEAD(&fq->lru_list); - list_add_tail(&fq->lru_list, &ip6_frag_lru_list); - ip6_frag_nqueues++; - write_unlock(&ip6_frag_lock); - return fq; + /* + But use as source device on which LAST ARRIVED + segment was received. And do not use fq->dev + pointer directly, device might already disappeared. + */ + fq->q.fragments->dev = dev; + icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); +out_rcu_unlock: + rcu_read_unlock(); +out: + spin_unlock(&fq->q.lock); + inet_frag_put(&fq->q, frags); } +EXPORT_SYMBOL(ip6_expire_frag_queue); - -static struct frag_queue * -ip6_frag_create(unsigned int hash, u32 id, struct in6_addr *src, struct in6_addr *dst) +static void ip6_frag_expire(unsigned long data) { struct frag_queue *fq; + struct net *net; - if ((fq = frag_alloc_queue()) == NULL) - goto oom; - - memset(fq, 0, sizeof(struct frag_queue)); - - fq->id = id; - ipv6_addr_copy(&fq->saddr, src); - ipv6_addr_copy(&fq->daddr, dst); - - init_timer(&fq->timer); - fq->timer.function = ip6_frag_expire; - fq->timer.data = (long) fq; - spin_lock_init(&fq->lock); - atomic_set(&fq->refcnt, 1); + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ipv6.frags); - return ip6_frag_intern(hash, fq); - -oom: - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); - return NULL; + ip6_expire_frag_queue(net, fq, &ip6_frags); } static __inline__ struct frag_queue * -fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst) +fq_find(struct net *net, __be32 id, const struct in6_addr *src, + const struct in6_addr *dst, u8 ecn) { - struct frag_queue *fq; - struct hlist_node *n; - unsigned int hash = ip6qhashfn(id, src, dst); - - read_lock(&ip6_frag_lock); - hlist_for_each_entry(fq, n, &ip6_frag_hash[hash], list) { - if (fq->id == id && - ipv6_addr_equal(src, &fq->saddr) && - ipv6_addr_equal(dst, &fq->daddr)) { - atomic_inc(&fq->refcnt); - read_unlock(&ip6_frag_lock); - return fq; - } + struct inet_frag_queue *q; + struct ip6_create_arg arg; + unsigned int hash; + + arg.id = id; + arg.user = IP6_DEFRAG_LOCAL_DELIVER; + arg.src = src; + arg.dst = dst; + arg.ecn = ecn; + + read_lock(&ip6_frags.lock); + hash = inet6_hash_frag(id, src, dst); + + q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash); + if (IS_ERR_OR_NULL(q)) { + inet_frag_maybe_warn_overflow(q, pr_fmt()); + return NULL; } - read_unlock(&ip6_frag_lock); - - return ip6_frag_create(hash, id, src, dst); + return container_of(q, struct frag_queue, q); } - -static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, +static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; + struct net_device *dev; int offset, end; + struct net *net = dev_net(skb_dst(skb)->dev); + u8 ecn; - if (fq->last_in & COMPLETE) + if (fq->q.last_in & INET_FRAG_COMPLETE) goto err; offset = ntohs(fhdr->frag_off) & ~0x7; - end = offset + (ntohs(skb->nh.ipv6h->payload_len) - - ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1))); + end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - + ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw); - return; + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + ((u8 *)&fhdr->frag_off - + skb_network_header(skb))); + return -1; } - if (skb->ip_summed == CHECKSUM_HW) - skb->csum = csum_sub(skb->csum, - csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0)); + ecn = ip6_frag_ecn(ipv6_hdr(skb)); + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + const unsigned char *nh = skb_network_header(skb); + skb->csum = csum_sub(skb->csum, + csum_partial(nh, (u8 *)(fhdr + 1) - nh, + 0)); + } /* Is this the final fragment? */ if (!(fhdr->frag_off & htons(IP6_MF))) { /* If we already have some bits beyond end * or have different end, the segment is corrupted. */ - if (end < fq->len || - ((fq->last_in & LAST_IN) && end != fq->len)) + if (end < fq->q.len || + ((fq->q.last_in & INET_FRAG_LAST_IN) && end != fq->q.len)) goto err; - fq->last_in |= LAST_IN; - fq->len = end; + fq->q.last_in |= INET_FRAG_LAST_IN; + fq->q.len = end; } else { /* Check if the fragment is rounded to 8 bytes. * Required by the RFC. @@ -451,16 +255,17 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* RFC2460 says always send parameter problem in * this case. -DaveM */ - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, offsetof(struct ipv6hdr, payload_len)); - return; + return -1; } - if (end > fq->len) { + if (end > fq->q.len) { /* Some bits beyond end -> corruption. */ - if (fq->last_in & LAST_IN) + if (fq->q.last_in & INET_FRAG_LAST_IN) goto err; - fq->len = end; + fq->q.len = end; } } @@ -470,7 +275,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, /* Point into the IP datagram 'data' part. */ if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) goto err; - + if (pskb_trim_rcsum(skb, end - offset)) goto err; @@ -478,97 +283,86 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb, * in the chain of fragments so far. We must know where to put * this fragment, right? */ + prev = fq->q.fragments_tail; + if (!prev || FRAG6_CB(prev)->offset < offset) { + next = NULL; + goto found; + } prev = NULL; - for(next = fq->fragments; next != NULL; next = next->next) { + for(next = fq->q.fragments; next != NULL; next = next->next) { if (FRAG6_CB(next)->offset >= offset) break; /* bingo! */ prev = next; } - /* We found where to put this one. Check for overlap with - * preceding fragment, and, if needed, align things so that - * any overlaps are eliminated. +found: + /* RFC5722, Section 4, amended by Errata ID : 3089 + * When reassembling an IPv6 datagram, if + * one or more its constituent fragments is determined to be an + * overlapping fragment, the entire datagram (and any constituent + * fragments) MUST be silently discarded. */ - if (prev) { - int i = (FRAG6_CB(prev)->offset + prev->len) - offset; - if (i > 0) { - offset += i; - if (end <= offset) - goto err; - if (!pskb_pull(skb, i)) - goto err; - if (skb->ip_summed != CHECKSUM_UNNECESSARY) - skb->ip_summed = CHECKSUM_NONE; - } - } - - /* Look for overlap with succeeding segments. - * If we can merge fragments, do it. - */ - while (next && FRAG6_CB(next)->offset < end) { - int i = end - FRAG6_CB(next)->offset; /* overlap is 'i' bytes */ - - if (i < next->len) { - /* Eat head of the next overlapped fragment - * and leave the loop. The next ones cannot overlap. - */ - if (!pskb_pull(next, i)) - goto err; - FRAG6_CB(next)->offset += i; /* next fragment */ - fq->meat -= i; - if (next->ip_summed != CHECKSUM_UNNECESSARY) - next->ip_summed = CHECKSUM_NONE; - break; - } else { - struct sk_buff *free_it = next; - - /* Old fragment is completely overridden with - * new one drop it. - */ - next = next->next; + /* Check for overlap with preceding fragment. */ + if (prev && + (FRAG6_CB(prev)->offset + prev->len) > offset) + goto discard_fq; - if (prev) - prev->next = next; - else - fq->fragments = next; - - fq->meat -= free_it->len; - frag_kfree_skb(free_it, NULL); - } - } + /* Look for overlap with succeeding segment. */ + if (next && FRAG6_CB(next)->offset < end) + goto discard_fq; FRAG6_CB(skb)->offset = offset; /* Insert this fragment in the chain of fragments. */ skb->next = next; + if (!next) + fq->q.fragments_tail = skb; if (prev) prev->next = skb; else - fq->fragments = skb; + fq->q.fragments = skb; - if (skb->dev) - fq->iif = skb->dev->ifindex; - skb->dev = NULL; - skb_get_timestamp(skb, &fq->stamp); - fq->meat += skb->len; - atomic_add(skb->truesize, &ip6_frag_mem); + dev = skb->dev; + if (dev) { + fq->iif = dev->ifindex; + skb->dev = NULL; + } + fq->q.stamp = skb->tstamp; + fq->q.meat += skb->len; + fq->ecn |= ecn; + add_frag_mem_limit(&fq->q, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. */ if (offset == 0) { fq->nhoffset = nhoff; - fq->last_in |= FIRST_IN; + fq->q.last_in |= INET_FRAG_FIRST_IN; + } + + if (fq->q.last_in == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) && + fq->q.meat == fq->q.len) { + int res; + unsigned long orefdst = skb->_skb_refdst; + + skb->_skb_refdst = 0UL; + res = ip6_frag_reasm(fq, prev, dev); + skb->_skb_refdst = orefdst; + return res; } - write_lock(&ip6_frag_lock); - list_move_tail(&fq->lru_list, &ip6_frag_lru_list); - write_unlock(&ip6_frag_lock); - return; + skb_dst_drop(skb); + inet_frag_lru_move(&fq->q); + return -1; + +discard_fq: + inet_frag_kill(&fq->q, &ip6_frags); err: - IP6_INC_STATS(IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS); kfree_skb(skb); + return -1; } /* @@ -580,31 +374,60 @@ err: * queue is eligible for reassembly i.e. it is not COMPLETE, * the last and the first frames arrived and all the bits are here. */ -static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, +static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev) { - struct sk_buff *fp, *head = fq->fragments; + struct net *net = container_of(fq->q.net, struct net, ipv6.frags); + struct sk_buff *fp, *head = fq->q.fragments; int payload_len; unsigned int nhoff; + int sum_truesize; + u8 ecn; + + inet_frag_kill(&fq->q, &ip6_frags); + + ecn = ip_frag_ecn_table[fq->ecn]; + if (unlikely(ecn == 0xff)) + goto out_fail; - fq_kill(fq); + /* Make the one we just received the head. */ + if (prev) { + head = prev->next; + fp = skb_clone(head, GFP_ATOMIC); + + if (!fp) + goto out_oom; + + fp->next = head->next; + if (!fp->next) + fq->q.fragments_tail = fp; + prev->next = fp; + + skb_morph(head, fq->q.fragments); + head->next = fq->q.fragments->next; + + consume_skb(fq->q.fragments); + fq->q.fragments = head; + } - BUG_TRAP(head != NULL); - BUG_TRAP(FRAG6_CB(head)->offset == 0); + WARN_ON(head == NULL); + WARN_ON(FRAG6_CB(head)->offset != 0); /* Unfragmented part is taken from the first segment. */ - payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr); + payload_len = ((head->data - skb_network_header(head)) - + sizeof(struct ipv6hdr) + fq->q.len - + sizeof(struct frag_hdr)); if (payload_len > IPV6_MAXPLEN) goto out_oversize; /* Head of list must not be cloned. */ - if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) + if (skb_unclone(head, GFP_ATOMIC)) goto out_oom; /* If the first fragment is fragmented itself, we split * it to two chunks: the first with data and paged part * and the second, holding only fragments. */ - if (skb_shinfo(head)->frag_list) { + if (skb_has_frag_list(head)) { struct sk_buff *clone; int i, plen = 0; @@ -613,146 +436,332 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in, clone->next = head->next; head->next = clone; skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list; - skb_shinfo(head)->frag_list = NULL; - for (i=0; i<skb_shinfo(head)->nr_frags; i++) - plen += skb_shinfo(head)->frags[i].size; + skb_frag_list_init(head); + for (i = 0; i < skb_shinfo(head)->nr_frags; i++) + plen += skb_frag_size(&skb_shinfo(head)->frags[i]); clone->len = clone->data_len = head->data_len - plen; head->data_len -= clone->len; head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - atomic_add(clone->truesize, &ip6_frag_mem); + add_frag_mem_limit(&fq->q, clone->truesize); } /* We have to remove fragment header from datagram and to relocate * header in order to calculate ICV correctly. */ nhoff = fq->nhoffset; - head->nh.raw[nhoff] = head->h.raw[0]; - memmove(head->head + sizeof(struct frag_hdr), head->head, + skb_network_header(head)[nhoff] = skb_transport_header(head)[0]; + memmove(head->head + sizeof(struct frag_hdr), head->head, (head->data - head->head) - sizeof(struct frag_hdr)); - head->mac.raw += sizeof(struct frag_hdr); - head->nh.raw += sizeof(struct frag_hdr); + head->mac_header += sizeof(struct frag_hdr); + head->network_header += sizeof(struct frag_hdr); + + skb_reset_transport_header(head); + skb_push(head, head->data - skb_network_header(head)); - skb_shinfo(head)->frag_list = head->next; - head->h.raw = head->data; - skb_push(head, head->data - head->nh.raw); - atomic_sub(head->truesize, &ip6_frag_mem); + sum_truesize = head->truesize; + for (fp = head->next; fp;) { + bool headstolen; + int delta; + struct sk_buff *next = fp->next; - for (fp=head->next; fp; fp = fp->next) { - head->data_len += fp->len; - head->len += fp->len; + sum_truesize += fp->truesize; if (head->ip_summed != fp->ip_summed) head->ip_summed = CHECKSUM_NONE; - else if (head->ip_summed == CHECKSUM_HW) + else if (head->ip_summed == CHECKSUM_COMPLETE) head->csum = csum_add(head->csum, fp->csum); - head->truesize += fp->truesize; - atomic_sub(fp->truesize, &ip6_frag_mem); + + if (skb_try_coalesce(head, fp, &headstolen, &delta)) { + kfree_skb_partial(fp, headstolen); + } else { + if (!skb_shinfo(head)->frag_list) + skb_shinfo(head)->frag_list = fp; + head->data_len += fp->len; + head->len += fp->len; + head->truesize += fp->truesize; + } + fp = next; } + sub_frag_mem_limit(&fq->q, sum_truesize); head->next = NULL; head->dev = dev; - skb_set_timestamp(head, &fq->stamp); - head->nh.ipv6h->payload_len = htons(payload_len); + head->tstamp = fq->q.stamp; + ipv6_hdr(head)->payload_len = htons(payload_len); + ipv6_change_dsfield(ipv6_hdr(head), 0xff, ecn); IP6CB(head)->nhoff = nhoff; - - *skb_in = head; + IP6CB(head)->flags |= IP6SKB_FRAGMENTED; /* Yes, and fold redundant checksum back. 8) */ - if (head->ip_summed == CHECKSUM_HW) - head->csum = csum_partial(head->nh.raw, head->h.raw-head->nh.raw, head->csum); - - IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); - fq->fragments = NULL; + if (head->ip_summed == CHECKSUM_COMPLETE) + head->csum = csum_partial(skb_network_header(head), + skb_network_header_len(head), + head->csum); + + rcu_read_lock(); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMOKS); + rcu_read_unlock(); + fq->q.fragments = NULL; + fq->q.fragments_tail = NULL; return 1; out_oversize: - if (net_ratelimit()) - printk(KERN_DEBUG "ip6_frag_reasm: payload len = %d\n", payload_len); + net_dbg_ratelimited("ip6_frag_reasm: payload len = %d\n", payload_len); goto out_fail; out_oom: - if (net_ratelimit()) - printk(KERN_DEBUG "ip6_frag_reasm: no memory for reassembly\n"); + net_dbg_ratelimited("ip6_frag_reasm: no memory for reassembly\n"); out_fail: - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + rcu_read_lock(); + IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); + rcu_read_unlock(); return -1; } -static int ipv6_frag_rcv(struct sk_buff **skbp) +static int ipv6_frag_rcv(struct sk_buff *skb) { - struct sk_buff *skb = *skbp; - struct net_device *dev = skb->dev; struct frag_hdr *fhdr; struct frag_queue *fq; - struct ipv6hdr *hdr; + const struct ipv6hdr *hdr = ipv6_hdr(skb); + struct net *net = dev_net(skb_dst(skb)->dev); + int evicted; - hdr = skb->nh.ipv6h; + if (IP6CB(skb)->flags & IP6SKB_FRAGMENTED) + goto fail_hdr; - IP6_INC_STATS_BH(IPSTATS_MIB_REASMREQDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); /* Jumbo payload inhibits frag. header */ - if (hdr->payload_len==0) { - IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); - return -1; - } - if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+sizeof(struct frag_hdr))) { - IP6_INC_STATS(IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb->h.raw-skb->nh.raw); - return -1; - } + if (hdr->payload_len==0) + goto fail_hdr; + + if (!pskb_may_pull(skb, (skb_transport_offset(skb) + + sizeof(struct frag_hdr)))) + goto fail_hdr; - hdr = skb->nh.ipv6h; - fhdr = (struct frag_hdr *)skb->h.raw; + hdr = ipv6_hdr(skb); + fhdr = (struct frag_hdr *)skb_transport_header(skb); if (!(fhdr->frag_off & htons(0xFFF9))) { /* It is not a fragmented frame */ - skb->h.raw += sizeof(struct frag_hdr); - IP6_INC_STATS_BH(IPSTATS_MIB_REASMOKS); + skb->transport_header += sizeof(struct frag_hdr); + IP6_INC_STATS_BH(net, + ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMOKS); - IP6CB(skb)->nhoff = (u8*)fhdr - skb->nh.raw; + IP6CB(skb)->nhoff = (u8 *)fhdr - skb_network_header(skb); + IP6CB(skb)->flags |= IP6SKB_FRAGMENTED; return 1; } - if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh) - ip6_evictor(); + evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); + if (evicted) + IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS, evicted); - if ((fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr)) != NULL) { - int ret = -1; + fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, + ip6_frag_ecn(hdr)); + if (fq != NULL) { + int ret; - spin_lock(&fq->lock); + spin_lock(&fq->q.lock); - ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); + ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); - if (fq->last_in == (FIRST_IN|LAST_IN) && - fq->meat == fq->len) - ret = ip6_frag_reasm(fq, skbp, dev); - - spin_unlock(&fq->lock); - fq_put(fq, NULL); + spin_unlock(&fq->q.lock); + inet_frag_put(&fq->q, &ip6_frags); return ret; } - IP6_INC_STATS_BH(IPSTATS_MIB_REASMFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); kfree_skb(skb); return -1; + +fail_hdr: + IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, skb_network_header_len(skb)); + return -1; } -static struct inet6_protocol frag_protocol = +static const struct inet6_protocol frag_protocol = { .handler = ipv6_frag_rcv, .flags = INET6_PROTO_NOPOLICY, }; -void __init ipv6_frag_init(void) +#ifdef CONFIG_SYSCTL +static struct ctl_table ip6_frags_ns_ctl_table[] = { + { + .procname = "ip6frag_high_thresh", + .data = &init_net.ipv6.frags.high_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ip6frag_low_thresh", + .data = &init_net.ipv6.frags.low_thresh, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ip6frag_time", + .data = &init_net.ipv6.frags.timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; + +static struct ctl_table ip6_frags_ctl_table[] = { + { + .procname = "ip6frag_secret_interval", + .data = &ip6_frags.secret_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; + +static int __net_init ip6_frags_ns_sysctl_register(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = ip6_frags_ns_ctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &net->ipv6.frags.high_thresh; + table[1].data = &net->ipv6.frags.low_thresh; + table[2].data = &net->ipv6.frags.timeout; + + /* Don't export sysctls to unprivileged users */ + if (net->user_ns != &init_user_ns) + table[0].procname = NULL; + } + + hdr = register_net_sysctl(net, "net/ipv6", table); + if (hdr == NULL) + goto err_reg; + + net->ipv6.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit ip6_frags_ns_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->ipv6.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->ipv6.sysctl.frags_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +static struct ctl_table_header *ip6_ctl_header; + +static int ip6_frags_sysctl_register(void) +{ + ip6_ctl_header = register_net_sysctl(&init_net, "net/ipv6", + ip6_frags_ctl_table); + return ip6_ctl_header == NULL ? -ENOMEM : 0; +} + +static void ip6_frags_sysctl_unregister(void) +{ + unregister_net_sysctl_table(ip6_ctl_header); +} +#else +static inline int ip6_frags_ns_sysctl_register(struct net *net) { - if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0) - printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n"); + return 0; +} + +static inline void ip6_frags_ns_sysctl_unregister(struct net *net) +{ +} - ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^ - (jiffies ^ (jiffies >> 6))); +static inline int ip6_frags_sysctl_register(void) +{ + return 0; +} - init_timer(&ip6_frag_secret_timer); - ip6_frag_secret_timer.function = ip6_frag_secret_rebuild; - ip6_frag_secret_timer.expires = jiffies + sysctl_ip6frag_secret_interval; - add_timer(&ip6_frag_secret_timer); +static inline void ip6_frags_sysctl_unregister(void) +{ +} +#endif + +static int __net_init ipv6_frags_init_net(struct net *net) +{ + net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; + + inet_frags_init_net(&net->ipv6.frags); + + return ip6_frags_ns_sysctl_register(net); +} + +static void __net_exit ipv6_frags_exit_net(struct net *net) +{ + ip6_frags_ns_sysctl_unregister(net); + inet_frags_exit_net(&net->ipv6.frags, &ip6_frags); +} + +static struct pernet_operations ip6_frags_ops = { + .init = ipv6_frags_init_net, + .exit = ipv6_frags_exit_net, +}; + +int __init ipv6_frag_init(void) +{ + int ret; + + ret = inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT); + if (ret) + goto out; + + ret = ip6_frags_sysctl_register(); + if (ret) + goto err_sysctl; + + ret = register_pernet_subsys(&ip6_frags_ops); + if (ret) + goto err_pernet; + + ip6_frags.hashfn = ip6_hashfn; + ip6_frags.constructor = ip6_frag_init; + ip6_frags.destructor = NULL; + ip6_frags.skb_free = NULL; + ip6_frags.qsize = sizeof(struct frag_queue); + ip6_frags.match = ip6_frag_match; + ip6_frags.frag_expire = ip6_frag_expire; + ip6_frags.secret_interval = 10 * 60 * HZ; + inet_frags_init(&ip6_frags); +out: + return ret; + +err_pernet: + ip6_frags_sysctl_unregister(); +err_sysctl: + inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); + goto out; +} + +void ipv6_frag_exit(void) +{ + inet_frags_fini(&ip6_frags); + ip6_frags_sysctl_unregister(); + unregister_pernet_subsys(&ip6_frags_ops); + inet6_del_protocol(&frag_protocol, IPPROTO_FRAGMENT); } |
