diff options
Diffstat (limited to 'net/ipv6/ip6_output.c')
| -rw-r--r-- | net/ipv6/ip6_output.c | 893 |
1 files changed, 488 insertions, 405 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd48801a8d6..45702b8cd14 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -37,6 +37,7 @@ #include <linux/tcp.h> #include <linux/route.h> #include <linux/module.h> +#include <linux/slab.h> #include <linux/netfilter.h> #include <linux/netfilter_ipv6.h> @@ -55,77 +56,22 @@ #include <net/checksum.h> #include <linux/mroute6.h> -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)); - -int __ip6_local_out(struct sk_buff *skb) -{ - int len; - - len = skb->len - sizeof(struct ipv6hdr); - if (len > IPV6_MAXPLEN) - len = 0; - ipv6_hdr(skb)->payload_len = htons(len); - - return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, - dst_output); -} - -int ip6_local_out(struct sk_buff *skb) -{ - int err; - - err = __ip6_local_out(skb); - if (likely(err == 1)) - err = dst_output(skb); - - return err; -} -EXPORT_SYMBOL_GPL(ip6_local_out); - -static int ip6_output_finish(struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - if (dst->hh) - return neigh_hh_output(dst->hh, skb); - else if (dst->neighbour) - return dst->neighbour->output(skb); - - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); - kfree_skb(skb); - return -EINVAL; - -} - -/* dev_loopback_xmit for use with netfilter. */ -static int ip6_dev_loopback_xmit(struct sk_buff *newskb) -{ - skb_reset_mac_header(newskb); - __skb_pull(newskb, skb_network_offset(newskb)); - newskb->pkt_type = PACKET_LOOPBACK; - newskb->ip_summed = CHECKSUM_UNNECESSARY; - WARN_ON(!skb_dst(newskb)); - - netif_rx(newskb); - return 0; -} - - -static int ip6_output2(struct sk_buff *skb) +static int ip6_finish_output2(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; + struct neighbour *neigh; + struct in6_addr *nexthop; + int ret; skb->protocol = htons(ETH_P_IPV6); skb->dev = dev; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && - ((mroute6_socket(dev_net(dev)) && + if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && + ((mroute6_socket(dev_net(dev), skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, &ipv6_hdr(skb)->saddr))) { @@ -135,9 +81,9 @@ static int ip6_output2(struct sk_buff *skb) is not supported in any case. */ if (newskb) - NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb, - NULL, newskb->dev, - ip6_dev_loopback_xmit); + NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, + newskb, NULL, newskb->dev, + dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { IP6_INC_STATS(dev_net(dev), idev, @@ -149,53 +95,74 @@ static int ip6_output2(struct sk_buff *skb) IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST, skb->len); + + if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <= + IPV6_ADDR_SCOPE_NODELOCAL && + !(dev->flags & IFF_LOOPBACK)) { + kfree_skb(skb); + return 0; + } + } + + rcu_read_lock_bh(); + nexthop = rt6_nexthop((struct rt6_info *)dst); + neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); + if (unlikely(!neigh)) + neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); + if (!IS_ERR(neigh)) { + ret = dst_neigh_output(dst, neigh, skb); + rcu_read_unlock_bh(); + return ret; } + rcu_read_unlock_bh(); - return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev, - ip6_output_finish); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + kfree_skb(skb); + return -EINVAL; } -static inline int ip6_skb_dst_mtu(struct sk_buff *skb) +static int ip6_finish_output(struct sk_buff *skb) { - struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL; - - return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ? - skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb)); + if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || + dst_allfrag(skb_dst(skb)) || + (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) + return ip6_fragment(skb, ip6_finish_output2); + else + return ip6_finish_output2(skb); } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb) { + struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); if (unlikely(idev->cnf.disable_ipv6)) { - IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev, + IP6_INC_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); return 0; } - if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb))) - return ip6_fragment(skb, ip6_output2); - else - return ip6_output2(skb); + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + ip6_finish_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } /* - * xmit an sk_buff (used by TCP) + * xmit an sk_buff (used by TCP, SCTP and DCCP) */ -int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, - struct ipv6_txoptions *opt, int ipfragok) +int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, + struct ipv6_txoptions *opt, int tclass) { struct net *net = sock_net(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct in6_addr *first_hop = &fl->fl6_dst; + struct in6_addr *first_hop = &fl6->daddr; struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr; - u8 proto = fl->proto; + u8 proto = fl6->flowi6_proto; int seg_len = skb->len; int hlimit = -1; - int tclass = 0; u32 mtu; if (opt) { @@ -216,10 +183,9 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, kfree_skb(skb); return -ENOBUFS; } - kfree_skb(skb); + consume_skb(skb); skb = skb2; - if (sk) - skb_set_owner_w(skb, sk); + skb_set_owner_w(skb, sk); } if (opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -231,44 +197,37 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - /* Allow local fragmentation. */ - if (ipfragok) - skb->local_df = 1; - /* * Fill in the IPv6 header */ - if (np) { - tclass = np->tclass; + if (np) hlimit = np->hop_limit; - } if (hlimit < 0) hlimit = ip6_dst_hoplimit(dst); - *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel; + ip6_flow_hdr(hdr, tclass, fl6->flowlabel); hdr->payload_len = htons(seg_len); hdr->nexthdr = proto; hdr->hop_limit = hlimit; - ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); - ipv6_addr_copy(&hdr->daddr, first_hop); + hdr->saddr = fl6->saddr; + hdr->daddr = *first_hop; + skb->protocol = htons(ETH_P_IPV6); skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { + if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, - dst_output); + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, + dst->dev, dst_output); } - if (net_ratelimit()) - printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + ipv6_local_error(sk, EMSGSIZE, fl6, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -276,42 +235,6 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, EXPORT_SYMBOL(ip6_xmit); -/* - * To avoid extra problems ND packets are send through this - * routine. It's code duplication but I really want to avoid - * extra checks since ipv6_build_header is used by TCP (which - * is for us performance critical) - */ - -int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev, - const struct in6_addr *saddr, const struct in6_addr *daddr, - int proto, int len) -{ - struct ipv6_pinfo *np = inet6_sk(sk); - struct ipv6hdr *hdr; - int totlen; - - skb->protocol = htons(ETH_P_IPV6); - skb->dev = dev; - - totlen = len + sizeof(struct ipv6hdr); - - skb_reset_network_header(skb); - skb_put(skb, sizeof(struct ipv6hdr)); - hdr = ipv6_hdr(skb); - - *(__be32*)hdr = htonl(0x60000000); - - hdr->payload_len = htons(len); - hdr->nexthdr = proto; - hdr->hop_limit = np->hop_limit; - - ipv6_addr_copy(&hdr->saddr, saddr); - ipv6_addr_copy(&hdr->daddr, daddr); - - return 0; -} - static int ip6_call_ra_chain(struct sk_buff *skb, int sel) { struct ip6_ra_chain *ra; @@ -345,10 +268,11 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) { struct ipv6hdr *hdr = ipv6_hdr(skb); u8 nexthdr = hdr->nexthdr; + __be16 frag_off; int offset; if (ipv6_ext_hdr(nexthdr)) { - offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr); + offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off); if (offset < 0) return 0; } else @@ -397,21 +321,65 @@ static inline int ip6_forward_finish(struct sk_buff *skb) return dst_output(skb); } +static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ + unsigned int mtu; + struct inet6_dev *idev; + + if (dst_metric_locked(dst, RTAX_MTU)) { + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + } + + mtu = IPV6_MIN_MTU; + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ + if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) + return true; + + if (skb->ignore_df) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct ipv6hdr *hdr = ipv6_hdr(skb); struct inet6_skb_parm *opt = IP6CB(skb); struct net *net = dev_net(dst->dev); + u32 mtu; if (net->ipv6.devconf_all->forwarding == 0) goto error; + if (skb->pkt_type != PACKET_HOST) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } @@ -430,9 +398,8 @@ int ip6_forward(struct sk_buff *skb) * cannot be fragmented, because there is no warranty * that different fragments will go along one path. --ANK */ - if (opt->ra) { - u8 *ptr = skb_network_header(skb) + opt->ra; - if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3])) + if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) { + if (ip6_call_ra_chain(skb, ntohs(opt->ra))) return 0; } @@ -442,10 +409,9 @@ int ip6_forward(struct sk_buff *skb) if (hdr->hop_limit <= 1) { /* Force OUTPUT device used as source address */ skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, - 0, skb->dev); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -458,14 +424,15 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb_dst(skb); @@ -474,11 +441,10 @@ int ip6_forward(struct sk_buff *skb) send redirects to source routed frames. We don't send redirects to frames decapsulated from IPsec. */ - if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 && - !skb_sec_path(skb)) { + if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) { struct in6_addr *target = NULL; + struct inet_peer *peer; struct rt6_info *rt; - struct neighbour *n = dst->neighbour; /* * incoming and outgoing devices are the same @@ -486,16 +452,20 @@ int ip6_forward(struct sk_buff *skb) */ rt = (struct rt6_info *) dst; - if ((rt->rt6i_flags & RTF_GATEWAY)) - target = (struct in6_addr*)&n->primary_key; + if (rt->rt6i_flags & RTF_GATEWAY) + target = &rt->rt6i_gateway; else target = &hdr->daddr; + peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) */ - if (xrlim_allow(dst, 1*HZ)) - ndisc_send_redirect(skb, n, target); + if (inet_peer_xrlim_allow(peer, 1*HZ)) + ndisc_send_redirect(skb, target); + if (peer) + inet_putpeer(peer); } else { int addrtype = ipv6_addr_type(&hdr->saddr); @@ -505,25 +475,30 @@ int ip6_forward(struct sk_buff *skb) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, - ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); + ICMPV6_NOT_NEIGHBOUR, 0); goto error; } } - if (skb->len > dst_mtu(dst)) { + mtu = ip6_dst_mtu_forward(dst); + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + + if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -534,7 +509,8 @@ int ip6_forward(struct sk_buff *skb) hdr->hop_limit--; IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); - return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev, + IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish); error: @@ -558,53 +534,24 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \ - defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE) - to->nf_trace = from->nf_trace; -#endif skb_copy_secmark(to, from); } -int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) { - u16 offset = sizeof(struct ipv6hdr); - struct ipv6_opt_hdr *exthdr = - (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1); - unsigned int packet_len = skb->tail - skb->network_header; - int found_rhdr = 0; - *nexthdr = &ipv6_hdr(skb)->nexthdr; - - while (offset + 1 <= packet_len) { + static u32 ip6_idents_hashrnd __read_mostly; + u32 hash, id; - switch (**nexthdr) { + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - case NEXTHDR_HOP: - break; - case NEXTHDR_ROUTING: - found_rhdr = 1; - break; - case NEXTHDR_DEST: -#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) - if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) - break; -#endif - if (found_rhdr) - return offset; - break; - default : - return offset; - } + hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); + hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); - offset += ipv6_optlen(exthdr); - *nexthdr = &exthdr->nexthdr; - exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + - offset); - } - - return offset; + id = ip_idents_reserve(hash, 1); + fhdr->identification = htonl(id); } -static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info*)skb_dst(skb); @@ -612,6 +559,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) struct ipv6hdr *tmp_hdr; struct frag_hdr *fh; unsigned int mtu, hlen, left, len; + int hroom, troom; __be32 frag_id = 0; int ptr, offset = 0, err=0; u8 *prevhdr, nexthdr = 0; @@ -623,12 +571,16 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) mtu = ip6_skb_dst_mtu(skb); /* We must not fragment if the socket is set to force MTU discovery - * or if the skb it not generated by a local socket. (This last - * check should be redundant, but it's free.) + * or if the skb it not generated by a local socket. */ - if (!skb->local_df) { + if (unlikely(!skb->ignore_df && skb->len > mtu) || + (IP6CB(skb)->frag_max_size && + IP6CB(skb)->frag_max_size > mtu)) { + if (skb->sk && dst_allfrag(skb_dst(skb))) + sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); + skb->dev = skb_dst(skb)->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); @@ -641,9 +593,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } mtu -= hlen + sizeof(struct frag_hdr); - if (skb_has_frags(skb)) { + if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); - int truesizes = 0; + struct sk_buff *frag2; if (first_len - hlen > mtu || ((first_len - hlen) & 7) || @@ -655,18 +607,18 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) if (frag->len > mtu || ((frag->len & 7) && frag->next) || skb_headroom(frag) < hlen) - goto slow_path; + goto slow_path_clean; /* Partially cloned skb? */ if (skb_shared(frag)) - goto slow_path; + goto slow_path_clean; BUG_ON(frag->sk); if (skb->sk) { frag->sk = skb->sk; frag->destructor = sock_wfree; - truesizes += frag->truesize; } + skb->truesize -= frag->truesize; } err = 0; @@ -689,7 +641,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(fh); + ipv6_select_ident(fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); @@ -697,12 +649,11 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); - skb->truesize -= truesizes; skb->len = first_len; ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr)); - dst_hold(&rt->u.dst); + dst_hold(&rt->dst); for (;;) { /* Prepare header of the next frame, @@ -730,7 +681,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) err = output(skb); if(!err) - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); if (err || !frag) @@ -744,9 +695,9 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) kfree(tmp_hdr); if (err == 0) { - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGOKS); - dst_release(&rt->u.dst); + ip6_rt_put(rt); return 0; } @@ -756,13 +707,26 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) frag = skb; } - IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst), + IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGFAILS); - dst_release(&rt->u.dst); + ip6_rt_put(rt); return err; + +slow_path_clean: + skb_walk_frags(skb, frag2) { + if (frag2 == frag) + break; + frag2->sk = NULL; + frag2->destructor = NULL; + skb->truesize += frag2->truesize; + } } slow_path: + if ((skb->ip_summed == CHECKSUM_PARTIAL) && + skb_checksum_help(skb)) + goto fail; + left = skb->len - hlen; /* Space per frame */ ptr = hlen; /* Where to start from */ @@ -771,6 +735,8 @@ slow_path: */ *prevhdr = NEXTHDR_FRAGMENT; + hroom = LL_RESERVED_SPACE(rt->dst.dev); + troom = rt->dst.dev->needed_tailroom; /* * Keep copying data until we run out. @@ -780,7 +746,7 @@ slow_path: /* IF: it doesn't fit, use 'mtu' - the data space left */ if (len > mtu) len = mtu; - /* IF: we are not sending upto and including the packet end + /* IF: we are not sending up to and including the packet end then align the next start on an eight byte boundary */ if (len < left) { len &= ~7; @@ -789,7 +755,8 @@ slow_path: * Allocate buffer. */ - if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) { + if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + + hroom + troom, GFP_ATOMIC)) == NULL) { NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n"); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -802,7 +769,7 @@ slow_path: */ ip6_copy_metadata(frag, skb); - skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev)); + skb_reserve(frag, hroom); skb_put(frag, len + hlen + sizeof(struct frag_hdr)); skb_reset_network_header(frag); fh = (struct frag_hdr *)(skb_network_header(frag) + hlen); @@ -827,7 +794,7 @@ slow_path: fh->nexthdr = nexthdr; fh->reserved = 0; if (!frag_id) { - ipv6_select_ident(fh); + ipv6_select_ident(fh, rt); frag_id = fh->identification; } else fh->identification = frag_id; @@ -860,7 +827,7 @@ slow_path: } IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGOKS); - kfree_skb(skb); + consume_skb(skb); return err; fail: @@ -870,24 +837,30 @@ fail: return err; } -static inline int ip6_rt_check(struct rt6key *rt_key, - struct in6_addr *fl_addr, - struct in6_addr *addr_cache) +static inline int ip6_rt_check(const struct rt6key *rt_key, + const struct in6_addr *fl_addr, + const struct in6_addr *addr_cache) { - return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && - (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); + return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && + (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); } static struct dst_entry *ip6_sk_dst_check(struct sock *sk, struct dst_entry *dst, - struct flowi *fl) + const struct flowi6 *fl6) { struct ipv6_pinfo *np = inet6_sk(sk); - struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_info *rt; if (!dst) goto out; + if (dst->ops->family != AF_INET6) { + dst_release(dst); + return NULL; + } + + rt = (struct rt6_info *)dst; /* Yes, checking route validity in not connected * case is not very simple. Take into account, * that we do not support routing by source, TOS, @@ -905,11 +878,11 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * sockets. * 2. oif also should be the same. */ - if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || + if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) || #ifdef CONFIG_IPV6_SUBTREES - ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || + ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) || #endif - (fl->oif && fl->oif != dst->dev->ifindex)) { + (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; } @@ -919,22 +892,26 @@ out: } static int ip6_dst_lookup_tail(struct sock *sk, - struct dst_entry **dst, struct flowi *fl) + struct dst_entry **dst, struct flowi6 *fl6) { - int err; struct net *net = sock_net(sk); +#ifdef CONFIG_IPV6_OPTIMISTIC_DAD + struct neighbour *n; + struct rt6_info *rt; +#endif + int err; if (*dst == NULL) - *dst = ip6_route_output(net, sk, fl); + *dst = ip6_route_output(net, sk, fl6); if ((err = (*dst)->error)) goto out_err_release; - if (ipv6_addr_any(&fl->fl6_src)) { - err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev, - &fl->fl6_dst, - sk ? inet6_sk(sk)->srcprefs : 0, - &fl->fl6_src); + if (ipv6_addr_any(&fl6->saddr)) { + struct rt6_info *rt = (struct rt6_info *) *dst; + err = ip6_route_get_saddr(net, rt, &fl6->daddr, + sk ? inet6_sk(sk)->srcprefs : 0, + &fl6->saddr); if (err) goto out_err_release; } @@ -948,12 +925,18 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) { + rt = (struct rt6_info *) *dst; + rcu_read_lock_bh(); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); + err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; + rcu_read_unlock_bh(); + + if (err) { struct inet6_ifaddr *ifp; - struct flowi fl_gw; + struct flowi6 fl_gw6; int redirect; - ifp = ipv6_get_ifaddr(net, &fl->fl6_src, + ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC); @@ -966,9 +949,9 @@ static int ip6_dst_lookup_tail(struct sock *sk, * default router instead */ dst_release(*dst); - memcpy(&fl_gw, fl, sizeof(struct flowi)); - memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr)); - *dst = ip6_route_output(net, sk, &fl_gw); + memcpy(&fl_gw6, fl6, sizeof(struct flowi6)); + memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr)); + *dst = ip6_route_output(net, sk, &fl_gw6); if ((err = (*dst)->error)) goto out_err_release; } @@ -979,7 +962,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; @@ -989,52 +972,88 @@ out_err_release: * ip6_dst_lookup - perform route lookup on flow * @sk: socket which provides route info * @dst: pointer to dst_entry * for result - * @fl: flow to lookup + * @fl6: flow to lookup * * This function performs a route lookup on the given flow. * * It returns zero on success, or a standard errno code on error. */ -int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6) { *dst = NULL; - return ip6_dst_lookup_tail(sk, dst, fl); + return ip6_dst_lookup_tail(sk, dst, fl6); } EXPORT_SYMBOL_GPL(ip6_dst_lookup); /** - * ip6_sk_dst_lookup - perform socket cached route lookup on flow + * ip6_dst_lookup_flow - perform route lookup on flow with ipsec + * @sk: socket which provides route info + * @fl6: flow to lookup + * @final_dst: final destination address for ipsec lookup + * + * This function performs a route lookup on the given flow. + * + * It returns a valid dst pointer on success, or a pointer encoded + * error code. + */ +struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, + const struct in6_addr *final_dst) +{ + struct dst_entry *dst = NULL; + int err; + + err = ip6_dst_lookup_tail(sk, &dst, fl6); + if (err) + return ERR_PTR(err); + if (final_dst) + fl6->daddr = *final_dst; + + return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); +} +EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); + +/** + * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow * @sk: socket which provides the dst cache and route info - * @dst: pointer to dst_entry * for result - * @fl: flow to lookup + * @fl6: flow to lookup + * @final_dst: final destination address for ipsec lookup * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. * It will take the socket dst lock when operating on the dst cache. * As a result, this function can only be used in process context. * - * It returns zero on success, or a standard errno code on error. + * It returns a valid dst pointer on success, or a pointer encoded + * error code. */ -int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl) +struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, + const struct in6_addr *final_dst) { - *dst = NULL; - if (sk) { - *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - *dst = ip6_sk_dst_check(sk, *dst, fl); - } + struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); + int err; - return ip6_dst_lookup_tail(sk, dst, fl); + dst = ip6_sk_dst_check(sk, dst, fl6); + + err = ip6_dst_lookup_tail(sk, &dst, fl6); + if (err) + return ERR_PTR(err); + if (final_dst) + fl6->daddr = *final_dst; + + return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } -EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup); +EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); static inline int ip6_ufo_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int mtu,unsigned int flags) + int transhdrlen, int mtu,unsigned int flags, + struct rt6_info *rt) { struct sk_buff *skb; + struct frag_hdr fhdr; int err; /* There is support for UDP large send offload by network @@ -1046,7 +1065,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); if (skb == NULL) - return -ENOMEM; + return err; /* reserve space for Hardware header */ skb_reserve(skb, hh_len); @@ -1060,34 +1079,27 @@ static inline int ip6_ufo_append_data(struct sock *sk, /* initialize protocol header pointer */ skb->transport_header = skb->network_header + fragheaderlen; - skb->ip_summed = CHECKSUM_PARTIAL; + skb->protocol = htons(ETH_P_IPV6); skb->csum = 0; - sk->sk_sndmsg_off = 0; - } - - err = skb_append_datato_frags(sk,skb, getfrag, from, - (length - transhdrlen)); - if (!err) { - struct frag_hdr fhdr; - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr); - skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); - - return 0; + } else if (skb_is_gso(skb)) { + goto append; } - /* There is not enough support do UPD LSO, - * so follow normal path - */ - kfree_skb(skb); - return err; + skb->ip_summed = CHECKSUM_PARTIAL; + /* Specify the length of each IPv6 datagram fragment. + * It has to be a multiple of 8. + */ + skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - + sizeof(struct frag_hdr)) & ~7; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + ipv6_select_ident(&fhdr, rt); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + +append: + return skb_append_datato_frags(sk, skb, getfrag, from, + (length - transhdrlen)); } static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, @@ -1102,26 +1114,52 @@ static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src, return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL; } +static void ip6_append_data_mtu(unsigned int *mtu, + int *maxfraglen, + unsigned int fragheaderlen, + struct sk_buff *skb, + struct rt6_info *rt, + unsigned int orig_mtu) +{ + if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { + if (skb == NULL) { + /* first fragment, reserve header_len */ + *mtu = orig_mtu - rt->dst.header_len; + + } else { + /* + * this fragment is not first, the headers + * space is regarded as data space. + */ + *mtu = orig_mtu; + } + *maxfraglen = ((*mtu - fragheaderlen) & ~7) + + fragheaderlen - sizeof(struct frag_hdr); + } +} + int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, - int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl, - struct rt6_info *rt, unsigned int flags) + int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, int dontfrag) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct sk_buff *skb; - unsigned int maxfraglen, fragheaderlen; + struct inet_cork *cork; + struct sk_buff *skb, *skb_prev = NULL; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; int exthdrlen; + int dst_exthdrlen; int hh_len; - int mtu; int copy; int err; int offset = 0; - int csummode = CHECKSUM_NONE; + __u8 tx_flags = 0; if (flags&MSG_PROBE) return 0; + cork = &inet->cork.base; if (skb_queue_empty(&sk->sk_write_queue)) { /* * setup for corking @@ -1130,7 +1168,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (WARN_ON(np->cork.opt)) return -EINVAL; - np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation); + np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); if (unlikely(np->cork.opt == NULL)) return -ENOBUFS; @@ -1160,49 +1198,83 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* need source address above miyazawa*/ } - dst_hold(&rt->u.dst); - inet->cork.dst = &rt->u.dst; - inet->cork.fl = *fl; + dst_hold(&rt->dst); + cork->dst = &rt->dst; + inet->cork.fl.u.ip6 = *fl6; np->cork.hop_limit = hlimit; np->cork.tclass = tclass; - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? - rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path); + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) mtu = np->frag_size; } - inet->cork.fragsize = mtu; - if (dst_allfrag(rt->u.dst.path)) - inet->cork.flags |= IPCORK_ALLFRAG; - inet->cork.length = 0; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; - exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) - - rt->rt6i_nfheader_len; + cork->fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->flags |= IPCORK_ALLFRAG; + cork->length = 0; + exthdrlen = (opt ? opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; + dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len; } else { - rt = (struct rt6_info *)inet->cork.dst; - fl = &inet->cork.fl; + rt = (struct rt6_info *)cork->dst; + fl6 = &inet->cork.fl.u.ip6; opt = np->cork.opt; transhdrlen = 0; exthdrlen = 0; - mtu = inet->cork.fragsize; + dst_exthdrlen = 0; + mtu = cork->fragsize; } + orig_mtu = mtu; - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); + hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen); + unsigned int maxnonfragsize, headersize; + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + if (ip6_sk_ignore_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; + + /* dontfrag active */ + if ((cork->length + length > mtu - headersize) && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } + + if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); return -EMSGSIZE; } } + /* For UDP, check if TX timestamp is enabled */ + if (sk->sk_type == SOCK_DGRAM) + sock_tx_timestamp(sk, &tx_flags); + /* * Let's try using as much space as possible. * Use MTU if total length of the message fits into the MTU. @@ -1219,24 +1291,26 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - inet->cork.length += length; - if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) && - (rt->u.dst.dev->features & NETIF_F_UFO)) { - - err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len, - fragheaderlen, transhdrlen, mtu, - flags); + skb = skb_peek_tail(&sk->sk_write_queue); + cork->length += length; + if (((length > mtu) || + (skb && skb_is_gso(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags, rt); if (err) goto error; return 0; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { /* Check if the remaining data fits into current packet. */ - copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; + copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len; if (copy < length) copy = maxfraglen - skb->len; @@ -1246,38 +1320,46 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, unsigned int fraglen; unsigned int fraggap; unsigned int alloclen; - struct sk_buff *skb_prev; alloc_new_skb: - skb_prev = skb; - /* There's no room in the current skb */ - if (skb_prev) - fraggap = skb_prev->len - maxfraglen; + if (skb) + fraggap = skb->len - maxfraglen; else fraggap = 0; + /* update mtu and maxfraglen if necessary */ + if (skb == NULL || skb_prev == NULL) + ip6_append_data_mtu(&mtu, &maxfraglen, + fragheaderlen, skb, rt, + orig_mtu); + + skb_prev = skb; /* * If remaining data exceeds the mtu, * we know we need more fragment(s). */ datalen = length + fraggap; - if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) - datalen = maxfraglen - fragheaderlen; - fraglen = datalen + fragheaderlen; + if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen) + datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len; if ((flags & MSG_MORE) && - !(rt->u.dst.dev->features&NETIF_F_SG)) + !(rt->dst.dev->features&NETIF_F_SG)) alloclen = mtu; else alloclen = datalen + fragheaderlen; - /* - * The last fragment gets additional space at tail. - * Note: we overallocate on fragments with MSG_MODE - * because we have no idea if we're the last one. - */ - if (datalen == length + fraggap) - alloclen += rt->u.dst.trailer_len; + alloclen += dst_exthdrlen; + + if (datalen != length + fraggap) { + /* + * this is not the last fragment, the trailer + * space is regarded as data space. + */ + datalen += rt->dst.trailer_len; + } + + alloclen += rt->dst.trailer_len; + fraglen = datalen + fragheaderlen; /* * We just reserve space for fragment header. @@ -1299,16 +1381,27 @@ alloc_new_skb: sk->sk_allocation); if (unlikely(skb == NULL)) err = -ENOBUFS; + else { + /* Only the initial fragment + * is time stamped. + */ + tx_flags = 0; + } } if (skb == NULL) goto error; /* * Fill in the control structures */ - skb->ip_summed = csummode; + skb->protocol = htons(ETH_P_IPV6); + skb->ip_summed = CHECKSUM_NONE; skb->csum = 0; - /* reserve for fragmentation */ - skb_reserve(skb, hh_len+sizeof(struct frag_hdr)); + /* reserve for fragmentation and ipsec header */ + skb_reserve(skb, hh_len + sizeof(struct frag_hdr) + + dst_exthdrlen); + + if (sk->sk_type == SOCK_DGRAM) + skb_shinfo(skb)->tx_flags = tx_flags; /* * Find where to start putting bytes @@ -1328,6 +1421,7 @@ alloc_new_skb: pskb_trim_unique(skb_prev, maxfraglen); } copy = datalen - transhdrlen - fraggap; + if (copy < 0) { err = -EINVAL; kfree_skb(skb); @@ -1342,7 +1436,7 @@ alloc_new_skb: length -= datalen - fraggap; transhdrlen = 0; exthdrlen = 0; - csummode = CHECKSUM_NONE; + dst_exthdrlen = 0; /* * Put the packet on the pending queue @@ -1354,7 +1448,7 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->u.dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG)) { unsigned int off; off = skb->len; @@ -1366,46 +1460,31 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = sk->sk_sndmsg_page; - int off = sk->sk_sndmsg_off; - unsigned int left; - - if (page && (left = PAGE_SIZE - off) > 0) { - if (copy >= left) - copy = left; - if (page != frag->page) { - if (i == MAX_SKB_FRAGS) { - err = -EMSGSIZE; - goto error; - } - get_page(page); - skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); - frag = &skb_shinfo(skb)->frags[i]; - } - } else if(i < MAX_SKB_FRAGS) { - if (copy > PAGE_SIZE) - copy = PAGE_SIZE; - page = alloc_pages(sk->sk_allocation, 0); - if (page == NULL) { - err = -ENOMEM; - goto error; - } - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; + struct page_frag *pfrag = sk_page_frag(sk); - skb_fill_page_desc(skb, i, page, 0, 0); - frag = &skb_shinfo(skb)->frags[i]; - } else { - err = -EMSGSIZE; - goto error; - } - if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) { - err = -EFAULT; + err = -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) goto error; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + err = -EMSGSIZE; + if (i == MAX_SKB_FRAGS) + goto error; + + __skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, 0); + skb_shinfo(skb)->nr_frags = ++i; + get_page(pfrag->page); } - sk->sk_sndmsg_off += copy; - frag->size += copy; + copy = min_t(int, copy, pfrag->size - pfrag->offset); + if (getfrag(from, + page_address(pfrag->page) + pfrag->offset, + offset, copy, skb->len, skb) < 0) + goto error_efault; + + pfrag->offset += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1414,12 +1493,17 @@ alloc_new_skb: offset += copy; length -= copy; } + return 0; + +error_efault: + err = -EFAULT; error: - inet->cork.length -= length; + cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); return err; } +EXPORT_SYMBOL_GPL(ip6_append_data); static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) { @@ -1432,10 +1516,10 @@ static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) np->cork.opt = NULL; } - if (inet->cork.dst) { - dst_release(inet->cork.dst); - inet->cork.dst = NULL; - inet->cork.flags &= ~IPCORK_ALLFRAG; + if (inet->cork.base.dst) { + dst_release(inet->cork.base.dst); + inet->cork.base.dst = NULL; + inet->cork.base.flags &= ~IPCORK_ALLFRAG; } memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); } @@ -1450,9 +1534,9 @@ int ip6_push_pending_frames(struct sock *sk) struct net *net = sock_net(sk); struct ipv6hdr *hdr; struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = (struct rt6_info *)inet->cork.dst; - struct flowi *fl = &inet->cork.fl; - unsigned char proto = fl->proto; + struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; + struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + unsigned char proto = fl6->flowi6_proto; int err = 0; if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) @@ -1474,10 +1558,9 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - if (np->pmtudisc < IPV6_PMTUDISC_DO) - skb->local_df = 1; + skb->ignore_df = ip6_sk_ignore_df(sk); - ipv6_addr_copy(final_dst, &fl->fl6_dst); + *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); if (opt && opt->opt_flen) ipv6_push_frag_opts(skb, opt, &proto); @@ -1488,24 +1571,22 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - *(__be32*)hdr = fl->fl6_flowlabel | - htonl(0x60000000 | ((int)np->cork.tclass << 20)); - + ip6_flow_hdr(hdr, np->cork.tclass, fl6->flowlabel); hdr->hop_limit = np->cork.hop_limit; hdr->nexthdr = proto; - ipv6_addr_copy(&hdr->saddr, &fl->fl6_src); - ipv6_addr_copy(&hdr->daddr, final_dst); + hdr->saddr = fl6->saddr; + hdr->daddr = *final_dst; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - skb_dst_set(skb, dst_clone(&rt->u.dst)); + skb_dst_set(skb, dst_clone(&rt->dst)); IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); @@ -1523,6 +1604,7 @@ error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); goto out; } +EXPORT_SYMBOL_GPL(ip6_push_pending_frames); void ip6_flush_pending_frames(struct sock *sk) { @@ -1537,3 +1619,4 @@ void ip6_flush_pending_frames(struct sock *sk) ip6_cork_release(inet_sk(sk), inet6_sk(sk)); } +EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); |
