diff options
Diffstat (limited to 'net/ipv6/ip6_output.c')
| -rw-r--r-- | net/ipv6/ip6_output.c | 248 |
1 files changed, 152 insertions, 96 deletions
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3a692d52916..45702b8cd14 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); + nexthop = rt6_nexthop((struct rt6_info *)dst); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_unlock_bh(); - IP6_INC_STATS_BH(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); return -EINVAL; } @@ -125,13 +125,14 @@ static int ip6_finish_output2(struct sk_buff *skb) static int ip6_finish_output(struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || - dst_allfrag(skb_dst(skb))) + dst_allfrag(skb_dst(skb)) || + (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) return ip6_fragment(skb, ip6_finish_output2); else return ip6_finish_output2(skb); } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -218,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->mark = sk->sk_mark; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { + if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, @@ -320,6 +321,45 @@ static inline int ip6_forward_finish(struct sk_buff *skb) return dst_output(skb); } +static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ + unsigned int mtu; + struct inet6_dev *idev; + + if (dst_metric_locked(dst, RTAX_MTU)) { + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + } + + mtu = IPV6_MIN_MTU; + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ + if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) + return true; + + if (skb->ignore_df) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -331,17 +371,18 @@ int ip6_forward(struct sk_buff *skb) if (net->ipv6.devconf_all->forwarding == 0) goto error; + if (skb->pkt_type != PACKET_HOST) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } - if (skb->pkt_type != PACKET_HOST) - goto drop; - skb_forward_csum(skb); /* @@ -369,8 +410,8 @@ int ip6_forward(struct sk_buff *skb) /* Force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INHDRERRORS); kfree_skb(skb); return -ETIMEDOUT; @@ -383,14 +424,15 @@ int ip6_forward(struct sk_buff *skb) if (proxied > 0) return ip6_input(skb); else if (proxied < 0) { - IP6_INC_STATS(net, ip6_dst_idev(dst), - IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } } if (!xfrm6_route_forward(skb)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INDISCARDS); goto drop; } dst = skb_dst(skb); @@ -438,25 +480,25 @@ int ip6_forward(struct sk_buff *skb) } } - mtu = dst_mtu(dst); + mtu = ip6_dst_mtu_forward(dst); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || - (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { + if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); - IP6_INC_STATS_BH(net, - ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_INTOOBIGERRORS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; } if (skb_cow(skb, dst->dev->hard_header_len)) { - IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); + IP6_INC_STATS_BH(net, ip6_dst_idev(dst), + IPSTATS_MIB_OUTDISCARDS); goto drop; } @@ -492,12 +534,23 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - to->nf_trace = from->nf_trace; -#endif skb_copy_secmark(to, from); } +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + u32 hash, id; + + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); + hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); + + id = ip_idents_reserve(hash, 1); + fhdr->identification = htonl(id); +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; @@ -520,7 +573,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu) || + if (unlikely(!skb->ignore_df && skb->len > mtu) || (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) @@ -874,7 +927,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); @@ -909,7 +962,7 @@ static int ip6_dst_lookup_tail(struct sock *sk, out_err_release: if (err == -ENETUNREACH) - IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); + IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES); dst_release(*dst); *dst = NULL; return err; @@ -937,7 +990,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * @sk: socket which provides route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup - * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow. * @@ -945,8 +997,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * error code. */ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep) + const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; int err; @@ -956,8 +1007,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (can_sleep) - fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -968,7 +1017,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * @sk: socket which provides the dst cache and route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup - * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. @@ -979,8 +1027,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * error code. */ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep) + const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); int err; @@ -992,8 +1039,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (can_sleep) - fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -1008,6 +1053,7 @@ static inline int ip6_ufo_append_data(struct sock *sk, { struct sk_buff *skb; + struct frag_hdr fhdr; int err; /* There is support for UDP large send offload by network @@ -1034,33 +1080,26 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb->transport_header = skb->network_header + fragheaderlen; skb->protocol = htons(ETH_P_IPV6); - skb->ip_summed = CHECKSUM_PARTIAL; skb->csum = 0; - } - err = skb_append_datato_frags(sk,skb, getfrag, from, - (length - transhdrlen)); - if (!err) { - struct frag_hdr fhdr; - - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); - skb_shinfo(skb)->ip6_frag_id = fhdr.identification; __skb_queue_tail(&sk->sk_write_queue, skb); - - return 0; + } else if (skb_is_gso(skb)) { + goto append; } - /* There is not enough support do UPD LSO, - * so follow normal path - */ - kfree_skb(skb); - return err; + skb->ip_summed = CHECKSUM_PARTIAL; + /* Specify the length of each IPv6 datagram fragment. + * It has to be a multiple of 8. + */ + skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - + sizeof(struct frag_hdr)) & ~7; + skb_shinfo(skb)->gso_type = SKB_GSO_UDP; + ipv6_select_ident(&fhdr, rt); + skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + +append: + return skb_append_datato_frags(sk, skb, getfrag, from, + (length - transhdrlen)); } static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, @@ -1080,21 +1119,19 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int fragheaderlen, struct sk_buff *skb, struct rt6_info *rt, - bool pmtuprobe) + unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { /* first fragment, reserve header_len */ - *mtu = *mtu - rt->dst.header_len; + *mtu = orig_mtu - rt->dst.header_len; } else { /* * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = min(*mtu, pmtuprobe ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path)); + *mtu = orig_mtu; } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1111,7 +1148,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; int exthdrlen; int dst_exthdrlen; int hh_len; @@ -1167,10 +1204,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, np->cork.hop_limit = hlimit; np->cork.tclass = tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(&rt->dst); else - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) @@ -1193,16 +1230,43 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, dst_exthdrlen = 0; mtu = cork->fragsize; } + orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); + unsigned int maxnonfragsize, headersize; + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + if (ip6_sk_ignore_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; + + /* dontfrag active */ + if ((cork->length + length > mtu - headersize) && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } + + if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); return -EMSGSIZE; } } @@ -1227,27 +1291,21 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ + skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; - if (length > mtu) { - int proto = sk->sk_protocol; - if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){ - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - - if (proto == IPPROTO_UDP && - (rt->dst.dev->features & NETIF_F_UFO)) { - - err = ip6_ufo_append_data(sk, getfrag, from, length, - hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); - if (err) - goto error; - return 0; - } + if (((length > mtu) || + (skb && skb_is_gso(skb))) && + (sk->sk_protocol == IPPROTO_UDP) && + (rt->dst.dev->features & NETIF_F_UFO)) { + err = ip6_ufo_append_data(sk, getfrag, from, length, + hh_len, fragheaderlen, + transhdrlen, mtu, flags, rt); + if (err) + goto error; + return 0; } - if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) + if (!skb) goto alloc_new_skb; while (length > 0) { @@ -1272,8 +1330,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc == - IPV6_PMTUDISC_PROBE); + orig_mtu); skb_prev = skb; @@ -1501,8 +1558,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - if (np->pmtudisc < IPV6_PMTUDISC_DO) - skb->local_df = 1; + skb->ignore_df = ip6_sk_ignore_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); @@ -1529,8 +1585,8 @@ int ip6_push_pending_frames(struct sock *sk) if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); |
