diff options
Diffstat (limited to 'net/ipv6')
59 files changed, 2203 insertions, 1375 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index d92e5586783..438a73aa777 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -138,6 +138,7 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION config IPV6_VTI tristate "Virtual (secure) IPv6: tunneling" select IPV6_TUNNEL + select NET_IP_TUNNEL depends on INET6_XFRM_MODE_TUNNEL ---help--- Tunneling means encapsulating data of one protocol type within diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 17bb830872d..2fe68364bb2 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ - xfrm6_output.o + xfrm6_output.o xfrm6_protocol.o ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index be4dbbd17d3..5667b3003af 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -133,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(unsigned long); +static void addrconf_verify(void); +static void addrconf_verify_rtnl(void); +static void addrconf_verify_work(struct work_struct *); -static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); -static DEFINE_SPINLOCK(addrconf_verify_lock); +static struct workqueue_struct *addrconf_wq; +static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work); static void addrconf_join_anycast(struct inet6_ifaddr *ifp); static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -151,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, u32 flags, u32 noflags); static void addrconf_dad_start(struct inet6_ifaddr *ifp); -static void addrconf_dad_timer(unsigned long data); +static void addrconf_dad_work(struct work_struct *w); static void addrconf_dad_completed(struct inet6_ifaddr *ifp); static void addrconf_dad_run(struct inet6_dev *idev); static void addrconf_rs_timer(unsigned long data); @@ -247,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev) __in6_dev_put(idev); } -static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_dad_work(struct inet6_ifaddr *ifp) { - if (del_timer(&ifp->dad_timer)) + if (cancel_delayed_work(&ifp->dad_work)) __in6_ifa_put(ifp); } @@ -261,31 +263,26 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev, mod_timer(&idev->rs_timer, jiffies + when); } -static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, - unsigned long when) +static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, + unsigned long delay) { - if (!timer_pending(&ifp->dad_timer)) + if (!delayed_work_pending(&ifp->dad_work)) in6_ifa_hold(ifp); - mod_timer(&ifp->dad_timer, jiffies + when); + mod_delayed_work(addrconf_wq, &ifp->dad_work, delay); } static int snmp6_alloc_dev(struct inet6_dev *idev) { int i; - if (snmp_mib_init((void __percpu **)idev->stats.ipv6, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + idev->stats.ipv6 = alloc_percpu(struct ipstats_mib); + if (!idev->stats.ipv6) goto err_ip; for_each_possible_cpu(i) { struct ipstats_mib *addrconf_stats; - addrconf_stats = per_cpu_ptr(idev->stats.ipv6[0], i); + addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i); u64_stats_init(&addrconf_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - addrconf_stats = per_cpu_ptr(idev->stats.ipv6[1], i); - u64_stats_init(&addrconf_stats->syncp); -#endif } @@ -303,7 +300,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev) err_icmpmsg: kfree(idev->stats.icmpv6dev); err_icmp: - snmp_mib_free((void __percpu **)idev->stats.ipv6); + free_percpu(idev->stats.ipv6); err_ip: return -ENOMEM; } @@ -442,6 +439,8 @@ static int inet6_netconf_msgsize_devconf(int type) if (type == -1 || type == NETCONFA_MC_FORWARDING) size += nla_total_size(4); #endif + if (type == -1 || type == NETCONFA_PROXY_NEIGH) + size += nla_total_size(4); return size; } @@ -475,6 +474,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, devconf->mc_forwarding) < 0) goto nla_put_failure; #endif + if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && + nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -509,6 +512,7 @@ errout: static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = { [NETCONFA_IFINDEX] = { .len = sizeof(int) }, [NETCONFA_FORWARDING] = { .len = sizeof(int) }, + [NETCONFA_PROXY_NEIGH] = { .len = sizeof(int) }, }; static int inet6_netconf_get_devconf(struct sk_buff *in_skb, @@ -744,8 +748,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp) in6_dev_put(ifp->idev); - if (del_timer(&ifp->dad_timer)) - pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); + if (cancel_delayed_work(&ifp->dad_work)) + pr_notice("delayed DAD work was pending while freeing ifa=%p\n", + ifp); if (ifp->state != INET6_IFADDR_STATE_DEAD) { pr_warn("Freeing alive inet6 address %p\n", ifp); @@ -842,8 +847,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, spin_lock_init(&ifa->lock); spin_lock_init(&ifa->state_lock); - setup_timer(&ifa->dad_timer, addrconf_dad_timer, - (unsigned long)ifa); + INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; ifa->prefix_len = pfxlen; @@ -893,15 +897,97 @@ out: goto out2; } +enum cleanup_prefix_rt_t { + CLEANUP_PREFIX_RT_NOP, /* no cleanup action for prefix route */ + CLEANUP_PREFIX_RT_DEL, /* delete the prefix route */ + CLEANUP_PREFIX_RT_EXPIRE, /* update the lifetime of the prefix route */ +}; + +/* + * Check, whether the prefix for ifp would still need a prefix route + * after deleting ifp. The function returns one of the CLEANUP_PREFIX_RT_* + * constants. + * + * 1) we don't purge prefix if address was not permanent. + * prefix is managed by its own lifetime. + * 2) we also don't purge, if the address was IFA_F_NOPREFIXROUTE. + * 3) if there are no addresses, delete prefix. + * 4) if there are still other permanent address(es), + * corresponding prefix is still permanent. + * 5) if there are still other addresses with IFA_F_NOPREFIXROUTE, + * don't purge the prefix, assume user space is managing it. + * 6) otherwise, update prefix lifetime to the + * longest valid lifetime among the corresponding + * addresses on the device. + * Note: subsequent RA will update lifetime. + **/ +static enum cleanup_prefix_rt_t +check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) +{ + struct inet6_ifaddr *ifa; + struct inet6_dev *idev = ifp->idev; + unsigned long lifetime; + enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_DEL; + + *expires = jiffies; + + list_for_each_entry(ifa, &idev->addr_list, if_list) { + if (ifa == ifp) + continue; + if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, + ifp->prefix_len)) + continue; + if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) + return CLEANUP_PREFIX_RT_NOP; + + action = CLEANUP_PREFIX_RT_EXPIRE; + + spin_lock(&ifa->lock); + + lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); + /* + * Note: Because this address is + * not permanent, lifetime < + * LONG_MAX / HZ here. + */ + if (time_before(*expires, ifa->tstamp + lifetime * HZ)) + *expires = ifa->tstamp + lifetime * HZ; + spin_unlock(&ifa->lock); + } + + return action; +} + +static void +cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt) +{ + struct rt6_info *rt; + + rt = addrconf_get_prefix_route(&ifp->addr, + ifp->prefix_len, + ifp->idev->dev, + 0, RTF_GATEWAY | RTF_DEFAULT); + if (rt) { + if (del_rt) + ip6_del_rt(rt); + else { + if (!(rt->rt6i_flags & RTF_EXPIRES)) + rt6_set_expires(rt, expires); + ip6_rt_put(rt); + } + } +} + + /* This function wants to get referenced ifp and releases it before return */ static void ipv6_del_addr(struct inet6_ifaddr *ifp) { - struct inet6_ifaddr *ifa, *ifn; - struct inet6_dev *idev = ifp->idev; int state; - int deleted = 0, onlink = 0; - unsigned long expires = jiffies; + enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; + unsigned long expires; + + ASSERT_RTNL(); spin_lock_bh(&ifp->state_lock); state = ifp->state; @@ -915,7 +1001,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) hlist_del_init_rcu(&ifp->addr_lst); spin_unlock_bh(&addrconf_hash_lock); - write_lock_bh(&idev->lock); + write_lock_bh(&ifp->idev->lock); if (ifp->flags&IFA_F_TEMPORARY) { list_del(&ifp->tmp_list); @@ -926,87 +1012,23 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) __in6_ifa_put(ifp); } - list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { - if (ifa == ifp) { - list_del_init(&ifp->if_list); - __in6_ifa_put(ifp); + if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) + action = check_cleanup_prefix_route(ifp, &expires); - if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) - break; - deleted = 1; - continue; - } else if (ifp->flags & IFA_F_PERMANENT) { - if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, - ifp->prefix_len)) { - if (ifa->flags & IFA_F_PERMANENT) { - onlink = 1; - if (deleted) - break; - } else { - unsigned long lifetime; - - if (!onlink) - onlink = -1; - - spin_lock(&ifa->lock); - - lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); - /* - * Note: Because this address is - * not permanent, lifetime < - * LONG_MAX / HZ here. - */ - if (time_before(expires, - ifa->tstamp + lifetime * HZ)) - expires = ifa->tstamp + lifetime * HZ; - spin_unlock(&ifa->lock); - } - } - } - } - write_unlock_bh(&idev->lock); + list_del_init(&ifp->if_list); + __in6_ifa_put(ifp); + + write_unlock_bh(&ifp->idev->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ipv6_ifa_notify(RTM_DELADDR, ifp); inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); - /* - * Purge or update corresponding prefix - * - * 1) we don't purge prefix here if address was not permanent. - * prefix is managed by its own lifetime. - * 2) if there're no addresses, delete prefix. - * 3) if there're still other permanent address(es), - * corresponding prefix is still permanent. - * 4) otherwise, update prefix lifetime to the - * longest valid lifetime among the corresponding - * addresses on the device. - * Note: subsequent RA will update lifetime. - * - * --yoshfuji - */ - if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { - struct in6_addr prefix; - struct rt6_info *rt; - - ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - - rt = addrconf_get_prefix_route(&prefix, - ifp->prefix_len, - ifp->idev->dev, - 0, RTF_GATEWAY | RTF_DEFAULT); - - if (rt) { - if (onlink == 0) { - ip6_del_rt(rt); - rt = NULL; - } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { - rt6_set_expires(rt, expires); - } - } - ip6_rt_put(rt); + if (action != CLEANUP_PREFIX_RT_NOP) { + cleanup_prefix_route(ifp, expires, + action == CLEANUP_PREFIX_RT_DEL); } /* clean up prefsrc entries */ @@ -1080,8 +1102,11 @@ retry: * Lifetime is greater than REGEN_ADVANCE time units. In particular, * an implementation must not create a temporary address with a zero * Preferred Lifetime. + * Use age calculation as in addrconf_verify to avoid unnecessary + * temporary addresses being generated. */ - if (tmp_prefered_lft <= regen_advance) { + age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; + if (tmp_prefered_lft <= regen_advance + age) { in6_ifa_put(ifp); in6_dev_put(idev); ret = -1; @@ -1202,7 +1227,7 @@ static int ipv6_get_saddr_eval(struct net *net, * | d is scope of the destination. * B-d | \ * | \ <- smaller scope is better if - * B-15 | \ if scope is enough for destinaion. + * B-15 | \ if scope is enough for destination. * | ret = B - scope (-1 <= scope >= d <= 15). * d-C-1 | / * |/ <- greater is better @@ -1414,7 +1439,9 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, struct inet6_ifaddr *ifp; int err = -EADDRNOTAVAIL; - list_for_each_entry(ifp, &idev->addr_list, if_list) { + list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) { + if (ifp->scope > IFA_LINK) + break; if (ifp->scope == IFA_LINK && !(ifp->flags & banned_flags)) { *addr = ifp->addr; @@ -1576,7 +1603,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) { if (ifp->flags&IFA_F_PERMANENT) { spin_lock_bh(&ifp->lock); - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); ifp->flags |= IFA_F_TENTATIVE; if (dad_failed) ifp->flags |= IFA_F_DADFAILED; @@ -1597,20 +1624,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) spin_unlock_bh(&ifp->lock); } ipv6_del_addr(ifp); - } else + } else { ipv6_del_addr(ifp); + } } static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock(&ifp->state_lock); + spin_lock_bh(&ifp->state_lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock(&ifp->state_lock); + spin_unlock_bh(&ifp->state_lock); return err; } @@ -1643,7 +1671,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - addrconf_dad_stop(ifp, 1); + spin_lock_bh(&ifp->state_lock); + /* transition from _POSTDAD to _ERRDAD */ + ifp->state = INET6_IFADDR_STATE_ERRDAD; + spin_unlock_bh(&ifp->state_lock); + + addrconf_mod_dad_work(ifp, 0); } /* Join to solicited addr multicast group. */ @@ -1652,6 +1685,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1663,6 +1698,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) { struct in6_addr maddr; + ASSERT_RTNL(); + if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP)) return; @@ -1673,7 +1710,10 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + + ASSERT_RTNL(); + + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -1684,7 +1724,10 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + + ASSERT_RTNL(); + + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -1818,6 +1861,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev) return addrconf_ifid_sit(eui, dev); case ARPHRD_IPGRE: return addrconf_ifid_gre(eui, dev); + case ARPHRD_6LOWPAN: case ARPHRD_IEEE802154: return addrconf_ifid_eui64(eui, dev); case ARPHRD_IEEE1394: @@ -1834,7 +1878,9 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) struct inet6_ifaddr *ifp; read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) { + list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) { + if (ifp->scope > IFA_LINK) + break; if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) { memcpy(eui, ifp->addr.s6_addr+8, 8); err = 0; @@ -2240,11 +2286,13 @@ ok: return; } - ifp->flags |= IFA_F_MANAGETEMPADDR; update_lft = 0; create = 1; + spin_lock_bh(&ifp->lock); + ifp->flags |= IFA_F_MANAGETEMPADDR; ifp->cstamp = jiffies; ifp->tokenized = tokenized; + spin_unlock_bh(&ifp->lock); addrconf_dad_start(ifp); } @@ -2295,7 +2343,7 @@ ok: create, now); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify(); } } inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); @@ -2429,8 +2477,11 @@ static int inet6_addr_add(struct net *net, int ifindex, valid_lft, prefered_lft); if (!IS_ERR(ifp)) { - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, - expires, flags); + if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, + expires, flags); + } + /* * Note that section 3.1 of RFC 4429 indicates * that the Optimistic flag should not be set for @@ -2441,15 +2492,15 @@ static int inet6_addr_add(struct net *net, int ifindex, manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, true, jiffies); in6_ifa_put(ifp); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } return PTR_ERR(ifp); } -static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx, - unsigned int plen) +static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, + const struct in6_addr *pfx, unsigned int plen) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -2472,7 +2523,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p in6_ifa_hold(ifp); read_unlock_bh(&idev->lock); + if (!(ifp->flags & IFA_F_TEMPORARY) && + (ifa_flags & IFA_F_MANAGETEMPADDR)) + manage_tempaddrs(idev, ifp, 0, 0, false, + jiffies); ipv6_del_addr(ifp); + addrconf_verify_rtnl(); return 0; } } @@ -2512,7 +2568,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg) return -EFAULT; rtnl_lock(); - err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, + err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr, ireq.ifr6_prefixlen); rtnl_unlock(); return err; @@ -2524,7 +2580,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, struct inet6_ifaddr *ifp; ifp = ipv6_add_addr(idev, addr, NULL, plen, - scope, IFA_F_PERMANENT, 0, 0); + scope, IFA_F_PERMANENT, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -2625,8 +2682,18 @@ static void init_loopback(struct net_device *dev) if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) continue; - if (sp_ifa->rt) - continue; + if (sp_ifa->rt) { + /* This dst has been added to garbage list when + * lo device down, release this obsolete dst and + * reallocate a new router for ifa. + */ + if (sp_ifa->rt->dst.obsolete > 0) { + ip6_rt_put(sp_ifa->rt); + sp_ifa->rt = NULL; + } else { + continue; + } + } sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false); @@ -2652,7 +2719,8 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0); + ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); @@ -2673,7 +2741,8 @@ static void addrconf_dev_config(struct net_device *dev) (dev->type != ARPHRD_INFINIBAND) && (dev->type != ARPHRD_IEEE802154) && (dev->type != ARPHRD_IEEE1394) && - (dev->type != ARPHRD_TUNNEL6)) { + (dev->type != ARPHRD_TUNNEL6) && + (dev->type != ARPHRD_6LOWPAN)) { /* Alas, we support only Ethernet autoconfiguration. */ return; } @@ -2739,21 +2808,11 @@ static void addrconf_gre_config(struct net_device *dev) ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); if (!ipv6_generate_eui64(addr.s6_addr + 8, dev)) addrconf_add_linklocal(idev, &addr); + else + addrconf_prefix_route(&addr, 64, dev, 0, 0); } #endif -static inline int -ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) -{ - struct in6_addr lladdr; - - if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { - addrconf_add_linklocal(idev, &lladdr); - return 0; - } - return -1; -} - static int addrconf_notify(struct notifier_block *this, unsigned long event, void *ptr) { @@ -2870,7 +2929,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, } /* - * MTU falled under IPV6_MIN_MTU. + * if MTU under IPV6_MIN_MTU. * Stop IPv6 on this interface. */ @@ -2962,7 +3021,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) hlist_for_each_entry_rcu(ifa, h, addr_lst) { if (ifa->idev == idev) { hlist_del_init_rcu(&ifa->addr_lst); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); goto restart; } } @@ -3000,7 +3059,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) while (!list_empty(&idev->addr_list)) { ifa = list_first_entry(&idev->addr_list, struct inet6_ifaddr, if_list); - addrconf_del_dad_timer(ifa); + addrconf_del_dad_work(ifa); list_del(&ifa->if_list); @@ -3096,20 +3155,20 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp) if (ifp->flags & IFA_F_OPTIMISTIC) rand_num = 0; else - rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); + rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1); ifp->dad_probes = idev->cnf.dad_transmits; - addrconf_mod_dad_timer(ifp, rand_num); + addrconf_mod_dad_work(ifp, rand_num); } -static void addrconf_dad_start(struct inet6_ifaddr *ifp) +static void addrconf_dad_begin(struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net_device *dev = idev->dev; addrconf_join_solict(dev, &ifp->addr); - net_srandom(ifp->addr.s6_addr32[3]); + prandom_seed((__force u32) ifp->addr.s6_addr32[3]); read_lock_bh(&idev->lock); spin_lock(&ifp->lock); @@ -3154,25 +3213,68 @@ out: read_unlock_bh(&idev->lock); } -static void addrconf_dad_timer(unsigned long data) +static void addrconf_dad_start(struct inet6_ifaddr *ifp) +{ + bool begin_dad = false; + + spin_lock_bh(&ifp->state_lock); + if (ifp->state != INET6_IFADDR_STATE_DEAD) { + ifp->state = INET6_IFADDR_STATE_PREDAD; + begin_dad = true; + } + spin_unlock_bh(&ifp->state_lock); + + if (begin_dad) + addrconf_mod_dad_work(ifp, 0); +} + +static void addrconf_dad_work(struct work_struct *w) { - struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; + struct inet6_ifaddr *ifp = container_of(to_delayed_work(w), + struct inet6_ifaddr, + dad_work); struct inet6_dev *idev = ifp->idev; struct in6_addr mcaddr; + enum { + DAD_PROCESS, + DAD_BEGIN, + DAD_ABORT, + } action = DAD_PROCESS; + + rtnl_lock(); + + spin_lock_bh(&ifp->state_lock); + if (ifp->state == INET6_IFADDR_STATE_PREDAD) { + action = DAD_BEGIN; + ifp->state = INET6_IFADDR_STATE_DAD; + } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { + action = DAD_ABORT; + ifp->state = INET6_IFADDR_STATE_POSTDAD; + } + spin_unlock_bh(&ifp->state_lock); + + if (action == DAD_BEGIN) { + addrconf_dad_begin(ifp); + goto out; + } else if (action == DAD_ABORT) { + addrconf_dad_stop(ifp, 1); + goto out; + } + if (!ifp->dad_probes && addrconf_dad_end(ifp)) goto out; - write_lock(&idev->lock); + write_lock_bh(&idev->lock); if (idev->dead || !(idev->if_flags & IF_READY)) { - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } spin_lock(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DEAD) { spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); goto out; } @@ -3183,7 +3285,7 @@ static void addrconf_dad_timer(unsigned long data) ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); addrconf_dad_completed(ifp); @@ -3191,16 +3293,35 @@ static void addrconf_dad_timer(unsigned long data) } ifp->dad_probes--; - addrconf_mod_dad_timer(ifp, - NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); + addrconf_mod_dad_work(ifp, + NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME)); spin_unlock(&ifp->lock); - write_unlock(&idev->lock); + write_unlock_bh(&idev->lock); /* send a neighbour solicitation for our addr */ addrconf_addr_solict_mult(&ifp->addr, &mcaddr); ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any); out: in6_ifa_put(ifp); + rtnl_unlock(); +} + +/* ifp->idev must be at least read locked */ +static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) +{ + struct inet6_ifaddr *ifpiter; + struct inet6_dev *idev = ifp->idev; + + list_for_each_entry_reverse(ifpiter, &idev->addr_list, if_list) { + if (ifpiter->scope > IFA_LINK) + break; + if (ifp != ifpiter && ifpiter->scope == IFA_LINK && + (ifpiter->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE| + IFA_F_OPTIMISTIC|IFA_F_DADFAILED)) == + IFA_F_PERMANENT) + return false; + } + return true; } static void addrconf_dad_completed(struct inet6_ifaddr *ifp) @@ -3209,7 +3330,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) struct in6_addr lladdr; bool send_rs, send_mld; - addrconf_del_dad_timer(ifp); + addrconf_del_dad_work(ifp); /* * Configure the address for reception. Now it is valid. @@ -3222,14 +3343,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp) */ read_lock_bh(&ifp->idev->lock); - spin_lock(&ifp->lock); - send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && - ifp->idev->valid_ll_addr_cnt == 1; + send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp); send_rs = send_mld && ipv6_accept_ra(ifp->idev) && ifp->idev->cnf.rtr_solicits > 0 && (dev->flags&IFF_LOOPBACK) == 0; - spin_unlock(&ifp->lock); read_unlock_bh(&ifp->idev->lock); /* While dad is in progress mld report's source address is in6_addrany. @@ -3367,12 +3485,12 @@ static void if6_seq_stop(struct seq_file *seq, void *v) static int if6_seq_show(struct seq_file *seq, void *v) { struct inet6_ifaddr *ifp = (struct inet6_ifaddr *)v; - seq_printf(seq, "%pi6 %02x %02x %02x %03x %8s\n", + seq_printf(seq, "%pi6 %02x %02x %02x %02x %8s\n", &ifp->addr, ifp->idev->dev->ifindex, ifp->prefix_len, ifp->scope, - ifp->flags, + (u8) ifp->flags, ifp->idev->dev->name); return 0; } @@ -3453,26 +3571,31 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) * Periodic address status verification */ -static void addrconf_verify(unsigned long foo) +static void addrconf_verify_rtnl(void) { unsigned long now, next, next_sec, next_sched; struct inet6_ifaddr *ifp; int i; + ASSERT_RTNL(); + rcu_read_lock_bh(); - spin_lock(&addrconf_verify_lock); now = jiffies; next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); for (i = 0; i < IN6_ADDR_HSIZE; i++) { restart: - hlist_for_each_entry_rcu_bh(ifp, - &inet6_addr_lst[i], addr_lst) { + hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) { unsigned long age; - if (ifp->flags & IFA_F_PERMANENT) + /* When setting preferred_lft to a value not zero or + * infinity, while valid_lft is infinity + * IFA_F_PERMANENT has a non-infinity life time. + */ + if ((ifp->flags & IFA_F_PERMANENT) && + (ifp->prefered_lft == INFINITY_LIFE_TIME)) continue; spin_lock(&ifp->lock); @@ -3497,7 +3620,8 @@ restart: ifp->flags |= IFA_F_DEPRECATED; } - if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) + if ((ifp->valid_lft != INFINITY_LIFE_TIME) && + (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))) next = ifp->tstamp + ifp->valid_lft * HZ; spin_unlock(&ifp->lock); @@ -3558,13 +3682,22 @@ restart: ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n", now, next, next_sec, next_sched); - - addr_chk_timer.expires = next_sched; - add_timer(&addr_chk_timer); - spin_unlock(&addrconf_verify_lock); + mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now); rcu_read_unlock_bh(); } +static void addrconf_verify_work(struct work_struct *w) +{ + rtnl_lock(); + addrconf_verify_rtnl(); + rtnl_unlock(); +} + +static void addrconf_verify(void) +{ + mod_delayed_work(addrconf_wq, &addr_chk_work, 0); +} + static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local, struct in6_addr **peer_pfx) { @@ -3598,6 +3731,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) struct ifaddrmsg *ifm; struct nlattr *tb[IFA_MAX+1]; struct in6_addr *pfx, *peer_pfx; + u32 ifa_flags; int err; err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3609,7 +3743,13 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) if (pfx == NULL) return -EINVAL; - return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); + ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; + + /* We ignore other flags so far. */ + ifa_flags &= IFA_F_MANAGETEMPADDR; + + return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx, + ifm->ifa_prefixlen); } static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, @@ -3619,6 +3759,9 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, clock_t expires; unsigned long timeout; bool was_managetempaddr; + bool had_prefixroute; + + ASSERT_RTNL(); if (!valid_lft || (prefered_lft > valid_lft)) return -EINVAL; @@ -3647,8 +3790,11 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, spin_lock_bh(&ifp->lock); was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR; + had_prefixroute = ifp->flags & IFA_F_PERMANENT && + !(ifp->flags & IFA_F_NOPREFIXROUTE); ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | - IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR); + IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | + IFA_F_NOPREFIXROUTE); ifp->flags |= ifa_flags; ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; @@ -3658,8 +3804,22 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, if (!(ifp->flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); - addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, - expires, flags); + if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { + addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, + expires, flags); + } else if (had_prefixroute) { + enum cleanup_prefix_rt_t action; + unsigned long rt_expires; + + write_lock_bh(&ifp->idev->lock); + action = check_cleanup_prefix_route(ifp, &rt_expires); + write_unlock_bh(&ifp->idev->lock); + + if (action != CLEANUP_PREFIX_RT_NOP) { + cleanup_prefix_route(ifp, rt_expires, + action == CLEANUP_PREFIX_RT_DEL); + } + } if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) { if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR)) @@ -3668,7 +3828,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags, !was_managetempaddr, jiffies); } - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -3713,13 +3873,14 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; /* We ignore other flags so far. */ - ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR; + ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | + IFA_F_NOPREFIXROUTE; ifa = ipv6_get_ifaddr(net, pfx, dev, 1); if (ifa == NULL) { /* * It would be best to check for !NLM_F_CREATE here but - * userspace alreay relies on not having to provide this. + * userspace already relies on not having to provide this. */ return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, ifm->ifa_prefixlen, ifa_flags, @@ -3797,7 +3958,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); - if (!(ifa->flags&IFA_F_PERMANENT)) { + if (!((ifa->flags&IFA_F_PERMANENT) && + (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; valid = ifa->valid_lft; if (preferred != INFINITY_LIFE_TIME) { @@ -4196,7 +4358,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib, memset(&stats[items], 0, pad); } -static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, +static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib, int items, int bytes, size_t syncpoff) { int i; @@ -4216,7 +4378,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype, { switch (attrtype) { case IFLA_INET6_STATS: - __snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, + __snmp6_fill_stats64(stats, idev->stats.ipv6, IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp)); break; case IFLA_INET6_ICMP6STATS: @@ -4296,6 +4458,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) bool update_rs = false; struct in6_addr ll_addr; + ASSERT_RTNL(); + if (token == NULL) return -EINVAL; if (ipv6_addr_any(token)) @@ -4344,7 +4508,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) } write_unlock_bh(&idev->lock); - addrconf_verify(0); + addrconf_verify_rtnl(); return 0; } @@ -4542,29 +4706,17 @@ errout: rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err); } -static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count) -{ - write_lock_bh(&ifp->idev->lock); - spin_lock(&ifp->lock); - if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC| - IFA_F_DADFAILED)) == IFA_F_PERMANENT) && - (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) - ifp->idev->valid_ll_addr_cnt += count; - WARN_ON(ifp->idev->valid_ll_addr_cnt < 0); - spin_unlock(&ifp->lock); - write_unlock_bh(&ifp->idev->lock); -} - static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { struct net *net = dev_net(ifp->idev->dev); + if (event) + ASSERT_RTNL(); + inet6_ifa_notify(event ? : RTM_NEWADDR, ifp); switch (event) { case RTM_NEWADDR: - update_valid_ll_addr_cnt(ifp, 1); - /* * If the address was optimistic * we inserted the route at the start of @@ -4580,8 +4732,6 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) ifp->idev->dev, 0, 0); break; case RTM_DELADDR: - update_valid_ll_addr_cnt(ifp, -1); - if (ifp->idev->cnf.forwarding) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); @@ -4728,6 +4878,46 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write, return ret; } +static +int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + int *valp = ctl->data; + int ret; + int old, new; + + old = *valp; + ret = proc_dointvec(ctl, write, buffer, lenp, ppos); + new = *valp; + + if (write && old != new) { + struct net *net = ctl->extra2; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (valp == &net->ipv6.devconf_dflt->proxy_ndp) + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + NETCONFA_IFINDEX_DEFAULT, + net->ipv6.devconf_dflt); + else if (valp == &net->ipv6.devconf_all->proxy_ndp) + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + NETCONFA_IFINDEX_ALL, + net->ipv6.devconf_all); + else { + struct inet6_dev *idev = ctl->extra1; + + inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, + idev->dev->ifindex, + &idev->cnf); + } + rtnl_unlock(); + } + + return ret; +} + + static struct addrconf_sysctl_table { struct ctl_table_header *sysctl_header; @@ -4914,7 +5104,7 @@ static struct addrconf_sysctl_table .data = &ipv6_devconf.proxy_ndp, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec, + .proc_handler = addrconf_sysctl_proxy_ndp, }, { .procname = "accept_source_route", @@ -5131,6 +5321,12 @@ int __init addrconf_init(void) if (err < 0) goto out_addrlabel; + addrconf_wq = create_workqueue("ipv6_addrconf"); + if (!addrconf_wq) { + err = -ENOMEM; + goto out_nowq; + } + /* The addrconf netdev notifier requires that loopback_dev * has it's ipv6 private information allocated and setup * before it can bring up and give link-local addresses @@ -5161,11 +5357,9 @@ int __init addrconf_init(void) register_netdevice_notifier(&ipv6_dev_notf); - addrconf_verify(0); + addrconf_verify(); - err = rtnl_af_register(&inet6_ops); - if (err < 0) - goto errout_af; + rtnl_af_register(&inet6_ops); err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, NULL); @@ -5189,9 +5383,10 @@ int __init addrconf_init(void) return 0; errout: rtnl_af_unregister(&inet6_ops); -errout_af: unregister_netdevice_notifier(&ipv6_dev_notf); errlo: + destroy_workqueue(addrconf_wq); +out_nowq: unregister_pernet_subsys(&addrconf_ops); out_addrlabel: ipv6_addr_label_cleanup(); @@ -5227,7 +5422,8 @@ void addrconf_cleanup(void) for (i = 0; i < IN6_ADDR_HSIZE; i++) WARN_ON(!hlist_empty(&inet6_addr_lst[i])); spin_unlock_bh(&addrconf_hash_lock); - - del_timer(&addr_chk_timer); + cancel_delayed_work(&addr_chk_work); rtnl_unlock(); + + destroy_workqueue(addrconf_wq); } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 4c11cbcf830..e6960457f62 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -123,7 +123,7 @@ static void snmp6_free_dev(struct inet6_dev *idev) { kfree(idev->stats.icmpv6msgdev); kfree(idev->stats.icmpv6dev); - snmp_mib_free((void __percpu **)idev->stats.ipv6); + free_percpu(idev->stats.ipv6); } /* Nobody refers to this device, we may destroy it. */ diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index b30ad3741b4..731e1e1722d 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -6,7 +6,7 @@ */ /* * Author: - * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> + * YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> */ #include <linux/kernel.h> @@ -22,14 +22,13 @@ #if 0 #define ADDRLABEL(x...) printk(x) #else -#define ADDRLABEL(x...) do { ; } while(0) +#define ADDRLABEL(x...) do { ; } while (0) #endif /* * Policy Table */ -struct ip6addrlbl_entry -{ +struct ip6addrlbl_entry { #ifdef CONFIG_NET_NS struct net *lbl_net; #endif @@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table { /* ::/0 */ .prefix = &in6addr_any, .label = 1, - },{ /* fc00::/7 */ - .prefix = &(struct in6_addr){{{ 0xfc }}}, + }, { /* fc00::/7 */ + .prefix = &(struct in6_addr){ { { 0xfc } } } , .prefixlen = 7, .label = 5, - },{ /* fec0::/10 */ - .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, + }, { /* fec0::/10 */ + .prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } }, .prefixlen = 10, .label = 11, - },{ /* 2002::/16 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, + }, { /* 2002::/16 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x02 } } }, .prefixlen = 16, .label = 2, - },{ /* 3ffe::/16 */ - .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, + }, { /* 3ffe::/16 */ + .prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } }, .prefixlen = 16, .label = 12, - },{ /* 2001::/32 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, + }, { /* 2001::/32 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x01 } } }, .prefixlen = 32, .label = 6, - },{ /* 2001:10::/28 */ - .prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, + }, { /* 2001:10::/28 */ + .prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } }, .prefixlen = 28, .label = 7, - },{ /* ::ffff:0:0 */ - .prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, + }, { /* ::ffff:0:0 */ + .prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } }, .prefixlen = 96, .label = 4, - },{ /* ::/96 */ + }, { /* ::/96 */ .prefix = &in6addr_any, .prefixlen = 96, .label = 3, - },{ /* ::1/128 */ + }, { /* ::1/128 */ .prefix = &in6addr_loopback, .prefixlen = 128, .label = 0, @@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh) if (label == IPV6_ADDR_LABEL_DEFAULT) return -EINVAL; - switch(nlh->nlmsg_type) { + switch (nlh->nlmsg_type) { case RTM_NEWADDRLABEL: if (ifal->ifal_index && !__dev_get_by_index(net, ifal->ifal_index)) @@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { if (idx >= s_idx && net_eq(ip6addrlbl_net(p), net)) { - if ((err = ip6addrlbl_fill(skb, p, - ip6addrlbl_table.seq, - NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, - RTM_NEWADDRLABEL, - NLM_F_MULTI)) <= 0) + err = ip6addrlbl_fill(skb, p, + ip6addrlbl_table.seq, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + RTM_NEWADDRLABEL, + NLM_F_MULTI); + if (err <= 0) break; } idx++; @@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void) + nla_total_size(4); /* IFAL_LABEL */ } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh) { struct net *net = sock_net(in_skb->sk); struct ifaddrlblmsg *ifal; @@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) goto out; } - if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { + skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); + if (!skb) { ip6addrlbl_put(p); return -ENOBUFS; } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 4fbdb7046d2..7cb4392690d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -106,7 +106,6 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol, struct inet_protosw *answer; struct proto *answer_prot; unsigned char answer_flags; - char answer_no_check; int try_loading_module = 0; int err; @@ -162,7 +161,6 @@ lookup_protocol: sock->ops = answer->ops; answer_prot = answer->prot; - answer_no_check = answer->no_check; answer_flags = answer->flags; rcu_read_unlock(); @@ -176,7 +174,6 @@ lookup_protocol: sock_init_data(sock, sk); err = 0; - sk->sk_no_check = answer_no_check; if (INET_PROTOSW_REUSE & answer_flags) sk->sk_reuse = SK_CAN_REUSE; @@ -213,7 +210,7 @@ lookup_protocol: inet->mc_list = NULL; inet->rcv_tos = 0; - if (ipv4_config.no_pmtu_disc) + if (net->ipv4.sysctl_ip_no_pmtu_disc) inet->pmtudisc = IP_PMTUDISC_DONT; else inet->pmtudisc = IP_PMTUDISC_WANT; @@ -661,7 +658,7 @@ int inet6_sk_rebuild_header(struct sock *sk) final_p = fl6_update_dst(&fl6, np->opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { sk->sk_route_caps = 0; sk->sk_err_soft = -PTR_ERR(dst); @@ -683,8 +680,7 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb) if (np->rxopt.all) { if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) || - ((IPV6_FLOWINFO_MASK & - *(__be32 *)skb_network_header(skb)) && + (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) && np->rxopt.bits.rxflow) || (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) || @@ -716,33 +712,25 @@ static int __net_init ipv6_init_mibs(struct net *net) { int i; - if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); + if (!net->mib.udp_stats_in6) return -ENOMEM; - if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, - sizeof(struct udp_mib), - __alignof__(struct udp_mib)) < 0) + net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); + if (!net->mib.udplite_stats_in6) goto err_udplite_mib; - if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, - sizeof(struct ipstats_mib), - __alignof__(struct ipstats_mib)) < 0) + net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); + if (!net->mib.ipv6_statistics) goto err_ip_mib; for_each_possible_cpu(i) { struct ipstats_mib *af_inet6_stats; - af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[0], i); + af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i); u64_stats_init(&af_inet6_stats->syncp); -#if SNMP_ARRAY_SZ == 2 - af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics[1], i); - u64_stats_init(&af_inet6_stats->syncp); -#endif } - if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, - sizeof(struct icmpv6_mib), - __alignof__(struct icmpv6_mib)) < 0) + net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); + if (!net->mib.icmpv6_statistics) goto err_icmp_mib; net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib), GFP_KERNEL); @@ -751,22 +739,22 @@ static int __net_init ipv6_init_mibs(struct net *net) return 0; err_icmpmsg_mib: - snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + free_percpu(net->mib.icmpv6_statistics); err_icmp_mib: - snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); + free_percpu(net->mib.ipv6_statistics); err_ip_mib: - snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); + free_percpu(net->mib.udplite_stats_in6); err_udplite_mib: - snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); + free_percpu(net->mib.udp_stats_in6); return -ENOMEM; } static void ipv6_cleanup_mibs(struct net *net) { - snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); - snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); - snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); - snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); + free_percpu(net->mib.udp_stats_in6); + free_percpu(net->mib.udplite_stats_in6); + free_percpu(net->mib.ipv6_statistics); + free_percpu(net->mib.icmpv6_statistics); kfree(net->mib.icmpv6msg_statistics); } @@ -776,6 +764,7 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.bindv6only = 0; net->ipv6.sysctl.icmpv6_time = 1*HZ; + net->ipv6.sysctl.flowlabel_consistency = 1; atomic_set(&net->ipv6.rt_genid, 0); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 81e496a2e00..72a4930bdc0 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -346,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) struct ip_auth_hdr *ah; struct ah_data *ahp; struct tmp_ext *iph_ext; + int seqhi_len = 0; + __be32 *seqhi; + int sglists = 0; + struct scatterlist *seqhisg; ahp = x->data; ahash = ahp->ahash; @@ -359,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) if (extlen) extlen += sizeof(*iph_ext); + if (x->props.flags & XFRM_STATE_ESN) { + sglists = 1; + seqhi_len = sizeof(*seqhi); + } err = -ENOMEM; - iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen); + iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN + + extlen + seqhi_len); if (!iph_base) goto out; iph_ext = ah_tmp_ext(iph_base); - icv = ah_tmp_icv(ahash, iph_ext, extlen); + seqhi = (__be32 *)((char *)iph_ext + extlen); + icv = ah_tmp_icv(ahash, seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); + seqhisg = sg + nfrags; ah = ip_auth_hdr(skb); memset(ah->auth_data, 0, ahp->icv_trunc_len); @@ -411,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nfrags + sglists); + skb_to_sgvec_nomark(skb, sg, 0, skb->len); - ahash_request_set_crypt(req, sg, icv, skb->len); + if (x->props.flags & XFRM_STATE_ESN) { + /* Attach seqhi sg right after packet payload */ + *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + sg_set_buf(seqhisg, seqhi, seqhi_len); + } + ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah6_output_done, skb); AH_SKB_CB(skb)->tmp = iph_base; @@ -514,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) int nexthdr; int nfrags; int err = -ENOMEM; + int seqhi_len = 0; + __be32 *seqhi; + int sglists = 0; + struct scatterlist *seqhisg; if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) goto out; @@ -550,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) skb_push(skb, hdr_len); - work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len); + if (x->props.flags & XFRM_STATE_ESN) { + sglists = 1; + seqhi_len = sizeof(*seqhi); + } + + work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len + + ahp->icv_trunc_len + seqhi_len); if (!work_iph) goto out; - auth_data = ah_tmp_auth(work_iph, hdr_len); - icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); + auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len); + seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); + icv = ah_tmp_icv(ahash, seqhi, seqhi_len); req = ah_tmp_req(ahash, icv); sg = ah_req_sg(ahash, req); + seqhisg = sg + nfrags; memcpy(work_iph, ip6h, hdr_len); memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); @@ -572,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) ip6h->flow_lbl[2] = 0; ip6h->hop_limit = 0; - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, 0, skb->len); + sg_init_table(sg, nfrags + sglists); + skb_to_sgvec_nomark(skb, sg, 0, skb->len); + + if (x->props.flags & XFRM_STATE_ESN) { + /* Attach seqhi sg right after packet payload */ + *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; + sg_set_buf(seqhisg, seqhi, seqhi_len); + } - ahash_request_set_crypt(req, sg, icv, skb->len); + ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len); ahash_request_set_callback(req, 0, ah6_input_done, skb); AH_SKB_CB(skb)->tmp = work_iph; @@ -609,8 +643,8 @@ out: return err; } -static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; @@ -619,17 +653,19 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static int ah6_init_state(struct xfrm_state *x) @@ -714,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x) kfree(ahp); } +static int ah6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type ah6_type = { .description = "AH6", @@ -727,10 +768,11 @@ static const struct xfrm_type ah6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol ah6_protocol = { +static struct xfrm6_protocol ah6_protocol = { .handler = xfrm6_rcv, + .cb_handler = ah6_rcv_cb, .err_handler = ah6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init ah6_init(void) @@ -740,7 +782,7 @@ static int __init ah6_init(void) return -EAGAIN; } - if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) { + if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ah6_type, AF_INET6); return -EAGAIN; @@ -751,7 +793,7 @@ static int __init ah6_init(void) static void __exit ah6_fini(void) { - if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0) + if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0) diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 5a80f15a9de..21018324468 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -383,6 +383,17 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev, return found; } +/* check if this anycast address is link-local on given interface or + * is global + */ +bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, + const struct in6_addr *addr) +{ + return ipv6_chk_acast_addr(net, + (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ? + dev : NULL), + addr); +} #ifdef CONFIG_PROC_FS struct ac6_iter_state { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 8dfe1f4d3c1..c3bf2d2e519 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -73,7 +73,6 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; } } @@ -171,7 +170,7 @@ ipv4_connected: opt = flowlabel ? flowlabel->opt : np->opt; final_p = fl6_update_dst(&fl6, opt, &final); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); err = 0; if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -206,6 +205,16 @@ out: } EXPORT_SYMBOL_GPL(ip6_datagram_connect); +int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, uaddr); + if (sin6->sin6_family != AF_INET6) + return -EAFNOSUPPORT; + return ip6_datagram_connect(sk, uaddr, addr_len); +} +EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only); + void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, u32 info, u8 *payload) { @@ -323,7 +332,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) struct ipv6_pinfo *np = inet6_sk(sk); struct sock_exterr_skb *serr; struct sk_buff *skb, *skb2; - struct sockaddr_in6 *sin; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name); struct { struct sock_extended_err ee; struct sockaddr_in6 offender; @@ -349,7 +358,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - sin = (struct sockaddr_in6 *)msg->msg_name; if (sin) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; @@ -379,10 +387,12 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; sin->sin6_port = 0; + if (np->rxopt.all) + ip6_datagram_recv_common_ctl(sk, msg, skb); if (skb->protocol == htons(ETH_P_IPV6)) { sin->sin6_addr = ipv6_hdr(skb)->saddr; if (np->rxopt.all) - ip6_datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_specific_ctl(sk, msg, skb); sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr, IP6CB(skb)->iif); @@ -430,8 +440,8 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, { struct ipv6_pinfo *np = inet6_sk(sk); struct sk_buff *skb; - struct sockaddr_in6 *sin; struct ip6_mtuinfo mtu_info; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name); int err; int copied; @@ -453,7 +463,6 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info)); - sin = (struct sockaddr_in6 *)msg->msg_name; if (sin) { sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -474,20 +483,34 @@ out: } -int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, - struct sk_buff *skb) +void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) { struct ipv6_pinfo *np = inet6_sk(sk); - struct inet6_skb_parm *opt = IP6CB(skb); - unsigned char *nh = skb_network_header(skb); + bool is_ipv6 = skb->protocol == htons(ETH_P_IPV6); if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; - src_info.ipi6_ifindex = opt->iif; - src_info.ipi6_addr = ipv6_hdr(skb)->daddr; + if (is_ipv6) { + src_info.ipi6_ifindex = IP6CB(skb)->iif; + src_info.ipi6_addr = ipv6_hdr(skb)->daddr; + } else { + src_info.ipi6_ifindex = + PKTINFO_SKB_CB(skb)->ipi_ifindex; + ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, + &src_info.ipi6_addr); + } put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info); } +} + +void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + struct inet6_skb_parm *opt = IP6CB(skb); + unsigned char *nh = skb_network_header(skb); if (np->rxopt.bits.rxhlim) { int hlim = ipv6_hdr(skb)->hop_limit; @@ -605,7 +628,13 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6); } } - return 0; +} + +void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, + struct sk_buff *skb) +{ + ip6_datagram_recv_common_ctl(sk, msg, skb); + ip6_datagram_recv_specific_ctl(sk, msg, skb); } EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); @@ -670,7 +699,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk, int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL; if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) && !ipv6_chk_addr(net, &src_info->ipi6_addr, - strict ? dev : NULL, 0)) + strict ? dev : NULL, 0) && + !ipv6_chk_acast_addr_src(net, dev, + &src_info->ipi6_addr)) err = -EINVAL; else fl6->saddr = src_info->ipi6_addr; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6eef8a7e35f..d15da137714 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -421,8 +421,8 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu) net_adj) & ~(blksize - 1)) + net_adj - 2; } -static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, - u8 type, u8 code, int offset, __be32 info) +static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; @@ -431,18 +431,20 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static void esp6_destroy(struct xfrm_state *x) @@ -614,6 +616,11 @@ error: return err; } +static int esp6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type esp6_type = { .description = "ESP6", @@ -628,10 +635,11 @@ static const struct xfrm_type esp6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol esp6_protocol = { - .handler = xfrm6_rcv, +static struct xfrm6_protocol esp6_protocol = { + .handler = xfrm6_rcv, + .cb_handler = esp6_rcv_cb, .err_handler = esp6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init esp6_init(void) @@ -640,7 +648,7 @@ static int __init esp6_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) { + if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&esp6_type, AF_INET6); return -EAGAIN; @@ -651,7 +659,7 @@ static int __init esp6_init(void) static void __exit esp6_fini(void) { - if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0) + if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 140748debc4..8af3eb57f43 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, found = (nexthdr == target); if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { - if (target < 0) + if (target < 0 || found) break; return -ENOENT; } diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index cf77f3abfd0..447a7fbd1bb 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void) int ret; ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); - if (!ret) + if (ret) goto out; ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); - if (!ret) + if (ret) goto out_rt; out: diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e27591635f9..b4d5e1d97c1 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -122,7 +122,11 @@ out: static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg) { struct rt6_info *rt = (struct rt6_info *) arg->result; - struct net_device *dev = rt->rt6i_idev->dev; + struct net_device *dev = NULL; + + if (rt->rt6i_idev) + dev = rt->rt6i_idev->dev; + /* do not accept result if the route does * not meet the required prefix length */ @@ -165,7 +169,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) return 0; } - if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff)) + if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel)) return 0; return 1; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index eef8d945b36..f6c84a6eb23 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -67,6 +67,7 @@ #include <net/icmp.h> #include <net/xfrm.h> #include <net/inet_common.h> +#include <net/dsfield.h> #include <asm/uaccess.h> @@ -315,8 +316,10 @@ static void mip6_addr_swap(struct sk_buff *skb) static inline void mip6_addr_swap(struct sk_buff *skb) {} #endif -struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, - struct sock *sk, struct flowi6 *fl6) +static struct dst_entry *icmpv6_route_lookup(struct net *net, + struct sk_buff *skb, + struct sock *sk, + struct flowi6 *fl6) { struct dst_entry *dst, *dst2; struct flowi6 fl2; @@ -397,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) int len; int hlimit; int err = 0; + u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) @@ -410,7 +414,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) */ addr_type = ipv6_addr_type(&hdr->daddr); - if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0)) + if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) || + ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr)) saddr = &hdr->daddr; /* @@ -462,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) fl6.daddr = hdr->saddr; if (saddr) fl6.saddr = *saddr; + fl6.flowi6_mark = mark; fl6.flowi6_oif = iif; fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; @@ -470,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -489,12 +496,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); msg.skb = skb; msg.offset = skb_network_offset(skb); @@ -516,7 +518,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) np->tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, @@ -551,10 +553,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct dst_entry *dst; int err = 0; int hlimit; + u8 tclass; + u32 mark = IP6_REPLY_MARK(net, skb->mark); saddr = &ipv6_hdr(skb)->daddr; - if (!ipv6_unicast_destination(skb)) + if (!ipv6_unicast_destination(skb) && + !(net->ipv6.sysctl.anycast_src_echo_reply && + ipv6_anycast_destination(skb))) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); @@ -567,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.saddr = *saddr; fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; + fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); if (sk == NULL) return; + sk->sk_mark = mark; np = inet6_sk(sk); if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) @@ -586,12 +594,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (IS_ERR(dst)) goto out; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); idev = __in6_dev_get(skb->dev); @@ -599,8 +602,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb) msg.offset = 0; msg.type = ICMPV6_ECHO_REPLY; + tclass = ipv6_get_dsfield(ipv6_hdr(skb)); err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, + sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6, (struct rt6_info *)dst, MSG_DONTWAIT, np->dontfrag); @@ -694,22 +698,11 @@ static int icmpv6_rcv(struct sk_buff *skb) saddr = &ipv6_hdr(skb)->saddr; daddr = &ipv6_hdr(skb)->daddr; - /* Perform checksum. */ - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, - skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, - IPPROTO_ICMPV6, 0)); - if (__skb_checksum_complete(skb)) { - LIMIT_NETDEBUG(KERN_DEBUG - "ICMPv6 checksum failed [%pI6c > %pI6c]\n", - saddr, daddr); - goto csum_error; - } + if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { + LIMIT_NETDEBUG(KERN_DEBUG + "ICMPv6 checksum failed [%pI6c > %pI6c]\n", + saddr, daddr); + goto csum_error; } if (!pskb_pull(skb, sizeof(*hdr))) @@ -984,7 +977,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err) EXPORT_SYMBOL(icmpv6_err_convert); #ifdef CONFIG_SYSCTL -struct ctl_table ipv6_icmp_table_template[] = { +static struct ctl_table ipv6_icmp_table_template[] = { { .procname = "ratelimit", .data = &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 77bb8afb141..a245e5ddffb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -81,12 +81,12 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk, final_p = fl6_update_dst(fl6, np->opt, &final); fl6->saddr = ireq->ir_v6_loc_addr; fl6->flowi6_oif = ireq->ir_iif; - fl6->flowi6_mark = sk->sk_mark; + fl6->flowi6_mark = ireq->ir_mark; fl6->fl6_dport = ireq->ir_rmt_port; fl6->fl6_sport = htons(ireq->ir_num); security_req_classify_flow(req, flowi6_to_flowi(fl6)); - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (IS_ERR(dst)) return NULL; @@ -216,7 +216,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, dst = __inet6_csk_dst_check(sk, np->dst_cookie); if (!dst) { - dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, fl6, final_p); if (!IS_ERR(dst)) __inet6_csk_dst_store(sk, dst, NULL, NULL); @@ -224,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk, return dst; } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused) { - struct sock *sk = skb->sk; struct ipv6_pinfo *np = inet6_sk(sk); struct flowi6 fl6; struct dst_entry *dst; diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 72d198b8e4d..9a4d7322fb2 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -75,23 +75,50 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) return err; } - if (uh->check == 0) { - /* RFC 2460 section 8.1 says that we SHOULD log - this error. Well, it is reasonable. - */ - LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); - return 1; - } - if (skb->ip_summed == CHECKSUM_COMPLETE && - !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, - skb->len, proto, skb->csum)) - skb->ip_summed = CHECKSUM_UNNECESSARY; + /* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) + * we accept a checksum of zero here. When we find the socket + * for the UDP packet we'll check if that socket allows zero checksum + * for IPv6 (set by socket option). + */ + return skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); +} +EXPORT_SYMBOL(udp6_csum_init); + +/* Function to set UDP checksum for an IPv6 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. + */ +void udp6_set_csum(bool nocheck, struct sk_buff *skb, + const struct in6_addr *saddr, + const struct in6_addr *daddr, int len) +{ + struct udphdr *uh = udp_hdr(skb); + + if (nocheck) + uh->check = 0; + else if (skb_is_gso(skb)) + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + else if (skb_dst(skb) && skb_dst(skb)->dev && + (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { - if (!skb_csum_unnecessary(skb)) - skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, - skb->len, proto, 0)); + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); - return 0; + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_transport_header(skb) - skb->head; + skb->csum_offset = offsetof(struct udphdr, check); + uh->check = ~udp_v6_check(len, saddr, daddr, 0); + } else { + __wsum csum; + + BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + + uh->check = 0; + csum = skb_checksum(skb, 0, len, 0); + uh->check = udp_v6_check(len, saddr, daddr, csum); + if (uh->check == 0) + uh->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } } -EXPORT_SYMBOL(udp6_csum_init); +EXPORT_SYMBOL(udp6_set_csum); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5550a8113a6..cb4459bd1d2 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -9,14 +9,12 @@ * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. - */ - -/* - * Changes: - * Yuji SEKIYA @USAGI: Support default route on router node; - * remove ip6_null_entry from the top of - * routing table. - * Ville Nuorvala: Fixed routing subtrees. + * + * Changes: + * Yuji SEKIYA @USAGI: Support default route on router node; + * remove ip6_null_entry from the top of + * routing table. + * Ville Nuorvala: Fixed routing subtrees. */ #define pr_fmt(fmt) "IPv6: " fmt @@ -46,10 +44,9 @@ #define RT6_TRACE(x...) do { ; } while (0) #endif -static struct kmem_cache * fib6_node_kmem __read_mostly; +static struct kmem_cache *fib6_node_kmem __read_mostly; -enum fib_walk_state_t -{ +enum fib_walk_state_t { #ifdef CONFIG_IPV6_SUBTREES FWS_S, #endif @@ -59,8 +56,7 @@ enum fib_walk_state_t FWS_U }; -struct fib6_cleaner_t -{ +struct fib6_cleaner_t { struct fib6_walker_t w; struct net *net; int (*func)(struct rt6_info *, void *arg); @@ -75,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock); #define FWS_INIT FWS_L #endif -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, - struct rt6_info *rt); +static void fib6_prune_clones(struct net *net, struct fib6_node *fn); static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); static int fib6_walk(struct fib6_walker_t *w); @@ -138,7 +133,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) const __be32 *addr = token; /* * Here, - * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) + * 1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) * is optimized version of * htonl(1 << ((~fn_bit)&0x1F)) * See include/asm-generic/bitops/le.h. @@ -147,7 +142,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } -static __inline__ struct fib6_node * node_alloc(void) +static __inline__ struct fib6_node *node_alloc(void) { struct fib6_node *fn; @@ -156,7 +151,7 @@ static __inline__ struct fib6_node * node_alloc(void) return fn; } -static __inline__ void node_free(struct fib6_node * fn) +static __inline__ void node_free(struct fib6_node *fn) { kmem_cache_free(fib6_node_kmem, fn); } @@ -292,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w) static void fib6_dump_end(struct netlink_callback *cb) { - struct fib6_walker_t *w = (void*)cb->args[2]; + struct fib6_walker_t *w = (void *)cb->args[2]; if (w) { if (cb->args[4]) { @@ -302,7 +297,7 @@ static void fib6_dump_end(struct netlink_callback *cb) cb->args[2] = 0; kfree(w); } - cb->done = (void*)cb->args[3]; + cb->done = (void *)cb->args[3]; cb->args[1] = 3; } @@ -485,7 +480,7 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, fn->fn_sernum = sernum; dir = addr_bit_set(addr, fn->fn_bit); pn = fn; - fn = dir ? fn->right: fn->left; + fn = dir ? fn->right : fn->left; } while (fn); if (!allow_create) { @@ -638,12 +633,41 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } +static int fib6_commit_metrics(struct dst_entry *dst, + struct nlattr *mx, int mx_len) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (dst->flags & DST_HOST) { + mp = dst_metrics_write_ptr(dst); + } else { + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (!mp) + return -ENOMEM; + dst_init_metrics(dst, mp, 0); + } + + nla_for_each_attr(nla, mx, mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + if (type > RTAX_MAX) + return -EINVAL; + + mp[type - 1] = nla_get_u32(nla); + } + } + return 0; +} + /* * Insert routing information in a node. */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info) + struct nl_info *info, struct nlattr *mx, int mx_len) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -653,6 +677,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, (info->nlh->nlmsg_flags & NLM_F_CREATE)); int found = 0; bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); + int err; ins = &fn->leaf; @@ -751,6 +776,11 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: + if (mx) { + err = fib6_commit_metrics(&rt->dst, mx, mx_len); + if (err) + return err; + } rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -770,6 +800,11 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } + if (mx) { + err = fib6_commit_metrics(&rt->dst, mx, mx_len); + if (err) + return err; + } *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; @@ -806,7 +841,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, + struct nlattr *mx, int mx_len) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -900,11 +936,11 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) } #endif - err = fib6_add_rt2node(fn, rt, info); + err = fib6_add_rt2node(fn, rt, info, mx, mx_len); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) - fib6_prune_clones(info->nl_net, pn, rt); + fib6_prune_clones(info->nl_net, pn); } out: @@ -955,8 +991,8 @@ struct lookup_args { const struct in6_addr *addr; /* search key */ }; -static struct fib6_node * fib6_lookup_1(struct fib6_node *root, - struct lookup_args *args) +static struct fib6_node *fib6_lookup_1(struct fib6_node *root, + struct lookup_args *args) { struct fib6_node *fn; __be32 dir; @@ -1018,8 +1054,8 @@ backtrack: return NULL; } -struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, - const struct in6_addr *saddr) +struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct fib6_node *fn; struct lookup_args args[] = { @@ -1051,9 +1087,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da */ -static struct fib6_node * fib6_locate_1(struct fib6_node *root, - const struct in6_addr *addr, - int plen, int offset) +static struct fib6_node *fib6_locate_1(struct fib6_node *root, + const struct in6_addr *addr, + int plen, int offset) { struct fib6_node *fn; @@ -1081,9 +1117,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root, return NULL; } -struct fib6_node * fib6_locate(struct fib6_node *root, - const struct in6_addr *daddr, int dst_len, - const struct in6_addr *saddr, int src_len) +struct fib6_node *fib6_locate(struct fib6_node *root, + const struct in6_addr *daddr, int dst_len, + const struct in6_addr *saddr, int src_len) { struct fib6_node *fn; @@ -1151,8 +1187,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, children = 0; child = NULL; - if (fn->right) child = fn->right, children |= 1; - if (fn->left) child = fn->left, children |= 2; + if (fn->right) + child = fn->right, children |= 1; + if (fn->left) + child = fn->left, children |= 2; if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES @@ -1180,8 +1218,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, } else { WARN_ON(fn->fn_flags & RTN_ROOT); #endif - if (pn->right == fn) pn->right = child; - else if (pn->left == fn) pn->left = child; + if (pn->right == fn) + pn->right = child; + else if (pn->left == fn) + pn->left = child; #if RT6_DEBUG >= 2 else WARN_ON(1); @@ -1213,10 +1253,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net, w->node = child; if (children&2) { RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); - w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; + w->state = w->state >= FWS_R ? FWS_U : FWS_INIT; } else { RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); - w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; + w->state = w->state >= FWS_C ? FWS_U : FWS_INIT; } } } @@ -1314,7 +1354,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) struct rt6_info **rtp; #if RT6_DEBUG >= 2 - if (rt->dst.obsolete>0) { + if (rt->dst.obsolete > 0) { WARN_ON(fn != NULL); return -ENOENT; } @@ -1334,7 +1374,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) pn = pn->parent; } #endif - fib6_prune_clones(info->nl_net, pn, rt); + fib6_prune_clones(info->nl_net, pn); } /* @@ -1418,7 +1458,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) if (w->skip) { w->skip--; - continue; + goto skip; } err = w->func(w); @@ -1428,6 +1468,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) w->count++; continue; } +skip: w->state = FWS_U; case FWS_U: if (fn == w->root) @@ -1530,7 +1571,7 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root, } void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) + void *arg) { struct fib6_table *table; struct hlist_head *head; @@ -1542,7 +1583,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), hlist_for_each_entry_rcu(table, head, tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, prune, arg); + func, 0, arg); write_unlock_bh(&table->tb6_lock); } } @@ -1559,10 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg) return 0; } -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, - struct rt6_info *rt) +static void fib6_prune_clones(struct net *net, struct fib6_node *fn) { - fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); + fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL); } /* @@ -1636,7 +1676,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force) gc_args.more = icmp6_dst_gc(); - fib6_clean_all(net, fib6_age, 0, NULL); + fib6_clean_all(net, fib6_age, NULL); now = jiffies; net->ipv6.ip6_rt_last_gc = now; @@ -1707,7 +1747,7 @@ out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: return -ENOMEM; - } +} static void fib6_net_exit(struct net *net) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index e7fb7106550..4052694c6f2 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -15,9 +15,7 @@ #include <linux/socket.h> #include <linux/net.h> #include <linux/netdevice.h> -#include <linux/if_arp.h> #include <linux/in6.h> -#include <linux/route.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> #include <linux/slab.h> @@ -28,12 +26,7 @@ #include <net/sock.h> #include <net/ipv6.h> -#include <net/ndisc.h> -#include <net/protocol.h> -#include <net/ip6_route.h> -#include <net/addrconf.h> #include <net/rawv6.h> -#include <net/icmp.h> #include <net/transp_v6.h> #include <asm/uaccess.h> @@ -210,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, spin_lock_bh(&ip6_fl_lock); if (label == 0) { for (;;) { - fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; + fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK; if (fl->label) { lfl = __fl_lookup(net, fl->label); if (lfl == NULL) @@ -481,11 +474,22 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl, spin_unlock_bh(&ip6_sk_fl_lock); } -int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq) +int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, + int flags) { struct ipv6_pinfo *np = inet6_sk(sk); struct ipv6_fl_socklist *sfl; + if (flags & IPV6_FL_F_REMOTE) { + freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK; + return 0; + } + + if (np->repflow) { + freq->flr_label = np->flow_label; + return 0; + } + rcu_read_lock_bh(); for_each_sk_fl_rcu(np, sfl) { @@ -527,6 +531,15 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) switch (freq.flr_action) { case IPV6_FL_A_PUT: + if (freq.flr_flags & IPV6_FL_F_REFLECT) { + if (sk->sk_protocol != IPPROTO_TCP) + return -ENOPROTOOPT; + if (!np->repflow) + return -ESRCH; + np->flow_label = 0; + np->repflow = 0; + return 0; + } spin_lock_bh(&ip6_sk_fl_lock); for (sflp = &np->ipv6_fl_list; (sfl = rcu_dereference(*sflp))!=NULL; @@ -567,6 +580,20 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) return -ESRCH; case IPV6_FL_A_GET: + if (freq.flr_flags & IPV6_FL_F_REFLECT) { + struct net *net = sock_net(sk); + if (net->ipv6.sysctl.flowlabel_consistency) { + net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n"); + return -EPERM; + } + + if (sk->sk_protocol != IPPROTO_TCP) + return -ENOPROTOOPT; + + np->repflow = 1; + return 0; + } + if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 8acb28621f9..3873181ed85 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -61,9 +61,6 @@ static bool log_ecn_error = true; module_param(log_ecn_error, bool, 0644); MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) -#define IPV6_TCLASS_SHIFT 20 - #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -75,6 +72,7 @@ struct ip6gre_net { }; static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly; static int ip6gre_tunnel_init(struct net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); @@ -356,10 +354,10 @@ failed_free: static void ip6gre_tunnel_uninit(struct net_device *dev) { - struct net *net = dev_net(dev); - struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); - ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + ip6gre_tunnel_unlink(ign, t); dev_put(dev); } @@ -470,17 +468,7 @@ static int ip6gre_rcv(struct sk_buff *skb) goto drop; if (flags&GRE_CSUM) { - switch (skb->ip_summed) { - case CHECKSUM_COMPLETE: - csum = csum_fold(skb->csum); - if (!csum) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - csum = __skb_checksum_complete(skb); - skb->ip_summed = CHECKSUM_COMPLETE; - } + csum = skb_checksum_simple_validate(skb); offset += 4; } if (flags&GRE_KEY) { @@ -499,7 +487,7 @@ static int ip6gre_rcv(struct sk_buff *skb) &ipv6h->saddr, &ipv6h->daddr, key, gre_proto); if (tunnel) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto drop; @@ -614,8 +602,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, int encap_limit, __u32 *pmtu) { - struct net *net = dev_net(dev); struct ip6_tnl *tunnel = netdev_priv(dev); + struct net *net = tunnel->net; struct net_device *tdev; /* Device to other host */ struct ipv6hdr *ipv6h; /* Our new IP header */ unsigned int max_headroom = 0; /* The extra header space needed */ @@ -846,7 +834,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; @@ -982,7 +970,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); - struct rt6_info *rt = rt6_lookup(dev_net(dev), + struct rt6_info *rt = rt6_lookup(t->net, &p->raddr, &p->laddr, p->link, strict); @@ -1066,13 +1054,12 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, int err = 0; struct ip6_tnl_parm2 p; struct __ip6_tnl_parm p1; - struct ip6_tnl *t; - struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); switch (cmd) { case SIOCGETTUNNEL: - t = NULL; if (dev == ign->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; @@ -1080,9 +1067,9 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); + if (t == NULL) + t = netdev_priv(dev); } - if (t == NULL) - t = netdev_priv(dev); memset(&p, 0, sizeof(p)); ip6gre_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) @@ -1245,7 +1232,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) dev->flags |= IFF_NOARP; dev->iflink = 0; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } @@ -1266,12 +1252,12 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = alloc_percpu(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; for_each_possible_cpu(i) { - struct pcpu_tstats *ip6gre_tunnel_stats; + struct pcpu_sw_netstats *ip6gre_tunnel_stats; ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); u64_stats_init(&ip6gre_tunnel_stats->syncp); } @@ -1300,11 +1286,17 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = { .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, }; -static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, - struct list_head *head) +static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) { + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct net_device *dev, *aux; int prio; + for_each_netdev_safe(net, dev, aux) + if (dev->rtnl_link_ops == &ip6gre_link_ops || + dev->rtnl_link_ops == &ip6gre_tap_ops) + unregister_netdevice_queue(dev, head); + for (prio = 0; prio < 4; prio++) { int h; for (h = 0; h < HASH_SIZE; h++) { @@ -1313,7 +1305,12 @@ static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, t = rtnl_dereference(ign->tunnels[prio][h]); while (t != NULL) { - unregister_netdevice_queue(t->dev, head); + /* If dev is in the same netns, it has already + * been added to the list by the previous loop. + */ + if (!net_eq(dev_net(t->dev), net)) + unregister_netdevice_queue(t->dev, + head); t = rtnl_dereference(t->next); } } @@ -1332,6 +1329,11 @@ static int __net_init ip6gre_init_net(struct net *net) goto err_alloc_dev; } dev_net_set(ign->fb_tunnel_dev, net); + /* FB netdevice is special: we have one, and only one per netns. + * Allowing to move it to another netns is clearly unsafe. + */ + ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; @@ -1352,12 +1354,10 @@ err_alloc_dev: static void __net_exit ip6gre_exit_net(struct net *net) { - struct ip6gre_net *ign; LIST_HEAD(list); - ign = net_generic(net, ip6gre_net_id); rtnl_lock(); - ip6gre_destroy_tunnels(ign, &list); + ip6gre_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } @@ -1457,7 +1457,6 @@ static void ip6gre_netlink_parms(struct nlattr *data[], static int ip6gre_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; - int i; tunnel = netdev_priv(dev); @@ -1467,16 +1466,10 @@ static int ip6gre_tap_init(struct net_device *dev) ip6gre_tnl_link_config(tunnel, 1); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_tstats *ip6gre_tap_stats; - ip6gre_tap_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6gre_tap_stats->syncp); - } - return 0; } @@ -1541,15 +1534,14 @@ out: static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { - struct ip6_tnl *t, *nt; - struct net *net = dev_net(dev); + struct ip6_tnl *t, *nt = netdev_priv(dev); + struct net *net = nt->net; struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct __ip6_tnl_parm p; if (dev == ign->fb_tunnel_dev) return -EINVAL; - nt = netdev_priv(dev); ip6gre_netlink_parms(data, &p); t = ip6gre_tunnel_locate(net, &p, 0); @@ -1569,6 +1561,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], return 0; } +static void ip6gre_dellink(struct net_device *dev, struct list_head *head) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + if (dev != ign->fb_tunnel_dev) + unregister_netdevice_queue(dev, head); +} + static size_t ip6gre_get_size(const struct net_device *dev) { return @@ -1646,6 +1647,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .validate = ip6gre_tunnel_validate, .newlink = ip6gre_newlink, .changelink = ip6gre_changelink, + .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, }; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 302d6fb1ff2..51d54dc376f 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,7 +49,7 @@ int ip6_rcv_finish(struct sk_buff *skb) { - if (sysctl_ip_early_demux && !skb_dst(skb)) { + if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 4b851692b1f..65eda2a8af4 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -89,7 +89,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, unsigned int unfrag_ip6hlen; u8 *prevhdr; int offset = 0; - bool tunnel; + bool encap, udpfrag; int nhoff; if (unlikely(skb_shinfo(skb)->gso_type & @@ -97,9 +97,11 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, SKB_GSO_DODGY | SKB_GSO_TCP_ECN | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_MPLS | SKB_GSO_TCPV6 | 0))) @@ -110,8 +112,8 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h)))) goto out; - tunnel = SKB_GSO_CB(skb)->encap_level > 0; - if (tunnel) + encap = SKB_GSO_CB(skb)->encap_level > 0; + if (encap) features = skb->dev->hw_enc_features & netif_skb_features(skb); SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); @@ -121,6 +123,12 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); + if (skb->encapsulation && + skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) + udpfrag = proto == IPPROTO_UDP && encap; + else + udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; + ops = rcu_dereference(inet6_offloads[proto]); if (likely(ops && ops->callbacks.gso_segment)) { skb_reset_transport_header(skb); @@ -133,13 +141,9 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); - if (tunnel) { - skb_reset_inner_headers(skb); - skb->encapsulation = 1; - } skb->network_header = (u8 *)ipv6h - skb->head; - if (!tunnel && proto == IPPROTO_UDP) { + if (udpfrag) { unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); @@ -148,12 +152,40 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, offset += (ntohs(ipv6h->payload_len) - sizeof(struct frag_hdr)); } + if (encap) + skb_reset_inner_headers(skb); } out: return segs; } +/* Return the total length of all the extension hdrs, following the same + * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs. + */ +static int ipv6_exthdrs_len(struct ipv6hdr *iph, + const struct net_offload **opps) +{ + struct ipv6_opt_hdr *opth = (void *)iph; + int len = 0, proto, optlen = sizeof(*iph); + + proto = iph->nexthdr; + for (;;) { + if (proto != NEXTHDR_HOP) { + *opps = rcu_dereference(inet6_offloads[proto]); + if (unlikely(!(*opps))) + break; + if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR)) + break; + } + opth = (void *)opth + optlen; + optlen = ipv6_optlen(opth); + len += optlen; + proto = opth->nexthdr; + } + return len; +} + static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { @@ -164,9 +196,8 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, unsigned int nlen; unsigned int hlen; unsigned int off; - int flush = 1; + u16 flush = 1; int proto; - __wsum csum; off = skb_gro_offset(skb); hlen = off + sizeof(*iph); @@ -177,6 +208,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, goto out; } + skb_set_network_header(skb, off); skb_gro_pull(skb, sizeof(*iph)); skb_set_transport_header(skb, skb_gro_offset(skb)); @@ -211,12 +243,16 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, if (!NAPI_GRO_CB(p)->same_flow) continue; - iph2 = ipv6_hdr(p); + iph2 = (struct ipv6hdr *)(p->data + off); first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; - /* All fields must match except length and Traffic Class. */ - if (nlen != skb_network_header_len(p) || - (first_word & htonl(0xF00FFFFF)) || + /* All fields must match except length and Traffic Class. + * XXX skbs on the gro_list have all been parsed and pulled + * already so we don't need to compare nlen + * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops))) + * memcmp() alone below is suffcient, right? + */ + if ((first_word & htonl(0xF00FFFFF)) || memcmp(&iph->nexthdr, &iph2->nexthdr, nlen - offsetof(struct ipv6hdr, nexthdr))) { NAPI_GRO_CB(p)->same_flow = 0; @@ -229,13 +265,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head, NAPI_GRO_CB(skb)->flush |= flush; - csum = skb->csum; - skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); + skb_gro_postpull_rcsum(skb, iph, nlen); pp = ops->callbacks.gro_receive(head, skb); - skb->csum = csum; - out_unlock: rcu_read_unlock(); @@ -245,21 +278,21 @@ out: return pp; } -static int ipv6_gro_complete(struct sk_buff *skb) +static int ipv6_gro_complete(struct sk_buff *skb, int nhoff) { const struct net_offload *ops; - struct ipv6hdr *iph = ipv6_hdr(skb); + struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff); int err = -ENOSYS; - iph->payload_len = htons(skb->len - skb_network_offset(skb) - - sizeof(*iph)); + iph->payload_len = htons(skb->len - nhoff - sizeof(*iph)); rcu_read_lock(); - ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); + + nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops); if (WARN_ON(!ops || !ops->callbacks.gro_complete)) goto out_unlock; - err = ops->callbacks.gro_complete(skb); + err = ops->callbacks.gro_complete(skb, nhoff); out_unlock: rcu_read_unlock(); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 9a311cc7967..45702b8cd14 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -132,7 +132,7 @@ static int ip6_finish_output(struct sk_buff *skb) return ip6_finish_output2(skb); } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb) { struct net_device *dev = skb_dst(skb)->dev; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -219,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, skb->mark = sk->sk_mark; mtu = dst_mtu(dst); - if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { + if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, @@ -321,6 +321,45 @@ static inline int ip6_forward_finish(struct sk_buff *skb) return dst_output(skb); } +static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ + unsigned int mtu; + struct inet6_dev *idev; + + if (dst_metric_locked(dst, RTAX_MTU)) { + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + return mtu; + } + + mtu = IPV6_MIN_MTU; + rcu_read_lock(); + idev = __in6_dev_get(dst->dev); + if (idev) + mtu = idev->cnf.mtu6; + rcu_read_unlock(); + + return mtu; +} + +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ + if (skb->len <= mtu) + return false; + + /* ipv6 conntrack defrag sets max_frag_size + ignore_df */ + if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) + return true; + + if (skb->ignore_df) + return false; + + if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) + return false; + + return true; +} + int ip6_forward(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -332,6 +371,9 @@ int ip6_forward(struct sk_buff *skb) if (net->ipv6.devconf_all->forwarding == 0) goto error; + if (skb->pkt_type != PACKET_HOST) + goto drop; + if (skb_warn_if_lro(skb)) goto drop; @@ -341,9 +383,6 @@ int ip6_forward(struct sk_buff *skb) goto drop; } - if (skb->pkt_type != PACKET_HOST) - goto drop; - skb_forward_csum(skb); /* @@ -441,12 +480,11 @@ int ip6_forward(struct sk_buff *skb) } } - mtu = dst_mtu(dst); + mtu = ip6_dst_mtu_forward(dst); if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || - (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { + if (ip6_pkt_too_big(skb, mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -496,12 +534,23 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) to->tc_index = from->tc_index; #endif nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) - to->nf_trace = from->nf_trace; -#endif skb_copy_secmark(to, from); } +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ + static u32 ip6_idents_hashrnd __read_mostly; + u32 hash, id; + + net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + + hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); + hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); + + id = ip_idents_reserve(hash, 1); + fhdr->identification = htonl(id); +} + int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) { struct sk_buff *frag; @@ -524,7 +573,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu) || + if (unlikely(!skb->ignore_df && skb->len > mtu) || (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) @@ -941,7 +990,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * @sk: socket which provides route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup - * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow. * @@ -949,8 +997,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup); * error code. */ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep) + const struct in6_addr *final_dst) { struct dst_entry *dst = NULL; int err; @@ -960,8 +1007,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (can_sleep) - fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -972,7 +1017,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * @sk: socket which provides the dst cache and route info * @fl6: flow to lookup * @final_dst: final destination address for ipsec lookup - * @can_sleep: we are in a sleepable context * * This function performs a route lookup on the given flow with the * possibility of using the cached route in the socket if it is valid. @@ -983,8 +1027,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow); * error code. */ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, - const struct in6_addr *final_dst, - bool can_sleep) + const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); int err; @@ -996,8 +1039,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, return ERR_PTR(err); if (final_dst) fl6->daddr = *final_dst; - if (can_sleep) - fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP; return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); } @@ -1078,21 +1119,19 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int fragheaderlen, struct sk_buff *skb, struct rt6_info *rt, - bool pmtuprobe) + unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { if (skb == NULL) { /* first fragment, reserve header_len */ - *mtu = *mtu - rt->dst.header_len; + *mtu = orig_mtu - rt->dst.header_len; } else { /* * this fragment is not first, the headers * space is regarded as data space. */ - *mtu = min(*mtu, pmtuprobe ? - rt->dst.dev->mtu : - dst_mtu(rt->dst.path)); + *mtu = orig_mtu; } *maxfraglen = ((*mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); @@ -1109,7 +1148,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, struct ipv6_pinfo *np = inet6_sk(sk); struct inet_cork *cork; struct sk_buff *skb, *skb_prev = NULL; - unsigned int maxfraglen, fragheaderlen, mtu; + unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu; int exthdrlen; int dst_exthdrlen; int hh_len; @@ -1165,10 +1204,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, np->cork.hop_limit = hlimit; np->cork.tclass = tclass; if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(&rt->dst); else - mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? rt->dst.dev->mtu : dst_mtu(rt->dst.path); if (np->frag_size < mtu) { if (np->frag_size) @@ -1191,16 +1230,43 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, dst_exthdrlen = 0; mtu = cork->fragsize; } + orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); + unsigned int maxnonfragsize, headersize; + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->opt_flen + opt->opt_nflen : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + if (ip6_sk_ignore_df(sk)) + maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + else + maxnonfragsize = mtu; + + /* dontfrag active */ + if ((cork->length + length > mtu - headersize) && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } + + if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); return -EMSGSIZE; } } @@ -1225,12 +1291,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || @@ -1270,8 +1330,7 @@ alloc_new_skb: if (skb == NULL || skb_prev == NULL) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, - np->pmtudisc == - IPV6_PMTUDISC_PROBE); + orig_mtu); skb_prev = skb; @@ -1499,8 +1558,7 @@ int ip6_push_pending_frames(struct sock *sk) } /* Allow local fragmentation. */ - if (np->pmtudisc < IPV6_PMTUDISC_DO) - skb->local_df = 1; + skb->ignore_df = ip6_sk_ignore_df(sk); *final_dst = fl6->daddr; __skb_pull(skb, skb_network_header_len(skb)); @@ -1527,8 +1585,8 @@ int ip6_push_pending_frames(struct sock *sk) if (proto == IPPROTO_ICMPV6) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); + ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } err = ip6_local_out(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d6062325db0..afa08245836 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -29,7 +29,6 @@ #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/if_tunnel.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -62,6 +61,7 @@ MODULE_AUTHOR("Ville Nuorvala"); MODULE_DESCRIPTION("IPv6 tunneling device"); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); #ifdef IP6_TNL_DEBUG @@ -70,9 +70,6 @@ MODULE_ALIAS_NETDEV("ip6tnl0"); #define IP6_TNL_TRACE(x...) do {;} while(0) #endif -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) -#define IPV6_TCLASS_SHIFT 20 - #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -103,16 +100,26 @@ struct ip6_tnl_net { static struct net_device_stats *ip6_get_stats(struct net_device *dev) { - struct pcpu_tstats sum = { 0 }; + struct pcpu_sw_netstats tmp, sum = { 0 }; int i; for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + unsigned int start; + const struct pcpu_sw_netstats *tstats = + per_cpu_ptr(dev->tstats, i); + + do { + start = u64_stats_fetch_begin_irq(&tstats->syncp); + tmp.rx_packets = tstats->rx_packets; + tmp.rx_bytes = tstats->rx_bytes; + tmp.tx_packets = tstats->tx_packets; + tmp.tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); + + sum.rx_packets += tmp.rx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.tx_bytes += tmp.tx_bytes; } dev->stats.rx_packets = sum.rx_packets; dev->stats.rx_bytes = sum.rx_bytes; @@ -785,7 +792,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (t->parms.proto != ipproto && t->parms.proto != 0) { rcu_read_unlock(); @@ -824,8 +831,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); @@ -1131,7 +1140,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) - fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + fl6.flowlabel |= ip6_flowlabel(ipv6h); if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) fl6.flowi6_mark = skb->mark; @@ -1332,8 +1341,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) int err = 0; struct ip6_tnl_parm p; struct __ip6_tnl_parm p1; - struct ip6_tnl *t = NULL; - struct net *net = dev_net(dev); + struct ip6_tnl *t = netdev_priv(dev); + struct net *net = t->net; struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); switch (cmd) { @@ -1345,11 +1354,11 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); + if (t == NULL) + t = netdev_priv(dev); } else { memset(&p, 0, sizeof(p)); } - if (t == NULL) - t = netdev_priv(dev); ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { err = -EFAULT; @@ -1494,19 +1503,12 @@ static inline int ip6_tnl_dev_init_gen(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - int i; t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - - for_each_possible_cpu(i) { - struct pcpu_tstats *ip6_tnl_stats; - ip6_tnl_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6_tnl_stats->syncp); - } return 0; } @@ -1556,7 +1558,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[]) { u8 proto; - if (!data) + if (!data || !data[IFLA_IPTUN_PROTO]) return 0; proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ed94ba61dda..9aaa6bb229e 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -24,7 +24,6 @@ #include <linux/if.h> #include <linux/in.h> #include <linux/ip.h> -#include <linux/if_tunnel.h> #include <linux/net.h> #include <linux/in6.h> #include <linux/netdevice.h> @@ -75,26 +74,6 @@ struct vti6_net { struct ip6_tnl __rcu **tnls[2]; }; -static struct net_device_stats *vti6_get_stats(struct net_device *dev) -{ - struct pcpu_tstats sum = { 0 }; - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; - } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; -} - #define for_each_vti6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) @@ -299,7 +278,6 @@ static void vti6_dev_uninit(struct net_device *dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); else vti6_tnl_unlink(ip6n, t); - ip6_tnl_dst_reset(t); dev_put(dev); } @@ -309,11 +287,8 @@ static int vti6_rcv(struct sk_buff *skb) const struct ipv6hdr *ipv6h = ipv6_hdr(skb); rcu_read_lock(); - if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr)) != NULL) { - struct pcpu_tstats *tstats; - if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -330,25 +305,58 @@ static int vti6_rcv(struct sk_buff *skb) goto discard; } - tstats = this_cpu_ptr(t->dev->tstats); - tstats->rx_packets++; - tstats->rx_bytes += skb->len; - - skb->mark = 0; - secpath_reset(skb); - skb->dev = t->dev; + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; + skb->mark = be32_to_cpu(t->parms.i_key); rcu_read_unlock(); - return 0; + + return xfrm6_rcv(skb); } rcu_read_unlock(); - return 1; - + return -EINVAL; discard: kfree_skb(skb); return 0; } +static int vti6_rcv_cb(struct sk_buff *skb, int err) +{ + unsigned short family; + struct net_device *dev; + struct pcpu_sw_netstats *tstats; + struct xfrm_state *x; + struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; + + if (!t) + return 1; + + dev = t->dev; + + if (err) { + dev->stats.rx_errors++; + dev->stats.rx_dropped++; + + return 0; + } + + x = xfrm_input_state(skb); + family = x->inner_mode->afinfo->family; + + if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + return -EPERM; + + skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev))); + skb->dev = dev; + + tstats = this_cpu_ptr(dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + return 0; +} + /** * vti6_addr_conflict - compare packet addresses to tunnel's own * @t: the outgoing tunnel device @@ -368,44 +376,56 @@ vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } +static bool vti6_state_check(const struct xfrm_state *x, + const struct in6_addr *dst, + const struct in6_addr *src) +{ + xfrm_address_t *daddr = (xfrm_address_t *)dst; + xfrm_address_t *saddr = (xfrm_address_t *)src; + + /* if there is no transform then this tunnel is not functional. + * Or if the xfrm is not mode tunnel. + */ + if (!x || x->props.mode != XFRM_MODE_TUNNEL || + x->props.family != AF_INET6) + return false; + + if (ipv6_addr_any(dst)) + return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6); + + if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6)) + return false; + + return true; +} + /** * vti6_xmit - send a packet * @skb: the outgoing socket buffer * @dev: the outgoing tunnel device + * @fl: the flow informations for the xfrm_lookup **/ -static int vti6_xmit(struct sk_buff *skb, struct net_device *dev) +static int +vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) { - struct net *net = dev_net(dev); struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; - struct dst_entry *dst = NULL, *ndst = NULL; - struct flowi6 fl6; - struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; int err = -1; - if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || - !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) - return err; - - dst = ip6_tnl_dst_check(t); - if (!dst) { - memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); - - ndst = ip6_route_output(net, NULL, &fl6); + if (!dst) + goto tx_err_link_failure; - if (ndst->error) - goto tx_err_link_failure; - ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(ndst)) { - err = PTR_ERR(ndst); - ndst = NULL; - goto tx_err_link_failure; - } - dst = ndst; + dst_hold(dst); + dst = xfrm_lookup(t->net, dst, fl, NULL, 0); + if (IS_ERR(dst)) { + err = PTR_ERR(dst); + dst = NULL; + goto tx_err_link_failure; } - if (!dst->xfrm || dst->xfrm->props.mode != XFRM_MODE_TUNNEL) + if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr)) goto tx_err_link_failure; tdev = dst->dev; @@ -417,14 +437,21 @@ static int vti6_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err_dst_release; } - - skb_dst_drop(skb); - skb_dst_set_noref(skb, dst); - - ip6tunnel_xmit(skb, dev); - if (ndst) { - dev->mtu = dst_mtu(ndst); - ip6_tnl_dst_store(t, ndst); + skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); + skb_dst_set(skb, dst); + skb->dev = skb_dst(skb)->dev; + + err = dst_output(skb); + if (net_xmit_eval(err) == 0) { + struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + + u64_stats_update_begin(&tstats->syncp); + tstats->tx_bytes += skb->len; + tstats->tx_packets++; + u64_stats_update_end(&tstats->syncp); + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; } return 0; @@ -432,7 +459,7 @@ tx_err_link_failure: stats->tx_carrier_errors++; dst_link_failure(skb); tx_err_dst_release: - dst_release(ndst); + dst_release(dst); return err; } @@ -441,16 +468,33 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct net_device_stats *stats = &t->dev->stats; + struct ipv6hdr *ipv6h; + struct flowi fl; int ret; + memset(&fl, 0, sizeof(fl)); + skb->mark = be32_to_cpu(t->parms.o_key); + switch (skb->protocol) { case htons(ETH_P_IPV6): - ret = vti6_xmit(skb, dev); + ipv6h = ipv6_hdr(skb); + + if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || + !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) + goto tx_err; + + xfrm_decode_session(skb, &fl, AF_INET6); + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + break; + case htons(ETH_P_IP): + xfrm_decode_session(skb, &fl, AF_INET); + memset(IPCB(skb), 0, sizeof(*IPCB(skb))); break; default: goto tx_err; } + ret = vti6_xmit(skb, dev, &fl); if (ret < 0) goto tx_err; @@ -463,24 +507,69 @@ tx_err: return NETDEV_TX_OK; } +static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + __be32 spi; + __u32 mark; + struct xfrm_state *x; + struct ip6_tnl *t; + struct ip_esp_hdr *esph; + struct ip_auth_hdr *ah; + struct ip_comp_hdr *ipch; + struct net *net = dev_net(skb->dev); + const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; + int protocol = iph->nexthdr; + + t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr); + if (!t) + return -1; + + mark = be32_to_cpu(t->parms.o_key); + + switch (protocol) { + case IPPROTO_ESP: + esph = (struct ip_esp_hdr *)(skb->data + offset); + spi = esph->spi; + break; + case IPPROTO_AH: + ah = (struct ip_auth_hdr *)(skb->data + offset); + spi = ah->spi; + break; + case IPPROTO_COMP: + ipch = (struct ip_comp_hdr *)(skb->data + offset); + spi = htonl(ntohs(ipch->cpi)); + break; + default: + return 0; + } + + if (type != ICMPV6_PKT_TOOBIG && + type != NDISC_REDIRECT) + return 0; + + x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, + spi, protocol, AF_INET6); + if (!x) + return 0; + + if (type == NDISC_REDIRECT) + ip6_redirect(skb, net, skb->dev->ifindex, 0); + else + ip6_update_pmtu(skb, net, info, 0, 0); + xfrm_state_put(x); + + return 0; +} + static void vti6_link_config(struct ip6_tnl *t) { - struct dst_entry *dst; struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; - struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); - /* Set up flowi template */ - fl6->saddr = p->laddr; - fl6->daddr = p->raddr; - fl6->flowi6_oif = p->link; - fl6->flowi6_mark = be32_to_cpu(p->i_key); - fl6->flowi6_proto = p->proto; - fl6->flowlabel = 0; - p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | IP6_TNL_F_CAP_PER_PACKET); p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); @@ -491,28 +580,6 @@ static void vti6_link_config(struct ip6_tnl *t) dev->flags &= ~IFF_POINTOPOINT; dev->iflink = p->link; - - if (p->flags & IP6_TNL_F_CAP_XMIT) { - - dst = ip6_route_output(dev_net(dev), NULL, fl6); - if (dst->error) - return; - - dst = xfrm_lookup(dev_net(dev), dst, flowi6_to_flowi(fl6), - NULL, 0); - if (IS_ERR(dst)) - return; - - if (dst->dev) { - dev->hard_header_len = dst->dev->hard_header_len; - - dev->mtu = dst_mtu(dst); - - if (dev->mtu < IPV6_MIN_MTU) - dev->mtu = IPV6_MIN_MTU; - } - dst_release(dst); - } } /** @@ -716,7 +783,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, .ndo_change_mtu = vti6_change_mtu, - .ndo_get_stats = vti6_get_stats, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; /** @@ -728,18 +795,14 @@ static const struct net_device_ops vti6_netdev_ops = { **/ static void vti6_dev_setup(struct net_device *dev) { - struct ip6_tnl *t; - dev->netdev_ops = &vti6_netdev_ops; dev->destructor = vti6_dev_free; dev->type = ARPHRD_TUNNEL6; dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); dev->mtu = ETH_DATA_LEN; - t = netdev_priv(dev); dev->flags |= IFF_NOARP; dev->addr_len = sizeof(struct in6_addr); - dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; } @@ -753,7 +816,7 @@ static inline int vti6_dev_init_gen(struct net_device *dev) t->dev = dev; t->net = dev_net(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; return 0; @@ -927,11 +990,6 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .fill_info = vti6_fill_info, }; -static struct xfrm_tunnel_notifier vti6_handler __read_mostly = { - .handler = vti6_rcv, - .priority = 1, -}; - static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) { int h; @@ -1003,6 +1061,27 @@ static struct pernet_operations vti6_net_ops = { .size = sizeof(struct vti6_net), }; +static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + +static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + +static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { + .handler = vti6_rcv, + .cb_handler = vti6_rcv_cb, + .err_handler = vti6_err, + .priority = 100, +}; + /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1016,11 +1095,30 @@ static int __init vti6_tunnel_init(void) if (err < 0) goto out_pernet; - err = xfrm6_mode_tunnel_input_register(&vti6_handler); + err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); if (err < 0) { - pr_err("%s: can't register vti6\n", __func__); + pr_err("%s: can't register vti6 protocol\n", __func__); + + goto out; + } + + err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); + if (err < 0) { + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); + pr_err("%s: can't register vti6 protocol\n", __func__); + goto out; } + + err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); + if (err < 0) { + xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); + pr_err("%s: can't register vti6 protocol\n", __func__); + + goto out; + } + err = rtnl_link_register(&vti6_link_ops); if (err < 0) goto rtnl_link_failed; @@ -1028,7 +1126,9 @@ static int __init vti6_tunnel_init(void) return 0; rtnl_link_failed: - xfrm6_mode_tunnel_input_deregister(&vti6_handler); + xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); + xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); + xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); out: unregister_pernet_device(&vti6_net_ops); out_pernet: @@ -1041,8 +1141,12 @@ out_pernet: static void __exit vti6_tunnel_cleanup(void) { rtnl_link_unregister(&vti6_link_ops); - if (xfrm6_mode_tunnel_input_deregister(&vti6_handler)) - pr_info("%s: can't deregister vti6\n", __func__); + if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP)) + pr_info("%s: can't deregister protocol\n", __func__); + if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH)) + pr_info("%s: can't deregister protocol\n", __func__); + if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP)) + pr_info("%s: can't deregister protocol\n", __func__); unregister_pernet_device(&vti6_net_ops); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f365310bfcc..8250474ab7d 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -141,9 +141,12 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id) static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6, struct mr6_table **mrt) { - struct ip6mr_result res; - struct fib_lookup_arg arg = { .result = &res, }; int err; + struct ip6mr_result res; + struct fib_lookup_arg arg = { + .result = &res, + .flags = FIB_LOOKUP_NOREF, + }; err = fib_rules_lookup(net->ipv6.mr6_rules_ops, flowi6_to_flowi(flp6), 0, &arg); @@ -697,7 +700,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct mr6_table *mrt; struct flowi6 fl6 = { .flowi6_oif = dev->ifindex, - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_mark = skb->mark, }; int err; @@ -1630,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb) { struct mr6_table *mrt; struct flowi6 fl6 = { - .flowi6_iif = skb->skb_iif, + .flowi6_iif = skb->skb_iif ? : LOOPBACK_IFINDEX, .flowi6_oif = skb->dev->ifindex, .flowi6_mark = skb->mark, }; @@ -2346,13 +2349,14 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 portid, u32 seq, struct mfc6_cache *c, int cmd) + u32 portid, u32 seq, struct mfc6_cache *c, int cmd, + int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; int err; - nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2420,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, if (skb == NULL) goto errout; - err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); + err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); if (err < 0) goto errout; @@ -2459,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) goto done; next_entry: e++; @@ -2473,7 +2478,8 @@ next_entry: if (ip6mr_fill_mroute(mrt, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - mfc, RTM_NEWROUTE) < 0) { + mfc, RTM_NEWROUTE, + NLM_F_MULTI) < 0) { spin_unlock_bh(&mfc_unres_lock); goto done; } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index da9becb42e8..d1c793cffcb 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -53,7 +53,7 @@ #include <linux/icmpv6.h> #include <linux/mutex.h> -static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { struct net *net = dev_net(skb->dev); @@ -65,19 +65,21 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (type != ICMPV6_PKT_TOOBIG && type != NDISC_REDIRECT) - return; + return 0; spi = htonl(ntohs(ipcomph->cpi)); x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) - return; + return 0; if (type == NDISC_REDIRECT) ip6_redirect(skb, net, skb->dev->ifindex, 0); else ip6_update_pmtu(skb, net, info, 0, 0); xfrm_state_put(x); + + return 0; } static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) @@ -174,6 +176,11 @@ out: return err; } +static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) +{ + return 0; +} + static const struct xfrm_type ipcomp6_type = { .description = "IPCOMP6", @@ -186,11 +193,12 @@ static const struct xfrm_type ipcomp6_type = .hdr_offset = xfrm6_find_1stfragopt, }; -static const struct inet6_protocol ipcomp6_protocol = +static struct xfrm6_protocol ipcomp6_protocol = { .handler = xfrm6_rcv, + .cb_handler = ipcomp6_rcv_cb, .err_handler = ipcomp6_err, - .flags = INET6_PROTO_NOPOLICY, + .priority = 0, }; static int __init ipcomp6_init(void) @@ -199,7 +207,7 @@ static int __init ipcomp6_init(void) pr_info("%s: can't add xfrm type\n", __func__); return -EAGAIN; } - if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) { + if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) { pr_info("%s: can't add protocol\n", __func__); xfrm_unregister_type(&ipcomp6_type, AF_INET6); return -EAGAIN; @@ -209,7 +217,7 @@ static int __init ipcomp6_init(void) static void __exit ipcomp6_fini(void) { - if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) + if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0) pr_info("%s: can't remove protocol\n", __func__); if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0) pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 1c6ce3119ff..edb58aff4ae 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -722,7 +722,7 @@ done: case IPV6_MTU_DISCOVER: if (optlen < sizeof(int)) goto e_inval; - if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) + if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT) goto e_inval; np->pmtudisc = val; retv = 0; @@ -1002,10 +1002,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, release_sock(sk); if (skb) { - int err = ip6_datagram_recv_ctl(sk, &msg, skb); + ip6_datagram_recv_ctl(sk, &msg, skb); kfree_skb(skb); - if (err) - return err; } else { if (np->rxopt.bits.rxinfo) { struct in6_pktinfo src_info; @@ -1019,7 +1017,8 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim); } if (np->rxopt.bits.rxtclass) { - int tclass = np->rcv_tclass; + int tclass = (int)ip6_tclass(np->rcv_flowinfo); + put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass); } if (np->rxopt.bits.rxoinfo) { @@ -1034,6 +1033,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, int hlim = np->mcast_hops; put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim); } + if (np->rxopt.bits.rxflow) { + __be32 flowinfo = np->rcv_flowinfo; + + put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); + } } len -= msg.msg_controllen; return put_user(len, optlen); @@ -1215,6 +1219,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, case IPV6_FLOWLABEL_MGR: { struct in6_flowlabel_req freq; + int flags; if (len < sizeof(freq)) return -EINVAL; @@ -1226,9 +1231,11 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, return -EINVAL; len = sizeof(freq); + flags = freq.flr_flags; + memset(&freq, 0, sizeof(freq)); - val = ipv6_flowlabel_opt_get(sk, &freq); + val = ipv6_flowlabel_opt_get(sk, &freq, flags); if (val < 0) return val; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d18f9f903db..617f0958e16 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -999,7 +999,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group, static void mld_gq_start_timer(struct inet6_dev *idev) { - unsigned long tv = net_random() % idev->mc_maxdelay; + unsigned long tv = prandom_u32() % idev->mc_maxdelay; idev->mc_gq_running = 1; if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2)) @@ -1015,7 +1015,7 @@ static void mld_gq_stop_timer(struct inet6_dev *idev) static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay) { - unsigned long tv = net_random() % delay; + unsigned long tv = prandom_u32() % delay; if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2)) in6_dev_hold(idev); @@ -1030,7 +1030,7 @@ static void mld_ifc_stop_timer(struct inet6_dev *idev) static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay) { - unsigned long tv = net_random() % delay; + unsigned long tv = prandom_u32() % delay; if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2)) in6_dev_hold(idev); @@ -1061,7 +1061,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime) } if (delay >= resptime) - delay = net_random() % resptime; + delay = prandom_u32() % resptime; ma->mca_timer.expires = jiffies + delay; if (!mod_timer(&ma->mca_timer, jiffies + delay)) @@ -1301,8 +1301,17 @@ int igmp6_event_query(struct sk_buff *skb) len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr); len -= skb_network_header_len(skb); - /* Drop queries with not link local source */ - if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + /* RFC3810 6.2 + * Upon reception of an MLD message that contains a Query, the node + * checks if the source address of the message is a valid link-local + * address, if the Hop Limit is set to 1, and if the Router Alert + * option is present in the Hop-By-Hop Options header of the IPv6 + * packet. If any of these checks fails, the packet is dropped. + */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) || + ipv6_hdr(skb)->hop_limit != 1 || + !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) || + IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD)) return -EINVAL; idev = __in6_dev_get(skb->dev); @@ -1620,11 +1629,12 @@ static void mld_sendpack(struct sk_buff *skb) dst_output); out: if (!err) { - ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); - ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); - IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); - } else - IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); + ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); + ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); + IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); + } else { + IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); + } rcu_read_unlock(); return; @@ -1665,7 +1675,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc, skb_tailroom(skb)) : 0) static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, - int type, int gdeleted, int sdeleted) + int type, int gdeleted, int sdeleted, int crsend) { struct inet6_dev *idev = pmc->idev; struct net_device *dev = idev->dev; @@ -1757,7 +1767,7 @@ empty_source: if (type == MLD2_ALLOW_NEW_SOURCES || type == MLD2_BLOCK_OLD_SOURCES) return skb; - if (pmc->mca_crcount || isquery) { + if (pmc->mca_crcount || isquery || crsend) { /* make sure we have room for group header */ if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) { mld_sendpack(skb); @@ -1789,7 +1799,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; - skb = add_grec(skb, pmc, type, 0, 0); + skb = add_grec(skb, pmc, type, 0, 0, 0); spin_unlock_bh(&pmc->mca_lock); } } else { @@ -1798,7 +1808,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc) type = MLD2_MODE_IS_EXCLUDE; else type = MLD2_MODE_IS_INCLUDE; - skb = add_grec(skb, pmc, type, 0, 0); + skb = add_grec(skb, pmc, type, 0, 0, 0); spin_unlock_bh(&pmc->mca_lock); } read_unlock_bh(&idev->lock); @@ -1843,13 +1853,13 @@ static void mld_send_cr(struct inet6_dev *idev) if (pmc->mca_sfmode == MCAST_INCLUDE) { type = MLD2_BLOCK_OLD_SOURCES; dtype = MLD2_BLOCK_OLD_SOURCES; - skb = add_grec(skb, pmc, type, 1, 0); - skb = add_grec(skb, pmc, dtype, 1, 1); + skb = add_grec(skb, pmc, type, 1, 0, 0); + skb = add_grec(skb, pmc, dtype, 1, 1, 0); } if (pmc->mca_crcount) { if (pmc->mca_sfmode == MCAST_EXCLUDE) { type = MLD2_CHANGE_TO_INCLUDE; - skb = add_grec(skb, pmc, type, 1, 0); + skb = add_grec(skb, pmc, type, 1, 0, 0); } pmc->mca_crcount--; if (pmc->mca_crcount == 0) { @@ -1880,8 +1890,8 @@ static void mld_send_cr(struct inet6_dev *idev) type = MLD2_ALLOW_NEW_SOURCES; dtype = MLD2_BLOCK_OLD_SOURCES; } - skb = add_grec(skb, pmc, type, 0, 0); - skb = add_grec(skb, pmc, dtype, 0, 1); /* deleted sources */ + skb = add_grec(skb, pmc, type, 0, 0, 0); + skb = add_grec(skb, pmc, dtype, 0, 1, 0); /* deleted sources */ /* filter mode changes */ if (pmc->mca_crcount) { @@ -1889,7 +1899,7 @@ static void mld_send_cr(struct inet6_dev *idev) type = MLD2_CHANGE_TO_EXCLUDE; else type = MLD2_CHANGE_TO_INCLUDE; - skb = add_grec(skb, pmc, type, 0, 0); + skb = add_grec(skb, pmc, type, 0, 0, 0); pmc->mca_crcount--; } spin_unlock_bh(&pmc->mca_lock); @@ -1997,27 +2007,36 @@ err_out: goto out; } -static void mld_resend_report(struct inet6_dev *idev) +static void mld_send_initial_cr(struct inet6_dev *idev) { - if (mld_in_v1_mode(idev)) { - struct ifmcaddr6 *mcaddr; - read_lock_bh(&idev->lock); - for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) { - if (!(mcaddr->mca_flags & MAF_NOREPORT)) - igmp6_send(&mcaddr->mca_addr, idev->dev, - ICMPV6_MGM_REPORT); - } - read_unlock_bh(&idev->lock); - } else { - mld_send_report(idev, NULL); + struct sk_buff *skb; + struct ifmcaddr6 *pmc; + int type; + + if (mld_in_v1_mode(idev)) + return; + + skb = NULL; + read_lock_bh(&idev->lock); + for (pmc=idev->mc_list; pmc; pmc=pmc->next) { + spin_lock_bh(&pmc->mca_lock); + if (pmc->mca_sfcount[MCAST_EXCLUDE]) + type = MLD2_CHANGE_TO_EXCLUDE; + else + type = MLD2_CHANGE_TO_INCLUDE; + skb = add_grec(skb, pmc, type, 0, 0, 1); + spin_unlock_bh(&pmc->mca_lock); } + read_unlock_bh(&idev->lock); + if (skb) + mld_sendpack(skb); } void ipv6_mc_dad_complete(struct inet6_dev *idev) { idev->mc_dad_count = idev->mc_qrv; if (idev->mc_dad_count) { - mld_resend_report(idev); + mld_send_initial_cr(idev); idev->mc_dad_count--; if (idev->mc_dad_count) mld_dad_start_timer(idev, idev->mc_maxdelay); @@ -2028,7 +2047,7 @@ static void mld_dad_timer_expire(unsigned long data) { struct inet6_dev *idev = (struct inet6_dev *)data; - mld_resend_report(idev); + mld_send_initial_cr(idev); if (idev->mc_dad_count) { idev->mc_dad_count--; if (idev->mc_dad_count) @@ -2328,7 +2347,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma) igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); - delay = net_random() % unsolicited_report_interval(ma->idev); + delay = prandom_u32() % unsolicited_report_interval(ma->idev); spin_lock_bh(&ma->mca_lock); if (del_timer(&ma->mca_timer)) { diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 09a22f4f36c..ca8d4ea48a5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -851,7 +851,7 @@ out: static void ndisc_recv_na(struct sk_buff *skb) { struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); - const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; + struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; u8 *lladdr = NULL; u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + @@ -944,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb) /* * Change: router to host */ - struct rt6_info *rt; - rt = rt6_get_dflt_router(saddr, dev); - if (rt) - ip6_del_rt(rt); + rt6_clean_tohost(dev_net(dev), saddr); } out: diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 95f3f1da0d7..d38e6a8d8b9 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb) .daddr = iph->daddr, .saddr = iph->saddr, }; + int err; dst = ip6_route_output(net, skb->sk, &fl6); - if (dst->error) { + err = dst->error; + if (err) { IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n"); dst_release(dst); - return dst->error; + return err; } /* Drop old route. */ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 7702f9e90a0..4bff1f297e3 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -28,15 +28,32 @@ config NF_CONNTRACK_IPV6 config NF_TABLES_IPV6 depends on NF_TABLES tristate "IPv6 nf_tables support" + help + This option enables the IPv6 support for nf_tables. config NFT_CHAIN_ROUTE_IPV6 depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" + help + This option enables the "route" chain for IPv6 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, flowlabel, hop-limit and + the packet mark. config NFT_CHAIN_NAT_IPV6 depends on NF_TABLES_IPV6 depends on NF_NAT_IPV6 && NFT_NAT tristate "IPv6 nf_tables nat chain support" + help + This option enables the "nat" chain for IPv6 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. + +config NFT_REJECT_IPV6 + depends on NF_TABLES_IPV6 + default NFT_REJECT + tristate config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index d1b4928f34f..70d3dd66f2c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o +obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 710238f58aa..e080fbbbc0e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -1241,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, xt_free_table_info(oldinfo); if (copy_to_user(counters_ptr, counters, - sizeof(struct xt_counters) * num_counters) != 0) - ret = -EFAULT; + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); return ret; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index da00a2ecde5..544b0a9da1b 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -23,181 +23,18 @@ #include <linux/skbuff.h> #include <linux/icmpv6.h> #include <linux/netdevice.h> -#include <net/ipv6.h> -#include <net/tcp.h> #include <net/icmp.h> -#include <net/ip6_checksum.h> -#include <net/ip6_fib.h> -#include <net/ip6_route.h> #include <net/flow.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter_ipv6/ip6t_REJECT.h> +#include <net/netfilter/ipv6/nf_reject.h> + MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>"); MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6"); MODULE_LICENSE("GPL"); -/* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb, int hook) -{ - struct sk_buff *nskb; - struct tcphdr otcph, *tcph; - unsigned int otcplen, hh_len; - int tcphoff, needs_ack; - const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); - struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE 0x0U - const __u8 tclass = DEFAULT_TOS_VALUE; - struct dst_entry *dst = NULL; - u8 proto; - __be16 frag_off; - struct flowi6 fl6; - - if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || - (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { - pr_debug("addr is not unicast.\n"); - return; - } - - proto = oip6h->nexthdr; - tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); - - if ((tcphoff < 0) || (tcphoff > oldskb->len)) { - pr_debug("Cannot get TCP header.\n"); - return; - } - - otcplen = oldskb->len - tcphoff; - - /* IP header checks: fragment, too short. */ - if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { - pr_debug("proto(%d) != IPPROTO_TCP, " - "or too short. otcplen = %d\n", - proto, otcplen); - return; - } - - if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) - BUG(); - - /* No RST for RST. */ - if (otcph.rst) { - pr_debug("RST is set\n"); - return; - } - - /* Check checksum. */ - if (nf_ip6_checksum(oldskb, hook, tcphoff, IPPROTO_TCP)) { - pr_debug("TCP checksum is invalid\n"); - return; - } - - memset(&fl6, 0, sizeof(fl6)); - fl6.flowi6_proto = IPPROTO_TCP; - fl6.saddr = oip6h->daddr; - fl6.daddr = oip6h->saddr; - fl6.fl6_sport = otcph.dest; - fl6.fl6_dport = otcph.source; - security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); - dst = ip6_route_output(net, NULL, &fl6); - if (dst == NULL || dst->error) { - dst_release(dst); - return; - } - dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); - if (IS_ERR(dst)) - return; - - hh_len = (dst->dev->hard_header_len + 15)&~15; - nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) - + sizeof(struct tcphdr) + dst->trailer_len, - GFP_ATOMIC); - - if (!nskb) { - net_dbg_ratelimited("cannot alloc skb\n"); - dst_release(dst); - return; - } - - skb_dst_set(nskb, dst); - - skb_reserve(nskb, hh_len + dst->header_len); - - skb_put(nskb, sizeof(struct ipv6hdr)); - skb_reset_network_header(nskb); - ip6h = ipv6_hdr(nskb); - ip6_flow_hdr(ip6h, tclass, 0); - ip6h->hop_limit = ip6_dst_hoplimit(dst); - ip6h->nexthdr = IPPROTO_TCP; - ip6h->saddr = oip6h->daddr; - ip6h->daddr = oip6h->saddr; - - skb_reset_transport_header(nskb); - tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); - /* Truncate to length (no data) */ - tcph->doff = sizeof(struct tcphdr)/4; - tcph->source = otcph.dest; - tcph->dest = otcph.source; - - if (otcph.ack) { - needs_ack = 0; - tcph->seq = otcph.ack_seq; - tcph->ack_seq = 0; - } else { - needs_ack = 1; - tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin - + otcplen - (otcph.doff<<2)); - tcph->seq = 0; - } - - /* Reset flags */ - ((u_int8_t *)tcph)[13] = 0; - tcph->rst = 1; - tcph->ack = needs_ack; - tcph->window = 0; - tcph->urg_ptr = 0; - tcph->check = 0; - - /* Adjust TCP checksum */ - tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, - &ipv6_hdr(nskb)->daddr, - sizeof(struct tcphdr), IPPROTO_TCP, - csum_partial(tcph, - sizeof(struct tcphdr), 0)); - - nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER - /* If we use ip6_local_out for bridged traffic, the MAC source on - * the RST will be ours, instead of the destination's. This confuses - * some routers/firewalls, and they drop the packet. So we need to - * build the eth header using the original destination's MAC as the - * source, and send the RST packet directly. - */ - if (oldskb->nf_bridge) { - struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; - nskb->protocol = htons(ETH_P_IPV6); - ip6h->payload_len = htons(sizeof(struct tcphdr)); - if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), - oeth->h_source, oeth->h_dest, nskb->len) < 0) - return; - dev_queue_xmit(nskb); - } else -#endif - ip6_local_out(nskb); -} - -static inline void -send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, - unsigned int hooknum) -{ - if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) - skb_in->dev = net->loopback_dev; - - icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); -} static unsigned int reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) @@ -208,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) pr_debug("%s: medium point\n", __func__); switch (reject->with) { case IP6T_ICMP6_NO_ROUTE: - send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum); break; case IP6T_ICMP6_ADM_PROHIBITED: - send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); break; case IP6T_ICMP6_NOT_NEIGHBOUR: - send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); break; case IP6T_ICMP6_ADDR_UNREACH: - send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); break; case IP6T_ICMP6_PORT_UNREACH: - send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); + nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); break; case IP6T_ICMP6_ECHOREPLY: /* Do nothing */ break; case IP6T_TCP_RESET: - send_reset(net, skb, par->hooknum); + nf_send_reset6(net, skb, par->hooknum); break; default: net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index f78f41aca8e..a0d17270117 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -446,6 +446,7 @@ static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par) static struct xt_target synproxy_tg6_reg __read_mostly = { .name = "SYNPROXY", .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), .target = synproxy_tg6, .targetsize = sizeof(struct xt_synproxy_info), .checkentry = synproxy_tg6_check, diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index e0983f3648a..790e0c6b19e 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, struct ipv6hdr *iph = ipv6_hdr(skb); bool ret = false; struct flowi6 fl6 = { + .flowi6_iif = LOOPBACK_IFINDEX, .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, .daddr = iph->saddr, diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 84c7f33d0cf..387d8b8fc18 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -90,17 +90,9 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, if (nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (!nat) { - /* NAT module was loaded late. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) { - pr_debug("failed to add NAT extension\n"); - return NF_ACCEPT; - } - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 767ab8da821..0d5279fd852 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -451,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) } sub_frag_mem_limit(&fq->q, head->truesize); - head->local_df = 1; + head->ignore_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index d77db8a1350..0d812b31277 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -16,34 +16,51 @@ #include <net/netfilter/nf_tables.h> #include <net/netfilter/nf_tables_ipv6.h> +static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + /* malformed packet, drop it */ + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + return NF_DROP; + + return nft_do_chain(&pkt, ops); +} + static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct nft_pktinfo pkt; - if (unlikely(skb->len < sizeof(struct ipv6hdr))) { if (net_ratelimit()) pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " "packet\n"); return NF_ACCEPT; } - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) - return NF_DROP; - return nft_do_chain_pktinfo(&pkt, ops); + return nft_do_chain_ipv6(ops, skb, in, out, okfn); } -static struct nft_af_info nft_af_ipv6 __read_mostly = { +struct nft_af_info nft_af_ipv6 __read_mostly = { .family = NFPROTO_IPV6, .nhooks = NF_INET_NUMHOOKS, .owner = THIS_MODULE, + .nops = 1, .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, [NF_INET_LOCAL_OUT] = nft_ipv6_output, + [NF_INET_FORWARD] = nft_do_chain_ipv6, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, }, }; +EXPORT_SYMBOL_GPL(nft_af_ipv6); static int nf_tables_ipv6_init_net(struct net *net) { @@ -73,44 +90,28 @@ static struct pernet_operations nf_tables_ipv6_net_ops = { .exit = nf_tables_ipv6_exit_net, }; -static unsigned int -nft_do_chain_ipv6(const struct nf_hook_ops *ops, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct nft_pktinfo pkt; - - /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) - return NF_DROP; - - return nft_do_chain_pktinfo(&pkt, ops); -} - -static struct nf_chain_type filter_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type filter_ipv6 = { .name = "filter", .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING), - .fn = { - [NF_INET_LOCAL_IN] = nft_do_chain_ipv6, - [NF_INET_LOCAL_OUT] = nft_ipv6_output, - [NF_INET_FORWARD] = nft_do_chain_ipv6, - [NF_INET_PRE_ROUTING] = nft_do_chain_ipv6, - [NF_INET_POST_ROUTING] = nft_do_chain_ipv6, - }, }; static int __init nf_tables_ipv6_init(void) { + int ret; + nft_register_chain_type(&filter_ipv6); - return register_pernet_subsys(&nf_tables_ipv6_net_ops); + ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_ipv6); + + return ret; } static void __exit nf_tables_ipv6_exit(void) diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index e86dcd70dc7..d189fcb437f 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -47,15 +47,9 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, if (ct == NULL || nf_ct_is_untracked(ct)) return NF_ACCEPT; - nat = nfct_nat(ct); - if (nat == NULL) { - /* Conntrack module was loaded late, can't add extension. */ - if (nf_ct_is_confirmed(ct)) - return NF_ACCEPT; - nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); - if (nat == NULL) - return NF_ACCEPT; - } + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; switch (ctinfo) { case IP_CT_RELATED: @@ -79,7 +73,7 @@ static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_ACCEPT) return ret; if (!nf_nat_initialized(ct, maniptype)) { @@ -170,21 +164,21 @@ static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_nat_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type nft_chain_nat_ipv6 = { .name = "nat", .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_LOCAL_IN), - .fn = { + .hooks = { [NF_INET_PRE_ROUTING] = nf_nat_ipv6_prerouting, [NF_INET_POST_ROUTING] = nf_nat_ipv6_postrouting, [NF_INET_LOCAL_OUT] = nf_nat_ipv6_output, [NF_INET_LOCAL_IN] = nf_nat_ipv6_fn, }, - .me = THIS_MODULE, }; static int __init nft_chain_nat_ipv6_init(void) diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 3fe40f0456a..42031299585 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -47,7 +47,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u32 *)ipv6_hdr(skb)); - ret = nft_do_chain_pktinfo(&pkt, ops); + ret = nft_do_chain(&pkt, ops); if (ret != NF_DROP && ret != NF_QUEUE && (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || @@ -59,15 +59,15 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, return ret; } -static struct nf_chain_type nft_chain_route_ipv6 = { - .family = NFPROTO_IPV6, +static const struct nf_chain_type nft_chain_route_ipv6 = { .name = "route", .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV6, + .owner = THIS_MODULE, .hook_mask = (1 << NF_INET_LOCAL_OUT), - .fn = { + .hooks = { [NF_INET_LOCAL_OUT] = nf_route_table_hook, }, - .me = THIS_MODULE, }; static int __init nft_chain_route_init(void) diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c new file mode 100644 index 00000000000..0bc19fa8782 --- /dev/null +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_reject.h> +#include <net/netfilter/ipv6/nf_reject.h> + +void nft_reject_ipv6_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach6(net, pkt->skb, priv->icmp_code, + pkt->ops->hooknum); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} +EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); + +static struct nft_expr_type nft_reject_ipv6_type; +static const struct nft_expr_ops nft_reject_ipv6_ops = { + .type = &nft_reject_ipv6_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv6_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv6_type __read_mostly = { + .family = NFPROTO_IPV6, + .name = "reject", + .ops = &nft_reject_ipv6_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv6_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv6_type); +} + +static void __exit nft_reject_ipv6_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv6_type); +} + +module_init(nft_reject_ipv6_module_init); +module_exit(nft_reject_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject"); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 827f795209c..5ec867e4a8b 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -6,35 +6,7 @@ #include <net/ipv6.h> #include <net/ip6_fib.h> #include <net/addrconf.h> - -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ - static atomic_t ipv6_fragmentation_id; - int old, new; - -#if IS_ENABLED(CONFIG_IPV6) - if (rt && !(rt->dst.flags & DST_NOPEER)) { - struct inet_peer *peer; - struct net *net; - - net = dev_net(rt->dst.dev); - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); - if (peer) { - fhdr->identification = htonl(inet_getid(peer, 0)); - inet_putpeer(peer); - return; - } - } -#endif - do { - old = atomic_read(&ipv6_fragmentation_id); - new = old + 1; - if (!new) - new = 1; - } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); - fhdr->identification = htonl(new); -} -EXPORT_SYMBOL(ipv6_select_ident); +#include <net/secure_seq.h> int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) { @@ -106,6 +78,7 @@ int __ip6_local_out(struct sk_buff *skb) if (len > IPV6_MAXPLEN) len = 0; ipv6_hdr(skb)->payload_len = htons(len); + IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a83243c3d65..5b7a1ed2aba 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -31,7 +31,7 @@ struct proto pingv6_prot = { .owner = THIS_MODULE, .init = ping_init_sock, .close = ping_close, - .connect = ip6_datagram_connect, + .connect = ip6_datagram_connect_v6_only, .disconnect = udp_disconnect, .setsockopt = ipv6_setsockopt, .getsockopt = ipv6_getsockopt, @@ -51,7 +51,6 @@ static struct inet_protosw pingv6_protosw = { .protocol = IPPROTO_ICMPV6, .prot = &pingv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; @@ -62,10 +61,9 @@ static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, { return -EAFNOSUPPORT; } -static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, +static void dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, struct sk_buff *skb) { - return -EAFNOSUPPORT; } static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err) { @@ -103,7 +101,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return err; if (msg->msg_name) { - struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name); if (msg->msg_namelen < sizeof(struct sockaddr_in6) || u->sin6_family != AF_INET6) { return -EINVAL; @@ -136,6 +134,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, fl6.flowi6_proto = IPPROTO_ICMPV6; fl6.saddr = np->saddr; fl6.daddr = *daddr; + fl6.flowi6_mark = sk->sk_mark; fl6.fl6_icmp_type = user_icmph.icmp6_type; fl6.fl6_icmp_code = user_icmph.icmp6_code; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -145,7 +144,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr, 1); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, daddr); if (IS_ERR(dst)) return PTR_ERR(dst); rt = (struct rt6_info *) dst; @@ -168,12 +167,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.wcheck = 0; pfh.family = AF_INET6; - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, @@ -182,8 +176,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, MSG_DONTWAIT, np->dontfrag); if (err) { - ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, - ICMP6_MIB_OUTERRORS); + ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, + ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { err = icmpv6_push_pending_frames(sk, &fl6, @@ -254,7 +248,9 @@ int __init pingv6_init(void) return ret; #endif pingv6_ops.ipv6_recv_error = ipv6_recv_error; - pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; + pingv6_ops.ip6_datagram_recv_common_ctl = ip6_datagram_recv_common_ctl; + pingv6_ops.ip6_datagram_recv_specific_ctl = + ip6_datagram_recv_specific_ctl; pingv6_ops.icmpv6_err_convert = icmpv6_err_convert; pingv6_ops.ipv6_icmp_error = ipv6_icmp_error; pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; @@ -267,7 +263,8 @@ int __init pingv6_init(void) void pingv6_exit(void) { pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; - pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.ip6_datagram_recv_common_ctl = dummy_ip6_datagram_recv_ctl; + pingv6_ops.ip6_datagram_recv_specific_ctl = dummy_ip6_datagram_recv_ctl; pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert; pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error; pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 091d066a57b..3317440ea34 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -186,7 +186,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib) /* can be called either with percpu mib (pcpumib != NULL), * or shared one (smib != NULL) */ -static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, +static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib, atomic_long_t *smib, const struct snmp_mib *itemlist) { @@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, } } -static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib, +static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib, const struct snmp_mib *itemlist, size_t syncpoff) { int i; @@ -215,14 +215,14 @@ static int snmp6_seq_show(struct seq_file *seq, void *v) { struct net *net = (struct net *)seq->private; - snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics, + snmp6_seq_show_item64(seq, net->mib.ipv6_statistics, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, + snmp6_seq_show_item(seq, net->mib.icmpv6_statistics, NULL, snmp6_icmp6_list); snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, + snmp6_seq_show_item(seq, net->mib.udp_stats_in6, NULL, snmp6_udp6_list); - snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, + snmp6_seq_show_item(seq, net->mib.udplite_stats_in6, NULL, snmp6_udplite6_list); return 0; } @@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v) struct inet6_dev *idev = (struct inet6_dev *)seq->private; seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); - snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6, + snmp6_seq_show_item64(seq, idev->stats.ipv6, snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs, snmp6_icmp6_list); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 7fb4e14c467..b2dc60b0c76 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -250,6 +250,10 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; + + if (addr->sin6_family != AF_INET6) + return -EINVAL; + addr_type = ipv6_addr_type(&addr->sin6_addr); /* Raw sockets are IPv6 only */ @@ -457,7 +461,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct sk_buff *skb; size_t copied; int err; @@ -734,7 +738,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; - struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); @@ -792,7 +796,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } @@ -865,19 +868,13 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; @@ -1210,7 +1207,7 @@ struct proto rawv6_prot = { .owner = THIS_MODULE, .close = rawv6_close, .destroy = raw6_destroy, - .connect = ip6_datagram_connect, + .connect = ip6_datagram_connect_v6_only, .disconnect = udp_disconnect, .ioctl = rawv6_ioctl, .init = rawv6_init_sk, @@ -1325,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = { .protocol = IPPROTO_IP, /* wild card */ .prot = &rawv6_prot, .ops = &inet6_sockraw_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_REUSE, }; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ddb9d41c8ee..f23fbd28a50 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -66,8 +66,9 @@ #endif enum rt6_nud_state { - RT6_NUD_FAIL_HARD = -2, - RT6_NUD_FAIL_SOFT = -1, + RT6_NUD_FAIL_HARD = -3, + RT6_NUD_FAIL_PROBE = -2, + RT6_NUD_FAIL_DO_RR = -1, RT6_NUD_SUCCEED = 1 }; @@ -83,9 +84,9 @@ static void ip6_dst_ifdown(struct dst_entry *, static int ip6_dst_gc(struct dst_ops *ops); static int ip6_pkt_discard(struct sk_buff *skb); -static int ip6_pkt_discard_out(struct sk_buff *skb); +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); @@ -103,6 +104,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif +static void rt6_bind_peer(struct rt6_info *rt, int create) +{ + struct inet_peer_base *base; + struct inet_peer *peer; + + base = inetpeer_base_ptr(rt->_rt6i_peer); + if (!base) + return; + + peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); + if (peer) { + if (!rt6_set_peer(rt, peer)) + inet_putpeer(peer); + } +} + +static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) +{ + if (rt6_has_peer(rt)) + return rt6_peer_ptr(rt); + + rt6_bind_peer(rt, create); + return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); +} + +static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +{ + return __rt6_get_peer(rt, 1); +} + static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { struct rt6_info *rt = (struct rt6_info *) dst; @@ -118,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) unsigned long prev, new; p = peer->metrics; - if (inet_metrics_new(peer)) + if (inet_metrics_new(peer) || + (old & DST_METRICS_FORCE_OVERWRITE)) memcpy(p, old_p, sizeof(u32) * RTAX_MAX); new = (unsigned long) p; @@ -258,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = { .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, - .output = dst_discard, + .output = dst_discard_sk, }, .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), .rt6i_protocol = RTPROT_KERNEL, @@ -311,22 +343,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) } } -void rt6_bind_peer(struct rt6_info *rt, int create) -{ - struct inet_peer_base *base; - struct inet_peer *peer; - - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; - - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); - } -} - static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, int how) { @@ -358,12 +374,6 @@ static bool rt6_check_expired(const struct rt6_info *rt) return false; } -static bool rt6_need_strict(const struct in6_addr *daddr) -{ - return ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); -} - /* Multipath route selection: * Hash based function using packet header and flowlabel. * Adapted from fib_info_hashfn() @@ -521,7 +531,7 @@ static void rt6_probe(struct rt6_info *rt) work = kmalloc(sizeof(*work), GFP_ATOMIC); if (neigh && work) - neigh->updated = jiffies; + __neigh_set_probe_once(neigh); if (neigh) write_unlock(&neigh->lock); @@ -577,11 +587,13 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt) #ifdef CONFIG_IPV6_ROUTER_PREF else if (!(neigh->nud_state & NUD_FAILED)) ret = RT6_NUD_SUCCEED; + else + ret = RT6_NUD_FAIL_PROBE; #endif read_unlock(&neigh->lock); } else { ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? - RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; + RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR; } rcu_read_unlock_bh(); @@ -618,16 +630,17 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict, goto out; m = rt6_score_route(rt, oif, strict); - if (m == RT6_NUD_FAIL_SOFT) { + if (m == RT6_NUD_FAIL_DO_RR) { match_do_rr = true; m = 0; /* lowest valid score */ - } else if (m < 0) { + } else if (m == RT6_NUD_FAIL_HARD) { goto out; } if (strict & RT6_LOOKUP_F_REACHABLE) rt6_probe(rt); + /* note that m can be RT6_NUD_FAIL_PROBE at this point */ if (m > *mpri) { *do_rr = match_do_rr; *mpri = m; @@ -839,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup); be destroyed. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, + struct nlattr *mx, int mx_len) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info); + err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); write_unlock_bh(&table->tb6_lock); return err; @@ -857,7 +871,7 @@ int ip6_ins_rt(struct rt6_info *rt) struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; - return __ip6_ins_rt(rt, &info); + return __ip6_ins_rt(rt, &info, NULL, 0); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1044,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new->__use = 1; new->input = dst_discard; - new->output = dst_discard; + new->output = dst_discard_sk; if (dst_metrics_read_only(&ort->dst)) new->_metrics = ort->dst._metrics; @@ -1162,7 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = oif; - fl6.flowi6_mark = mark; + fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark); fl6.daddr = iph->daddr; fl6.saddr = iph->saddr; fl6.flowlabel = ip6_flowinfo(iph); @@ -1259,6 +1273,7 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark) struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; fl6.daddr = iph->daddr; @@ -1280,6 +1295,7 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif, struct flowi6 fl6; memset(&fl6, 0, sizeof(fl6)); + fl6.flowi6_iif = LOOPBACK_IFINDEX; fl6.flowi6_oif = oif; fl6.flowi6_mark = mark; fl6.daddr = msg->dest; @@ -1324,7 +1340,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) - return mtu; + goto out; mtu = IPV6_MIN_MTU; @@ -1334,7 +1350,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst) mtu = idev->cnf.mtu6; rcu_read_unlock(); - return mtu; +out: + return min_t(unsigned int, mtu, IP6_MAX_MTU); } static struct dst_entry *icmp6_dst_gc_list; @@ -1438,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops) goto out; net->ipv6.ip6_rt_gc_expire++; - fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); + fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true); entries = dst_entries_get_slow(ops); if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; @@ -1495,7 +1512,7 @@ int ip6_route_add(struct fib6_config *cfg) if (!table) goto out; - rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); + rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table); if (!rt) { err = -ENOMEM; @@ -1525,17 +1542,11 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) - rt->dst.flags |= DST_HOST; - - if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { - u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); - if (!metrics) { - err = -ENOMEM; - goto out; - } - dst_init_metrics(&rt->dst, metrics, 0); + if (rt->rt6i_dst.plen == 128) { + rt->dst.flags |= DST_HOST; + dst_metrics_set_force_overwrite(&rt->dst); } + #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); rt->rt6i_src.plen = cfg->fc_src_len; @@ -1568,7 +1579,7 @@ int ip6_route_add(struct fib6_config *cfg) switch (cfg->fc_type) { case RTN_BLACKHOLE: rt->dst.error = -EINVAL; - rt->dst.output = dst_discard; + rt->dst.output = dst_discard_sk; rt->dst.input = dst_discard; break; case RTN_PROHIBIT: @@ -1654,31 +1665,13 @@ int ip6_route_add(struct fib6_config *cfg) rt->rt6i_flags = cfg->fc_flags; install_route: - if (cfg->fc_mx) { - struct nlattr *nla; - int remaining; - - nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { - int type = nla_type(nla); - - if (type) { - if (type > RTAX_MAX) { - err = -EINVAL; - goto out; - } - - dst_metric_set(&rt->dst, type, nla_get_u32(nla)); - } - } - } - rt->dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); out: if (dev) @@ -1905,9 +1898,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, else rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) - rt6_set_from(rt, ort); + rt6_set_from(rt, ort); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES @@ -2140,7 +2131,7 @@ static int ip6_pkt_discard(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES); @@ -2151,7 +2142,7 @@ static int ip6_pkt_prohibit(struct sk_buff *skb) return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES); } -static int ip6_pkt_prohibit_out(struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb) { skb->dev = skb_dst(skb)->dev; return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES); @@ -2166,12 +2157,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, bool anycast) { struct net *net = dev_net(idev->dev); - struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - - if (!rt) { - net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); + struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, + DST_NOCOUNT, NULL); + if (!rt) return ERR_PTR(-ENOMEM); - } in6_dev_hold(idev); @@ -2242,7 +2231,28 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) .net = net, .addr = &ifp->addr, }; - fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni); + fib6_clean_all(net, fib6_remove_prefsrc, &adni); +} + +#define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) + +/* Remove routers and update dst entries when gateway turn into host. */ +static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +{ + struct in6_addr *gateway = (struct in6_addr *)arg; + + if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || + ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && + ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + return -1; + } + return 0; +} + +void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) +{ + fib6_clean_all(net, fib6_clean_tohost, gateway); } struct arg_dev_net { @@ -2269,7 +2279,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) .net = net, }; - fib6_clean_all(net, fib6_ifdown, 0, &adn); + fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); } @@ -2324,7 +2334,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu) .mtu = mtu, }; - fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); + fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg); } static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -2720,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh) if (tb[RTA_OIF]) oif = nla_get_u32(tb[RTA_OIF]); + if (tb[RTA_MARK]) + fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); + if (iif) { struct net_device *dev; int flags = 0; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 366fbba3359..4f408176dc6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -475,6 +475,7 @@ static void ipip6_tunnel_uninit(struct net_device *dev) ipip6_tunnel_unlink(sitn, tunnel); ipip6_tunnel_del_prl(tunnel, NULL); } + ip_tunnel_dst_reset_all(tunnel); dev_put(dev); } @@ -559,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->dev->ifindex, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, + ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_IPV6, 0); err = 0; goto out; @@ -671,7 +672,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { - struct pcpu_tstats *tstats; + struct pcpu_sw_netstats *tstats; if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && tunnel->parms.iph.protocol != 0) @@ -702,8 +703,10 @@ static int ipip6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); @@ -924,7 +927,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (tunnel->parms.iph.daddr && skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; @@ -966,11 +969,14 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); - if (IS_ERR(skb)) + if (IS_ERR(skb)) { + ip_rt_put(rt); goto out; + } - err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, - ttl, df, !net_eq(tunnel->net, dev_net(dev))); + err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + IPPROTO_IPV6, tos, ttl, df, + !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); return NETDEV_TX_OK; @@ -1078,6 +1084,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) t->parms.link = p->link; ipip6_tunnel_bind_dev(t->dev); } + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); } @@ -1108,6 +1115,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t, t->ip6rd.relay_prefix = relay_prefix; t->ip6rd.prefixlen = ip6rd->prefixlen; t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; + ip_tunnel_dst_reset_all(t); netdev_state_change(t->dev); return 0; } @@ -1119,8 +1127,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) int err = 0; struct ip_tunnel_parm p; struct ip_tunnel_prl prl; - struct ip_tunnel *t; - struct net *net = dev_net(dev); + struct ip_tunnel *t = netdev_priv(dev); + struct net *net = t->net; struct sit_net *sitn = net_generic(net, sit_net_id); #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel_6rd ip6rd; @@ -1131,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) #ifdef CONFIG_IPV6_SIT_6RD case SIOCGET6RD: #endif - t = NULL; if (dev == sitn->fb_tunnel_dev) { if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { err = -EFAULT; break; } t = ipip6_tunnel_locate(net, &p, 0); + if (t == NULL) + t = netdev_priv(dev); } - if (t == NULL) - t = netdev_priv(dev); err = -EFAULT; if (cmd == SIOCGETTUNNEL) { @@ -1236,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EINVAL; if (dev == sitn->fb_tunnel_dev) goto done; - err = -ENOENT; - if (!(t = netdev_priv(dev))) - goto done; err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data); break; @@ -1254,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = -EFAULT; if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl))) goto done; - err = -ENOENT; - if (!(t = netdev_priv(dev))) - goto done; switch (cmd) { case SIOCDELPRL: @@ -1267,6 +1268,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL); break; } + ip_tunnel_dst_reset_all(t); netdev_state_change(dev); break; @@ -1283,8 +1285,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) sizeof(ip6rd))) goto done; - t = netdev_priv(dev); - if (cmd != SIOCDEL6RD) { err = ipip6_tunnel_update_6rd(t, &ip6rd); if (err < 0) @@ -1322,6 +1322,9 @@ static const struct net_device_ops ipip6_netdev_ops = { static void ipip6_dev_free(struct net_device *dev) { + struct ip_tunnel *tunnel = netdev_priv(dev); + + free_percpu(tunnel->dst_cache); free_percpu(dev->tstats); free_netdev(dev); } @@ -1352,7 +1355,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) static int ipip6_tunnel_init(struct net_device *dev) { struct ip_tunnel *tunnel = netdev_priv(dev); - int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1361,14 +1363,14 @@ static int ipip6_tunnel_init(struct net_device *dev) memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); ipip6_tunnel_bind_dev(dev); - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_tstats *ipip6_tunnel_stats; - ipip6_tunnel_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipip6_tunnel_stats->syncp); + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; } return 0; @@ -1380,7 +1382,6 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) struct iphdr *iph = &tunnel->parms.iph; struct net *net = dev_net(dev); struct sit_net *sitn = net_generic(net, sit_net_id); - int i; tunnel->dev = dev; tunnel->net = dev_net(dev); @@ -1391,14 +1392,14 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev) iph->ihl = 5; iph->ttl = 64; - dev->tstats = alloc_percpu(struct pcpu_tstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_tstats *ipip6_fb_stats; - ipip6_fb_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ipip6_fb_stats->syncp); + tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); + if (!tunnel->dst_cache) { + free_percpu(dev->tstats); + return -ENOMEM; } dev_hold(dev); @@ -1827,4 +1828,5 @@ xfrm_tunnel_failed: module_init(sit_init); module_exit(sit_cleanup); MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("sit"); MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 535a3ad262f..a822b880689 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -216,6 +216,8 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) ireq->ir_iif = inet6_iif(skb); + ireq->ir_mark = inet_request_mark(sk, skb); + req->expires = 0UL; req->num_retrans = 0; ireq->ecn_ok = ecn_ok; @@ -242,12 +244,12 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) final_p = fl6_update_dst(&fl6, np->opt, &final); fl6.saddr = ireq->ir_v6_loc_addr; fl6.flowi6_oif = sk->sk_bound_dev_if; - fl6.flowi6_mark = sk->sk_mark; + fl6.flowi6_mark = ireq->ir_mark; fl6.fl6_dport = ireq->ir_rmt_port; fl6.fl6_sport = inet_sk(sk)->inet_sport; security_req_classify_flow(req, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) goto out_free; } diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 107b2f1d90a..058f3eca2e5 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -24,6 +24,27 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "anycast_src_echo_reply", + .data = &init_net.ipv6.sysctl.anycast_src_echo_reply, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "flowlabel_consistency", + .data = &init_net.ipv6.sysctl.flowlabel_consistency, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "fwmark_reflect", + .data = &init_net.ipv6.sysctl.fwmark_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -51,6 +72,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) if (!ipv6_table) goto out; ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; + ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; + ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 0740f93a114..229239ad96b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -39,7 +39,7 @@ #include <linux/ipsec.h> #include <linux/times.h> #include <linux/slab.h> - +#include <linux/uaccess.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> #include <linux/random.h> @@ -65,8 +65,6 @@ #include <net/tcp_memcontrol.h> #include <net/busy_poll.h> -#include <asm/uaccess.h> - #include <linux/proc_fs.h> #include <linux/seq_file.h> @@ -156,7 +154,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - usin->sin6_addr = flowlabel->dst; fl6_sock_release(flowlabel); } } @@ -165,12 +162,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, * connect() to INADDR_ANY means loopback (BSD'ism). */ - if(ipv6_addr_any(&usin->sin6_addr)) + if (ipv6_addr_any(&usin->sin6_addr)) usin->sin6_addr.s6_addr[15] = 0x1; addr_type = ipv6_addr_type(&usin->sin6_addr); - if(addr_type & IPV6_ADDR_MULTICAST) + if (addr_type & IPV6_ADDR_MULTICAST) return -ENETUNREACH; if (addr_type&IPV6_ADDR_LINKLOCAL) { @@ -258,7 +255,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; @@ -337,13 +334,14 @@ static void tcp_v6_mtu_reduced(struct sock *sk) static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data; + const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); struct ipv6_pinfo *np; struct sock *sk; int err; struct tcp_sock *tp; - __u32 seq; + struct request_sock *fastopen; + __u32 seq, snd_una; struct net *net = dev_net(skb->dev); sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, @@ -374,8 +372,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp = tcp_sk(sk); seq = ntohl(th->seq); + /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ + fastopen = tp->fastopen_rsk; + snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; if (sk->sk_state != TCP_LISTEN && - !between(seq, tp->snd_una, tp->snd_nxt)) { + !between(seq, snd_una, tp->snd_nxt)) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); goto out; } @@ -398,6 +399,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk->sk_state == TCP_LISTEN) goto out; + if (!ip6_sk_accept_pmtu(sk)) + goto out; + tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); @@ -436,8 +440,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, goto out; case TCP_SYN_SENT: - case TCP_SYN_RECV: /* Cannot happen. - It can, it SYNs are crossed. --ANK */ + case TCP_SYN_RECV: + /* Only in fast or simultaneous open. If a fast open socket is + * is already accepted it is treated as a connected one below. + */ + if (fastopen && fastopen->sk == NULL) + break; + if (!sock_owned_by_user(sk)) { sk->sk_err = err; sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */ @@ -463,24 +472,28 @@ out: static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, struct flowi6 *fl6, struct request_sock *req, - u16 queue_mapping) + u16 queue_mapping, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); struct ipv6_pinfo *np = inet6_sk(sk); - struct sk_buff * skb; + struct sk_buff *skb; int err = -ENOMEM; /* First, grab a route. */ if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, NULL); + skb = tcp_make_synack(sk, dst, req, foc); if (skb) { __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; + if (np->repflow && (ireq->pktopts != NULL)) + fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); + skb_set_queue_mapping(skb, queue_mapping); err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass); err = net_xmit_eval(err); @@ -495,9 +508,11 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req) struct flowi6 fl6; int res; - res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); - if (!res) + res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL); + if (!res) { TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); + } return res; } @@ -525,8 +540,8 @@ static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); } -static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, - int optlen) +static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, + int optlen) { struct tcp_md5sig cmd; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; @@ -710,7 +725,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, .send_reset = tcp_v6_send_reset, - .syn_ack_timeout = tcp_syn_ack_timeout, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG @@ -721,8 +736,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { #endif static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, - u32 tsval, u32 tsecr, - struct tcp_md5sig_key *key, int rst, u8 tclass) + u32 tsval, u32 tsecr, int oif, + struct tcp_md5sig_key *key, int rst, u8 tclass, + u32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -784,6 +800,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, memset(&fl6, 0, sizeof(fl6)); fl6.daddr = ipv6_hdr(skb)->saddr; fl6.saddr = ipv6_hdr(skb)->daddr; + fl6.flowlabel = label; buff->ip_summed = CHECKSUM_PARTIAL; buff->csum = 0; @@ -791,8 +808,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr); fl6.flowi6_proto = IPPROTO_TCP; - if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) + if (rt6_need_strict(&fl6.daddr) && !oif) fl6.flowi6_oif = inet6_iif(skb); + else + fl6.flowi6_oif = oif; + fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark); fl6.fl6_dport = t1->dest; fl6.fl6_sport = t1->source; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -801,7 +821,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, * Underlying function will use this to retrieve the network * namespace */ - dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); + dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL); if (!IS_ERR(dst)) { skb_dst_set(buff, dst); ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); @@ -826,6 +846,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) int genhash; struct sock *sk1 = NULL; #endif + int oif; if (th->rst) return; @@ -869,7 +890,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); + oif = sk ? sk->sk_bound_dev_if : 0; + tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG release_sk1: @@ -881,10 +903,12 @@ release_sk1: } static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, - u32 win, u32 tsval, u32 tsecr, - struct tcp_md5sig_key *key, u8 tclass) + u32 win, u32 tsval, u32 tsecr, int oif, + struct tcp_md5sig_key *key, u8 tclass, + u32 label) { - tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); + tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, + label); } static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -895,8 +919,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt, tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, - tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), - tw->tw_tclass); + tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), + tw->tw_tclass, (tw->tw_flowlabel << 12)); inet_twsk_put(tw); } @@ -904,13 +928,19 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, struct request_sock *req) { - tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, - req->rcv_wnd, tcp_time_stamp, req->ts_recent, - tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); + /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV + * sk->sk_state == TCP_SYN_RECV -> for Fast Open. + */ + tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? + tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, + tcp_rsk(req)->rcv_nxt, + req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, + tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), + 0, 0); } -static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) +static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { struct request_sock *req, **prev; const struct tcphdr *th = tcp_hdr(skb); @@ -955,8 +985,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) struct tcp_sock *tp = tcp_sk(sk); __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; + struct tcp_fastopen_cookie foc = { .len = -1 }; + bool want_cookie = false, fastopen; struct flowi6 fl6; - bool want_cookie = false; + int err; if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_conn_request(sk, skb); @@ -987,7 +1019,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_clear_options(&tmp_opt); tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr); tmp_opt.user_mss = tp->rx_opt.user_mss; - tcp_parse_options(skb, &tmp_opt, 0, NULL); + tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc); if (want_cookie && !tmp_opt.saw_tstamp) tcp_clear_options(&tmp_opt); @@ -1002,6 +1034,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) TCP_ECN_create_request(req, skb, sock_net(sk)); ireq->ir_iif = sk->sk_bound_dev_if; + ireq->ir_mark = inet_request_mark(sk, skb); /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && @@ -1011,7 +1044,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) if (!isn) { if (ipv6_opt_accepted(sk, skb) || np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || - np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { + np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || + np->repflow) { atomic_inc(&skb->users); ireq->pktopts = skb; } @@ -1059,19 +1093,27 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v6_init_sequence(skb); } have_isn: - tcp_rsk(req)->snt_isn = isn; if (security_inet_conn_request(sk, skb, req)) goto drop_and_release; - if (tcp_v6_send_synack(sk, dst, &fl6, req, - skb_get_queue_mapping(skb)) || - want_cookie) + if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL) goto drop_and_free; + tcp_rsk(req)->snt_isn = isn; tcp_rsk(req)->snt_synack = tcp_time_stamp; - tcp_rsk(req)->listener = NULL; - inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + tcp_openreq_init_rwin(req, sk, dst); + fastopen = !want_cookie && + tcp_try_fastopen(sk, skb, req, &foc, dst); + err = tcp_v6_send_synack(sk, dst, &fl6, req, + skb_get_queue_mapping(skb), &foc); + if (!fastopen) { + if (err || want_cookie) + goto drop_and_free; + + tcp_rsk(req)->listener = NULL; + inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); + } return 0; drop_and_release: @@ -1083,9 +1125,9 @@ drop: return 0; /* don't send reset */ } -static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) +static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, + struct request_sock *req, + struct dst_entry *dst) { struct inet_request_sock *ireq; struct ipv6_pinfo *newnp, *np = inet6_sk(sk); @@ -1135,7 +1177,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + if (np->repflow) + newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1215,7 +1259,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newnp->opt = NULL; newnp->mcast_oif = inet6_iif(skb); newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; - newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); + newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); + if (np->repflow) + newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); /* Clone native IPv6 options from listening socket (if any) @@ -1231,7 +1277,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); - tcp_mtup_init(newsk); tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && @@ -1245,7 +1290,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ - if ((key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr)) != NULL) { + key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); + if (key != NULL) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key @@ -1275,26 +1321,6 @@ out: return NULL; } -static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) -{ - if (skb->ip_summed == CHECKSUM_COMPLETE) { - if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, skb->csum)) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - return 0; - } - } - - skb->csum = ~csum_unfold(tcp_v6_check(skb->len, - &ipv6_hdr(skb)->saddr, - &ipv6_hdr(skb)->daddr, 0)); - - if (skb->len <= 76) { - return __skb_checksum_complete(skb); - } - return 0; -} - /* The socket must have it's spinlock held when we get * here. * @@ -1321,7 +1347,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return tcp_v4_do_rcv(sk, skb); #ifdef CONFIG_TCP_MD5SIG - if (tcp_v6_inbound_md5_hash (sk, skb)) + if (tcp_v6_inbound_md5_hash(sk, skb)) goto discard; #endif @@ -1380,7 +1406,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) * otherwise we just shortcircuit this and continue with * the new socket.. */ - if(nsk != sk) { + if (nsk != sk) { sock_rps_save_rxhash(nsk, skb); if (tcp_child_process(sk, nsk, skb)) goto reset; @@ -1425,8 +1451,10 @@ ipv6_pktoptions: np->mcast_oif = inet6_iif(opt_skb); if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; - if (np->rxopt.bits.rxtclass) - np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb)); + if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) + np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); + if (np->repflow) + np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); if (ipv6_opt_accepted(sk, opt_skb)) { skb_set_owner_r(opt_skb, sk); opt_skb = xchg(&np->pktoptions, opt_skb); @@ -1466,7 +1494,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) if (!pskb_may_pull(skb, th->doff*4)) goto discard_it; - if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) + if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo)) goto csum_error; th = tcp_hdr(skb); @@ -1586,7 +1614,8 @@ do_time_wait: break; case TCP_TW_RST: goto no_tcp_socket; - case TCP_TW_SUCCESS:; + case TCP_TW_SUCCESS: + ; } goto discard_it; } @@ -1631,7 +1660,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, - .twsk_destructor= tcp_twsk_destructor, + .twsk_destructor = tcp_twsk_destructor, }; static const struct inet_connection_sock_af_ops ipv6_specific = { @@ -1665,7 +1694,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = { /* * TCP over IPv4 via INET6 API */ - static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, @@ -1740,7 +1768,7 @@ static void get_openreq6(struct seq_file *seq, dest->s6_addr32[2], dest->s6_addr32[3], ntohs(inet_rsk(req)->ir_rmt_port), TCP_SYN_RECV, - 0,0, /* could print option size, but that is af dependent. */ + 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->num_timeout, @@ -1759,6 +1787,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) const struct inet_sock *inet = inet_sk(sp); const struct tcp_sock *tp = tcp_sk(sp); const struct inet_connection_sock *icsk = inet_csk(sp); + struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; dest = &sp->sk_v6_daddr; src = &sp->sk_v6_rcv_saddr; @@ -1799,9 +1828,11 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) atomic_read(&sp->sk_refcnt), sp, jiffies_to_clock_t(icsk->icsk_rto), jiffies_to_clock_t(icsk->icsk_ack.ato), - (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, + (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong, tp->snd_cwnd, - tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh + sp->sk_state == TCP_LISTEN ? + (fastopenq ? fastopenq->max_qlen : 0) : + (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh) ); } @@ -1961,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = { .protocol = IPPROTO_TCP, .prot = &tcpv6_prot, .ops = &inet6_stream_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT | INET_PROTOSW_ICSK, }; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 6d18157dc32..01b0ff9a0c2 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -42,7 +42,7 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, if (NAPI_GRO_CB(skb)->flush) goto skip_csum; - wsum = skb->csum; + wsum = NAPI_GRO_CB(skb)->csum; switch (skb->ip_summed) { case CHECKSUM_NONE: @@ -66,14 +66,14 @@ skip_csum: return tcp_gro_receive(head, skb); } -static int tcp6_gro_complete(struct sk_buff *skb) +static int tcp6_gro_complete(struct sk_buff *skb, int thoff) { const struct ipv6hdr *iph = ipv6_hdr(skb); struct tcphdr *th = tcp_hdr(skb); - th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), - &iph->saddr, &iph->daddr, 0); - skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; + th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, + &iph->daddr, 0); + skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6; return tcp_gro_complete(skb); } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index bcd5699313c..7092ff78fd8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -460,9 +460,7 @@ try_again: /* Copy the address. */ if (msg->msg_name) { - struct sockaddr_in6 *sin6; - - sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); sin6->sin6_family = AF_INET6; sin6->sin6_port = udp_hdr(skb)->source; sin6->sin6_flowinfo = 0; @@ -479,12 +477,16 @@ try_again: } *addr_len = sizeof(*sin6); } + + if (np->rxopt.all) + ip6_datagram_recv_common_ctl(sk, msg, skb); + if (is_udp4) { if (inet->cmsg_flags) ip_cmsg_recv(msg, skb); } else { if (np->rxopt.all) - ip6_datagram_recv_ctl(sk, msg, skb); + ip6_datagram_recv_specific_ctl(sk, msg, skb); } err = copied; @@ -538,8 +540,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (sk == NULL) return; - if (type == ICMPV6_PKT_TOOBIG) + if (type == ICMPV6_PKT_TOOBIG) { + if (!ip6_sk_accept_pmtu(sk)) + goto out; ip6_sk_update_pmtu(skb, sk, info); + } if (type == NDISC_REDIRECT) { ip6_sk_redirect(skb, sk); goto out; @@ -629,6 +634,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { int ret; + /* Verify checksum before giving to encap */ + if (udp_lib_checksum_complete(skb)) + goto csum_error; + ret = encap_rcv(sk, skb); if (ret <= 0) { UDP_INC_STATS_BH(sock_net(sk), @@ -665,8 +674,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto csum_error; } - if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) + if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { + UDP6_INC_STATS_BH(sock_net(sk), + UDP_MIB_RCVBUFERRORS, is_udplite); goto drop; + } skb_dst_drop(skb); @@ -681,6 +693,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) bh_unlock_sock(sk); return rc; + csum_error: UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: @@ -696,17 +709,16 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, int dif) { struct hlist_nulls_node *node; - struct sock *s = sk; unsigned short num = ntohs(loc_port); - sk_nulls_for_each_from(s, node) { - struct inet_sock *inet = inet_sk(s); + sk_nulls_for_each_from(sk, node) { + struct inet_sock *inet = inet_sk(sk); - if (!net_eq(sock_net(s), net)) + if (!net_eq(sock_net(sk), net)) continue; - if (udp_sk(s)->udp_port_hash == num && - s->sk_family == PF_INET6) { + if (udp_sk(sk)->udp_port_hash == num && + sk->sk_family == PF_INET6) { if (inet->inet_dport) { if (inet->inet_dport != rmt_port) continue; @@ -715,16 +727,16 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk, !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; - if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) continue; if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)) continue; } - if (!inet6_mc_check(s, loc_addr, rmt_addr)) + if (!inet6_mc_check(sk, loc_addr, rmt_addr)) continue; - return s; + return sk; } } return NULL; @@ -755,6 +767,17 @@ static void flush_stack(struct sock **stack, unsigned int count, if (unlikely(skb1)) kfree_skb(skb1); } + +static void udp6_csum_zero_error(struct sk_buff *skb) +{ + /* RFC 2460 section 8.1 says that we SHOULD log + * this error. Well, it is reasonable. + */ + LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", + &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), + &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); +} + /* * Note: called only from the BH handler context, * so we don't need to lock the hashes. @@ -774,7 +797,12 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb, dif = inet6_iif(skb); sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif); while (sk) { - stack[count++] = sk; + /* If zero checksum and no_check is not on for + * the socket then skip it. + */ + if (uh->check || udp_sk(sk)->no_check6_rx) + stack[count++] = sk; + sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr, uh->source, saddr, dif); if (unlikely(count == ARRAY_SIZE(stack))) { @@ -862,6 +890,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; + if (!uh->check && !udp_sk(sk)->no_check6_rx) { + sock_put(sk); + udp6_csum_zero_error(skb); + goto csum_error; + } + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); @@ -874,6 +908,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, return 0; } + if (!uh->check) { + udp6_csum_zero_error(skb); + goto csum_error; + } + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) goto discard; @@ -1001,7 +1040,10 @@ static int udp_v6_push_pending_frames(struct sock *sk) if (is_udplite) csum = udplite_csum_outgoing(sk, skb); - else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ + else if (up->no_check6_tx) { /* UDP csum disabled */ + skb->ip_summed = CHECKSUM_NONE; + goto send; + } else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr, up->len); goto send; @@ -1038,7 +1080,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct udp_sock *up = udp_sk(sk); struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; + DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); struct in6_addr *daddr, *final_p, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; @@ -1140,7 +1182,6 @@ do_udp_sendmsg: flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); if (flowlabel == NULL) return -EINVAL; - daddr = &flowlabel->dst; } } @@ -1221,21 +1262,15 @@ do_udp_sendmsg: security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); - dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true); + dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; goto out; } - if (hlimit < 0) { - if (ipv6_addr_is_multicast(&fl6.daddr)) - hlimit = np->mcast_hops; - else - hlimit = np->hop_limit; - if (hlimit < 0) - hlimit = ip6_dst_hoplimit(dst); - } + if (hlimit < 0) + hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst); if (tclass < 0) tclass = np->tclass; @@ -1475,7 +1510,6 @@ static struct inet_protosw udpv6_protosw = { .protocol = IPPROTO_UDP, .prot = &udpv6_prot, .ops = &inet6_dgram_ops, - .no_check = UDP_CSUM_DEFAULT, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e7359f9eaa8..0ae3d98f83e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -63,7 +63,9 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (unlikely(type & ~(SKB_GSO_UDP | SKB_GSO_DODGY | SKB_GSO_UDP_TUNNEL | + SKB_GSO_UDP_TUNNEL_CSUM | SKB_GSO_GRE | + SKB_GSO_GRE_CSUM | SKB_GSO_IPIP | SKB_GSO_SIT | SKB_GSO_MPLS) || @@ -76,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, goto out; } - if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) + if (skb->encapsulation && skb_shinfo(skb)->gso_type & + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features); else { /* Do software UFO. Complete and fill in the UDP checksum as HW cannot @@ -113,7 +116,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); fptr->nexthdr = nexthdr; fptr->reserved = 0; - ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); + fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the * fragment header are updated in ipv6_gso_segment() diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index dfcc4be4689..9cf097e206e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = { .protocol = IPPROTO_UDPLITE, .prot = &udplitev6_prot, .ops = &inet6_dgram_ops, - .no_check = 0, .flags = INET_PROTOSW_PERMANENT, }; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index cb04f7a16b5..901ef6f8add 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -18,65 +18,6 @@ #include <net/ipv6.h> #include <net/xfrm.h> -/* Informational hook. The decap is still done here. */ -static struct xfrm_tunnel_notifier __rcu *rcv_notify_handlers __read_mostly; -static DEFINE_MUTEX(xfrm6_mode_tunnel_input_mutex); - -int xfrm6_mode_tunnel_input_register(struct xfrm_tunnel_notifier *handler) -{ - struct xfrm_tunnel_notifier __rcu **pprev; - struct xfrm_tunnel_notifier *t; - int ret = -EEXIST; - int priority = handler->priority; - - mutex_lock(&xfrm6_mode_tunnel_input_mutex); - - for (pprev = &rcv_notify_handlers; - (t = rcu_dereference_protected(*pprev, - lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; - pprev = &t->next) { - if (t->priority > priority) - break; - if (t->priority == priority) - goto err; - - } - - handler->next = *pprev; - rcu_assign_pointer(*pprev, handler); - - ret = 0; - -err: - mutex_unlock(&xfrm6_mode_tunnel_input_mutex); - return ret; -} -EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_register); - -int xfrm6_mode_tunnel_input_deregister(struct xfrm_tunnel_notifier *handler) -{ - struct xfrm_tunnel_notifier __rcu **pprev; - struct xfrm_tunnel_notifier *t; - int ret = -ENOENT; - - mutex_lock(&xfrm6_mode_tunnel_input_mutex); - for (pprev = &rcv_notify_handlers; - (t = rcu_dereference_protected(*pprev, - lockdep_is_held(&xfrm6_mode_tunnel_input_mutex))) != NULL; - pprev = &t->next) { - if (t == handler) { - *pprev = handler->next; - ret = 0; - break; - } - } - mutex_unlock(&xfrm6_mode_tunnel_input_mutex); - synchronize_net(); - - return ret; -} -EXPORT_SYMBOL_GPL(xfrm6_mode_tunnel_input_deregister); - static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) { const struct ipv6hdr *outer_iph = ipv6_hdr(skb); @@ -130,7 +71,6 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_tunnel_notifier *handler; int err = -EINVAL; if (XFRM_MODE_SKB_CB(skb)->protocol != IPPROTO_IPV6) @@ -138,9 +78,6 @@ static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) goto out; - for_each_input_rcu(rcv_notify_handlers, handler) - handler->handler(skb); - err = skb_unclone(skb, GFP_ATOMIC); if (err) goto out; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6cd625e3770..433672d07d0 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (!skb->local_df && skb->len > mtu) { + if (!skb->ignore_df && skb->len > mtu) { skb->dev = dst->dev; if (xfrm6_local_dontfrag(skb)) @@ -114,13 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) if (err) return err; - memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER - IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif - - skb->protocol = htons(ETH_P_IPV6); - skb->local_df = 1; + skb->ignore_df = 1; return x->outer_mode->output2(x, skb); } @@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output); int xfrm6_output_finish(struct sk_buff *skb) { + memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); + skb->protocol = htons(ETH_P_IPV6); + #ifdef CONFIG_NETFILTER IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; #endif - skb->protocol = htons(ETH_P_IPV6); return xfrm_output(skb); } @@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb) struct xfrm_state *x = dst->xfrm; int mtu; +#ifdef CONFIG_NETFILTER + if (!x) { + IP6CB(skb)->flags |= IP6SKB_REROUTED; + return dst_output(skb); + } +#endif + if (skb->protocol == htons(ETH_P_IPV6)) mtu = ip6_skb_dst_mtu(skb); else @@ -150,7 +153,7 @@ static int __xfrm6_output(struct sk_buff *skb) if (skb->len > mtu && xfrm6_local_dontfrag(skb)) { xfrm6_local_rxpmtu(skb, mtu); return -EMSGSIZE; - } else if (!skb->local_df && skb->len > mtu && skb->sk) { + } else if (!skb->ignore_df && skb->len > mtu && skb->sk) { xfrm_local_error(skb, mtu); return -EMSGSIZE; } @@ -163,8 +166,9 @@ static int __xfrm6_output(struct sk_buff *skb) return x->outer_mode->afinfo->output_finish(skb); } -int xfrm6_output(struct sk_buff *skb) +int xfrm6_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, - skb_dst(skb)->dev, __xfrm6_output); + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, + NULL, skb_dst(skb)->dev, __xfrm6_output, + !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 5f8e128c512..2a0bbda2c76 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -389,11 +389,17 @@ int __init xfrm6_init(void) if (ret) goto out_policy; + ret = xfrm6_protocol_init(); + if (ret) + goto out_state; + #ifdef CONFIG_SYSCTL register_pernet_subsys(&xfrm6_net_ops); #endif out: return ret; +out_state: + xfrm6_state_fini(); out_policy: xfrm6_policy_fini(); goto out; @@ -404,6 +410,7 @@ void xfrm6_fini(void) #ifdef CONFIG_SYSCTL unregister_pernet_subsys(&xfrm6_net_ops); #endif + xfrm6_protocol_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); dst_entries_destroy(&xfrm6_dst_ops); diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c new file mode 100644 index 00000000000..54d13f8dbba --- /dev/null +++ b/net/ipv6/xfrm6_protocol.c @@ -0,0 +1,279 @@ +/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6. + * + * Copyright (C) 2013 secunet Security Networks AG + * + * Author: + * Steffen Klassert <steffen.klassert@secunet.com> + * + * Based on: + * net/ipv4/xfrm4_protocol.c + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <linux/icmpv6.h> +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/xfrm.h> + +static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly; +static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly; +static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly; +static DEFINE_MUTEX(xfrm6_protocol_mutex); + +static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol) +{ + switch (protocol) { + case IPPROTO_ESP: + return &esp6_handlers; + case IPPROTO_AH: + return &ah6_handlers; + case IPPROTO_COMP: + return &ipcomp6_handlers; + } + + return NULL; +} + +#define for_each_protocol_rcu(head, handler) \ + for (handler = rcu_dereference(head); \ + handler != NULL; \ + handler = rcu_dereference(handler->next)) \ + +int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +{ + int ret; + struct xfrm6_protocol *handler; + struct xfrm6_protocol __rcu **head = proto_handlers(protocol); + + if (!head) + return 0; + + for_each_protocol_rcu(*proto_handlers(protocol), handler) + if ((ret = handler->cb_handler(skb, err)) <= 0) + return ret; + + return 0; +} +EXPORT_SYMBOL(xfrm6_rcv_cb); + +static int xfrm6_esp_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(esp6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(esp6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static int xfrm6_ah_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(ah6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(ah6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static int xfrm6_ipcomp_rcv(struct sk_buff *skb) +{ + int ret; + struct xfrm6_protocol *handler; + + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + + for_each_protocol_rcu(ipcomp6_handlers, handler) + if ((ret = handler->handler(skb)) != -EINVAL) + return ret; + + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + + kfree_skb(skb); + return 0; +} + +static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + struct xfrm6_protocol *handler; + + for_each_protocol_rcu(ipcomp6_handlers, handler) + if (!handler->err_handler(skb, opt, type, code, offset, info)) + break; +} + +static const struct inet6_protocol esp6_protocol = { + .handler = xfrm6_esp_rcv, + .err_handler = xfrm6_esp_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static const struct inet6_protocol ah6_protocol = { + .handler = xfrm6_ah_rcv, + .err_handler = xfrm6_ah_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static const struct inet6_protocol ipcomp6_protocol = { + .handler = xfrm6_ipcomp_rcv, + .err_handler = xfrm6_ipcomp_err, + .flags = INET6_PROTO_NOPOLICY, +}; + +static struct xfrm_input_afinfo xfrm6_input_afinfo = { + .family = AF_INET6, + .owner = THIS_MODULE, + .callback = xfrm6_rcv_cb, +}; + +static inline const struct inet6_protocol *netproto(unsigned char protocol) +{ + switch (protocol) { + case IPPROTO_ESP: + return &esp6_protocol; + case IPPROTO_AH: + return &ah6_protocol; + case IPPROTO_COMP: + return &ipcomp6_protocol; + } + + return NULL; +} + +int xfrm6_protocol_register(struct xfrm6_protocol *handler, + unsigned char protocol) +{ + struct xfrm6_protocol __rcu **pprev; + struct xfrm6_protocol *t; + bool add_netproto = false; + int ret = -EEXIST; + int priority = handler->priority; + + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + + mutex_lock(&xfrm6_protocol_mutex); + + if (!rcu_dereference_protected(*proto_handlers(protocol), + lockdep_is_held(&xfrm6_protocol_mutex))) + add_netproto = true; + + for (pprev = proto_handlers(protocol); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; + pprev = &t->next) { + if (t->priority < priority) + break; + if (t->priority == priority) + goto err; + } + + handler->next = *pprev; + rcu_assign_pointer(*pprev, handler); + + ret = 0; + +err: + mutex_unlock(&xfrm6_protocol_mutex); + + if (add_netproto) { + if (inet6_add_protocol(netproto(protocol), protocol)) { + pr_err("%s: can't add protocol\n", __func__); + ret = -EAGAIN; + } + } + + return ret; +} +EXPORT_SYMBOL(xfrm6_protocol_register); + +int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, + unsigned char protocol) +{ + struct xfrm6_protocol __rcu **pprev; + struct xfrm6_protocol *t; + int ret = -ENOENT; + + if (!proto_handlers(protocol) || !netproto(protocol)) + return -EINVAL; + + mutex_lock(&xfrm6_protocol_mutex); + + for (pprev = proto_handlers(protocol); + (t = rcu_dereference_protected(*pprev, + lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; + pprev = &t->next) { + if (t == handler) { + *pprev = handler->next; + ret = 0; + break; + } + } + + if (!rcu_dereference_protected(*proto_handlers(protocol), + lockdep_is_held(&xfrm6_protocol_mutex))) { + if (inet6_del_protocol(netproto(protocol), protocol) < 0) { + pr_err("%s: can't remove protocol\n", __func__); + ret = -EAGAIN; + } + } + + mutex_unlock(&xfrm6_protocol_mutex); + + synchronize_net(); + + return ret; +} +EXPORT_SYMBOL(xfrm6_protocol_deregister); + +int __init xfrm6_protocol_init(void) +{ + return xfrm_input_register_afinfo(&xfrm6_input_afinfo); +} + +void xfrm6_protocol_fini(void) +{ + xfrm_input_unregister_afinfo(&xfrm6_input_afinfo); +} |
