diff options
Diffstat (limited to 'net/ipv6')
73 files changed, 4610 insertions, 2010 deletions
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 11b13ea69db..438a73aa777 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -21,24 +21,6 @@ menuconfig IPV6  if IPV6 -config IPV6_PRIVACY -	bool "IPv6: Privacy Extensions (RFC 3041) support" -	---help--- -	  Privacy Extensions for Stateless Address Autoconfiguration in IPv6 -	  support.  With this option, additional periodically-altered -	  pseudo-random global-scope unicast address(es) will be assigned to -	  your interface(s). -	 -	  We use our standard pseudo-random algorithm to generate the -          randomized interface identifier, instead of one described in RFC 3041. - -	  By default the kernel does not generate temporary addresses. -	  To use temporary addresses, do -	 -	        echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr  - -	  See <file:Documentation/networking/ip-sysctl.txt> for details. -  config IPV6_ROUTER_PREF  	bool "IPv6: Router Preference (RFC 4191) support"  	---help--- @@ -153,6 +135,18 @@ config INET6_XFRM_MODE_ROUTEOPTIMIZATION  	---help---  	  Support for MIPv6 route optimization mode. +config IPV6_VTI +tristate "Virtual (secure) IPv6: tunneling" +	select IPV6_TUNNEL +	select NET_IP_TUNNEL +	depends on INET6_XFRM_MODE_TUNNEL +	---help--- +	Tunneling means encapsulating data of one protocol type within +	another protocol and sending it over a channel that understands the +	encapsulating protocol. This can be used with xfrm mode tunnel to give +	the notion of a secure tunnel for IPSEC and then use routing protocol +	on top. +  config IPV6_SIT  	tristate "IPv6: IPv6-in-IPv4 tunnel (SIT driver)"  	select INET_TUNNEL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 470a9c008e9..2fe68364bb2 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -16,7 +16,7 @@ ipv6-$(CONFIG_SYSCTL) = sysctl_net_ipv6.o  ipv6-$(CONFIG_IPV6_MROUTE) += ip6mr.o  ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ -	xfrm6_output.o +	xfrm6_output.o xfrm6_protocol.o  ipv6-$(CONFIG_NETFILTER) += netfilter.o  ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o  ipv6-$(CONFIG_PROC_FS) += proc.o @@ -36,6 +36,7 @@ obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o  obj-$(CONFIG_IPV6_MIP6) += mip6.o  obj-$(CONFIG_NETFILTER)	+= netfilter/ +obj-$(CONFIG_IPV6_VTI) += ip6_vti.o  obj-$(CONFIG_IPV6_SIT) += sit.o  obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o  obj-$(CONFIG_IPV6_GRE) += ip6_gre.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index cd3fb301da3..5667b3003af 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -83,11 +83,7 @@  #include <linux/if_tunnel.h>  #include <linux/rtnetlink.h>  #include <linux/netconf.h> - -#ifdef CONFIG_IPV6_PRIVACY  #include <linux/random.h> -#endif -  #include <linux/uaccess.h>  #include <asm/unaligned.h> @@ -124,11 +120,9 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)  }  #endif -#ifdef CONFIG_IPV6_PRIVACY  static void __ipv6_regen_rndid(struct inet6_dev *idev);  static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);  static void ipv6_regen_rndid(unsigned long data); -#endif  static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);  static int ipv6_count_addresses(struct inet6_dev *idev); @@ -139,10 +133,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);  static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];  static DEFINE_SPINLOCK(addrconf_hash_lock); -static void addrconf_verify(unsigned long); +static void addrconf_verify(void); +static void addrconf_verify_rtnl(void); +static void addrconf_verify_work(struct work_struct *); -static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); -static DEFINE_SPINLOCK(addrconf_verify_lock); +static struct workqueue_struct *addrconf_wq; +static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);  static void addrconf_join_anycast(struct inet6_ifaddr *ifp);  static void addrconf_leave_anycast(struct inet6_ifaddr *ifp); @@ -157,7 +153,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,  						  u32 flags, u32 noflags);  static void addrconf_dad_start(struct inet6_ifaddr *ifp); -static void addrconf_dad_timer(unsigned long data); +static void addrconf_dad_work(struct work_struct *w);  static void addrconf_dad_completed(struct inet6_ifaddr *ifp);  static void addrconf_dad_run(struct inet6_dev *idev);  static void addrconf_rs_timer(unsigned long data); @@ -183,13 +179,11 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = {  	.rtr_solicits		= MAX_RTR_SOLICITATIONS,  	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,  	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY  	.use_tempaddr 		= 0,  	.temp_valid_lft		= TEMP_VALID_LIFETIME,  	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,  	.regen_max_retry	= REGEN_MAX_RETRY,  	.max_desync_factor	= MAX_DESYNC_FACTOR, -#endif  	.max_addresses		= IPV6_MAX_ADDRESSES,  	.accept_ra_defrtr	= 1,  	.accept_ra_pinfo	= 1, @@ -221,13 +215,11 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = {  	.rtr_solicits		= MAX_RTR_SOLICITATIONS,  	.rtr_solicit_interval	= RTR_SOLICITATION_INTERVAL,  	.rtr_solicit_delay	= MAX_RTR_SOLICITATION_DELAY, -#ifdef CONFIG_IPV6_PRIVACY  	.use_tempaddr		= 0,  	.temp_valid_lft		= TEMP_VALID_LIFETIME,  	.temp_prefered_lft	= TEMP_PREFERRED_LIFETIME,  	.regen_max_retry	= REGEN_MAX_RETRY,  	.max_desync_factor	= MAX_DESYNC_FACTOR, -#endif  	.max_addresses		= IPV6_MAX_ADDRESSES,  	.accept_ra_defrtr	= 1,  	.accept_ra_pinfo	= 1, @@ -257,9 +249,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)  		__in6_dev_put(idev);  } -static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp) +static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)  { -	if (del_timer(&ifp->dad_timer)) +	if (cancel_delayed_work(&ifp->dad_work))  		__in6_ifa_put(ifp);  } @@ -271,20 +263,29 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,  	mod_timer(&idev->rs_timer, jiffies + when);  } -static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp, -				   unsigned long when) +static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp, +				   unsigned long delay)  { -	if (!timer_pending(&ifp->dad_timer)) +	if (!delayed_work_pending(&ifp->dad_work))  		in6_ifa_hold(ifp); -	mod_timer(&ifp->dad_timer, jiffies + when); +	mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);  }  static int snmp6_alloc_dev(struct inet6_dev *idev)  { -	if (snmp_mib_init((void __percpu **)idev->stats.ipv6, -			  sizeof(struct ipstats_mib), -			  __alignof__(struct ipstats_mib)) < 0) +	int i; + +	idev->stats.ipv6 = alloc_percpu(struct ipstats_mib); +	if (!idev->stats.ipv6)  		goto err_ip; + +	for_each_possible_cpu(i) { +		struct ipstats_mib *addrconf_stats; +		addrconf_stats = per_cpu_ptr(idev->stats.ipv6, i); +		u64_stats_init(&addrconf_stats->syncp); +	} + +  	idev->stats.icmpv6dev = kzalloc(sizeof(struct icmpv6_mib_device),  					GFP_KERNEL);  	if (!idev->stats.icmpv6dev) @@ -299,7 +300,7 @@ static int snmp6_alloc_dev(struct inet6_dev *idev)  err_icmpmsg:  	kfree(idev->stats.icmpv6dev);  err_icmp: -	snmp_mib_free((void __percpu **)idev->stats.ipv6); +	free_percpu(idev->stats.ipv6);  err_ip:  	return -ENOMEM;  } @@ -371,7 +372,6 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)  	}  #endif -#ifdef CONFIG_IPV6_PRIVACY  	INIT_LIST_HEAD(&ndev->tempaddr_list);  	setup_timer(&ndev->regen_timer, ipv6_regen_rndid, (unsigned long)ndev);  	if ((dev->flags&IFF_LOOPBACK) || @@ -384,7 +384,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)  		in6_dev_hold(ndev);  		ipv6_regen_rndid((unsigned long) ndev);  	} -#endif +  	ndev->token = in6addr_any;  	if (netif_running(dev) && addrconf_qdisc_ok(dev)) @@ -439,6 +439,8 @@ static int inet6_netconf_msgsize_devconf(int type)  	if (type == -1 || type == NETCONFA_MC_FORWARDING)  		size += nla_total_size(4);  #endif +	if (type == -1 || type == NETCONFA_PROXY_NEIGH) +		size += nla_total_size(4);  	return size;  } @@ -472,6 +474,10 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex,  			devconf->mc_forwarding) < 0)  		goto nla_put_failure;  #endif +	if ((type == -1 || type == NETCONFA_PROXY_NEIGH) && +	    nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) +		goto nla_put_failure; +  	return nlmsg_end(skb, nlh);  nla_put_failure: @@ -506,6 +512,7 @@ errout:  static const struct nla_policy devconf_ipv6_policy[NETCONFA_MAX+1] = {  	[NETCONFA_IFINDEX]	= { .len = sizeof(int) },  	[NETCONFA_FORWARDING]	= { .len = sizeof(int) }, +	[NETCONFA_PROXY_NEIGH]	= { .len = sizeof(int) },  };  static int inet6_netconf_get_devconf(struct sk_buff *in_skb, @@ -741,8 +748,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)  	in6_dev_put(ifp->idev); -	if (del_timer(&ifp->dad_timer)) -		pr_notice("Timer is still running, when freeing ifa=%p\n", ifp); +	if (cancel_delayed_work(&ifp->dad_work)) +		pr_notice("delayed DAD work was pending while freeing ifa=%p\n", +			  ifp);  	if (ifp->state != INET6_IFADDR_STATE_DEAD) {  		pr_warn("Freeing alive inet6 address %p\n", ifp); @@ -831,14 +839,15 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,  		goto out;  	} +	neigh_parms_data_state_setall(idev->nd_parms); +  	ifa->addr = *addr;  	if (peer_addr)  		ifa->peer_addr = *peer_addr;  	spin_lock_init(&ifa->lock);  	spin_lock_init(&ifa->state_lock); -	setup_timer(&ifa->dad_timer, addrconf_dad_timer, -		    (unsigned long)ifa); +	INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);  	INIT_HLIST_NODE(&ifa->addr_lst);  	ifa->scope = scope;  	ifa->prefix_len = pfxlen; @@ -865,12 +874,10 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr,  	/* Add to inet6_dev unicast addr list. */  	ipv6_link_dev_addr(idev, ifa); -#ifdef CONFIG_IPV6_PRIVACY  	if (ifa->flags&IFA_F_TEMPORARY) {  		list_add(&ifa->tmp_list, &idev->tempaddr_list);  		in6_ifa_hold(ifa);  	} -#endif  	in6_ifa_hold(ifa);  	write_unlock(&idev->lock); @@ -890,15 +897,97 @@ out:  	goto out2;  } +enum cleanup_prefix_rt_t { +	CLEANUP_PREFIX_RT_NOP,    /* no cleanup action for prefix route */ +	CLEANUP_PREFIX_RT_DEL,    /* delete the prefix route */ +	CLEANUP_PREFIX_RT_EXPIRE, /* update the lifetime of the prefix route */ +}; + +/* + * Check, whether the prefix for ifp would still need a prefix route + * after deleting ifp. The function returns one of the CLEANUP_PREFIX_RT_* + * constants. + * + * 1) we don't purge prefix if address was not permanent. + *    prefix is managed by its own lifetime. + * 2) we also don't purge, if the address was IFA_F_NOPREFIXROUTE. + * 3) if there are no addresses, delete prefix. + * 4) if there are still other permanent address(es), + *    corresponding prefix is still permanent. + * 5) if there are still other addresses with IFA_F_NOPREFIXROUTE, + *    don't purge the prefix, assume user space is managing it. + * 6) otherwise, update prefix lifetime to the + *    longest valid lifetime among the corresponding + *    addresses on the device. + *    Note: subsequent RA will update lifetime. + **/ +static enum cleanup_prefix_rt_t +check_cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long *expires) +{ +	struct inet6_ifaddr *ifa; +	struct inet6_dev *idev = ifp->idev; +	unsigned long lifetime; +	enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_DEL; + +	*expires = jiffies; + +	list_for_each_entry(ifa, &idev->addr_list, if_list) { +		if (ifa == ifp) +			continue; +		if (!ipv6_prefix_equal(&ifa->addr, &ifp->addr, +				       ifp->prefix_len)) +			continue; +		if (ifa->flags & (IFA_F_PERMANENT | IFA_F_NOPREFIXROUTE)) +			return CLEANUP_PREFIX_RT_NOP; + +		action = CLEANUP_PREFIX_RT_EXPIRE; + +		spin_lock(&ifa->lock); + +		lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); +		/* +		 * Note: Because this address is +		 * not permanent, lifetime < +		 * LONG_MAX / HZ here. +		 */ +		if (time_before(*expires, ifa->tstamp + lifetime * HZ)) +			*expires = ifa->tstamp + lifetime * HZ; +		spin_unlock(&ifa->lock); +	} + +	return action; +} + +static void +cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires, bool del_rt) +{ +	struct rt6_info *rt; + +	rt = addrconf_get_prefix_route(&ifp->addr, +				       ifp->prefix_len, +				       ifp->idev->dev, +				       0, RTF_GATEWAY | RTF_DEFAULT); +	if (rt) { +		if (del_rt) +			ip6_del_rt(rt); +		else { +			if (!(rt->rt6i_flags & RTF_EXPIRES)) +				rt6_set_expires(rt, expires); +			ip6_rt_put(rt); +		} +	} +} + +  /* This function wants to get referenced ifp and releases it before return */  static void ipv6_del_addr(struct inet6_ifaddr *ifp)  { -	struct inet6_ifaddr *ifa, *ifn; -	struct inet6_dev *idev = ifp->idev;  	int state; -	int deleted = 0, onlink = 0; -	unsigned long expires = jiffies; +	enum cleanup_prefix_rt_t action = CLEANUP_PREFIX_RT_NOP; +	unsigned long expires; + +	ASSERT_RTNL();  	spin_lock_bh(&ifp->state_lock);  	state = ifp->state; @@ -912,8 +1001,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)  	hlist_del_init_rcu(&ifp->addr_lst);  	spin_unlock_bh(&addrconf_hash_lock); -	write_lock_bh(&idev->lock); -#ifdef CONFIG_IPV6_PRIVACY +	write_lock_bh(&ifp->idev->lock); +  	if (ifp->flags&IFA_F_TEMPORARY) {  		list_del(&ifp->tmp_list);  		if (ifp->ifpub) { @@ -922,89 +1011,24 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)  		}  		__in6_ifa_put(ifp);  	} -#endif -	list_for_each_entry_safe(ifa, ifn, &idev->addr_list, if_list) { -		if (ifa == ifp) { -			list_del_init(&ifp->if_list); -			__in6_ifa_put(ifp); +	if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) +		action = check_cleanup_prefix_route(ifp, &expires); -			if (!(ifp->flags & IFA_F_PERMANENT) || onlink > 0) -				break; -			deleted = 1; -			continue; -		} else if (ifp->flags & IFA_F_PERMANENT) { -			if (ipv6_prefix_equal(&ifa->addr, &ifp->addr, -					      ifp->prefix_len)) { -				if (ifa->flags & IFA_F_PERMANENT) { -					onlink = 1; -					if (deleted) -						break; -				} else { -					unsigned long lifetime; - -					if (!onlink) -						onlink = -1; - -					spin_lock(&ifa->lock); - -					lifetime = addrconf_timeout_fixup(ifa->valid_lft, HZ); -					/* -					 * Note: Because this address is -					 * not permanent, lifetime < -					 * LONG_MAX / HZ here. -					 */ -					if (time_before(expires, -							ifa->tstamp + lifetime * HZ)) -						expires = ifa->tstamp + lifetime * HZ; -					spin_unlock(&ifa->lock); -				} -			} -		} -	} -	write_unlock_bh(&idev->lock); +	list_del_init(&ifp->if_list); +	__in6_ifa_put(ifp); -	addrconf_del_dad_timer(ifp); +	write_unlock_bh(&ifp->idev->lock); + +	addrconf_del_dad_work(ifp);  	ipv6_ifa_notify(RTM_DELADDR, ifp);  	inet6addr_notifier_call_chain(NETDEV_DOWN, ifp); -	/* -	 * Purge or update corresponding prefix -	 * -	 * 1) we don't purge prefix here if address was not permanent. -	 *    prefix is managed by its own lifetime. -	 * 2) if there're no addresses, delete prefix. -	 * 3) if there're still other permanent address(es), -	 *    corresponding prefix is still permanent. -	 * 4) otherwise, update prefix lifetime to the -	 *    longest valid lifetime among the corresponding -	 *    addresses on the device. -	 *    Note: subsequent RA will update lifetime. -	 * -	 * --yoshfuji -	 */ -	if ((ifp->flags & IFA_F_PERMANENT) && onlink < 1) { -		struct in6_addr prefix; -		struct rt6_info *rt; - -		ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - -		rt = addrconf_get_prefix_route(&prefix, -					       ifp->prefix_len, -					       ifp->idev->dev, -					       0, RTF_GATEWAY | RTF_DEFAULT); - -		if (rt) { -			if (onlink == 0) { -				ip6_del_rt(rt); -				rt = NULL; -			} else if (!(rt->rt6i_flags & RTF_EXPIRES)) { -				rt6_set_expires(rt, expires); -			} -		} -		ip6_rt_put(rt); +	if (action != CLEANUP_PREFIX_RT_NOP) { +		cleanup_prefix_route(ifp, expires, +			action == CLEANUP_PREFIX_RT_DEL);  	}  	/* clean up prefsrc entries */ @@ -1013,7 +1037,6 @@ out:  	in6_ifa_put(ifp);  } -#ifdef CONFIG_IPV6_PRIVACY  static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)  {  	struct inet6_dev *idev = ifp->idev; @@ -1025,7 +1048,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i  	u32 addr_flags;  	unsigned long now = jiffies; -	write_lock(&idev->lock); +	write_lock_bh(&idev->lock);  	if (ift) {  		spin_lock_bh(&ift->lock);  		memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8); @@ -1037,7 +1060,7 @@ static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *i  retry:  	in6_dev_hold(idev);  	if (idev->cnf.use_tempaddr <= 0) { -		write_unlock(&idev->lock); +		write_unlock_bh(&idev->lock);  		pr_info("%s: use_tempaddr is disabled\n", __func__);  		in6_dev_put(idev);  		ret = -1; @@ -1047,7 +1070,7 @@ retry:  	if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {  		idev->cnf.use_tempaddr = -1;	/*XXX*/  		spin_unlock_bh(&ifp->lock); -		write_unlock(&idev->lock); +		write_unlock_bh(&idev->lock);  		pr_warn("%s: regeneration time exceeded - disabled temporary address support\n",  			__func__);  		in6_dev_put(idev); @@ -1072,15 +1095,18 @@ retry:  	regen_advance = idev->cnf.regen_max_retry *  	                idev->cnf.dad_transmits * -	                idev->nd_parms->retrans_time / HZ; -	write_unlock(&idev->lock); +	                NEIGH_VAR(idev->nd_parms, RETRANS_TIME) / HZ; +	write_unlock_bh(&idev->lock);  	/* A temporary address is created only if this calculated Preferred  	 * Lifetime is greater than REGEN_ADVANCE time units.  In particular,  	 * an implementation must not create a temporary address with a zero  	 * Preferred Lifetime. +	 * Use age calculation as in addrconf_verify to avoid unnecessary +	 * temporary addresses being generated.  	 */ -	if (tmp_prefered_lft <= regen_advance) { +	age = (now - tmp_tstamp + ADDRCONF_TIMER_FUZZ_MINUS) / HZ; +	if (tmp_prefered_lft <= regen_advance + age) {  		in6_ifa_put(ifp);  		in6_dev_put(idev);  		ret = -1; @@ -1100,7 +1126,7 @@ retry:  		in6_dev_put(idev);  		pr_info("%s: retry temporary address regeneration\n", __func__);  		tmpaddr = &addr; -		write_lock(&idev->lock); +		write_lock_bh(&idev->lock);  		goto retry;  	} @@ -1116,7 +1142,6 @@ retry:  out:  	return ret;  } -#endif  /*   *	Choose an appropriate source address (RFC3484) @@ -1131,9 +1156,7 @@ enum {  #endif  	IPV6_SADDR_RULE_OIF,  	IPV6_SADDR_RULE_LABEL, -#ifdef CONFIG_IPV6_PRIVACY  	IPV6_SADDR_RULE_PRIVACY, -#endif  	IPV6_SADDR_RULE_ORCHID,  	IPV6_SADDR_RULE_PREFIX,  	IPV6_SADDR_RULE_MAX @@ -1204,7 +1227,7 @@ static int ipv6_get_saddr_eval(struct net *net,  		 *       |             d is scope of the destination.  		 *  B-d  |  \  		 *       |   \      <- smaller scope is better if -		 *  B-15 |    \        if scope is enough for destinaion. +		 *  B-15 |    \        if scope is enough for destination.  		 *       |             ret = B - scope (-1 <= scope >= d <= 15).  		 * d-C-1 | /  		 *       |/         <- greater is better @@ -1247,7 +1270,6 @@ static int ipv6_get_saddr_eval(struct net *net,  				      &score->ifa->addr, score->addr_type,  				      score->ifa->idev->dev->ifindex) == dst->label;  		break; -#ifdef CONFIG_IPV6_PRIVACY  	case IPV6_SADDR_RULE_PRIVACY:  	    {  		/* Rule 7: Prefer public address @@ -1259,7 +1281,6 @@ static int ipv6_get_saddr_eval(struct net *net,  		ret = (!(score->ifa->flags & IFA_F_TEMPORARY)) ^ preftmp;  		break;  	    } -#endif  	case IPV6_SADDR_RULE_ORCHID:  		/* Rule 8-: Prefer ORCHID vs ORCHID or  		 *	    non-ORCHID vs non-ORCHID @@ -1413,12 +1434,14 @@ try_nextdev:  EXPORT_SYMBOL(ipv6_dev_get_saddr);  int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr, -		      unsigned char banned_flags) +		      u32 banned_flags)  {  	struct inet6_ifaddr *ifp;  	int err = -EADDRNOTAVAIL; -	list_for_each_entry(ifp, &idev->addr_list, if_list) { +	list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) { +		if (ifp->scope > IFA_LINK) +			break;  		if (ifp->scope == IFA_LINK &&  		    !(ifp->flags & banned_flags)) {  			*addr = ifp->addr; @@ -1430,7 +1453,7 @@ int __ipv6_get_lladdr(struct inet6_dev *idev, struct in6_addr *addr,  }  int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, -		    unsigned char banned_flags) +		    u32 banned_flags)  {  	struct inet6_dev *idev;  	int err = -EADDRNOTAVAIL; @@ -1580,7 +1603,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)  {  	if (ifp->flags&IFA_F_PERMANENT) {  		spin_lock_bh(&ifp->lock); -		addrconf_del_dad_timer(ifp); +		addrconf_del_dad_work(ifp);  		ifp->flags |= IFA_F_TENTATIVE;  		if (dad_failed)  			ifp->flags |= IFA_F_DADFAILED; @@ -1588,7 +1611,6 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)  		if (dad_failed)  			ipv6_ifa_notify(0, ifp);  		in6_ifa_put(ifp); -#ifdef CONFIG_IPV6_PRIVACY  	} else if (ifp->flags&IFA_F_TEMPORARY) {  		struct inet6_ifaddr *ifpub;  		spin_lock_bh(&ifp->lock); @@ -1602,21 +1624,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)  			spin_unlock_bh(&ifp->lock);  		}  		ipv6_del_addr(ifp); -#endif -	} else +	} else {  		ipv6_del_addr(ifp); +	}  }  static int addrconf_dad_end(struct inet6_ifaddr *ifp)  {  	int err = -ENOENT; -	spin_lock(&ifp->state_lock); +	spin_lock_bh(&ifp->state_lock);  	if (ifp->state == INET6_IFADDR_STATE_DAD) {  		ifp->state = INET6_IFADDR_STATE_POSTDAD;  		err = 0;  	} -	spin_unlock(&ifp->state_lock); +	spin_unlock_bh(&ifp->state_lock);  	return err;  } @@ -1649,7 +1671,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)  		}  	} -	addrconf_dad_stop(ifp, 1); +	spin_lock_bh(&ifp->state_lock); +	/* transition from _POSTDAD to _ERRDAD */ +	ifp->state = INET6_IFADDR_STATE_ERRDAD; +	spin_unlock_bh(&ifp->state_lock); + +	addrconf_mod_dad_work(ifp, 0);  }  /* Join to solicited addr multicast group. */ @@ -1658,6 +1685,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)  {  	struct in6_addr maddr; +	ASSERT_RTNL(); +  	if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))  		return; @@ -1669,6 +1698,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)  {  	struct in6_addr maddr; +	ASSERT_RTNL(); +  	if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))  		return; @@ -1679,7 +1710,10 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)  static void addrconf_join_anycast(struct inet6_ifaddr *ifp)  {  	struct in6_addr addr; -	if (ifp->prefix_len == 127) /* RFC 6164 */ + +	ASSERT_RTNL(); + +	if (ifp->prefix_len >= 127) /* RFC 6164 */  		return;  	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);  	if (ipv6_addr_any(&addr)) @@ -1690,7 +1724,10 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)  static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)  {  	struct in6_addr addr; -	if (ifp->prefix_len == 127) /* RFC 6164 */ + +	ASSERT_RTNL(); + +	if (ifp->prefix_len >= 127) /* RFC 6164 */  		return;  	ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);  	if (ipv6_addr_any(&addr)) @@ -1824,6 +1861,7 @@ static int ipv6_generate_eui64(u8 *eui, struct net_device *dev)  		return addrconf_ifid_sit(eui, dev);  	case ARPHRD_IPGRE:  		return addrconf_ifid_gre(eui, dev); +	case ARPHRD_6LOWPAN:  	case ARPHRD_IEEE802154:  		return addrconf_ifid_eui64(eui, dev);  	case ARPHRD_IEEE1394: @@ -1840,7 +1878,9 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)  	struct inet6_ifaddr *ifp;  	read_lock_bh(&idev->lock); -	list_for_each_entry(ifp, &idev->addr_list, if_list) { +	list_for_each_entry_reverse(ifp, &idev->addr_list, if_list) { +		if (ifp->scope > IFA_LINK) +			break;  		if (ifp->scope == IFA_LINK && !(ifp->flags&IFA_F_TENTATIVE)) {  			memcpy(eui, ifp->addr.s6_addr+8, 8);  			err = 0; @@ -1851,7 +1891,6 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)  	return err;  } -#ifdef CONFIG_IPV6_PRIVACY  /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */  static void __ipv6_regen_rndid(struct inet6_dev *idev)  { @@ -1897,7 +1936,8 @@ static void ipv6_regen_rndid(unsigned long data)  	expires = jiffies +  		idev->cnf.temp_prefered_lft * HZ - -		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - +		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * +		NEIGH_VAR(idev->nd_parms, RETRANS_TIME) -  		idev->cnf.max_desync_factor * HZ;  	if (time_before(expires, jiffies)) {  		pr_warn("%s: too short regeneration interval; timer disabled for %s\n", @@ -1919,7 +1959,6 @@ static void  __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmp  	if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)  		__ipv6_regen_rndid(idev);  } -#endif  /*   *	Add prefix route. @@ -2006,23 +2045,6 @@ static void addrconf_add_mroute(struct net_device *dev)  	ip6_route_add(&cfg);  } -#if IS_ENABLED(CONFIG_IPV6_SIT) -static void sit_route_add(struct net_device *dev) -{ -	struct fib6_config cfg = { -		.fc_table = RT6_TABLE_MAIN, -		.fc_metric = IP6_RT_PRIO_ADDRCONF, -		.fc_ifindex = dev->ifindex, -		.fc_dst_len = 96, -		.fc_flags = RTF_UP | RTF_NONEXTHOP, -		.fc_nlinfo.nl_net = dev_net(dev), -	}; - -	/* prefix length - 96 bits "::d.d.d.d" */ -	ip6_route_add(&cfg); -} -#endif -  static struct inet6_dev *addrconf_add_dev(struct net_device *dev)  {  	struct inet6_dev *idev; @@ -2043,6 +2065,73 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev)  	return idev;  } +static void manage_tempaddrs(struct inet6_dev *idev, +			     struct inet6_ifaddr *ifp, +			     __u32 valid_lft, __u32 prefered_lft, +			     bool create, unsigned long now) +{ +	u32 flags; +	struct inet6_ifaddr *ift; + +	read_lock_bh(&idev->lock); +	/* update all temporary addresses in the list */ +	list_for_each_entry(ift, &idev->tempaddr_list, tmp_list) { +		int age, max_valid, max_prefered; + +		if (ifp != ift->ifpub) +			continue; + +		/* RFC 4941 section 3.3: +		 * If a received option will extend the lifetime of a public +		 * address, the lifetimes of temporary addresses should +		 * be extended, subject to the overall constraint that no +		 * temporary addresses should ever remain "valid" or "preferred" +		 * for a time longer than (TEMP_VALID_LIFETIME) or +		 * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), respectively. +		 */ +		age = (now - ift->cstamp) / HZ; +		max_valid = idev->cnf.temp_valid_lft - age; +		if (max_valid < 0) +			max_valid = 0; + +		max_prefered = idev->cnf.temp_prefered_lft - +			       idev->cnf.max_desync_factor - age; +		if (max_prefered < 0) +			max_prefered = 0; + +		if (valid_lft > max_valid) +			valid_lft = max_valid; + +		if (prefered_lft > max_prefered) +			prefered_lft = max_prefered; + +		spin_lock(&ift->lock); +		flags = ift->flags; +		ift->valid_lft = valid_lft; +		ift->prefered_lft = prefered_lft; +		ift->tstamp = now; +		if (prefered_lft > 0) +			ift->flags &= ~IFA_F_DEPRECATED; + +		spin_unlock(&ift->lock); +		if (!(flags&IFA_F_TENTATIVE)) +			ipv6_ifa_notify(0, ift); +	} + +	if ((create || list_empty(&idev->tempaddr_list)) && +	    idev->cnf.use_tempaddr > 0) { +		/* When a new public address is created as described +		 * in [ADDRCONF], also create a new temporary address. +		 * Also create a temporary address if it's enabled but +		 * no temporary address currently exists. +		 */ +		read_unlock_bh(&idev->lock); +		ipv6_create_tempaddr(ifp, NULL); +	} else { +		read_unlock_bh(&idev->lock); +	} +} +  void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)  {  	struct prefix_info *pinfo; @@ -2199,17 +2288,17 @@ ok:  			update_lft = 0;  			create = 1; +			spin_lock_bh(&ifp->lock); +			ifp->flags |= IFA_F_MANAGETEMPADDR;  			ifp->cstamp = jiffies;  			ifp->tokenized = tokenized; +			spin_unlock_bh(&ifp->lock);  			addrconf_dad_start(ifp);  		}  		if (ifp) { -			int flags; +			u32 flags;  			unsigned long now; -#ifdef CONFIG_IPV6_PRIVACY -			struct inet6_ifaddr *ift; -#endif  			u32 stored_lft;  			/* update lifetime (RFC2462 5.5.3 e) */ @@ -2250,74 +2339,11 @@ ok:  			} else  				spin_unlock(&ifp->lock); -#ifdef CONFIG_IPV6_PRIVACY -			read_lock_bh(&in6_dev->lock); -			/* update all temporary addresses in the list */ -			list_for_each_entry(ift, &in6_dev->tempaddr_list, -					    tmp_list) { -				int age, max_valid, max_prefered; +			manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, +					 create, now); -				if (ifp != ift->ifpub) -					continue; - -				/* -				 * RFC 4941 section 3.3: -				 * If a received option will extend the lifetime -				 * of a public address, the lifetimes of -				 * temporary addresses should be extended, -				 * subject to the overall constraint that no -				 * temporary addresses should ever remain -				 * "valid" or "preferred" for a time longer than -				 * (TEMP_VALID_LIFETIME) or -				 * (TEMP_PREFERRED_LIFETIME - DESYNC_FACTOR), -				 * respectively. -				 */ -				age = (now - ift->cstamp) / HZ; -				max_valid = in6_dev->cnf.temp_valid_lft - age; -				if (max_valid < 0) -					max_valid = 0; - -				max_prefered = in6_dev->cnf.temp_prefered_lft - -					       in6_dev->cnf.max_desync_factor - -					       age; -				if (max_prefered < 0) -					max_prefered = 0; - -				if (valid_lft > max_valid) -					valid_lft = max_valid; - -				if (prefered_lft > max_prefered) -					prefered_lft = max_prefered; - -				spin_lock(&ift->lock); -				flags = ift->flags; -				ift->valid_lft = valid_lft; -				ift->prefered_lft = prefered_lft; -				ift->tstamp = now; -				if (prefered_lft > 0) -					ift->flags &= ~IFA_F_DEPRECATED; - -				spin_unlock(&ift->lock); -				if (!(flags&IFA_F_TENTATIVE)) -					ipv6_ifa_notify(0, ift); -			} - -			if ((create || list_empty(&in6_dev->tempaddr_list)) && in6_dev->cnf.use_tempaddr > 0) { -				/* -				 * When a new public address is created as -				 * described in [ADDRCONF], also create a new -				 * temporary address. Also create a temporary -				 * address if it's enabled but no temporary -				 * address currently exists. -				 */ -				read_unlock_bh(&in6_dev->lock); -				ipv6_create_tempaddr(ifp, NULL); -			} else { -				read_unlock_bh(&in6_dev->lock); -			} -#endif  			in6_ifa_put(ifp); -			addrconf_verify(0); +			addrconf_verify();  		}  	}  	inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo); @@ -2393,10 +2419,11 @@ err_exit:  /*   *	Manual configuration of address on an interface   */ -static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, +static int inet6_addr_add(struct net *net, int ifindex, +			  const struct in6_addr *pfx,  			  const struct in6_addr *peer_pfx, -			  unsigned int plen, __u8 ifa_flags, __u32 prefered_lft, -			  __u32 valid_lft) +			  unsigned int plen, __u32 ifa_flags, +			  __u32 prefered_lft, __u32 valid_lft)  {  	struct inet6_ifaddr *ifp;  	struct inet6_dev *idev; @@ -2415,6 +2442,9 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p  	if (!valid_lft || prefered_lft > valid_lft)  		return -EINVAL; +	if (ifa_flags & IFA_F_MANAGETEMPADDR && plen != 64) +		return -EINVAL; +  	dev = __dev_get_by_index(net, ifindex);  	if (!dev)  		return -ENODEV; @@ -2447,24 +2477,30 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p  			    valid_lft, prefered_lft);  	if (!IS_ERR(ifp)) { -		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, -				      expires, flags); +		if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { +			addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, +					      expires, flags); +		} +  		/*  		 * Note that section 3.1 of RFC 4429 indicates  		 * that the Optimistic flag should not be set for  		 * manually configured addresses  		 */  		addrconf_dad_start(ifp); +		if (ifa_flags & IFA_F_MANAGETEMPADDR) +			manage_tempaddrs(idev, ifp, valid_lft, prefered_lft, +					 true, jiffies);  		in6_ifa_put(ifp); -		addrconf_verify(0); +		addrconf_verify_rtnl();  		return 0;  	}  	return PTR_ERR(ifp);  } -static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *pfx, -			  unsigned int plen) +static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, +			  const struct in6_addr *pfx, unsigned int plen)  {  	struct inet6_ifaddr *ifp;  	struct inet6_dev *idev; @@ -2487,7 +2523,12 @@ static int inet6_addr_del(struct net *net, int ifindex, const struct in6_addr *p  			in6_ifa_hold(ifp);  			read_unlock_bh(&idev->lock); +			if (!(ifp->flags & IFA_F_TEMPORARY) && +			    (ifa_flags & IFA_F_MANAGETEMPADDR)) +				manage_tempaddrs(idev, ifp, 0, 0, false, +						 jiffies);  			ipv6_del_addr(ifp); +			addrconf_verify_rtnl();  			return 0;  		}  	} @@ -2527,7 +2568,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)  		return -EFAULT;  	rtnl_lock(); -	err = inet6_addr_del(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, +	err = inet6_addr_del(net, ireq.ifr6_ifindex, 0, &ireq.ifr6_addr,  			     ireq.ifr6_prefixlen);  	rtnl_unlock();  	return err; @@ -2539,7 +2580,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr,  	struct inet6_ifaddr *ifp;  	ifp = ipv6_add_addr(idev, addr, NULL, plen, -			    scope, IFA_F_PERMANENT, 0, 0); +			    scope, IFA_F_PERMANENT, +			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);  	if (!IS_ERR(ifp)) {  		spin_lock_bh(&ifp->lock);  		ifp->flags &= ~IFA_F_TENTATIVE; @@ -2555,7 +2597,8 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)  	struct in6_addr addr;  	struct net_device *dev;  	struct net *net = dev_net(idev->dev); -	int scope; +	int scope, plen; +	u32 pflags = 0;  	ASSERT_RTNL(); @@ -2565,12 +2608,16 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)  	if (idev->dev->flags&IFF_POINTOPOINT) {  		addr.s6_addr32[0] = htonl(0xfe800000);  		scope = IFA_LINK; +		plen = 64;  	} else {  		scope = IPV6_ADDR_COMPATv4; +		plen = 96; +		pflags |= RTF_NONEXTHOP;  	}  	if (addr.s6_addr32[3]) { -		add_addr(idev, &addr, 128, scope); +		add_addr(idev, &addr, plen, scope); +		addrconf_prefix_route(&addr, plen, idev->dev, 0, pflags);  		return;  	} @@ -2582,7 +2629,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)  			int flag = scope;  			for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) { -				int plen;  				addr.s6_addr32[3] = ifa->ifa_local; @@ -2593,12 +2639,10 @@ static void sit_add_v4_addrs(struct inet6_dev *idev)  						continue;  					flag |= IFA_HOST;  				} -				if (idev->dev->flags&IFF_POINTOPOINT) -					plen = 64; -				else -					plen = 96;  				add_addr(idev, &addr, plen, flag); +				addrconf_prefix_route(&addr, plen, idev->dev, 0, +						      pflags);  			}  		}  	} @@ -2638,10 +2682,20 @@ static void init_loopback(struct net_device *dev)  			if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE))  				continue; -			if (sp_ifa->rt) -				continue; +			if (sp_ifa->rt) { +				/* This dst has been added to garbage list when +				 * lo device down, release this obsolete dst and +				 * reallocate a new router for ifa. +				 */ +				if (sp_ifa->rt->dst.obsolete > 0) { +					ip6_rt_put(sp_ifa->rt); +					sp_ifa->rt = NULL; +				} else { +					continue; +				} +			} -			sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, 0); +			sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false);  			/* Failure cases are ignored */  			if (!IS_ERR(sp_rt)) { @@ -2665,7 +2719,8 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr  #endif -	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, 0, 0); +	ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, +			    INFINITY_LIFE_TIME, INFINITY_LIFE_TIME);  	if (!IS_ERR(ifp)) {  		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0);  		addrconf_dad_start(ifp); @@ -2686,7 +2741,8 @@ static void addrconf_dev_config(struct net_device *dev)  	    (dev->type != ARPHRD_INFINIBAND) &&  	    (dev->type != ARPHRD_IEEE802154) &&  	    (dev->type != ARPHRD_IEEE1394) && -	    (dev->type != ARPHRD_TUNNEL6)) { +	    (dev->type != ARPHRD_TUNNEL6) && +	    (dev->type != ARPHRD_6LOWPAN)) {  		/* Alas, we support only Ethernet autoconfiguration. */  		return;  	} @@ -2724,7 +2780,6 @@ static void addrconf_sit_config(struct net_device *dev)  		struct in6_addr addr;  		ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0); -		addrconf_prefix_route(&addr, 64, dev, 0, 0);  		if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))  			addrconf_add_linklocal(idev, &addr);  		return; @@ -2734,8 +2789,6 @@ static void addrconf_sit_config(struct net_device *dev)  	if (dev->flags&IFF_POINTOPOINT)  		addrconf_add_mroute(dev); -	else -		sit_route_add(dev);  }  #endif @@ -2753,25 +2806,13 @@ static void addrconf_gre_config(struct net_device *dev)  	}  	ipv6_addr_set(&addr,  htonl(0xFE800000), 0, 0, 0); -	addrconf_prefix_route(&addr, 64, dev, 0, 0); -  	if (!ipv6_generate_eui64(addr.s6_addr + 8, dev))  		addrconf_add_linklocal(idev, &addr); +	else +		addrconf_prefix_route(&addr, 64, dev, 0, 0);  }  #endif -static inline int -ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev) -{ -	struct in6_addr lladdr; - -	if (!ipv6_get_lladdr(link_dev, &lladdr, IFA_F_TENTATIVE)) { -		addrconf_add_linklocal(idev, &lladdr); -		return 0; -	} -	return -1; -} -  static int addrconf_notify(struct notifier_block *this, unsigned long event,  			   void *ptr)  { @@ -2888,7 +2929,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,  		}  		/* -		 * MTU falled under IPV6_MIN_MTU. +		 * if MTU under IPV6_MIN_MTU.  		 * Stop IPv6 on this interface.  		 */ @@ -2980,7 +3021,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)  		hlist_for_each_entry_rcu(ifa, h, addr_lst) {  			if (ifa->idev == idev) {  				hlist_del_init_rcu(&ifa->addr_lst); -				addrconf_del_dad_timer(ifa); +				addrconf_del_dad_work(ifa);  				goto restart;  			}  		} @@ -2995,7 +3036,6 @@ static int addrconf_ifdown(struct net_device *dev, int how)  	if (!how)  		idev->if_flags &= ~(IF_RS_SENT|IF_RA_RCVD|IF_READY); -#ifdef CONFIG_IPV6_PRIVACY  	if (how && del_timer(&idev->regen_timer))  		in6_dev_put(idev); @@ -3015,12 +3055,11 @@ static int addrconf_ifdown(struct net_device *dev, int how)  		in6_ifa_put(ifa);  		write_lock_bh(&idev->lock);  	} -#endif  	while (!list_empty(&idev->addr_list)) {  		ifa = list_first_entry(&idev->addr_list,  				       struct inet6_ifaddr, if_list); -		addrconf_del_dad_timer(ifa); +		addrconf_del_dad_work(ifa);  		list_del(&ifa->if_list); @@ -3116,20 +3155,20 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)  	if (ifp->flags & IFA_F_OPTIMISTIC)  		rand_num = 0;  	else -		rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1); +		rand_num = prandom_u32() % (idev->cnf.rtr_solicit_delay ? : 1);  	ifp->dad_probes = idev->cnf.dad_transmits; -	addrconf_mod_dad_timer(ifp, rand_num); +	addrconf_mod_dad_work(ifp, rand_num);  } -static void addrconf_dad_start(struct inet6_ifaddr *ifp) +static void addrconf_dad_begin(struct inet6_ifaddr *ifp)  {  	struct inet6_dev *idev = ifp->idev;  	struct net_device *dev = idev->dev;  	addrconf_join_solict(dev, &ifp->addr); -	net_srandom(ifp->addr.s6_addr32[3]); +	prandom_seed((__force u32) ifp->addr.s6_addr32[3]);  	read_lock_bh(&idev->lock);  	spin_lock(&ifp->lock); @@ -3174,25 +3213,68 @@ out:  	read_unlock_bh(&idev->lock);  } -static void addrconf_dad_timer(unsigned long data) +static void addrconf_dad_start(struct inet6_ifaddr *ifp)  { -	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data; +	bool begin_dad = false; + +	spin_lock_bh(&ifp->state_lock); +	if (ifp->state != INET6_IFADDR_STATE_DEAD) { +		ifp->state = INET6_IFADDR_STATE_PREDAD; +		begin_dad = true; +	} +	spin_unlock_bh(&ifp->state_lock); + +	if (begin_dad) +		addrconf_mod_dad_work(ifp, 0); +} + +static void addrconf_dad_work(struct work_struct *w) +{ +	struct inet6_ifaddr *ifp = container_of(to_delayed_work(w), +						struct inet6_ifaddr, +						dad_work);  	struct inet6_dev *idev = ifp->idev;  	struct in6_addr mcaddr; +	enum { +		DAD_PROCESS, +		DAD_BEGIN, +		DAD_ABORT, +	} action = DAD_PROCESS; + +	rtnl_lock(); + +	spin_lock_bh(&ifp->state_lock); +	if (ifp->state == INET6_IFADDR_STATE_PREDAD) { +		action = DAD_BEGIN; +		ifp->state = INET6_IFADDR_STATE_DAD; +	} else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) { +		action = DAD_ABORT; +		ifp->state = INET6_IFADDR_STATE_POSTDAD; +	} +	spin_unlock_bh(&ifp->state_lock); + +	if (action == DAD_BEGIN) { +		addrconf_dad_begin(ifp); +		goto out; +	} else if (action == DAD_ABORT) { +		addrconf_dad_stop(ifp, 1); +		goto out; +	} +  	if (!ifp->dad_probes && addrconf_dad_end(ifp))  		goto out; -	write_lock(&idev->lock); +	write_lock_bh(&idev->lock);  	if (idev->dead || !(idev->if_flags & IF_READY)) { -		write_unlock(&idev->lock); +		write_unlock_bh(&idev->lock);  		goto out;  	}  	spin_lock(&ifp->lock);  	if (ifp->state == INET6_IFADDR_STATE_DEAD) {  		spin_unlock(&ifp->lock); -		write_unlock(&idev->lock); +		write_unlock_bh(&idev->lock);  		goto out;  	} @@ -3203,7 +3285,7 @@ static void addrconf_dad_timer(unsigned long data)  		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);  		spin_unlock(&ifp->lock); -		write_unlock(&idev->lock); +		write_unlock_bh(&idev->lock);  		addrconf_dad_completed(ifp); @@ -3211,15 +3293,35 @@ static void addrconf_dad_timer(unsigned long data)  	}  	ifp->dad_probes--; -	addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time); +	addrconf_mod_dad_work(ifp, +			      NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME));  	spin_unlock(&ifp->lock); -	write_unlock(&idev->lock); +	write_unlock_bh(&idev->lock);  	/* send a neighbour solicitation for our addr */  	addrconf_addr_solict_mult(&ifp->addr, &mcaddr);  	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);  out:  	in6_ifa_put(ifp); +	rtnl_unlock(); +} + +/* ifp->idev must be at least read locked */ +static bool ipv6_lonely_lladdr(struct inet6_ifaddr *ifp) +{ +	struct inet6_ifaddr *ifpiter; +	struct inet6_dev *idev = ifp->idev; + +	list_for_each_entry_reverse(ifpiter, &idev->addr_list, if_list) { +		if (ifpiter->scope > IFA_LINK) +			break; +		if (ifp != ifpiter && ifpiter->scope == IFA_LINK && +		    (ifpiter->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE| +				       IFA_F_OPTIMISTIC|IFA_F_DADFAILED)) == +		    IFA_F_PERMANENT) +			return false; +	} +	return true;  }  static void addrconf_dad_completed(struct inet6_ifaddr *ifp) @@ -3228,7 +3330,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)  	struct in6_addr lladdr;  	bool send_rs, send_mld; -	addrconf_del_dad_timer(ifp); +	addrconf_del_dad_work(ifp);  	/*  	 *	Configure the address for reception. Now it is valid. @@ -3241,14 +3343,11 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)  	 */  	read_lock_bh(&ifp->idev->lock); -	spin_lock(&ifp->lock); -	send_mld = ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL && -		   ifp->idev->valid_ll_addr_cnt == 1; +	send_mld = ifp->scope == IFA_LINK && ipv6_lonely_lladdr(ifp);  	send_rs = send_mld &&  		  ipv6_accept_ra(ifp->idev) &&  		  ifp->idev->cnf.rtr_solicits > 0 &&  		  (dev->flags&IFF_LOOPBACK) == 0; -	spin_unlock(&ifp->lock);  	read_unlock_bh(&ifp->idev->lock);  	/* While dad is in progress mld report's source address is in6_addrany. @@ -3391,7 +3490,7 @@ static int if6_seq_show(struct seq_file *seq, void *v)  		   ifp->idev->dev->ifindex,  		   ifp->prefix_len,  		   ifp->scope, -		   ifp->flags, +		   (u8) ifp->flags,  		   ifp->idev->dev->name);  	return 0;  } @@ -3472,26 +3571,31 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)   *	Periodic address status verification   */ -static void addrconf_verify(unsigned long foo) +static void addrconf_verify_rtnl(void)  {  	unsigned long now, next, next_sec, next_sched;  	struct inet6_ifaddr *ifp;  	int i; +	ASSERT_RTNL(); +  	rcu_read_lock_bh(); -	spin_lock(&addrconf_verify_lock);  	now = jiffies;  	next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY); -	del_timer(&addr_chk_timer); +	cancel_delayed_work(&addr_chk_work);  	for (i = 0; i < IN6_ADDR_HSIZE; i++) {  restart: -		hlist_for_each_entry_rcu_bh(ifp, -					 &inet6_addr_lst[i], addr_lst) { +		hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {  			unsigned long age; -			if (ifp->flags & IFA_F_PERMANENT) +			/* When setting preferred_lft to a value not zero or +			 * infinity, while valid_lft is infinity +			 * IFA_F_PERMANENT has a non-infinity life time. +			 */ +			if ((ifp->flags & IFA_F_PERMANENT) && +			    (ifp->prefered_lft == INFINITY_LIFE_TIME))  				continue;  			spin_lock(&ifp->lock); @@ -3516,7 +3620,8 @@ restart:  					ifp->flags |= IFA_F_DEPRECATED;  				} -				if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) +				if ((ifp->valid_lft != INFINITY_LIFE_TIME) && +				    (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)))  					next = ifp->tstamp + ifp->valid_lft * HZ;  				spin_unlock(&ifp->lock); @@ -3528,12 +3633,11 @@ restart:  					in6_ifa_put(ifp);  					goto restart;  				} -#ifdef CONFIG_IPV6_PRIVACY  			} else if ((ifp->flags&IFA_F_TEMPORARY) &&  				   !(ifp->flags&IFA_F_TENTATIVE)) {  				unsigned long regen_advance = ifp->idev->cnf.regen_max_retry *  					ifp->idev->cnf.dad_transmits * -					ifp->idev->nd_parms->retrans_time / HZ; +					NEIGH_VAR(ifp->idev->nd_parms, RETRANS_TIME) / HZ;  				if (age >= ifp->prefered_lft - regen_advance) {  					struct inet6_ifaddr *ifpub = ifp->ifpub; @@ -3556,7 +3660,6 @@ restart:  				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))  					next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;  				spin_unlock(&ifp->lock); -#endif  			} else {  				/* ifp->prefered_lft <= ifp->valid_lft */  				if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next)) @@ -3579,13 +3682,22 @@ restart:  	ADBG(KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",  	      now, next, next_sec, next_sched); - -	addr_chk_timer.expires = next_sched; -	add_timer(&addr_chk_timer); -	spin_unlock(&addrconf_verify_lock); +	mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);  	rcu_read_unlock_bh();  } +static void addrconf_verify_work(struct work_struct *w) +{ +	rtnl_lock(); +	addrconf_verify_rtnl(); +	rtnl_unlock(); +} + +static void addrconf_verify(void) +{ +	mod_delayed_work(addrconf_wq, &addr_chk_work, 0); +} +  static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local,  				     struct in6_addr **peer_pfx)  { @@ -3609,6 +3721,7 @@ static const struct nla_policy ifa_ipv6_policy[IFA_MAX+1] = {  	[IFA_ADDRESS]		= { .len = sizeof(struct in6_addr) },  	[IFA_LOCAL]		= { .len = sizeof(struct in6_addr) },  	[IFA_CACHEINFO]		= { .len = sizeof(struct ifa_cacheinfo) }, +	[IFA_FLAGS]		= { .len = sizeof(u32) },  };  static int @@ -3618,6 +3731,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)  	struct ifaddrmsg *ifm;  	struct nlattr *tb[IFA_MAX+1];  	struct in6_addr *pfx, *peer_pfx; +	u32 ifa_flags;  	int err;  	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3629,19 +3743,33 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh)  	if (pfx == NULL)  		return -EINVAL; -	return inet6_addr_del(net, ifm->ifa_index, pfx, ifm->ifa_prefixlen); +	ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; + +	/* We ignore other flags so far. */ +	ifa_flags &= IFA_F_MANAGETEMPADDR; + +	return inet6_addr_del(net, ifm->ifa_index, ifa_flags, pfx, +			      ifm->ifa_prefixlen);  } -static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, +static int inet6_addr_modify(struct inet6_ifaddr *ifp, u32 ifa_flags,  			     u32 prefered_lft, u32 valid_lft)  {  	u32 flags;  	clock_t expires;  	unsigned long timeout; +	bool was_managetempaddr; +	bool had_prefixroute; + +	ASSERT_RTNL();  	if (!valid_lft || (prefered_lft > valid_lft))  		return -EINVAL; +	if (ifa_flags & IFA_F_MANAGETEMPADDR && +	    (ifp->flags & IFA_F_TEMPORARY || ifp->prefix_len != 64)) +		return -EINVAL; +  	timeout = addrconf_timeout_fixup(valid_lft, HZ);  	if (addrconf_finite_timeout(timeout)) {  		expires = jiffies_to_clock_t(timeout * HZ); @@ -3661,7 +3789,13 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,  	}  	spin_lock_bh(&ifp->lock); -	ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; +	was_managetempaddr = ifp->flags & IFA_F_MANAGETEMPADDR; +	had_prefixroute = ifp->flags & IFA_F_PERMANENT && +			  !(ifp->flags & IFA_F_NOPREFIXROUTE); +	ifp->flags &= ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | +			IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | +			IFA_F_NOPREFIXROUTE); +	ifp->flags |= ifa_flags;  	ifp->tstamp = jiffies;  	ifp->valid_lft = valid_lft;  	ifp->prefered_lft = prefered_lft; @@ -3670,9 +3804,31 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,  	if (!(ifp->flags&IFA_F_TENTATIVE))  		ipv6_ifa_notify(0, ifp); -	addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, -			      expires, flags); -	addrconf_verify(0); +	if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { +		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev, +				      expires, flags); +	} else if (had_prefixroute) { +		enum cleanup_prefix_rt_t action; +		unsigned long rt_expires; + +		write_lock_bh(&ifp->idev->lock); +		action = check_cleanup_prefix_route(ifp, &rt_expires); +		write_unlock_bh(&ifp->idev->lock); + +		if (action != CLEANUP_PREFIX_RT_NOP) { +			cleanup_prefix_route(ifp, rt_expires, +				action == CLEANUP_PREFIX_RT_DEL); +		} +	} + +	if (was_managetempaddr || ifp->flags & IFA_F_MANAGETEMPADDR) { +		if (was_managetempaddr && !(ifp->flags & IFA_F_MANAGETEMPADDR)) +			valid_lft = prefered_lft = 0; +		manage_tempaddrs(ifp->idev, ifp, valid_lft, prefered_lft, +				 !was_managetempaddr, jiffies); +	} + +	addrconf_verify_rtnl();  	return 0;  } @@ -3687,7 +3843,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)  	struct inet6_ifaddr *ifa;  	struct net_device *dev;  	u32 valid_lft = INFINITY_LIFE_TIME, preferred_lft = INFINITY_LIFE_TIME; -	u8 ifa_flags; +	u32 ifa_flags;  	int err;  	err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy); @@ -3714,14 +3870,17 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)  	if (dev == NULL)  		return -ENODEV; +	ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; +  	/* We ignore other flags so far. */ -	ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); +	ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | +		     IFA_F_NOPREFIXROUTE;  	ifa = ipv6_get_ifaddr(net, pfx, dev, 1);  	if (ifa == NULL) {  		/*  		 * It would be best to check for !NLM_F_CREATE here but -		 * userspace alreay relies on not having to provide this. +		 * userspace already relies on not having to provide this.  		 */  		return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx,  				      ifm->ifa_prefixlen, ifa_flags, @@ -3739,7 +3898,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh)  	return err;  } -static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u8 flags, +static void put_ifaddrmsg(struct nlmsghdr *nlh, u8 prefixlen, u32 flags,  			  u8 scope, int ifindex)  {  	struct ifaddrmsg *ifm; @@ -3782,7 +3941,8 @@ static inline int inet6_ifaddr_msgsize(void)  	return NLMSG_ALIGN(sizeof(struct ifaddrmsg))  	       + nla_total_size(16) /* IFA_LOCAL */  	       + nla_total_size(16) /* IFA_ADDRESS */ -	       + nla_total_size(sizeof(struct ifa_cacheinfo)); +	       + nla_total_size(sizeof(struct ifa_cacheinfo)) +	       + nla_total_size(4)  /* IFA_FLAGS */;  }  static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, @@ -3798,7 +3958,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,  	put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope),  		      ifa->idev->dev->ifindex); -	if (!(ifa->flags&IFA_F_PERMANENT)) { +	if (!((ifa->flags&IFA_F_PERMANENT) && +	      (ifa->prefered_lft == INFINITY_LIFE_TIME))) {  		preferred = ifa->prefered_lft;  		valid = ifa->valid_lft;  		if (preferred != INFINITY_LIFE_TIME) { @@ -3830,6 +3991,9 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa,  	if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0)  		goto error; +	if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) +		goto error; +  	return nlmsg_end(skb, nlh);  error: @@ -4128,13 +4292,11 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf,  		jiffies_to_msecs(cnf->mldv1_unsolicited_report_interval);  	array[DEVCONF_MLDV2_UNSOLICITED_REPORT_INTERVAL] =  		jiffies_to_msecs(cnf->mldv2_unsolicited_report_interval); -#ifdef CONFIG_IPV6_PRIVACY  	array[DEVCONF_USE_TEMPADDR] = cnf->use_tempaddr;  	array[DEVCONF_TEMP_VALID_LFT] = cnf->temp_valid_lft;  	array[DEVCONF_TEMP_PREFERED_LFT] = cnf->temp_prefered_lft;  	array[DEVCONF_REGEN_MAX_RETRY] = cnf->regen_max_retry;  	array[DEVCONF_MAX_DESYNC_FACTOR] = cnf->max_desync_factor; -#endif  	array[DEVCONF_MAX_ADDRESSES] = cnf->max_addresses;  	array[DEVCONF_ACCEPT_RA_DEFRTR] = cnf->accept_ra_defrtr;  	array[DEVCONF_ACCEPT_RA_PINFO] = cnf->accept_ra_pinfo; @@ -4196,7 +4358,7 @@ static inline void __snmp6_fill_statsdev(u64 *stats, atomic_long_t *mib,  	memset(&stats[items], 0, pad);  } -static inline void __snmp6_fill_stats64(u64 *stats, void __percpu **mib, +static inline void __snmp6_fill_stats64(u64 *stats, void __percpu *mib,  				      int items, int bytes, size_t syncpoff)  {  	int i; @@ -4216,7 +4378,7 @@ static void snmp6_fill_stats(u64 *stats, struct inet6_dev *idev, int attrtype,  {  	switch (attrtype) {  	case IFLA_INET6_STATS: -		__snmp6_fill_stats64(stats, (void __percpu **)idev->stats.ipv6, +		__snmp6_fill_stats64(stats, idev->stats.ipv6,  				     IPSTATS_MIB_MAX, bytes, offsetof(struct ipstats_mib, syncp));  		break;  	case IFLA_INET6_ICMP6STATS: @@ -4235,7 +4397,7 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev)  	ci.max_reasm_len = IPV6_MAXPLEN;  	ci.tstamp = cstamp_delta(idev->tstamp);  	ci.reachable_time = jiffies_to_msecs(idev->nd_parms->reachable_time); -	ci.retrans_time = jiffies_to_msecs(idev->nd_parms->retrans_time); +	ci.retrans_time = jiffies_to_msecs(NEIGH_VAR(idev->nd_parms, RETRANS_TIME));  	if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci))  		goto nla_put_failure;  	nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); @@ -4296,6 +4458,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)  	bool update_rs = false;  	struct in6_addr ll_addr; +	ASSERT_RTNL(); +  	if (token == NULL)  		return -EINVAL;  	if (ipv6_addr_any(token)) @@ -4344,7 +4508,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)  	}  	write_unlock_bh(&idev->lock); -	addrconf_verify(0); +	addrconf_verify_rtnl();  	return 0;  } @@ -4542,29 +4706,17 @@ errout:  		rtnl_set_sk_err(net, RTNLGRP_IPV6_PREFIX, err);  } -static void update_valid_ll_addr_cnt(struct inet6_ifaddr *ifp, int count) -{ -	write_lock_bh(&ifp->idev->lock); -	spin_lock(&ifp->lock); -	if (((ifp->flags & (IFA_F_PERMANENT|IFA_F_TENTATIVE|IFA_F_OPTIMISTIC| -			    IFA_F_DADFAILED)) == IFA_F_PERMANENT) && -	    (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) -		ifp->idev->valid_ll_addr_cnt += count; -	WARN_ON(ifp->idev->valid_ll_addr_cnt < 0); -	spin_unlock(&ifp->lock); -	write_unlock_bh(&ifp->idev->lock); -} -  static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)  {  	struct net *net = dev_net(ifp->idev->dev); +	if (event) +		ASSERT_RTNL(); +  	inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);  	switch (event) {  	case RTM_NEWADDR: -		update_valid_ll_addr_cnt(ifp, 1); -  		/*  		 * If the address was optimistic  		 * we inserted the route at the start of @@ -4580,8 +4732,6 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)  					      ifp->idev->dev, 0, 0);  		break;  	case RTM_DELADDR: -		update_valid_ll_addr_cnt(ifp, -1); -  		if (ifp->idev->cnf.forwarding)  			addrconf_leave_anycast(ifp);  		addrconf_leave_solict(ifp->idev, &ifp->addr); @@ -4728,6 +4878,46 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write,  	return ret;  } +static +int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, +			      void __user *buffer, size_t *lenp, loff_t *ppos) +{ +	int *valp = ctl->data; +	int ret; +	int old, new; + +	old = *valp; +	ret = proc_dointvec(ctl, write, buffer, lenp, ppos); +	new = *valp; + +	if (write && old != new) { +		struct net *net = ctl->extra2; + +		if (!rtnl_trylock()) +			return restart_syscall(); + +		if (valp == &net->ipv6.devconf_dflt->proxy_ndp) +			inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, +						     NETCONFA_IFINDEX_DEFAULT, +						     net->ipv6.devconf_dflt); +		else if (valp == &net->ipv6.devconf_all->proxy_ndp) +			inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, +						     NETCONFA_IFINDEX_ALL, +						     net->ipv6.devconf_all); +		else { +			struct inet6_dev *idev = ctl->extra1; + +			inet6_netconf_notify_devconf(net, NETCONFA_PROXY_NEIGH, +						     idev->dev->ifindex, +						     &idev->cnf); +		} +		rtnl_unlock(); +	} + +	return ret; +} + +  static struct addrconf_sysctl_table  {  	struct ctl_table_header *sysctl_header; @@ -4828,7 +5018,6 @@ static struct addrconf_sysctl_table  			.mode		= 0644,  			.proc_handler	= proc_dointvec_ms_jiffies,  		}, -#ifdef CONFIG_IPV6_PRIVACY  		{  			.procname	= "use_tempaddr",  			.data		= &ipv6_devconf.use_tempaddr, @@ -4864,7 +5053,6 @@ static struct addrconf_sysctl_table  			.mode		= 0644,  			.proc_handler	= proc_dointvec,  		}, -#endif  		{  			.procname	= "max_addresses",  			.data		= &ipv6_devconf.max_addresses, @@ -4916,7 +5104,7 @@ static struct addrconf_sysctl_table  			.data		= &ipv6_devconf.proxy_ndp,  			.maxlen		= sizeof(int),  			.mode		= 0644, -			.proc_handler	= proc_dointvec, +			.proc_handler	= addrconf_sysctl_proxy_ndp,  		},  		{  			.procname	= "accept_source_route", @@ -5032,7 +5220,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p)  static void addrconf_sysctl_register(struct inet6_dev *idev)  { -	neigh_sysctl_register(idev->dev, idev->nd_parms, "ipv6", +	neigh_sysctl_register(idev->dev, idev->nd_parms,  			      &ndisc_ifinfo_sysctl_change);  	__addrconf_sysctl_register(dev_net(idev->dev), idev->dev->name,  					idev, &idev->cnf); @@ -5133,6 +5321,12 @@ int __init addrconf_init(void)  	if (err < 0)  		goto out_addrlabel; +	addrconf_wq = create_workqueue("ipv6_addrconf"); +	if (!addrconf_wq) { +		err = -ENOMEM; +		goto out_nowq; +	} +  	/* The addrconf netdev notifier requires that loopback_dev  	 * has it's ipv6 private information allocated and setup  	 * before it can bring up and give link-local addresses @@ -5163,11 +5357,9 @@ int __init addrconf_init(void)  	register_netdevice_notifier(&ipv6_dev_notf); -	addrconf_verify(0); +	addrconf_verify(); -	err = rtnl_af_register(&inet6_ops); -	if (err < 0) -		goto errout_af; +	rtnl_af_register(&inet6_ops);  	err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo,  			      NULL); @@ -5191,9 +5383,10 @@ int __init addrconf_init(void)  	return 0;  errout:  	rtnl_af_unregister(&inet6_ops); -errout_af:  	unregister_netdevice_notifier(&ipv6_dev_notf);  errlo: +	destroy_workqueue(addrconf_wq); +out_nowq:  	unregister_pernet_subsys(&addrconf_ops);  out_addrlabel:  	ipv6_addr_label_cleanup(); @@ -5229,7 +5422,8 @@ void addrconf_cleanup(void)  	for (i = 0; i < IN6_ADDR_HSIZE; i++)  		WARN_ON(!hlist_empty(&inet6_addr_lst[i]));  	spin_unlock_bh(&addrconf_hash_lock); - -	del_timer(&addr_chk_timer); +	cancel_delayed_work(&addr_chk_work);  	rtnl_unlock(); + +	destroy_workqueue(addrconf_wq);  } diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 4c11cbcf830..e6960457f62 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -123,7 +123,7 @@ static void snmp6_free_dev(struct inet6_dev *idev)  {  	kfree(idev->stats.icmpv6msgdev);  	kfree(idev->stats.icmpv6dev); -	snmp_mib_free((void __percpu **)idev->stats.ipv6); +	free_percpu(idev->stats.ipv6);  }  /* Nobody refers to this device, we may destroy it. */ diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index b30ad3741b4..731e1e1722d 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -6,7 +6,7 @@   */  /*   * Author: - * 	YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org> + *	YOSHIFUJI Hideaki @ USAGI/WIDE Project <yoshfuji@linux-ipv6.org>   */  #include <linux/kernel.h> @@ -22,14 +22,13 @@  #if 0  #define ADDRLABEL(x...) printk(x)  #else -#define ADDRLABEL(x...) do { ; } while(0) +#define ADDRLABEL(x...) do { ; } while (0)  #endif  /*   * Policy Table   */ -struct ip6addrlbl_entry -{ +struct ip6addrlbl_entry {  #ifdef CONFIG_NET_NS  	struct net *lbl_net;  #endif @@ -88,39 +87,39 @@ static const __net_initconst struct ip6addrlbl_init_table  	{	/* ::/0 */  		.prefix = &in6addr_any,  		.label = 1, -	},{	/* fc00::/7 */ -		.prefix = &(struct in6_addr){{{ 0xfc }}}, +	}, {	/* fc00::/7 */ +		.prefix = &(struct in6_addr){ { { 0xfc } } } ,  		.prefixlen = 7,  		.label = 5, -	},{	/* fec0::/10 */ -		.prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, +	}, {	/* fec0::/10 */ +		.prefix = &(struct in6_addr){ { { 0xfe, 0xc0 } } },  		.prefixlen = 10,  		.label = 11, -	},{	/* 2002::/16 */ -		.prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, +	}, {	/* 2002::/16 */ +		.prefix = &(struct in6_addr){ { { 0x20, 0x02 } } },  		.prefixlen = 16,  		.label = 2, -	},{	/* 3ffe::/16 */ -		.prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, +	}, {	/* 3ffe::/16 */ +		.prefix = &(struct in6_addr){ { { 0x3f, 0xfe } } },  		.prefixlen = 16,  		.label = 12, -	},{	/* 2001::/32 */ -		.prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, +	}, {	/* 2001::/32 */ +		.prefix = &(struct in6_addr){ { { 0x20, 0x01 } } },  		.prefixlen = 32,  		.label = 6, -	},{	/* 2001:10::/28 */ -		.prefix = &(struct in6_addr){{{ 0x20, 0x01, 0x00, 0x10 }}}, +	}, {	/* 2001:10::/28 */ +		.prefix = &(struct in6_addr){ { { 0x20, 0x01, 0x00, 0x10 } } },  		.prefixlen = 28,  		.label = 7, -	},{	/* ::ffff:0:0 */ -		.prefix = &(struct in6_addr){{{ [10] = 0xff, [11] = 0xff }}}, +	}, {	/* ::ffff:0:0 */ +		.prefix = &(struct in6_addr){ { { [10] = 0xff, [11] = 0xff } } },  		.prefixlen = 96,  		.label = 4, -	},{	/* ::/96 */ +	}, {	/* ::/96 */  		.prefix = &in6addr_any,  		.prefixlen = 96,  		.label = 3, -	},{	/* ::1/128 */ +	}, {	/* ::1/128 */  		.prefix = &in6addr_loopback,  		.prefixlen = 128,  		.label = 0, @@ -441,7 +440,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh)  	if (label == IPV6_ADDR_LABEL_DEFAULT)  		return -EINVAL; -	switch(nlh->nlmsg_type) { +	switch (nlh->nlmsg_type) {  	case RTM_NEWADDRLABEL:  		if (ifal->ifal_index &&  		    !__dev_get_by_index(net, ifal->ifal_index)) @@ -505,12 +504,13 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb)  	hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) {  		if (idx >= s_idx &&  		    net_eq(ip6addrlbl_net(p), net)) { -			if ((err = ip6addrlbl_fill(skb, p, -						   ip6addrlbl_table.seq, -						   NETLINK_CB(cb->skb).portid, -						   cb->nlh->nlmsg_seq, -						   RTM_NEWADDRLABEL, -						   NLM_F_MULTI)) <= 0) +			err = ip6addrlbl_fill(skb, p, +					      ip6addrlbl_table.seq, +					      NETLINK_CB(cb->skb).portid, +					      cb->nlh->nlmsg_seq, +					      RTM_NEWADDRLABEL, +					      NLM_F_MULTI); +			if (err <= 0)  				break;  		}  		idx++; @@ -527,7 +527,7 @@ static inline int ip6addrlbl_msgsize(void)  		+ nla_total_size(4);	/* IFAL_LABEL */  } -static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh) +static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh)  {  	struct net *net = sock_net(in_skb->sk);  	struct ifaddrlblmsg *ifal; @@ -568,7 +568,8 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh)  		goto out;  	} -	if (!(skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL))) { +	skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); +	if (!skb) {  		ip6addrlbl_put(p);  		return -ENOBUFS;  	} diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 7c96100b021..7cb4392690d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -106,15 +106,9 @@ static int inet6_create(struct net *net, struct socket *sock, int protocol,  	struct inet_protosw *answer;  	struct proto *answer_prot;  	unsigned char answer_flags; -	char answer_no_check;  	int try_loading_module = 0;  	int err; -	if (sock->type != SOCK_RAW && -	    sock->type != SOCK_DGRAM && -	    !inet_ehash_secret) -		build_ehash_secret(); -  	/* Look for the requested type/protocol pair. */  lookup_protocol:  	err = -ESOCKTNOSUPPORT; @@ -167,7 +161,6 @@ lookup_protocol:  	sock->ops = answer->ops;  	answer_prot = answer->prot; -	answer_no_check = answer->no_check;  	answer_flags = answer->flags;  	rcu_read_unlock(); @@ -181,7 +174,6 @@ lookup_protocol:  	sock_init_data(sock, sk);  	err = 0; -	sk->sk_no_check = answer_no_check;  	if (INET_PROTOSW_REUSE & answer_flags)  		sk->sk_reuse = SK_CAN_REUSE; @@ -218,7 +210,7 @@ lookup_protocol:  	inet->mc_list	= NULL;  	inet->rcv_tos	= 0; -	if (ipv4_config.no_pmtu_disc) +	if (net->ipv4.sysctl_ip_no_pmtu_disc)  		inet->pmtudisc = IP_PMTUDISC_DONT;  	else  		inet->pmtudisc = IP_PMTUDISC_WANT; @@ -364,7 +356,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)  	inet->inet_rcv_saddr = v4addr;  	inet->inet_saddr = v4addr; -	np->rcv_saddr = addr->sin6_addr; +	sk->sk_v6_rcv_saddr = addr->sin6_addr;  	if (!(addr_type & IPV6_ADDR_MULTICAST))  		np->saddr = addr->sin6_addr; @@ -461,14 +453,14 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,  		    peer == 1)  			return -ENOTCONN;  		sin->sin6_port = inet->inet_dport; -		sin->sin6_addr = np->daddr; +		sin->sin6_addr = sk->sk_v6_daddr;  		if (np->sndflow)  			sin->sin6_flowinfo = np->flow_label;  	} else { -		if (ipv6_addr_any(&np->rcv_saddr)) +		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr))  			sin->sin6_addr = np->saddr;  		else -			sin->sin6_addr = np->rcv_saddr; +			sin->sin6_addr = sk->sk_v6_rcv_saddr;  		sin->sin6_port = inet->inet_sport;  	} @@ -655,7 +647,7 @@ int inet6_sk_rebuild_header(struct sock *sk)  		memset(&fl6, 0, sizeof(fl6));  		fl6.flowi6_proto = sk->sk_protocol; -		fl6.daddr = np->daddr; +		fl6.daddr = sk->sk_v6_daddr;  		fl6.saddr = np->saddr;  		fl6.flowlabel = np->flow_label;  		fl6.flowi6_oif = sk->sk_bound_dev_if; @@ -666,7 +658,7 @@ int inet6_sk_rebuild_header(struct sock *sk)  		final_p = fl6_update_dst(&fl6, np->opt, &final); -		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); +		dst = ip6_dst_lookup_flow(sk, &fl6, final_p);  		if (IS_ERR(dst)) {  			sk->sk_route_caps = 0;  			sk->sk_err_soft = -PTR_ERR(dst); @@ -688,8 +680,7 @@ bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb)  	if (np->rxopt.all) {  		if ((opt->hop && (np->rxopt.bits.hopopts ||  				  np->rxopt.bits.ohopopts)) || -		    ((IPV6_FLOWINFO_MASK & -		      *(__be32 *)skb_network_header(skb)) && +		    (ip6_flowinfo((struct ipv6hdr *) skb_network_header(skb)) &&  		     np->rxopt.bits.rxflow) ||  		    (opt->srcrt && (np->rxopt.bits.srcrt ||  		     np->rxopt.bits.osrcrt)) || @@ -719,21 +710,27 @@ static void ipv6_packet_cleanup(void)  static int __net_init ipv6_init_mibs(struct net *net)  { -	if (snmp_mib_init((void __percpu **)net->mib.udp_stats_in6, -			  sizeof(struct udp_mib), -			  __alignof__(struct udp_mib)) < 0) +	int i; + +	net->mib.udp_stats_in6 = alloc_percpu(struct udp_mib); +	if (!net->mib.udp_stats_in6)  		return -ENOMEM; -	if (snmp_mib_init((void __percpu **)net->mib.udplite_stats_in6, -			  sizeof(struct udp_mib), -			  __alignof__(struct udp_mib)) < 0) +	net->mib.udplite_stats_in6 = alloc_percpu(struct udp_mib); +	if (!net->mib.udplite_stats_in6)  		goto err_udplite_mib; -	if (snmp_mib_init((void __percpu **)net->mib.ipv6_statistics, -			  sizeof(struct ipstats_mib), -			  __alignof__(struct ipstats_mib)) < 0) +	net->mib.ipv6_statistics = alloc_percpu(struct ipstats_mib); +	if (!net->mib.ipv6_statistics)  		goto err_ip_mib; -	if (snmp_mib_init((void __percpu **)net->mib.icmpv6_statistics, -			  sizeof(struct icmpv6_mib), -			  __alignof__(struct icmpv6_mib)) < 0) + +	for_each_possible_cpu(i) { +		struct ipstats_mib *af_inet6_stats; +		af_inet6_stats = per_cpu_ptr(net->mib.ipv6_statistics, i); +		u64_stats_init(&af_inet6_stats->syncp); +	} + + +	net->mib.icmpv6_statistics = alloc_percpu(struct icmpv6_mib); +	if (!net->mib.icmpv6_statistics)  		goto err_icmp_mib;  	net->mib.icmpv6msg_statistics = kzalloc(sizeof(struct icmpv6msg_mib),  						GFP_KERNEL); @@ -742,22 +739,22 @@ static int __net_init ipv6_init_mibs(struct net *net)  	return 0;  err_icmpmsg_mib: -	snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); +	free_percpu(net->mib.icmpv6_statistics);  err_icmp_mib: -	snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); +	free_percpu(net->mib.ipv6_statistics);  err_ip_mib: -	snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); +	free_percpu(net->mib.udplite_stats_in6);  err_udplite_mib: -	snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); +	free_percpu(net->mib.udp_stats_in6);  	return -ENOMEM;  }  static void ipv6_cleanup_mibs(struct net *net)  { -	snmp_mib_free((void __percpu **)net->mib.udp_stats_in6); -	snmp_mib_free((void __percpu **)net->mib.udplite_stats_in6); -	snmp_mib_free((void __percpu **)net->mib.ipv6_statistics); -	snmp_mib_free((void __percpu **)net->mib.icmpv6_statistics); +	free_percpu(net->mib.udp_stats_in6); +	free_percpu(net->mib.udplite_stats_in6); +	free_percpu(net->mib.ipv6_statistics); +	free_percpu(net->mib.icmpv6_statistics);  	kfree(net->mib.icmpv6msg_statistics);  } @@ -767,6 +764,7 @@ static int __net_init inet6_net_init(struct net *net)  	net->ipv6.sysctl.bindv6only = 0;  	net->ipv6.sysctl.icmpv6_time = 1*HZ; +	net->ipv6.sysctl.flowlabel_consistency = 1;  	atomic_set(&net->ipv6.rt_genid, 0);  	err = ipv6_init_mibs(net); @@ -870,8 +868,6 @@ static int __init inet6_init(void)  	if (err)  		goto out_sock_register_fail; -	tcpv6_prot.sysctl_mem = init_net.ipv4.sysctl_tcp_mem; -  	/*  	 *	ipngwg API draft makes clear that the correct semantics  	 *	for TCP and UDP is to consider one TCP and UDP instance @@ -965,10 +961,10 @@ out:  #ifdef CONFIG_SYSCTL  sysctl_fail: -	ipv6_packet_cleanup(); +	pingv6_exit();  #endif  pingv6_fail: -	pingv6_exit(); +	ipv6_packet_cleanup();  ipv6_packet_fail:  	tcpv6_exit();  tcpv6_fail: @@ -1028,52 +1024,4 @@ out_unregister_tcp_proto:  }  module_init(inet6_init); -static void __exit inet6_exit(void) -{ -	if (disable_ipv6_mod) -		return; - -	/* First of all disallow new sockets creation. */ -	sock_unregister(PF_INET6); -	/* Disallow any further netlink messages */ -	rtnl_unregister_all(PF_INET6); - -	udpv6_exit(); -	udplitev6_exit(); -	tcpv6_exit(); - -	/* Cleanup code parts. */ -	ipv6_packet_cleanup(); -	ipv6_frag_exit(); -	ipv6_exthdrs_exit(); -	addrconf_cleanup(); -	ip6_flowlabel_cleanup(); -	ndisc_late_cleanup(); -	ip6_route_cleanup(); -#ifdef CONFIG_PROC_FS - -	/* Cleanup code parts. */ -	if6_proc_exit(); -	ipv6_misc_proc_exit(); -	udplite6_proc_exit(); -	raw6_proc_exit(); -#endif -	ipv6_netfilter_fini(); -	ipv6_stub = NULL; -	igmp6_cleanup(); -	ndisc_cleanup(); -	ip6_mr_cleanup(); -	icmpv6_cleanup(); -	rawv6_exit(); - -	unregister_pernet_subsys(&inet6_net_ops); -	proto_unregister(&rawv6_prot); -	proto_unregister(&udplitev6_prot); -	proto_unregister(&udpv6_prot); -	proto_unregister(&tcpv6_prot); - -	rcu_barrier(); /* Wait for completion of call_rcu()'s */ -} -module_exit(inet6_exit); -  MODULE_ALIAS_NETPROTO(PF_INET6); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 73784c3d464..72a4930bdc0 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -12,8 +12,7 @@   * GNU General Public License for more details.   *   * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * along with this program; if not, see <http://www.gnu.org/licenses/>.   *   * Authors   * @@ -347,6 +346,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)  	struct ip_auth_hdr *ah;  	struct ah_data *ahp;  	struct tmp_ext *iph_ext; +	int seqhi_len = 0; +	__be32 *seqhi; +	int sglists = 0; +	struct scatterlist *seqhisg;  	ahp = x->data;  	ahash = ahp->ahash; @@ -360,15 +363,22 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)  	if (extlen)  		extlen += sizeof(*iph_ext); +	if (x->props.flags & XFRM_STATE_ESN) { +		sglists = 1; +		seqhi_len = sizeof(*seqhi); +	}  	err = -ENOMEM; -	iph_base = ah_alloc_tmp(ahash, nfrags, IPV6HDR_BASELEN + extlen); +	iph_base = ah_alloc_tmp(ahash, nfrags + sglists, IPV6HDR_BASELEN + +				extlen + seqhi_len);  	if (!iph_base)  		goto out;  	iph_ext = ah_tmp_ext(iph_base); -	icv = ah_tmp_icv(ahash, iph_ext, extlen); +	seqhi = (__be32 *)((char *)iph_ext + extlen); +	icv = ah_tmp_icv(ahash, seqhi, seqhi_len);  	req = ah_tmp_req(ahash, icv);  	sg = ah_req_sg(ahash, req); +	seqhisg = sg + nfrags;  	ah = ip_auth_hdr(skb);  	memset(ah->auth_data, 0, ahp->icv_trunc_len); @@ -412,10 +422,15 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb)  	ah->spi = x->id.spi;  	ah->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); -	sg_init_table(sg, nfrags); -	skb_to_sgvec(skb, sg, 0, skb->len); +	sg_init_table(sg, nfrags + sglists); +	skb_to_sgvec_nomark(skb, sg, 0, skb->len); -	ahash_request_set_crypt(req, sg, icv, skb->len); +	if (x->props.flags & XFRM_STATE_ESN) { +		/* Attach seqhi sg right after packet payload */ +		*seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); +		sg_set_buf(seqhisg, seqhi, seqhi_len); +	} +	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);  	ahash_request_set_callback(req, 0, ah6_output_done, skb);  	AH_SKB_CB(skb)->tmp = iph_base; @@ -515,6 +530,10 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)  	int nexthdr;  	int nfrags;  	int err = -ENOMEM; +	int seqhi_len = 0; +	__be32 *seqhi; +	int sglists = 0; +	struct scatterlist *seqhisg;  	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))  		goto out; @@ -551,14 +570,22 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)  	skb_push(skb, hdr_len); -	work_iph = ah_alloc_tmp(ahash, nfrags, hdr_len + ahp->icv_trunc_len); +	if (x->props.flags & XFRM_STATE_ESN) { +		sglists = 1; +		seqhi_len = sizeof(*seqhi); +	} + +	work_iph = ah_alloc_tmp(ahash, nfrags + sglists, hdr_len + +				ahp->icv_trunc_len + seqhi_len);  	if (!work_iph)  		goto out; -	auth_data = ah_tmp_auth(work_iph, hdr_len); -	icv = ah_tmp_icv(ahash, auth_data, ahp->icv_trunc_len); +	auth_data = ah_tmp_auth((u8 *)work_iph, hdr_len); +	seqhi = (__be32 *)(auth_data + ahp->icv_trunc_len); +	icv = ah_tmp_icv(ahash, seqhi, seqhi_len);  	req = ah_tmp_req(ahash, icv);  	sg = ah_req_sg(ahash, req); +	seqhisg = sg + nfrags;  	memcpy(work_iph, ip6h, hdr_len);  	memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); @@ -573,10 +600,16 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb)  	ip6h->flow_lbl[2] = 0;  	ip6h->hop_limit   = 0; -	sg_init_table(sg, nfrags); -	skb_to_sgvec(skb, sg, 0, skb->len); +	sg_init_table(sg, nfrags + sglists); +	skb_to_sgvec_nomark(skb, sg, 0, skb->len); + +	if (x->props.flags & XFRM_STATE_ESN) { +		/* Attach seqhi sg right after packet payload */ +		*seqhi = XFRM_SKB_CB(skb)->seq.input.hi; +		sg_set_buf(seqhisg, seqhi, seqhi_len); +	} -	ahash_request_set_crypt(req, sg, icv, skb->len); +	ahash_request_set_crypt(req, sg, icv, skb->len + seqhi_len);  	ahash_request_set_callback(req, 0, ah6_input_done, skb);  	AH_SKB_CB(skb)->tmp = work_iph; @@ -610,28 +643,29 @@ out:  	return err;  } -static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -		    u8 type, u8 code, int offset, __be32 info) +static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +		   u8 type, u8 code, int offset, __be32 info)  {  	struct net *net = dev_net(skb->dev);  	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;  	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);  	struct xfrm_state *x; -	if (type != ICMPV6_DEST_UNREACH && -	    type != ICMPV6_PKT_TOOBIG && +	if (type != ICMPV6_PKT_TOOBIG &&  	    type != NDISC_REDIRECT) -		return; +		return 0;  	x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);  	if (!x) -		return; +		return 0;  	if (type == NDISC_REDIRECT)  		ip6_redirect(skb, net, skb->dev->ifindex, 0);  	else  		ip6_update_pmtu(skb, net, info, 0, 0);  	xfrm_state_put(x); + +	return 0;  }  static int ah6_init_state(struct xfrm_state *x) @@ -716,6 +750,11 @@ static void ah6_destroy(struct xfrm_state *x)  	kfree(ahp);  } +static int ah6_rcv_cb(struct sk_buff *skb, int err) +{ +	return 0; +} +  static const struct xfrm_type ah6_type =  {  	.description	= "AH6", @@ -729,10 +768,11 @@ static const struct xfrm_type ah6_type =  	.hdr_offset	= xfrm6_find_1stfragopt,  }; -static const struct inet6_protocol ah6_protocol = { +static struct xfrm6_protocol ah6_protocol = {  	.handler	=	xfrm6_rcv, +	.cb_handler	=	ah6_rcv_cb,  	.err_handler	=	ah6_err, -	.flags		=	INET6_PROTO_NOPOLICY, +	.priority	=	0,  };  static int __init ah6_init(void) @@ -742,7 +782,7 @@ static int __init ah6_init(void)  		return -EAGAIN;  	} -	if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) { +	if (xfrm6_protocol_register(&ah6_protocol, IPPROTO_AH) < 0) {  		pr_info("%s: can't add protocol\n", __func__);  		xfrm_unregister_type(&ah6_type, AF_INET6);  		return -EAGAIN; @@ -753,7 +793,7 @@ static int __init ah6_init(void)  static void __exit ah6_fini(void)  { -	if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0) +	if (xfrm6_protocol_deregister(&ah6_protocol, IPPROTO_AH) < 0)  		pr_info("%s: can't remove protocol\n", __func__);  	if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0) diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index 5a80f15a9de..21018324468 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -383,6 +383,17 @@ bool ipv6_chk_acast_addr(struct net *net, struct net_device *dev,  	return found;  } +/*	check if this anycast address is link-local on given interface or + *	is global + */ +bool ipv6_chk_acast_addr_src(struct net *net, struct net_device *dev, +			     const struct in6_addr *addr) +{ +	return ipv6_chk_acast_addr(net, +				   (ipv6_addr_type(addr) & IPV6_ADDR_LINKLOCAL ? +				    dev : NULL), +				   addr); +}  #ifdef CONFIG_PROC_FS  struct ac6_iter_state { diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 48b6bd2a9a1..c3bf2d2e519 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -73,7 +73,6 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)  			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);  			if (flowlabel == NULL)  				return -EINVAL; -			usin->sin6_addr = flowlabel->dst;  		}  	} @@ -107,16 +106,16 @@ ipv4_connected:  		if (err)  			goto out; -		ipv6_addr_set_v4mapped(inet->inet_daddr, &np->daddr); +		ipv6_addr_set_v4mapped(inet->inet_daddr, &sk->sk_v6_daddr);  		if (ipv6_addr_any(&np->saddr) ||  		    ipv6_mapped_addr_any(&np->saddr))  			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); -		if (ipv6_addr_any(&np->rcv_saddr) || -		    ipv6_mapped_addr_any(&np->rcv_saddr)) { +		if (ipv6_addr_any(&sk->sk_v6_rcv_saddr) || +		    ipv6_mapped_addr_any(&sk->sk_v6_rcv_saddr)) {  			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, -					       &np->rcv_saddr); +					       &sk->sk_v6_rcv_saddr);  			if (sk->sk_prot->rehash)  				sk->sk_prot->rehash(sk);  		} @@ -145,7 +144,7 @@ ipv4_connected:  		}  	} -	np->daddr = *daddr; +	sk->sk_v6_daddr = *daddr;  	np->flow_label = fl6.flowlabel;  	inet->inet_dport = usin->sin6_port; @@ -156,7 +155,7 @@ ipv4_connected:  	 */  	fl6.flowi6_proto = sk->sk_protocol; -	fl6.daddr = np->daddr; +	fl6.daddr = sk->sk_v6_daddr;  	fl6.saddr = np->saddr;  	fl6.flowi6_oif = sk->sk_bound_dev_if;  	fl6.flowi6_mark = sk->sk_mark; @@ -171,7 +170,7 @@ ipv4_connected:  	opt = flowlabel ? flowlabel->opt : np->opt;  	final_p = fl6_update_dst(&fl6, opt, &final); -	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); +	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);  	err = 0;  	if (IS_ERR(dst)) {  		err = PTR_ERR(dst); @@ -183,16 +182,16 @@ ipv4_connected:  	if (ipv6_addr_any(&np->saddr))  		np->saddr = fl6.saddr; -	if (ipv6_addr_any(&np->rcv_saddr)) { -		np->rcv_saddr = fl6.saddr; +	if (ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { +		sk->sk_v6_rcv_saddr = fl6.saddr;  		inet->inet_rcv_saddr = LOOPBACK4_IPV6;  		if (sk->sk_prot->rehash)  			sk->sk_prot->rehash(sk);  	}  	ip6_dst_store(sk, dst, -		      ipv6_addr_equal(&fl6.daddr, &np->daddr) ? -		      &np->daddr : NULL, +		      ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? +		      &sk->sk_v6_daddr : NULL,  #ifdef CONFIG_IPV6_SUBTREES  		      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?  		      &np->saddr : @@ -206,6 +205,16 @@ out:  }  EXPORT_SYMBOL_GPL(ip6_datagram_connect); +int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, +				 int addr_len) +{ +	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, uaddr); +	if (sin6->sin6_family != AF_INET6) +		return -EAFNOSUPPORT; +	return ip6_datagram_connect(sk, uaddr, addr_len); +} +EXPORT_SYMBOL_GPL(ip6_datagram_connect_v6_only); +  void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err,  		     __be16 port, u32 info, u8 *payload)  { @@ -318,12 +327,12 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu)  /*   *	Handle MSG_ERRQUEUE   */ -int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)  {  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct sock_exterr_skb *serr;  	struct sk_buff *skb, *skb2; -	struct sockaddr_in6 *sin; +	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);  	struct {  		struct sock_extended_err ee;  		struct sockaddr_in6	 offender; @@ -349,7 +358,6 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)  	serr = SKB_EXT_ERR(skb); -	sin = (struct sockaddr_in6 *)msg->msg_name;  	if (sin) {  		const unsigned char *nh = skb_network_header(skb);  		sin->sin6_family = AF_INET6; @@ -369,6 +377,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)  					       &sin->sin6_addr);  			sin->sin6_scope_id = 0;  		} +		*addr_len = sizeof(*sin);  	}  	memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); @@ -377,10 +386,13 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len)  	if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) {  		sin->sin6_family = AF_INET6;  		sin->sin6_flowinfo = 0; +		sin->sin6_port = 0; +		if (np->rxopt.all) +			ip6_datagram_recv_common_ctl(sk, msg, skb);  		if (skb->protocol == htons(ETH_P_IPV6)) {  			sin->sin6_addr = ipv6_hdr(skb)->saddr;  			if (np->rxopt.all) -				ip6_datagram_recv_ctl(sk, msg, skb); +				ip6_datagram_recv_specific_ctl(sk, msg, skb);  			sin->sin6_scope_id =  				ipv6_iface_scope_id(&sin->sin6_addr,  						    IP6CB(skb)->iif); @@ -423,12 +435,13 @@ EXPORT_SYMBOL_GPL(ipv6_recv_error);  /*   *	Handle IPV6_RECVPATHMTU   */ -int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len) +int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, +		     int *addr_len)  {  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct sk_buff *skb; -	struct sockaddr_in6 *sin;  	struct ip6_mtuinfo mtu_info; +	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin, msg->msg_name);  	int err;  	int copied; @@ -450,13 +463,13 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len)  	memcpy(&mtu_info, IP6CBMTU(skb), sizeof(mtu_info)); -	sin = (struct sockaddr_in6 *)msg->msg_name;  	if (sin) {  		sin->sin6_family = AF_INET6;  		sin->sin6_flowinfo = 0;  		sin->sin6_port = 0;  		sin->sin6_scope_id = mtu_info.ip6m_addr.sin6_scope_id;  		sin->sin6_addr = mtu_info.ip6m_addr.sin6_addr; +		*addr_len = sizeof(*sin);  	}  	put_cmsg(msg, SOL_IPV6, IPV6_PATHMTU, sizeof(mtu_info), &mtu_info); @@ -470,20 +483,34 @@ out:  } -int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, -			  struct sk_buff *skb) +void ip6_datagram_recv_common_ctl(struct sock *sk, struct msghdr *msg, +				 struct sk_buff *skb)  {  	struct ipv6_pinfo *np = inet6_sk(sk); -	struct inet6_skb_parm *opt = IP6CB(skb); -	unsigned char *nh = skb_network_header(skb); +	bool is_ipv6 = skb->protocol == htons(ETH_P_IPV6);  	if (np->rxopt.bits.rxinfo) {  		struct in6_pktinfo src_info; -		src_info.ipi6_ifindex = opt->iif; -		src_info.ipi6_addr = ipv6_hdr(skb)->daddr; +		if (is_ipv6) { +			src_info.ipi6_ifindex = IP6CB(skb)->iif; +			src_info.ipi6_addr = ipv6_hdr(skb)->daddr; +		} else { +			src_info.ipi6_ifindex = +				PKTINFO_SKB_CB(skb)->ipi_ifindex; +			ipv6_addr_set_v4mapped(ip_hdr(skb)->daddr, +					       &src_info.ipi6_addr); +		}  		put_cmsg(msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);  	} +} + +void ip6_datagram_recv_specific_ctl(struct sock *sk, struct msghdr *msg, +				    struct sk_buff *skb) +{ +	struct ipv6_pinfo *np = inet6_sk(sk); +	struct inet6_skb_parm *opt = IP6CB(skb); +	unsigned char *nh = skb_network_header(skb);  	if (np->rxopt.bits.rxhlim) {  		int hlim = ipv6_hdr(skb)->hop_limit; @@ -601,7 +628,13 @@ int ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,  			put_cmsg(msg, SOL_IPV6, IPV6_ORIGDSTADDR, sizeof(sin6), &sin6);  		}  	} -	return 0; +} + +void ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, +			  struct sk_buff *skb) +{ +	ip6_datagram_recv_common_ctl(sk, msg, skb); +	ip6_datagram_recv_specific_ctl(sk, msg, skb);  }  EXPORT_SYMBOL_GPL(ip6_datagram_recv_ctl); @@ -666,7 +699,9 @@ int ip6_datagram_send_ctl(struct net *net, struct sock *sk,  				int strict = __ipv6_addr_src_scope(addr_type) <= IPV6_ADDR_SCOPE_LINKLOCAL;  				if (!(inet_sk(sk)->freebind || inet_sk(sk)->transparent) &&  				    !ipv6_chk_addr(net, &src_info->ipi6_addr, -						   strict ? dev : NULL, 0)) +						   strict ? dev : NULL, 0) && +				    !ipv6_chk_acast_addr_src(net, dev, +							     &src_info->ipi6_addr))  					err = -EINVAL;  				else  					fl6->saddr = src_info->ipi6_addr; @@ -883,11 +918,10 @@ EXPORT_SYMBOL_GPL(ip6_datagram_send_ctl);  void ip6_dgram_sock_seq_show(struct seq_file *seq, struct sock *sp,  			     __u16 srcp, __u16 destp, int bucket)  { -	struct ipv6_pinfo *np = inet6_sk(sp);  	const struct in6_addr *dest, *src; -	dest  = &np->daddr; -	src   = &np->rcv_saddr; +	dest  = &sp->sk_v6_daddr; +	src   = &sp->sk_v6_rcv_saddr;  	seq_printf(seq,  		   "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "  		   "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %d\n", diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index d3618a78fca..d15da137714 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -12,8 +12,7 @@   * GNU General Public License for more details.   *   * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * along with this program; if not, see <http://www.gnu.org/licenses/>.   *   * Authors   * @@ -164,10 +163,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)  	u8 *iv;  	u8 *tail;  	__be32 *seqhi; -	struct esp_data *esp = x->data;  	/* skb is pure payload to encrypt */ -	aead = esp->aead; +	aead = x->data;  	alen = crypto_aead_authsize(aead);  	tfclen = 0; @@ -181,8 +179,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)  	}  	blksize = ALIGN(crypto_aead_blocksize(aead), 4);  	clen = ALIGN(skb->len + 2 + tfclen, blksize); -	if (esp->padlen) -		clen = ALIGN(clen, esp->padlen);  	plen = clen - skb->len - tfclen;  	err = skb_cow_data(skb, tfclen + plen + alen, &trailer); @@ -271,8 +267,7 @@ error:  static int esp_input_done2(struct sk_buff *skb, int err)  {  	struct xfrm_state *x = xfrm_input_state(skb); -	struct esp_data *esp = x->data; -	struct crypto_aead *aead = esp->aead; +	struct crypto_aead *aead = x->data;  	int alen = crypto_aead_authsize(aead);  	int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead);  	int elen = skb->len - hlen; @@ -325,8 +320,7 @@ static void esp_input_done(struct crypto_async_request *base, int err)  static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)  {  	struct ip_esp_hdr *esph; -	struct esp_data *esp = x->data; -	struct crypto_aead *aead = esp->aead; +	struct crypto_aead *aead = x->data;  	struct aead_request *req;  	struct sk_buff *trailer;  	int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); @@ -414,9 +408,8 @@ out:  static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)  { -	struct esp_data *esp = x->data; -	u32 blksize = ALIGN(crypto_aead_blocksize(esp->aead), 4); -	u32 align = max_t(u32, blksize, esp->padlen); +	struct crypto_aead *aead = x->data; +	u32 blksize = ALIGN(crypto_aead_blocksize(aead), 4);  	unsigned int net_adj;  	if (x->props.mode != XFRM_MODE_TUNNEL) @@ -424,49 +417,48 @@ static u32 esp6_get_mtu(struct xfrm_state *x, int mtu)  	else  		net_adj = 0; -	return ((mtu - x->props.header_len - crypto_aead_authsize(esp->aead) - -		 net_adj) & ~(align - 1)) + net_adj - 2; +	return ((mtu - x->props.header_len - crypto_aead_authsize(aead) - +		 net_adj) & ~(blksize - 1)) + net_adj - 2;  } -static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, -		     u8 type, u8 code, int offset, __be32 info) +static int esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +		    u8 type, u8 code, int offset, __be32 info)  {  	struct net *net = dev_net(skb->dev);  	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data;  	struct ip_esp_hdr *esph = (struct ip_esp_hdr *)(skb->data + offset);  	struct xfrm_state *x; -	if (type != ICMPV6_DEST_UNREACH && -	    type != ICMPV6_PKT_TOOBIG && +	if (type != ICMPV6_PKT_TOOBIG &&  	    type != NDISC_REDIRECT) -		return; +		return 0;  	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,  			      esph->spi, IPPROTO_ESP, AF_INET6);  	if (!x) -		return; +		return 0;  	if (type == NDISC_REDIRECT)  		ip6_redirect(skb, net, skb->dev->ifindex, 0);  	else  		ip6_update_pmtu(skb, net, info, 0, 0);  	xfrm_state_put(x); + +	return 0;  }  static void esp6_destroy(struct xfrm_state *x)  { -	struct esp_data *esp = x->data; +	struct crypto_aead *aead = x->data; -	if (!esp) +	if (!aead)  		return; -	crypto_free_aead(esp->aead); -	kfree(esp); +	crypto_free_aead(aead);  }  static int esp_init_aead(struct xfrm_state *x)  { -	struct esp_data *esp = x->data;  	struct crypto_aead *aead;  	int err; @@ -475,7 +467,7 @@ static int esp_init_aead(struct xfrm_state *x)  	if (IS_ERR(aead))  		goto error; -	esp->aead = aead; +	x->data = aead;  	err = crypto_aead_setkey(aead, x->aead->alg_key,  				 (x->aead->alg_key_len + 7) / 8); @@ -492,7 +484,6 @@ error:  static int esp_init_authenc(struct xfrm_state *x)  { -	struct esp_data *esp = x->data;  	struct crypto_aead *aead;  	struct crypto_authenc_key_param *param;  	struct rtattr *rta; @@ -527,7 +518,7 @@ static int esp_init_authenc(struct xfrm_state *x)  	if (IS_ERR(aead))  		goto error; -	esp->aead = aead; +	x->data = aead;  	keylen = (x->aalg ? (x->aalg->alg_key_len + 7) / 8 : 0) +  		 (x->ealg->alg_key_len + 7) / 8 + RTA_SPACE(sizeof(*param)); @@ -582,7 +573,6 @@ error:  static int esp6_init_state(struct xfrm_state *x)  { -	struct esp_data *esp;  	struct crypto_aead *aead;  	u32 align;  	int err; @@ -590,11 +580,7 @@ static int esp6_init_state(struct xfrm_state *x)  	if (x->encap)  		return -EINVAL; -	esp = kzalloc(sizeof(*esp), GFP_KERNEL); -	if (esp == NULL) -		return -ENOMEM; - -	x->data = esp; +	x->data = NULL;  	if (x->aead)  		err = esp_init_aead(x); @@ -604,9 +590,7 @@ static int esp6_init_state(struct xfrm_state *x)  	if (err)  		goto error; -	aead = esp->aead; - -	esp->padlen = 0; +	aead = x->data;  	x->props.header_len = sizeof(struct ip_esp_hdr) +  			      crypto_aead_ivsize(aead); @@ -626,14 +610,17 @@ static int esp6_init_state(struct xfrm_state *x)  	}  	align = ALIGN(crypto_aead_blocksize(aead), 4); -	if (esp->padlen) -		align = max_t(u32, align, esp->padlen); -	x->props.trailer_len = align + 1 + crypto_aead_authsize(esp->aead); +	x->props.trailer_len = align + 1 + crypto_aead_authsize(aead);  error:  	return err;  } +static int esp6_rcv_cb(struct sk_buff *skb, int err) +{ +	return 0; +} +  static const struct xfrm_type esp6_type =  {  	.description	= "ESP6", @@ -648,10 +635,11 @@ static const struct xfrm_type esp6_type =  	.hdr_offset	= xfrm6_find_1stfragopt,  }; -static const struct inet6_protocol esp6_protocol = { -	.handler 	=	xfrm6_rcv, +static struct xfrm6_protocol esp6_protocol = { +	.handler	=	xfrm6_rcv, +	.cb_handler	=	esp6_rcv_cb,  	.err_handler	=	esp6_err, -	.flags		=	INET6_PROTO_NOPOLICY, +	.priority	=	0,  };  static int __init esp6_init(void) @@ -660,7 +648,7 @@ static int __init esp6_init(void)  		pr_info("%s: can't add xfrm type\n", __func__);  		return -EAGAIN;  	} -	if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) { +	if (xfrm6_protocol_register(&esp6_protocol, IPPROTO_ESP) < 0) {  		pr_info("%s: can't add protocol\n", __func__);  		xfrm_unregister_type(&esp6_type, AF_INET6);  		return -EAGAIN; @@ -671,7 +659,7 @@ static int __init esp6_init(void)  static void __exit esp6_fini(void)  { -	if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0) +	if (xfrm6_protocol_deregister(&esp6_protocol, IPPROTO_ESP) < 0)  		pr_info("%s: can't remove protocol\n", __func__);  	if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)  		pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 140748debc4..8af3eb57f43 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -212,7 +212,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset,  		found = (nexthdr == target);  		if ((!ipv6_ext_hdr(nexthdr)) || nexthdr == NEXTHDR_NONE) { -			if (target < 0) +			if (target < 0 || found)  				break;  			return -ENOENT;  		} diff --git a/net/ipv6/exthdrs_offload.c b/net/ipv6/exthdrs_offload.c index cf77f3abfd0..447a7fbd1bb 100644 --- a/net/ipv6/exthdrs_offload.c +++ b/net/ipv6/exthdrs_offload.c @@ -25,11 +25,11 @@ int __init ipv6_exthdrs_offload_init(void)  	int ret;  	ret = inet6_add_offload(&rthdr_offload, IPPROTO_ROUTING); -	if (!ret) +	if (ret)  		goto out;  	ret = inet6_add_offload(&dstopt_offload, IPPROTO_DSTOPTS); -	if (!ret) +	if (ret)  		goto out_rt;  out: diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index e27591635f9..b4d5e1d97c1 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -122,7 +122,11 @@ out:  static bool fib6_rule_suppress(struct fib_rule *rule, struct fib_lookup_arg *arg)  {  	struct rt6_info *rt = (struct rt6_info *) arg->result; -	struct net_device *dev = rt->rt6i_idev->dev; +	struct net_device *dev = NULL; + +	if (rt->rt6i_idev) +		dev = rt->rt6i_idev->dev; +  	/* do not accept result if the route does  	 * not meet the required prefix length  	 */ @@ -165,7 +169,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)  			return 0;  	} -	if (r->tclass && r->tclass != ((ntohl(fl6->flowlabel) >> 20) & 0xff)) +	if (r->tclass && r->tclass != ip6_tclass(fl6->flowlabel))  		return 0;  	return 1; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index eef8d945b36..f6c84a6eb23 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -67,6 +67,7 @@  #include <net/icmp.h>  #include <net/xfrm.h>  #include <net/inet_common.h> +#include <net/dsfield.h>  #include <asm/uaccess.h> @@ -315,8 +316,10 @@ static void mip6_addr_swap(struct sk_buff *skb)  static inline void mip6_addr_swap(struct sk_buff *skb) {}  #endif -struct dst_entry *icmpv6_route_lookup(struct net *net, struct sk_buff *skb, -				      struct sock *sk, struct flowi6 *fl6) +static struct dst_entry *icmpv6_route_lookup(struct net *net, +					     struct sk_buff *skb, +					     struct sock *sk, +					     struct flowi6 *fl6)  {  	struct dst_entry *dst, *dst2;  	struct flowi6 fl2; @@ -397,6 +400,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	int len;  	int hlimit;  	int err = 0; +	u32 mark = IP6_REPLY_MARK(net, skb->mark);  	if ((u8 *)hdr < skb->head ||  	    (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb)) @@ -410,7 +414,8 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	 */  	addr_type = ipv6_addr_type(&hdr->daddr); -	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0)) +	if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) || +	    ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))  		saddr = &hdr->daddr;  	/* @@ -462,6 +467,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	fl6.daddr = hdr->saddr;  	if (saddr)  		fl6.saddr = *saddr; +	fl6.flowi6_mark = mark;  	fl6.flowi6_oif = iif;  	fl6.fl6_icmp_type = type;  	fl6.fl6_icmp_code = code; @@ -470,6 +476,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	sk = icmpv6_xmit_lock(net);  	if (sk == NULL)  		return; +	sk->sk_mark = mark;  	np = inet6_sk(sk);  	if (!icmpv6_xrlim_allow(sk, type, &fl6)) @@ -489,12 +496,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  	if (IS_ERR(dst))  		goto out; -	if (ipv6_addr_is_multicast(&fl6.daddr)) -		hlimit = np->mcast_hops; -	else -		hlimit = np->hop_limit; -	if (hlimit < 0) -		hlimit = ip6_dst_hoplimit(dst); +	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);  	msg.skb = skb;  	msg.offset = skb_network_offset(skb); @@ -516,7 +518,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)  			      np->tclass, NULL, &fl6, (struct rt6_info *)dst,  			      MSG_DONTWAIT, np->dontfrag);  	if (err) { -		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTERRORS); +		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);  		ip6_flush_pending_frames(sk);  	} else {  		err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, @@ -551,10 +553,14 @@ static void icmpv6_echo_reply(struct sk_buff *skb)  	struct dst_entry *dst;  	int err = 0;  	int hlimit; +	u8 tclass; +	u32 mark = IP6_REPLY_MARK(net, skb->mark);  	saddr = &ipv6_hdr(skb)->daddr; -	if (!ipv6_unicast_destination(skb)) +	if (!ipv6_unicast_destination(skb) && +	    !(net->ipv6.sysctl.anycast_src_echo_reply && +	      ipv6_anycast_destination(skb)))  		saddr = NULL;  	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); @@ -567,11 +573,13 @@ static void icmpv6_echo_reply(struct sk_buff *skb)  		fl6.saddr = *saddr;  	fl6.flowi6_oif = skb->dev->ifindex;  	fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; +	fl6.flowi6_mark = mark;  	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));  	sk = icmpv6_xmit_lock(net);  	if (sk == NULL)  		return; +	sk->sk_mark = mark;  	np = inet6_sk(sk);  	if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr)) @@ -586,12 +594,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)  	if (IS_ERR(dst))  		goto out; -	if (ipv6_addr_is_multicast(&fl6.daddr)) -		hlimit = np->mcast_hops; -	else -		hlimit = np->hop_limit; -	if (hlimit < 0) -		hlimit = ip6_dst_hoplimit(dst); +	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);  	idev = __in6_dev_get(skb->dev); @@ -599,8 +602,9 @@ static void icmpv6_echo_reply(struct sk_buff *skb)  	msg.offset = 0;  	msg.type = ICMPV6_ECHO_REPLY; +	tclass = ipv6_get_dsfield(ipv6_hdr(skb));  	err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), -				sizeof(struct icmp6hdr), hlimit, np->tclass, NULL, &fl6, +				sizeof(struct icmp6hdr), hlimit, tclass, NULL, &fl6,  				(struct rt6_info *)dst, MSG_DONTWAIT,  				np->dontfrag); @@ -694,22 +698,11 @@ static int icmpv6_rcv(struct sk_buff *skb)  	saddr = &ipv6_hdr(skb)->saddr;  	daddr = &ipv6_hdr(skb)->daddr; -	/* Perform checksum. */ -	switch (skb->ip_summed) { -	case CHECKSUM_COMPLETE: -		if (!csum_ipv6_magic(saddr, daddr, skb->len, IPPROTO_ICMPV6, -				     skb->csum)) -			break; -		/* fall through */ -	case CHECKSUM_NONE: -		skb->csum = ~csum_unfold(csum_ipv6_magic(saddr, daddr, skb->len, -					     IPPROTO_ICMPV6, 0)); -		if (__skb_checksum_complete(skb)) { -			LIMIT_NETDEBUG(KERN_DEBUG -				       "ICMPv6 checksum failed [%pI6c > %pI6c]\n", -				       saddr, daddr); -			goto csum_error; -		} +	if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) { +		LIMIT_NETDEBUG(KERN_DEBUG +			       "ICMPv6 checksum failed [%pI6c > %pI6c]\n", +			       saddr, daddr); +		goto csum_error;  	}  	if (!pskb_pull(skb, sizeof(*hdr))) @@ -984,7 +977,7 @@ int icmpv6_err_convert(u8 type, u8 code, int *err)  EXPORT_SYMBOL(icmpv6_err_convert);  #ifdef CONFIG_SYSCTL -struct ctl_table ipv6_icmp_table_template[] = { +static struct ctl_table ipv6_icmp_table_template[] = {  	{  		.procname	= "ratelimit",  		.data		= &init_net.ipv6.sysctl.icmpv6_time, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index e4311cbc8b4..a245e5ddffb 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -70,23 +70,23 @@ struct dst_entry *inet6_csk_route_req(struct sock *sk,  				      struct flowi6 *fl6,  				      const struct request_sock *req)  { -	struct inet6_request_sock *treq = inet6_rsk(req); +	struct inet_request_sock *ireq = inet_rsk(req);  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct in6_addr *final_p, final;  	struct dst_entry *dst;  	memset(fl6, 0, sizeof(*fl6));  	fl6->flowi6_proto = IPPROTO_TCP; -	fl6->daddr = treq->rmt_addr; +	fl6->daddr = ireq->ir_v6_rmt_addr;  	final_p = fl6_update_dst(fl6, np->opt, &final); -	fl6->saddr = treq->loc_addr; -	fl6->flowi6_oif = treq->iif; -	fl6->flowi6_mark = sk->sk_mark; -	fl6->fl6_dport = inet_rsk(req)->rmt_port; -	fl6->fl6_sport = inet_rsk(req)->loc_port; +	fl6->saddr = ireq->ir_v6_loc_addr; +	fl6->flowi6_oif = ireq->ir_iif; +	fl6->flowi6_mark = ireq->ir_mark; +	fl6->fl6_dport = ireq->ir_rmt_port; +	fl6->fl6_sport = htons(ireq->ir_num);  	security_req_classify_flow(req, flowi6_to_flowi(fl6)); -	dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); +	dst = ip6_dst_lookup_flow(sk, fl6, final_p);  	if (IS_ERR(dst))  		return NULL; @@ -129,13 +129,13 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk,  						     lopt->nr_table_entries)];  	     (req = *prev) != NULL;  	     prev = &req->dl_next) { -		const struct inet6_request_sock *treq = inet6_rsk(req); +		const struct inet_request_sock *ireq = inet_rsk(req); -		if (inet_rsk(req)->rmt_port == rport && +		if (ireq->ir_rmt_port == rport &&  		    req->rsk_ops->family == AF_INET6 && -		    ipv6_addr_equal(&treq->rmt_addr, raddr) && -		    ipv6_addr_equal(&treq->loc_addr, laddr) && -		    (!treq->iif || treq->iif == iif)) { +		    ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && +		    ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && +		    (!ireq->ir_iif || ireq->ir_iif == iif)) {  			WARN_ON(req->sk != NULL);  			*prevp = prev;  			return req; @@ -153,8 +153,8 @@ void inet6_csk_reqsk_queue_hash_add(struct sock *sk,  {  	struct inet_connection_sock *icsk = inet_csk(sk);  	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; -	const u32 h = inet6_synq_hash(&inet6_rsk(req)->rmt_addr, -				      inet_rsk(req)->rmt_port, +	const u32 h = inet6_synq_hash(&inet_rsk(req)->ir_v6_rmt_addr, +				      inet_rsk(req)->ir_rmt_port,  				      lopt->hash_rnd, lopt->nr_table_entries);  	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, timeout); @@ -165,11 +165,10 @@ EXPORT_SYMBOL_GPL(inet6_csk_reqsk_queue_hash_add);  void inet6_csk_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)  { -	struct ipv6_pinfo *np = inet6_sk(sk);  	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;  	sin6->sin6_family = AF_INET6; -	sin6->sin6_addr = np->daddr; +	sin6->sin6_addr = sk->sk_v6_daddr;  	sin6->sin6_port	= inet_sk(sk)->inet_dport;  	/* We do not store received flowlabel for TCP */  	sin6->sin6_flowinfo = 0; @@ -203,7 +202,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,  	memset(fl6, 0, sizeof(*fl6));  	fl6->flowi6_proto = sk->sk_protocol; -	fl6->daddr = np->daddr; +	fl6->daddr = sk->sk_v6_daddr;  	fl6->saddr = np->saddr;  	fl6->flowlabel = np->flow_label;  	IP6_ECN_flow_xmit(sk, fl6->flowlabel); @@ -217,7 +216,7 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,  	dst = __inet6_csk_dst_check(sk, np->dst_cookie);  	if (!dst) { -		dst = ip6_dst_lookup_flow(sk, fl6, final_p, false); +		dst = ip6_dst_lookup_flow(sk, fl6, final_p);  		if (!IS_ERR(dst))  			__inet6_csk_dst_store(sk, dst, NULL, NULL); @@ -225,9 +224,8 @@ static struct dst_entry *inet6_csk_route_socket(struct sock *sk,  	return dst;  } -int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused) +int inet6_csk_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl_unused)  { -	struct sock *sk = skb->sk;  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct flowi6 fl6;  	struct dst_entry *dst; @@ -245,7 +243,7 @@ int inet6_csk_xmit(struct sk_buff *skb, struct flowi *fl_unused)  	skb_dst_set_noref(skb, dst);  	/* Restore final destination back after routing done */ -	fl6.daddr = np->daddr; +	fl6.daddr = sk->sk_v6_daddr;  	res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);  	rcu_read_unlock(); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 066640e0ba8..262e13c02ec 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,6 +23,39 @@  #include <net/secure_seq.h>  #include <net/ip.h> +static unsigned int inet6_ehashfn(struct net *net, +				  const struct in6_addr *laddr, +				  const u16 lport, +				  const struct in6_addr *faddr, +				  const __be16 fport) +{ +	static u32 inet6_ehash_secret __read_mostly; +	static u32 ipv6_hash_secret __read_mostly; + +	u32 lhash, fhash; + +	net_get_random_once(&inet6_ehash_secret, sizeof(inet6_ehash_secret)); +	net_get_random_once(&ipv6_hash_secret, sizeof(ipv6_hash_secret)); + +	lhash = (__force u32)laddr->s6_addr32[3]; +	fhash = __ipv6_addr_jhash(faddr, ipv6_hash_secret); + +	return __inet6_ehashfn(lhash, lport, fhash, fport, +			       inet6_ehash_secret + net_hash_mix(net)); +} + +static int inet6_sk_ehashfn(const struct sock *sk) +{ +	const struct inet_sock *inet = inet_sk(sk); +	const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; +	const struct in6_addr *faddr = &sk->sk_v6_daddr; +	const __u16 lport = inet->inet_num; +	const __be16 fport = inet->inet_dport; +	struct net *net = sock_net(sk); + +	return inet6_ehashfn(net, laddr, lport, faddr, fport); +} +  int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw)  {  	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; @@ -89,43 +122,22 @@ begin:  	sk_nulls_for_each_rcu(sk, node, &head->chain) {  		if (sk->sk_hash != hash)  			continue; -		if (likely(INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { -			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) -				goto begintw; -			if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, -						  ports, dif))) { -				sock_put(sk); -				goto begin; -			} -		goto out; -		} -	} -	if (get_nulls_value(node) != slot) -		goto begin; - -begintw: -	/* Must check for a TIME_WAIT'er before going to listener hash. */ -	sk_nulls_for_each_rcu(sk, node, &head->twchain) { -		if (sk->sk_hash != hash) +		if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif))  			continue; -		if (likely(INET6_TW_MATCH(sk, net, saddr, daddr, -					  ports, dif))) { -			if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt))) { -				sk = NULL; -				goto out; -			} -			if (unlikely(!INET6_TW_MATCH(sk, net, saddr, daddr, -						     ports, dif))) { -				inet_twsk_put(inet_twsk(sk)); -				goto begintw; -			} +		if (unlikely(!atomic_inc_not_zero(&sk->sk_refcnt)))  			goto out; + +		if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { +			sock_gen_put(sk); +			goto begin;  		} +		goto found;  	}  	if (get_nulls_value(node) != slot) -		goto begintw; -	sk = NULL; +		goto begin;  out: +	sk = NULL; +found:  	rcu_read_unlock();  	return sk;  } @@ -140,11 +152,10 @@ static inline int compute_score(struct sock *sk, struct net *net,  	if (net_eq(sock_net(sk), net) && inet_sk(sk)->inet_num == hnum &&  	    sk->sk_family == PF_INET6) { -		const struct ipv6_pinfo *np = inet6_sk(sk);  		score = 1; -		if (!ipv6_addr_any(&np->rcv_saddr)) { -			if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) +		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { +			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))  				return -1;  			score++;  		} @@ -236,9 +247,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,  {  	struct inet_hashinfo *hinfo = death_row->hashinfo;  	struct inet_sock *inet = inet_sk(sk); -	const struct ipv6_pinfo *np = inet6_sk(sk); -	const struct in6_addr *daddr = &np->rcv_saddr; -	const struct in6_addr *saddr = &np->daddr; +	const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; +	const struct in6_addr *saddr = &sk->sk_v6_daddr;  	const int dif = sk->sk_bound_dev_if;  	const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport);  	struct net *net = sock_net(sk); @@ -248,38 +258,28 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,  	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);  	struct sock *sk2;  	const struct hlist_nulls_node *node; -	struct inet_timewait_sock *tw; +	struct inet_timewait_sock *tw = NULL;  	int twrefcnt = 0;  	spin_lock(lock); -	/* Check TIME-WAIT sockets first. */ -	sk_nulls_for_each(sk2, node, &head->twchain) { -		if (sk2->sk_hash != hash) -			continue; - -		if (likely(INET6_TW_MATCH(sk2, net, saddr, daddr, -					  ports, dif))) { -			tw = inet_twsk(sk2); -			if (twsk_unique(sk, sk2, twp)) -				goto unique; -			else -				goto not_unique; -		} -	} -	tw = NULL; - -	/* And established part... */  	sk_nulls_for_each(sk2, node, &head->chain) {  		if (sk2->sk_hash != hash)  			continue; -		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) + +		if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) { +			if (sk2->sk_state == TCP_TIME_WAIT) { +				tw = inet_twsk(sk2); +				if (twsk_unique(sk, sk2, twp)) +					break; +			}  			goto not_unique; +		}  	} -unique:  	/* Must record num and sport now. Otherwise we will see -	 * in hash table socket with a funny identity. */ +	 * in hash table socket with a funny identity. +	 */  	inet->inet_num = lport;  	inet->inet_sport = htons(lport);  	sk->sk_hash = hash; @@ -312,9 +312,9 @@ not_unique:  static inline u32 inet6_sk_port_offset(const struct sock *sk)  {  	const struct inet_sock *inet = inet_sk(sk); -	const struct ipv6_pinfo *np = inet6_sk(sk); -	return secure_ipv6_port_ephemeral(np->rcv_saddr.s6_addr32, -					  np->daddr.s6_addr32, + +	return secure_ipv6_port_ephemeral(sk->sk_v6_rcv_saddr.s6_addr32, +					  sk->sk_v6_daddr.s6_addr32,  					  inet->inet_dport);  } diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index 72d198b8e4d..9a4d7322fb2 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -75,23 +75,50 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto)  			return err;  	} -	if (uh->check == 0) { -		/* RFC 2460 section 8.1 says that we SHOULD log -		   this error. Well, it is reasonable. -		 */ -		LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0\n"); -		return 1; -	} -	if (skb->ip_summed == CHECKSUM_COMPLETE && -	    !csum_ipv6_magic(&ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, -			     skb->len, proto, skb->csum)) -		skb->ip_summed = CHECKSUM_UNNECESSARY; +	/* To support RFC 6936 (allow zero checksum in UDP/IPV6 for tunnels) +	 * we accept a checksum of zero here. When we find the socket +	 * for the UDP packet we'll check if that socket allows zero checksum +	 * for IPv6 (set by socket option). +	 */ +	return skb_checksum_init_zero_check(skb, proto, uh->check, +					   ip6_compute_pseudo); +} +EXPORT_SYMBOL(udp6_csum_init); + +/* Function to set UDP checksum for an IPv6 UDP packet. This is intended + * for the simple case like when setting the checksum for a UDP tunnel. + */ +void udp6_set_csum(bool nocheck, struct sk_buff *skb, +		   const struct in6_addr *saddr, +		   const struct in6_addr *daddr, int len) +{ +	struct udphdr *uh = udp_hdr(skb); + +	if (nocheck) +		uh->check = 0; +	else if (skb_is_gso(skb)) +		uh->check = ~udp_v6_check(len, saddr, daddr, 0); +	else if (skb_dst(skb) && skb_dst(skb)->dev && +		 (skb_dst(skb)->dev->features & NETIF_F_IPV6_CSUM)) { -	if (!skb_csum_unnecessary(skb)) -		skb->csum = ~csum_unfold(csum_ipv6_magic(&ipv6_hdr(skb)->saddr, -							 &ipv6_hdr(skb)->daddr, -							 skb->len, proto, 0)); +		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); -	return 0; +		skb->ip_summed = CHECKSUM_PARTIAL; +		skb->csum_start = skb_transport_header(skb) - skb->head; +		skb->csum_offset = offsetof(struct udphdr, check); +		uh->check = ~udp_v6_check(len, saddr, daddr, 0); +	} else { +		__wsum csum; + +		BUG_ON(skb->ip_summed == CHECKSUM_PARTIAL); + +		uh->check = 0; +		csum = skb_checksum(skb, 0, len, 0); +		uh->check = udp_v6_check(len, saddr, daddr, csum); +		if (uh->check == 0) +			uh->check = CSUM_MANGLED_0; + +		skb->ip_summed = CHECKSUM_UNNECESSARY; +	}  } -EXPORT_SYMBOL(udp6_csum_init); +EXPORT_SYMBOL(udp6_set_csum); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 5bec666aba6..cb4459bd1d2 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -9,14 +9,12 @@   *      modify it under the terms of the GNU General Public License   *      as published by the Free Software Foundation; either version   *      2 of the License, or (at your option) any later version. - */ - -/* - * 	Changes: - * 	Yuji SEKIYA @USAGI:	Support default route on router node; - * 				remove ip6_null_entry from the top of - * 				routing table. - * 	Ville Nuorvala:		Fixed routing subtrees. + * + *	Changes: + *	Yuji SEKIYA @USAGI:	Support default route on router node; + *				remove ip6_null_entry from the top of + *				routing table. + *	Ville Nuorvala:		Fixed routing subtrees.   */  #define pr_fmt(fmt) "IPv6: " fmt @@ -46,10 +44,9 @@  #define RT6_TRACE(x...) do { ; } while (0)  #endif -static struct kmem_cache * fib6_node_kmem __read_mostly; +static struct kmem_cache *fib6_node_kmem __read_mostly; -enum fib_walk_state_t -{ +enum fib_walk_state_t {  #ifdef CONFIG_IPV6_SUBTREES  	FWS_S,  #endif @@ -59,8 +56,7 @@ enum fib_walk_state_t  	FWS_U  }; -struct fib6_cleaner_t -{ +struct fib6_cleaner_t {  	struct fib6_walker_t w;  	struct net *net;  	int (*func)(struct rt6_info *, void *arg); @@ -75,8 +71,7 @@ static DEFINE_RWLOCK(fib6_walker_lock);  #define FWS_INIT FWS_L  #endif -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, -			      struct rt6_info *rt); +static void fib6_prune_clones(struct net *net, struct fib6_node *fn);  static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn);  static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn);  static int fib6_walk(struct fib6_walker_t *w); @@ -138,7 +133,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)  	const __be32 *addr = token;  	/*  	 * Here, -	 * 	1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f) +	 *	1 << ((~fn_bit ^ BITOP_BE32_SWIZZLE) & 0x1f)  	 * is optimized version of  	 *	htonl(1 << ((~fn_bit)&0x1F))  	 * See include/asm-generic/bitops/le.h. @@ -147,7 +142,7 @@ static __inline__ __be32 addr_bit_set(const void *token, int fn_bit)  	       addr[fn_bit >> 5];  } -static __inline__ struct fib6_node * node_alloc(void) +static __inline__ struct fib6_node *node_alloc(void)  {  	struct fib6_node *fn; @@ -156,7 +151,7 @@ static __inline__ struct fib6_node * node_alloc(void)  	return fn;  } -static __inline__ void node_free(struct fib6_node * fn) +static __inline__ void node_free(struct fib6_node *fn)  {  	kmem_cache_free(fib6_node_kmem, fn);  } @@ -292,7 +287,7 @@ static int fib6_dump_node(struct fib6_walker_t *w)  static void fib6_dump_end(struct netlink_callback *cb)  { -	struct fib6_walker_t *w = (void*)cb->args[2]; +	struct fib6_walker_t *w = (void *)cb->args[2];  	if (w) {  		if (cb->args[4]) { @@ -302,7 +297,7 @@ static void fib6_dump_end(struct netlink_callback *cb)  		cb->args[2] = 0;  		kfree(w);  	} -	cb->done = (void*)cb->args[3]; +	cb->done = (void *)cb->args[3];  	cb->args[1] = 3;  } @@ -485,7 +480,7 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root,  		fn->fn_sernum = sernum;  		dir = addr_bit_set(addr, fn->fn_bit);  		pn = fn; -		fn = dir ? fn->right: fn->left; +		fn = dir ? fn->right : fn->left;  	} while (fn);  	if (!allow_create) { @@ -638,12 +633,41 @@ static inline bool rt6_qualify_for_ecmp(struct rt6_info *rt)  	       RTF_GATEWAY;  } +static int fib6_commit_metrics(struct dst_entry *dst, +			       struct nlattr *mx, int mx_len) +{ +	struct nlattr *nla; +	int remaining; +	u32 *mp; + +	if (dst->flags & DST_HOST) { +		mp = dst_metrics_write_ptr(dst); +	} else { +		mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); +		if (!mp) +			return -ENOMEM; +		dst_init_metrics(dst, mp, 0); +	} + +	nla_for_each_attr(nla, mx, mx_len, remaining) { +		int type = nla_type(nla); + +		if (type) { +			if (type > RTAX_MAX) +				return -EINVAL; + +			mp[type - 1] = nla_get_u32(nla); +		} +	} +	return 0; +} +  /*   *	Insert routing information in a node.   */  static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, -			    struct nl_info *info) +			    struct nl_info *info, struct nlattr *mx, int mx_len)  {  	struct rt6_info *iter = NULL;  	struct rt6_info **ins; @@ -653,6 +677,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,  		   (info->nlh->nlmsg_flags & NLM_F_CREATE));  	int found = 0;  	bool rt_can_ecmp = rt6_qualify_for_ecmp(rt); +	int err;  	ins = &fn->leaf; @@ -751,6 +776,11 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,  			pr_warn("NLM_F_CREATE should be set when creating new route\n");  add: +		if (mx) { +			err = fib6_commit_metrics(&rt->dst, mx, mx_len); +			if (err) +				return err; +		}  		rt->dst.rt6_next = iter;  		*ins = rt;  		rt->rt6i_node = fn; @@ -770,6 +800,11 @@ add:  			pr_warn("NLM_F_REPLACE set, but no existing node found!\n");  			return -ENOENT;  		} +		if (mx) { +			err = fib6_commit_metrics(&rt->dst, mx, mx_len); +			if (err) +				return err; +		}  		*ins = rt;  		rt->rt6i_node = fn;  		rt->dst.rt6_next = iter->dst.rt6_next; @@ -806,7 +841,8 @@ void fib6_force_start_gc(struct net *net)   *	with source addr info in sub-trees   */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, +	     struct nlattr *mx, int mx_len)  {  	struct fib6_node *fn, *pn = NULL;  	int err = -ENOMEM; @@ -900,11 +936,11 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info)  	}  #endif -	err = fib6_add_rt2node(fn, rt, info); +	err = fib6_add_rt2node(fn, rt, info, mx, mx_len);  	if (!err) {  		fib6_start_gc(info->nl_net, rt);  		if (!(rt->rt6i_flags & RTF_CACHE)) -			fib6_prune_clones(info->nl_net, pn, rt); +			fib6_prune_clones(info->nl_net, pn);  	}  out: @@ -955,8 +991,8 @@ struct lookup_args {  	const struct in6_addr	*addr;		/* search key			*/  }; -static struct fib6_node * fib6_lookup_1(struct fib6_node *root, -					struct lookup_args *args) +static struct fib6_node *fib6_lookup_1(struct fib6_node *root, +				       struct lookup_args *args)  {  	struct fib6_node *fn;  	__be32 dir; @@ -1018,8 +1054,8 @@ backtrack:  	return NULL;  } -struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, -			       const struct in6_addr *saddr) +struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, +			      const struct in6_addr *saddr)  {  	struct fib6_node *fn;  	struct lookup_args args[] = { @@ -1051,9 +1087,9 @@ struct fib6_node * fib6_lookup(struct fib6_node *root, const struct in6_addr *da   */ -static struct fib6_node * fib6_locate_1(struct fib6_node *root, -					const struct in6_addr *addr, -					int plen, int offset) +static struct fib6_node *fib6_locate_1(struct fib6_node *root, +				       const struct in6_addr *addr, +				       int plen, int offset)  {  	struct fib6_node *fn; @@ -1081,9 +1117,9 @@ static struct fib6_node * fib6_locate_1(struct fib6_node *root,  	return NULL;  } -struct fib6_node * fib6_locate(struct fib6_node *root, -			       const struct in6_addr *daddr, int dst_len, -			       const struct in6_addr *saddr, int src_len) +struct fib6_node *fib6_locate(struct fib6_node *root, +			      const struct in6_addr *daddr, int dst_len, +			      const struct in6_addr *saddr, int src_len)  {  	struct fib6_node *fn; @@ -1151,8 +1187,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,  		children = 0;  		child = NULL; -		if (fn->right) child = fn->right, children |= 1; -		if (fn->left) child = fn->left, children |= 2; +		if (fn->right) +			child = fn->right, children |= 1; +		if (fn->left) +			child = fn->left, children |= 2;  		if (children == 3 || FIB6_SUBTREE(fn)  #ifdef CONFIG_IPV6_SUBTREES @@ -1180,8 +1218,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,  		} else {  			WARN_ON(fn->fn_flags & RTN_ROOT);  #endif -			if (pn->right == fn) pn->right = child; -			else if (pn->left == fn) pn->left = child; +			if (pn->right == fn) +				pn->right = child; +			else if (pn->left == fn) +				pn->left = child;  #if RT6_DEBUG >= 2  			else  				WARN_ON(1); @@ -1213,10 +1253,10 @@ static struct fib6_node *fib6_repair_tree(struct net *net,  					w->node = child;  					if (children&2) {  						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); -						w->state = w->state>=FWS_R ? FWS_U : FWS_INIT; +						w->state = w->state >= FWS_R ? FWS_U : FWS_INIT;  					} else {  						RT6_TRACE("W %p adjusted by delnode 2, s=%d\n", w, w->state); -						w->state = w->state>=FWS_C ? FWS_U : FWS_INIT; +						w->state = w->state >= FWS_C ? FWS_U : FWS_INIT;  					}  				}  			} @@ -1314,7 +1354,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)  	struct rt6_info **rtp;  #if RT6_DEBUG >= 2 -	if (rt->dst.obsolete>0) { +	if (rt->dst.obsolete > 0) {  		WARN_ON(fn != NULL);  		return -ENOENT;  	} @@ -1334,7 +1374,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info)  			pn = pn->parent;  		}  #endif -		fib6_prune_clones(info->nl_net, pn, rt); +		fib6_prune_clones(info->nl_net, pn);  	}  	/* @@ -1418,7 +1458,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)  				if (w->skip) {  					w->skip--; -					continue; +					goto skip;  				}  				err = w->func(w); @@ -1428,6 +1468,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w)  				w->count++;  				continue;  			} +skip:  			w->state = FWS_U;  		case FWS_U:  			if (fn == w->root) @@ -1529,27 +1570,8 @@ static void fib6_clean_tree(struct net *net, struct fib6_node *root,  	fib6_walk(&c.w);  } -void fib6_clean_all_ro(struct net *net, int (*func)(struct rt6_info *, void *arg), -		    int prune, void *arg) -{ -	struct fib6_table *table; -	struct hlist_head *head; -	unsigned int h; - -	rcu_read_lock(); -	for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { -		head = &net->ipv6.fib_table_hash[h]; -		hlist_for_each_entry_rcu(table, head, tb6_hlist) { -			read_lock_bh(&table->tb6_lock); -			fib6_clean_tree(net, &table->tb6_root, -					func, prune, arg); -			read_unlock_bh(&table->tb6_lock); -		} -	} -	rcu_read_unlock(); -}  void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), -		    int prune, void *arg) +		    void *arg)  {  	struct fib6_table *table;  	struct hlist_head *head; @@ -1561,7 +1583,7 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg),  		hlist_for_each_entry_rcu(table, head, tb6_hlist) {  			write_lock_bh(&table->tb6_lock);  			fib6_clean_tree(net, &table->tb6_root, -					func, prune, arg); +					func, 0, arg);  			write_unlock_bh(&table->tb6_lock);  		}  	} @@ -1578,10 +1600,9 @@ static int fib6_prune_clone(struct rt6_info *rt, void *arg)  	return 0;  } -static void fib6_prune_clones(struct net *net, struct fib6_node *fn, -			      struct rt6_info *rt) +static void fib6_prune_clones(struct net *net, struct fib6_node *fn)  { -	fib6_clean_tree(net, fn, fib6_prune_clone, 1, rt); +	fib6_clean_tree(net, fn, fib6_prune_clone, 1, NULL);  }  /* @@ -1655,7 +1676,7 @@ void fib6_run_gc(unsigned long expires, struct net *net, bool force)  	gc_args.more = icmp6_dst_gc(); -	fib6_clean_all(net, fib6_age, 0, NULL); +	fib6_clean_all(net, fib6_age, NULL);  	now = jiffies;  	net->ipv6.ip6_rt_last_gc = now; @@ -1726,7 +1747,7 @@ out_rt6_stats:  	kfree(net->ipv6.rt6_stats);  out_timer:  	return -ENOMEM; - } +}  static void fib6_net_exit(struct net *net)  { @@ -1782,3 +1803,189 @@ void fib6_gc_cleanup(void)  	unregister_pernet_subsys(&fib6_net_ops);  	kmem_cache_destroy(fib6_node_kmem);  } + +#ifdef CONFIG_PROC_FS + +struct ipv6_route_iter { +	struct seq_net_private p; +	struct fib6_walker_t w; +	loff_t skip; +	struct fib6_table *tbl; +	__u32 sernum; +}; + +static int ipv6_route_seq_show(struct seq_file *seq, void *v) +{ +	struct rt6_info *rt = v; +	struct ipv6_route_iter *iter = seq->private; + +	seq_printf(seq, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); + +#ifdef CONFIG_IPV6_SUBTREES +	seq_printf(seq, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); +#else +	seq_puts(seq, "00000000000000000000000000000000 00 "); +#endif +	if (rt->rt6i_flags & RTF_GATEWAY) +		seq_printf(seq, "%pi6", &rt->rt6i_gateway); +	else +		seq_puts(seq, "00000000000000000000000000000000"); + +	seq_printf(seq, " %08x %08x %08x %08x %8s\n", +		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), +		   rt->dst.__use, rt->rt6i_flags, +		   rt->dst.dev ? rt->dst.dev->name : ""); +	iter->w.leaf = NULL; +	return 0; +} + +static int ipv6_route_yield(struct fib6_walker_t *w) +{ +	struct ipv6_route_iter *iter = w->args; + +	if (!iter->skip) +		return 1; + +	do { +		iter->w.leaf = iter->w.leaf->dst.rt6_next; +		iter->skip--; +		if (!iter->skip && iter->w.leaf) +			return 1; +	} while (iter->w.leaf); + +	return 0; +} + +static void ipv6_route_seq_setup_walk(struct ipv6_route_iter *iter) +{ +	memset(&iter->w, 0, sizeof(iter->w)); +	iter->w.func = ipv6_route_yield; +	iter->w.root = &iter->tbl->tb6_root; +	iter->w.state = FWS_INIT; +	iter->w.node = iter->w.root; +	iter->w.args = iter; +	iter->sernum = iter->w.root->fn_sernum; +	INIT_LIST_HEAD(&iter->w.lh); +	fib6_walker_link(&iter->w); +} + +static struct fib6_table *ipv6_route_seq_next_table(struct fib6_table *tbl, +						    struct net *net) +{ +	unsigned int h; +	struct hlist_node *node; + +	if (tbl) { +		h = (tbl->tb6_id & (FIB6_TABLE_HASHSZ - 1)) + 1; +		node = rcu_dereference_bh(hlist_next_rcu(&tbl->tb6_hlist)); +	} else { +		h = 0; +		node = NULL; +	} + +	while (!node && h < FIB6_TABLE_HASHSZ) { +		node = rcu_dereference_bh( +			hlist_first_rcu(&net->ipv6.fib_table_hash[h++])); +	} +	return hlist_entry_safe(node, struct fib6_table, tb6_hlist); +} + +static void ipv6_route_check_sernum(struct ipv6_route_iter *iter) +{ +	if (iter->sernum != iter->w.root->fn_sernum) { +		iter->sernum = iter->w.root->fn_sernum; +		iter->w.state = FWS_INIT; +		iter->w.node = iter->w.root; +		WARN_ON(iter->w.skip); +		iter->w.skip = iter->w.count; +	} +} + +static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ +	int r; +	struct rt6_info *n; +	struct net *net = seq_file_net(seq); +	struct ipv6_route_iter *iter = seq->private; + +	if (!v) +		goto iter_table; + +	n = ((struct rt6_info *)v)->dst.rt6_next; +	if (n) { +		++*pos; +		return n; +	} + +iter_table: +	ipv6_route_check_sernum(iter); +	read_lock(&iter->tbl->tb6_lock); +	r = fib6_walk_continue(&iter->w); +	read_unlock(&iter->tbl->tb6_lock); +	if (r > 0) { +		if (v) +			++*pos; +		return iter->w.leaf; +	} else if (r < 0) { +		fib6_walker_unlink(&iter->w); +		return NULL; +	} +	fib6_walker_unlink(&iter->w); + +	iter->tbl = ipv6_route_seq_next_table(iter->tbl, net); +	if (!iter->tbl) +		return NULL; + +	ipv6_route_seq_setup_walk(iter); +	goto iter_table; +} + +static void *ipv6_route_seq_start(struct seq_file *seq, loff_t *pos) +	__acquires(RCU_BH) +{ +	struct net *net = seq_file_net(seq); +	struct ipv6_route_iter *iter = seq->private; + +	rcu_read_lock_bh(); +	iter->tbl = ipv6_route_seq_next_table(NULL, net); +	iter->skip = *pos; + +	if (iter->tbl) { +		ipv6_route_seq_setup_walk(iter); +		return ipv6_route_seq_next(seq, NULL, pos); +	} else { +		return NULL; +	} +} + +static bool ipv6_route_iter_active(struct ipv6_route_iter *iter) +{ +	struct fib6_walker_t *w = &iter->w; +	return w->node && !(w->state == FWS_U && w->node == w->root); +} + +static void ipv6_route_seq_stop(struct seq_file *seq, void *v) +	__releases(RCU_BH) +{ +	struct ipv6_route_iter *iter = seq->private; + +	if (ipv6_route_iter_active(iter)) +		fib6_walker_unlink(&iter->w); + +	rcu_read_unlock_bh(); +} + +static const struct seq_operations ipv6_route_seq_ops = { +	.start	= ipv6_route_seq_start, +	.next	= ipv6_route_seq_next, +	.stop	= ipv6_route_seq_stop, +	.show	= ipv6_route_seq_show +}; + +int ipv6_route_open(struct inode *inode, struct file *file) +{ +	return seq_open_net(inode, file, &ipv6_route_seq_ops, +			    sizeof(struct ipv6_route_iter)); +} + +#endif /* CONFIG_PROC_FS */ diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 46e88433ec7..4052694c6f2 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -15,9 +15,7 @@  #include <linux/socket.h>  #include <linux/net.h>  #include <linux/netdevice.h> -#include <linux/if_arp.h>  #include <linux/in6.h> -#include <linux/route.h>  #include <linux/proc_fs.h>  #include <linux/seq_file.h>  #include <linux/slab.h> @@ -28,12 +26,7 @@  #include <net/sock.h>  #include <net/ipv6.h> -#include <net/ndisc.h> -#include <net/protocol.h> -#include <net/ip6_route.h> -#include <net/addrconf.h>  #include <net/rawv6.h> -#include <net/icmp.h>  #include <net/transp_v6.h>  #include <asm/uaccess.h> @@ -41,7 +34,7 @@  #define FL_MIN_LINGER	6	/* Minimal linger. It is set to 6sec specified  				   in old IPv6 RFC. Well, it was reasonable value.  				 */ -#define FL_MAX_LINGER	60	/* Maximal linger timeout */ +#define FL_MAX_LINGER	150	/* Maximal linger timeout */  /* FL hash table */ @@ -210,7 +203,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net,  	spin_lock_bh(&ip6_fl_lock);  	if (label == 0) {  		for (;;) { -			fl->label = htonl(net_random())&IPV6_FLOWLABEL_MASK; +			fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK;  			if (fl->label) {  				lfl = __fl_lookup(net, fl->label);  				if (lfl == NULL) @@ -345,6 +338,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo  	expires = check_linger(expires);  	if (!expires)  		return -EPERM; + +	spin_lock_bh(&ip6_fl_lock);  	fl->lastuse = jiffies;  	if (time_before(fl->linger, linger))  		fl->linger = linger; @@ -352,6 +347,8 @@ static int fl6_renew(struct ip6_flowlabel *fl, unsigned long linger, unsigned lo  		expires = fl->linger;  	if (time_before(fl->expires, fl->lastuse + expires))  		fl->expires = fl->lastuse + expires; +	spin_unlock_bh(&ip6_fl_lock); +  	return 0;  } @@ -453,8 +450,10 @@ static int mem_check(struct sock *sk)  	if (room > FL_MAX_SIZE - FL_MAX_PER_SOCK)  		return 0; +	rcu_read_lock_bh();  	for_each_sk_fl_rcu(np, sfl)  		count++; +	rcu_read_unlock_bh();  	if (room <= 0 ||  	    ((count >= FL_MAX_PER_SOCK || @@ -465,34 +464,6 @@ static int mem_check(struct sock *sk)  	return 0;  } -static bool ipv6_hdr_cmp(struct ipv6_opt_hdr *h1, struct ipv6_opt_hdr *h2) -{ -	if (h1 == h2) -		return false; -	if (h1 == NULL || h2 == NULL) -		return true; -	if (h1->hdrlen != h2->hdrlen) -		return true; -	return memcmp(h1+1, h2+1, ((h1->hdrlen+1)<<3) - sizeof(*h1)); -} - -static bool ipv6_opt_cmp(struct ipv6_txoptions *o1, struct ipv6_txoptions *o2) -{ -	if (o1 == o2) -		return false; -	if (o1 == NULL || o2 == NULL) -		return true; -	if (o1->opt_nflen != o2->opt_nflen) -		return true; -	if (ipv6_hdr_cmp(o1->hopopt, o2->hopopt)) -		return true; -	if (ipv6_hdr_cmp(o1->dst0opt, o2->dst0opt)) -		return true; -	if (ipv6_hdr_cmp((struct ipv6_opt_hdr *)o1->srcrt, (struct ipv6_opt_hdr *)o2->srcrt)) -		return true; -	return false; -} -  static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,  		struct ip6_flowlabel *fl)  { @@ -503,6 +474,43 @@ static inline void fl_link(struct ipv6_pinfo *np, struct ipv6_fl_socklist *sfl,  	spin_unlock_bh(&ip6_sk_fl_lock);  } +int ipv6_flowlabel_opt_get(struct sock *sk, struct in6_flowlabel_req *freq, +			   int flags) +{ +	struct ipv6_pinfo *np = inet6_sk(sk); +	struct ipv6_fl_socklist *sfl; + +	if (flags & IPV6_FL_F_REMOTE) { +		freq->flr_label = np->rcv_flowinfo & IPV6_FLOWLABEL_MASK; +		return 0; +	} + +	if (np->repflow) { +		freq->flr_label = np->flow_label; +		return 0; +	} + +	rcu_read_lock_bh(); + +	for_each_sk_fl_rcu(np, sfl) { +		if (sfl->fl->label == (np->flow_label & IPV6_FLOWLABEL_MASK)) { +			spin_lock_bh(&ip6_fl_lock); +			freq->flr_label = sfl->fl->label; +			freq->flr_dst = sfl->fl->dst; +			freq->flr_share = sfl->fl->share; +			freq->flr_expires = (sfl->fl->expires - jiffies) / HZ; +			freq->flr_linger = sfl->fl->linger / HZ; + +			spin_unlock_bh(&ip6_fl_lock); +			rcu_read_unlock_bh(); +			return 0; +		} +	} +	rcu_read_unlock_bh(); + +	return -ENOENT; +} +  int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)  {  	int uninitialized_var(err); @@ -523,6 +531,15 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)  	switch (freq.flr_action) {  	case IPV6_FL_A_PUT: +		if (freq.flr_flags & IPV6_FL_F_REFLECT) { +			if (sk->sk_protocol != IPPROTO_TCP) +				return -ENOPROTOOPT; +			if (!np->repflow) +				return -ESRCH; +			np->flow_label = 0; +			np->repflow = 0; +			return 0; +		}  		spin_lock_bh(&ip6_sk_fl_lock);  		for (sflp = &np->ipv6_fl_list;  		     (sfl = rcu_dereference(*sflp))!=NULL; @@ -563,6 +580,20 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen)  		return -ESRCH;  	case IPV6_FL_A_GET: +		if (freq.flr_flags & IPV6_FL_F_REFLECT) { +			struct net *net = sock_net(sk); +			if (net->ipv6.sysctl.flowlabel_consistency) { +				net_info_ratelimited("Can not set IPV6_FL_F_REFLECT if flowlabel_consistency sysctl is enable\n"); +				return -EPERM; +			} + +			if (sk->sk_protocol != IPPROTO_TCP) +				return -ENOPROTOOPT; + +			np->repflow = 1; +			return 0; +		} +  		if (freq.flr_label & ~IPV6_FLOWLABEL_MASK)  			return -EINVAL; @@ -603,11 +634,6 @@ recheck:  				     uid_eq(fl1->owner.uid, fl->owner.uid)))  					goto release; -				err = -EINVAL; -				if (!ipv6_addr_equal(&fl1->dst, &fl->dst) || -				    ipv6_opt_cmp(fl1->opt, fl->opt)) -					goto release; -  				err = -ENOMEM;  				if (sfl1 == NULL)  					goto release; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 1ef1fa2b22a..3873181ed85 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -61,9 +61,6 @@ static bool log_ecn_error = true;  module_param(log_ecn_error, bool, 0644);  MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) -#define IPV6_TCLASS_SHIFT 20 -  #define HASH_SIZE_SHIFT  5  #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -75,6 +72,7 @@ struct ip6gre_net {  };  static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly;  static int ip6gre_tunnel_init(struct net_device *dev);  static void ip6gre_tunnel_setup(struct net_device *dev);  static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); @@ -356,10 +354,10 @@ failed_free:  static void ip6gre_tunnel_uninit(struct net_device *dev)  { -	struct net *net = dev_net(dev); -	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); +	struct ip6_tnl *t = netdev_priv(dev); +	struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); -	ip6gre_tunnel_unlink(ign, netdev_priv(dev)); +	ip6gre_tunnel_unlink(ign, t);  	dev_put(dev);  } @@ -470,17 +468,7 @@ static int ip6gre_rcv(struct sk_buff *skb)  			goto drop;  		if (flags&GRE_CSUM) { -			switch (skb->ip_summed) { -			case CHECKSUM_COMPLETE: -				csum = csum_fold(skb->csum); -				if (!csum) -					break; -				/* fall through */ -			case CHECKSUM_NONE: -				skb->csum = 0; -				csum = __skb_checksum_complete(skb); -				skb->ip_summed = CHECKSUM_COMPLETE; -			} +			csum = skb_checksum_simple_validate(skb);  			offset += 4;  		}  		if (flags&GRE_KEY) { @@ -499,7 +487,7 @@ static int ip6gre_rcv(struct sk_buff *skb)  					  &ipv6h->saddr, &ipv6h->daddr, key,  					  gre_proto);  	if (tunnel) { -		struct pcpu_tstats *tstats; +		struct pcpu_sw_netstats *tstats;  		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))  			goto drop; @@ -614,8 +602,8 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb,  			 int encap_limit,  			 __u32 *pmtu)  { -	struct net *net = dev_net(dev);  	struct ip6_tnl *tunnel = netdev_priv(dev); +	struct net *net = tunnel->net;  	struct net_device *tdev;    /* Device to other host */  	struct ipv6hdr  *ipv6h;     /* Our new IP header */  	unsigned int max_headroom = 0; /* The extra header space needed */ @@ -846,7 +834,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)  	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)  		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);  	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) -		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); +		fl6.flowlabel |= ip6_flowlabel(ipv6h);  	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)  		fl6.flowi6_mark = skb->mark; @@ -976,12 +964,13 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)  		if (t->parms.o_flags&GRE_SEQ)  			addend += 4;  	} +	t->hlen = addend;  	if (p->flags & IP6_TNL_F_CAP_XMIT) {  		int strict = (ipv6_addr_type(&p->raddr) &  			      (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); -		struct rt6_info *rt = rt6_lookup(dev_net(dev), +		struct rt6_info *rt = rt6_lookup(t->net,  						 &p->raddr, &p->laddr,  						 p->link, strict); @@ -1002,8 +991,6 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu)  		}  		ip6_rt_put(rt);  	} - -	t->hlen = addend;  }  static int ip6gre_tnl_change(struct ip6_tnl *t, @@ -1067,13 +1054,12 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,  	int err = 0;  	struct ip6_tnl_parm2 p;  	struct __ip6_tnl_parm p1; -	struct ip6_tnl *t; -	struct net *net = dev_net(dev); +	struct ip6_tnl *t = netdev_priv(dev); +	struct net *net = t->net;  	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);  	switch (cmd) {  	case SIOCGETTUNNEL: -		t = NULL;  		if (dev == ign->fb_tunnel_dev) {  			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {  				err = -EFAULT; @@ -1081,9 +1067,9 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev,  			}  			ip6gre_tnl_parm_from_user(&p1, &p);  			t = ip6gre_tunnel_locate(net, &p1, 0); +			if (t == NULL) +				t = netdev_priv(dev);  		} -		if (t == NULL) -			t = netdev_priv(dev);  		memset(&p, 0, sizeof(p));  		ip6gre_tnl_parm_to_user(&p, &t->parms);  		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) @@ -1246,13 +1232,13 @@ static void ip6gre_tunnel_setup(struct net_device *dev)  	dev->flags |= IFF_NOARP;  	dev->iflink = 0;  	dev->addr_len = sizeof(struct in6_addr); -	dev->features |= NETIF_F_NETNS_LOCAL;  	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE;  }  static int ip6gre_tunnel_init(struct net_device *dev)  {  	struct ip6_tnl *tunnel; +	int i;  	tunnel = netdev_priv(dev); @@ -1266,10 +1252,17 @@ static int ip6gre_tunnel_init(struct net_device *dev)  	if (ipv6_addr_any(&tunnel->parms.raddr))  		dev->header_ops = &ip6gre_header_ops; -	dev->tstats = alloc_percpu(struct pcpu_tstats); +	dev->tstats = alloc_percpu(struct pcpu_sw_netstats);  	if (!dev->tstats)  		return -ENOMEM; +	for_each_possible_cpu(i) { +		struct pcpu_sw_netstats *ip6gre_tunnel_stats; +		ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); +		u64_stats_init(&ip6gre_tunnel_stats->syncp); +	} + +  	return 0;  } @@ -1293,11 +1286,17 @@ static struct inet6_protocol ip6gre_protocol __read_mostly = {  	.flags       = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,  }; -static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, -	struct list_head *head) +static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head)  { +	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); +	struct net_device *dev, *aux;  	int prio; +	for_each_netdev_safe(net, dev, aux) +		if (dev->rtnl_link_ops == &ip6gre_link_ops || +		    dev->rtnl_link_ops == &ip6gre_tap_ops) +			unregister_netdevice_queue(dev, head); +  	for (prio = 0; prio < 4; prio++) {  		int h;  		for (h = 0; h < HASH_SIZE; h++) { @@ -1306,7 +1305,12 @@ static void ip6gre_destroy_tunnels(struct ip6gre_net *ign,  			t = rtnl_dereference(ign->tunnels[prio][h]);  			while (t != NULL) { -				unregister_netdevice_queue(t->dev, head); +				/* If dev is in the same netns, it has already +				 * been added to the list by the previous loop. +				 */ +				if (!net_eq(dev_net(t->dev), net)) +					unregister_netdevice_queue(t->dev, +								   head);  				t = rtnl_dereference(t->next);  			}  		} @@ -1325,6 +1329,11 @@ static int __net_init ip6gre_init_net(struct net *net)  		goto err_alloc_dev;  	}  	dev_net_set(ign->fb_tunnel_dev, net); +	/* FB netdevice is special: we have one, and only one per netns. +	 * Allowing to move it to another netns is clearly unsafe. +	 */ +	ign->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; +  	ip6gre_fb_tunnel_init(ign->fb_tunnel_dev);  	ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; @@ -1345,12 +1354,10 @@ err_alloc_dev:  static void __net_exit ip6gre_exit_net(struct net *net)  { -	struct ip6gre_net *ign;  	LIST_HEAD(list); -	ign = net_generic(net, ip6gre_net_id);  	rtnl_lock(); -	ip6gre_destroy_tunnels(ign, &list); +	ip6gre_destroy_tunnels(net, &list);  	unregister_netdevice_many(&list);  	rtnl_unlock();  } @@ -1459,7 +1466,7 @@ static int ip6gre_tap_init(struct net_device *dev)  	ip6gre_tnl_link_config(tunnel, 1); -	dev->tstats = alloc_percpu(struct pcpu_tstats); +	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);  	if (!dev->tstats)  		return -ENOMEM; @@ -1527,15 +1534,14 @@ out:  static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],  			    struct nlattr *data[])  { -	struct ip6_tnl *t, *nt; -	struct net *net = dev_net(dev); +	struct ip6_tnl *t, *nt = netdev_priv(dev); +	struct net *net = nt->net;  	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id);  	struct __ip6_tnl_parm p;  	if (dev == ign->fb_tunnel_dev)  		return -EINVAL; -	nt = netdev_priv(dev);  	ip6gre_netlink_parms(data, &p);  	t = ip6gre_tunnel_locate(net, &p, 0); @@ -1555,6 +1561,15 @@ static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[],  	return 0;  } +static void ip6gre_dellink(struct net_device *dev, struct list_head *head) +{ +	struct net *net = dev_net(dev); +	struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + +	if (dev != ign->fb_tunnel_dev) +		unregister_netdevice_queue(dev, head); +} +  static size_t ip6gre_get_size(const struct net_device *dev)  {  	return @@ -1632,6 +1647,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = {  	.validate	= ip6gre_tunnel_validate,  	.newlink	= ip6gre_newlink,  	.changelink	= ip6gre_changelink, +	.dellink	= ip6gre_dellink,  	.get_size	= ip6gre_get_size,  	.fill_info	= ip6gre_fill_info,  }; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 302d6fb1ff2..51d54dc376f 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,7 +49,7 @@  int ip6_rcv_finish(struct sk_buff *skb)  { -	if (sysctl_ip_early_demux && !skb_dst(skb)) { +	if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {  		const struct inet6_protocol *ipprot;  		ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d82de722810..65eda2a8af4 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -66,7 +66,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb)  	__skb_pull(skb, sizeof(*ipv6h));  	err = -EPROTONOSUPPORT; -	rcu_read_lock();  	ops = rcu_dereference(inet6_offloads[  		ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr)]); @@ -74,7 +73,6 @@ static int ipv6_gso_send_check(struct sk_buff *skb)  		skb_reset_transport_header(skb);  		err = ops->callbacks.gso_send_check(skb);  	} -	rcu_read_unlock();  out:  	return err; @@ -91,59 +89,103 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb,  	unsigned int unfrag_ip6hlen;  	u8 *prevhdr;  	int offset = 0; -	bool tunnel; +	bool encap, udpfrag; +	int nhoff;  	if (unlikely(skb_shinfo(skb)->gso_type &  		     ~(SKB_GSO_UDP |  		       SKB_GSO_DODGY |  		       SKB_GSO_TCP_ECN |  		       SKB_GSO_GRE | +		       SKB_GSO_GRE_CSUM | +		       SKB_GSO_IPIP | +		       SKB_GSO_SIT |  		       SKB_GSO_UDP_TUNNEL | +		       SKB_GSO_UDP_TUNNEL_CSUM |  		       SKB_GSO_MPLS |  		       SKB_GSO_TCPV6 |  		       0)))  		goto out; +	skb_reset_network_header(skb); +	nhoff = skb_network_header(skb) - skb_mac_header(skb);  	if (unlikely(!pskb_may_pull(skb, sizeof(*ipv6h))))  		goto out; -	tunnel = skb->encapsulation; +	encap = SKB_GSO_CB(skb)->encap_level > 0; +	if (encap) +		features = skb->dev->hw_enc_features & netif_skb_features(skb); +	SKB_GSO_CB(skb)->encap_level += sizeof(*ipv6h); +  	ipv6h = ipv6_hdr(skb);  	__skb_pull(skb, sizeof(*ipv6h));  	segs = ERR_PTR(-EPROTONOSUPPORT);  	proto = ipv6_gso_pull_exthdrs(skb, ipv6h->nexthdr); -	rcu_read_lock(); + +	if (skb->encapsulation && +	    skb_shinfo(skb)->gso_type & (SKB_GSO_SIT|SKB_GSO_IPIP)) +		udpfrag = proto == IPPROTO_UDP && encap; +	else +		udpfrag = proto == IPPROTO_UDP && !skb->encapsulation; +  	ops = rcu_dereference(inet6_offloads[proto]);  	if (likely(ops && ops->callbacks.gso_segment)) {  		skb_reset_transport_header(skb);  		segs = ops->callbacks.gso_segment(skb, features);  	} -	rcu_read_unlock();  	if (IS_ERR(segs))  		goto out;  	for (skb = segs; skb; skb = skb->next) { -		ipv6h = ipv6_hdr(skb); -		ipv6h->payload_len = htons(skb->len - skb->mac_len - -					   sizeof(*ipv6h)); -		if (!tunnel && proto == IPPROTO_UDP) { +		ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); +		ipv6h->payload_len = htons(skb->len - nhoff - sizeof(*ipv6h)); +		skb->network_header = (u8 *)ipv6h - skb->head; + +		if (udpfrag) {  			unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); -			fptr = (struct frag_hdr *)(skb_network_header(skb) + -				unfrag_ip6hlen); +			fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen);  			fptr->frag_off = htons(offset);  			if (skb->next != NULL)  				fptr->frag_off |= htons(IP6_MF);  			offset += (ntohs(ipv6h->payload_len) -  				   sizeof(struct frag_hdr));  		} +		if (encap) +			skb_reset_inner_headers(skb);  	}  out:  	return segs;  } +/* Return the total length of all the extension hdrs, following the same + * logic in ipv6_gso_pull_exthdrs() when parsing ext-hdrs. + */ +static int ipv6_exthdrs_len(struct ipv6hdr *iph, +			    const struct net_offload **opps) +{ +	struct ipv6_opt_hdr *opth = (void *)iph; +	int len = 0, proto, optlen = sizeof(*iph); + +	proto = iph->nexthdr; +	for (;;) { +		if (proto != NEXTHDR_HOP) { +			*opps = rcu_dereference(inet6_offloads[proto]); +			if (unlikely(!(*opps))) +				break; +			if (!((*opps)->flags & INET6_PROTO_GSO_EXTHDR)) +				break; +		} +		opth = (void *)opth + optlen; +		optlen = ipv6_optlen(opth); +		len += optlen; +		proto = opth->nexthdr; +	} +	return len; +} +  static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,  					 struct sk_buff *skb)  { @@ -154,9 +196,8 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,  	unsigned int nlen;  	unsigned int hlen;  	unsigned int off; -	int flush = 1; +	u16 flush = 1;  	int proto; -	__wsum csum;  	off = skb_gro_offset(skb);  	hlen = off + sizeof(*iph); @@ -167,6 +208,7 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,  			goto out;  	} +	skb_set_network_header(skb, off);  	skb_gro_pull(skb, sizeof(*iph));  	skb_set_transport_header(skb, skb_gro_offset(skb)); @@ -201,12 +243,16 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,  		if (!NAPI_GRO_CB(p)->same_flow)  			continue; -		iph2 = ipv6_hdr(p); +		iph2 = (struct ipv6hdr *)(p->data + off);  		first_word = *(__be32 *)iph ^ *(__be32 *)iph2 ; -		/* All fields must match except length and Traffic Class. */ -		if (nlen != skb_network_header_len(p) || -		    (first_word & htonl(0xF00FFFFF)) || +		/* All fields must match except length and Traffic Class. +		 * XXX skbs on the gro_list have all been parsed and pulled +		 * already so we don't need to compare nlen +		 * (nlen != (sizeof(*iph2) + ipv6_exthdrs_len(iph2, &ops))) +		 * memcmp() alone below is suffcient, right? +		 */ +		 if ((first_word & htonl(0xF00FFFFF)) ||  		    memcmp(&iph->nexthdr, &iph2->nexthdr,  			   nlen - offsetof(struct ipv6hdr, nexthdr))) {  			NAPI_GRO_CB(p)->same_flow = 0; @@ -219,13 +265,10 @@ static struct sk_buff **ipv6_gro_receive(struct sk_buff **head,  	NAPI_GRO_CB(skb)->flush |= flush; -	csum = skb->csum; -	skb_postpull_rcsum(skb, iph, skb_network_header_len(skb)); +	skb_gro_postpull_rcsum(skb, iph, nlen);  	pp = ops->callbacks.gro_receive(head, skb); -	skb->csum = csum; -  out_unlock:  	rcu_read_unlock(); @@ -235,21 +278,21 @@ out:  	return pp;  } -static int ipv6_gro_complete(struct sk_buff *skb) +static int ipv6_gro_complete(struct sk_buff *skb, int nhoff)  {  	const struct net_offload *ops; -	struct ipv6hdr *iph = ipv6_hdr(skb); +	struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + nhoff);  	int err = -ENOSYS; -	iph->payload_len = htons(skb->len - skb_network_offset(skb) - -				 sizeof(*iph)); +	iph->payload_len = htons(skb->len - nhoff - sizeof(*iph));  	rcu_read_lock(); -	ops = rcu_dereference(inet6_offloads[NAPI_GRO_CB(skb)->proto]); + +	nhoff += sizeof(*iph) + ipv6_exthdrs_len(iph, &ops);  	if (WARN_ON(!ops || !ops->callbacks.gro_complete))  		goto out_unlock; -	err = ops->callbacks.gro_complete(skb); +	err = ops->callbacks.gro_complete(skb, nhoff);  out_unlock:  	rcu_read_unlock(); @@ -267,6 +310,13 @@ static struct packet_offload ipv6_packet_offload __read_mostly = {  	},  }; +static const struct net_offload sit_offload = { +	.callbacks = { +		.gso_send_check = ipv6_gso_send_check, +		.gso_segment	= ipv6_gso_segment, +	}, +}; +  static int __init ipv6_offload_init(void)  { @@ -278,6 +328,9 @@ static int __init ipv6_offload_init(void)  		pr_crit("%s: Cannot add EXTHDRS protocol offload\n", __func__);  	dev_add_offload(&ipv6_packet_offload); + +	inet_add_offload(&sit_offload, IPPROTO_IPV6); +  	return 0;  } diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a54c45ce4a4..45702b8cd14 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb)  	}  	rcu_read_lock_bh(); -	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); +	nexthop = rt6_nexthop((struct rt6_info *)dst);  	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);  	if (unlikely(!neigh))  		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -116,8 +116,8 @@ static int ip6_finish_output2(struct sk_buff *skb)  	}  	rcu_read_unlock_bh(); -	IP6_INC_STATS_BH(dev_net(dst->dev), -			 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); +	IP6_INC_STATS(dev_net(dst->dev), +		      ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);  	kfree_skb(skb);  	return -EINVAL;  } @@ -125,13 +125,14 @@ static int ip6_finish_output2(struct sk_buff *skb)  static int ip6_finish_output(struct sk_buff *skb)  {  	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || -	    dst_allfrag(skb_dst(skb))) +	    dst_allfrag(skb_dst(skb)) || +	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))  		return ip6_fragment(skb, ip6_finish_output2);  	else  		return ip6_finish_output2(skb);  } -int ip6_output(struct sk_buff *skb) +int ip6_output(struct sock *sk, struct sk_buff *skb)  {  	struct net_device *dev = skb_dst(skb)->dev;  	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); @@ -218,7 +219,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,  	skb->mark = sk->sk_mark;  	mtu = dst_mtu(dst); -	if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) { +	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {  		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),  			      IPSTATS_MIB_OUT, skb->len);  		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, @@ -320,6 +321,45 @@ static inline int ip6_forward_finish(struct sk_buff *skb)  	return dst_output(skb);  } +static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) +{ +	unsigned int mtu; +	struct inet6_dev *idev; + +	if (dst_metric_locked(dst, RTAX_MTU)) { +		mtu = dst_metric_raw(dst, RTAX_MTU); +		if (mtu) +			return mtu; +	} + +	mtu = IPV6_MIN_MTU; +	rcu_read_lock(); +	idev = __in6_dev_get(dst->dev); +	if (idev) +		mtu = idev->cnf.mtu6; +	rcu_read_unlock(); + +	return mtu; +} + +static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu) +{ +	if (skb->len <= mtu) +		return false; + +	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */ +	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu) +		return true; + +	if (skb->ignore_df) +		return false; + +	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu) +		return false; + +	return true; +} +  int ip6_forward(struct sk_buff *skb)  {  	struct dst_entry *dst = skb_dst(skb); @@ -331,17 +371,18 @@ int ip6_forward(struct sk_buff *skb)  	if (net->ipv6.devconf_all->forwarding == 0)  		goto error; +	if (skb->pkt_type != PACKET_HOST) +		goto drop; +  	if (skb_warn_if_lro(skb))  		goto drop;  	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INDISCARDS);  		goto drop;  	} -	if (skb->pkt_type != PACKET_HOST) -		goto drop; -  	skb_forward_csum(skb);  	/* @@ -369,8 +410,8 @@ int ip6_forward(struct sk_buff *skb)  		/* Force OUTPUT device used as source address */  		skb->dev = dst->dev;  		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INHDRERRORS);  		kfree_skb(skb);  		return -ETIMEDOUT; @@ -383,14 +424,15 @@ int ip6_forward(struct sk_buff *skb)  		if (proxied > 0)  			return ip6_input(skb);  		else if (proxied < 0) { -			IP6_INC_STATS(net, ip6_dst_idev(dst), -				      IPSTATS_MIB_INDISCARDS); +			IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +					 IPSTATS_MIB_INDISCARDS);  			goto drop;  		}  	}  	if (!xfrm6_route_forward(skb)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INDISCARDS);  		goto drop;  	}  	dst = skb_dst(skb); @@ -438,25 +480,25 @@ int ip6_forward(struct sk_buff *skb)  		}  	} -	mtu = dst_mtu(dst); +	mtu = ip6_dst_mtu_forward(dst);  	if (mtu < IPV6_MIN_MTU)  		mtu = IPV6_MIN_MTU; -	if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || -	    (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { +	if (ip6_pkt_too_big(skb, mtu)) {  		/* Again, force OUTPUT device used as source address */  		skb->dev = dst->dev;  		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); -		IP6_INC_STATS_BH(net, -				 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_INTOOBIGERRORS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_FRAGFAILS);  		kfree_skb(skb);  		return -EMSGSIZE;  	}  	if (skb_cow(skb, dst->dev->hard_header_len)) { -		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS); +		IP6_INC_STATS_BH(net, ip6_dst_idev(dst), +				 IPSTATS_MIB_OUTDISCARDS);  		goto drop;  	} @@ -492,12 +534,23 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)  	to->tc_index = from->tc_index;  #endif  	nf_copy(to, from); -#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) -	to->nf_trace = from->nf_trace; -#endif  	skb_copy_secmark(to, from);  } +static void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +{ +	static u32 ip6_idents_hashrnd __read_mostly; +	u32 hash, id; + +	net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); + +	hash = __ipv6_addr_jhash(&rt->rt6i_dst.addr, ip6_idents_hashrnd); +	hash = __ipv6_addr_jhash(&rt->rt6i_src.addr, hash); + +	id = ip_idents_reserve(hash, 1); +	fhdr->identification = htonl(id); +} +  int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  {  	struct sk_buff *frag; @@ -520,7 +573,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))  	/* We must not fragment if the socket is set to force MTU discovery  	 * or if the skb it not generated by a local socket.  	 */ -	if (unlikely(!skb->local_df && skb->len > mtu) || +	if (unlikely(!skb->ignore_df && skb->len > mtu) ||  		     (IP6CB(skb)->frag_max_size &&  		      IP6CB(skb)->frag_max_size > mtu)) {  		if (skb->sk && dst_allfrag(skb_dst(skb))) @@ -874,7 +927,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,  	 */  	rt = (struct rt6_info *) *dst;  	rcu_read_lock_bh(); -	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt, &fl6->daddr)); +	n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt));  	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;  	rcu_read_unlock_bh(); @@ -909,7 +962,7 @@ static int ip6_dst_lookup_tail(struct sock *sk,  out_err_release:  	if (err == -ENETUNREACH) -		IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES); +		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);  	dst_release(*dst);  	*dst = NULL;  	return err; @@ -937,7 +990,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);   *	@sk: socket which provides route info   *	@fl6: flow to lookup   *	@final_dst: final destination address for ipsec lookup - *	@can_sleep: we are in a sleepable context   *   *	This function performs a route lookup on the given flow.   * @@ -945,8 +997,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup);   *	error code.   */  struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, -				      const struct in6_addr *final_dst, -				      bool can_sleep) +				      const struct in6_addr *final_dst)  {  	struct dst_entry *dst = NULL;  	int err; @@ -956,8 +1007,6 @@ struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,  		return ERR_PTR(err);  	if (final_dst)  		fl6->daddr = *final_dst; -	if (can_sleep) -		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;  	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);  } @@ -968,7 +1017,6 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);   *	@sk: socket which provides the dst cache and route info   *	@fl6: flow to lookup   *	@final_dst: final destination address for ipsec lookup - *	@can_sleep: we are in a sleepable context   *   *	This function performs a route lookup on the given flow with the   *	possibility of using the cached route in the socket if it is valid. @@ -979,8 +1027,7 @@ EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);   *	error code.   */  struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, -					 const struct in6_addr *final_dst, -					 bool can_sleep) +					 const struct in6_addr *final_dst)  {  	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);  	int err; @@ -992,8 +1039,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,  		return ERR_PTR(err);  	if (final_dst)  		fl6->daddr = *final_dst; -	if (can_sleep) -		fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;  	return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);  } @@ -1008,6 +1053,7 @@ static inline int ip6_ufo_append_data(struct sock *sk,  {  	struct sk_buff *skb; +	struct frag_hdr fhdr;  	int err;  	/* There is support for UDP large send offload by network @@ -1015,8 +1061,6 @@ static inline int ip6_ufo_append_data(struct sock *sk,  	 * udp datagram  	 */  	if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) { -		struct frag_hdr fhdr; -  		skb = sock_alloc_send_skb(sk,  			hh_len + fragheaderlen + transhdrlen + 20,  			(flags & MSG_DONTWAIT), &err); @@ -1036,20 +1080,24 @@ static inline int ip6_ufo_append_data(struct sock *sk,  		skb->transport_header = skb->network_header + fragheaderlen;  		skb->protocol = htons(ETH_P_IPV6); -		skb->ip_summed = CHECKSUM_PARTIAL;  		skb->csum = 0; -		/* Specify the length of each IPv6 datagram fragment. -		 * It has to be a multiple of 8. -		 */ -		skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - -					     sizeof(struct frag_hdr)) & ~7; -		skb_shinfo(skb)->gso_type = SKB_GSO_UDP; -		ipv6_select_ident(&fhdr, rt); -		skb_shinfo(skb)->ip6_frag_id = fhdr.identification;  		__skb_queue_tail(&sk->sk_write_queue, skb); +	} else if (skb_is_gso(skb)) { +		goto append;  	} +	skb->ip_summed = CHECKSUM_PARTIAL; +	/* Specify the length of each IPv6 datagram fragment. +	 * It has to be a multiple of 8. +	 */ +	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - +				     sizeof(struct frag_hdr)) & ~7; +	skb_shinfo(skb)->gso_type = SKB_GSO_UDP; +	ipv6_select_ident(&fhdr, rt); +	skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + +append:  	return skb_append_datato_frags(sk, skb, getfrag, from,  				       (length - transhdrlen));  } @@ -1071,21 +1119,19 @@ static void ip6_append_data_mtu(unsigned int *mtu,  				unsigned int fragheaderlen,  				struct sk_buff *skb,  				struct rt6_info *rt, -				bool pmtuprobe) +				unsigned int orig_mtu)  {  	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {  		if (skb == NULL) {  			/* first fragment, reserve header_len */ -			*mtu = *mtu - rt->dst.header_len; +			*mtu = orig_mtu - rt->dst.header_len;  		} else {  			/*  			 * this fragment is not first, the headers  			 * space is regarded as data space.  			 */ -			*mtu = min(*mtu, pmtuprobe ? -				   rt->dst.dev->mtu : -				   dst_mtu(rt->dst.path)); +			*mtu = orig_mtu;  		}  		*maxfraglen = ((*mtu - fragheaderlen) & ~7)  			      + fragheaderlen - sizeof(struct frag_hdr); @@ -1102,7 +1148,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct inet_cork *cork;  	struct sk_buff *skb, *skb_prev = NULL; -	unsigned int maxfraglen, fragheaderlen, mtu; +	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu;  	int exthdrlen;  	int dst_exthdrlen;  	int hh_len; @@ -1158,10 +1204,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  		np->cork.hop_limit = hlimit;  		np->cork.tclass = tclass;  		if (rt->dst.flags & DST_XFRM_TUNNEL) -			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? +			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?  			      rt->dst.dev->mtu : dst_mtu(&rt->dst);  		else -			mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ? +			mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?  			      rt->dst.dev->mtu : dst_mtu(rt->dst.path);  		if (np->frag_size < mtu) {  			if (np->frag_size) @@ -1184,16 +1230,43 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  		dst_exthdrlen = 0;  		mtu = cork->fragsize;  	} +	orig_mtu = mtu;  	hh_len = LL_RESERVED_SPACE(rt->dst.dev);  	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +  			(opt ? opt->opt_nflen : 0); -	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); +	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - +		     sizeof(struct frag_hdr);  	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { -		if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { -			ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); +		unsigned int maxnonfragsize, headersize; + +		headersize = sizeof(struct ipv6hdr) + +			     (opt ? opt->opt_flen + opt->opt_nflen : 0) + +			     (dst_allfrag(&rt->dst) ? +			      sizeof(struct frag_hdr) : 0) + +			     rt->rt6i_nfheader_len; + +		if (ip6_sk_ignore_df(sk)) +			maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN; +		else +			maxnonfragsize = mtu; + +		/* dontfrag active */ +		if ((cork->length + length > mtu - headersize) && dontfrag && +		    (sk->sk_protocol == IPPROTO_UDP || +		     sk->sk_protocol == IPPROTO_RAW)) { +			ipv6_local_rxpmtu(sk, fl6, mtu - headersize + +						   sizeof(struct ipv6hdr)); +			goto emsgsize; +		} + +		if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: +			ipv6_local_error(sk, EMSGSIZE, fl6, +					 mtu - headersize + +					 sizeof(struct ipv6hdr));  			return -EMSGSIZE;  		}  	} @@ -1218,12 +1291,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,  	 * --yoshfuji  	 */ -	if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || -					   sk->sk_protocol == IPPROTO_RAW)) { -		ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); -		return -EMSGSIZE; -	} -  	skb = skb_peek_tail(&sk->sk_write_queue);  	cork->length += length;  	if (((length > mtu) || @@ -1263,8 +1330,7 @@ alloc_new_skb:  			if (skb == NULL || skb_prev == NULL)  				ip6_append_data_mtu(&mtu, &maxfraglen,  						    fragheaderlen, skb, rt, -						    np->pmtudisc == -						    IPV6_PMTUDISC_PROBE); +						    orig_mtu);  			skb_prev = skb; @@ -1492,8 +1558,7 @@ int ip6_push_pending_frames(struct sock *sk)  	}  	/* Allow local fragmentation. */ -	if (np->pmtudisc < IPV6_PMTUDISC_DO) -		skb->local_df = 1; +	skb->ignore_df = ip6_sk_ignore_df(sk);  	*final_dst = fl6->daddr;  	__skb_pull(skb, skb_network_header_len(skb)); @@ -1520,8 +1585,8 @@ int ip6_push_pending_frames(struct sock *sk)  	if (proto == IPPROTO_ICMPV6) {  		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); -		ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type); -		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); +		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type); +		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);  	}  	err = ip6_local_out(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 583b77e2f69..afa08245836 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -29,7 +29,6 @@  #include <linux/if.h>  #include <linux/in.h>  #include <linux/ip.h> -#include <linux/if_tunnel.h>  #include <linux/net.h>  #include <linux/in6.h>  #include <linux/netdevice.h> @@ -62,6 +61,7 @@  MODULE_AUTHOR("Ville Nuorvala");  MODULE_DESCRIPTION("IPv6 tunneling device");  MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("ip6tnl");  MODULE_ALIAS_NETDEV("ip6tnl0");  #ifdef IP6_TNL_DEBUG @@ -70,9 +70,6 @@ MODULE_ALIAS_NETDEV("ip6tnl0");  #define IP6_TNL_TRACE(x...) do {;} while(0)  #endif -#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) -#define IPV6_TCLASS_SHIFT 20 -  #define HASH_SIZE_SHIFT  5  #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -103,16 +100,26 @@ struct ip6_tnl_net {  static struct net_device_stats *ip6_get_stats(struct net_device *dev)  { -	struct pcpu_tstats sum = { 0 }; +	struct pcpu_sw_netstats tmp, sum = { 0 };  	int i;  	for_each_possible_cpu(i) { -		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - -		sum.rx_packets += tstats->rx_packets; -		sum.rx_bytes   += tstats->rx_bytes; -		sum.tx_packets += tstats->tx_packets; -		sum.tx_bytes   += tstats->tx_bytes; +		unsigned int start; +		const struct pcpu_sw_netstats *tstats = +						   per_cpu_ptr(dev->tstats, i); + +		do { +			start = u64_stats_fetch_begin_irq(&tstats->syncp); +			tmp.rx_packets = tstats->rx_packets; +			tmp.rx_bytes = tstats->rx_bytes; +			tmp.tx_packets = tstats->tx_packets; +			tmp.tx_bytes =  tstats->tx_bytes; +		} while (u64_stats_fetch_retry_irq(&tstats->syncp, start)); + +		sum.rx_packets += tmp.rx_packets; +		sum.rx_bytes   += tmp.rx_bytes; +		sum.tx_packets += tmp.tx_packets; +		sum.tx_bytes   += tmp.tx_bytes;  	}  	dev->stats.rx_packets = sum.rx_packets;  	dev->stats.rx_bytes   = sum.rx_bytes; @@ -785,7 +792,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,  	if ((t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr,  					&ipv6h->daddr)) != NULL) { -		struct pcpu_tstats *tstats; +		struct pcpu_sw_netstats *tstats;  		if (t->parms.proto != ipproto && t->parms.proto != 0) {  			rcu_read_unlock(); @@ -824,8 +831,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol,  		}  		tstats = this_cpu_ptr(t->dev->tstats); +		u64_stats_update_begin(&tstats->syncp);  		tstats->rx_packets++;  		tstats->rx_bytes += skb->len; +		u64_stats_update_end(&tstats->syncp);  		netif_rx(skb); @@ -1131,7 +1140,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)  	if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)  		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK);  	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) -		fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); +		fl6.flowlabel |= ip6_flowlabel(ipv6h);  	if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)  		fl6.flowi6_mark = skb->mark; @@ -1332,8 +1341,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)  	int err = 0;  	struct ip6_tnl_parm p;  	struct __ip6_tnl_parm p1; -	struct ip6_tnl *t = NULL; -	struct net *net = dev_net(dev); +	struct ip6_tnl *t = netdev_priv(dev); +	struct net *net = t->net;  	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);  	switch (cmd) { @@ -1345,11 +1354,11 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)  			}  			ip6_tnl_parm_from_user(&p1, &p);  			t = ip6_tnl_locate(net, &p1, 0); +			if (t == NULL) +				t = netdev_priv(dev);  		} else {  			memset(&p, 0, sizeof(p));  		} -		if (t == NULL) -			t = netdev_priv(dev);  		ip6_tnl_parm_to_user(&p, &t->parms);  		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {  			err = -EFAULT; @@ -1497,7 +1506,7 @@ ip6_tnl_dev_init_gen(struct net_device *dev)  	t->dev = dev;  	t->net = dev_net(dev); -	dev->tstats = alloc_percpu(struct pcpu_tstats); +	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);  	if (!dev->tstats)  		return -ENOMEM;  	return 0; @@ -1549,7 +1558,7 @@ static int ip6_tnl_validate(struct nlattr *tb[], struct nlattr *data[])  {  	u8 proto; -	if (!data) +	if (!data || !data[IFLA_IPTUN_PROTO])  		return 0;  	proto = nla_get_u8(data[IFLA_IPTUN_PROTO]); @@ -1635,6 +1644,15 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[],  	return ip6_tnl_update(t, &p);  } +static void ip6_tnl_dellink(struct net_device *dev, struct list_head *head) +{ +	struct net *net = dev_net(dev); +	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); + +	if (dev != ip6n->fb_tnl_dev) +		unregister_netdevice_queue(dev, head); +} +  static size_t ip6_tnl_get_size(const struct net_device *dev)  {  	return @@ -1699,6 +1717,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = {  	.validate	= ip6_tnl_validate,  	.newlink	= ip6_tnl_newlink,  	.changelink	= ip6_tnl_changelink, +	.dellink	= ip6_tnl_dellink,  	.get_size	= ip6_tnl_get_size,  	.fill_info	= ip6_tnl_fill_info,  }; @@ -1715,9 +1734,9 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {  	.priority	=	1,  }; -static void __net_exit ip6_tnl_destroy_tunnels(struct ip6_tnl_net *ip6n) +static void __net_exit ip6_tnl_destroy_tunnels(struct net *net)  { -	struct net *net = dev_net(ip6n->fb_tnl_dev); +	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);  	struct net_device *dev, *aux;  	int h;  	struct ip6_tnl *t; @@ -1785,10 +1804,8 @@ err_alloc_dev:  static void __net_exit ip6_tnl_exit_net(struct net *net)  { -	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); -  	rtnl_lock(); -	ip6_tnl_destroy_tunnels(ip6n); +	ip6_tnl_destroy_tunnels(net);  	rtnl_unlock();  } diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c new file mode 100644 index 00000000000..9aaa6bb229e --- /dev/null +++ b/net/ipv6/ip6_vti.c @@ -0,0 +1,1160 @@ +/* + *	IPv6 virtual tunneling interface + * + *	Copyright (C) 2013 secunet Security Networks AG + * + *	Author: + *	Steffen Klassert <steffen.klassert@secunet.com> + * + *	Based on: + *	net/ipv6/ip6_tunnel.c + * + *	This program is free software; you can redistribute it and/or + *	modify it under the terms of the GNU General Public License + *	as published by the Free Software Foundation; either version + *	2 of the License, or (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/capability.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/sockios.h> +#include <linux/icmp.h> +#include <linux/if.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/net.h> +#include <linux/in6.h> +#include <linux/netdevice.h> +#include <linux/if_arp.h> +#include <linux/icmpv6.h> +#include <linux/init.h> +#include <linux/route.h> +#include <linux/rtnetlink.h> +#include <linux/netfilter_ipv6.h> +#include <linux/slab.h> +#include <linux/hash.h> + +#include <linux/uaccess.h> +#include <linux/atomic.h> + +#include <net/icmp.h> +#include <net/ip.h> +#include <net/ip_tunnels.h> +#include <net/ipv6.h> +#include <net/ip6_route.h> +#include <net/addrconf.h> +#include <net/ip6_tunnel.h> +#include <net/xfrm.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> + +#define HASH_SIZE_SHIFT  5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) + +static u32 HASH(const struct in6_addr *addr1, const struct in6_addr *addr2) +{ +	u32 hash = ipv6_addr_hash(addr1) ^ ipv6_addr_hash(addr2); + +	return hash_32(hash, HASH_SIZE_SHIFT); +} + +static int vti6_dev_init(struct net_device *dev); +static void vti6_dev_setup(struct net_device *dev); +static struct rtnl_link_ops vti6_link_ops __read_mostly; + +static int vti6_net_id __read_mostly; +struct vti6_net { +	/* the vti6 tunnel fallback device */ +	struct net_device *fb_tnl_dev; +	/* lists for storing tunnels in use */ +	struct ip6_tnl __rcu *tnls_r_l[HASH_SIZE]; +	struct ip6_tnl __rcu *tnls_wc[1]; +	struct ip6_tnl __rcu **tnls[2]; +}; + +#define for_each_vti6_tunnel_rcu(start) \ +	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + +/** + * vti6_tnl_lookup - fetch tunnel matching the end-point addresses + *   @net: network namespace + *   @remote: the address of the tunnel exit-point + *   @local: the address of the tunnel entry-point + * + * Return: + *   tunnel matching given end-points if found, + *   else fallback tunnel if its device is up, + *   else %NULL + **/ +static struct ip6_tnl * +vti6_tnl_lookup(struct net *net, const struct in6_addr *remote, +		const struct in6_addr *local) +{ +	unsigned int hash = HASH(remote, local); +	struct ip6_tnl *t; +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); + +	for_each_vti6_tunnel_rcu(ip6n->tnls_r_l[hash]) { +		if (ipv6_addr_equal(local, &t->parms.laddr) && +		    ipv6_addr_equal(remote, &t->parms.raddr) && +		    (t->dev->flags & IFF_UP)) +			return t; +	} +	t = rcu_dereference(ip6n->tnls_wc[0]); +	if (t && (t->dev->flags & IFF_UP)) +		return t; + +	return NULL; +} + +/** + * vti6_tnl_bucket - get head of list matching given tunnel parameters + *   @p: parameters containing tunnel end-points + * + * Description: + *   vti6_tnl_bucket() returns the head of the list matching the + *   &struct in6_addr entries laddr and raddr in @p. + * + * Return: head of IPv6 tunnel list + **/ +static struct ip6_tnl __rcu ** +vti6_tnl_bucket(struct vti6_net *ip6n, const struct __ip6_tnl_parm *p) +{ +	const struct in6_addr *remote = &p->raddr; +	const struct in6_addr *local = &p->laddr; +	unsigned int h = 0; +	int prio = 0; + +	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) { +		prio = 1; +		h = HASH(remote, local); +	} +	return &ip6n->tnls[prio][h]; +} + +static void +vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t) +{ +	struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms); + +	rcu_assign_pointer(t->next , rtnl_dereference(*tp)); +	rcu_assign_pointer(*tp, t); +} + +static void +vti6_tnl_unlink(struct vti6_net *ip6n, struct ip6_tnl *t) +{ +	struct ip6_tnl __rcu **tp; +	struct ip6_tnl *iter; + +	for (tp = vti6_tnl_bucket(ip6n, &t->parms); +	     (iter = rtnl_dereference(*tp)) != NULL; +	     tp = &iter->next) { +		if (t == iter) { +			rcu_assign_pointer(*tp, t->next); +			break; +		} +	} +} + +static void vti6_dev_free(struct net_device *dev) +{ +	free_percpu(dev->tstats); +	free_netdev(dev); +} + +static int vti6_tnl_create2(struct net_device *dev) +{ +	struct ip6_tnl *t = netdev_priv(dev); +	struct net *net = dev_net(dev); +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); +	int err; + +	err = vti6_dev_init(dev); +	if (err < 0) +		goto out; + +	err = register_netdevice(dev); +	if (err < 0) +		goto out; + +	strcpy(t->parms.name, dev->name); +	dev->rtnl_link_ops = &vti6_link_ops; + +	dev_hold(dev); +	vti6_tnl_link(ip6n, t); + +	return 0; + +out: +	return err; +} + +static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) +{ +	struct net_device *dev; +	struct ip6_tnl *t; +	char name[IFNAMSIZ]; +	int err; + +	if (p->name[0]) +		strlcpy(name, p->name, IFNAMSIZ); +	else +		sprintf(name, "ip6_vti%%d"); + +	dev = alloc_netdev(sizeof(*t), name, vti6_dev_setup); +	if (dev == NULL) +		goto failed; + +	dev_net_set(dev, net); + +	t = netdev_priv(dev); +	t->parms = *p; +	t->net = dev_net(dev); + +	err = vti6_tnl_create2(dev); +	if (err < 0) +		goto failed_free; + +	return t; + +failed_free: +	vti6_dev_free(dev); +failed: +	return NULL; +} + +/** + * vti6_locate - find or create tunnel matching given parameters + *   @net: network namespace + *   @p: tunnel parameters + *   @create: != 0 if allowed to create new tunnel if no match found + * + * Description: + *   vti6_locate() first tries to locate an existing tunnel + *   based on @parms. If this is unsuccessful, but @create is set a new + *   tunnel device is created and registered for use. + * + * Return: + *   matching tunnel or NULL + **/ +static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, +				   int create) +{ +	const struct in6_addr *remote = &p->raddr; +	const struct in6_addr *local = &p->laddr; +	struct ip6_tnl __rcu **tp; +	struct ip6_tnl *t; +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); + +	for (tp = vti6_tnl_bucket(ip6n, p); +	     (t = rtnl_dereference(*tp)) != NULL; +	     tp = &t->next) { +		if (ipv6_addr_equal(local, &t->parms.laddr) && +		    ipv6_addr_equal(remote, &t->parms.raddr)) +			return t; +	} +	if (!create) +		return NULL; +	return vti6_tnl_create(net, p); +} + +/** + * vti6_dev_uninit - tunnel device uninitializer + *   @dev: the device to be destroyed + * + * Description: + *   vti6_dev_uninit() removes tunnel from its list + **/ +static void vti6_dev_uninit(struct net_device *dev) +{ +	struct ip6_tnl *t = netdev_priv(dev); +	struct net *net = dev_net(dev); +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); + +	if (dev == ip6n->fb_tnl_dev) +		RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); +	else +		vti6_tnl_unlink(ip6n, t); +	dev_put(dev); +} + +static int vti6_rcv(struct sk_buff *skb) +{ +	struct ip6_tnl *t; +	const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + +	rcu_read_lock(); +	if ((t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, +				 &ipv6h->daddr)) != NULL) { +		if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { +			rcu_read_unlock(); +			goto discard; +		} + +		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) { +			rcu_read_unlock(); +			return 0; +		} + +		if (!ip6_tnl_rcv_ctl(t, &ipv6h->daddr, &ipv6h->saddr)) { +			t->dev->stats.rx_dropped++; +			rcu_read_unlock(); +			goto discard; +		} + +		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; +		skb->mark = be32_to_cpu(t->parms.i_key); + +		rcu_read_unlock(); + +		return xfrm6_rcv(skb); +	} +	rcu_read_unlock(); +	return -EINVAL; +discard: +	kfree_skb(skb); +	return 0; +} + +static int vti6_rcv_cb(struct sk_buff *skb, int err) +{ +	unsigned short family; +	struct net_device *dev; +	struct pcpu_sw_netstats *tstats; +	struct xfrm_state *x; +	struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; + +	if (!t) +		return 1; + +	dev = t->dev; + +	if (err) { +		dev->stats.rx_errors++; +		dev->stats.rx_dropped++; + +		return 0; +	} + +	x = xfrm_input_state(skb); +	family = x->inner_mode->afinfo->family; + +	if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) +		return -EPERM; + +	skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev))); +	skb->dev = dev; + +	tstats = this_cpu_ptr(dev->tstats); +	u64_stats_update_begin(&tstats->syncp); +	tstats->rx_packets++; +	tstats->rx_bytes += skb->len; +	u64_stats_update_end(&tstats->syncp); + +	return 0; +} + +/** + * vti6_addr_conflict - compare packet addresses to tunnel's own + *   @t: the outgoing tunnel device + *   @hdr: IPv6 header from the incoming packet + * + * Description: + *   Avoid trivial tunneling loop by checking that tunnel exit-point + *   doesn't match source of incoming packet. + * + * Return: + *   1 if conflict, + *   0 else + **/ +static inline bool +vti6_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) +{ +	return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +static bool vti6_state_check(const struct xfrm_state *x, +			     const struct in6_addr *dst, +			     const struct in6_addr *src) +{ +	xfrm_address_t *daddr = (xfrm_address_t *)dst; +	xfrm_address_t *saddr = (xfrm_address_t *)src; + +	/* if there is no transform then this tunnel is not functional. +	 * Or if the xfrm is not mode tunnel. +	 */ +	if (!x || x->props.mode != XFRM_MODE_TUNNEL || +	    x->props.family != AF_INET6) +		return false; + +	if (ipv6_addr_any(dst)) +		return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET6); + +	if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET6)) +		return false; + +	return true; +} + +/** + * vti6_xmit - send a packet + *   @skb: the outgoing socket buffer + *   @dev: the outgoing tunnel device + *   @fl: the flow informations for the xfrm_lookup + **/ +static int +vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) +{ +	struct ip6_tnl *t = netdev_priv(dev); +	struct net_device_stats *stats = &t->dev->stats; +	struct dst_entry *dst = skb_dst(skb); +	struct net_device *tdev; +	int err = -1; + +	if (!dst) +		goto tx_err_link_failure; + +	dst_hold(dst); +	dst = xfrm_lookup(t->net, dst, fl, NULL, 0); +	if (IS_ERR(dst)) { +		err = PTR_ERR(dst); +		dst = NULL; +		goto tx_err_link_failure; +	} + +	if (!vti6_state_check(dst->xfrm, &t->parms.raddr, &t->parms.laddr)) +		goto tx_err_link_failure; + +	tdev = dst->dev; + +	if (tdev == dev) { +		stats->collisions++; +		net_warn_ratelimited("%s: Local routing loop detected!\n", +				     t->parms.name); +		goto tx_err_dst_release; +	} + +	skb_scrub_packet(skb, !net_eq(t->net, dev_net(dev))); +	skb_dst_set(skb, dst); +	skb->dev = skb_dst(skb)->dev; + +	err = dst_output(skb); +	if (net_xmit_eval(err) == 0) { +		struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); + +		u64_stats_update_begin(&tstats->syncp); +		tstats->tx_bytes += skb->len; +		tstats->tx_packets++; +		u64_stats_update_end(&tstats->syncp); +	} else { +		stats->tx_errors++; +		stats->tx_aborted_errors++; +	} + +	return 0; +tx_err_link_failure: +	stats->tx_carrier_errors++; +	dst_link_failure(skb); +tx_err_dst_release: +	dst_release(dst); +	return err; +} + +static netdev_tx_t +vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) +{ +	struct ip6_tnl *t = netdev_priv(dev); +	struct net_device_stats *stats = &t->dev->stats; +	struct ipv6hdr *ipv6h; +	struct flowi fl; +	int ret; + +	memset(&fl, 0, sizeof(fl)); +	skb->mark = be32_to_cpu(t->parms.o_key); + +	switch (skb->protocol) { +	case htons(ETH_P_IPV6): +		ipv6h = ipv6_hdr(skb); + +		if ((t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) || +		    !ip6_tnl_xmit_ctl(t) || vti6_addr_conflict(t, ipv6h)) +			goto tx_err; + +		xfrm_decode_session(skb, &fl, AF_INET6); +		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); +		break; +	case htons(ETH_P_IP): +		xfrm_decode_session(skb, &fl, AF_INET); +		memset(IPCB(skb), 0, sizeof(*IPCB(skb))); +		break; +	default: +		goto tx_err; +	} + +	ret = vti6_xmit(skb, dev, &fl); +	if (ret < 0) +		goto tx_err; + +	return NETDEV_TX_OK; + +tx_err: +	stats->tx_errors++; +	stats->tx_dropped++; +	kfree_skb(skb); +	return NETDEV_TX_OK; +} + +static int vti6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +		    u8 type, u8 code, int offset, __be32 info) +{ +	__be32 spi; +	__u32 mark; +	struct xfrm_state *x; +	struct ip6_tnl *t; +	struct ip_esp_hdr *esph; +	struct ip_auth_hdr *ah; +	struct ip_comp_hdr *ipch; +	struct net *net = dev_net(skb->dev); +	const struct ipv6hdr *iph = (const struct ipv6hdr *)skb->data; +	int protocol = iph->nexthdr; + +	t = vti6_tnl_lookup(dev_net(skb->dev), &iph->daddr, &iph->saddr); +	if (!t) +		return -1; + +	mark = be32_to_cpu(t->parms.o_key); + +	switch (protocol) { +	case IPPROTO_ESP: +		esph = (struct ip_esp_hdr *)(skb->data + offset); +		spi = esph->spi; +		break; +	case IPPROTO_AH: +		ah = (struct ip_auth_hdr *)(skb->data + offset); +		spi = ah->spi; +		break; +	case IPPROTO_COMP: +		ipch = (struct ip_comp_hdr *)(skb->data + offset); +		spi = htonl(ntohs(ipch->cpi)); +		break; +	default: +		return 0; +	} + +	if (type != ICMPV6_PKT_TOOBIG && +	    type != NDISC_REDIRECT) +		return 0; + +	x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr, +			      spi, protocol, AF_INET6); +	if (!x) +		return 0; + +	if (type == NDISC_REDIRECT) +		ip6_redirect(skb, net, skb->dev->ifindex, 0); +	else +		ip6_update_pmtu(skb, net, info, 0, 0); +	xfrm_state_put(x); + +	return 0; +} + +static void vti6_link_config(struct ip6_tnl *t) +{ +	struct net_device *dev = t->dev; +	struct __ip6_tnl_parm *p = &t->parms; + +	memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); +	memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + +	p->flags &= ~(IP6_TNL_F_CAP_XMIT | IP6_TNL_F_CAP_RCV | +		      IP6_TNL_F_CAP_PER_PACKET); +	p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + +	if (p->flags & IP6_TNL_F_CAP_XMIT && p->flags & IP6_TNL_F_CAP_RCV) +		dev->flags |= IFF_POINTOPOINT; +	else +		dev->flags &= ~IFF_POINTOPOINT; + +	dev->iflink = p->link; +} + +/** + * vti6_tnl_change - update the tunnel parameters + *   @t: tunnel to be changed + *   @p: tunnel configuration parameters + * + * Description: + *   vti6_tnl_change() updates the tunnel parameters + **/ +static int +vti6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) +{ +	t->parms.laddr = p->laddr; +	t->parms.raddr = p->raddr; +	t->parms.link = p->link; +	t->parms.i_key = p->i_key; +	t->parms.o_key = p->o_key; +	t->parms.proto = p->proto; +	ip6_tnl_dst_reset(t); +	vti6_link_config(t); +	return 0; +} + +static int vti6_update(struct ip6_tnl *t, struct __ip6_tnl_parm *p) +{ +	struct net *net = dev_net(t->dev); +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); +	int err; + +	vti6_tnl_unlink(ip6n, t); +	synchronize_net(); +	err = vti6_tnl_change(t, p); +	vti6_tnl_link(ip6n, t); +	netdev_state_change(t->dev); +	return err; +} + +static void +vti6_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm2 *u) +{ +	p->laddr = u->laddr; +	p->raddr = u->raddr; +	p->link = u->link; +	p->i_key = u->i_key; +	p->o_key = u->o_key; +	p->proto = u->proto; + +	memcpy(p->name, u->name, sizeof(u->name)); +} + +static void +vti6_parm_to_user(struct ip6_tnl_parm2 *u, const struct __ip6_tnl_parm *p) +{ +	u->laddr = p->laddr; +	u->raddr = p->raddr; +	u->link = p->link; +	u->i_key = p->i_key; +	u->o_key = p->o_key; +	u->proto = p->proto; + +	memcpy(u->name, p->name, sizeof(u->name)); +} + +/** + * vti6_tnl_ioctl - configure vti6 tunnels from userspace + *   @dev: virtual device associated with tunnel + *   @ifr: parameters passed from userspace + *   @cmd: command to be performed + * + * Description: + *   vti6_ioctl() is used for managing vti6 tunnels + *   from userspace. + * + *   The possible commands are the following: + *     %SIOCGETTUNNEL: get tunnel parameters for device + *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters + *     %SIOCCHGTUNNEL: change tunnel parameters to those given + *     %SIOCDELTUNNEL: delete tunnel + * + *   The fallback device "ip6_vti0", created during module + *   initialization, can be used for creating other tunnel devices. + * + * Return: + *   0 on success, + *   %-EFAULT if unable to copy data to or from userspace, + *   %-EPERM if current process hasn't %CAP_NET_ADMIN set + *   %-EINVAL if passed tunnel parameters are invalid, + *   %-EEXIST if changing a tunnel's parameters would cause a conflict + *   %-ENODEV if attempting to change or delete a nonexisting device + **/ +static int +vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ +	int err = 0; +	struct ip6_tnl_parm2 p; +	struct __ip6_tnl_parm p1; +	struct ip6_tnl *t = NULL; +	struct net *net = dev_net(dev); +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); + +	switch (cmd) { +	case SIOCGETTUNNEL: +		if (dev == ip6n->fb_tnl_dev) { +			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { +				err = -EFAULT; +				break; +			} +			vti6_parm_from_user(&p1, &p); +			t = vti6_locate(net, &p1, 0); +		} else { +			memset(&p, 0, sizeof(p)); +		} +		if (t == NULL) +			t = netdev_priv(dev); +		vti6_parm_to_user(&p, &t->parms); +		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) +			err = -EFAULT; +		break; +	case SIOCADDTUNNEL: +	case SIOCCHGTUNNEL: +		err = -EPERM; +		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) +			break; +		err = -EFAULT; +		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) +			break; +		err = -EINVAL; +		if (p.proto != IPPROTO_IPV6  && p.proto != 0) +			break; +		vti6_parm_from_user(&p1, &p); +		t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); +		if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { +			if (t != NULL) { +				if (t->dev != dev) { +					err = -EEXIST; +					break; +				} +			} else +				t = netdev_priv(dev); + +			err = vti6_update(t, &p1); +		} +		if (t) { +			err = 0; +			vti6_parm_to_user(&p, &t->parms); +			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) +				err = -EFAULT; + +		} else +			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); +		break; +	case SIOCDELTUNNEL: +		err = -EPERM; +		if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) +			break; + +		if (dev == ip6n->fb_tnl_dev) { +			err = -EFAULT; +			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) +				break; +			err = -ENOENT; +			vti6_parm_from_user(&p1, &p); +			t = vti6_locate(net, &p1, 0); +			if (t == NULL) +				break; +			err = -EPERM; +			if (t->dev == ip6n->fb_tnl_dev) +				break; +			dev = t->dev; +		} +		err = 0; +		unregister_netdevice(dev); +		break; +	default: +		err = -EINVAL; +	} +	return err; +} + +/** + * vti6_tnl_change_mtu - change mtu manually for tunnel device + *   @dev: virtual device associated with tunnel + *   @new_mtu: the new mtu + * + * Return: + *   0 on success, + *   %-EINVAL if mtu too small + **/ +static int vti6_change_mtu(struct net_device *dev, int new_mtu) +{ +	if (new_mtu < IPV6_MIN_MTU) +		return -EINVAL; + +	dev->mtu = new_mtu; +	return 0; +} + +static const struct net_device_ops vti6_netdev_ops = { +	.ndo_uninit	= vti6_dev_uninit, +	.ndo_start_xmit = vti6_tnl_xmit, +	.ndo_do_ioctl	= vti6_ioctl, +	.ndo_change_mtu = vti6_change_mtu, +	.ndo_get_stats64 = ip_tunnel_get_stats64, +}; + +/** + * vti6_dev_setup - setup virtual tunnel device + *   @dev: virtual device associated with tunnel + * + * Description: + *   Initialize function pointers and device parameters + **/ +static void vti6_dev_setup(struct net_device *dev) +{ +	dev->netdev_ops = &vti6_netdev_ops; +	dev->destructor = vti6_dev_free; + +	dev->type = ARPHRD_TUNNEL6; +	dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr); +	dev->mtu = ETH_DATA_LEN; +	dev->flags |= IFF_NOARP; +	dev->addr_len = sizeof(struct in6_addr); +	dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} + +/** + * vti6_dev_init_gen - general initializer for all tunnel devices + *   @dev: virtual device associated with tunnel + **/ +static inline int vti6_dev_init_gen(struct net_device *dev) +{ +	struct ip6_tnl *t = netdev_priv(dev); + +	t->dev = dev; +	t->net = dev_net(dev); +	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); +	if (!dev->tstats) +		return -ENOMEM; +	return 0; +} + +/** + * vti6_dev_init - initializer for all non fallback tunnel devices + *   @dev: virtual device associated with tunnel + **/ +static int vti6_dev_init(struct net_device *dev) +{ +	struct ip6_tnl *t = netdev_priv(dev); +	int err = vti6_dev_init_gen(dev); + +	if (err) +		return err; +	vti6_link_config(t); +	return 0; +} + +/** + * vti6_fb_tnl_dev_init - initializer for fallback tunnel device + *   @dev: fallback device + * + * Return: 0 + **/ +static int __net_init vti6_fb_tnl_dev_init(struct net_device *dev) +{ +	struct ip6_tnl *t = netdev_priv(dev); +	struct net *net = dev_net(dev); +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); +	int err = vti6_dev_init_gen(dev); + +	if (err) +		return err; + +	t->parms.proto = IPPROTO_IPV6; +	dev_hold(dev); + +	vti6_link_config(t); + +	rcu_assign_pointer(ip6n->tnls_wc[0], t); +	return 0; +} + +static int vti6_validate(struct nlattr *tb[], struct nlattr *data[]) +{ +	return 0; +} + +static void vti6_netlink_parms(struct nlattr *data[], +			       struct __ip6_tnl_parm *parms) +{ +	memset(parms, 0, sizeof(*parms)); + +	if (!data) +		return; + +	if (data[IFLA_VTI_LINK]) +		parms->link = nla_get_u32(data[IFLA_VTI_LINK]); + +	if (data[IFLA_VTI_LOCAL]) +		nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL], +			   sizeof(struct in6_addr)); + +	if (data[IFLA_VTI_REMOTE]) +		nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE], +			   sizeof(struct in6_addr)); + +	if (data[IFLA_VTI_IKEY]) +		parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); + +	if (data[IFLA_VTI_OKEY]) +		parms->o_key = nla_get_be32(data[IFLA_VTI_OKEY]); +} + +static int vti6_newlink(struct net *src_net, struct net_device *dev, +			struct nlattr *tb[], struct nlattr *data[]) +{ +	struct net *net = dev_net(dev); +	struct ip6_tnl *nt; + +	nt = netdev_priv(dev); +	vti6_netlink_parms(data, &nt->parms); + +	nt->parms.proto = IPPROTO_IPV6; + +	if (vti6_locate(net, &nt->parms, 0)) +		return -EEXIST; + +	return vti6_tnl_create2(dev); +} + +static int vti6_changelink(struct net_device *dev, struct nlattr *tb[], +			   struct nlattr *data[]) +{ +	struct ip6_tnl *t; +	struct __ip6_tnl_parm p; +	struct net *net = dev_net(dev); +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); + +	if (dev == ip6n->fb_tnl_dev) +		return -EINVAL; + +	vti6_netlink_parms(data, &p); + +	t = vti6_locate(net, &p, 0); + +	if (t) { +		if (t->dev != dev) +			return -EEXIST; +	} else +		t = netdev_priv(dev); + +	return vti6_update(t, &p); +} + +static size_t vti6_get_size(const struct net_device *dev) +{ +	return +		/* IFLA_VTI_LINK */ +		nla_total_size(4) + +		/* IFLA_VTI_LOCAL */ +		nla_total_size(sizeof(struct in6_addr)) + +		/* IFLA_VTI_REMOTE */ +		nla_total_size(sizeof(struct in6_addr)) + +		/* IFLA_VTI_IKEY */ +		nla_total_size(4) + +		/* IFLA_VTI_OKEY */ +		nla_total_size(4) + +		0; +} + +static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ +	struct ip6_tnl *tunnel = netdev_priv(dev); +	struct __ip6_tnl_parm *parm = &tunnel->parms; + +	if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) || +	    nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr), +		    &parm->laddr) || +	    nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr), +		    &parm->raddr) || +	    nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || +	    nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) +		goto nla_put_failure; +	return 0; + +nla_put_failure: +	return -EMSGSIZE; +} + +static const struct nla_policy vti6_policy[IFLA_VTI_MAX + 1] = { +	[IFLA_VTI_LINK]		= { .type = NLA_U32 }, +	[IFLA_VTI_LOCAL]	= { .len = sizeof(struct in6_addr) }, +	[IFLA_VTI_REMOTE]	= { .len = sizeof(struct in6_addr) }, +	[IFLA_VTI_IKEY]		= { .type = NLA_U32 }, +	[IFLA_VTI_OKEY]		= { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops vti6_link_ops __read_mostly = { +	.kind		= "vti6", +	.maxtype	= IFLA_VTI_MAX, +	.policy		= vti6_policy, +	.priv_size	= sizeof(struct ip6_tnl), +	.setup		= vti6_dev_setup, +	.validate	= vti6_validate, +	.newlink	= vti6_newlink, +	.changelink	= vti6_changelink, +	.get_size	= vti6_get_size, +	.fill_info	= vti6_fill_info, +}; + +static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) +{ +	int h; +	struct ip6_tnl *t; +	LIST_HEAD(list); + +	for (h = 0; h < HASH_SIZE; h++) { +		t = rtnl_dereference(ip6n->tnls_r_l[h]); +		while (t != NULL) { +			unregister_netdevice_queue(t->dev, &list); +			t = rtnl_dereference(t->next); +		} +	} + +	t = rtnl_dereference(ip6n->tnls_wc[0]); +	unregister_netdevice_queue(t->dev, &list); +	unregister_netdevice_many(&list); +} + +static int __net_init vti6_init_net(struct net *net) +{ +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); +	struct ip6_tnl *t = NULL; +	int err; + +	ip6n->tnls[0] = ip6n->tnls_wc; +	ip6n->tnls[1] = ip6n->tnls_r_l; + +	err = -ENOMEM; +	ip6n->fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6_vti0", +					vti6_dev_setup); + +	if (!ip6n->fb_tnl_dev) +		goto err_alloc_dev; +	dev_net_set(ip6n->fb_tnl_dev, net); + +	err = vti6_fb_tnl_dev_init(ip6n->fb_tnl_dev); +	if (err < 0) +		goto err_register; + +	err = register_netdev(ip6n->fb_tnl_dev); +	if (err < 0) +		goto err_register; + +	t = netdev_priv(ip6n->fb_tnl_dev); + +	strcpy(t->parms.name, ip6n->fb_tnl_dev->name); +	return 0; + +err_register: +	vti6_dev_free(ip6n->fb_tnl_dev); +err_alloc_dev: +	return err; +} + +static void __net_exit vti6_exit_net(struct net *net) +{ +	struct vti6_net *ip6n = net_generic(net, vti6_net_id); + +	rtnl_lock(); +	vti6_destroy_tunnels(ip6n); +	rtnl_unlock(); +} + +static struct pernet_operations vti6_net_ops = { +	.init = vti6_init_net, +	.exit = vti6_exit_net, +	.id   = &vti6_net_id, +	.size = sizeof(struct vti6_net), +}; + +static struct xfrm6_protocol vti_esp6_protocol __read_mostly = { +	.handler	=	vti6_rcv, +	.cb_handler	=	vti6_rcv_cb, +	.err_handler	=	vti6_err, +	.priority	=	100, +}; + +static struct xfrm6_protocol vti_ah6_protocol __read_mostly = { +	.handler	=	vti6_rcv, +	.cb_handler	=	vti6_rcv_cb, +	.err_handler	=	vti6_err, +	.priority	=	100, +}; + +static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { +	.handler	=	vti6_rcv, +	.cb_handler	=	vti6_rcv_cb, +	.err_handler	=	vti6_err, +	.priority	=	100, +}; + +/** + * vti6_tunnel_init - register protocol and reserve needed resources + * + * Return: 0 on success + **/ +static int __init vti6_tunnel_init(void) +{ +	int  err; + +	err = register_pernet_device(&vti6_net_ops); +	if (err < 0) +		goto out_pernet; + +	err = xfrm6_protocol_register(&vti_esp6_protocol, IPPROTO_ESP); +	if (err < 0) { +		pr_err("%s: can't register vti6 protocol\n", __func__); + +		goto out; +	} + +	err = xfrm6_protocol_register(&vti_ah6_protocol, IPPROTO_AH); +	if (err < 0) { +		xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); +		pr_err("%s: can't register vti6 protocol\n", __func__); + +		goto out; +	} + +	err = xfrm6_protocol_register(&vti_ipcomp6_protocol, IPPROTO_COMP); +	if (err < 0) { +		xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); +		xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); +		pr_err("%s: can't register vti6 protocol\n", __func__); + +		goto out; +	} + +	err = rtnl_link_register(&vti6_link_ops); +	if (err < 0) +		goto rtnl_link_failed; + +	return 0; + +rtnl_link_failed: +	xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP); +	xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); +	xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); +out: +	unregister_pernet_device(&vti6_net_ops); +out_pernet: +	return err; +} + +/** + * vti6_tunnel_cleanup - free resources and unregister protocol + **/ +static void __exit vti6_tunnel_cleanup(void) +{ +	rtnl_link_unregister(&vti6_link_ops); +	if (xfrm6_protocol_deregister(&vti_ipcomp6_protocol, IPPROTO_COMP)) +		pr_info("%s: can't deregister protocol\n", __func__); +	if (xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH)) +		pr_info("%s: can't deregister protocol\n", __func__); +	if (xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP)) +		pr_info("%s: can't deregister protocol\n", __func__); + +	unregister_pernet_device(&vti6_net_ops); +} + +module_init(vti6_tunnel_init); +module_exit(vti6_tunnel_cleanup); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("vti6"); +MODULE_ALIAS_NETDEV("ip6_vti0"); +MODULE_AUTHOR("Steffen Klassert"); +MODULE_DESCRIPTION("IPv6 virtual tunnel interface"); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f365310bfcc..8250474ab7d 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -141,9 +141,12 @@ static struct mr6_table *ip6mr_get_table(struct net *net, u32 id)  static int ip6mr_fib_lookup(struct net *net, struct flowi6 *flp6,  			    struct mr6_table **mrt)  { -	struct ip6mr_result res; -	struct fib_lookup_arg arg = { .result = &res, };  	int err; +	struct ip6mr_result res; +	struct fib_lookup_arg arg = { +		.result = &res, +		.flags = FIB_LOOKUP_NOREF, +	};  	err = fib_rules_lookup(net->ipv6.mr6_rules_ops,  			       flowi6_to_flowi(flp6), 0, &arg); @@ -697,7 +700,7 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb,  	struct mr6_table *mrt;  	struct flowi6 fl6 = {  		.flowi6_oif	= dev->ifindex, -		.flowi6_iif	= skb->skb_iif, +		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,  		.flowi6_mark	= skb->mark,  	};  	int err; @@ -1630,7 +1633,7 @@ struct sock *mroute6_socket(struct net *net, struct sk_buff *skb)  {  	struct mr6_table *mrt;  	struct flowi6 fl6 = { -		.flowi6_iif	= skb->skb_iif, +		.flowi6_iif	= skb->skb_iif ? : LOOPBACK_IFINDEX,  		.flowi6_oif	= skb->dev->ifindex,  		.flowi6_mark	= skb->mark,  	}; @@ -2346,13 +2349,14 @@ int ip6mr_get_route(struct net *net,  }  static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, -			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd) +			     u32 portid, u32 seq, struct mfc6_cache *c, int cmd, +			     int flags)  {  	struct nlmsghdr *nlh;  	struct rtmsg *rtm;  	int err; -	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), NLM_F_MULTI); +	nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags);  	if (nlh == NULL)  		return -EMSGSIZE; @@ -2420,7 +2424,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc,  	if (skb == NULL)  		goto errout; -	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd); +	err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0);  	if (err < 0)  		goto errout; @@ -2459,7 +2463,8 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)  				if (ip6mr_fill_mroute(mrt, skb,  						      NETLINK_CB(cb->skb).portid,  						      cb->nlh->nlmsg_seq, -						      mfc, RTM_NEWROUTE) < 0) +						      mfc, RTM_NEWROUTE, +						      NLM_F_MULTI) < 0)  					goto done;  next_entry:  				e++; @@ -2473,7 +2478,8 @@ next_entry:  			if (ip6mr_fill_mroute(mrt, skb,  					      NETLINK_CB(cb->skb).portid,  					      cb->nlh->nlmsg_seq, -					      mfc, RTM_NEWROUTE) < 0) { +					      mfc, RTM_NEWROUTE, +					      NLM_F_MULTI) < 0) {  				spin_unlock_bh(&mfc_unres_lock);  				goto done;  			} diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 5636a912074..d1c793cffcb 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -16,8 +16,7 @@   * GNU General Public License for more details.   *   * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * along with this program; if not, see <http://www.gnu.org/licenses/>.   */  /*   * [Memo] @@ -54,7 +53,7 @@  #include <linux/icmpv6.h>  #include <linux/mutex.h> -static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +static int ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,  				u8 type, u8 code, int offset, __be32 info)  {  	struct net *net = dev_net(skb->dev); @@ -64,22 +63,23 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,  		(struct ip_comp_hdr *)(skb->data + offset);  	struct xfrm_state *x; -	if (type != ICMPV6_DEST_UNREACH && -	    type != ICMPV6_PKT_TOOBIG && +	if (type != ICMPV6_PKT_TOOBIG &&  	    type != NDISC_REDIRECT) -		return; +		return 0;  	spi = htonl(ntohs(ipcomph->cpi));  	x = xfrm_state_lookup(net, skb->mark, (const xfrm_address_t *)&iph->daddr,  			      spi, IPPROTO_COMP, AF_INET6);  	if (!x) -		return; +		return 0;  	if (type == NDISC_REDIRECT)  		ip6_redirect(skb, net, skb->dev->ifindex, 0);  	else  		ip6_update_pmtu(skb, net, info, 0, 0);  	xfrm_state_put(x); + +	return 0;  }  static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) @@ -176,6 +176,11 @@ out:  	return err;  } +static int ipcomp6_rcv_cb(struct sk_buff *skb, int err) +{ +	return 0; +} +  static const struct xfrm_type ipcomp6_type =  {  	.description	= "IPCOMP6", @@ -188,11 +193,12 @@ static const struct xfrm_type ipcomp6_type =  	.hdr_offset	= xfrm6_find_1stfragopt,  }; -static const struct inet6_protocol ipcomp6_protocol = +static struct xfrm6_protocol ipcomp6_protocol =  {  	.handler	= xfrm6_rcv, +	.cb_handler	= ipcomp6_rcv_cb,  	.err_handler	= ipcomp6_err, -	.flags		= INET6_PROTO_NOPOLICY, +	.priority	= 0,  };  static int __init ipcomp6_init(void) @@ -201,7 +207,7 @@ static int __init ipcomp6_init(void)  		pr_info("%s: can't add xfrm type\n", __func__);  		return -EAGAIN;  	} -	if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) { +	if (xfrm6_protocol_register(&ipcomp6_protocol, IPPROTO_COMP) < 0) {  		pr_info("%s: can't add protocol\n", __func__);  		xfrm_unregister_type(&ipcomp6_type, AF_INET6);  		return -EAGAIN; @@ -211,7 +217,7 @@ static int __init ipcomp6_init(void)  static void __exit ipcomp6_fini(void)  { -	if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) +	if (xfrm6_protocol_deregister(&ipcomp6_protocol, IPPROTO_COMP) < 0)  		pr_info("%s: can't remove protocol\n", __func__);  	if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)  		pr_info("%s: can't remove xfrm type\n", __func__); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d1e2e8ef29c..edb58aff4ae 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -174,7 +174,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,  			}  			if (ipv6_only_sock(sk) || -			    !ipv6_addr_v4mapped(&np->daddr)) { +			    !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) {  				retv = -EADDRNOTAVAIL;  				break;  			} @@ -722,7 +722,7 @@ done:  	case IPV6_MTU_DISCOVER:  		if (optlen < sizeof(int))  			goto e_inval; -		if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_PROBE) +		if (val < IPV6_PMTUDISC_DONT || val > IPV6_PMTUDISC_OMIT)  			goto e_inval;  		np->pmtudisc = val;  		retv = 0; @@ -1002,16 +1002,14 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,  		release_sock(sk);  		if (skb) { -			int err = ip6_datagram_recv_ctl(sk, &msg, skb); +			ip6_datagram_recv_ctl(sk, &msg, skb);  			kfree_skb(skb); -			if (err) -				return err;  		} else {  			if (np->rxopt.bits.rxinfo) {  				struct in6_pktinfo src_info;  				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :  					np->sticky_pktinfo.ipi6_ifindex; -				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; +				src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : np->sticky_pktinfo.ipi6_addr;  				put_cmsg(&msg, SOL_IPV6, IPV6_PKTINFO, sizeof(src_info), &src_info);  			}  			if (np->rxopt.bits.rxhlim) { @@ -1019,20 +1017,27 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,  				put_cmsg(&msg, SOL_IPV6, IPV6_HOPLIMIT, sizeof(hlim), &hlim);  			}  			if (np->rxopt.bits.rxtclass) { -				int tclass = np->rcv_tclass; +				int tclass = (int)ip6_tclass(np->rcv_flowinfo); +  				put_cmsg(&msg, SOL_IPV6, IPV6_TCLASS, sizeof(tclass), &tclass);  			}  			if (np->rxopt.bits.rxoinfo) {  				struct in6_pktinfo src_info;  				src_info.ipi6_ifindex = np->mcast_oif ? np->mcast_oif :  					np->sticky_pktinfo.ipi6_ifindex; -				src_info.ipi6_addr = np->mcast_oif ? np->daddr : np->sticky_pktinfo.ipi6_addr; +				src_info.ipi6_addr = np->mcast_oif ? sk->sk_v6_daddr : +								     np->sticky_pktinfo.ipi6_addr;  				put_cmsg(&msg, SOL_IPV6, IPV6_2292PKTINFO, sizeof(src_info), &src_info);  			}  			if (np->rxopt.bits.rxohlim) {  				int hlim = np->mcast_hops;  				put_cmsg(&msg, SOL_IPV6, IPV6_2292HOPLIMIT, sizeof(hlim), &hlim);  			} +			if (np->rxopt.bits.rxflow) { +				__be32 flowinfo = np->rcv_flowinfo; + +				put_cmsg(&msg, SOL_IPV6, IPV6_FLOWINFO, sizeof(flowinfo), &flowinfo); +			}  		}  		len -= msg.msg_controllen;  		return put_user(len, optlen); @@ -1211,6 +1216,37 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,  		val = np->sndflow;  		break; +	case IPV6_FLOWLABEL_MGR: +	{ +		struct in6_flowlabel_req freq; +		int flags; + +		if (len < sizeof(freq)) +			return -EINVAL; + +		if (copy_from_user(&freq, optval, sizeof(freq))) +			return -EFAULT; + +		if (freq.flr_action != IPV6_FL_A_GET) +			return -EINVAL; + +		len = sizeof(freq); +		flags = freq.flr_flags; + +		memset(&freq, 0, sizeof(freq)); + +		val = ipv6_flowlabel_opt_get(sk, &freq, flags); +		if (val < 0) +			return val; + +		if (put_user(len, optlen)) +			return -EFAULT; +		if (copy_to_user(optval, &freq, len)) +			return -EFAULT; + +		return 0; +	} +  	case IPV6_ADDR_PREFERENCES:  		val = 0; diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index d18f9f903db..617f0958e16 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -999,7 +999,7 @@ bool ipv6_chk_mcast_addr(struct net_device *dev, const struct in6_addr *group,  static void mld_gq_start_timer(struct inet6_dev *idev)  { -	unsigned long tv = net_random() % idev->mc_maxdelay; +	unsigned long tv = prandom_u32() % idev->mc_maxdelay;  	idev->mc_gq_running = 1;  	if (!mod_timer(&idev->mc_gq_timer, jiffies+tv+2)) @@ -1015,7 +1015,7 @@ static void mld_gq_stop_timer(struct inet6_dev *idev)  static void mld_ifc_start_timer(struct inet6_dev *idev, unsigned long delay)  { -	unsigned long tv = net_random() % delay; +	unsigned long tv = prandom_u32() % delay;  	if (!mod_timer(&idev->mc_ifc_timer, jiffies+tv+2))  		in6_dev_hold(idev); @@ -1030,7 +1030,7 @@ static void mld_ifc_stop_timer(struct inet6_dev *idev)  static void mld_dad_start_timer(struct inet6_dev *idev, unsigned long delay)  { -	unsigned long tv = net_random() % delay; +	unsigned long tv = prandom_u32() % delay;  	if (!mod_timer(&idev->mc_dad_timer, jiffies+tv+2))  		in6_dev_hold(idev); @@ -1061,7 +1061,7 @@ static void igmp6_group_queried(struct ifmcaddr6 *ma, unsigned long resptime)  	}  	if (delay >= resptime) -		delay = net_random() % resptime; +		delay = prandom_u32() % resptime;  	ma->mca_timer.expires = jiffies + delay;  	if (!mod_timer(&ma->mca_timer, jiffies + delay)) @@ -1301,8 +1301,17 @@ int igmp6_event_query(struct sk_buff *skb)  	len = ntohs(ipv6_hdr(skb)->payload_len) + sizeof(struct ipv6hdr);  	len -= skb_network_header_len(skb); -	/* Drop queries with not link local source */ -	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) +	/* RFC3810 6.2 +	 * Upon reception of an MLD message that contains a Query, the node +	 * checks if the source address of the message is a valid link-local +	 * address, if the Hop Limit is set to 1, and if the Router Alert +	 * option is present in the Hop-By-Hop Options header of the IPv6 +	 * packet.  If any of these checks fails, the packet is dropped. +	 */ +	if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL) || +	    ipv6_hdr(skb)->hop_limit != 1 || +	    !(IP6CB(skb)->flags & IP6SKB_ROUTERALERT) || +	    IP6CB(skb)->ra != htons(IPV6_OPT_ROUTERALERT_MLD))  		return -EINVAL;  	idev = __in6_dev_get(skb->dev); @@ -1620,11 +1629,12 @@ static void mld_sendpack(struct sk_buff *skb)  		      dst_output);  out:  	if (!err) { -		ICMP6MSGOUT_INC_STATS_BH(net, idev, ICMPV6_MLD2_REPORT); -		ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS); -		IP6_UPD_PO_STATS_BH(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); -	} else -		IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_OUTDISCARDS); +		ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); +		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); +		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, payload_len); +	} else { +		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS); +	}  	rcu_read_unlock();  	return; @@ -1665,7 +1675,7 @@ static struct sk_buff *add_grhead(struct sk_buff *skb, struct ifmcaddr6 *pmc,  	skb_tailroom(skb)) : 0)  static struct sk_buff *add_grec(struct sk_buff *skb, struct ifmcaddr6 *pmc, -	int type, int gdeleted, int sdeleted) +	int type, int gdeleted, int sdeleted, int crsend)  {  	struct inet6_dev *idev = pmc->idev;  	struct net_device *dev = idev->dev; @@ -1757,7 +1767,7 @@ empty_source:  		if (type == MLD2_ALLOW_NEW_SOURCES ||  		    type == MLD2_BLOCK_OLD_SOURCES)  			return skb; -		if (pmc->mca_crcount || isquery) { +		if (pmc->mca_crcount || isquery || crsend) {  			/* make sure we have room for group header */  			if (skb && AVAILABLE(skb) < sizeof(struct mld2_grec)) {  				mld_sendpack(skb); @@ -1789,7 +1799,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)  				type = MLD2_MODE_IS_EXCLUDE;  			else  				type = MLD2_MODE_IS_INCLUDE; -			skb = add_grec(skb, pmc, type, 0, 0); +			skb = add_grec(skb, pmc, type, 0, 0, 0);  			spin_unlock_bh(&pmc->mca_lock);  		}  	} else { @@ -1798,7 +1808,7 @@ static void mld_send_report(struct inet6_dev *idev, struct ifmcaddr6 *pmc)  			type = MLD2_MODE_IS_EXCLUDE;  		else  			type = MLD2_MODE_IS_INCLUDE; -		skb = add_grec(skb, pmc, type, 0, 0); +		skb = add_grec(skb, pmc, type, 0, 0, 0);  		spin_unlock_bh(&pmc->mca_lock);  	}  	read_unlock_bh(&idev->lock); @@ -1843,13 +1853,13 @@ static void mld_send_cr(struct inet6_dev *idev)  		if (pmc->mca_sfmode == MCAST_INCLUDE) {  			type = MLD2_BLOCK_OLD_SOURCES;  			dtype = MLD2_BLOCK_OLD_SOURCES; -			skb = add_grec(skb, pmc, type, 1, 0); -			skb = add_grec(skb, pmc, dtype, 1, 1); +			skb = add_grec(skb, pmc, type, 1, 0, 0); +			skb = add_grec(skb, pmc, dtype, 1, 1, 0);  		}  		if (pmc->mca_crcount) {  			if (pmc->mca_sfmode == MCAST_EXCLUDE) {  				type = MLD2_CHANGE_TO_INCLUDE; -				skb = add_grec(skb, pmc, type, 1, 0); +				skb = add_grec(skb, pmc, type, 1, 0, 0);  			}  			pmc->mca_crcount--;  			if (pmc->mca_crcount == 0) { @@ -1880,8 +1890,8 @@ static void mld_send_cr(struct inet6_dev *idev)  			type = MLD2_ALLOW_NEW_SOURCES;  			dtype = MLD2_BLOCK_OLD_SOURCES;  		} -		skb = add_grec(skb, pmc, type, 0, 0); -		skb = add_grec(skb, pmc, dtype, 0, 1);	/* deleted sources */ +		skb = add_grec(skb, pmc, type, 0, 0, 0); +		skb = add_grec(skb, pmc, dtype, 0, 1, 0);	/* deleted sources */  		/* filter mode changes */  		if (pmc->mca_crcount) { @@ -1889,7 +1899,7 @@ static void mld_send_cr(struct inet6_dev *idev)  				type = MLD2_CHANGE_TO_EXCLUDE;  			else  				type = MLD2_CHANGE_TO_INCLUDE; -			skb = add_grec(skb, pmc, type, 0, 0); +			skb = add_grec(skb, pmc, type, 0, 0, 0);  			pmc->mca_crcount--;  		}  		spin_unlock_bh(&pmc->mca_lock); @@ -1997,27 +2007,36 @@ err_out:  	goto out;  } -static void mld_resend_report(struct inet6_dev *idev) +static void mld_send_initial_cr(struct inet6_dev *idev)  { -	if (mld_in_v1_mode(idev)) { -		struct ifmcaddr6 *mcaddr; -		read_lock_bh(&idev->lock); -		for (mcaddr = idev->mc_list; mcaddr; mcaddr = mcaddr->next) { -			if (!(mcaddr->mca_flags & MAF_NOREPORT)) -				igmp6_send(&mcaddr->mca_addr, idev->dev, -					   ICMPV6_MGM_REPORT); -		} -		read_unlock_bh(&idev->lock); -	} else { -		mld_send_report(idev, NULL); +	struct sk_buff *skb; +	struct ifmcaddr6 *pmc; +	int type; + +	if (mld_in_v1_mode(idev)) +		return; + +	skb = NULL; +	read_lock_bh(&idev->lock); +	for (pmc=idev->mc_list; pmc; pmc=pmc->next) { +		spin_lock_bh(&pmc->mca_lock); +		if (pmc->mca_sfcount[MCAST_EXCLUDE]) +			type = MLD2_CHANGE_TO_EXCLUDE; +		else +			type = MLD2_CHANGE_TO_INCLUDE; +		skb = add_grec(skb, pmc, type, 0, 0, 1); +		spin_unlock_bh(&pmc->mca_lock);  	} +	read_unlock_bh(&idev->lock); +	if (skb) +		mld_sendpack(skb);  }  void ipv6_mc_dad_complete(struct inet6_dev *idev)  {  	idev->mc_dad_count = idev->mc_qrv;  	if (idev->mc_dad_count) { -		mld_resend_report(idev); +		mld_send_initial_cr(idev);  		idev->mc_dad_count--;  		if (idev->mc_dad_count)  			mld_dad_start_timer(idev, idev->mc_maxdelay); @@ -2028,7 +2047,7 @@ static void mld_dad_timer_expire(unsigned long data)  {  	struct inet6_dev *idev = (struct inet6_dev *)data; -	mld_resend_report(idev); +	mld_send_initial_cr(idev);  	if (idev->mc_dad_count) {  		idev->mc_dad_count--;  		if (idev->mc_dad_count) @@ -2328,7 +2347,7 @@ static void igmp6_join_group(struct ifmcaddr6 *ma)  	igmp6_send(&ma->mca_addr, ma->idev->dev, ICMPV6_MGM_REPORT); -	delay = net_random() % unsolicited_report_interval(ma->idev); +	delay = prandom_u32() % unsolicited_report_interval(ma->idev);  	spin_lock_bh(&ma->mca_lock);  	if (del_timer(&ma->mca_timer)) { diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 9ac01dc9402..db9b6cbc9db 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -13,8 +13,7 @@   * GNU General Public License for more details.   *   * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * along with this program; if not, see <http://www.gnu.org/licenses/>.   */  /*   * Authors: diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index f8a55ff1971..ca8d4ea48a5 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -125,17 +125,19 @@ struct neigh_table nd_tbl = {  	.id =		"ndisc_cache",  	.parms = {  		.tbl			= &nd_tbl, -		.base_reachable_time	= ND_REACHABLE_TIME, -		.retrans_time		= ND_RETRANS_TIMER, -		.gc_staletime		= 60 * HZ,  		.reachable_time		= ND_REACHABLE_TIME, -		.delay_probe_time	= 5 * HZ, -		.queue_len_bytes	= 64*1024, -		.ucast_probes		= 3, -		.mcast_probes		= 3, -		.anycast_delay		= 1 * HZ, -		.proxy_delay		= (8 * HZ) / 10, -		.proxy_qlen		= 64, +		.data = { +			[NEIGH_VAR_MCAST_PROBES] = 3, +			[NEIGH_VAR_UCAST_PROBES] = 3, +			[NEIGH_VAR_RETRANS_TIME] = ND_RETRANS_TIMER, +			[NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, +			[NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, +			[NEIGH_VAR_GC_STALETIME] = 60 * HZ, +			[NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024, +			[NEIGH_VAR_PROXY_QLEN] = 64, +			[NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, +			[NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, +		},  	},  	.gc_interval =	  30 * HZ,  	.gc_thresh1 =	 128, @@ -656,14 +658,14 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)  	if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))  		saddr = &ipv6_hdr(skb)->saddr; -	if ((probes -= neigh->parms->ucast_probes) < 0) { +	if ((probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES)) < 0) {  		if (!(neigh->nud_state & NUD_VALID)) {  			ND_PRINTK(1, dbg,  				  "%s: trying to ucast probe in NUD_INVALID: %pI6\n",  				  __func__, target);  		}  		ndisc_send_ns(dev, neigh, target, target, saddr); -	} else if ((probes -= neigh->parms->app_probes) < 0) { +	} else if ((probes -= NEIGH_VAR(neigh->parms, APP_PROBES)) < 0) {  		neigh_app_ns(neigh);  	} else {  		addrconf_addr_solict_mult(target, &mcaddr); @@ -790,7 +792,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)  			if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&  			    skb->pkt_type != PACKET_HOST &&  			    inc && -			    idev->nd_parms->proxy_delay != 0) { +			    NEIGH_VAR(idev->nd_parms, PROXY_DELAY) != 0) {  				/*  				 * for anycast or proxy,  				 * sender should delay its response @@ -849,7 +851,7 @@ out:  static void ndisc_recv_na(struct sk_buff *skb)  {  	struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb); -	const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr; +	struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;  	const struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;  	u8 *lladdr = NULL;  	u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) + @@ -942,10 +944,7 @@ static void ndisc_recv_na(struct sk_buff *skb)  			/*  			 * Change: router to host  			 */ -			struct rt6_info *rt; -			rt = rt6_get_dflt_router(saddr, dev); -			if (rt) -				ip6_del_rt(rt); +			rt6_clean_tohost(dev_net(dev),  saddr);  		}  out: @@ -1210,7 +1209,7 @@ skip_defrtr:  			rtime = (rtime*HZ)/1000;  			if (rtime < HZ/10)  				rtime = HZ/10; -			in6_dev->nd_parms->retrans_time = rtime; +			NEIGH_VAR_SET(in6_dev->nd_parms, RETRANS_TIME, rtime);  			in6_dev->tstamp = jiffies;  			inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);  		} @@ -1222,9 +1221,11 @@ skip_defrtr:  			if (rtime < HZ/10)  				rtime = HZ/10; -			if (rtime != in6_dev->nd_parms->base_reachable_time) { -				in6_dev->nd_parms->base_reachable_time = rtime; -				in6_dev->nd_parms->gc_staletime = 3 * rtime; +			if (rtime != NEIGH_VAR(in6_dev->nd_parms, BASE_REACHABLE_TIME)) { +				NEIGH_VAR_SET(in6_dev->nd_parms, +					      BASE_REACHABLE_TIME, rtime); +				NEIGH_VAR_SET(in6_dev->nd_parms, +					      GC_STALETIME, 3 * rtime);  				in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);  				in6_dev->tstamp = jiffies;  				inet6_ifinfo_notify(RTM_NEWLINK, in6_dev); @@ -1277,6 +1278,9 @@ skip_linkparms:  			    ri->prefix_len == 0)  				continue;  #endif +			if (ri->prefix_len == 0 && +			    !in6_dev->cnf.accept_ra_defrtr) +				continue;  			if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)  				continue;  			rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3, @@ -1648,22 +1652,23 @@ int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *bu  		ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");  	if (strcmp(ctl->procname, "retrans_time") == 0) -		ret = proc_dointvec(ctl, write, buffer, lenp, ppos); +		ret = neigh_proc_dointvec(ctl, write, buffer, lenp, ppos);  	else if (strcmp(ctl->procname, "base_reachable_time") == 0) -		ret = proc_dointvec_jiffies(ctl, write, -					    buffer, lenp, ppos); +		ret = neigh_proc_dointvec_jiffies(ctl, write, +						  buffer, lenp, ppos);  	else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||  		 (strcmp(ctl->procname, "base_reachable_time_ms") == 0)) -		ret = proc_dointvec_ms_jiffies(ctl, write, -					       buffer, lenp, ppos); +		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, +						     buffer, lenp, ppos);  	else  		ret = -1;  	if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) { -		if (ctl->data == &idev->nd_parms->base_reachable_time) -			idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time); +		if (ctl->data == &NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME)) +			idev->nd_parms->reachable_time = +					neigh_rand_reach_time(NEIGH_VAR(idev->nd_parms, BASE_REACHABLE_TIME));  		idev->tstamp = jiffies;  		inet6_ifinfo_notify(RTM_NEWLINK, idev);  		in6_dev_put(idev); @@ -1722,12 +1727,12 @@ int __init ndisc_init(void)  	neigh_table_init(&nd_tbl);  #ifdef CONFIG_SYSCTL -	err = neigh_sysctl_register(NULL, &nd_tbl.parms, "ipv6", +	err = neigh_sysctl_register(NULL, &nd_tbl.parms,  				    &ndisc_ifinfo_sysctl_change);  	if (err)  		goto out_unregister_pernet; -#endif  out: +#endif  	return err;  #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 95f3f1da0d7..d38e6a8d8b9 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -30,13 +30,15 @@ int ip6_route_me_harder(struct sk_buff *skb)  		.daddr = iph->daddr,  		.saddr = iph->saddr,  	}; +	int err;  	dst = ip6_route_output(net, skb->sk, &fl6); -	if (dst->error) { +	err = dst->error; +	if (err) {  		IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);  		LIMIT_NETDEBUG(KERN_DEBUG "ip6_route_me_harder: No more route.\n");  		dst_release(dst); -		return dst->error; +		return err;  	}  	/* Drop old route. */ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a7f842b29b6..4bff1f297e3 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,36 @@ config NF_CONNTRACK_IPV6  	  To compile it as a module, choose M here.  If unsure, say N. +config NF_TABLES_IPV6 +	depends on NF_TABLES +	tristate "IPv6 nf_tables support" +	help +	  This option enables the IPv6 support for nf_tables. + +config NFT_CHAIN_ROUTE_IPV6 +	depends on NF_TABLES_IPV6 +	tristate "IPv6 nf_tables route chain support" +	help +	  This option enables the "route" chain for IPv6 in nf_tables. This +	  chain type is used to force packet re-routing after mangling header +	  fields such as the source, destination, flowlabel, hop-limit and +	  the packet mark. + +config NFT_CHAIN_NAT_IPV6 +	depends on NF_TABLES_IPV6 +	depends on NF_NAT_IPV6 && NFT_NAT +	tristate "IPv6 nf_tables nat chain support" +	help +	  This option enables the "nat" chain for IPv6 in nf_tables. This +	  chain type is used to perform Network Address Translation (NAT) +	  packet transformations such as the source, destination address and +	  source and destination ports. + +config NFT_REJECT_IPV6 +	depends on NF_TABLES_IPV6 +	default NFT_REJECT +	tristate +  config IP6_NF_IPTABLES  	tristate "IP6 tables support (required for filtering)"  	depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 2b53738f798..70d3dd66f2c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -23,6 +23,12 @@ obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o  nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o  obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o +# nf_tables +obj-$(CONFIG_NF_TABLES_IPV6) += nf_tables_ipv6.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV6) += nft_chain_route_ipv6.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV6) += nft_chain_nat_ipv6.o +obj-$(CONFIG_NFT_REJECT_IPV6) += nft_reject_ipv6.o +  # matches  obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o  obj-$(CONFIG_IP6_NF_MATCH_EUI64) += ip6t_eui64.o diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 44400c216dc..e080fbbbc0e 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -349,6 +349,11 @@ ip6t_do_table(struct sk_buff *skb,  	local_bh_disable();  	addend = xt_write_recseq_begin();  	private = table->private; +	/* +	 * Ensure we load private-> members after we've fetched the base +	 * pointer. +	 */ +	smp_read_barrier_depends();  	cpu        = smp_processor_id();  	table_base = private->entries[cpu];  	jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -1236,8 +1241,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,  	xt_free_table_info(oldinfo);  	if (copy_to_user(counters_ptr, counters, -			 sizeof(struct xt_counters) * num_counters) != 0) -		ret = -EFAULT; +			 sizeof(struct xt_counters) * num_counters) != 0) { +		/* Silent error, can't fail, new table is already in place */ +		net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n"); +	}  	vfree(counters);  	xt_table_unlock(t);  	return ret; diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 56eef30ee5f..544b0a9da1b 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -23,182 +23,18 @@  #include <linux/skbuff.h>  #include <linux/icmpv6.h>  #include <linux/netdevice.h> -#include <net/ipv6.h> -#include <net/tcp.h>  #include <net/icmp.h> -#include <net/ip6_checksum.h> -#include <net/ip6_fib.h> -#include <net/ip6_route.h>  #include <net/flow.h>  #include <linux/netfilter/x_tables.h>  #include <linux/netfilter_ipv6/ip6_tables.h>  #include <linux/netfilter_ipv6/ip6t_REJECT.h> +#include <net/netfilter/ipv6/nf_reject.h> +  MODULE_AUTHOR("Yasuyuki KOZAKAI <yasuyuki.kozakai@toshiba.co.jp>");  MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv6");  MODULE_LICENSE("GPL"); -/* Send RST reply */ -static void send_reset(struct net *net, struct sk_buff *oldskb) -{ -	struct sk_buff *nskb; -	struct tcphdr otcph, *tcph; -	unsigned int otcplen, hh_len; -	int tcphoff, needs_ack; -	const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); -	struct ipv6hdr *ip6h; -#define DEFAULT_TOS_VALUE	0x0U -	const __u8 tclass = DEFAULT_TOS_VALUE; -	struct dst_entry *dst = NULL; -	u8 proto; -	__be16 frag_off; -	struct flowi6 fl6; - -	if ((!(ipv6_addr_type(&oip6h->saddr) & IPV6_ADDR_UNICAST)) || -	    (!(ipv6_addr_type(&oip6h->daddr) & IPV6_ADDR_UNICAST))) { -		pr_debug("addr is not unicast.\n"); -		return; -	} - -	proto = oip6h->nexthdr; -	tcphoff = ipv6_skip_exthdr(oldskb, ((u8*)(oip6h+1) - oldskb->data), &proto, &frag_off); - -	if ((tcphoff < 0) || (tcphoff > oldskb->len)) { -		pr_debug("Cannot get TCP header.\n"); -		return; -	} - -	otcplen = oldskb->len - tcphoff; - -	/* IP header checks: fragment, too short. */ -	if (proto != IPPROTO_TCP || otcplen < sizeof(struct tcphdr)) { -		pr_debug("proto(%d) != IPPROTO_TCP, " -			 "or too short. otcplen = %d\n", -			 proto, otcplen); -		return; -	} - -	if (skb_copy_bits(oldskb, tcphoff, &otcph, sizeof(struct tcphdr))) -		BUG(); - -	/* No RST for RST. */ -	if (otcph.rst) { -		pr_debug("RST is set\n"); -		return; -	} - -	/* Check checksum. */ -	if (csum_ipv6_magic(&oip6h->saddr, &oip6h->daddr, otcplen, IPPROTO_TCP, -			    skb_checksum(oldskb, tcphoff, otcplen, 0))) { -		pr_debug("TCP checksum is invalid\n"); -		return; -	} - -	memset(&fl6, 0, sizeof(fl6)); -	fl6.flowi6_proto = IPPROTO_TCP; -	fl6.saddr = oip6h->daddr; -	fl6.daddr = oip6h->saddr; -	fl6.fl6_sport = otcph.dest; -	fl6.fl6_dport = otcph.source; -	security_skb_classify_flow(oldskb, flowi6_to_flowi(&fl6)); -	dst = ip6_route_output(net, NULL, &fl6); -	if (dst == NULL || dst->error) { -		dst_release(dst); -		return; -	} -	dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), NULL, 0); -	if (IS_ERR(dst)) -		return; - -	hh_len = (dst->dev->hard_header_len + 15)&~15; -	nskb = alloc_skb(hh_len + 15 + dst->header_len + sizeof(struct ipv6hdr) -			 + sizeof(struct tcphdr) + dst->trailer_len, -			 GFP_ATOMIC); - -	if (!nskb) { -		net_dbg_ratelimited("cannot alloc skb\n"); -		dst_release(dst); -		return; -	} - -	skb_dst_set(nskb, dst); - -	skb_reserve(nskb, hh_len + dst->header_len); - -	skb_put(nskb, sizeof(struct ipv6hdr)); -	skb_reset_network_header(nskb); -	ip6h = ipv6_hdr(nskb); -	ip6_flow_hdr(ip6h, tclass, 0); -	ip6h->hop_limit = ip6_dst_hoplimit(dst); -	ip6h->nexthdr = IPPROTO_TCP; -	ip6h->saddr = oip6h->daddr; -	ip6h->daddr = oip6h->saddr; - -	skb_reset_transport_header(nskb); -	tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); -	/* Truncate to length (no data) */ -	tcph->doff = sizeof(struct tcphdr)/4; -	tcph->source = otcph.dest; -	tcph->dest = otcph.source; - -	if (otcph.ack) { -		needs_ack = 0; -		tcph->seq = otcph.ack_seq; -		tcph->ack_seq = 0; -	} else { -		needs_ack = 1; -		tcph->ack_seq = htonl(ntohl(otcph.seq) + otcph.syn + otcph.fin -				      + otcplen - (otcph.doff<<2)); -		tcph->seq = 0; -	} - -	/* Reset flags */ -	((u_int8_t *)tcph)[13] = 0; -	tcph->rst = 1; -	tcph->ack = needs_ack; -	tcph->window = 0; -	tcph->urg_ptr = 0; -	tcph->check = 0; - -	/* Adjust TCP checksum */ -	tcph->check = csum_ipv6_magic(&ipv6_hdr(nskb)->saddr, -				      &ipv6_hdr(nskb)->daddr, -				      sizeof(struct tcphdr), IPPROTO_TCP, -				      csum_partial(tcph, -						   sizeof(struct tcphdr), 0)); - -	nf_ct_attach(nskb, oldskb); - -#ifdef CONFIG_BRIDGE_NETFILTER -	/* If we use ip6_local_out for bridged traffic, the MAC source on -	 * the RST will be ours, instead of the destination's.  This confuses -	 * some routers/firewalls, and they drop the packet.  So we need to -	 * build the eth header using the original destination's MAC as the -	 * source, and send the RST packet directly. -	 */ -	if (oldskb->nf_bridge) { -		struct ethhdr *oeth = eth_hdr(oldskb); -		nskb->dev = oldskb->nf_bridge->physindev; -		nskb->protocol = htons(ETH_P_IPV6); -		ip6h->payload_len = htons(sizeof(struct tcphdr)); -		if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), -				    oeth->h_source, oeth->h_dest, nskb->len) < 0) -			return; -		dev_queue_xmit(nskb); -	} else -#endif -		ip6_local_out(nskb); -} - -static inline void -send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, -	     unsigned int hooknum) -{ -	if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) -		skb_in->dev = net->loopback_dev; - -	icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); -}  static unsigned int  reject_tg6(struct sk_buff *skb, const struct xt_action_param *par) @@ -209,25 +45,25 @@ reject_tg6(struct sk_buff *skb, const struct xt_action_param *par)  	pr_debug("%s: medium point\n", __func__);  	switch (reject->with) {  	case IP6T_ICMP6_NO_ROUTE: -		send_unreach(net, skb, ICMPV6_NOROUTE, par->hooknum); +		nf_send_unreach6(net, skb, ICMPV6_NOROUTE, par->hooknum);  		break;  	case IP6T_ICMP6_ADM_PROHIBITED: -		send_unreach(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum); +		nf_send_unreach6(net, skb, ICMPV6_ADM_PROHIBITED, par->hooknum);  		break;  	case IP6T_ICMP6_NOT_NEIGHBOUR: -		send_unreach(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum); +		nf_send_unreach6(net, skb, ICMPV6_NOT_NEIGHBOUR, par->hooknum);  		break;  	case IP6T_ICMP6_ADDR_UNREACH: -		send_unreach(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum); +		nf_send_unreach6(net, skb, ICMPV6_ADDR_UNREACH, par->hooknum);  		break;  	case IP6T_ICMP6_PORT_UNREACH: -		send_unreach(net, skb, ICMPV6_PORT_UNREACH, par->hooknum); +		nf_send_unreach6(net, skb, ICMPV6_PORT_UNREACH, par->hooknum);  		break;  	case IP6T_ICMP6_ECHOREPLY:  		/* Do nothing */  		break;  	case IP6T_TCP_RESET: -		send_reset(net, skb); +		nf_send_reset6(net, skb, par->hooknum);  		break;  	default:  		net_info_ratelimited("case %u not handled yet\n", reject->with); diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index 2748b042da7..a0d17270117 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -259,6 +259,7 @@ synproxy_recv_client_ack(const struct synproxy_net *snet,  	this_cpu_inc(snet->stats->cookie_valid);  	opts->mss = mss; +	opts->options |= XT_SYNPROXY_OPT_MSS;  	if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)  		synproxy_check_timestamp_cookie(opts); @@ -312,7 +313,7 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par)  	return XT_CONTINUE;  } -static unsigned int ipv6_synproxy_hook(unsigned int hooknum, +static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops,  				       struct sk_buff *skb,  				       const struct net_device *in,  				       const struct net_device *out, @@ -445,6 +446,7 @@ static void synproxy_tg6_destroy(const struct xt_tgdtor_param *par)  static struct xt_target synproxy_tg6_reg __read_mostly = {  	.name		= "SYNPROXY",  	.family		= NFPROTO_IPV6, +	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),  	.target		= synproxy_tg6,  	.targetsize	= sizeof(struct xt_synproxy_info),  	.checkentry	= synproxy_tg6_check, diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index e0983f3648a..790e0c6b19e 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -33,6 +33,7 @@ static bool rpfilter_lookup_reverse6(const struct sk_buff *skb,  	struct ipv6hdr *iph = ipv6_hdr(skb);  	bool ret = false;  	struct flowi6 fl6 = { +		.flowi6_iif = LOOPBACK_IFINDEX,  		.flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK,  		.flowi6_proto = iph->nexthdr,  		.daddr = iph->saddr, diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 29b44b14c5e..ca7f6c12808 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -32,13 +32,14 @@ static const struct xt_table packet_filter = {  /* The work comes in here from netfilter.c. */  static unsigned int -ip6table_filter_hook(unsigned int hook, struct sk_buff *skb, +ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,  		     const struct net_device *in, const struct net_device *out,  		     int (*okfn)(struct sk_buff *))  {  	const struct net *net = dev_net((in != NULL) ? in : out); -	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_filter); +	return ip6t_do_table(skb, ops->hooknum, in, out, +			     net->ipv6.ip6table_filter);  }  static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index c705907ae6a..307bbb782d1 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -76,17 +76,17 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out)  /* The work comes in here from netfilter.c. */  static unsigned int -ip6table_mangle_hook(unsigned int hook, struct sk_buff *skb, +ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,  		     const struct net_device *in, const struct net_device *out,  		     int (*okfn)(struct sk_buff *))  { -	if (hook == NF_INET_LOCAL_OUT) +	if (ops->hooknum == NF_INET_LOCAL_OUT)  		return ip6t_mangle_out(skb, out); -	if (hook == NF_INET_POST_ROUTING) -		return ip6t_do_table(skb, hook, in, out, +	if (ops->hooknum == NF_INET_POST_ROUTING) +		return ip6t_do_table(skb, ops->hooknum, in, out,  				     dev_net(out)->ipv6.ip6table_mangle);  	/* INPUT/FORWARD */ -	return ip6t_do_table(skb, hook, in, out, +	return ip6t_do_table(skb, ops->hooknum, in, out,  			     dev_net(in)->ipv6.ip6table_mangle);  } diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 9b076d2d3a7..387d8b8fc18 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -63,7 +63,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,  }  static unsigned int -nf_nat_ipv6_fn(unsigned int hooknum, +nf_nat_ipv6_fn(const struct nf_hook_ops *ops,  	       struct sk_buff *skb,  	       const struct net_device *in,  	       const struct net_device *out, @@ -72,7 +72,7 @@ nf_nat_ipv6_fn(unsigned int hooknum,  	struct nf_conn *ct;  	enum ip_conntrack_info ctinfo;  	struct nf_conn_nat *nat; -	enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); +	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);  	__be16 frag_off;  	int hdrlen;  	u8 nexthdr; @@ -90,17 +90,9 @@ nf_nat_ipv6_fn(unsigned int hooknum,  	if (nf_ct_is_untracked(ct))  		return NF_ACCEPT; -	nat = nfct_nat(ct); -	if (!nat) { -		/* NAT module was loaded late. */ -		if (nf_ct_is_confirmed(ct)) -			return NF_ACCEPT; -		nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); -		if (nat == NULL) { -			pr_debug("failed to add NAT extension\n"); -			return NF_ACCEPT; -		} -	} +	nat = nf_ct_nat_ext_add(ct); +	if (nat == NULL) +		return NF_ACCEPT;  	switch (ctinfo) {  	case IP_CT_RELATED: @@ -111,7 +103,8 @@ nf_nat_ipv6_fn(unsigned int hooknum,  		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) {  			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, -							     hooknum, hdrlen)) +							     ops->hooknum, +							     hdrlen))  				return NF_DROP;  			else  				return NF_ACCEPT; @@ -124,14 +117,14 @@ nf_nat_ipv6_fn(unsigned int hooknum,  		if (!nf_nat_initialized(ct, maniptype)) {  			unsigned int ret; -			ret = nf_nat_rule_find(skb, hooknum, in, out, ct); +			ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);  			if (ret != NF_ACCEPT)  				return ret;  		} else {  			pr_debug("Already setup manip %s for ct %p\n",  				 maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",  				 ct); -			if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) +			if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))  				goto oif_changed;  		}  		break; @@ -140,11 +133,11 @@ nf_nat_ipv6_fn(unsigned int hooknum,  		/* ESTABLISHED */  		NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||  			     ctinfo == IP_CT_ESTABLISHED_REPLY); -		if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) +		if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))  			goto oif_changed;  	} -	return nf_nat_packet(ct, ctinfo, hooknum, skb); +	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);  oif_changed:  	nf_ct_kill_acct(ct, ctinfo, skb); @@ -152,7 +145,7 @@ oif_changed:  }  static unsigned int -nf_nat_ipv6_in(unsigned int hooknum, +nf_nat_ipv6_in(const struct nf_hook_ops *ops,  	       struct sk_buff *skb,  	       const struct net_device *in,  	       const struct net_device *out, @@ -161,7 +154,7 @@ nf_nat_ipv6_in(unsigned int hooknum,  	unsigned int ret;  	struct in6_addr daddr = ipv6_hdr(skb)->daddr; -	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); +	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);  	if (ret != NF_DROP && ret != NF_STOLEN &&  	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr))  		skb_dst_drop(skb); @@ -170,7 +163,7 @@ nf_nat_ipv6_in(unsigned int hooknum,  }  static unsigned int -nf_nat_ipv6_out(unsigned int hooknum, +nf_nat_ipv6_out(const struct nf_hook_ops *ops,  		struct sk_buff *skb,  		const struct net_device *in,  		const struct net_device *out, @@ -187,7 +180,7 @@ nf_nat_ipv6_out(unsigned int hooknum,  	if (skb->len < sizeof(struct ipv6hdr))  		return NF_ACCEPT; -	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); +	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);  #ifdef CONFIG_XFRM  	if (ret != NF_DROP && ret != NF_STOLEN &&  	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -209,7 +202,7 @@ nf_nat_ipv6_out(unsigned int hooknum,  }  static unsigned int -nf_nat_ipv6_local_fn(unsigned int hooknum, +nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops,  		     struct sk_buff *skb,  		     const struct net_device *in,  		     const struct net_device *out, @@ -224,7 +217,7 @@ nf_nat_ipv6_local_fn(unsigned int hooknum,  	if (skb->len < sizeof(struct ipv6hdr))  		return NF_ACCEPT; -	ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); +	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn);  	if (ret != NF_DROP && ret != NF_STOLEN &&  	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {  		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 9a626d86720..5274740acec 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -19,13 +19,14 @@ static const struct xt_table packet_raw = {  /* The work comes in here from netfilter.c. */  static unsigned int -ip6table_raw_hook(unsigned int hook, struct sk_buff *skb, +ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,  		  const struct net_device *in, const struct net_device *out,  		  int (*okfn)(struct sk_buff *))  {  	const struct net *net = dev_net((in != NULL) ? in : out); -	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_raw); +	return ip6t_do_table(skb, ops->hooknum, in, out, +			     net->ipv6.ip6table_raw);  }  static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ce88d1d7e52..ab3b0219ecf 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -36,14 +36,15 @@ static const struct xt_table security_table = {  };  static unsigned int -ip6table_security_hook(unsigned int hook, struct sk_buff *skb, +ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,  		       const struct net_device *in,  		       const struct net_device *out,  		       int (*okfn)(struct sk_buff *))  {  	const struct net *net = dev_net((in != NULL) ? in : out); -	return ip6t_do_table(skb, hook, in, out, net->ipv6.ip6table_security); +	return ip6t_do_table(skb, ops->hooknum, in, out, +			     net->ipv6.ip6table_security);  }  static struct nf_hook_ops *sectbl_ops __read_mostly; diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index d6e4dd8b58d..4cbc6b290dd 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -95,7 +95,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,  	return NF_ACCEPT;  } -static unsigned int ipv6_helper(unsigned int hooknum, +static unsigned int ipv6_helper(const struct nf_hook_ops *ops,  				struct sk_buff *skb,  				const struct net_device *in,  				const struct net_device *out, @@ -133,7 +133,7 @@ static unsigned int ipv6_helper(unsigned int hooknum,  	return helper->help(skb, protoff, ct, ctinfo);  } -static unsigned int ipv6_confirm(unsigned int hooknum, +static unsigned int ipv6_confirm(const struct nf_hook_ops *ops,  				 struct sk_buff *skb,  				 const struct net_device *in,  				 const struct net_device *out, @@ -169,66 +169,16 @@ out:  	return nf_conntrack_confirm(skb);  } -static unsigned int __ipv6_conntrack_in(struct net *net, -					unsigned int hooknum, -					struct sk_buff *skb, -					const struct net_device *in, -					const struct net_device *out, -					int (*okfn)(struct sk_buff *)) -{ -	struct sk_buff *reasm = skb->nfct_reasm; -	const struct nf_conn_help *help; -	struct nf_conn *ct; -	enum ip_conntrack_info ctinfo; - -	/* This packet is fragmented and has reassembled packet. */ -	if (reasm) { -		/* Reassembled packet isn't parsed yet ? */ -		if (!reasm->nfct) { -			unsigned int ret; - -			ret = nf_conntrack_in(net, PF_INET6, hooknum, reasm); -			if (ret != NF_ACCEPT) -				return ret; -		} - -		/* Conntrack helpers need the entire reassembled packet in the -		 * POST_ROUTING hook. In case of unconfirmed connections NAT -		 * might reassign a helper, so the entire packet is also -		 * required. -		 */ -		ct = nf_ct_get(reasm, &ctinfo); -		if (ct != NULL && !nf_ct_is_untracked(ct)) { -			help = nfct_help(ct); -			if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { -				nf_conntrack_get_reasm(reasm); -				NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, -					       (struct net_device *)in, -					       (struct net_device *)out, -					       okfn, NF_IP6_PRI_CONNTRACK + 1); -				return NF_DROP_ERR(-ECANCELED); -			} -		} - -		nf_conntrack_get(reasm->nfct); -		skb->nfct = reasm->nfct; -		skb->nfctinfo = reasm->nfctinfo; -		return NF_ACCEPT; -	} - -	return nf_conntrack_in(net, PF_INET6, hooknum, skb); -} - -static unsigned int ipv6_conntrack_in(unsigned int hooknum, +static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops,  				      struct sk_buff *skb,  				      const struct net_device *in,  				      const struct net_device *out,  				      int (*okfn)(struct sk_buff *))  { -	return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); +	return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb);  } -static unsigned int ipv6_conntrack_local(unsigned int hooknum, +static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops,  					 struct sk_buff *skb,  					 const struct net_device *in,  					 const struct net_device *out, @@ -239,7 +189,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum,  		net_notice_ratelimited("ipv6_conntrack_local: packet too short\n");  		return NF_ACCEPT;  	} -	return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); +	return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb);  }  static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { @@ -297,9 +247,9 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)  	struct nf_conntrack_tuple tuple = { .src.l3num = NFPROTO_IPV6 };  	struct nf_conn *ct; -	tuple.src.u3.in6 = inet6->rcv_saddr; +	tuple.src.u3.in6 = sk->sk_v6_rcv_saddr;  	tuple.src.u.tcp.port = inet->inet_sport; -	tuple.dst.u3.in6 = inet6->daddr; +	tuple.dst.u3.in6 = sk->sk_v6_daddr;  	tuple.dst.u.tcp.port = inet->inet_dport;  	tuple.dst.protonum = sk->sk_protocol; diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index dffdc1a389c..0d5279fd852 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -144,12 +144,24 @@ static inline u8 ip6_frag_ecn(const struct ipv6hdr *ipv6h)  	return 1 << (ipv6_get_dsfield(ipv6h) & INET_ECN_MASK);  } +static unsigned int nf_hash_frag(__be32 id, const struct in6_addr *saddr, +				 const struct in6_addr *daddr) +{ +	u32 c; + +	net_get_random_once(&nf_frags.rnd, sizeof(nf_frags.rnd)); +	c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), +			 (__force u32)id, nf_frags.rnd); +	return c & (INETFRAGS_HASHSZ - 1); +} + +  static unsigned int nf_hashfn(struct inet_frag_queue *q)  {  	const struct frag_queue *nq;  	nq = container_of(q, struct frag_queue, q); -	return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); +	return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr);  }  static void nf_skb_free(struct sk_buff *skb) @@ -185,7 +197,7 @@ static inline struct frag_queue *fq_find(struct net *net, __be32 id,  	arg.ecn = ecn;  	read_lock_bh(&nf_frags.lock); -	hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); +	hash = nf_hash_frag(id, src, dst);  	q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash);  	local_bh_enable(); @@ -439,7 +451,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev)  	}  	sub_frag_mem_limit(&fq->q, head->truesize); -	head->local_df = 1; +	head->ignore_df = 1;  	head->next = NULL;  	head->dev = dev;  	head->tstamp = fq->q.stamp; @@ -621,31 +633,16 @@ ret_orig:  	return skb;  } -void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, -			struct net_device *in, struct net_device *out, -			int (*okfn)(struct sk_buff *)) +void nf_ct_frag6_consume_orig(struct sk_buff *skb)  {  	struct sk_buff *s, *s2; -	unsigned int ret = 0;  	for (s = NFCT_FRAG6_CB(skb)->orig; s;) { -		nf_conntrack_put_reasm(s->nfct_reasm); -		nf_conntrack_get_reasm(skb); -		s->nfct_reasm = skb; -  		s2 = s->next;  		s->next = NULL; - -		if (ret != -ECANCELED) -			ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, -					     in, out, okfn, -					     NF_IP6_PRI_CONNTRACK_DEFRAG + 1); -		else -			kfree_skb(s); - +		consume_skb(s);  		s = s2;  	} -	nf_conntrack_put_reasm(skb);  }  static int nf_ct_net_init(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index aacd121fe8c..7b9a748c6ba 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -52,7 +52,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum,  } -static unsigned int ipv6_defrag(unsigned int hooknum, +static unsigned int ipv6_defrag(const struct nf_hook_ops *ops,  				struct sk_buff *skb,  				const struct net_device *in,  				const struct net_device *out, @@ -66,7 +66,7 @@ static unsigned int ipv6_defrag(unsigned int hooknum,  		return NF_ACCEPT;  #endif -	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); +	reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(ops->hooknum, skb));  	/* queued */  	if (reasm == NULL)  		return NF_STOLEN; @@ -75,8 +75,11 @@ static unsigned int ipv6_defrag(unsigned int hooknum,  	if (reasm == skb)  		return NF_ACCEPT; -	nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, -			   (struct net_device *)out, okfn); +	nf_ct_frag6_consume_orig(reasm); + +	NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, +		       (struct net_device *) in, (struct net_device *) out, +		       okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1);  	return NF_STOLEN;  } diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c new file mode 100644 index 00000000000..0d812b31277 --- /dev/null +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -0,0 +1,128 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ipv6.h> +#include <linux/netfilter_ipv6.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> + +static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, +				      struct sk_buff *skb, +				      const struct net_device *in, +				      const struct net_device *out, +				      int (*okfn)(struct sk_buff *)) +{ +	struct nft_pktinfo pkt; + +	/* malformed packet, drop it */ +	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) +		return NF_DROP; + +	return nft_do_chain(&pkt, ops); +} + +static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, +				    struct sk_buff *skb, +				    const struct net_device *in, +				    const struct net_device *out, +				    int (*okfn)(struct sk_buff *)) +{ +	if (unlikely(skb->len < sizeof(struct ipv6hdr))) { +		if (net_ratelimit()) +			pr_info("nf_tables_ipv6: ignoring short SOCK_RAW " +				"packet\n"); +		return NF_ACCEPT; +	} + +	return nft_do_chain_ipv6(ops, skb, in, out, okfn); +} + +struct nft_af_info nft_af_ipv6 __read_mostly = { +	.family		= NFPROTO_IPV6, +	.nhooks		= NF_INET_NUMHOOKS, +	.owner		= THIS_MODULE, +	.nops		= 1, +	.hooks		= { +		[NF_INET_LOCAL_IN]	= nft_do_chain_ipv6, +		[NF_INET_LOCAL_OUT]	= nft_ipv6_output, +		[NF_INET_FORWARD]	= nft_do_chain_ipv6, +		[NF_INET_PRE_ROUTING]	= nft_do_chain_ipv6, +		[NF_INET_POST_ROUTING]	= nft_do_chain_ipv6, +	}, +}; +EXPORT_SYMBOL_GPL(nft_af_ipv6); + +static int nf_tables_ipv6_init_net(struct net *net) +{ +	net->nft.ipv6 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); +	if (net->nft.ipv6 == NULL) +		return -ENOMEM; + +	memcpy(net->nft.ipv6, &nft_af_ipv6, sizeof(nft_af_ipv6)); + +	if (nft_register_afinfo(net, net->nft.ipv6) < 0) +		goto err; + +	return 0; +err: +	kfree(net->nft.ipv6); +	return -ENOMEM; +} + +static void nf_tables_ipv6_exit_net(struct net *net) +{ +	nft_unregister_afinfo(net->nft.ipv6); +	kfree(net->nft.ipv6); +} + +static struct pernet_operations nf_tables_ipv6_net_ops = { +	.init	= nf_tables_ipv6_init_net, +	.exit	= nf_tables_ipv6_exit_net, +}; + +static const struct nf_chain_type filter_ipv6 = { +	.name		= "filter", +	.type		= NFT_CHAIN_T_DEFAULT, +	.family		= NFPROTO_IPV6, +	.owner		= THIS_MODULE, +	.hook_mask	= (1 << NF_INET_LOCAL_IN) | +			  (1 << NF_INET_LOCAL_OUT) | +			  (1 << NF_INET_FORWARD) | +			  (1 << NF_INET_PRE_ROUTING) | +			  (1 << NF_INET_POST_ROUTING), +}; + +static int __init nf_tables_ipv6_init(void) +{ +	int ret; + +	nft_register_chain_type(&filter_ipv6); +	ret = register_pernet_subsys(&nf_tables_ipv6_net_ops); +	if (ret < 0) +		nft_unregister_chain_type(&filter_ipv6); + +	return ret; +} + +static void __exit nf_tables_ipv6_exit(void) +{ +	unregister_pernet_subsys(&nf_tables_ipv6_net_ops); +	nft_unregister_chain_type(&filter_ipv6); +} + +module_init(nf_tables_ipv6_init); +module_exit(nf_tables_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(AF_INET6); diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c new file mode 100644 index 00000000000..d189fcb437f --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2011 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/ipv6.h> + +/* + * IPv6 NAT chains + */ + +static unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, +			      struct sk_buff *skb, +			      const struct net_device *in, +			      const struct net_device *out, +			      int (*okfn)(struct sk_buff *)) +{ +	enum ip_conntrack_info ctinfo; +	struct nf_conn *ct = nf_ct_get(skb, &ctinfo); +	struct nf_conn_nat *nat; +	enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); +	__be16 frag_off; +	int hdrlen; +	u8 nexthdr; +	struct nft_pktinfo pkt; +	unsigned int ret; + +	if (ct == NULL || nf_ct_is_untracked(ct)) +		return NF_ACCEPT; + +	nat = nf_ct_nat_ext_add(ct); +	if (nat == NULL) +		return NF_ACCEPT; + +	switch (ctinfo) { +	case IP_CT_RELATED: +	case IP_CT_RELATED + IP_CT_IS_REPLY: +		nexthdr = ipv6_hdr(skb)->nexthdr; +		hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), +					  &nexthdr, &frag_off); + +		if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { +			if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, +							   ops->hooknum, +							   hdrlen)) +				return NF_DROP; +			else +				return NF_ACCEPT; +		} +		/* Fall through */ +	case IP_CT_NEW: +		if (nf_nat_initialized(ct, maniptype)) +			break; + +		nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + +		ret = nft_do_chain(&pkt, ops); +		if (ret != NF_ACCEPT) +			return ret; +		if (!nf_nat_initialized(ct, maniptype)) { +			ret = nf_nat_alloc_null_binding(ct, ops->hooknum); +			if (ret != NF_ACCEPT) +				return ret; +		} +	default: +		break; +	} + +	return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_ipv6_prerouting(const struct nf_hook_ops *ops, +				      struct sk_buff *skb, +				      const struct net_device *in, +				      const struct net_device *out, +				      int (*okfn)(struct sk_buff *)) +{ +	struct in6_addr daddr = ipv6_hdr(skb)->daddr; +	unsigned int ret; + +	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); +	if (ret != NF_DROP && ret != NF_STOLEN && +	    ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) +		skb_dst_drop(skb); + +	return ret; +} + +static unsigned int nf_nat_ipv6_postrouting(const struct nf_hook_ops *ops, +				       struct sk_buff *skb, +				       const struct net_device *in, +				       const struct net_device *out, +				       int (*okfn)(struct sk_buff *)) +{ +	enum ip_conntrack_info ctinfo __maybe_unused; +	const struct nf_conn *ct __maybe_unused; +	unsigned int ret; + +	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM +	if (ret != NF_DROP && ret != NF_STOLEN && +	    !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && +	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) { +		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + +		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, +				      &ct->tuplehash[!dir].tuple.dst.u3) || +		    (ct->tuplehash[dir].tuple.src.u.all != +		     ct->tuplehash[!dir].tuple.dst.u.all)) +			if (nf_xfrm_me_harder(skb, AF_INET6) < 0) +				ret = NF_DROP; +	} +#endif +	return ret; +} + +static unsigned int nf_nat_ipv6_output(const struct nf_hook_ops *ops, +				  struct sk_buff *skb, +				  const struct net_device *in, +				  const struct net_device *out, +				  int (*okfn)(struct sk_buff *)) +{ +	enum ip_conntrack_info ctinfo; +	const struct nf_conn *ct; +	unsigned int ret; + +	ret = nf_nat_ipv6_fn(ops, skb, in, out, okfn); +	if (ret != NF_DROP && ret != NF_STOLEN && +	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) { +		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + +		if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, +				      &ct->tuplehash[!dir].tuple.src.u3)) { +			if (ip6_route_me_harder(skb)) +				ret = NF_DROP; +		} +#ifdef CONFIG_XFRM +		else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && +			 ct->tuplehash[dir].tuple.dst.u.all != +			 ct->tuplehash[!dir].tuple.src.u.all) +			if (nf_xfrm_me_harder(skb, AF_INET6)) +				ret = NF_DROP; +#endif +	} +	return ret; +} + +static const struct nf_chain_type nft_chain_nat_ipv6 = { +	.name		= "nat", +	.type		= NFT_CHAIN_T_NAT, +	.family		= NFPROTO_IPV6, +	.owner		= THIS_MODULE, +	.hook_mask	= (1 << NF_INET_PRE_ROUTING) | +			  (1 << NF_INET_POST_ROUTING) | +			  (1 << NF_INET_LOCAL_OUT) | +			  (1 << NF_INET_LOCAL_IN), +	.hooks		= { +		[NF_INET_PRE_ROUTING]	= nf_nat_ipv6_prerouting, +		[NF_INET_POST_ROUTING]	= nf_nat_ipv6_postrouting, +		[NF_INET_LOCAL_OUT]	= nf_nat_ipv6_output, +		[NF_INET_LOCAL_IN]	= nf_nat_ipv6_fn, +	}, +}; + +static int __init nft_chain_nat_ipv6_init(void) +{ +	int err; + +	err = nft_register_chain_type(&nft_chain_nat_ipv6); +	if (err < 0) +		return err; + +	return 0; +} + +static void __exit nft_chain_nat_ipv6_exit(void) +{ +	nft_unregister_chain_type(&nft_chain_nat_ipv6); +} + +module_init(nft_chain_nat_ipv6_init); +module_exit(nft_chain_nat_ipv6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Tomasz Bursztyka <tomasz.bursztyka@linux.intel.com>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat"); diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c new file mode 100644 index 00000000000..42031299585 --- /dev/null +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv6.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv6.h> +#include <net/route.h> + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, +					struct sk_buff *skb, +					const struct net_device *in, +					const struct net_device *out, +					int (*okfn)(struct sk_buff *)) +{ +	unsigned int ret; +	struct nft_pktinfo pkt; +	struct in6_addr saddr, daddr; +	u_int8_t hop_limit; +	u32 mark, flowlabel; + +	/* malformed packet, drop it */ +	if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) +		return NF_DROP; + +	/* save source/dest address, mark, hoplimit, flowlabel, priority */ +	memcpy(&saddr, &ipv6_hdr(skb)->saddr, sizeof(saddr)); +	memcpy(&daddr, &ipv6_hdr(skb)->daddr, sizeof(daddr)); +	mark = skb->mark; +	hop_limit = ipv6_hdr(skb)->hop_limit; + +	/* flowlabel and prio (includes version, which shouldn't change either */ +	flowlabel = *((u32 *)ipv6_hdr(skb)); + +	ret = nft_do_chain(&pkt, ops); +	if (ret != NF_DROP && ret != NF_QUEUE && +	    (memcmp(&ipv6_hdr(skb)->saddr, &saddr, sizeof(saddr)) || +	     memcmp(&ipv6_hdr(skb)->daddr, &daddr, sizeof(daddr)) || +	     skb->mark != mark || +	     ipv6_hdr(skb)->hop_limit != hop_limit || +	     flowlabel != *((u_int32_t *)ipv6_hdr(skb)))) +		return ip6_route_me_harder(skb) == 0 ? ret : NF_DROP; + +	return ret; +} + +static const struct nf_chain_type nft_chain_route_ipv6 = { +	.name		= "route", +	.type		= NFT_CHAIN_T_ROUTE, +	.family		= NFPROTO_IPV6, +        .owner		= THIS_MODULE, +	.hook_mask	= (1 << NF_INET_LOCAL_OUT), +	.hooks		= { +                [NF_INET_LOCAL_OUT]	= nf_route_table_hook, +        }, +}; + +static int __init nft_chain_route_init(void) +{ +	return nft_register_chain_type(&nft_chain_route_ipv6); +} + +static void __exit nft_chain_route_exit(void) +{ +	nft_unregister_chain_type(&nft_chain_route_ipv6); +} + +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET6, "route"); diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c new file mode 100644 index 00000000000..0bc19fa8782 --- /dev/null +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nft_reject.h> +#include <net/netfilter/ipv6/nf_reject.h> + +void nft_reject_ipv6_eval(const struct nft_expr *expr, +			  struct nft_data data[NFT_REG_MAX + 1], +			  const struct nft_pktinfo *pkt) +{ +	struct nft_reject *priv = nft_expr_priv(expr); +	struct net *net = dev_net((pkt->in != NULL) ? pkt->in : pkt->out); + +	switch (priv->type) { +	case NFT_REJECT_ICMP_UNREACH: +		nf_send_unreach6(net, pkt->skb, priv->icmp_code, +				 pkt->ops->hooknum); +		break; +	case NFT_REJECT_TCP_RST: +		nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); +		break; +	} + +	data[NFT_REG_VERDICT].verdict = NF_DROP; +} +EXPORT_SYMBOL_GPL(nft_reject_ipv6_eval); + +static struct nft_expr_type nft_reject_ipv6_type; +static const struct nft_expr_ops nft_reject_ipv6_ops = { +	.type		= &nft_reject_ipv6_type, +	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)), +	.eval		= nft_reject_ipv6_eval, +	.init		= nft_reject_init, +	.dump		= nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv6_type __read_mostly = { +	.family		= NFPROTO_IPV6, +	.name		= "reject", +	.ops		= &nft_reject_ipv6_ops, +	.policy		= nft_reject_policy, +	.maxattr	= NFTA_REJECT_MAX, +	.owner		= THIS_MODULE, +}; + +static int __init nft_reject_ipv6_module_init(void) +{ +	return nft_register_expr(&nft_reject_ipv6_type); +} + +static void __exit nft_reject_ipv6_module_exit(void) +{ +	nft_unregister_expr(&nft_reject_ipv6_type); +} + +module_init(nft_reject_ipv6_module_init); +module_exit(nft_reject_ipv6_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET6, "reject"); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 827f795209c..5ec867e4a8b 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -6,35 +6,7 @@  #include <net/ipv6.h>  #include <net/ip6_fib.h>  #include <net/addrconf.h> - -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) -{ -	static atomic_t ipv6_fragmentation_id; -	int old, new; - -#if IS_ENABLED(CONFIG_IPV6) -	if (rt && !(rt->dst.flags & DST_NOPEER)) { -		struct inet_peer *peer; -		struct net *net; - -		net = dev_net(rt->dst.dev); -		peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); -		if (peer) { -			fhdr->identification = htonl(inet_getid(peer, 0)); -			inet_putpeer(peer); -			return; -		} -	} -#endif -	do { -		old = atomic_read(&ipv6_fragmentation_id); -		new = old + 1; -		if (!new) -			new = 1; -	} while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old); -	fhdr->identification = htonl(new); -} -EXPORT_SYMBOL(ipv6_select_ident); +#include <net/secure_seq.h>  int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)  { @@ -106,6 +78,7 @@ int __ip6_local_out(struct sk_buff *skb)  	if (len > IPV6_MAXPLEN)  		len = 0;  	ipv6_hdr(skb)->payload_len = htons(len); +	IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);  	return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,  		       skb_dst(skb)->dev, dst_output); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 18f19df4189..5b7a1ed2aba 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -31,7 +31,7 @@ struct proto pingv6_prot = {  	.owner =	THIS_MODULE,  	.init =		ping_init_sock,  	.close =	ping_close, -	.connect =	ip6_datagram_connect, +	.connect =	ip6_datagram_connect_v6_only,  	.disconnect =	udp_disconnect,  	.setsockopt =	ipv6_setsockopt,  	.getsockopt =	ipv6_getsockopt, @@ -51,20 +51,19 @@ static struct inet_protosw pingv6_protosw = {  	.protocol =  IPPROTO_ICMPV6,  	.prot =      &pingv6_prot,  	.ops =       &inet6_dgram_ops, -	.no_check =  UDP_CSUM_DEFAULT,  	.flags =     INET_PROTOSW_REUSE,  };  /* Compatibility glue so we can support IPv6 when it's compiled as a module */ -static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len) +static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, +				 int *addr_len)  {  	return -EAFNOSUPPORT;  } -static int dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg, +static void dummy_ip6_datagram_recv_ctl(struct sock *sk, struct msghdr *msg,  				       struct sk_buff *skb)  { -	return -EAFNOSUPPORT;  }  static int dummy_icmpv6_err_convert(u8 type, u8 code, int *err)  { @@ -102,7 +101,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  		return err;  	if (msg->msg_name) { -		struct sockaddr_in6 *u = (struct sockaddr_in6 *) msg->msg_name; +		DECLARE_SOCKADDR(struct sockaddr_in6 *, u, msg->msg_name);  		if (msg->msg_namelen < sizeof(struct sockaddr_in6) ||  		    u->sin6_family != AF_INET6) {  			return -EINVAL; @@ -116,7 +115,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	} else {  		if (sk->sk_state != TCP_ESTABLISHED)  			return -EDESTADDRREQ; -		daddr = &np->daddr; +		daddr = &sk->sk_v6_daddr;  	}  	if (!iif) @@ -135,6 +134,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	fl6.flowi6_proto = IPPROTO_ICMPV6;  	fl6.saddr = np->saddr;  	fl6.daddr = *daddr; +	fl6.flowi6_mark = sk->sk_mark;  	fl6.fl6_icmp_type = user_icmph.icmp6_type;  	fl6.fl6_icmp_code = user_icmph.icmp6_code;  	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); @@ -144,7 +144,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	else if (!fl6.flowi6_oif)  		fl6.flowi6_oif = np->ucast_oif; -	dst = ip6_sk_dst_lookup_flow(sk, &fl6,  daddr, 1); +	dst = ip6_sk_dst_lookup_flow(sk, &fl6,  daddr);  	if (IS_ERR(dst))  		return PTR_ERR(dst);  	rt = (struct rt6_info *) dst; @@ -167,12 +167,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  	pfh.wcheck = 0;  	pfh.family = AF_INET6; -	if (ipv6_addr_is_multicast(&fl6.daddr)) -		hlimit = np->mcast_hops; -	else -		hlimit = np->hop_limit; -	if (hlimit < 0) -		hlimit = ip6_dst_hoplimit(dst); +	hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);  	lock_sock(sk);  	err = ip6_append_data(sk, ping_getfrag, &pfh, len, @@ -181,8 +176,8 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,  			      MSG_DONTWAIT, np->dontfrag);  	if (err) { -		ICMP6_INC_STATS_BH(sock_net(sk), rt->rt6i_idev, -				   ICMP6_MIB_OUTERRORS); +		ICMP6_INC_STATS(sock_net(sk), rt->rt6i_idev, +				ICMP6_MIB_OUTERRORS);  		ip6_flush_pending_frames(sk);  	} else {  		err = icmpv6_push_pending_frames(sk, &fl6, @@ -253,7 +248,9 @@ int __init pingv6_init(void)  		return ret;  #endif  	pingv6_ops.ipv6_recv_error = ipv6_recv_error; -	pingv6_ops.ip6_datagram_recv_ctl = ip6_datagram_recv_ctl; +	pingv6_ops.ip6_datagram_recv_common_ctl = ip6_datagram_recv_common_ctl; +	pingv6_ops.ip6_datagram_recv_specific_ctl = +		ip6_datagram_recv_specific_ctl;  	pingv6_ops.icmpv6_err_convert = icmpv6_err_convert;  	pingv6_ops.ipv6_icmp_error = ipv6_icmp_error;  	pingv6_ops.ipv6_chk_addr = ipv6_chk_addr; @@ -266,7 +263,8 @@ int __init pingv6_init(void)  void pingv6_exit(void)  {  	pingv6_ops.ipv6_recv_error = dummy_ipv6_recv_error; -	pingv6_ops.ip6_datagram_recv_ctl = dummy_ip6_datagram_recv_ctl; +	pingv6_ops.ip6_datagram_recv_common_ctl = dummy_ip6_datagram_recv_ctl; +	pingv6_ops.ip6_datagram_recv_specific_ctl = dummy_ip6_datagram_recv_ctl;  	pingv6_ops.icmpv6_err_convert = dummy_icmpv6_err_convert;  	pingv6_ops.ipv6_icmp_error = dummy_ipv6_icmp_error;  	pingv6_ops.ipv6_chk_addr = dummy_ipv6_chk_addr; diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 091d066a57b..3317440ea34 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -186,7 +186,7 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, atomic_long_t *smib)  /* can be called either with percpu mib (pcpumib != NULL),   * or shared one (smib != NULL)   */ -static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib, +static void snmp6_seq_show_item(struct seq_file *seq, void __percpu *pcpumib,  				atomic_long_t *smib,  				const struct snmp_mib *itemlist)  { @@ -201,7 +201,7 @@ static void snmp6_seq_show_item(struct seq_file *seq, void __percpu **pcpumib,  	}  } -static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu **mib, +static void snmp6_seq_show_item64(struct seq_file *seq, void __percpu *mib,  				  const struct snmp_mib *itemlist, size_t syncpoff)  {  	int i; @@ -215,14 +215,14 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)  {  	struct net *net = (struct net *)seq->private; -	snmp6_seq_show_item64(seq, (void __percpu **)net->mib.ipv6_statistics, +	snmp6_seq_show_item64(seq, net->mib.ipv6_statistics,  			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp)); -	snmp6_seq_show_item(seq, (void __percpu **)net->mib.icmpv6_statistics, +	snmp6_seq_show_item(seq, net->mib.icmpv6_statistics,  			    NULL, snmp6_icmp6_list);  	snmp6_seq_show_icmpv6msg(seq, net->mib.icmpv6msg_statistics->mibs); -	snmp6_seq_show_item(seq, (void __percpu **)net->mib.udp_stats_in6, +	snmp6_seq_show_item(seq, net->mib.udp_stats_in6,  			    NULL, snmp6_udp6_list); -	snmp6_seq_show_item(seq, (void __percpu **)net->mib.udplite_stats_in6, +	snmp6_seq_show_item(seq, net->mib.udplite_stats_in6,  			    NULL, snmp6_udplite6_list);  	return 0;  } @@ -245,7 +245,7 @@ static int snmp6_dev_seq_show(struct seq_file *seq, void *v)  	struct inet6_dev *idev = (struct inet6_dev *)seq->private;  	seq_printf(seq, "%-32s\t%u\n", "ifIndex", idev->dev->ifindex); -	snmp6_seq_show_item64(seq, (void __percpu **)idev->stats.ipv6, +	snmp6_seq_show_item64(seq, idev->stats.ipv6,  			    snmp6_ipstats_list, offsetof(struct ipstats_mib, syncp));  	snmp6_seq_show_item(seq, NULL, idev->stats.icmpv6dev->mibs,  			    snmp6_icmp6_list); diff --git a/net/ipv6/protocol.c b/net/ipv6/protocol.c index 22d1bd4670d..e048cf1bb6a 100644 --- a/net/ipv6/protocol.c +++ b/net/ipv6/protocol.c @@ -36,10 +36,6 @@ int inet6_add_protocol(const struct inet6_protocol *prot, unsigned char protocol  }  EXPORT_SYMBOL(inet6_add_protocol); -/* - *	Remove a protocol from the hash tables. - */ -  int inet6_del_protocol(const struct inet6_protocol *prot, unsigned char protocol)  {  	int ret; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index a4ed2416399..b2dc60b0c76 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -77,20 +77,19 @@ static struct sock *__raw_v6_lookup(struct net *net, struct sock *sk,  	sk_for_each_from(sk)  		if (inet_sk(sk)->inet_num == num) { -			struct ipv6_pinfo *np = inet6_sk(sk);  			if (!net_eq(sock_net(sk), net))  				continue; -			if (!ipv6_addr_any(&np->daddr) && -			    !ipv6_addr_equal(&np->daddr, rmt_addr)) +			if (!ipv6_addr_any(&sk->sk_v6_daddr) && +			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))  				continue;  			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)  				continue; -			if (!ipv6_addr_any(&np->rcv_saddr)) { -				if (ipv6_addr_equal(&np->rcv_saddr, loc_addr)) +			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { +				if (ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))  					goto found;  				if (is_multicast &&  				    inet6_mc_check(sk, loc_addr, rmt_addr)) @@ -251,6 +250,10 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)  	if (addr_len < SIN6_LEN_RFC2133)  		return -EINVAL; + +	if (addr->sin6_family != AF_INET6) +		return -EINVAL; +  	addr_type = ipv6_addr_type(&addr->sin6_addr);  	/* Raw sockets are IPv6 only */ @@ -302,7 +305,7 @@ static int rawv6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len)  	}  	inet->inet_rcv_saddr = inet->inet_saddr = v4addr; -	np->rcv_saddr = addr->sin6_addr; +	sk->sk_v6_rcv_saddr = addr->sin6_addr;  	if (!(addr_type & IPV6_ADDR_MULTICAST))  		np->saddr = addr->sin6_addr;  	err = 0; @@ -458,7 +461,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,  		  int noblock, int flags, int *addr_len)  {  	struct ipv6_pinfo *np = inet6_sk(sk); -	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)msg->msg_name; +	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);  	struct sk_buff *skb;  	size_t copied;  	int err; @@ -466,14 +469,11 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,  	if (flags & MSG_OOB)  		return -EOPNOTSUPP; -	if (addr_len) -		*addr_len=sizeof(*sin6); -  	if (flags & MSG_ERRQUEUE) -		return ipv6_recv_error(sk, msg, len); +		return ipv6_recv_error(sk, msg, len, addr_len);  	if (np->rxpmtu && np->rxopt.bits.rxpmtu) -		return ipv6_recv_rxpmtu(sk, msg, len); +		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);  	skb = skb_recv_datagram(sk, flags, noblock, &err);  	if (!skb) @@ -507,6 +507,7 @@ static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk,  		sin6->sin6_flowinfo = 0;  		sin6->sin6_scope_id = ipv6_iface_scope_id(&sin6->sin6_addr,  							  IP6CB(skb)->iif); +		*addr_len = sizeof(*sin6);  	}  	sock_recv_ts_and_drops(msg, sk, skb); @@ -737,7 +738,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,  		   struct msghdr *msg, size_t len)  {  	struct ipv6_txoptions opt_space; -	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name; +	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);  	struct in6_addr *daddr, *final_p, final;  	struct inet_sock *inet = inet_sk(sk);  	struct ipv6_pinfo *np = inet6_sk(sk); @@ -795,7 +796,6 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,  				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);  				if (flowlabel == NULL)  					return -EINVAL; -				daddr = &flowlabel->dst;  			}  		} @@ -804,8 +804,8 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,  		 * sk->sk_dst_cache.  		 */  		if (sk->sk_state == TCP_ESTABLISHED && -		    ipv6_addr_equal(daddr, &np->daddr)) -			daddr = &np->daddr; +		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) +			daddr = &sk->sk_v6_daddr;  		if (addr_len >= sizeof(struct sockaddr_in6) &&  		    sin6->sin6_scope_id && @@ -816,7 +816,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,  			return -EDESTADDRREQ;  		proto = inet->inet_num; -		daddr = &np->daddr; +		daddr = &sk->sk_v6_daddr;  		fl6.flowlabel = np->flow_label;  	} @@ -868,19 +868,13 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,  		fl6.flowi6_oif = np->ucast_oif;  	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); -	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); +	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);  	if (IS_ERR(dst)) {  		err = PTR_ERR(dst);  		goto out;  	} -	if (hlimit < 0) { -		if (ipv6_addr_is_multicast(&fl6.daddr)) -			hlimit = np->mcast_hops; -		else -			hlimit = np->hop_limit; -		if (hlimit < 0) -			hlimit = ip6_dst_hoplimit(dst); -	} +	if (hlimit < 0) +		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);  	if (tclass < 0)  		tclass = np->tclass; @@ -1213,7 +1207,7 @@ struct proto rawv6_prot = {  	.owner		   = THIS_MODULE,  	.close		   = rawv6_close,  	.destroy	   = raw6_destroy, -	.connect	   = ip6_datagram_connect, +	.connect	   = ip6_datagram_connect_v6_only,  	.disconnect	   = udp_disconnect,  	.ioctl		   = rawv6_ioctl,  	.init		   = rawv6_init_sk, @@ -1328,7 +1322,6 @@ static struct inet_protosw rawv6_protosw = {  	.protocol	= IPPROTO_IP,	/* wild card */  	.prot		= &rawv6_prot,  	.ops		= &inet6_sockraw_ops, -	.no_check	= UDP_CSUM_DEFAULT,  	.flags		= INET_PROTOSW_REUSE,  }; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 1aeb473b2cc..cc85a9ba501 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -82,24 +82,24 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev,   * callers should be careful not to use the hash value outside the ipfrag_lock   * as doing so could race with ipfrag_hash_rnd being recalculated.   */ -unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, -			     const struct in6_addr *daddr, u32 rnd) +static unsigned int inet6_hash_frag(__be32 id, const struct in6_addr *saddr, +				    const struct in6_addr *daddr)  {  	u32 c; +	net_get_random_once(&ip6_frags.rnd, sizeof(ip6_frags.rnd));  	c = jhash_3words(ipv6_addr_hash(saddr), ipv6_addr_hash(daddr), -			 (__force u32)id, rnd); +			 (__force u32)id, ip6_frags.rnd);  	return c & (INETFRAGS_HASHSZ - 1);  } -EXPORT_SYMBOL_GPL(inet6_hash_frag);  static unsigned int ip6_hashfn(struct inet_frag_queue *q)  {  	struct frag_queue *fq;  	fq = container_of(q, struct frag_queue, q); -	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr, ip6_frags.rnd); +	return inet6_hash_frag(fq->id, &fq->saddr, &fq->daddr);  }  bool ip6_frag_match(struct inet_frag_queue *q, void *a) @@ -193,7 +193,7 @@ fq_find(struct net *net, __be32 id, const struct in6_addr *src,  	arg.ecn = ecn;  	read_lock(&ip6_frags.lock); -	hash = inet6_hash_frag(id, src, dst, ip6_frags.rnd); +	hash = inet6_hash_frag(id, src, dst);  	q = inet_frag_find(&net->ipv6.frags, &ip6_frags, &arg, hash);  	if (IS_ERR_OR_NULL(q)) { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c979dd96d82..f23fbd28a50 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -66,8 +66,9 @@  #endif  enum rt6_nud_state { -	RT6_NUD_FAIL_HARD = -2, -	RT6_NUD_FAIL_SOFT = -1, +	RT6_NUD_FAIL_HARD = -3, +	RT6_NUD_FAIL_PROBE = -2, +	RT6_NUD_FAIL_DO_RR = -1,  	RT6_NUD_SUCCEED = 1  }; @@ -83,7 +84,9 @@ static void		ip6_dst_ifdown(struct dst_entry *,  static int		 ip6_dst_gc(struct dst_ops *ops);  static int		ip6_pkt_discard(struct sk_buff *skb); -static int		ip6_pkt_discard_out(struct sk_buff *skb); +static int		ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb); +static int		ip6_pkt_prohibit(struct sk_buff *skb); +static int		ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb);  static void		ip6_link_failure(struct sk_buff *skb);  static void		ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk,  					   struct sk_buff *skb, u32 mtu); @@ -101,6 +104,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net,  					   const struct in6_addr *gwaddr, int ifindex);  #endif +static void rt6_bind_peer(struct rt6_info *rt, int create) +{ +	struct inet_peer_base *base; +	struct inet_peer *peer; + +	base = inetpeer_base_ptr(rt->_rt6i_peer); +	if (!base) +		return; + +	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); +	if (peer) { +		if (!rt6_set_peer(rt, peer)) +			inet_putpeer(peer); +	} +} + +static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) +{ +	if (rt6_has_peer(rt)) +		return rt6_peer_ptr(rt); + +	rt6_bind_peer(rt, create); +	return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); +} + +static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +{ +	return __rt6_get_peer(rt, 1); +} +  static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)  {  	struct rt6_info *rt = (struct rt6_info *) dst; @@ -116,7 +149,8 @@ static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)  		unsigned long prev, new;  		p = peer->metrics; -		if (inet_metrics_new(peer)) +		if (inet_metrics_new(peer) || +		    (old & DST_METRICS_FORCE_OVERWRITE))  			memcpy(p, old_p, sizeof(u32) * RTAX_MAX);  		new = (unsigned long) p; @@ -234,9 +268,6 @@ static const struct rt6_info ip6_null_entry_template = {  #ifdef CONFIG_IPV6_MULTIPLE_TABLES -static int ip6_pkt_prohibit(struct sk_buff *skb); -static int ip6_pkt_prohibit_out(struct sk_buff *skb); -  static const struct rt6_info ip6_prohibit_entry_template = {  	.dst = {  		.__refcnt	= ATOMIC_INIT(1), @@ -259,7 +290,7 @@ static const struct rt6_info ip6_blk_hole_entry_template = {  		.obsolete	= DST_OBSOLETE_FORCE_CHK,  		.error		= -EINVAL,  		.input		= dst_discard, -		.output		= dst_discard, +		.output		= dst_discard_sk,  	},  	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),  	.rt6i_protocol  = RTPROT_KERNEL, @@ -312,22 +343,6 @@ static void ip6_dst_destroy(struct dst_entry *dst)  	}  } -void rt6_bind_peer(struct rt6_info *rt, int create) -{ -	struct inet_peer_base *base; -	struct inet_peer *peer; - -	base = inetpeer_base_ptr(rt->_rt6i_peer); -	if (!base) -		return; - -	peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); -	if (peer) { -		if (!rt6_set_peer(rt, peer)) -			inet_putpeer(peer); -	} -} -  static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,  			   int how)  { @@ -359,12 +374,6 @@ static bool rt6_check_expired(const struct rt6_info *rt)  	return false;  } -static bool rt6_need_strict(const struct in6_addr *daddr) -{ -	return ipv6_addr_type(daddr) & -		(IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); -} -  /* Multipath route selection:   *   Hash based function using packet header and flowlabel.   * Adapted from fib_info_hashfn() @@ -476,6 +485,24 @@ out:  }  #ifdef CONFIG_IPV6_ROUTER_PREF +struct __rt6_probe_work { +	struct work_struct work; +	struct in6_addr target; +	struct net_device *dev; +}; + +static void rt6_probe_deferred(struct work_struct *w) +{ +	struct in6_addr mcaddr; +	struct __rt6_probe_work *work = +		container_of(w, struct __rt6_probe_work, work); + +	addrconf_addr_solict_mult(&work->target, &mcaddr); +	ndisc_send_ns(work->dev, NULL, &work->target, &mcaddr, NULL); +	dev_put(work->dev); +	kfree(w); +} +  static void rt6_probe(struct rt6_info *rt)  {  	struct neighbour *neigh; @@ -499,17 +526,23 @@ static void rt6_probe(struct rt6_info *rt)  	if (!neigh ||  	    time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { -		struct in6_addr mcaddr; -		struct in6_addr *target; +		struct __rt6_probe_work *work; + +		work = kmalloc(sizeof(*work), GFP_ATOMIC); -		if (neigh) { -			neigh->updated = jiffies; +		if (neigh && work) +			__neigh_set_probe_once(neigh); + +		if (neigh)  			write_unlock(&neigh->lock); -		} -		target = (struct in6_addr *)&rt->rt6i_gateway; -		addrconf_addr_solict_mult(target, &mcaddr); -		ndisc_send_ns(rt->dst.dev, NULL, target, &mcaddr, NULL); +		if (work) { +			INIT_WORK(&work->work, rt6_probe_deferred); +			work->target = rt->rt6i_gateway; +			dev_hold(rt->dst.dev); +			work->dev = rt->dst.dev; +			schedule_work(&work->work); +		}  	} else {  out:  		write_unlock(&neigh->lock); @@ -554,11 +587,13 @@ static inline enum rt6_nud_state rt6_check_neigh(struct rt6_info *rt)  #ifdef CONFIG_IPV6_ROUTER_PREF  		else if (!(neigh->nud_state & NUD_FAILED))  			ret = RT6_NUD_SUCCEED; +		else +			ret = RT6_NUD_FAIL_PROBE;  #endif  		read_unlock(&neigh->lock);  	} else {  		ret = IS_ENABLED(CONFIG_IPV6_ROUTER_PREF) ? -		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_SOFT; +		      RT6_NUD_SUCCEED : RT6_NUD_FAIL_DO_RR;  	}  	rcu_read_unlock_bh(); @@ -595,16 +630,17 @@ static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,  		goto out;  	m = rt6_score_route(rt, oif, strict); -	if (m == RT6_NUD_FAIL_SOFT && !IS_ENABLED(CONFIG_IPV6_ROUTER_PREF)) { +	if (m == RT6_NUD_FAIL_DO_RR) {  		match_do_rr = true;  		m = 0; /* lowest valid score */ -	} else if (m < 0) { +	} else if (m == RT6_NUD_FAIL_HARD) {  		goto out;  	}  	if (strict & RT6_LOOKUP_F_REACHABLE)  		rt6_probe(rt); +	/* note that m can be RT6_NUD_FAIL_PROBE at this point */  	if (m > *mpri) {  		*do_rr = match_do_rr;  		*mpri = m; @@ -707,8 +743,11 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,  		prefix = &prefix_buf;  	} -	rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr, -				dev->ifindex); +	if (rinfo->prefix_len == 0) +		rt = rt6_get_dflt_router(gwaddr, dev); +	else +		rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, +					gwaddr, dev->ifindex);  	if (rt && !lifetime) {  		ip6_del_rt(rt); @@ -813,14 +852,15 @@ EXPORT_SYMBOL(rt6_lookup);     be destroyed.   */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) +static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, +			struct nlattr *mx, int mx_len)  {  	int err;  	struct fib6_table *table;  	table = rt->rt6i_table;  	write_lock_bh(&table->tb6_lock); -	err = fib6_add(&table->tb6_root, rt, info); +	err = fib6_add(&table->tb6_root, rt, info, mx, mx_len);  	write_unlock_bh(&table->tb6_lock);  	return err; @@ -831,7 +871,7 @@ int ip6_ins_rt(struct rt6_info *rt)  	struct nl_info info = {  		.nl_net = dev_net(rt->dst.dev),  	}; -	return __ip6_ins_rt(rt, &info); +	return __ip6_ins_rt(rt, &info, NULL, 0);  }  static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -847,12 +887,9 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort,  	rt = ip6_rt_copy(ort, daddr);  	if (rt) { -		if (!(rt->rt6i_flags & RTF_GATEWAY)) { -			if (ort->rt6i_dst.plen != 128 && -			    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) -				rt->rt6i_flags |= RTF_ANYCAST; -			rt->rt6i_gateway = *daddr; -		} +		if (ort->rt6i_dst.plen != 128 && +		    ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) +			rt->rt6i_flags |= RTF_ANYCAST;  		rt->rt6i_flags |= RTF_CACHE; @@ -1021,7 +1058,7 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori  		new->__use = 1;  		new->input = dst_discard; -		new->output = dst_discard; +		new->output = dst_discard_sk;  		if (dst_metrics_read_only(&ort->dst))  			new->_metrics = ort->dst._metrics; @@ -1064,10 +1101,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)  	if (rt->rt6i_genid != rt_genid_ipv6(dev_net(rt->dst.dev)))  		return NULL; -	if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) -		return dst; +	if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) +		return NULL; -	return NULL; +	if (rt6_check_expired(rt)) +		return NULL; + +	return dst;  }  static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -1136,8 +1176,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu,  	memset(&fl6, 0, sizeof(fl6));  	fl6.flowi6_oif = oif; -	fl6.flowi6_mark = mark; -	fl6.flowi6_flags = 0; +	fl6.flowi6_mark = mark ? mark : IP6_REPLY_MARK(net, skb->mark);  	fl6.daddr = iph->daddr;  	fl6.saddr = iph->saddr;  	fl6.flowlabel = ip6_flowinfo(iph); @@ -1234,9 +1273,9 @@ void ip6_redirect(struct sk_buff *skb, struct net *net, int oif, u32 mark)  	struct flowi6 fl6;  	memset(&fl6, 0, sizeof(fl6)); +	fl6.flowi6_iif = LOOPBACK_IFINDEX;  	fl6.flowi6_oif = oif;  	fl6.flowi6_mark = mark; -	fl6.flowi6_flags = 0;  	fl6.daddr = iph->daddr;  	fl6.saddr = iph->saddr;  	fl6.flowlabel = ip6_flowinfo(iph); @@ -1256,9 +1295,9 @@ void ip6_redirect_no_header(struct sk_buff *skb, struct net *net, int oif,  	struct flowi6 fl6;  	memset(&fl6, 0, sizeof(fl6)); +	fl6.flowi6_iif = LOOPBACK_IFINDEX;  	fl6.flowi6_oif = oif;  	fl6.flowi6_mark = mark; -	fl6.flowi6_flags = 0;  	fl6.daddr = msg->dest;  	fl6.saddr = iph->daddr; @@ -1301,7 +1340,7 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)  	unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);  	if (mtu) -		return mtu; +		goto out;  	mtu = IPV6_MIN_MTU; @@ -1311,7 +1350,8 @@ static unsigned int ip6_mtu(const struct dst_entry *dst)  		mtu = idev->cnf.mtu6;  	rcu_read_unlock(); -	return mtu; +out: +	return min_t(unsigned int, mtu, IP6_MAX_MTU);  }  static struct dst_entry *icmp6_dst_gc_list; @@ -1338,6 +1378,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev,  	rt->dst.flags |= DST_HOST;  	rt->dst.output  = ip6_output;  	atomic_set(&rt->dst.__refcnt, 1); +	rt->rt6i_gateway  = fl6->daddr;  	rt->rt6i_dst.addr = fl6->daddr;  	rt->rt6i_dst.plen = 128;  	rt->rt6i_idev     = idev; @@ -1414,7 +1455,7 @@ static int ip6_dst_gc(struct dst_ops *ops)  		goto out;  	net->ipv6.ip6_rt_gc_expire++; -	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, entries > rt_max_size); +	fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net, true);  	entries = dst_entries_get_slow(ops);  	if (entries < ops->gc_thresh)  		net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; @@ -1471,7 +1512,7 @@ int ip6_route_add(struct fib6_config *cfg)  	if (!table)  		goto out; -	rt = ip6_dst_alloc(net, NULL, DST_NOCOUNT, table); +	rt = ip6_dst_alloc(net, NULL, (cfg->fc_flags & RTF_ADDRCONF) ? 0 : DST_NOCOUNT, table);  	if (!rt) {  		err = -ENOMEM; @@ -1501,17 +1542,11 @@ int ip6_route_add(struct fib6_config *cfg)  	ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);  	rt->rt6i_dst.plen = cfg->fc_dst_len; -	if (rt->rt6i_dst.plen == 128) -	       rt->dst.flags |= DST_HOST; - -	if (!(rt->dst.flags & DST_HOST) && cfg->fc_mx) { -		u32 *metrics = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); -		if (!metrics) { -			err = -ENOMEM; -			goto out; -		} -		dst_init_metrics(&rt->dst, metrics, 0); +	if (rt->rt6i_dst.plen == 128) { +		rt->dst.flags |= DST_HOST; +		dst_metrics_set_force_overwrite(&rt->dst);  	} +  #ifdef CONFIG_IPV6_SUBTREES  	ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);  	rt->rt6i_src.plen = cfg->fc_src_len; @@ -1540,21 +1575,24 @@ int ip6_route_add(struct fib6_config *cfg)  				goto out;  			}  		} -		rt->dst.output = ip6_pkt_discard_out; -		rt->dst.input = ip6_pkt_discard;  		rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;  		switch (cfg->fc_type) {  		case RTN_BLACKHOLE:  			rt->dst.error = -EINVAL; +			rt->dst.output = dst_discard_sk; +			rt->dst.input = dst_discard;  			break;  		case RTN_PROHIBIT:  			rt->dst.error = -EACCES; +			rt->dst.output = ip6_pkt_prohibit_out; +			rt->dst.input = ip6_pkt_prohibit;  			break;  		case RTN_THROW: -			rt->dst.error = -EAGAIN; -			break;  		default: -			rt->dst.error = -ENETUNREACH; +			rt->dst.error = (cfg->fc_type == RTN_THROW) ? -EAGAIN +					: -ENETUNREACH; +			rt->dst.output = ip6_pkt_discard_out; +			rt->dst.input = ip6_pkt_discard;  			break;  		}  		goto install_route; @@ -1627,31 +1665,13 @@ int ip6_route_add(struct fib6_config *cfg)  	rt->rt6i_flags = cfg->fc_flags;  install_route: -	if (cfg->fc_mx) { -		struct nlattr *nla; -		int remaining; - -		nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { -			int type = nla_type(nla); - -			if (type) { -				if (type > RTAX_MAX) { -					err = -EINVAL; -					goto out; -				} - -				dst_metric_set(&rt->dst, type, nla_get_u32(nla)); -			} -		} -	} -  	rt->dst.dev = dev;  	rt->rt6i_idev = idev;  	rt->rt6i_table = table;  	cfg->fc_nlinfo.nl_net = dev_net(dev); -	return __ip6_ins_rt(rt, &cfg->fc_nlinfo); +	return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len);  out:  	if (dev) @@ -1873,11 +1893,12 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort,  			in6_dev_hold(rt->rt6i_idev);  		rt->dst.lastuse = jiffies; -		rt->rt6i_gateway = ort->rt6i_gateway; +		if (ort->rt6i_flags & RTF_GATEWAY) +			rt->rt6i_gateway = ort->rt6i_gateway; +		else +			rt->rt6i_gateway = *dest;  		rt->rt6i_flags = ort->rt6i_flags; -		if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == -		    (RTF_DEFAULT | RTF_ADDRCONF)) -			rt6_set_from(rt, ort); +		rt6_set_from(rt, ort);  		rt->rt6i_metric = 0;  #ifdef CONFIG_IPV6_SUBTREES @@ -2110,27 +2131,23 @@ static int ip6_pkt_discard(struct sk_buff *skb)  	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);  } -static int ip6_pkt_discard_out(struct sk_buff *skb) +static int ip6_pkt_discard_out(struct sock *sk, struct sk_buff *skb)  {  	skb->dev = skb_dst(skb)->dev;  	return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);  } -#ifdef CONFIG_IPV6_MULTIPLE_TABLES -  static int ip6_pkt_prohibit(struct sk_buff *skb)  {  	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);  } -static int ip6_pkt_prohibit_out(struct sk_buff *skb) +static int ip6_pkt_prohibit_out(struct sock *sk, struct sk_buff *skb)  {  	skb->dev = skb_dst(skb)->dev;  	return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);  } -#endif -  /*   *	Allocate a dst for local (unicast / anycast) address.   */ @@ -2140,12 +2157,10 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,  				    bool anycast)  {  	struct net *net = dev_net(idev->dev); -	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, 0, NULL); - -	if (!rt) { -		net_warn_ratelimited("Maximum number of routes reached, consider increasing route/max_size\n"); +	struct rt6_info *rt = ip6_dst_alloc(net, net->loopback_dev, +					    DST_NOCOUNT, NULL); +	if (!rt)  		return ERR_PTR(-ENOMEM); -	}  	in6_dev_hold(idev); @@ -2160,6 +2175,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,  	else  		rt->rt6i_flags |= RTF_LOCAL; +	rt->rt6i_gateway  = *addr;  	rt->rt6i_dst.addr = *addr;  	rt->rt6i_dst.plen = 128;  	rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL); @@ -2215,7 +2231,28 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp)  		.net = net,  		.addr = &ifp->addr,  	}; -	fib6_clean_all(net, fib6_remove_prefsrc, 0, &adni); +	fib6_clean_all(net, fib6_remove_prefsrc, &adni); +} + +#define RTF_RA_ROUTER		(RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) +#define RTF_CACHE_GATEWAY	(RTF_GATEWAY | RTF_CACHE) + +/* Remove routers and update dst entries when gateway turn into host. */ +static int fib6_clean_tohost(struct rt6_info *rt, void *arg) +{ +	struct in6_addr *gateway = (struct in6_addr *)arg; + +	if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || +	     ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && +	     ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { +		return -1; +	} +	return 0; +} + +void rt6_clean_tohost(struct net *net, struct in6_addr *gateway) +{ +	fib6_clean_all(net, fib6_clean_tohost, gateway);  }  struct arg_dev_net { @@ -2242,7 +2279,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev)  		.net = net,  	}; -	fib6_clean_all(net, fib6_ifdown, 0, &adn); +	fib6_clean_all(net, fib6_ifdown, &adn);  	icmp6_clean_all(fib6_ifdown, &adn);  } @@ -2297,7 +2334,7 @@ void rt6_mtu_change(struct net_device *dev, unsigned int mtu)  		.mtu = mtu,  	}; -	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, 0, &arg); +	fib6_clean_all(dev_net(dev), rt6_mtu_change_route, &arg);  }  static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { @@ -2693,6 +2730,9 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh)  	if (tb[RTA_OIF])  		oif = nla_get_u32(tb[RTA_OIF]); +	if (tb[RTA_MARK]) +		fl6.flowi6_mark = nla_get_u32(tb[RTA_MARK]); +  	if (iif) {  		struct net_device *dev;  		int flags = 0; @@ -2800,56 +2840,12 @@ static int ip6_route_dev_notify(struct notifier_block *this,  #ifdef CONFIG_PROC_FS -struct rt6_proc_arg -{ -	char *buffer; -	int offset; -	int length; -	int skip; -	int len; -}; - -static int rt6_info_route(struct rt6_info *rt, void *p_arg) -{ -	struct seq_file *m = p_arg; - -	seq_printf(m, "%pi6 %02x ", &rt->rt6i_dst.addr, rt->rt6i_dst.plen); - -#ifdef CONFIG_IPV6_SUBTREES -	seq_printf(m, "%pi6 %02x ", &rt->rt6i_src.addr, rt->rt6i_src.plen); -#else -	seq_puts(m, "00000000000000000000000000000000 00 "); -#endif -	if (rt->rt6i_flags & RTF_GATEWAY) { -		seq_printf(m, "%pi6", &rt->rt6i_gateway); -	} else { -		seq_puts(m, "00000000000000000000000000000000"); -	} -	seq_printf(m, " %08x %08x %08x %08x %8s\n", -		   rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), -		   rt->dst.__use, rt->rt6i_flags, -		   rt->dst.dev ? rt->dst.dev->name : ""); -	return 0; -} - -static int ipv6_route_show(struct seq_file *m, void *v) -{ -	struct net *net = (struct net *)m->private; -	fib6_clean_all_ro(net, rt6_info_route, 0, m); -	return 0; -} - -static int ipv6_route_open(struct inode *inode, struct file *file) -{ -	return single_open_net(inode, file, ipv6_route_show); -} -  static const struct file_operations ipv6_route_proc_fops = {  	.owner		= THIS_MODULE,  	.open		= ipv6_route_open,  	.read		= seq_read,  	.llseek		= seq_lseek, -	.release	= single_release_net, +	.release	= seq_release_net,  };  static int rt6_stats_seq_show(struct seq_file *seq, void *v) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 19269453a8e..4f408176dc6 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -475,17 +475,48 @@ static void ipip6_tunnel_uninit(struct net_device *dev)  		ipip6_tunnel_unlink(sitn, tunnel);  		ipip6_tunnel_del_prl(tunnel, NULL);  	} +	ip_tunnel_dst_reset_all(tunnel);  	dev_put(dev);  } +/* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH + * if sufficient data bytes are available + */ +static int ipip6_err_gen_icmpv6_unreach(struct sk_buff *skb) +{ +	const struct iphdr *iph = (const struct iphdr *) skb->data; +	struct rt6_info *rt; +	struct sk_buff *skb2; + +	if (!pskb_may_pull(skb, iph->ihl * 4 + sizeof(struct ipv6hdr) + 8)) +		return 1; + +	skb2 = skb_clone(skb, GFP_ATOMIC); + +	if (!skb2) +		return 1; + +	skb_dst_drop(skb2); +	skb_pull(skb2, iph->ihl * 4); +	skb_reset_network_header(skb2); + +	rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0, 0); + +	if (rt && rt->dst.dev) +		skb2->dev = rt->dst.dev; + +	icmpv6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); + +	if (rt) +		ip6_rt_put(rt); + +	kfree_skb(skb2); + +	return 0; +}  static int ipip6_err(struct sk_buff *skb, u32 info)  { - -/* All the routers (except for Linux) return only -   8 bytes of packet payload. It means, that precise relaying of -   ICMP in the real Internet is absolutely infeasible. - */  	const struct iphdr *iph = (const struct iphdr *)skb->data;  	const int type = icmp_hdr(skb)->type;  	const int code = icmp_hdr(skb)->code; @@ -500,7 +531,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info)  	case ICMP_DEST_UNREACH:  		switch (code) {  		case ICMP_SR_FAILED: -		case ICMP_PORT_UNREACH:  			/* Impossible event. */  			return 0;  		default: @@ -530,12 +560,12 @@ static int ipip6_err(struct sk_buff *skb, u32 info)  	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {  		ipv4_update_pmtu(skb, dev_net(skb->dev), info, -				 t->dev->ifindex, 0, IPPROTO_IPV6, 0); +				 t->parms.link, 0, IPPROTO_IPV6, 0);  		err = 0;  		goto out;  	}  	if (type == ICMP_REDIRECT) { -		ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0, +		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0,  			      IPPROTO_IPV6, 0);  		err = 0;  		goto out; @@ -545,6 +575,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info)  		goto out;  	err = 0; +	if (!ipip6_err_gen_icmpv6_unreach(skb)) +		goto out; +  	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)  		goto out; @@ -639,7 +672,7 @@ static int ipip6_rcv(struct sk_buff *skb)  	tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev,  				     iph->saddr, iph->daddr);  	if (tunnel != NULL) { -		struct pcpu_tstats *tstats; +		struct pcpu_sw_netstats *tstats;  		if (tunnel->parms.iph.protocol != IPPROTO_IPV6 &&  		    tunnel->parms.iph.protocol != 0) @@ -670,8 +703,10 @@ static int ipip6_rcv(struct sk_buff *skb)  		}  		tstats = this_cpu_ptr(tunnel->dev->tstats); +		u64_stats_update_begin(&tstats->syncp);  		tstats->rx_packets++;  		tstats->rx_bytes += skb->len; +		u64_stats_update_end(&tstats->syncp);  		netif_rx(skb); @@ -892,7 +927,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,  		if (tunnel->parms.iph.daddr && skb_dst(skb))  			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); -		if (skb->len > mtu) { +		if (skb->len > mtu && !skb_is_gso(skb)) {  			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);  			ip_rt_put(rt);  			goto tx_error; @@ -919,7 +954,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,  		if (!new_skb) {  			ip_rt_put(rt);  			dev->stats.tx_dropped++; -			dev_kfree_skb(skb); +			kfree_skb(skb);  			return NETDEV_TX_OK;  		}  		if (skb->sk) @@ -933,21 +968,24 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,  		ttl = iph6->hop_limit;  	tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); -	if (likely(!skb->encapsulation)) { -		skb_reset_inner_headers(skb); -		skb->encapsulation = 1; +	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); +	if (IS_ERR(skb)) { +		ip_rt_put(rt); +		goto out;  	} -	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, -			    ttl, df, !net_eq(tunnel->net, dev_net(dev))); +	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, +			    IPPROTO_IPV6, tos, ttl, df, +			    !net_eq(tunnel->net, dev_net(dev)));  	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);  	return NETDEV_TX_OK;  tx_error_icmp:  	dst_link_failure(skb);  tx_error: +	kfree_skb(skb); +out:  	dev->stats.tx_errors++; -	dev_kfree_skb(skb);  	return NETDEV_TX_OK;  } @@ -956,13 +994,15 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)  	struct ip_tunnel *tunnel = netdev_priv(dev);  	const struct iphdr  *tiph = &tunnel->parms.iph; -	if (likely(!skb->encapsulation)) { -		skb_reset_inner_headers(skb); -		skb->encapsulation = 1; -	} +	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); +	if (IS_ERR(skb)) +		goto out;  	ip_tunnel_xmit(skb, dev, tiph, IPPROTO_IPIP);  	return NETDEV_TX_OK; +out: +	dev->stats.tx_errors++; +	return NETDEV_TX_OK;  }  static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb, @@ -983,7 +1023,7 @@ static netdev_tx_t sit_tunnel_xmit(struct sk_buff *skb,  tx_err:  	dev->stats.tx_errors++; -	dev_kfree_skb(skb); +	kfree_skb(skb);  	return NETDEV_TX_OK;  } @@ -1044,6 +1084,7 @@ static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p)  		t->parms.link = p->link;  		ipip6_tunnel_bind_dev(t->dev);  	} +	ip_tunnel_dst_reset_all(t);  	netdev_state_change(t->dev);  } @@ -1074,6 +1115,7 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,  	t->ip6rd.relay_prefix = relay_prefix;  	t->ip6rd.prefixlen = ip6rd->prefixlen;  	t->ip6rd.relay_prefixlen = ip6rd->relay_prefixlen; +	ip_tunnel_dst_reset_all(t);  	netdev_state_change(t->dev);  	return 0;  } @@ -1085,8 +1127,8 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)  	int err = 0;  	struct ip_tunnel_parm p;  	struct ip_tunnel_prl prl; -	struct ip_tunnel *t; -	struct net *net = dev_net(dev); +	struct ip_tunnel *t = netdev_priv(dev); +	struct net *net = t->net;  	struct sit_net *sitn = net_generic(net, sit_net_id);  #ifdef CONFIG_IPV6_SIT_6RD  	struct ip_tunnel_6rd ip6rd; @@ -1097,16 +1139,15 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)  #ifdef CONFIG_IPV6_SIT_6RD  	case SIOCGET6RD:  #endif -		t = NULL;  		if (dev == sitn->fb_tunnel_dev) {  			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {  				err = -EFAULT;  				break;  			}  			t = ipip6_tunnel_locate(net, &p, 0); +			if (t == NULL) +				t = netdev_priv(dev);  		} -		if (t == NULL) -			t = netdev_priv(dev);  		err = -EFAULT;  		if (cmd == SIOCGETTUNNEL) { @@ -1202,9 +1243,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)  		err = -EINVAL;  		if (dev == sitn->fb_tunnel_dev)  			goto done; -		err = -ENOENT; -		if (!(t = netdev_priv(dev))) -			goto done;  		err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);  		break; @@ -1220,9 +1258,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)  		err = -EFAULT;  		if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))  			goto done; -		err = -ENOENT; -		if (!(t = netdev_priv(dev))) -			goto done;  		switch (cmd) {  		case SIOCDELPRL: @@ -1233,6 +1268,7 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)  			err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);  			break;  		} +		ip_tunnel_dst_reset_all(t);  		netdev_state_change(dev);  		break; @@ -1249,8 +1285,6 @@ ipip6_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)  				   sizeof(ip6rd)))  			goto done; -		t = netdev_priv(dev); -  		if (cmd != SIOCDEL6RD) {  			err = ipip6_tunnel_update_6rd(t, &ip6rd);  			if (err < 0) @@ -1288,10 +1322,19 @@ static const struct net_device_ops ipip6_netdev_ops = {  static void ipip6_dev_free(struct net_device *dev)  { +	struct ip_tunnel *tunnel = netdev_priv(dev); + +	free_percpu(tunnel->dst_cache);  	free_percpu(dev->tstats);  	free_netdev(dev);  } +#define SIT_FEATURES (NETIF_F_SG	   | \ +		      NETIF_F_FRAGLIST	   | \ +		      NETIF_F_HIGHDMA	   | \ +		      NETIF_F_GSO_SOFTWARE | \ +		      NETIF_F_HW_CSUM) +  static void ipip6_tunnel_setup(struct net_device *dev)  {  	dev->netdev_ops		= &ipip6_netdev_ops; @@ -1305,6 +1348,8 @@ static void ipip6_tunnel_setup(struct net_device *dev)  	dev->iflink		= 0;  	dev->addr_len		= 4;  	dev->features		|= NETIF_F_LLTX; +	dev->features		|= SIT_FEATURES; +	dev->hw_features	|= SIT_FEATURES;  }  static int ipip6_tunnel_init(struct net_device *dev) @@ -1318,10 +1363,16 @@ static int ipip6_tunnel_init(struct net_device *dev)  	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);  	ipip6_tunnel_bind_dev(dev); -	dev->tstats = alloc_percpu(struct pcpu_tstats); +	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);  	if (!dev->tstats)  		return -ENOMEM; +	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); +	if (!tunnel->dst_cache) { +		free_percpu(dev->tstats); +		return -ENOMEM; +	} +  	return 0;  } @@ -1341,9 +1392,16 @@ static int __net_init ipip6_fb_tunnel_init(struct net_device *dev)  	iph->ihl		= 5;  	iph->ttl		= 64; -	dev->tstats = alloc_percpu(struct pcpu_tstats); +	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);  	if (!dev->tstats)  		return -ENOMEM; + +	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); +	if (!tunnel->dst_cache) { +		free_percpu(dev->tstats); +		return -ENOMEM; +	} +  	dev_hold(dev);  	rcu_assign_pointer(sitn->tunnels_wc[0], tunnel);  	return 0; @@ -1594,6 +1652,15 @@ static const struct nla_policy ipip6_policy[IFLA_IPTUN_MAX + 1] = {  #endif  }; +static void ipip6_dellink(struct net_device *dev, struct list_head *head) +{ +	struct net *net = dev_net(dev); +	struct sit_net *sitn = net_generic(net, sit_net_id); + +	if (dev != sitn->fb_tunnel_dev) +		unregister_netdevice_queue(dev, head); +} +  static struct rtnl_link_ops sit_link_ops __read_mostly = {  	.kind		= "sit",  	.maxtype	= IFLA_IPTUN_MAX, @@ -1605,6 +1672,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = {  	.changelink	= ipip6_changelink,  	.get_size	= ipip6_get_size,  	.fill_info	= ipip6_fill_info, +	.dellink	= ipip6_dellink,  };  static struct xfrm_tunnel sit_handler __read_mostly = { @@ -1619,9 +1687,10 @@ static struct xfrm_tunnel ipip_handler __read_mostly = {  	.priority	=	2,  }; -static void __net_exit sit_destroy_tunnels(struct sit_net *sitn, struct list_head *head) +static void __net_exit sit_destroy_tunnels(struct net *net, +					   struct list_head *head)  { -	struct net *net = dev_net(sitn->fb_tunnel_dev); +	struct sit_net *sitn = net_generic(net, sit_net_id);  	struct net_device *dev, *aux;  	int prio; @@ -1696,11 +1765,10 @@ err_alloc_dev:  static void __net_exit sit_exit_net(struct net *net)  { -	struct sit_net *sitn = net_generic(net, sit_net_id);  	LIST_HEAD(list);  	rtnl_lock(); -	sit_destroy_tunnels(sitn, &list); +	sit_destroy_tunnels(net, &list);  	unregister_netdevice_many(&list);  	rtnl_unlock();  } @@ -1760,4 +1828,5 @@ xfrm_tunnel_failed:  module_init(sit_init);  module_exit(sit_cleanup);  MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK("sit");  MODULE_ALIAS_NETDEV("sit0"); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bf63ac8a49b..a822b880689 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -24,26 +24,23 @@  #define COOKIEBITS 24	/* Upper bits store count */  #define COOKIEMASK (((__u32)1 << COOKIEBITS) - 1) -/* Table must be sorted. */ +static u32 syncookie6_secret[2][16-4+SHA_DIGEST_WORDS]; + +/* RFC 2460, Section 8.3: + * [ipv6 tcp] MSS must be computed as the maximum packet size minus 60 [..] + * + * Due to IPV6_MIN_MTU=1280 the lowest possible MSS is 1220, which allows + * using higher values than ipv4 tcp syncookies. + * The other values are chosen based on ethernet (1500 and 9k MTU), plus + * one that accounts for common encap (PPPoe) overhead. Table must be sorted. + */  static __u16 const msstab[] = { -	64, -	512, -	536, -	1280 - 60, +	1280 - 60, /* IPV6_MIN_MTU - 60 */  	1480 - 60,  	1500 - 60, -	4460 - 60,  	9000 - 60,  }; -/* - * This (misnamed) value is the age of syncookie which is permitted. - * Its ideal value should be dependent on TCP_TIMEOUT_INIT and - * sysctl_tcp_retries1. It's a rather complicated formula (exponential - * backoff) to compute at runtime so it's currently hardcoded here. - */ -#define COUNTER_TRIES 4 -  static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb,  					   struct request_sock *req,  					   struct dst_entry *dst) @@ -66,14 +63,18 @@ static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS],  static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *daddr,  		       __be16 sport, __be16 dport, u32 count, int c)  { -	__u32 *tmp = __get_cpu_var(ipv6_cookie_scratch); +	__u32 *tmp; + +	net_get_random_once(syncookie6_secret, sizeof(syncookie6_secret)); + +	tmp  = __get_cpu_var(ipv6_cookie_scratch);  	/*  	 * we have 320 bits of information to hash, copy in the remaining -	 * 192 bits required for sha_transform, from the syncookie_secret +	 * 192 bits required for sha_transform, from the syncookie6_secret  	 * and overwrite the digest with the secret  	 */ -	memcpy(tmp + 10, syncookie_secret[c], 44); +	memcpy(tmp + 10, syncookie6_secret[c], 44);  	memcpy(tmp, saddr, 16);  	memcpy(tmp + 4, daddr, 16);  	tmp[8] = ((__force u32)sport << 16) + (__force u32)dport; @@ -86,8 +87,9 @@ static u32 cookie_hash(const struct in6_addr *saddr, const struct in6_addr *dadd  static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,  				   const struct in6_addr *daddr,  				   __be16 sport, __be16 dport, __u32 sseq, -				   __u32 count, __u32 data) +				   __u32 data)  { +	u32 count = tcp_cookie_time();  	return (cookie_hash(saddr, daddr, sport, dport, 0, 0) +  		sseq + (count << COOKIEBITS) +  		((cookie_hash(saddr, daddr, sport, dport, count, 1) + data) @@ -96,15 +98,14 @@ static __u32 secure_tcp_syn_cookie(const struct in6_addr *saddr,  static __u32 check_tcp_syn_cookie(__u32 cookie, const struct in6_addr *saddr,  				  const struct in6_addr *daddr, __be16 sport, -				  __be16 dport, __u32 sseq, __u32 count, -				  __u32 maxdiff) +				  __be16 dport, __u32 sseq)  { -	__u32 diff; +	__u32 diff, count = tcp_cookie_time();  	cookie -= cookie_hash(saddr, daddr, sport, dport, 0, 0) + sseq;  	diff = (count - (cookie >> COOKIEBITS)) & ((__u32) -1 >> COOKIEBITS); -	if (diff >= maxdiff) +	if (diff >= MAX_SYNCOOKIE_AGE)  		return (__u32)-1;  	return (cookie - @@ -125,8 +126,7 @@ u32 __cookie_v6_init_sequence(const struct ipv6hdr *iph,  	*mssp = msstab[mssind];  	return secure_tcp_syn_cookie(&iph->saddr, &iph->daddr, th->source, -				     th->dest, ntohl(th->seq), -				     jiffies / (HZ * 60), mssind); +				     th->dest, ntohl(th->seq), mssind);  }  EXPORT_SYMBOL_GPL(__cookie_v6_init_sequence); @@ -146,8 +146,7 @@ int __cookie_v6_check(const struct ipv6hdr *iph, const struct tcphdr *th,  {  	__u32 seq = ntohl(th->seq) - 1;  	__u32 mssind = check_tcp_syn_cookie(cookie, &iph->saddr, &iph->daddr, -					    th->source, th->dest, seq, -					    jiffies / (HZ * 60), COUNTER_TRIES); +					    th->source, th->dest, seq);  	return mssind < ARRAY_SIZE(msstab) ? msstab[mssind] : 0;  } @@ -157,7 +156,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)  {  	struct tcp_options_received tcp_opt;  	struct inet_request_sock *ireq; -	struct inet6_request_sock *ireq6;  	struct tcp_request_sock *treq;  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct tcp_sock *tp = tcp_sk(sk); @@ -194,7 +192,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)  		goto out;  	ireq = inet_rsk(req); -	ireq6 = inet6_rsk(req);  	treq = tcp_rsk(req);  	treq->listener = NULL; @@ -202,22 +199,24 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)  		goto out_free;  	req->mss = mss; -	ireq->rmt_port = th->source; -	ireq->loc_port = th->dest; -	ireq6->rmt_addr = ipv6_hdr(skb)->saddr; -	ireq6->loc_addr = ipv6_hdr(skb)->daddr; +	ireq->ir_rmt_port = th->source; +	ireq->ir_num = ntohs(th->dest); +	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; +	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;  	if (ipv6_opt_accepted(sk, skb) ||  	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||  	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {  		atomic_inc(&skb->users); -		ireq6->pktopts = skb; +		ireq->pktopts = skb;  	} -	ireq6->iif = sk->sk_bound_dev_if; +	ireq->ir_iif = sk->sk_bound_dev_if;  	/* So that link locals have meaning */  	if (!sk->sk_bound_dev_if && -	    ipv6_addr_type(&ireq6->rmt_addr) & IPV6_ADDR_LINKLOCAL) -		ireq6->iif = inet6_iif(skb); +	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) +		ireq->ir_iif = inet6_iif(skb); + +	ireq->ir_mark = inet_request_mark(sk, skb);  	req->expires = 0UL;  	req->num_retrans = 0; @@ -241,16 +240,16 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb)  		struct flowi6 fl6;  		memset(&fl6, 0, sizeof(fl6));  		fl6.flowi6_proto = IPPROTO_TCP; -		fl6.daddr = ireq6->rmt_addr; +		fl6.daddr = ireq->ir_v6_rmt_addr;  		final_p = fl6_update_dst(&fl6, np->opt, &final); -		fl6.saddr = ireq6->loc_addr; +		fl6.saddr = ireq->ir_v6_loc_addr;  		fl6.flowi6_oif = sk->sk_bound_dev_if; -		fl6.flowi6_mark = sk->sk_mark; -		fl6.fl6_dport = inet_rsk(req)->rmt_port; +		fl6.flowi6_mark = ireq->ir_mark; +		fl6.fl6_dport = ireq->ir_rmt_port;  		fl6.fl6_sport = inet_sk(sk)->inet_sport;  		security_req_classify_flow(req, flowi6_to_flowi(&fl6)); -		dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false); +		dst = ip6_dst_lookup_flow(sk, &fl6, final_p);  		if (IS_ERR(dst))  			goto out_free;  	} diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 107b2f1d90a..058f3eca2e5 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -24,6 +24,27 @@ static struct ctl_table ipv6_table_template[] = {  		.mode		= 0644,  		.proc_handler	= proc_dointvec  	}, +	{ +		.procname	= "anycast_src_echo_reply", +		.data		= &init_net.ipv6.sysctl.anycast_src_echo_reply, +		.maxlen		= sizeof(int), +		.mode		= 0644, +		.proc_handler	= proc_dointvec +	}, +	{ +		.procname	= "flowlabel_consistency", +		.data		= &init_net.ipv6.sysctl.flowlabel_consistency, +		.maxlen		= sizeof(int), +		.mode		= 0644, +		.proc_handler	= proc_dointvec +	}, +	{ +		.procname	= "fwmark_reflect", +		.data		= &init_net.ipv6.sysctl.fwmark_reflect, +		.maxlen		= sizeof(int), +		.mode		= 0644, +		.proc_handler	= proc_dointvec +	},  	{ }  }; @@ -51,6 +72,8 @@ static int __net_init ipv6_sysctl_net_init(struct net *net)  	if (!ipv6_table)  		goto out;  	ipv6_table[0].data = &net->ipv6.sysctl.bindv6only; +	ipv6_table[1].data = &net->ipv6.sysctl.anycast_src_echo_reply; +	ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency;  	ipv6_route_table = ipv6_route_sysctl_init(net);  	if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5c71501fc91..229239ad96b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -39,7 +39,7 @@  #include <linux/ipsec.h>  #include <linux/times.h>  #include <linux/slab.h> - +#include <linux/uaccess.h>  #include <linux/ipv6.h>  #include <linux/icmpv6.h>  #include <linux/random.h> @@ -65,8 +65,6 @@  #include <net/tcp_memcontrol.h>  #include <net/busy_poll.h> -#include <asm/uaccess.h> -  #include <linux/proc_fs.h>  #include <linux/seq_file.h> @@ -156,7 +154,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,  			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);  			if (flowlabel == NULL)  				return -EINVAL; -			usin->sin6_addr = flowlabel->dst;  			fl6_sock_release(flowlabel);  		}  	} @@ -165,12 +162,12 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,  	 *	connect() to INADDR_ANY means loopback (BSD'ism).  	 */ -	if(ipv6_addr_any(&usin->sin6_addr)) +	if (ipv6_addr_any(&usin->sin6_addr))  		usin->sin6_addr.s6_addr[15] = 0x1;  	addr_type = ipv6_addr_type(&usin->sin6_addr); -	if(addr_type & IPV6_ADDR_MULTICAST) +	if (addr_type & IPV6_ADDR_MULTICAST)  		return -ENETUNREACH;  	if (addr_type&IPV6_ADDR_LINKLOCAL) { @@ -192,13 +189,13 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,  	}  	if (tp->rx_opt.ts_recent_stamp && -	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) { +	    !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {  		tp->rx_opt.ts_recent = 0;  		tp->rx_opt.ts_recent_stamp = 0;  		tp->write_seq = 0;  	} -	np->daddr = usin->sin6_addr; +	sk->sk_v6_daddr = usin->sin6_addr;  	np->flow_label = fl6.flowlabel;  	/* @@ -237,17 +234,17 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,  		} else {  			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);  			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, -					       &np->rcv_saddr); +					       &sk->sk_v6_rcv_saddr);  		}  		return err;  	} -	if (!ipv6_addr_any(&np->rcv_saddr)) -		saddr = &np->rcv_saddr; +	if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) +		saddr = &sk->sk_v6_rcv_saddr;  	fl6.flowi6_proto = IPPROTO_TCP; -	fl6.daddr = np->daddr; +	fl6.daddr = sk->sk_v6_daddr;  	fl6.saddr = saddr ? *saddr : np->saddr;  	fl6.flowi6_oif = sk->sk_bound_dev_if;  	fl6.flowi6_mark = sk->sk_mark; @@ -258,7 +255,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,  	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); -	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true); +	dst = ip6_dst_lookup_flow(sk, &fl6, final_p);  	if (IS_ERR(dst)) {  		err = PTR_ERR(dst);  		goto failure; @@ -266,7 +263,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,  	if (saddr == NULL) {  		saddr = &fl6.saddr; -		np->rcv_saddr = *saddr; +		sk->sk_v6_rcv_saddr = *saddr;  	}  	/* set the source address */ @@ -279,7 +276,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,  	rt = (struct rt6_info *) dst;  	if (tcp_death_row.sysctl_tw_recycle &&  	    !tp->rx_opt.ts_recent_stamp && -	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) +	    ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr))  		tcp_fetch_timewait_stamp(sk, dst);  	icsk->icsk_ext_hdr_len = 0; @@ -298,7 +295,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,  	if (!tp->write_seq && likely(!tp->repair))  		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32, -							     np->daddr.s6_addr32, +							     sk->sk_v6_daddr.s6_addr32,  							     inet->inet_sport,  							     inet->inet_dport); @@ -337,13 +334,14 @@ static void tcp_v6_mtu_reduced(struct sock *sk)  static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,  		u8 type, u8 code, int offset, __be32 info)  { -	const struct ipv6hdr *hdr = (const struct ipv6hdr*)skb->data; +	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;  	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);  	struct ipv6_pinfo *np;  	struct sock *sk;  	int err;  	struct tcp_sock *tp; -	__u32 seq; +	struct request_sock *fastopen; +	__u32 seq, snd_una;  	struct net *net = dev_net(skb->dev);  	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, @@ -374,8 +372,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,  	tp = tcp_sk(sk);  	seq = ntohl(th->seq); +	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ +	fastopen = tp->fastopen_rsk; +	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;  	if (sk->sk_state != TCP_LISTEN && -	    !between(seq, tp->snd_una, tp->snd_nxt)) { +	    !between(seq, snd_una, tp->snd_nxt)) {  		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);  		goto out;  	} @@ -398,6 +399,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,  		if (sk->sk_state == TCP_LISTEN)  			goto out; +		if (!ip6_sk_accept_pmtu(sk)) +			goto out; +  		tp->mtu_info = ntohl(info);  		if (!sock_owned_by_user(sk))  			tcp_v6_mtu_reduced(sk); @@ -436,8 +440,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,  		goto out;  	case TCP_SYN_SENT: -	case TCP_SYN_RECV:  /* Cannot happen. -			       It can, it SYNs are crossed. --ANK */ +	case TCP_SYN_RECV: +		/* Only in fast or simultaneous open. If a fast open socket is +		 * is already accepted it is treated as a connected one below. +		 */ +		if (fastopen && fastopen->sk == NULL) +			break; +  		if (!sock_owned_by_user(sk)) {  			sk->sk_err = err;  			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */ @@ -463,23 +472,28 @@ out:  static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,  			      struct flowi6 *fl6,  			      struct request_sock *req, -			      u16 queue_mapping) +			      u16 queue_mapping, +			      struct tcp_fastopen_cookie *foc)  { -	struct inet6_request_sock *treq = inet6_rsk(req); +	struct inet_request_sock *ireq = inet_rsk(req);  	struct ipv6_pinfo *np = inet6_sk(sk); -	struct sk_buff * skb; +	struct sk_buff *skb;  	int err = -ENOMEM;  	/* First, grab a route. */  	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)  		goto done; -	skb = tcp_make_synack(sk, dst, req, NULL); +	skb = tcp_make_synack(sk, dst, req, foc);  	if (skb) { -		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); +		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr, +				    &ireq->ir_v6_rmt_addr); + +		fl6->daddr = ireq->ir_v6_rmt_addr; +		if (np->repflow && (ireq->pktopts != NULL)) +			fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); -		fl6->daddr = treq->rmt_addr;  		skb_set_queue_mapping(skb, queue_mapping);  		err = ip6_xmit(sk, skb, fl6, np->opt, np->tclass);  		err = net_xmit_eval(err); @@ -494,15 +508,17 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)  	struct flowi6 fl6;  	int res; -	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0); -	if (!res) +	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL); +	if (!res) {  		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); +		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); +	}  	return res;  }  static void tcp_v6_reqsk_destructor(struct request_sock *req)  { -	kfree_skb(inet6_rsk(req)->pktopts); +	kfree_skb(inet_rsk(req)->pktopts);  }  #ifdef CONFIG_TCP_MD5SIG @@ -515,17 +531,17 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,  static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,  						struct sock *addr_sk)  { -	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr); +	return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);  }  static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,  						      struct request_sock *req)  { -	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr); +	return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr);  } -static int tcp_v6_parse_md5_keys (struct sock *sk, char __user *optval, -				  int optlen) +static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, +				 int optlen)  {  	struct tcp_md5sig cmd;  	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr; @@ -621,10 +637,10 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,  	if (sk) {  		saddr = &inet6_sk(sk)->saddr; -		daddr = &inet6_sk(sk)->daddr; +		daddr = &sk->sk_v6_daddr;  	} else if (req) { -		saddr = &inet6_rsk(req)->loc_addr; -		daddr = &inet6_rsk(req)->rmt_addr; +		saddr = &inet_rsk(req)->ir_v6_loc_addr; +		daddr = &inet_rsk(req)->ir_v6_rmt_addr;  	} else {  		const struct ipv6hdr *ip6h = ipv6_hdr(skb);  		saddr = &ip6h->saddr; @@ -709,7 +725,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {  	.send_ack	=	tcp_v6_reqsk_send_ack,  	.destructor	=	tcp_v6_reqsk_destructor,  	.send_reset	=	tcp_v6_send_reset, -	.syn_ack_timeout = 	tcp_syn_ack_timeout, +	.syn_ack_timeout =	tcp_syn_ack_timeout,  };  #ifdef CONFIG_TCP_MD5SIG @@ -720,8 +736,9 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {  #endif  static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win, -				 u32 tsval, u32 tsecr, -				 struct tcp_md5sig_key *key, int rst, u8 tclass) +				 u32 tsval, u32 tsecr, int oif, +				 struct tcp_md5sig_key *key, int rst, u8 tclass, +				 u32 label)  {  	const struct tcphdr *th = tcp_hdr(skb);  	struct tcphdr *t1; @@ -783,6 +800,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,  	memset(&fl6, 0, sizeof(fl6));  	fl6.daddr = ipv6_hdr(skb)->saddr;  	fl6.saddr = ipv6_hdr(skb)->daddr; +	fl6.flowlabel = label;  	buff->ip_summed = CHECKSUM_PARTIAL;  	buff->csum = 0; @@ -790,8 +808,11 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,  	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);  	fl6.flowi6_proto = IPPROTO_TCP; -	if (ipv6_addr_type(&fl6.daddr) & IPV6_ADDR_LINKLOCAL) +	if (rt6_need_strict(&fl6.daddr) && !oif)  		fl6.flowi6_oif = inet6_iif(skb); +	else +		fl6.flowi6_oif = oif; +	fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark);  	fl6.fl6_dport = t1->dest;  	fl6.fl6_sport = t1->source;  	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); @@ -800,7 +821,7 @@ static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,  	 * Underlying function will use this to retrieve the network  	 * namespace  	 */ -	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false); +	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);  	if (!IS_ERR(dst)) {  		skb_dst_set(buff, dst);  		ip6_xmit(ctl_sk, buff, &fl6, NULL, tclass); @@ -825,6 +846,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)  	int genhash;  	struct sock *sk1 = NULL;  #endif +	int oif;  	if (th->rst)  		return; @@ -868,7 +890,8 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)  		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -  			  (th->doff << 2); -	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, key, 1, 0); +	oif = sk ? sk->sk_bound_dev_if : 0; +	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);  #ifdef CONFIG_TCP_MD5SIG  release_sk1: @@ -880,10 +903,12 @@ release_sk1:  }  static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, -			    u32 win, u32 tsval, u32 tsecr, -			    struct tcp_md5sig_key *key, u8 tclass) +			    u32 win, u32 tsval, u32 tsecr, int oif, +			    struct tcp_md5sig_key *key, u8 tclass, +			    u32 label)  { -	tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, key, 0, tclass); +	tcp_v6_send_response(skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, +			     label);  }  static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) @@ -894,8 +919,8 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)  	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,  			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,  			tcp_time_stamp + tcptw->tw_ts_offset, -			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw), -			tw->tw_tclass); +			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), +			tw->tw_tclass, (tw->tw_flowlabel << 12));  	inet_twsk_put(tw);  } @@ -903,13 +928,19 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)  static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,  				  struct request_sock *req)  { -	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, -			req->rcv_wnd, tcp_time_stamp, req->ts_recent, -			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), 0); +	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV +	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open. +	 */ +	tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ? +			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt, +			tcp_rsk(req)->rcv_nxt, +			req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if, +			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr), +			0, 0);  } -static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) +static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)  {  	struct request_sock *req, **prev;  	const struct tcphdr *th = tcp_hdr(skb); @@ -949,13 +980,15 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)  {  	struct tcp_options_received tmp_opt;  	struct request_sock *req; -	struct inet6_request_sock *treq; +	struct inet_request_sock *ireq;  	struct ipv6_pinfo *np = inet6_sk(sk);  	struct tcp_sock *tp = tcp_sk(sk);  	__u32 isn = TCP_SKB_CB(skb)->when;  	struct dst_entry *dst = NULL; +	struct tcp_fastopen_cookie foc = { .len = -1 }; +	bool want_cookie = false, fastopen;  	struct flowi6 fl6; -	bool want_cookie = false; +	int err;  	if (skb->protocol == htons(ETH_P_IP))  		return tcp_v4_conn_request(sk, skb); @@ -986,7 +1019,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)  	tcp_clear_options(&tmp_opt);  	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);  	tmp_opt.user_mss = tp->rx_opt.user_mss; -	tcp_parse_options(skb, &tmp_opt, 0, NULL); +	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);  	if (want_cookie && !tmp_opt.saw_tstamp)  		tcp_clear_options(&tmp_opt); @@ -994,25 +1027,27 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)  	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;  	tcp_openreq_init(req, &tmp_opt, skb); -	treq = inet6_rsk(req); -	treq->rmt_addr = ipv6_hdr(skb)->saddr; -	treq->loc_addr = ipv6_hdr(skb)->daddr; +	ireq = inet_rsk(req); +	ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; +	ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;  	if (!want_cookie || tmp_opt.tstamp_ok)  		TCP_ECN_create_request(req, skb, sock_net(sk)); -	treq->iif = sk->sk_bound_dev_if; +	ireq->ir_iif = sk->sk_bound_dev_if; +	ireq->ir_mark = inet_request_mark(sk, skb);  	/* So that link locals have meaning */  	if (!sk->sk_bound_dev_if && -	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL) -		treq->iif = inet6_iif(skb); +	    ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) +		ireq->ir_iif = inet6_iif(skb);  	if (!isn) {  		if (ipv6_opt_accepted(sk, skb) ||  		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo || -		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) { +		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim || +		    np->repflow) {  			atomic_inc(&skb->users); -			treq->pktopts = skb; +			ireq->pktopts = skb;  		}  		if (want_cookie) { @@ -1051,26 +1086,34 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)  			 * to the moment of synflood.  			 */  			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n", -				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source)); +				       &ireq->ir_v6_rmt_addr, ntohs(tcp_hdr(skb)->source));  			goto drop_and_release;  		}  		isn = tcp_v6_init_sequence(skb);  	}  have_isn: -	tcp_rsk(req)->snt_isn = isn;  	if (security_inet_conn_request(sk, skb, req))  		goto drop_and_release; -	if (tcp_v6_send_synack(sk, dst, &fl6, req, -			       skb_get_queue_mapping(skb)) || -	    want_cookie) +	if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)  		goto drop_and_free; +	tcp_rsk(req)->snt_isn = isn;  	tcp_rsk(req)->snt_synack = tcp_time_stamp; -	tcp_rsk(req)->listener = NULL; -	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); +	tcp_openreq_init_rwin(req, sk, dst); +	fastopen = !want_cookie && +		   tcp_try_fastopen(sk, skb, req, &foc, dst); +	err = tcp_v6_send_synack(sk, dst, &fl6, req, +				 skb_get_queue_mapping(skb), &foc); +	if (!fastopen) { +		if (err || want_cookie) +			goto drop_and_free; + +		tcp_rsk(req)->listener = NULL; +		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); +	}  	return 0;  drop_and_release: @@ -1082,11 +1125,11 @@ drop:  	return 0; /* don't send reset */  } -static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, -					  struct request_sock *req, -					  struct dst_entry *dst) +static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, +					 struct request_sock *req, +					 struct dst_entry *dst)  { -	struct inet6_request_sock *treq; +	struct inet_request_sock *ireq;  	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);  	struct tcp6_sock *newtcp6sk;  	struct inet_sock *newinet; @@ -1116,11 +1159,11 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  		memcpy(newnp, np, sizeof(struct ipv6_pinfo)); -		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr); +		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr);  		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); -		newnp->rcv_saddr = newnp->saddr; +		newsk->sk_v6_rcv_saddr = newnp->saddr;  		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;  		newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1134,7 +1177,9 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  		newnp->opt	   = NULL;  		newnp->mcast_oif   = inet6_iif(skb);  		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit; -		newnp->rcv_tclass  = ipv6_get_dsfield(ipv6_hdr(skb)); +		newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); +		if (np->repflow) +			newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));  		/*  		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count @@ -1151,7 +1196,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  		return newsk;  	} -	treq = inet6_rsk(req); +	ireq = inet_rsk(req);  	if (sk_acceptq_is_full(sk))  		goto out_overflow; @@ -1185,10 +1230,10 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  	memcpy(newnp, np, sizeof(struct ipv6_pinfo)); -	newnp->daddr = treq->rmt_addr; -	newnp->saddr = treq->loc_addr; -	newnp->rcv_saddr = treq->loc_addr; -	newsk->sk_bound_dev_if = treq->iif; +	newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr; +	newnp->saddr = ireq->ir_v6_loc_addr; +	newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr; +	newsk->sk_bound_dev_if = ireq->ir_iif;  	/* Now IPv6 options... @@ -1203,18 +1248,20 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  	/* Clone pktoptions received with SYN */  	newnp->pktoptions = NULL; -	if (treq->pktopts != NULL) { -		newnp->pktoptions = skb_clone(treq->pktopts, +	if (ireq->pktopts != NULL) { +		newnp->pktoptions = skb_clone(ireq->pktopts,  					      sk_gfp_atomic(sk, GFP_ATOMIC)); -		consume_skb(treq->pktopts); -		treq->pktopts = NULL; +		consume_skb(ireq->pktopts); +		ireq->pktopts = NULL;  		if (newnp->pktoptions)  			skb_set_owner_r(newnp->pktoptions, newsk);  	}  	newnp->opt	  = NULL;  	newnp->mcast_oif  = inet6_iif(skb);  	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit; -	newnp->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(skb)); +	newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb)); +	if (np->repflow) +		newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));  	/* Clone native IPv6 options from listening socket (if any) @@ -1230,7 +1277,6 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +  						     newnp->opt->opt_flen); -	tcp_mtup_init(newsk);  	tcp_sync_mss(newsk, dst_mtu(dst));  	newtp->advmss = dst_metric_advmss(dst);  	if (tcp_sk(sk)->rx_opt.user_mss && @@ -1244,13 +1290,14 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,  #ifdef CONFIG_TCP_MD5SIG  	/* Copy over the MD5 key from the original socket */ -	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) { +	key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); +	if (key != NULL) {  		/* We're using one, so create a matching key  		 * on the newsk structure. If we fail to get  		 * memory, then we end up not copying the key  		 * across. Shucks.  		 */ -		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, +		tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,  			       AF_INET6, key->key, key->keylen,  			       sk_gfp_atomic(sk, GFP_ATOMIC));  	} @@ -1274,26 +1321,6 @@ out:  	return NULL;  } -static __sum16 tcp_v6_checksum_init(struct sk_buff *skb) -{ -	if (skb->ip_summed == CHECKSUM_COMPLETE) { -		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr, -				  &ipv6_hdr(skb)->daddr, skb->csum)) { -			skb->ip_summed = CHECKSUM_UNNECESSARY; -			return 0; -		} -	} - -	skb->csum = ~csum_unfold(tcp_v6_check(skb->len, -					      &ipv6_hdr(skb)->saddr, -					      &ipv6_hdr(skb)->daddr, 0)); - -	if (skb->len <= 76) { -		return __skb_checksum_complete(skb); -	} -	return 0; -} -  /* The socket must have it's spinlock held when we get   * here.   * @@ -1320,7 +1347,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)  		return tcp_v4_do_rcv(sk, skb);  #ifdef CONFIG_TCP_MD5SIG -	if (tcp_v6_inbound_md5_hash (sk, skb)) +	if (tcp_v6_inbound_md5_hash(sk, skb))  		goto discard;  #endif @@ -1379,7 +1406,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)  		 * otherwise we just shortcircuit this and continue with  		 * the new socket..  		 */ -		if(nsk != sk) { +		if (nsk != sk) {  			sock_rps_save_rxhash(nsk, skb);  			if (tcp_child_process(sk, nsk, skb))  				goto reset; @@ -1424,8 +1451,10 @@ ipv6_pktoptions:  			np->mcast_oif = inet6_iif(opt_skb);  		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)  			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; -		if (np->rxopt.bits.rxtclass) -			np->rcv_tclass = ipv6_get_dsfield(ipv6_hdr(opt_skb)); +		if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) +			np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); +		if (np->repflow) +			np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));  		if (ipv6_opt_accepted(sk, opt_skb)) {  			skb_set_owner_r(opt_skb, sk);  			opt_skb = xchg(&np->pktoptions, opt_skb); @@ -1465,7 +1494,7 @@ static int tcp_v6_rcv(struct sk_buff *skb)  	if (!pskb_may_pull(skb, th->doff*4))  		goto discard_it; -	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb)) +	if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))  		goto csum_error;  	th = tcp_hdr(skb); @@ -1585,7 +1614,8 @@ do_time_wait:  		break;  	case TCP_TW_RST:  		goto no_tcp_socket; -	case TCP_TW_SUCCESS:; +	case TCP_TW_SUCCESS: +		;  	}  	goto discard_it;  } @@ -1630,7 +1660,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb)  static struct timewait_sock_ops tcp6_timewait_sock_ops = {  	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),  	.twsk_unique	= tcp_twsk_unique, -	.twsk_destructor= tcp_twsk_destructor, +	.twsk_destructor = tcp_twsk_destructor,  };  static const struct inet_connection_sock_af_ops ipv6_specific = { @@ -1664,7 +1694,6 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {  /*   *	TCP over IPv4 via INET6 API   */ -  static const struct inet_connection_sock_af_ops ipv6_mapped = {  	.queue_xmit	   = ip_queue_xmit,  	.send_check	   = tcp_v4_send_check, @@ -1722,8 +1751,8 @@ static void get_openreq6(struct seq_file *seq,  			 const struct sock *sk, struct request_sock *req, int i, kuid_t uid)  {  	int ttd = req->expires - jiffies; -	const struct in6_addr *src = &inet6_rsk(req)->loc_addr; -	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr; +	const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; +	const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;  	if (ttd < 0)  		ttd = 0; @@ -1734,12 +1763,12 @@ static void get_openreq6(struct seq_file *seq,  		   i,  		   src->s6_addr32[0], src->s6_addr32[1],  		   src->s6_addr32[2], src->s6_addr32[3], -		   ntohs(inet_rsk(req)->loc_port), +		   inet_rsk(req)->ir_num,  		   dest->s6_addr32[0], dest->s6_addr32[1],  		   dest->s6_addr32[2], dest->s6_addr32[3], -		   ntohs(inet_rsk(req)->rmt_port), +		   ntohs(inet_rsk(req)->ir_rmt_port),  		   TCP_SYN_RECV, -		   0,0, /* could print option size, but that is af dependent. */ +		   0, 0, /* could print option size, but that is af dependent. */  		   1,   /* timers active (only the expire timer) */  		   jiffies_to_clock_t(ttd),  		   req->num_timeout, @@ -1758,10 +1787,10 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)  	const struct inet_sock *inet = inet_sk(sp);  	const struct tcp_sock *tp = tcp_sk(sp);  	const struct inet_connection_sock *icsk = inet_csk(sp); -	const struct ipv6_pinfo *np = inet6_sk(sp); +	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq; -	dest  = &np->daddr; -	src   = &np->rcv_saddr; +	dest  = &sp->sk_v6_daddr; +	src   = &sp->sk_v6_rcv_saddr;  	destp = ntohs(inet->inet_dport);  	srcp  = ntohs(inet->inet_sport); @@ -1799,9 +1828,11 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)  		   atomic_read(&sp->sk_refcnt), sp,  		   jiffies_to_clock_t(icsk->icsk_rto),  		   jiffies_to_clock_t(icsk->icsk_ack.ato), -		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong, +		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,  		   tp->snd_cwnd, -		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh +		   sp->sk_state == TCP_LISTEN ? +			(fastopenq ? fastopenq->max_qlen : 0) : +			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)  		   );  } @@ -1810,11 +1841,10 @@ static void get_timewait6_sock(struct seq_file *seq,  {  	const struct in6_addr *dest, *src;  	__u16 destp, srcp; -	const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); -	long delta = tw->tw_ttd - jiffies; +	s32 delta = tw->tw_ttd - inet_tw_time_stamp(); -	dest = &tw6->tw_v6_daddr; -	src  = &tw6->tw_v6_rcv_saddr; +	dest = &tw->tw_v6_daddr; +	src  = &tw->tw_v6_rcv_saddr;  	destp = ntohs(tw->tw_dport);  	srcp  = ntohs(tw->tw_sport); @@ -1834,6 +1864,7 @@ static void get_timewait6_sock(struct seq_file *seq,  static int tcp6_seq_show(struct seq_file *seq, void *v)  {  	struct tcp_iter_state *st; +	struct sock *sk = v;  	if (v == SEQ_START_TOKEN) {  		seq_puts(seq, @@ -1849,14 +1880,14 @@ static int tcp6_seq_show(struct seq_file *seq, void *v)  	switch (st->state) {  	case TCP_SEQ_STATE_LISTENING:  	case TCP_SEQ_STATE_ESTABLISHED: -		get_tcp6_sock(seq, v, st->num); +		if (sk->sk_state == TCP_TIME_WAIT) +			get_timewait6_sock(seq, v, st->num); +		else +			get_tcp6_sock(seq, v, st->num);  		break;  	case TCP_SEQ_STATE_OPENREQ:  		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);  		break; -	case TCP_SEQ_STATE_TIME_WAIT: -		get_timewait6_sock(seq, v, st->num); -		break;  	}  out:  	return 0; @@ -1929,6 +1960,7 @@ struct proto tcpv6_prot = {  	.memory_allocated	= &tcp_memory_allocated,  	.memory_pressure	= &tcp_memory_pressure,  	.orphan_count		= &tcp_orphan_count, +	.sysctl_mem		= sysctl_tcp_mem,  	.sysctl_wmem		= sysctl_tcp_wmem,  	.sysctl_rmem		= sysctl_tcp_rmem,  	.max_header		= MAX_TCP_HEADER, @@ -1960,7 +1992,6 @@ static struct inet_protosw tcpv6_protosw = {  	.protocol	=	IPPROTO_TCP,  	.prot		=	&tcpv6_prot,  	.ops		=	&inet6_stream_ops, -	.no_check	=	0,  	.flags		=	INET_PROTOSW_PERMANENT |  				INET_PROTOSW_ICSK,  }; diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index 2ec6bf6a0aa..01b0ff9a0c2 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -37,45 +37,43 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,  {  	const struct ipv6hdr *iph = skb_gro_network_header(skb);  	__wsum wsum; -	__sum16 sum; + +	/* Don't bother verifying checksum if we're going to flush anyway. */ +	if (NAPI_GRO_CB(skb)->flush) +		goto skip_csum; + +	wsum = NAPI_GRO_CB(skb)->csum;  	switch (skb->ip_summed) { +	case CHECKSUM_NONE: +		wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), +				    wsum); + +		/* fall through */ +  	case CHECKSUM_COMPLETE:  		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr, -				  skb->csum)) { +				  wsum)) {  			skb->ip_summed = CHECKSUM_UNNECESSARY;  			break;  		} -flush: +  		NAPI_GRO_CB(skb)->flush = 1;  		return NULL; - -	case CHECKSUM_NONE: -		wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, -						    skb_gro_len(skb), -						    IPPROTO_TCP, 0)); -		sum = csum_fold(skb_checksum(skb, -					     skb_gro_offset(skb), -					     skb_gro_len(skb), -					     wsum)); -		if (sum) -			goto flush; - -		skb->ip_summed = CHECKSUM_UNNECESSARY; -		break;  	} +skip_csum:  	return tcp_gro_receive(head, skb);  } -static int tcp6_gro_complete(struct sk_buff *skb) +static int tcp6_gro_complete(struct sk_buff *skb, int thoff)  {  	const struct ipv6hdr *iph = ipv6_hdr(skb);  	struct tcphdr *th = tcp_hdr(skb); -	th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb), -				  &iph->saddr, &iph->daddr, 0); -	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; +	th->check = ~tcp_v6_check(skb->len - thoff, &iph->saddr, +				  &iph->daddr, 0); +	skb_shinfo(skb)->gso_type |= SKB_GSO_TCPV6;  	return tcp_gro_complete(skb);  } @@ -83,7 +81,7 @@ static int tcp6_gro_complete(struct sk_buff *skb)  static const struct net_offload tcpv6_offload = {  	.callbacks = {  		.gso_send_check	=	tcp_v6_gso_send_check, -		.gso_segment	=	tcp_tso_segment, +		.gso_segment	=	tcp_gso_segment,  		.gro_receive	=	tcp6_gro_receive,  		.gro_complete	=	tcp6_gro_complete,  	}, diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 4b0f50d9a96..2c4e4c5c761 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -12,8 +12,7 @@   * GNU General Public License for more details.   *   * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * along with this program; if not, see <http://www.gnu.org/licenses/>.   *   * Authors	Mitsuru KANDA  <mk@linux-ipv6.org>   * 		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 72b7eaaf3ca..7092ff78fd8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,22 +53,42 @@  #include <trace/events/skb.h>  #include "udp_impl.h" +static unsigned int udp6_ehashfn(struct net *net, +				  const struct in6_addr *laddr, +				  const u16 lport, +				  const struct in6_addr *faddr, +				  const __be16 fport) +{ +	static u32 udp6_ehash_secret __read_mostly; +	static u32 udp_ipv6_hash_secret __read_mostly; + +	u32 lhash, fhash; + +	net_get_random_once(&udp6_ehash_secret, +			    sizeof(udp6_ehash_secret)); +	net_get_random_once(&udp_ipv6_hash_secret, +			    sizeof(udp_ipv6_hash_secret)); + +	lhash = (__force u32)laddr->s6_addr32[3]; +	fhash = __ipv6_addr_jhash(faddr, udp_ipv6_hash_secret); + +	return __inet6_ehashfn(lhash, lport, fhash, fport, +			       udp_ipv6_hash_secret + net_hash_mix(net)); +} +  int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)  { -	const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr;  	const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); -	__be32 sk1_rcv_saddr = sk_rcv_saddr(sk); -	__be32 sk2_rcv_saddr = sk_rcv_saddr(sk2);  	int sk_ipv6only = ipv6_only_sock(sk);  	int sk2_ipv6only = inet_v6_ipv6only(sk2); -	int addr_type = ipv6_addr_type(sk_rcv_saddr6); +	int addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);  	int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED;  	/* if both are mapped, treat as IPv4 */  	if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED)  		return (!sk2_ipv6only && -			(!sk1_rcv_saddr || !sk2_rcv_saddr || -			  sk1_rcv_saddr == sk2_rcv_saddr)); +			(!sk->sk_rcv_saddr || !sk2->sk_rcv_saddr || +			  sk->sk_rcv_saddr == sk2->sk_rcv_saddr));  	if (addr_type2 == IPV6_ADDR_ANY &&  	    !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) @@ -79,7 +99,7 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2)  		return 1;  	if (sk2_rcv_saddr6 && -	    ipv6_addr_equal(sk_rcv_saddr6, sk2_rcv_saddr6)) +	    ipv6_addr_equal(&sk->sk_v6_rcv_saddr, sk2_rcv_saddr6))  		return 1;  	return 0; @@ -107,7 +127,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)  	unsigned int hash2_nulladdr =  		udp6_portaddr_hash(sock_net(sk), &in6addr_any, snum);  	unsigned int hash2_partial = -		udp6_portaddr_hash(sock_net(sk), &inet6_sk(sk)->rcv_saddr, 0); +		udp6_portaddr_hash(sock_net(sk), &sk->sk_v6_rcv_saddr, 0);  	/* precompute partial secondary hash */  	udp_sk(sk)->udp_portaddr_hash = hash2_partial; @@ -117,7 +137,7 @@ int udp_v6_get_port(struct sock *sk, unsigned short snum)  static void udp_v6_rehash(struct sock *sk)  {  	u16 new_hash = udp6_portaddr_hash(sock_net(sk), -					  &inet6_sk(sk)->rcv_saddr, +					  &sk->sk_v6_rcv_saddr,  					  inet_sk(sk)->inet_num);  	udp_lib_rehash(sk, new_hash); @@ -133,7 +153,6 @@ static inline int compute_score(struct sock *sk, struct net *net,  	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&  			sk->sk_family == PF_INET6) { -		struct ipv6_pinfo *np = inet6_sk(sk);  		struct inet_sock *inet = inet_sk(sk);  		score = 0; @@ -142,13 +161,13 @@ static inline int compute_score(struct sock *sk, struct net *net,  				return -1;  			score++;  		} -		if (!ipv6_addr_any(&np->rcv_saddr)) { -			if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) +		if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { +			if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))  				return -1;  			score++;  		} -		if (!ipv6_addr_any(&np->daddr)) { -			if (!ipv6_addr_equal(&np->daddr, saddr)) +		if (!ipv6_addr_any(&sk->sk_v6_daddr)) { +			if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))  				return -1;  			score++;  		} @@ -171,10 +190,9 @@ static inline int compute_score2(struct sock *sk, struct net *net,  	if (net_eq(sock_net(sk), net) && udp_sk(sk)->udp_port_hash == hnum &&  			sk->sk_family == PF_INET6) { -		struct ipv6_pinfo *np = inet6_sk(sk);  		struct inet_sock *inet = inet_sk(sk); -		if (!ipv6_addr_equal(&np->rcv_saddr, daddr)) +		if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr))  			return -1;  		score = 0;  		if (inet->inet_dport) { @@ -182,8 +200,8 @@ static inline int compute_score2(struct sock *sk, struct net *net,  				return -1;  			score++;  		} -		if (!ipv6_addr_any(&np->daddr)) { -			if (!ipv6_addr_equal(&np->daddr, saddr)) +		if (!ipv6_addr_any(&sk->sk_v6_daddr)) { +			if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr))  				return -1;  			score++;  		} @@ -219,8 +237,8 @@ begin:  			badness = score;  			reuseport = sk->sk_reuseport;  			if (reuseport) { -				hash = inet6_ehashfn(net, daddr, hnum, -						     saddr, sport); +				hash = udp6_ehashfn(net, daddr, hnum, +						    saddr, sport);  				matches = 1;  			} else if (score == SCORE2_MAX)  				goto exact_match; @@ -300,8 +318,8 @@ begin:  			badness = score;  			reuseport = sk->sk_reuseport;  			if (reuseport) { -				hash = inet6_ehashfn(net, daddr, hnum, -						     saddr, sport); +				hash = udp6_ehashfn(net, daddr, hnum, +						    saddr, sport);  				matches = 1;  			}  		} else if (score == badness && reuseport) { @@ -374,14 +392,11 @@ int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk,  	int is_udp4;  	bool slow; -	if (addr_len) -		*addr_len = sizeof(struct sockaddr_in6); -  	if (flags & MSG_ERRQUEUE) -		return ipv6_recv_error(sk, msg, len); +		return ipv6_recv_error(sk, msg, len, addr_len);  	if (np->rxpmtu && np->rxopt.bits.rxpmtu) -		return ipv6_recv_rxpmtu(sk, msg, len); +		return ipv6_recv_rxpmtu(sk, msg, len, addr_len);  try_again:  	skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0), @@ -445,9 +460,7 @@ try_again:  	/* Copy the address. */  	if (msg->msg_name) { -		struct sockaddr_in6 *sin6; - -		sin6 = (struct sockaddr_in6 *) msg->msg_name; +		DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);  		sin6->sin6_family = AF_INET6;  		sin6->sin6_port = udp_hdr(skb)->source;  		sin6->sin6_flowinfo = 0; @@ -462,14 +475,18 @@ try_again:  				ipv6_iface_scope_id(&sin6->sin6_addr,  						    IP6CB(skb)->iif);  		} - +		*addr_len = sizeof(*sin6);  	} + +	if (np->rxopt.all) +		ip6_datagram_recv_common_ctl(sk, msg, skb); +  	if (is_udp4) {  		if (inet->cmsg_flags)  			ip_cmsg_recv(msg, skb);  	} else {  		if (np->rxopt.all) -			ip6_datagram_recv_ctl(sk, msg, skb); +			ip6_datagram_recv_specific_ctl(sk, msg, skb);  	}  	err = copied; @@ -523,8 +540,11 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt,  	if (sk == NULL)  		return; -	if (type == ICMPV6_PKT_TOOBIG) +	if (type == ICMPV6_PKT_TOOBIG) { +		if (!ip6_sk_accept_pmtu(sk)) +			goto out;  		ip6_sk_update_pmtu(skb, sk, info); +	}  	if (type == NDISC_REDIRECT) {  		ip6_sk_redirect(skb, sk);  		goto out; @@ -551,8 +571,10 @@ static int __udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  {  	int rc; -	if (!ipv6_addr_any(&inet6_sk(sk)->daddr)) +	if (!ipv6_addr_any(&sk->sk_v6_daddr)) {  		sock_rps_save_rxhash(sk, skb); +		sk_mark_napi_id(sk, skb); +	}  	rc = sock_queue_rcv_skb(sk, skb);  	if (rc < 0) { @@ -612,6 +634,10 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  		if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) {  			int ret; +			/* Verify checksum before giving to encap */ +			if (udp_lib_checksum_complete(skb)) +				goto csum_error; +  			ret = encap_rcv(sk, skb);  			if (ret <= 0) {  				UDP_INC_STATS_BH(sock_net(sk), @@ -648,8 +674,11 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  			goto csum_error;  	} -	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) +	if (sk_rcvqueues_full(sk, skb, sk->sk_rcvbuf)) { +		UDP6_INC_STATS_BH(sock_net(sk), +				  UDP_MIB_RCVBUFERRORS, is_udplite);  		goto drop; +	}  	skb_dst_drop(skb); @@ -664,6 +693,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)  	bh_unlock_sock(sk);  	return rc; +  csum_error:  	UDP6_INC_STATS_BH(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite);  drop: @@ -679,36 +709,34 @@ static struct sock *udp_v6_mcast_next(struct net *net, struct sock *sk,  				      int dif)  {  	struct hlist_nulls_node *node; -	struct sock *s = sk;  	unsigned short num = ntohs(loc_port); -	sk_nulls_for_each_from(s, node) { -		struct inet_sock *inet = inet_sk(s); +	sk_nulls_for_each_from(sk, node) { +		struct inet_sock *inet = inet_sk(sk); -		if (!net_eq(sock_net(s), net)) +		if (!net_eq(sock_net(sk), net))  			continue; -		if (udp_sk(s)->udp_port_hash == num && -		    s->sk_family == PF_INET6) { -			struct ipv6_pinfo *np = inet6_sk(s); +		if (udp_sk(sk)->udp_port_hash == num && +		    sk->sk_family == PF_INET6) {  			if (inet->inet_dport) {  				if (inet->inet_dport != rmt_port)  					continue;  			} -			if (!ipv6_addr_any(&np->daddr) && -			    !ipv6_addr_equal(&np->daddr, rmt_addr)) +			if (!ipv6_addr_any(&sk->sk_v6_daddr) && +			    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr))  				continue; -			if (s->sk_bound_dev_if && s->sk_bound_dev_if != dif) +			if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)  				continue; -			if (!ipv6_addr_any(&np->rcv_saddr)) { -				if (!ipv6_addr_equal(&np->rcv_saddr, loc_addr)) +			if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { +				if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))  					continue;  			} -			if (!inet6_mc_check(s, loc_addr, rmt_addr)) +			if (!inet6_mc_check(sk, loc_addr, rmt_addr))  				continue; -			return s; +			return sk;  		}  	}  	return NULL; @@ -739,6 +767,17 @@ static void flush_stack(struct sock **stack, unsigned int count,  	if (unlikely(skb1))  		kfree_skb(skb1);  } + +static void udp6_csum_zero_error(struct sk_buff *skb) +{ +	/* RFC 2460 section 8.1 says that we SHOULD log +	 * this error. Well, it is reasonable. +	 */ +	LIMIT_NETDEBUG(KERN_INFO "IPv6: udp checksum is 0 for [%pI6c]:%u->[%pI6c]:%u\n", +		       &ipv6_hdr(skb)->saddr, ntohs(udp_hdr(skb)->source), +		       &ipv6_hdr(skb)->daddr, ntohs(udp_hdr(skb)->dest)); +} +  /*   * Note: called only from the BH handler context,   * so we don't need to lock the hashes. @@ -758,7 +797,12 @@ static int __udp6_lib_mcast_deliver(struct net *net, struct sk_buff *skb,  	dif = inet6_iif(skb);  	sk = udp_v6_mcast_next(net, sk, uh->dest, daddr, uh->source, saddr, dif);  	while (sk) { -		stack[count++] = sk; +		/* If zero checksum and no_check is not on for +		 * the socket then skip it. +		 */ +		if (uh->check || udp_sk(sk)->no_check6_rx) +			stack[count++] = sk; +  		sk = udp_v6_mcast_next(net, sk_nulls_next(sk), uh->dest, daddr,  				       uh->source, saddr, dif);  		if (unlikely(count == ARRAY_SIZE(stack))) { @@ -846,7 +890,12 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,  	if (sk != NULL) {  		int ret; -		sk_mark_napi_id(sk, skb); +		if (!uh->check && !udp_sk(sk)->no_check6_rx) { +			sock_put(sk); +			udp6_csum_zero_error(skb); +			goto csum_error; +		} +  		ret = udpv6_queue_rcv_skb(sk, skb);  		sock_put(sk); @@ -859,6 +908,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,  		return 0;  	} +	if (!uh->check) { +		udp6_csum_zero_error(skb); +		goto csum_error; +	} +  	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))  		goto discard; @@ -986,7 +1040,10 @@ static int udp_v6_push_pending_frames(struct sock *sk)  	if (is_udplite)  		csum = udplite_csum_outgoing(sk, skb); -	else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */ +	else if (up->no_check6_tx) {   /* UDP csum disabled */ +		skb->ip_summed = CHECKSUM_NONE; +		goto send; +	} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */  		udp6_hwcsum_outgoing(sk, skb, &fl6->saddr, &fl6->daddr,  				     up->len);  		goto send; @@ -1023,7 +1080,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,  	struct udp_sock *up = udp_sk(sk);  	struct inet_sock *inet = inet_sk(sk);  	struct ipv6_pinfo *np = inet6_sk(sk); -	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name; +	DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name);  	struct in6_addr *daddr, *final_p, final;  	struct ipv6_txoptions *opt = NULL;  	struct ip6_flowlabel *flowlabel = NULL; @@ -1064,7 +1121,7 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,  	} else if (!up->pending) {  		if (sk->sk_state != TCP_ESTABLISHED)  			return -EDESTADDRREQ; -		daddr = &np->daddr; +		daddr = &sk->sk_v6_daddr;  	} else  		daddr = NULL; @@ -1125,7 +1182,6 @@ do_udp_sendmsg:  				flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);  				if (flowlabel == NULL)  					return -EINVAL; -				daddr = &flowlabel->dst;  			}  		} @@ -1134,8 +1190,8 @@ do_udp_sendmsg:  		 * sk->sk_dst_cache.  		 */  		if (sk->sk_state == TCP_ESTABLISHED && -		    ipv6_addr_equal(daddr, &np->daddr)) -			daddr = &np->daddr; +		    ipv6_addr_equal(daddr, &sk->sk_v6_daddr)) +			daddr = &sk->sk_v6_daddr;  		if (addr_len >= sizeof(struct sockaddr_in6) &&  		    sin6->sin6_scope_id && @@ -1146,7 +1202,7 @@ do_udp_sendmsg:  			return -EDESTADDRREQ;  		fl6.fl6_dport = inet->inet_dport; -		daddr = &np->daddr; +		daddr = &sk->sk_v6_daddr;  		fl6.flowlabel = np->flow_label;  		connected = 1;  	} @@ -1206,28 +1262,19 @@ do_udp_sendmsg:  	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); -	dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, true); +	dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p);  	if (IS_ERR(dst)) {  		err = PTR_ERR(dst);  		dst = NULL;  		goto out;  	} -	if (hlimit < 0) { -		if (ipv6_addr_is_multicast(&fl6.daddr)) -			hlimit = np->mcast_hops; -		else -			hlimit = np->hop_limit; -		if (hlimit < 0) -			hlimit = ip6_dst_hoplimit(dst); -	} +	if (hlimit < 0) +		hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);  	if (tclass < 0)  		tclass = np->tclass; -	if (dontfrag < 0) -		dontfrag = np->dontfrag; -  	if (msg->msg_flags&MSG_CONFIRM)  		goto do_confirm;  back_from_confirm: @@ -1246,6 +1293,8 @@ back_from_confirm:  	up->pending = AF_INET6;  do_append_data: +	if (dontfrag < 0) +		dontfrag = np->dontfrag;  	up->len += ulen;  	getfrag  =  is_udplite ?  udplite_getfrag : ip_generic_getfrag;  	err = ip6_append_data(sk, getfrag, msg->msg_iov, ulen, @@ -1262,8 +1311,8 @@ do_append_data:  	if (dst) {  		if (connected) {  			ip6_dst_store(sk, dst, -				      ipv6_addr_equal(&fl6.daddr, &np->daddr) ? -				      &np->daddr : NULL, +				      ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr) ? +				      &sk->sk_v6_daddr : NULL,  #ifdef CONFIG_IPV6_SUBTREES  				      ipv6_addr_equal(&fl6.saddr, &np->saddr) ?  				      &np->saddr : @@ -1461,7 +1510,6 @@ static struct inet_protosw udpv6_protosw = {  	.protocol =  IPPROTO_UDP,  	.prot =      &udpv6_prot,  	.ops =       &inet6_dgram_ops, -	.no_check =  UDP_CSUM_DEFAULT,  	.flags =     INET_PROTOSW_PERMANENT,  }; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 4691ed50a92..c779c3c90b9 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -7,33 +7,32 @@  #include <net/inet_common.h>  #include <net/transp_v6.h> -extern int  	__udp6_lib_rcv(struct sk_buff *, struct udp_table *, int ); -extern void 	__udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, -			       u8 , u8 , int , __be32 , struct udp_table *); +int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); +void __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, +		    __be32, struct udp_table *); -extern int	udp_v6_get_port(struct sock *sk, unsigned short snum); +int udp_v6_get_port(struct sock *sk, unsigned short snum); -extern int	udpv6_getsockopt(struct sock *sk, int level, int optname, -				 char __user *optval, int __user *optlen); -extern int	udpv6_setsockopt(struct sock *sk, int level, int optname, -				 char __user *optval, unsigned int optlen); +int udpv6_getsockopt(struct sock *sk, int level, int optname, +		     char __user *optval, int __user *optlen); +int udpv6_setsockopt(struct sock *sk, int level, int optname, +		     char __user *optval, unsigned int optlen);  #ifdef CONFIG_COMPAT -extern int	compat_udpv6_setsockopt(struct sock *sk, int level, int optname, -					char __user *optval, unsigned int optlen); -extern int	compat_udpv6_getsockopt(struct sock *sk, int level, int optname, -				       char __user *optval, int __user *optlen); +int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, +			    char __user *optval, unsigned int optlen); +int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, +			    char __user *optval, int __user *optlen);  #endif -extern int	udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, -			      struct msghdr *msg, size_t len); -extern int	udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, -			      struct msghdr *msg, size_t len, -			      int noblock, int flags, int *addr_len); -extern int	udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb); -extern void	udpv6_destroy_sock(struct sock *sk); +int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +		  size_t len); +int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, +		  size_t len, int noblock, int flags, int *addr_len); +int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); +void udpv6_destroy_sock(struct sock *sk); -extern void udp_v6_clear_sk(struct sock *sk, int size); +void udp_v6_clear_sk(struct sock *sk, int size);  #ifdef CONFIG_PROC_FS -extern int	udp6_seq_show(struct seq_file *seq, void *v); +int udp6_seq_show(struct seq_file *seq, void *v);  #endif  #endif	/* _UDP6_IMPL_H */ diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index 60559511bd9..0ae3d98f83e 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -63,7 +63,11 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,  		if (unlikely(type & ~(SKB_GSO_UDP |  				      SKB_GSO_DODGY |  				      SKB_GSO_UDP_TUNNEL | +				      SKB_GSO_UDP_TUNNEL_CSUM |  				      SKB_GSO_GRE | +				      SKB_GSO_GRE_CSUM | +				      SKB_GSO_IPIP | +				      SKB_GSO_SIT |  				      SKB_GSO_MPLS) ||  			     !(type & (SKB_GSO_UDP))))  			goto out; @@ -74,7 +78,8 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,  		goto out;  	} -	if (skb->encapsulation && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL) +	if (skb->encapsulation && skb_shinfo(skb)->gso_type & +	    (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))  		segs = skb_udp_tunnel_segment(skb, features);  	else {  		/* Do software UFO. Complete and fill in the UDP checksum as HW cannot @@ -88,7 +93,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,  		/* Check if there is enough headroom to insert fragment header. */  		tnl_hlen = skb_tnl_header_len(skb); -		if (skb_headroom(skb) < (tnl_hlen + frag_hdr_sz)) { +		if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) {  			if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz))  				goto out;  		} @@ -111,7 +116,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,  		fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen);  		fptr->nexthdr = nexthdr;  		fptr->reserved = 0; -		ipv6_select_ident(fptr, (struct rt6_info *)skb_dst(skb)); +		fptr->identification = skb_shinfo(skb)->ip6_frag_id;  		/* Fragment the skb. ipv6 header and the remaining fields of the  		 * fragment header are updated in ipv6_gso_segment() diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index dfcc4be4689..9cf097e206e 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -64,7 +64,6 @@ static struct inet_protosw udplite6_protosw = {  	.protocol	= IPPROTO_UDPLITE,  	.prot		= &udplitev6_prot,  	.ops		= &inet6_dgram_ops, -	.no_check	= 0,  	.flags		= INET_PROTOSW_PERMANENT,  }; diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index 63d5d493098..0e015906f9c 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -15,8 +15,7 @@   * GNU General Public License for more details.   *   * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * along with this program; if not, see <http://www.gnu.org/licenses/>.   */  /*   * Authors: diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 4770d515c2c..901ef6f8add 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -63,6 +63,12 @@ static int xfrm6_mode_tunnel_output(struct xfrm_state *x, struct sk_buff *skb)  	return 0;  } +#define for_each_input_rcu(head, handler)	\ +	for (handler = rcu_dereference(head);	\ +	     handler != NULL;			\ +	     handler = rcu_dereference(handler->next)) + +  static int xfrm6_mode_tunnel_input(struct xfrm_state *x, struct sk_buff *skb)  {  	int err = -EINVAL; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6cd625e3770..433672d07d0 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -78,7 +78,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)  	if (mtu < IPV6_MIN_MTU)  		mtu = IPV6_MIN_MTU; -	if (!skb->local_df && skb->len > mtu) { +	if (!skb->ignore_df && skb->len > mtu) {  		skb->dev = dst->dev;  		if (xfrm6_local_dontfrag(skb)) @@ -114,13 +114,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb)  	if (err)  		return err; -	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); -#ifdef CONFIG_NETFILTER -	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; -#endif - -	skb->protocol = htons(ETH_P_IPV6); -	skb->local_df = 1; +	skb->ignore_df = 1;  	return x->outer_mode->output2(x, skb);  } @@ -128,11 +122,13 @@ EXPORT_SYMBOL(xfrm6_prepare_output);  int xfrm6_output_finish(struct sk_buff *skb)  { +	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); +	skb->protocol = htons(ETH_P_IPV6); +  #ifdef CONFIG_NETFILTER  	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;  #endif -	skb->protocol = htons(ETH_P_IPV6);  	return xfrm_output(skb);  } @@ -142,6 +138,13 @@ static int __xfrm6_output(struct sk_buff *skb)  	struct xfrm_state *x = dst->xfrm;  	int mtu; +#ifdef CONFIG_NETFILTER +	if (!x) { +		IP6CB(skb)->flags |= IP6SKB_REROUTED; +		return dst_output(skb); +	} +#endif +  	if (skb->protocol == htons(ETH_P_IPV6))  		mtu = ip6_skb_dst_mtu(skb);  	else @@ -150,7 +153,7 @@ static int __xfrm6_output(struct sk_buff *skb)  	if (skb->len > mtu && xfrm6_local_dontfrag(skb)) {  		xfrm6_local_rxpmtu(skb, mtu);  		return -EMSGSIZE; -	} else if (!skb->local_df && skb->len > mtu && skb->sk) { +	} else if (!skb->ignore_df && skb->len > mtu && skb->sk) {  		xfrm_local_error(skb, mtu);  		return -EMSGSIZE;  	} @@ -163,8 +166,9 @@ static int __xfrm6_output(struct sk_buff *skb)  	return x->outer_mode->afinfo->output_finish(skb);  } -int xfrm6_output(struct sk_buff *skb) +int xfrm6_output(struct sock *sk, struct sk_buff *skb)  { -	return NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, -		       skb_dst(skb)->dev, __xfrm6_output); +	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, +			    NULL, skb_dst(skb)->dev, __xfrm6_output, +			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));  } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 23ed03d786c..2a0bbda2c76 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -135,9 +135,14 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse)  	struct ipv6_opt_hdr *exthdr;  	const unsigned char *nh = skb_network_header(skb);  	u8 nexthdr = nh[IP6CB(skb)->nhoff]; +	int oif = 0; + +	if (skb_dst(skb)) +		oif = skb_dst(skb)->dev->ifindex;  	memset(fl6, 0, sizeof(struct flowi6));  	fl6->flowi6_mark = skb->mark; +	fl6->flowi6_oif = reverse ? skb->skb_iif : oif;  	fl6->daddr = reverse ? hdr->saddr : hdr->daddr;  	fl6->saddr = reverse ? hdr->daddr : hdr->saddr; @@ -284,7 +289,7 @@ static struct dst_ops xfrm6_dst_ops = {  	.destroy =		xfrm6_dst_destroy,  	.ifdown =		xfrm6_dst_ifdown,  	.local_out =		__ip6_local_out, -	.gc_thresh =		1024, +	.gc_thresh =		32768,  };  static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { @@ -384,11 +389,17 @@ int __init xfrm6_init(void)  	if (ret)  		goto out_policy; +	ret = xfrm6_protocol_init(); +	if (ret) +		goto out_state; +  #ifdef CONFIG_SYSCTL  	register_pernet_subsys(&xfrm6_net_ops);  #endif  out:  	return ret; +out_state: +	xfrm6_state_fini();  out_policy:  	xfrm6_policy_fini();  	goto out; @@ -399,6 +410,7 @@ void xfrm6_fini(void)  #ifdef CONFIG_SYSCTL  	unregister_pernet_subsys(&xfrm6_net_ops);  #endif +	xfrm6_protocol_fini();  	xfrm6_policy_fini();  	xfrm6_state_fini();  	dst_entries_destroy(&xfrm6_dst_ops); diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c new file mode 100644 index 00000000000..54d13f8dbba --- /dev/null +++ b/net/ipv6/xfrm6_protocol.c @@ -0,0 +1,279 @@ +/* xfrm6_protocol.c - Generic xfrm protocol multiplexer for ipv6. + * + * Copyright (C) 2013 secunet Security Networks AG + * + * Author: + * Steffen Klassert <steffen.klassert@secunet.com> + * + * Based on: + * net/ipv4/xfrm4_protocol.c + * + *	This program is free software; you can redistribute it and/or + *	modify it under the terms of the GNU General Public License + *	as published by the Free Software Foundation; either version + *	2 of the License, or (at your option) any later version. + */ + +#include <linux/init.h> +#include <linux/mutex.h> +#include <linux/skbuff.h> +#include <linux/icmpv6.h> +#include <net/ipv6.h> +#include <net/protocol.h> +#include <net/xfrm.h> + +static struct xfrm6_protocol __rcu *esp6_handlers __read_mostly; +static struct xfrm6_protocol __rcu *ah6_handlers __read_mostly; +static struct xfrm6_protocol __rcu *ipcomp6_handlers __read_mostly; +static DEFINE_MUTEX(xfrm6_protocol_mutex); + +static inline struct xfrm6_protocol __rcu **proto_handlers(u8 protocol) +{ +	switch (protocol) { +	case IPPROTO_ESP: +		return &esp6_handlers; +	case IPPROTO_AH: +		return &ah6_handlers; +	case IPPROTO_COMP: +		return &ipcomp6_handlers; +	} + +	return NULL; +} + +#define for_each_protocol_rcu(head, handler)		\ +	for (handler = rcu_dereference(head);		\ +	     handler != NULL;				\ +	     handler = rcu_dereference(handler->next))	\ + +int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err) +{ +	int ret; +	struct xfrm6_protocol *handler; +	struct xfrm6_protocol __rcu **head = proto_handlers(protocol); + +	if (!head) +		return 0; + +	for_each_protocol_rcu(*proto_handlers(protocol), handler) +		if ((ret = handler->cb_handler(skb, err)) <= 0) +			return ret; + +	return 0; +} +EXPORT_SYMBOL(xfrm6_rcv_cb); + +static int xfrm6_esp_rcv(struct sk_buff *skb) +{ +	int ret; +	struct xfrm6_protocol *handler; + +	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + +	for_each_protocol_rcu(esp6_handlers, handler) +		if ((ret = handler->handler(skb)) != -EINVAL) +			return ret; + +	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +	kfree_skb(skb); +	return 0; +} + +static void xfrm6_esp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +			  u8 type, u8 code, int offset, __be32 info) +{ +	struct xfrm6_protocol *handler; + +	for_each_protocol_rcu(esp6_handlers, handler) +		if (!handler->err_handler(skb, opt, type, code, offset, info)) +			break; +} + +static int xfrm6_ah_rcv(struct sk_buff *skb) +{ +	int ret; +	struct xfrm6_protocol *handler; + +	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + +	for_each_protocol_rcu(ah6_handlers, handler) +		if ((ret = handler->handler(skb)) != -EINVAL) +			return ret; + +	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +	kfree_skb(skb); +	return 0; +} + +static void xfrm6_ah_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +			 u8 type, u8 code, int offset, __be32 info) +{ +	struct xfrm6_protocol *handler; + +	for_each_protocol_rcu(ah6_handlers, handler) +		if (!handler->err_handler(skb, opt, type, code, offset, info)) +			break; +} + +static int xfrm6_ipcomp_rcv(struct sk_buff *skb) +{ +	int ret; +	struct xfrm6_protocol *handler; + +	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL; + +	for_each_protocol_rcu(ipcomp6_handlers, handler) +		if ((ret = handler->handler(skb)) != -EINVAL) +			return ret; + +	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +	kfree_skb(skb); +	return 0; +} + +static void xfrm6_ipcomp_err(struct sk_buff *skb, struct inet6_skb_parm *opt, +			     u8 type, u8 code, int offset, __be32 info) +{ +	struct xfrm6_protocol *handler; + +	for_each_protocol_rcu(ipcomp6_handlers, handler) +		if (!handler->err_handler(skb, opt, type, code, offset, info)) +			break; +} + +static const struct inet6_protocol esp6_protocol = { +	.handler	=	xfrm6_esp_rcv, +	.err_handler	=	xfrm6_esp_err, +	.flags		=	INET6_PROTO_NOPOLICY, +}; + +static const struct inet6_protocol ah6_protocol = { +	.handler	=	xfrm6_ah_rcv, +	.err_handler	=	xfrm6_ah_err, +	.flags		=	INET6_PROTO_NOPOLICY, +}; + +static const struct inet6_protocol ipcomp6_protocol = { +	.handler	=	xfrm6_ipcomp_rcv, +	.err_handler	=	xfrm6_ipcomp_err, +	.flags		=	INET6_PROTO_NOPOLICY, +}; + +static struct xfrm_input_afinfo xfrm6_input_afinfo = { +	.family		=	AF_INET6, +	.owner		=	THIS_MODULE, +	.callback	=	xfrm6_rcv_cb, +}; + +static inline const struct inet6_protocol *netproto(unsigned char protocol) +{ +	switch (protocol) { +	case IPPROTO_ESP: +		return &esp6_protocol; +	case IPPROTO_AH: +		return &ah6_protocol; +	case IPPROTO_COMP: +		return &ipcomp6_protocol; +	} + +	return NULL; +} + +int xfrm6_protocol_register(struct xfrm6_protocol *handler, +			    unsigned char protocol) +{ +	struct xfrm6_protocol __rcu **pprev; +	struct xfrm6_protocol *t; +	bool add_netproto = false; +	int ret = -EEXIST; +	int priority = handler->priority; + +	if (!proto_handlers(protocol) || !netproto(protocol)) +		return -EINVAL; + +	mutex_lock(&xfrm6_protocol_mutex); + +	if (!rcu_dereference_protected(*proto_handlers(protocol), +				       lockdep_is_held(&xfrm6_protocol_mutex))) +		add_netproto = true; + +	for (pprev = proto_handlers(protocol); +	     (t = rcu_dereference_protected(*pprev, +			lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; +	     pprev = &t->next) { +		if (t->priority < priority) +			break; +		if (t->priority == priority) +			goto err; +	} + +	handler->next = *pprev; +	rcu_assign_pointer(*pprev, handler); + +	ret = 0; + +err: +	mutex_unlock(&xfrm6_protocol_mutex); + +	if (add_netproto) { +		if (inet6_add_protocol(netproto(protocol), protocol)) { +			pr_err("%s: can't add protocol\n", __func__); +			ret = -EAGAIN; +		} +	} + +	return ret; +} +EXPORT_SYMBOL(xfrm6_protocol_register); + +int xfrm6_protocol_deregister(struct xfrm6_protocol *handler, +			      unsigned char protocol) +{ +	struct xfrm6_protocol __rcu **pprev; +	struct xfrm6_protocol *t; +	int ret = -ENOENT; + +	if (!proto_handlers(protocol) || !netproto(protocol)) +		return -EINVAL; + +	mutex_lock(&xfrm6_protocol_mutex); + +	for (pprev = proto_handlers(protocol); +	     (t = rcu_dereference_protected(*pprev, +			lockdep_is_held(&xfrm6_protocol_mutex))) != NULL; +	     pprev = &t->next) { +		if (t == handler) { +			*pprev = handler->next; +			ret = 0; +			break; +		} +	} + +	if (!rcu_dereference_protected(*proto_handlers(protocol), +				       lockdep_is_held(&xfrm6_protocol_mutex))) { +		if (inet6_del_protocol(netproto(protocol), protocol) < 0) { +			pr_err("%s: can't remove protocol\n", __func__); +			ret = -EAGAIN; +		} +	} + +	mutex_unlock(&xfrm6_protocol_mutex); + +	synchronize_net(); + +	return ret; +} +EXPORT_SYMBOL(xfrm6_protocol_deregister); + +int __init xfrm6_protocol_init(void) +{ +	return xfrm_input_register_afinfo(&xfrm6_input_afinfo); +} + +void xfrm6_protocol_fini(void) +{ +	xfrm_input_unregister_afinfo(&xfrm6_input_afinfo); +} diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index de2bcfaaf75..1c66465a42d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -12,8 +12,7 @@   * GNU General Public License for more details.   *   * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA + * along with this program; if not, see <http://www.gnu.org/licenses/>.   *   * Authors	Mitsuru KANDA  <mk@linux-ipv6.org>   * 		YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org>  | 
