diff options
Diffstat (limited to 'net/ipv4/ip_tunnel.c')
| -rw-r--r-- | net/ipv4/ip_tunnel.c | 259 | 
1 files changed, 149 insertions, 110 deletions
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index ac9fabe0300..6f9de61dce5 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -40,6 +40,7 @@  #include <linux/if_ether.h>  #include <linux/if_vlan.h>  #include <linux/rculist.h> +#include <linux/err.h>  #include <net/sock.h>  #include <net/ip.h> @@ -61,57 +62,59 @@  #include <net/ip6_route.h>  #endif -static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn, -				   __be32 key, __be32 remote) +static unsigned int ip_tunnel_hash(__be32 key, __be32 remote)  {  	return hash_32((__force u32)key ^ (__force u32)remote,  			 IP_TNL_HASH_BITS);  } -/* Often modified stats are per cpu, other are shared (netdev->stats) */ -struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev, -						struct rtnl_link_stats64 *tot) +static void __tunnel_dst_set(struct ip_tunnel_dst *idst, +			     struct dst_entry *dst)  { -	int i; - -	for_each_possible_cpu(i) { -		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); -		u64 rx_packets, rx_bytes, tx_packets, tx_bytes; -		unsigned int start; - -		do { -			start = u64_stats_fetch_begin_bh(&tstats->syncp); -			rx_packets = tstats->rx_packets; -			tx_packets = tstats->tx_packets; -			rx_bytes = tstats->rx_bytes; -			tx_bytes = tstats->tx_bytes; -		} while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); +	struct dst_entry *old_dst; -		tot->rx_packets += rx_packets; -		tot->tx_packets += tx_packets; -		tot->rx_bytes   += rx_bytes; -		tot->tx_bytes   += tx_bytes; -	} +	dst_clone(dst); +	old_dst = xchg((__force struct dst_entry **)&idst->dst, dst); +	dst_release(old_dst); +} -	tot->multicast = dev->stats.multicast; +static void tunnel_dst_set(struct ip_tunnel *t, struct dst_entry *dst) +{ +	__tunnel_dst_set(this_cpu_ptr(t->dst_cache), dst); +} -	tot->rx_crc_errors = dev->stats.rx_crc_errors; -	tot->rx_fifo_errors = dev->stats.rx_fifo_errors; -	tot->rx_length_errors = dev->stats.rx_length_errors; -	tot->rx_frame_errors = dev->stats.rx_frame_errors; -	tot->rx_errors = dev->stats.rx_errors; +static void tunnel_dst_reset(struct ip_tunnel *t) +{ +	tunnel_dst_set(t, NULL); +} -	tot->tx_fifo_errors = dev->stats.tx_fifo_errors; -	tot->tx_carrier_errors = dev->stats.tx_carrier_errors; -	tot->tx_dropped = dev->stats.tx_dropped; -	tot->tx_aborted_errors = dev->stats.tx_aborted_errors; -	tot->tx_errors = dev->stats.tx_errors; +void ip_tunnel_dst_reset_all(struct ip_tunnel *t) +{ +	int i; -	tot->collisions  = dev->stats.collisions; +	for_each_possible_cpu(i) +		__tunnel_dst_set(per_cpu_ptr(t->dst_cache, i), NULL); +} +EXPORT_SYMBOL(ip_tunnel_dst_reset_all); -	return tot; +static struct rtable *tunnel_rtable_get(struct ip_tunnel *t, u32 cookie) +{ +	struct dst_entry *dst; + +	rcu_read_lock(); +	dst = rcu_dereference(this_cpu_ptr(t->dst_cache)->dst); +	if (dst && !atomic_inc_not_zero(&dst->__refcnt)) +		dst = NULL; +	if (dst) { +		if (dst->obsolete && dst->ops->check(dst, cookie) == NULL) { +			tunnel_dst_reset(t); +			dst_release(dst); +			dst = NULL; +		} +	} +	rcu_read_unlock(); +	return (struct rtable *)dst;  } -EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);  static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,  				__be16 flags, __be32 key) @@ -146,7 +149,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,  	struct ip_tunnel *t, *cand = NULL;  	struct hlist_head *head; -	hash = ip_tunnel_hash(itn, key, remote); +	hash = ip_tunnel_hash(key, remote);  	head = &itn->tunnels[hash];  	hlist_for_each_entry_rcu(t, head, hash_node) { @@ -166,6 +169,7 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,  	hlist_for_each_entry_rcu(t, head, hash_node) {  		if (remote != t->parms.iph.daddr || +		    t->parms.iph.saddr != 0 ||  		    !(t->dev->flags & IFF_UP))  			continue; @@ -178,14 +182,15 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,  			cand = t;  	} -	hash = ip_tunnel_hash(itn, key, 0); +	hash = ip_tunnel_hash(key, 0);  	head = &itn->tunnels[hash];  	hlist_for_each_entry_rcu(t, head, hash_node) { -		if ((local != t->parms.iph.saddr && -		     (local != t->parms.iph.daddr || -		      !ipv4_is_multicast(local))) || -		    !(t->dev->flags & IFF_UP)) +		if ((local != t->parms.iph.saddr || t->parms.iph.daddr != 0) && +		    (local != t->parms.iph.daddr || !ipv4_is_multicast(local))) +			continue; + +		if (!(t->dev->flags & IFF_UP))  			continue;  		if (!ip_tunnel_key_match(&t->parms, flags, key)) @@ -202,6 +207,8 @@ struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,  	hlist_for_each_entry_rcu(t, head, hash_node) {  		if (t->parms.i_key != key || +		    t->parms.iph.saddr != 0 || +		    t->parms.iph.daddr != 0 ||  		    !(t->dev->flags & IFF_UP))  			continue; @@ -228,13 +235,17 @@ static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,  {  	unsigned int h;  	__be32 remote; +	__be32 i_key = parms->i_key;  	if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))  		remote = parms->iph.daddr;  	else  		remote = 0; -	h = ip_tunnel_hash(itn, parms->i_key, remote); +	if (!(parms->i_flags & TUNNEL_KEY) && (parms->i_flags & VTI_ISVTI)) +		i_key = 0; + +	h = ip_tunnel_hash(i_key, remote);  	return &itn->tunnels[h];  } @@ -257,6 +268,7 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,  	__be32 remote = parms->iph.daddr;  	__be32 local = parms->iph.saddr;  	__be32 key = parms->i_key; +	__be16 flags = parms->i_flags;  	int link = parms->link;  	struct ip_tunnel *t = NULL;  	struct hlist_head *head = ip_bucket(itn, parms); @@ -264,9 +276,9 @@ static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,  	hlist_for_each_entry_rcu(t, head, hash_node) {  		if (local == t->parms.iph.saddr &&  		    remote == t->parms.iph.daddr && -		    key == t->parms.i_key &&  		    link == t->parms.link && -		    type == t->dev->type) +		    type == t->dev->type && +		    ip_tunnel_key_match(&t->parms, flags, key))  			break;  	}  	return t; @@ -318,11 +330,10 @@ failed:  	return ERR_PTR(err);  } -static inline struct rtable *ip_route_output_tunnel(struct net *net, -						    struct flowi4 *fl4, -						    int proto, -						    __be32 daddr, __be32 saddr, -						    __be32 key, __u8 tos, int oif) +static inline void init_tunnel_flow(struct flowi4 *fl4, +				    int proto, +				    __be32 daddr, __be32 saddr, +				    __be32 key, __u8 tos, int oif)  {  	memset(fl4, 0, sizeof(*fl4));  	fl4->flowi4_oif = oif; @@ -331,7 +342,6 @@ static inline struct rtable *ip_route_output_tunnel(struct net *net,  	fl4->flowi4_tos = tos;  	fl4->flowi4_proto = proto;  	fl4->fl4_gre_key = key; -	return ip_route_output_key(net, fl4);  }  static int ip_tunnel_bind_dev(struct net_device *dev) @@ -350,14 +360,14 @@ static int ip_tunnel_bind_dev(struct net_device *dev)  		struct flowi4 fl4;  		struct rtable *rt; -		rt = ip_route_output_tunnel(tunnel->net, &fl4, -					    tunnel->parms.iph.protocol, -					    iph->daddr, iph->saddr, -					    tunnel->parms.o_key, -					    RT_TOS(iph->tos), -					    tunnel->parms.link); +		init_tunnel_flow(&fl4, iph->protocol, iph->daddr, +				 iph->saddr, tunnel->parms.o_key, +				 RT_TOS(iph->tos), tunnel->parms.link); +		rt = ip_route_output_key(tunnel->net, &fl4); +  		if (!IS_ERR(rt)) {  			tdev = rt->dst.dev; +			tunnel_dst_set(tunnel, &rt->dst);  			ip_rt_put(rt);  		}  		if (dev->type != ARPHRD_ETHER) @@ -386,14 +396,13 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,  					  struct ip_tunnel_net *itn,  					  struct ip_tunnel_parm *parms)  { -	struct ip_tunnel *nt, *fbt; +	struct ip_tunnel *nt;  	struct net_device *dev;  	BUG_ON(!itn->fb_tunnel_dev); -	fbt = netdev_priv(itn->fb_tunnel_dev);  	dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);  	if (IS_ERR(dev)) -		return NULL; +		return ERR_CAST(dev);  	dev->mtu = ip_tunnel_bind_dev(dev); @@ -405,15 +414,12 @@ static struct ip_tunnel *ip_tunnel_create(struct net *net,  int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,  		  const struct tnl_ptk_info *tpi, bool log_ecn_error)  { -	struct pcpu_tstats *tstats; +	struct pcpu_sw_netstats *tstats;  	const struct iphdr *iph = ip_hdr(skb);  	int err;  #ifdef CONFIG_NET_IPGRE_BROADCAST  	if (ipv4_is_multicast(iph->daddr)) { -		/* Looped back packet, drop it! */ -		if (rt_is_output_route(skb_rtable(skb))) -			goto drop;  		tunnel->dev->stats.multicast++;  		skb->pkt_type = PACKET_BROADCAST;  	} @@ -436,6 +442,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,  		tunnel->i_seqno = ntohl(tpi->seq) + 1;  	} +	skb_reset_network_header(skb); +  	err = IP_ECN_decapsulate(iph, skb);  	if (unlikely(err)) {  		if (log_ecn_error) @@ -454,6 +462,8 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,  	tstats->rx_bytes += skb->len;  	u64_stats_update_end(&tstats->syncp); +	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); +  	if (tunnel->dev->type == ARPHRD_ETHER) {  		skb->protocol = eth_type_trans(skb, tunnel->dev);  		skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); @@ -461,8 +471,6 @@ int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,  		skb->dev = tunnel->dev;  	} -	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(tunnel->dev))); -  	gro_cells_receive(&tunnel->gro_cells, skb);  	return 0; @@ -532,8 +540,10 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,  	unsigned int max_headroom;	/* The extra header space needed */  	__be32 dst;  	int err; +	bool connected;  	inner_iph = (const struct iphdr *)skb_inner_network_header(skb); +	connected = (tunnel->parms.iph.daddr != 0);  	dst = tnl_params->daddr;  	if (dst == 0) { @@ -581,27 +591,38 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,  #endif  		else  			goto tx_error; + +		connected = false;  	}  	tos = tnl_params->tos;  	if (tos & 0x1) {  		tos &= ~0x1; -		if (skb->protocol == htons(ETH_P_IP)) +		if (skb->protocol == htons(ETH_P_IP)) {  			tos = inner_iph->tos; -		else if (skb->protocol == htons(ETH_P_IPV6)) +			connected = false; +		} else if (skb->protocol == htons(ETH_P_IPV6)) {  			tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph); +			connected = false; +		}  	} -	rt = ip_route_output_tunnel(tunnel->net, &fl4, -				    protocol, -				    dst, tnl_params->saddr, -				    tunnel->parms.o_key, -				    RT_TOS(tos), -				    tunnel->parms.link); -	if (IS_ERR(rt)) { -		dev->stats.tx_carrier_errors++; -		goto tx_error; +	init_tunnel_flow(&fl4, protocol, dst, tnl_params->saddr, +			 tunnel->parms.o_key, RT_TOS(tos), tunnel->parms.link); + +	rt = connected ? tunnel_rtable_get(tunnel, 0) : NULL; + +	if (!rt) { +		rt = ip_route_output_key(tunnel->net, &fl4); + +		if (IS_ERR(rt)) { +			dev->stats.tx_carrier_errors++; +			goto tx_error; +		} +		if (connected) +			tunnel_dst_set(tunnel, &rt->dst);  	} +  	if (rt->dst.dev == dev) {  		ip_rt_put(rt);  		dev->stats.collisions++; @@ -618,11 +639,13 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,  				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {  			tunnel->err_count--; +			memset(IPCB(skb), 0, sizeof(*IPCB(skb)));  			dst_link_failure(skb);  		} else  			tunnel->err_count = 0;  	} +	tos = ip_tunnel_ecn_encap(tos, inner_iph, skb);  	ttl = tnl_params->ttl;  	if (ttl == 0) {  		if (skb->protocol == htons(ETH_P_IP)) @@ -641,18 +664,18 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,  	max_headroom = LL_RESERVED_SPACE(rt->dst.dev) + sizeof(struct iphdr)  			+ rt->dst.header_len; -	if (max_headroom > dev->needed_headroom) { +	if (max_headroom > dev->needed_headroom)  		dev->needed_headroom = max_headroom; -		if (skb_cow_head(skb, dev->needed_headroom)) { -			dev->stats.tx_dropped++; -			dev_kfree_skb(skb); -			return; -		} + +	if (skb_cow_head(skb, dev->needed_headroom)) { +		ip_rt_put(rt); +		dev->stats.tx_dropped++; +		kfree_skb(skb); +		return;  	} -	err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, protocol, -			    ip_tunnel_ecn_encap(tos, inner_iph, skb), ttl, df, -			    !net_eq(tunnel->net, dev_net(dev))); +	err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, protocol, +			    tos, ttl, df, !net_eq(tunnel->net, dev_net(dev)));  	iptunnel_xmit_stats(err, &dev->stats, dev->tstats);  	return; @@ -663,7 +686,7 @@ tx_error_icmp:  #endif  tx_error:  	dev->stats.tx_errors++; -	dev_kfree_skb(skb); +	kfree_skb(skb);  }  EXPORT_SYMBOL_GPL(ip_tunnel_xmit); @@ -696,25 +719,25 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,  		if (set_mtu)  			dev->mtu = mtu;  	} +	ip_tunnel_dst_reset_all(t);  	netdev_state_change(dev);  }  int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)  {  	int err = 0; -	struct ip_tunnel *t; -	struct net *net = dev_net(dev); -	struct ip_tunnel *tunnel = netdev_priv(dev); -	struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id); +	struct ip_tunnel *t = netdev_priv(dev); +	struct net *net = t->net; +	struct ip_tunnel_net *itn = net_generic(net, t->ip_tnl_net_id);  	BUG_ON(!itn->fb_tunnel_dev);  	switch (cmd) {  	case SIOCGETTUNNEL: -		t = NULL; -		if (dev == itn->fb_tunnel_dev) +		if (dev == itn->fb_tunnel_dev) {  			t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); -		if (t == NULL) -			t = netdev_priv(dev); +			if (t == NULL) +				t = netdev_priv(dev); +		}  		memcpy(p, &t->parms, sizeof(*p));  		break; @@ -725,16 +748,20 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)  			goto done;  		if (p->iph.ttl)  			p->iph.frag_off |= htons(IP_DF); -		if (!(p->i_flags&TUNNEL_KEY)) -			p->i_key = 0; -		if (!(p->o_flags&TUNNEL_KEY)) -			p->o_key = 0; +		if (!(p->i_flags & VTI_ISVTI)) { +			if (!(p->i_flags & TUNNEL_KEY)) +				p->i_key = 0; +			if (!(p->o_flags & TUNNEL_KEY)) +				p->o_key = 0; +		}  		t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type); -		if (!t && (cmd == SIOCADDTUNNEL)) +		if (!t && (cmd == SIOCADDTUNNEL)) {  			t = ip_tunnel_create(net, itn, p); - +			err = PTR_ERR_OR_ZERO(t); +			break; +		}  		if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {  			if (t != NULL) {  				if (t->dev != dev) { @@ -761,8 +788,9 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)  		if (t) {  			err = 0;  			ip_tunnel_update(itn, t, dev, p, true); -		} else -			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); +		} else { +			err = -ENOENT; +		}  		break;  	case SIOCDELTUNNEL: @@ -811,6 +839,7 @@ static void ip_tunnel_dev_free(struct net_device *dev)  	struct ip_tunnel *tunnel = netdev_priv(dev);  	gro_cells_destroy(&tunnel->gro_cells); +	free_percpu(tunnel->dst_cache);  	free_percpu(dev->tstats);  	free_netdev(dev);  } @@ -853,11 +882,14 @@ int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,  	/* FB netdevice is special: we have one, and only one per netns.  	 * Allowing to move it to another netns is clearly unsafe.  	 */ -	if (!IS_ERR(itn->fb_tunnel_dev)) +	if (!IS_ERR(itn->fb_tunnel_dev)) {  		itn->fb_tunnel_dev->features |= NETIF_F_NETNS_LOCAL; +		itn->fb_tunnel_dev->mtu = ip_tunnel_bind_dev(itn->fb_tunnel_dev); +		ip_tunnel_add(itn, netdev_priv(itn->fb_tunnel_dev)); +	}  	rtnl_unlock(); -	return PTR_RET(itn->fb_tunnel_dev); +	return PTR_ERR_OR_ZERO(itn->fb_tunnel_dev);  }  EXPORT_SYMBOL_GPL(ip_tunnel_init_net); @@ -884,8 +916,6 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head,  			if (!net_eq(dev_net(t->dev), net))  				unregister_netdevice_queue(t->dev, head);  	} -	if (itn->fb_tunnel_dev) -		unregister_netdevice_queue(itn->fb_tunnel_dev, head);  }  void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) @@ -979,12 +1009,19 @@ int ip_tunnel_init(struct net_device *dev)  	int err;  	dev->destructor	= ip_tunnel_dev_free; -	dev->tstats = alloc_percpu(struct pcpu_tstats); +	dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats);  	if (!dev->tstats)  		return -ENOMEM; +	tunnel->dst_cache = alloc_percpu(struct ip_tunnel_dst); +	if (!tunnel->dst_cache) { +		free_percpu(dev->tstats); +		return -ENOMEM; +	} +  	err = gro_cells_init(&tunnel->gro_cells, dev);  	if (err) { +		free_percpu(tunnel->dst_cache);  		free_percpu(dev->tstats);  		return err;  	} @@ -1009,6 +1046,8 @@ void ip_tunnel_uninit(struct net_device *dev)  	/* fb_tunnel_dev will be unregisted in net-exit call. */  	if (itn->fb_tunnel_dev != dev)  		ip_tunnel_del(netdev_priv(dev)); + +	ip_tunnel_dst_reset_all(tunnel);  }  EXPORT_SYMBOL_GPL(ip_tunnel_uninit);  | 
