diff options
Diffstat (limited to 'net/ipv4/ipip.c')
| -rw-r--r-- | net/ipv4/ipip.c | 864 | 
1 files changed, 219 insertions, 645 deletions
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 988f52fba54..62eaa005e14 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -111,206 +111,20 @@  #include <net/sock.h>  #include <net/ip.h>  #include <net/icmp.h> -#include <net/ipip.h> +#include <net/ip_tunnels.h>  #include <net/inet_ecn.h>  #include <net/xfrm.h>  #include <net/net_namespace.h>  #include <net/netns/generic.h> -#define HASH_SIZE  16 -#define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN");  static int ipip_net_id __read_mostly; -struct ipip_net { -	struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; -	struct ip_tunnel __rcu *tunnels_r[HASH_SIZE]; -	struct ip_tunnel __rcu *tunnels_l[HASH_SIZE]; -	struct ip_tunnel __rcu *tunnels_wc[1]; -	struct ip_tunnel __rcu **tunnels[4]; - -	struct net_device *fb_tunnel_dev; -};  static int ipip_tunnel_init(struct net_device *dev); -static void ipip_tunnel_setup(struct net_device *dev); -static void ipip_dev_free(struct net_device *dev); - -/* - * Locking : hash tables are protected by RCU and RTNL - */ - -#define for_each_ip_tunnel_rcu(start) \ -	for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) - -/* often modified stats are per cpu, other are shared (netdev->stats) */ -struct pcpu_tstats { -	unsigned long	rx_packets; -	unsigned long	rx_bytes; -	unsigned long	tx_packets; -	unsigned long	tx_bytes; -}; - -static struct net_device_stats *ipip_get_stats(struct net_device *dev) -{ -	struct pcpu_tstats sum = { 0 }; -	int i; - -	for_each_possible_cpu(i) { -		const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - -		sum.rx_packets += tstats->rx_packets; -		sum.rx_bytes   += tstats->rx_bytes; -		sum.tx_packets += tstats->tx_packets; -		sum.tx_bytes   += tstats->tx_bytes; -	} -	dev->stats.rx_packets = sum.rx_packets; -	dev->stats.rx_bytes   = sum.rx_bytes; -	dev->stats.tx_packets = sum.tx_packets; -	dev->stats.tx_bytes   = sum.tx_bytes; -	return &dev->stats; -} - -static struct ip_tunnel * ipip_tunnel_lookup(struct net *net, -		__be32 remote, __be32 local) -{ -	unsigned int h0 = HASH(remote); -	unsigned int h1 = HASH(local); -	struct ip_tunnel *t; -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1]) -		if (local == t->parms.iph.saddr && -		    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) -			return t; - -	for_each_ip_tunnel_rcu(ipn->tunnels_r[h0]) -		if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP)) -			return t; - -	for_each_ip_tunnel_rcu(ipn->tunnels_l[h1]) -		if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP)) -			return t; - -	t = rcu_dereference(ipn->tunnels_wc[0]); -	if (t && (t->dev->flags&IFF_UP)) -		return t; -	return NULL; -} - -static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn, -		struct ip_tunnel_parm *parms) -{ -	__be32 remote = parms->iph.daddr; -	__be32 local = parms->iph.saddr; -	unsigned int h = 0; -	int prio = 0; - -	if (remote) { -		prio |= 2; -		h ^= HASH(remote); -	} -	if (local) { -		prio |= 1; -		h ^= HASH(local); -	} -	return &ipn->tunnels[prio][h]; -} - -static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn, -		struct ip_tunnel *t) -{ -	return __ipip_bucket(ipn, &t->parms); -} - -static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t) -{ -	struct ip_tunnel __rcu **tp; -	struct ip_tunnel *iter; - -	for (tp = ipip_bucket(ipn, t); -	     (iter = rtnl_dereference(*tp)) != NULL; -	     tp = &iter->next) { -		if (t == iter) { -			rcu_assign_pointer(*tp, t->next); -			break; -		} -	} -} - -static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t) -{ -	struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t); - -	rcu_assign_pointer(t->next, rtnl_dereference(*tp)); -	rcu_assign_pointer(*tp, t); -} - -static struct ip_tunnel * ipip_tunnel_locate(struct net *net, -		struct ip_tunnel_parm *parms, int create) -{ -	__be32 remote = parms->iph.daddr; -	__be32 local = parms->iph.saddr; -	struct ip_tunnel *t, *nt; -	struct ip_tunnel __rcu **tp; -	struct net_device *dev; -	char name[IFNAMSIZ]; -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	for (tp = __ipip_bucket(ipn, parms); -		 (t = rtnl_dereference(*tp)) != NULL; -		 tp = &t->next) { -		if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr) -			return t; -	} -	if (!create) -		return NULL; - -	if (parms->name[0]) -		strlcpy(name, parms->name, IFNAMSIZ); -	else -		strcpy(name, "tunl%d"); - -	dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup); -	if (dev == NULL) -		return NULL; - -	dev_net_set(dev, net); - -	if (strchr(name, '%')) { -		if (dev_alloc_name(dev, name) < 0) -			goto failed_free; -	} - -	nt = netdev_priv(dev); -	nt->parms = *parms; - -	if (ipip_tunnel_init(dev) < 0) -		goto failed_free; - -	if (register_netdevice(dev) < 0) -		goto failed_free; - -	dev_hold(dev); -	ipip_tunnel_link(ipn, nt); -	return nt; - -failed_free: -	ipip_dev_free(dev); -	return NULL; -} - -/* called with RTNL */ -static void ipip_tunnel_uninit(struct net_device *dev) -{ -	struct net *net = dev_net(dev); -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	if (dev == ipn->fb_tunnel_dev) -		rcu_assign_pointer(ipn->tunnels_wc[0], NULL); -	else -		ipip_tunnel_unlink(ipn, netdev_priv(dev)); -	dev_put(dev); -} +static struct rtnl_link_ops ipip_link_ops __read_mostly;  static int ipip_err(struct sk_buff *skb, u32 info)  { @@ -319,45 +133,35 @@ static int ipip_err(struct sk_buff *skb, u32 info)     8 bytes of packet payload. It means, that precise relaying of     ICMP in the real Internet is absolutely infeasible.   */ -	struct iphdr *iph = (struct iphdr *)skb->data; -	const int type = icmp_hdr(skb)->type; -	const int code = icmp_hdr(skb)->code; +	struct net *net = dev_net(skb->dev); +	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); +	const struct iphdr *iph = (const struct iphdr *)skb->data;  	struct ip_tunnel *t;  	int err; +	const int type = icmp_hdr(skb)->type; +	const int code = icmp_hdr(skb)->code; -	switch (type) { -	default: -	case ICMP_PARAMETERPROB: -		return 0; - -	case ICMP_DEST_UNREACH: -		switch (code) { -		case ICMP_SR_FAILED: -		case ICMP_PORT_UNREACH: -			/* Impossible event. */ -			return 0; -		case ICMP_FRAG_NEEDED: -			/* Soft state for pmtu is maintained by IP core. */ -			return 0; -		default: -			/* All others are translated to HOST_UNREACH. -			   rfc2003 contains "deep thoughts" about NET_UNREACH, -			   I believe they are just ether pollution. --ANK -			 */ -			break; -		} -		break; -	case ICMP_TIME_EXCEEDED: -		if (code != ICMP_EXC_TTL) -			return 0; -		break; +	err = -ENOENT; +	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, +			     iph->daddr, iph->saddr, 0); +	if (t == NULL) +		goto out; + +	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { +		ipv4_update_pmtu(skb, dev_net(skb->dev), info, +				 t->parms.link, 0, IPPROTO_IPIP, 0); +		err = 0; +		goto out;  	} -	err = -ENOENT; +	if (type == ICMP_REDIRECT) { +		ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, +			      IPPROTO_IPIP, 0); +		err = 0; +		goto out; +	} -	rcu_read_lock(); -	t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); -	if (t == NULL || t->parms.iph.daddr == 0) +	if (t->parms.iph.daddr == 0)  		goto out;  	err = 0; @@ -369,543 +173,312 @@ static int ipip_err(struct sk_buff *skb, u32 info)  	else  		t->err_count = 1;  	t->err_time = jiffies; +  out: -	rcu_read_unlock();  	return err;  } -static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, -					struct sk_buff *skb) -{ -	struct iphdr *inner_iph = ip_hdr(skb); - -	if (INET_ECN_is_ce(outer_iph->tos)) -		IP_ECN_set_ce(inner_iph); -} +static const struct tnl_ptk_info tpi = { +	/* no tunnel info required for ipip. */ +	.proto = htons(ETH_P_IP), +};  static int ipip_rcv(struct sk_buff *skb)  { +	struct net *net = dev_net(skb->dev); +	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id);  	struct ip_tunnel *tunnel; -	const struct iphdr *iph = ip_hdr(skb); - -	rcu_read_lock(); -	tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); -	if (tunnel != NULL) { -		struct pcpu_tstats *tstats; - -		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { -			rcu_read_unlock(); -			kfree_skb(skb); -			return 0; -		} - -		secpath_reset(skb); - -		skb->mac_header = skb->network_header; -		skb_reset_network_header(skb); -		skb->protocol = htons(ETH_P_IP); -		skb->pkt_type = PACKET_HOST; - -		tstats = this_cpu_ptr(tunnel->dev->tstats); -		tstats->rx_packets++; -		tstats->rx_bytes += skb->len; - -		__skb_tunnel_rx(skb, tunnel->dev); - -		ipip_ecn_decapsulate(iph, skb); - -		netif_rx(skb); - -		rcu_read_unlock(); -		return 0; +	const struct iphdr *iph; + +	iph = ip_hdr(skb); +	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, +			iph->saddr, iph->daddr, 0); +	if (tunnel) { +		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) +			goto drop; +		if (iptunnel_pull_header(skb, 0, tpi.proto)) +			goto drop; +		return ip_tunnel_rcv(tunnel, skb, &tpi, log_ecn_error);  	} -	rcu_read_unlock();  	return -1; + +drop: +	kfree_skb(skb); +	return 0;  }  /*   *	This function assumes it is being called from dev_queue_xmit()   *	and that skb is filled properly by that function.   */ -  static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)  {  	struct ip_tunnel *tunnel = netdev_priv(dev); -	struct pcpu_tstats *tstats; -	struct iphdr  *tiph = &tunnel->parms.iph; -	u8     tos = tunnel->parms.iph.tos; -	__be16 df = tiph->frag_off; -	struct rtable *rt;     			/* Route to the other host */ -	struct net_device *tdev;		/* Device to other host */ -	struct iphdr  *old_iph = ip_hdr(skb); -	struct iphdr  *iph;			/* Our new IP header */ -	unsigned int max_headroom;		/* The extra header space needed */ -	__be32 dst = tiph->daddr; -	int    mtu; - -	if (skb->protocol != htons(ETH_P_IP)) -		goto tx_error; - -	if (tos & 1) -		tos = old_iph->tos; - -	if (!dst) { -		/* NBMA tunnel */ -		if ((rt = skb_rtable(skb)) == NULL) { -			dev->stats.tx_fifo_errors++; -			goto tx_error; -		} -		if ((dst = rt->rt_gateway) == 0) -			goto tx_error_icmp; -	} +	const struct iphdr  *tiph = &tunnel->parms.iph; -	{ -		struct flowi fl = { -			.oif = tunnel->parms.link, -			.fl4_dst = dst, -			.fl4_src= tiph->saddr, -			.fl4_tos = RT_TOS(tos), -			.proto = IPPROTO_IPIP -		}; - -		if (ip_route_output_key(dev_net(dev), &rt, &fl)) { -			dev->stats.tx_carrier_errors++; -			goto tx_error_icmp; -		} -	} -	tdev = rt->dst.dev; - -	if (tdev == dev) { -		ip_rt_put(rt); -		dev->stats.collisions++; +	if (unlikely(skb->protocol != htons(ETH_P_IP)))  		goto tx_error; -	} - -	df |= old_iph->frag_off & htons(IP_DF); - -	if (df) { -		mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); - -		if (mtu < 68) { -			dev->stats.collisions++; -			ip_rt_put(rt); -			goto tx_error; -		} -		if (skb_dst(skb)) -			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); - -		if ((old_iph->frag_off & htons(IP_DF)) && -		    mtu < ntohs(old_iph->tot_len)) { -			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, -				  htonl(mtu)); -			ip_rt_put(rt); -			goto tx_error; -		} -	} - -	if (tunnel->err_count > 0) { -		if (time_before(jiffies, -				tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { -			tunnel->err_count--; -			dst_link_failure(skb); -		} else -			tunnel->err_count = 0; -	} - -	/* -	 * Okay, now see if we can stuff it in the buffer as-is. -	 */ -	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr)); - -	if (skb_headroom(skb) < max_headroom || skb_shared(skb) || -	    (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { -		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); -		if (!new_skb) { -			ip_rt_put(rt); -			dev->stats.tx_dropped++; -			dev_kfree_skb(skb); -			return NETDEV_TX_OK; -		} -		if (skb->sk) -			skb_set_owner_w(new_skb, skb->sk); -		dev_kfree_skb(skb); -		skb = new_skb; -		old_iph = ip_hdr(skb); -	} +	skb = iptunnel_handle_offloads(skb, false, SKB_GSO_IPIP); +	if (IS_ERR(skb)) +		goto out; -	skb->transport_header = skb->network_header; -	skb_push(skb, sizeof(struct iphdr)); -	skb_reset_network_header(skb); -	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); -	IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | -			      IPSKB_REROUTED); -	skb_dst_drop(skb); -	skb_dst_set(skb, &rt->dst); - -	/* -	 *	Push down and install the IPIP header. -	 */ - -	iph 			=	ip_hdr(skb); -	iph->version		=	4; -	iph->ihl		=	sizeof(struct iphdr)>>2; -	iph->frag_off		=	df; -	iph->protocol		=	IPPROTO_IPIP; -	iph->tos		=	INET_ECN_encapsulate(tos, old_iph->tos); -	iph->daddr		=	rt->rt_dst; -	iph->saddr		=	rt->rt_src; - -	if ((iph->ttl = tiph->ttl) == 0) -		iph->ttl	=	old_iph->ttl; - -	nf_reset(skb); -	tstats = this_cpu_ptr(dev->tstats); -	__IPTUNNEL_XMIT(tstats, &dev->stats); +	ip_tunnel_xmit(skb, dev, tiph, tiph->protocol);  	return NETDEV_TX_OK; -tx_error_icmp: -	dst_link_failure(skb);  tx_error: +	kfree_skb(skb); +out:  	dev->stats.tx_errors++; -	dev_kfree_skb(skb);  	return NETDEV_TX_OK;  } -static void ipip_tunnel_bind_dev(struct net_device *dev) -{ -	struct net_device *tdev = NULL; -	struct ip_tunnel *tunnel; -	struct iphdr *iph; - -	tunnel = netdev_priv(dev); -	iph = &tunnel->parms.iph; - -	if (iph->daddr) { -		struct flowi fl = { -			.oif = tunnel->parms.link, -			.fl4_dst = iph->daddr, -			.fl4_src = iph->saddr, -			.fl4_tos = RT_TOS(iph->tos), -			.proto = IPPROTO_IPIP -		}; -		struct rtable *rt; - -		if (!ip_route_output_key(dev_net(dev), &rt, &fl)) { -			tdev = rt->dst.dev; -			ip_rt_put(rt); -		} -		dev->flags |= IFF_POINTOPOINT; -	} - -	if (!tdev && tunnel->parms.link) -		tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link); - -	if (tdev) { -		dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr); -		dev->mtu = tdev->mtu - sizeof(struct iphdr); -	} -	dev->iflink = tunnel->parms.link; -} -  static int -ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd) +ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)  {  	int err = 0;  	struct ip_tunnel_parm p; -	struct ip_tunnel *t; -	struct net *net = dev_net(dev); -	struct ipip_net *ipn = net_generic(net, ipip_net_id); - -	switch (cmd) { -	case SIOCGETTUNNEL: -		t = NULL; -		if (dev == ipn->fb_tunnel_dev) { -			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { -				err = -EFAULT; -				break; -			} -			t = ipip_tunnel_locate(net, &p, 0); -		} -		if (t == NULL) -			t = netdev_priv(dev); -		memcpy(&p, &t->parms, sizeof(p)); -		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) -			err = -EFAULT; -		break; - -	case SIOCADDTUNNEL: -	case SIOCCHGTUNNEL: -		err = -EPERM; -		if (!capable(CAP_NET_ADMIN)) -			goto done; - -		err = -EFAULT; -		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) -			goto done; - -		err = -EINVAL; + +	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) +		return -EFAULT; + +	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {  		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||  		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF))) -			goto done; -		if (p.iph.ttl) -			p.iph.frag_off |= htons(IP_DF); - -		t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); - -		if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { -			if (t != NULL) { -				if (t->dev != dev) { -					err = -EEXIST; -					break; -				} -			} else { -				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) || -				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) { -					err = -EINVAL; -					break; -				} -				t = netdev_priv(dev); -				ipip_tunnel_unlink(ipn, t); -				synchronize_net(); -				t->parms.iph.saddr = p.iph.saddr; -				t->parms.iph.daddr = p.iph.daddr; -				memcpy(dev->dev_addr, &p.iph.saddr, 4); -				memcpy(dev->broadcast, &p.iph.daddr, 4); -				ipip_tunnel_link(ipn, t); -				netdev_state_change(dev); -			} -		} - -		if (t) { -			err = 0; -			if (cmd == SIOCCHGTUNNEL) { -				t->parms.iph.ttl = p.iph.ttl; -				t->parms.iph.tos = p.iph.tos; -				t->parms.iph.frag_off = p.iph.frag_off; -				if (t->parms.link != p.link) { -					t->parms.link = p.link; -					ipip_tunnel_bind_dev(dev); -					netdev_state_change(dev); -				} -			} -			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p))) -				err = -EFAULT; -		} else -			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); -		break; - -	case SIOCDELTUNNEL: -		err = -EPERM; -		if (!capable(CAP_NET_ADMIN)) -			goto done; - -		if (dev == ipn->fb_tunnel_dev) { -			err = -EFAULT; -			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) -				goto done; -			err = -ENOENT; -			if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL) -				goto done; -			err = -EPERM; -			if (t->dev == ipn->fb_tunnel_dev) -				goto done; -			dev = t->dev; -		} -		unregister_netdevice(dev); -		err = 0; -		break; - -	default: -		err = -EINVAL; +			return -EINVAL;  	} -done: -	return err; -} +	p.i_key = p.o_key = p.i_flags = p.o_flags = 0; +	if (p.iph.ttl) +		p.iph.frag_off |= htons(IP_DF); + +	err = ip_tunnel_ioctl(dev, &p, cmd); +	if (err) +		return err; + +	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) +		return -EFAULT; -static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu) -{ -	if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr)) -		return -EINVAL; -	dev->mtu = new_mtu;  	return 0;  }  static const struct net_device_ops ipip_netdev_ops = { -	.ndo_uninit	= ipip_tunnel_uninit, +	.ndo_init       = ipip_tunnel_init, +	.ndo_uninit     = ip_tunnel_uninit,  	.ndo_start_xmit	= ipip_tunnel_xmit,  	.ndo_do_ioctl	= ipip_tunnel_ioctl, -	.ndo_change_mtu	= ipip_tunnel_change_mtu, -	.ndo_get_stats  = ipip_get_stats, +	.ndo_change_mtu = ip_tunnel_change_mtu, +	.ndo_get_stats64 = ip_tunnel_get_stats64,  }; -static void ipip_dev_free(struct net_device *dev) -{ -	free_percpu(dev->tstats); -	free_netdev(dev); -} +#define IPIP_FEATURES (NETIF_F_SG |		\ +		       NETIF_F_FRAGLIST |	\ +		       NETIF_F_HIGHDMA |	\ +		       NETIF_F_GSO_SOFTWARE |	\ +		       NETIF_F_HW_CSUM)  static void ipip_tunnel_setup(struct net_device *dev)  {  	dev->netdev_ops		= &ipip_netdev_ops; -	dev->destructor		= ipip_dev_free;  	dev->type		= ARPHRD_TUNNEL; -	dev->hard_header_len 	= LL_MAX_HEADER + sizeof(struct iphdr); -	dev->mtu		= ETH_DATA_LEN - sizeof(struct iphdr);  	dev->flags		= IFF_NOARP;  	dev->iflink		= 0;  	dev->addr_len		= 4; -	dev->features		|= NETIF_F_NETNS_LOCAL;  	dev->features		|= NETIF_F_LLTX;  	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE; + +	dev->features		|= IPIP_FEATURES; +	dev->hw_features	|= IPIP_FEATURES; +	ip_tunnel_setup(dev, ipip_net_id);  }  static int ipip_tunnel_init(struct net_device *dev)  {  	struct ip_tunnel *tunnel = netdev_priv(dev); -	tunnel->dev = dev; -	strcpy(tunnel->parms.name, dev->name); -  	memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);  	memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4); -	ipip_tunnel_bind_dev(dev); - -	dev->tstats = alloc_percpu(struct pcpu_tstats); -	if (!dev->tstats) -		return -ENOMEM; - -	return 0; +	tunnel->hlen = 0; +	tunnel->parms.iph.protocol = IPPROTO_IPIP; +	return ip_tunnel_init(dev);  } -static int __net_init ipip_fb_tunnel_init(struct net_device *dev) +static void ipip_netlink_parms(struct nlattr *data[], +			       struct ip_tunnel_parm *parms)  { -	struct ip_tunnel *tunnel = netdev_priv(dev); -	struct iphdr *iph = &tunnel->parms.iph; -	struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id); +	memset(parms, 0, sizeof(*parms)); -	tunnel->dev = dev; -	strcpy(tunnel->parms.name, dev->name); +	parms->iph.version = 4; +	parms->iph.protocol = IPPROTO_IPIP; +	parms->iph.ihl = 5; -	iph->version		= 4; -	iph->protocol		= IPPROTO_IPIP; -	iph->ihl		= 5; +	if (!data) +		return; -	dev->tstats = alloc_percpu(struct pcpu_tstats); -	if (!dev->tstats) -		return -ENOMEM; +	if (data[IFLA_IPTUN_LINK]) +		parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); -	dev_hold(dev); -	rcu_assign_pointer(ipn->tunnels_wc[0], tunnel); -	return 0; -} +	if (data[IFLA_IPTUN_LOCAL]) +		parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); -static struct xfrm_tunnel ipip_handler __read_mostly = { -	.handler	=	ipip_rcv, -	.err_handler	=	ipip_err, -	.priority	=	1, -}; +	if (data[IFLA_IPTUN_REMOTE]) +		parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); -static const char banner[] __initconst = -	KERN_INFO "IPv4 over IPv4 tunneling driver\n"; +	if (data[IFLA_IPTUN_TTL]) { +		parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); +		if (parms->iph.ttl) +			parms->iph.frag_off = htons(IP_DF); +	} -static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head) +	if (data[IFLA_IPTUN_TOS]) +		parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); + +	if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) +		parms->iph.frag_off = htons(IP_DF); +} + +static int ipip_newlink(struct net *src_net, struct net_device *dev, +			struct nlattr *tb[], struct nlattr *data[])  { -	int prio; - -	for (prio = 1; prio < 4; prio++) { -		int h; -		for (h = 0; h < HASH_SIZE; h++) { -			struct ip_tunnel *t; - -			t = rtnl_dereference(ipn->tunnels[prio][h]); -			while (t != NULL) { -				unregister_netdevice_queue(t->dev, head); -				t = rtnl_dereference(t->next); -			} -		} -	} +	struct ip_tunnel_parm p; + +	ipip_netlink_parms(data, &p); +	return ip_tunnel_newlink(dev, tb, &p);  } -static int __net_init ipip_init_net(struct net *net) +static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], +			   struct nlattr *data[])  { -	struct ipip_net *ipn = net_generic(net, ipip_net_id); -	int err; +	struct ip_tunnel_parm p; -	ipn->tunnels[0] = ipn->tunnels_wc; -	ipn->tunnels[1] = ipn->tunnels_l; -	ipn->tunnels[2] = ipn->tunnels_r; -	ipn->tunnels[3] = ipn->tunnels_r_l; - -	ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), -					   "tunl0", -					   ipip_tunnel_setup); -	if (!ipn->fb_tunnel_dev) { -		err = -ENOMEM; -		goto err_alloc_dev; -	} -	dev_net_set(ipn->fb_tunnel_dev, net); +	ipip_netlink_parms(data, &p); -	err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev); -	if (err) -		goto err_reg_dev; +	if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) || +	    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr)) +		return -EINVAL; + +	return ip_tunnel_changelink(dev, tb, &p); +} -	if ((err = register_netdev(ipn->fb_tunnel_dev))) -		goto err_reg_dev; +static size_t ipip_get_size(const struct net_device *dev) +{ +	return +		/* IFLA_IPTUN_LINK */ +		nla_total_size(4) + +		/* IFLA_IPTUN_LOCAL */ +		nla_total_size(4) + +		/* IFLA_IPTUN_REMOTE */ +		nla_total_size(4) + +		/* IFLA_IPTUN_TTL */ +		nla_total_size(1) + +		/* IFLA_IPTUN_TOS */ +		nla_total_size(1) + +		/* IFLA_IPTUN_PMTUDISC */ +		nla_total_size(1) + +		0; +} +static int ipip_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ +	struct ip_tunnel *tunnel = netdev_priv(dev); +	struct ip_tunnel_parm *parm = &tunnel->parms; + +	if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || +	    nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || +	    nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || +	    nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || +	    nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || +	    nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, +		       !!(parm->iph.frag_off & htons(IP_DF)))) +		goto nla_put_failure;  	return 0; -err_reg_dev: -	ipip_dev_free(ipn->fb_tunnel_dev); -err_alloc_dev: -	/* nothing */ -	return err; +nla_put_failure: +	return -EMSGSIZE; +} + +static const struct nla_policy ipip_policy[IFLA_IPTUN_MAX + 1] = { +	[IFLA_IPTUN_LINK]		= { .type = NLA_U32 }, +	[IFLA_IPTUN_LOCAL]		= { .type = NLA_U32 }, +	[IFLA_IPTUN_REMOTE]		= { .type = NLA_U32 }, +	[IFLA_IPTUN_TTL]		= { .type = NLA_U8 }, +	[IFLA_IPTUN_TOS]		= { .type = NLA_U8 }, +	[IFLA_IPTUN_PMTUDISC]		= { .type = NLA_U8 }, +}; + +static struct rtnl_link_ops ipip_link_ops __read_mostly = { +	.kind		= "ipip", +	.maxtype	= IFLA_IPTUN_MAX, +	.policy		= ipip_policy, +	.priv_size	= sizeof(struct ip_tunnel), +	.setup		= ipip_tunnel_setup, +	.newlink	= ipip_newlink, +	.changelink	= ipip_changelink, +	.dellink	= ip_tunnel_dellink, +	.get_size	= ipip_get_size, +	.fill_info	= ipip_fill_info, +}; + +static struct xfrm_tunnel ipip_handler __read_mostly = { +	.handler	=	ipip_rcv, +	.err_handler	=	ipip_err, +	.priority	=	1, +}; + +static int __net_init ipip_init_net(struct net *net) +{ +	return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0");  }  static void __net_exit ipip_exit_net(struct net *net)  { -	struct ipip_net *ipn = net_generic(net, ipip_net_id); -	LIST_HEAD(list); - -	rtnl_lock(); -	ipip_destroy_tunnels(ipn, &list); -	unregister_netdevice_queue(ipn->fb_tunnel_dev, &list); -	unregister_netdevice_many(&list); -	rtnl_unlock(); +	struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); +	ip_tunnel_delete_net(itn, &ipip_link_ops);  }  static struct pernet_operations ipip_net_ops = {  	.init = ipip_init_net,  	.exit = ipip_exit_net,  	.id   = &ipip_net_id, -	.size = sizeof(struct ipip_net), +	.size = sizeof(struct ip_tunnel_net),  };  static int __init ipip_init(void)  {  	int err; -	printk(banner); +	pr_info("ipip: IPv4 over IPv4 tunneling driver\n");  	err = register_pernet_device(&ipip_net_ops);  	if (err < 0)  		return err;  	err = xfrm4_tunnel_register(&ipip_handler, AF_INET);  	if (err < 0) { -		unregister_pernet_device(&ipip_net_ops); -		printk(KERN_INFO "ipip init: can't register tunnel\n"); +		pr_info("%s: can't register tunnel\n", __func__); +		goto xfrm_tunnel_failed;  	} +	err = rtnl_link_register(&ipip_link_ops); +	if (err < 0) +		goto rtnl_link_failed; + +out:  	return err; + +rtnl_link_failed: +	xfrm4_tunnel_deregister(&ipip_handler, AF_INET); +xfrm_tunnel_failed: +	unregister_pernet_device(&ipip_net_ops); +	goto out;  }  static void __exit ipip_fini(void)  { +	rtnl_link_unregister(&ipip_link_ops);  	if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET)) -		printk(KERN_INFO "ipip close: can't deregister tunnel\n"); +		pr_info("%s: can't deregister tunnel\n", __func__);  	unregister_pernet_device(&ipip_net_ops);  } @@ -913,4 +486,5 @@ static void __exit ipip_fini(void)  module_init(ipip_init);  module_exit(ipip_fini);  MODULE_LICENSE("GPL"); -MODULE_ALIAS("tunl0"); +MODULE_ALIAS_RTNL_LINK("ipip"); +MODULE_ALIAS_NETDEV("tunl0");  | 
