Diffstat (limited to 'net/ipv4/ip_vti.c')
-rw-r--r--	net/ipv4/ip_vti.c | 367
1 file changed, 249 insertions, 118 deletions
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index e805e7b3030..b8960f3527f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -34,6 +34,7 @@
 #include <linux/init.h>
 #include <linux/netfilter_ipv4.h>
 #include <linux/if_ether.h>
+#include <linux/icmpv6.h>
 
 #include <net/sock.h>
 #include <net/ip.h>
@@ -49,145 +50,131 @@ static struct rtnl_link_ops vti_link_ops __read_mostly;
 static int vti_net_id __read_mostly;
 static int vti_tunnel_init(struct net_device *dev);
 
-static int vti_err(struct sk_buff *skb, u32 info)
+static int vti_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+		     int encap_type)
 {
-
-	/* All the routers (except for Linux) return only
-	 * 8 bytes of packet payload. It means, that precise relaying of
-	 * ICMP in the real Internet is absolutely infeasible.
-	 */
+	struct ip_tunnel *tunnel;
+	const struct iphdr *iph = ip_hdr(skb);
 	struct net *net = dev_net(skb->dev);
 	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
-	struct iphdr *iph = (struct iphdr *)skb->data;
-	const int type = icmp_hdr(skb)->type;
-	const int code = icmp_hdr(skb)->code;
-	struct ip_tunnel *t;
-	int err;
-
-	switch (type) {
-	default:
-	case ICMP_PARAMETERPROB:
-		return 0;
-
-	case ICMP_DEST_UNREACH:
-		switch (code) {
-		case ICMP_SR_FAILED:
-		case ICMP_PORT_UNREACH:
-			/* Impossible event. */
-			return 0;
-		default:
-			/* All others are translated to HOST_UNREACH. */
-			break;
-		}
-		break;
-	case ICMP_TIME_EXCEEDED:
-		if (code != ICMP_EXC_TTL)
-			return 0;
-		break;
-	}
 
-	err = -ENOENT;
+	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+				  iph->saddr, iph->daddr, 0);
+	if (tunnel != NULL) {
+		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+			goto drop;
 
-	t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
-			     iph->daddr, iph->saddr, 0);
-	if (t == NULL)
-		goto out;
+		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel;
+		skb->mark = be32_to_cpu(tunnel->parms.i_key);
 
-	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
-		ipv4_update_pmtu(skb, dev_net(skb->dev), info,
-				 t->parms.link, 0, IPPROTO_IPIP, 0);
-		err = 0;
-		goto out;
+		return xfrm_input(skb, nexthdr, spi, encap_type);
 	}
-	err = 0;
-	if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
-		goto out;
-
-	if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
-		t->err_count++;
-	else
-		t->err_count = 1;
-	t->err_time = jiffies;
-out:
-	return err;
+
+	return -EINVAL;
+drop:
+	kfree_skb(skb);
+	return 0;
 }
 
-/* We dont digest the packet therefore let the packet pass */
 static int vti_rcv(struct sk_buff *skb)
 {
-	struct ip_tunnel *tunnel;
-	const struct iphdr *iph = ip_hdr(skb);
-	struct net *net = dev_net(skb->dev);
-	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+	XFRM_SPI_SKB_CB(skb)->family = AF_INET;
+	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
 
-	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
-				  iph->saddr, iph->daddr, 0);
-	if (tunnel != NULL) {
-		struct pcpu_tstats *tstats;
-
-		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
-			return -1;
+	return vti_input(skb, ip_hdr(skb)->protocol, 0, 0);
+}
 
-		tstats = this_cpu_ptr(tunnel->dev->tstats);
-		u64_stats_update_begin(&tstats->syncp);
-		tstats->rx_packets++;
-		tstats->rx_bytes += skb->len;
-		u64_stats_update_end(&tstats->syncp);
+static int vti_rcv_cb(struct sk_buff *skb, int err)
+{
+	unsigned short family;
+	struct net_device *dev;
+	struct pcpu_sw_netstats *tstats;
+	struct xfrm_state *x;
+	struct ip_tunnel *tunnel = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4;
 
-		skb->mark = 0;
-		secpath_reset(skb);
-		skb->dev = tunnel->dev;
+	if (!tunnel)
 		return 1;
+
+	dev = tunnel->dev;
+
+	if (err) {
+		dev->stats.rx_errors++;
+		dev->stats.rx_dropped++;
+
+		return 0;
 	}
 
-	return -1;
+	x = xfrm_input_state(skb);
+	family = x->inner_mode->afinfo->family;
+
+	if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+		return -EPERM;
+
+	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(skb->dev)));
+	skb->dev = dev;
+
+	tstats = this_cpu_ptr(dev->tstats);
+
+	u64_stats_update_begin(&tstats->syncp);
+	tstats->rx_packets++;
+	tstats->rx_bytes += skb->len;
+	u64_stats_update_end(&tstats->syncp);
+
+	return 0;
 }
 
-/* This function assumes it is being called from dev_queue_xmit()
- * and that skb is filled properly by that function.
- */
+static bool vti_state_check(const struct xfrm_state *x, __be32 dst, __be32 src)
+{
+	xfrm_address_t *daddr = (xfrm_address_t *)&dst;
+	xfrm_address_t *saddr = (xfrm_address_t *)&src;
 
-static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+	/* if there is no transform then this tunnel is not functional.
+	 * Or if the xfrm is not mode tunnel.
+	 */
+	if (!x || x->props.mode != XFRM_MODE_TUNNEL ||
+	    x->props.family != AF_INET)
+		return false;
+
+	if (!dst)
+		return xfrm_addr_equal(saddr, &x->props.saddr, AF_INET);
+
+	if (!xfrm_state_addr_check(x, daddr, saddr, AF_INET))
+		return false;
+
+	return true;
+}
+
+static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
+			    struct flowi *fl)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
-	struct iphdr  *tiph = &tunnel->parms.iph;
-	u8     tos;
-	struct rtable *rt;		/* Route to the other host */
+	struct ip_tunnel_parm *parms = &tunnel->parms;
+	struct dst_entry *dst = skb_dst(skb);
 	struct net_device *tdev;	/* Device to other host */
-	struct iphdr  *old_iph = ip_hdr(skb);
-	__be32 dst = tiph->daddr;
-	struct flowi4 fl4;
 	int err;
 
-	if (skb->protocol != htons(ETH_P_IP))
-		goto tx_error;
-
-	tos = old_iph->tos;
+	if (!dst) {
+		dev->stats.tx_carrier_errors++;
+		goto tx_error_icmp;
+	}
 
-	memset(&fl4, 0, sizeof(fl4));
-	flowi4_init_output(&fl4, tunnel->parms.link,
-			   be32_to_cpu(tunnel->parms.i_key), RT_TOS(tos),
-			   RT_SCOPE_UNIVERSE,
-			   IPPROTO_IPIP, 0,
-			   dst, tiph->saddr, 0, 0);
-	rt = ip_route_output_key(dev_net(dev), &fl4);
-	if (IS_ERR(rt)) {
+	dst_hold(dst);
+	dst = xfrm_lookup(tunnel->net, dst, fl, NULL, 0);
+	if (IS_ERR(dst)) {
 		dev->stats.tx_carrier_errors++;
 		goto tx_error_icmp;
 	}
-	/* if there is no transform then this tunnel is not functional.
-	 * Or if the xfrm is not mode tunnel.
-	 */
-	if (!rt->dst.xfrm ||
-	    rt->dst.xfrm->props.mode != XFRM_MODE_TUNNEL) {
+
+	if (!vti_state_check(dst->xfrm, parms->iph.daddr, parms->iph.saddr)) {
 		dev->stats.tx_carrier_errors++;
+		dst_release(dst);
 		goto tx_error_icmp;
 	}
-	tdev = rt->dst.dev;
+
+	tdev = dst->dev;
 
 	if (tdev == dev) {
-		ip_rt_put(rt);
+		dst_release(dst);
 		dev->stats.collisions++;
 		goto tx_error;
 	}
@@ -201,10 +188,8 @@ static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 			tunnel->err_count = 0;
 	}
 
-	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-	skb_dst_drop(skb);
-	skb_dst_set(skb, &rt->dst);
-	nf_reset(skb);
+	skb_scrub_packet(skb, !net_eq(tunnel->net, dev_net(dev)));
+	skb_dst_set(skb, dst);
 	skb->dev = skb_dst(skb)->dev;
 
 	err = dst_output(skb);
@@ -217,10 +202,102 @@ tx_error_icmp:
 	dst_link_failure(skb);
 tx_error:
 	dev->stats.tx_errors++;
-	dev_kfree_skb(skb);
+	kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
 
+/* This function assumes it is being called from dev_queue_xmit()
+ * and that skb is filled properly by that function.
+ */
+static netdev_tx_t vti_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+{
+	struct ip_tunnel *tunnel = netdev_priv(dev);
+	struct flowi fl;
+
+	memset(&fl, 0, sizeof(fl));
+
+	skb->mark = be32_to_cpu(tunnel->parms.o_key);
+
+	switch (skb->protocol) {
+	case htons(ETH_P_IP):
+		xfrm_decode_session(skb, &fl, AF_INET);
+		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+		break;
+	case htons(ETH_P_IPV6):
+		xfrm_decode_session(skb, &fl, AF_INET6);
+		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+		break;
+	default:
+		dev->stats.tx_errors++;
+		dev_kfree_skb(skb);
+		return NETDEV_TX_OK;
+	}
+
+	return vti_xmit(skb, dev, &fl);
+}
+
+static int vti4_err(struct sk_buff *skb, u32 info)
+{
+	__be32 spi;
+	__u32 mark;
+	struct xfrm_state *x;
+	struct ip_tunnel *tunnel;
+	struct ip_esp_hdr *esph;
+	struct ip_auth_hdr *ah ;
+	struct ip_comp_hdr *ipch;
+	struct net *net = dev_net(skb->dev);
+	const struct iphdr *iph = (const struct iphdr *)skb->data;
+	int protocol = iph->protocol;
+	struct ip_tunnel_net *itn = net_generic(net, vti_net_id);
+
+	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY,
+				  iph->daddr, iph->saddr, 0);
+	if (!tunnel)
+		return -1;
+
+	mark = be32_to_cpu(tunnel->parms.o_key);
+
+	switch (protocol) {
+	case IPPROTO_ESP:
+		esph = (struct ip_esp_hdr *)(skb->data+(iph->ihl<<2));
+		spi = esph->spi;
+		break;
+	case IPPROTO_AH:
+		ah = (struct ip_auth_hdr *)(skb->data+(iph->ihl<<2));
+		spi = ah->spi;
+		break;
+	case IPPROTO_COMP:
+		ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
+		spi = htonl(ntohs(ipch->cpi));
+		break;
+	default:
+		return 0;
+	}
+
+	switch (icmp_hdr(skb)->type) {
+	case ICMP_DEST_UNREACH:
+		if (icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
+			return 0;
+	case ICMP_REDIRECT:
+		break;
+	default:
+		return 0;
+	}
+
+	x = xfrm_state_lookup(net, mark, (const xfrm_address_t *)&iph->daddr,
+			      spi, protocol, AF_INET);
+	if (!x)
+		return 0;
+
+	if (icmp_hdr(skb)->type == ICMP_DEST_UNREACH)
+		ipv4_update_pmtu(skb, net, info, 0, 0, protocol, 0);
+	else
+		ipv4_redirect(skb, net, 0, 0, protocol, 0);
+	xfrm_state_put(x);
+
+	return 0;
+}
+
 static int
 vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
@@ -236,12 +313,19 @@ vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 			return -EINVAL;
 	}
 
+	if (!(p.i_flags & GRE_KEY))
+		p.i_key = 0;
+	if (!(p.o_flags & GRE_KEY))
+		p.o_key = 0;
+
+	p.i_flags = VTI_ISVTI;
+
 	err = ip_tunnel_ioctl(dev, &p, cmd);
 	if (err)
 		return err;
 
 	if (cmd != SIOCDELTUNNEL) {
-		p.i_flags |= GRE_KEY | VTI_ISVTI;
+		p.i_flags |= GRE_KEY;
 		p.o_flags |= GRE_KEY;
 	}
 
@@ -262,6 +346,7 @@ static const struct net_device_ops vti_netdev_ops = {
 static void vti_tunnel_setup(struct net_device *dev)
 {
 	dev->netdev_ops		= &vti_netdev_ops;
+	dev->type		= ARPHRD_TUNNEL;
 	ip_tunnel_setup(dev, vti_net_id);
 }
 
@@ -273,13 +358,11 @@ static int vti_tunnel_init(struct net_device *dev)
 	memcpy(dev->dev_addr, &iph->saddr, 4);
 	memcpy(dev->broadcast, &iph->daddr, 4);
 
-	dev->type		= ARPHRD_TUNNEL;
 	dev->hard_header_len	= LL_MAX_HEADER + sizeof(struct iphdr);
 	dev->mtu		= ETH_DATA_LEN;
 	dev->flags		= IFF_NOARP;
 	dev->iflink		= 0;
 	dev->addr_len		= 4;
-	dev->features		|= NETIF_F_NETNS_LOCAL;
 	dev->features		|= NETIF_F_LLTX;
 	dev->priv_flags		&= ~IFF_XMIT_DST_RELEASE;
 
@@ -296,10 +379,28 @@ static void __net_init vti_fb_tunnel_init(struct net_device *dev)
 	iph->ihl		= 5;
 }
 
-static struct xfrm_tunnel vti_handler __read_mostly = {
+static struct xfrm4_protocol vti_esp4_protocol __read_mostly = {
 	.handler	=	vti_rcv,
-	.err_handler	=	vti_err,
-	.priority	=	1,
+	.input_handler	=	vti_input,
+	.cb_handler	=	vti_rcv_cb,
+	.err_handler	=	vti4_err,
+	.priority	=	100,
+};
+
+static struct xfrm4_protocol vti_ah4_protocol __read_mostly = {
+	.handler	=	vti_rcv,
+	.input_handler	=	vti_input,
+	.cb_handler	=	vti_rcv_cb,
+	.err_handler	=	vti4_err,
+	.priority	=	100,
+};
+
+static struct xfrm4_protocol vti_ipcomp4_protocol __read_mostly = {
+	.handler	=	vti_rcv,
+	.input_handler	=	vti_input,
+	.cb_handler	=	vti_rcv_cb,
+	.err_handler	=	vti4_err,
+	.priority	=	100,
 };
 
 static int __net_init vti_init_net(struct net *net)
@@ -343,6 +444,8 @@ static void vti_netlink_parms(struct nlattr *data[],
 	if (!data)
 		return;
 
+	parms->i_flags = VTI_ISVTI;
+
 	if (data[IFLA_VTI_LINK])
 		parms->link = nla_get_u32(data[IFLA_VTI_LINK]);
 
@@ -438,10 +541,31 @@ static int __init vti_init(void)
 	err = register_pernet_device(&vti_net_ops);
 	if (err < 0)
 		return err;
-	err = xfrm4_mode_tunnel_input_register(&vti_handler);
+	err = xfrm4_protocol_register(&vti_esp4_protocol, IPPROTO_ESP);
 	if (err < 0) {
 		unregister_pernet_device(&vti_net_ops);
 		pr_info("vti init: can't register tunnel\n");
+
+		return err;
+	}
+
+	err = xfrm4_protocol_register(&vti_ah4_protocol, IPPROTO_AH);
+	if (err < 0) {
+		xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+		unregister_pernet_device(&vti_net_ops);
+		pr_info("vti init: can't register tunnel\n");
+
+		return err;
+	}
+
+	err = xfrm4_protocol_register(&vti_ipcomp4_protocol, IPPROTO_COMP);
+	if (err < 0) {
+		xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+		xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
+		unregister_pernet_device(&vti_net_ops);
+		pr_info("vti init: can't register tunnel\n");
+
+		return err;
 	}
 
 	err = rtnl_link_register(&vti_link_ops);
@@ -451,7 +575,9 @@ static int __init vti_init(void)
 	return err;
 
 rtnl_link_failed:
-	xfrm4_mode_tunnel_input_deregister(&vti_handler);
+	xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP);
+	xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH);
+	xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP);
 	unregister_pernet_device(&vti_net_ops);
 	return err;
 }
@@ -459,8 +585,13 @@ rtnl_link_failed:
 static void __exit vti_fini(void)
 {
 	rtnl_link_unregister(&vti_link_ops);
-	if (xfrm4_mode_tunnel_input_deregister(&vti_handler))
+	if (xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP))
+		pr_info("vti close: can't deregister tunnel\n");
+	if (xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH))
 		pr_info("vti close: can't deregister tunnel\n");
+	if (xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP))
+		pr_info("vti close: can't deregister tunnel\n");
+
 	unregister_pernet_device(&vti_net_ops);
 }
 
