diff options
Diffstat (limited to 'net/ipv4/xfrm4_policy.c')
| -rw-r--r-- | net/ipv4/xfrm4_policy.c | 201 | 
1 files changed, 124 insertions, 77 deletions
diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index b057d40adde..6156f68a1e9 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -18,47 +18,53 @@  static struct xfrm_policy_afinfo xfrm4_policy_afinfo; -static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, -					  xfrm_address_t *saddr, -					  xfrm_address_t *daddr) +static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, +					    int tos, +					    const xfrm_address_t *saddr, +					    const xfrm_address_t *daddr)  { -	struct flowi fl = { -		.fl4_dst = daddr->a4, -		.fl4_tos = tos, -	}; -	struct dst_entry *dst;  	struct rtable *rt; -	int err; +	memset(fl4, 0, sizeof(*fl4)); +	fl4->daddr = daddr->a4; +	fl4->flowi4_tos = tos;  	if (saddr) -		fl.fl4_src = saddr->a4; +		fl4->saddr = saddr->a4; + +	rt = __ip_route_output_key(net, fl4); +	if (!IS_ERR(rt)) +		return &rt->dst; + +	return ERR_CAST(rt); +} -	err = __ip_route_output_key(net, &rt, &fl); -	dst = &rt->dst; -	if (err) -		dst = ERR_PTR(err); -	return dst; +static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, +					  const xfrm_address_t *saddr, +					  const xfrm_address_t *daddr) +{ +	struct flowi4 fl4; + +	return __xfrm4_dst_lookup(net, &fl4, tos, saddr, daddr);  }  static int xfrm4_get_saddr(struct net *net,  			   xfrm_address_t *saddr, xfrm_address_t *daddr)  {  	struct dst_entry *dst; -	struct rtable *rt; +	struct flowi4 fl4; -	dst = xfrm4_dst_lookup(net, 0, NULL, daddr); +	dst = __xfrm4_dst_lookup(net, &fl4, 0, NULL, daddr);  	if (IS_ERR(dst))  		return -EHOSTUNREACH; -	rt = (struct rtable *)dst; -	saddr->a4 = rt->rt_src; +	saddr->a4 = fl4.saddr;  	dst_release(dst);  	return 0;  } -static int xfrm4_get_tos(struct flowi *fl) +static int xfrm4_get_tos(const struct flowi *fl)  { -	return IPTOS_RT_MASK & fl->fl4_tos; /* Strip ECN bits */ +	return IPTOS_RT_MASK & fl->u.ip4.flowi4_tos; /* Strip ECN bits */  }  static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst, @@ -68,28 +74,26 @@ static int xfrm4_init_path(struct xfrm_dst *path, struct dst_entry *dst,  }  static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, -			  struct flowi *fl) +			  const struct flowi *fl)  {  	struct rtable *rt = (struct rtable *)xdst->route; +	const struct flowi4 *fl4 = &fl->u.ip4; -	xdst->u.rt.fl = *fl; +	xdst->u.rt.rt_iif = fl4->flowi4_iif;  	xdst->u.dst.dev = dev;  	dev_hold(dev); -	xdst->u.rt.peer = rt->peer; -	if (rt->peer) -		atomic_inc(&rt->peer->refcnt); -  	/* Sheit... I remember I did this right. Apparently,  	 * it was magically lost, so this code needs audit */ +	xdst->u.rt.rt_is_input = rt->rt_is_input;  	xdst->u.rt.rt_flags = rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST |  					      RTCF_LOCAL);  	xdst->u.rt.rt_type = rt->rt_type; -	xdst->u.rt.rt_src = rt->rt_src; -	xdst->u.rt.rt_dst = rt->rt_dst;  	xdst->u.rt.rt_gateway = rt->rt_gateway; -	xdst->u.rt.rt_spec_dst = rt->rt_spec_dst; +	xdst->u.rt.rt_uses_gateway = rt->rt_uses_gateway; +	xdst->u.rt.rt_pmtu = rt->rt_pmtu; +	INIT_LIST_HEAD(&xdst->u.rt.rt_uncached);  	return 0;  } @@ -97,13 +101,19 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,  static void  _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)  { -	struct iphdr *iph = ip_hdr(skb); +	const struct iphdr *iph = ip_hdr(skb);  	u8 *xprth = skb_network_header(skb) + iph->ihl * 4; +	struct flowi4 *fl4 = &fl->u.ip4; +	int oif = 0; -	memset(fl, 0, sizeof(struct flowi)); -	fl->mark = skb->mark; +	if (skb_dst(skb)) +		oif = skb_dst(skb)->dev->ifindex; -	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) { +	memset(fl4, 0, sizeof(struct flowi4)); +	fl4->flowi4_mark = skb->mark; +	fl4->flowi4_oif = reverse ? skb->skb_iif : oif; + +	if (!ip_is_fragment(iph)) {  		switch (iph->protocol) {  		case IPPROTO_UDP:  		case IPPROTO_UDPLITE: @@ -114,8 +124,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)  			    pskb_may_pull(skb, xprth + 4 - skb->data)) {  				__be16 *ports = (__be16 *)xprth; -				fl->fl_ip_sport = ports[!!reverse]; -				fl->fl_ip_dport = ports[!reverse]; +				fl4->fl4_sport = ports[!!reverse]; +				fl4->fl4_dport = ports[!reverse];  			}  			break; @@ -123,8 +133,8 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)  			if (pskb_may_pull(skb, xprth + 2 - skb->data)) {  				u8 *icmp = xprth; -				fl->fl_icmp_type = icmp[0]; -				fl->fl_icmp_code = icmp[1]; +				fl4->fl4_icmp_type = icmp[0]; +				fl4->fl4_icmp_code = icmp[1];  			}  			break; @@ -132,15 +142,15 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)  			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {  				__be32 *ehdr = (__be32 *)xprth; -				fl->fl_ipsec_spi = ehdr[0]; +				fl4->fl4_ipsec_spi = ehdr[0];  			}  			break;  		case IPPROTO_AH:  			if (pskb_may_pull(skb, xprth + 8 - skb->data)) { -				__be32 *ah_hdr = (__be32*)xprth; +				__be32 *ah_hdr = (__be32 *)xprth; -				fl->fl_ipsec_spi = ah_hdr[1]; +				fl4->fl4_ipsec_spi = ah_hdr[1];  			}  			break; @@ -148,7 +158,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)  			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {  				__be16 *ipcomp_hdr = (__be16 *)xprth; -				fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); +				fl4->fl4_ipsec_spi = htonl(ntohs(ipcomp_hdr[1]));  			}  			break; @@ -160,20 +170,20 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse)  				if (greflags[0] & GRE_KEY) {  					if (greflags[0] & GRE_CSUM)  						gre_hdr++; -					fl->fl_gre_key = gre_hdr[1]; +					fl4->fl4_gre_key = gre_hdr[1];  				}  			}  			break;  		default: -			fl->fl_ipsec_spi = 0; +			fl4->fl4_ipsec_spi = 0;  			break;  		}  	} -	fl->proto = iph->protocol; -	fl->fl4_dst = reverse ? iph->saddr : iph->daddr; -	fl->fl4_src = reverse ? iph->daddr : iph->saddr; -	fl->fl4_tos = iph->tos; +	fl4->flowi4_proto = iph->protocol; +	fl4->daddr = reverse ? iph->saddr : iph->daddr; +	fl4->saddr = reverse ? iph->daddr : iph->saddr; +	fl4->flowi4_tos = iph->tos;  }  static inline int xfrm4_garbage_collect(struct dst_ops *ops) @@ -184,20 +194,30 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops)  	return (dst_entries_get_slow(ops) > ops->gc_thresh * 2);  } -static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) +static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, +			      struct sk_buff *skb, u32 mtu) +{ +	struct xfrm_dst *xdst = (struct xfrm_dst *)dst; +	struct dst_entry *path = xdst->route; + +	path->ops->update_pmtu(path, sk, skb, mtu); +} + +static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, +			   struct sk_buff *skb)  {  	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;  	struct dst_entry *path = xdst->route; -	path->ops->update_pmtu(path, mtu); +	path->ops->redirect(path, sk, skb);  }  static void xfrm4_dst_destroy(struct dst_entry *dst)  {  	struct xfrm_dst *xdst = (struct xfrm_dst *)dst; -	if (likely(xdst->u.rt.peer)) -		inet_putpeer(xdst->u.rt.peer); +	dst_destroy_metrics_generic(dst); +  	xfrm_dst_destroy(xdst);  } @@ -215,10 +235,12 @@ static struct dst_ops xfrm4_dst_ops = {  	.protocol =		cpu_to_be16(ETH_P_IP),  	.gc =			xfrm4_garbage_collect,  	.update_pmtu =		xfrm4_update_pmtu, +	.redirect =		xfrm4_redirect, +	.cow_metrics =		dst_cow_metrics_generic,  	.destroy =		xfrm4_dst_destroy,  	.ifdown =		xfrm4_dst_ifdown,  	.local_out =		__ip_local_out, -	.gc_thresh =		1024, +	.gc_thresh =		32768,  };  static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { @@ -230,6 +252,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {  	.get_tos =		xfrm4_get_tos,  	.init_path =		xfrm4_init_path,  	.fill_dst =		xfrm4_fill_dst, +	.blackhole_route =	ipv4_blackhole_route,  };  #ifdef CONFIG_SYSCTL @@ -244,43 +267,67 @@ static struct ctl_table xfrm4_policy_table[] = {  	{ }  }; -static struct ctl_table_header *sysctl_hdr; -#endif - -static void __init xfrm4_policy_init(void) +static int __net_init xfrm4_net_init(struct net *net)  { -	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo); +	struct ctl_table *table; +	struct ctl_table_header *hdr; + +	table = xfrm4_policy_table; +	if (!net_eq(net, &init_net)) { +		table = kmemdup(table, sizeof(xfrm4_policy_table), GFP_KERNEL); +		if (!table) +			goto err_alloc; + +		table[0].data = &net->xfrm.xfrm4_dst_ops.gc_thresh; +	} + +	hdr = register_net_sysctl(net, "net/ipv4", table); +	if (!hdr) +		goto err_reg; + +	net->ipv4.xfrm4_hdr = hdr; +	return 0; + +err_reg: +	if (!net_eq(net, &init_net)) +		kfree(table); +err_alloc: +	return -ENOMEM;  } -static void __exit xfrm4_policy_fini(void) +static void __net_exit xfrm4_net_exit(struct net *net)  { -#ifdef CONFIG_SYSCTL -	if (sysctl_hdr) -		unregister_net_sysctl_table(sysctl_hdr); +	struct ctl_table *table; + +	if (net->ipv4.xfrm4_hdr == NULL) +		return; + +	table = net->ipv4.xfrm4_hdr->ctl_table_arg; +	unregister_net_sysctl_table(net->ipv4.xfrm4_hdr); +	if (!net_eq(net, &init_net)) +		kfree(table); +} + +static struct pernet_operations __net_initdata xfrm4_net_ops = { +	.init	= xfrm4_net_init, +	.exit	= xfrm4_net_exit, +};  #endif -	xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo); + +static void __init xfrm4_policy_init(void) +{ +	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);  } -void __init xfrm4_init(int rt_max_size) +void __init xfrm4_init(void)  { -	/* -	 * Select a default value for the gc_thresh based on the main route -	 * table hash size.  It seems to me the worst case scenario is when -	 * we have ipsec operating in transport mode, in which we create a -	 * dst_entry per socket.  The xfrm gc algorithm starts trying to remove -	 * entries at gc_thresh, and prevents new allocations as 2*gc_thresh -	 * so lets set an initial xfrm gc_thresh value at the rt_max_size/2. -	 * That will let us store an ipsec connection per route table entry, -	 * and start cleaning when were 1/2 full -	 */ -	xfrm4_dst_ops.gc_thresh = rt_max_size/2;  	dst_entries_init(&xfrm4_dst_ops);  	xfrm4_state_init();  	xfrm4_policy_init(); +	xfrm4_protocol_init();  #ifdef CONFIG_SYSCTL -	sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, -						xfrm4_policy_table); +	register_pernet_subsys(&xfrm4_net_ops);  #endif  }  | 
