diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2012-03-19 17:02:01 -0700 | 
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2012-03-19 17:02:01 -0700 | 
| commit | 10ce3cc919f50c2043b41ca968b43c26a3672600 (patch) | |
| tree | ea409366a5208aced495bc0516a08b81fd43222e /net/netfilter | |
| parent | 24e3e5ae1e4c2a3a32f5b1f96b4e3fd721806acd (diff) | |
| parent | 5c6a7a62c130afef3d61c1dee153012231ff5cd9 (diff) | |
Merge branch 'next' into for-linus
Diffstat (limited to 'net/netfilter')
51 files changed, 1108 insertions, 319 deletions
| diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 8260b13d93c..f8ac4ef0b79 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -4,6 +4,14 @@ menu "Core Netfilter Configuration"  config NETFILTER_NETLINK  	tristate +config NETFILTER_NETLINK_ACCT +tristate "Netfilter NFACCT over NFNETLINK interface" +	depends on NETFILTER_ADVANCED +	select NETFILTER_NETLINK +	help +	  If this option is enabled, the kernel will include support +	  for extended accounting via NFNETLINK. +  config NETFILTER_NETLINK_QUEUE  	tristate "Netfilter NFQUEUE over NFNETLINK interface"  	depends on NETFILTER_ADVANCED @@ -75,6 +83,16 @@ config NF_CONNTRACK_ZONES  	  If unsure, say `N'. +config NF_CONNTRACK_PROCFS +	bool "Supply CT list in procfs (OBSOLETE)" +	default y +	depends on PROC_FS +	---help--- +	This option enables for the list of known conntrack entries +	to be shown in procfs under net/netfilter/nf_conntrack. This +	is considered obsolete in favor of using the conntrack(8) +	tool which uses Netlink. +  config NF_CONNTRACK_EVENTS  	bool "Connection tracking events"  	depends on NETFILTER_ADVANCED @@ -201,7 +219,6 @@ config NF_CONNTRACK_BROADCAST  config NF_CONNTRACK_NETBIOS_NS  	tristate "NetBIOS name service protocol support" -	depends on NETFILTER_ADVANCED  	select NF_CONNTRACK_BROADCAST  	help  	  NetBIOS name service requests are sent as broadcast messages from an @@ -542,7 +559,6 @@ config NETFILTER_XT_TARGET_NOTRACK  	tristate  '"NOTRACK" target support'  	depends on IP_NF_RAW || IP6_NF_RAW  	depends on NF_CONNTRACK -	depends on NETFILTER_ADVANCED  	help  	  The NOTRACK target allows a select rule to specify  	  which packets *not* to enter the conntrack/NAT @@ -772,6 +788,15 @@ config NETFILTER_XT_MATCH_DSCP  	  To compile it as a module, choose M here.  If unsure, say N. 
+config NETFILTER_XT_MATCH_ECN +	tristate '"ecn" match support' +	depends on NETFILTER_ADVANCED +	---help--- +	This option adds an "ECN" match, which allows you to match against +	the IPv4 and TCP header ECN fields. + +	To compile it as a module, choose M here. If unsure, say N. +  config NETFILTER_XT_MATCH_ESP  	tristate '"esp" match support'  	depends on NETFILTER_ADVANCED @@ -881,6 +906,16 @@ config NETFILTER_XT_MATCH_MULTIPORT  	  To compile it as a module, choose M here.  If unsure, say N. +config NETFILTER_XT_MATCH_NFACCT +	tristate '"nfacct" match support' +	depends on NETFILTER_ADVANCED +	select NETFILTER_NETLINK_ACCT +	help +	  This option allows you to use the extended accounting through +	  nfnetlink_acct. + +	  To compile it as a module, choose M here.  If unsure, say N. +  config NETFILTER_XT_MATCH_OSF  	tristate '"osf" Passive OS fingerprint match'  	depends on NETFILTER_ADVANCED && NETFILTER_NETLINK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1a02853df86..40f4c3d636c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o  obj-$(CONFIG_NETFILTER) = netfilter.o  obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o +obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o  obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o  obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o @@ -80,6 +81,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_CPU) += xt_cpu.o  obj-$(CONFIG_NETFILTER_XT_MATCH_DCCP) += xt_dccp.o  obj-$(CONFIG_NETFILTER_XT_MATCH_DEVGROUP) += xt_devgroup.o  obj-$(CONFIG_NETFILTER_XT_MATCH_DSCP) += xt_dscp.o +obj-$(CONFIG_NETFILTER_XT_MATCH_ECN) += xt_ecn.o  obj-$(CONFIG_NETFILTER_XT_MATCH_ESP) += xt_esp.o  obj-$(CONFIG_NETFILTER_XT_MATCH_HASHLIMIT) += xt_hashlimit.o  obj-$(CONFIG_NETFILTER_XT_MATCH_HELPER) += xt_helper.o @@ -90,6 +92,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o  obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += 
xt_limit.o  obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o  obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o +obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o  obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o  obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o  obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index afca6c78948..b4e8ff05b30 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -54,6 +54,12 @@ EXPORT_SYMBOL_GPL(nf_unregister_afinfo);  struct list_head nf_hooks[NFPROTO_NUMPROTO][NF_MAX_HOOKS] __read_mostly;  EXPORT_SYMBOL(nf_hooks); + +#if defined(CONFIG_JUMP_LABEL) +struct jump_label_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS]; +EXPORT_SYMBOL(nf_hooks_needed); +#endif +  static DEFINE_MUTEX(nf_hook_mutex);  int nf_register_hook(struct nf_hook_ops *reg) @@ -70,6 +76,9 @@ int nf_register_hook(struct nf_hook_ops *reg)  	}  	list_add_rcu(&reg->list, elem->list.prev);  	mutex_unlock(&nf_hook_mutex); +#if defined(CONFIG_JUMP_LABEL) +	jump_label_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); +#endif  	return 0;  }  EXPORT_SYMBOL(nf_register_hook); @@ -79,7 +88,9 @@ void nf_unregister_hook(struct nf_hook_ops *reg)  	mutex_lock(&nf_hook_mutex);  	list_del_rcu(&reg->list);  	mutex_unlock(&nf_hook_mutex); - +#if defined(CONFIG_JUMP_LABEL) +	jump_label_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); +#endif  	synchronize_net();  }  EXPORT_SYMBOL(nf_unregister_hook); @@ -218,7 +229,7 @@ int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)  }  EXPORT_SYMBOL(skb_make_writable); -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK)  /* This does not belong here, but locally generated errors need it if connection     tracking in use: without this, connection may not be in hash table, and hence     manufactured ICMP or RST packets will not be associated with it. 
*/ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 86137b558f4..32dbf0fa89d 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -77,35 +77,42 @@ find_set_type(const char *name, u8 family, u8 revision)  }  /* Unlock, try to load a set type module and lock again */ -static int -try_to_load_type(const char *name) +static bool +load_settype(const char *name)  {  	nfnl_unlock();  	pr_debug("try to load ip_set_%s\n", name);  	if (request_module("ip_set_%s", name) < 0) {  		pr_warning("Can't find ip_set type %s\n", name);  		nfnl_lock(); -		return -IPSET_ERR_FIND_TYPE; +		return false;  	}  	nfnl_lock(); -	return -EAGAIN; +	return true;  }  /* Find a set type and reference it */ +#define find_set_type_get(name, family, revision, found)	\ +	__find_set_type_get(name, family, revision, found, false) +  static int -find_set_type_get(const char *name, u8 family, u8 revision, -		  struct ip_set_type **found) +__find_set_type_get(const char *name, u8 family, u8 revision, +		    struct ip_set_type **found, bool retry)  {  	struct ip_set_type *type;  	int err; +	if (retry && !load_settype(name)) +		return -IPSET_ERR_FIND_TYPE; +  	rcu_read_lock();  	*found = find_set_type(name, family, revision);  	if (*found) {  		err = !try_module_get((*found)->me) ? -EFAULT : 0;  		goto unlock;  	} -	/* Make sure the type is loaded but we don't support the revision */ +	/* Make sure the type is already loaded +	 * but we don't support the revision */  	list_for_each_entry_rcu(type, &ip_set_type_list, list)  		if (STREQ(type->name, name)) {  			err = -IPSET_ERR_FIND_TYPE; @@ -113,7 +120,8 @@ find_set_type_get(const char *name, u8 family, u8 revision,  		}  	rcu_read_unlock(); -	return try_to_load_type(name); +	return retry ? 
-IPSET_ERR_FIND_TYPE : +		__find_set_type_get(name, family, revision, found, true);  unlock:  	rcu_read_unlock(); @@ -124,12 +132,19 @@ unlock:   * If we succeeded, the supported minimal and maximum revisions are   * filled out.   */ +#define find_set_type_minmax(name, family, min, max) \ +	__find_set_type_minmax(name, family, min, max, false) +  static int -find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max) +__find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max, +		       bool retry)  {  	struct ip_set_type *type;  	bool found = false; +	if (retry && !load_settype(name)) +		return -IPSET_ERR_FIND_TYPE; +  	*min = 255; *max = 0;  	rcu_read_lock();  	list_for_each_entry_rcu(type, &ip_set_type_list, list) @@ -145,7 +160,8 @@ find_set_type_minmax(const char *name, u8 family, u8 *min, u8 *max)  	if (found)  		return 0; -	return try_to_load_type(name); +	return retry ? -IPSET_ERR_FIND_TYPE : +		__find_set_type_minmax(name, family, min, max, true);  }  #define family_name(f)	((f) == AF_INET ? 
"inet" : \ @@ -1126,6 +1142,7 @@ release_refcount:  	if (ret || !cb->args[2]) {  		pr_debug("release set %s\n", ip_set_list[index]->name);  		ip_set_put_byindex(index); +		cb->args[2] = 0;  	}  out:  	if (nlh) { diff --git a/net/netfilter/ipset/ip_set_getport.c b/net/netfilter/ipset/ip_set_getport.c index 052579fe389..1f03556666f 100644 --- a/net/netfilter/ipset/ip_set_getport.c +++ b/net/netfilter/ipset/ip_set_getport.c @@ -109,16 +109,18 @@ ip_set_get_ip4_port(const struct sk_buff *skb, bool src,  }  EXPORT_SYMBOL_GPL(ip_set_get_ip4_port); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  bool  ip_set_get_ip6_port(const struct sk_buff *skb, bool src,  		    __be16 *port, u8 *proto)  {  	int protoff;  	u8 nexthdr; +	__be16 frag_off;  	nexthdr = ipv6_hdr(skb)->nexthdr; -	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr); +	protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, +				   &frag_off);  	if (protoff < 0)  		return false; diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index f2d576e6b76..4015fcaf87b 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -241,7 +241,7 @@ hash_ip6_data_isnull(const struct hash_ip6_elem *elem)  static inline void  hash_ip6_data_copy(struct hash_ip6_elem *dst, const struct hash_ip6_elem *src)  { -	ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); +	dst->ip.in6 = src->ip.in6;  }  static inline void diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index 6ee10f5d59b..37d667e3f6f 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -158,7 +158,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],  	const struct ip_set_hash *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt];  	struct hash_ipport4_elem data = { }; -	u32 ip, ip_to, p = 0, port, 
port_to; +	u32 ip, ip_to = 0, p = 0, port, port_to;  	u32 timeout = h->timeout;  	bool with_ports = false;  	int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index fb90e344e90..e69e2718fbe 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -162,7 +162,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],  	const struct ip_set_hash *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt];  	struct hash_ipportip4_elem data = { }; -	u32 ip, ip_to, p = 0, port, port_to; +	u32 ip, ip_to = 0, p = 0, port, port_to;  	u32 timeout = h->timeout;  	bool with_ports = false;  	int ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index deb3e3dfa5f..64199b4e93c 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -184,7 +184,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],  	const struct ip_set_hash *h = set->data;  	ipset_adtfn adtfn = set->variant->adt[adt];  	struct hash_ipportnet4_elem data = { .cidr = HOST_MASK }; -	u32 ip, ip_to, p = 0, port, port_to; +	u32 ip, ip_to = 0, p = 0, port, port_to;  	u32 ip2_from = 0, ip2_to, ip2_last, ip2;  	u32 timeout = h->timeout;  	bool with_ports = false; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 60d016541c5..28988196775 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -267,7 +267,7 @@ static inline void  hash_net6_data_copy(struct hash_net6_elem *dst,  		    const struct hash_net6_elem *src)  { -	ipv6_addr_copy(&dst->ip.in6, &src->ip.in6); +	dst->ip.in6 = src->ip.in6;  	dst->cidr = src->cidr;  } diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 70bd1d0774c..f9871385a65 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ 
-122,7 +122,6 @@ config	IP_VS_RR  config	IP_VS_WRR  	tristate "weighted round-robin scheduling" -	select GCD  	---help---  	  The weighted robin-robin scheduling algorithm directs network  	  connections to different real servers based on server weights @@ -232,6 +231,21 @@ config	IP_VS_NQ  	  If you want to compile it in kernel, say Y. To compile it as a  	  module, choose M here. If unsure, say N. +comment 'IPVS SH scheduler' + +config IP_VS_SH_TAB_BITS +	int "IPVS source hashing table size (the Nth power of 2)" +	range 4 20 +	default 8 +	---help--- +	  The source hashing scheduler maps source IPs to destinations +	  stored in a hash table. This table is tiled by each destination +	  until all slots in the table are filled. When using weights to +	  allow destinations to receive more connections, the table is +	  tiled an amount proportional to the weights specified. The table +	  needs to be large enough to effectively fit all the destinations +	  multiplied by their respective weights. 
+  comment 'IPVS application helper'  config	IP_VS_FTP diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 12571fb2881..29fa5badde7 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -616,7 +616,7 @@ struct ip_vs_dest *ip_vs_try_bind_dest(struct ip_vs_conn *cp)  	if ((cp) && (!cp->dest)) {  		dest = ip_vs_find_dest(ip_vs_conn_net(cp), cp->af, &cp->daddr,  				       cp->dport, &cp->vaddr, cp->vport, -				       cp->protocol, cp->fwmark); +				       cp->protocol, cp->fwmark, cp->flags);  		ip_vs_bind_dest(cp, dest);  		return dest;  	} else diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 093cc327020..2555816e778 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -232,6 +232,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc,  	__be16 dport = 0;		/* destination port to forward */  	unsigned int flags;  	struct ip_vs_conn_param param; +	const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };  	union nf_inet_addr snet;	/* source network of the client,  					   after masking */ @@ -267,7 +268,6 @@ ip_vs_sched_persist(struct ip_vs_service *svc,  	{  		int protocol = iph.protocol;  		const union nf_inet_addr *vaddr = &iph.daddr; -		const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) };  		__be16 vport = 0;  		if (dst_port == svc->port) { @@ -983,7 +983,7 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related,  	if (!cp)  		return NF_ACCEPT; -	ipv6_addr_copy(&snet.in6, &iph->saddr); +	snet.in6 = iph->saddr;  	return handle_response_icmp(AF_INET6, skb, &snet, cih->nexthdr, cp,  				    pp, offset, sizeof(struct ipv6hdr));  } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 008bf97cc91..b3afe189af6 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -85,7 +85,7 @@ static int __ip_vs_addr_is_local_v6(struct net *net,  	};  	rt = (struct rt6_info 
*)ip6_route_output(net, NULL, &fl6); -	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK)) +	if (rt && rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK))  		return 1;  	return 0; @@ -619,15 +619,21 @@ struct ip_vs_dest *ip_vs_find_dest(struct net  *net, int af,  				   const union nf_inet_addr *daddr,  				   __be16 dport,  				   const union nf_inet_addr *vaddr, -				   __be16 vport, __u16 protocol, __u32 fwmark) +				   __be16 vport, __u16 protocol, __u32 fwmark, +				   __u32 flags)  {  	struct ip_vs_dest *dest;  	struct ip_vs_service *svc; +	__be16 port = dport;  	svc = ip_vs_service_get(net, af, fwmark, protocol, vaddr, vport);  	if (!svc)  		return NULL; -	dest = ip_vs_lookup_dest(svc, daddr, dport); +	if (fwmark && (flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) +		port = 0; +	dest = ip_vs_lookup_dest(svc, daddr, port); +	if (!dest) +		dest = ip_vs_lookup_dest(svc, daddr, port ^ dport);  	if (dest)  		atomic_inc(&dest->refcnt);  	ip_vs_service_put(svc); diff --git a/net/netfilter/ipvs/ip_vs_pe_sip.c b/net/netfilter/ipvs/ip_vs_pe_sip.c index 13d607ae9c5..1aa5cac748c 100644 --- a/net/netfilter/ipvs/ip_vs_pe_sip.c +++ b/net/netfilter/ipvs/ip_vs_pe_sip.c @@ -108,7 +108,7 @@ static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,  				  struct ip_vs_conn *ct)  { -	bool ret = 0; +	bool ret = false;  	if (ct->af == p->af &&  	    ip_vs_addr_equal(p->af, p->caddr, &ct->caddr) && @@ -121,7 +121,7 @@ static bool ip_vs_sip_ct_match(const struct ip_vs_conn_param *p,  	    ct->protocol == p->protocol &&  	    ct->pe_data && ct->pe_data_len == p->pe_data_len &&  	    !memcmp(ct->pe_data, p->pe_data, p->pe_data_len)) -		ret = 1; +		ret = true;  	IP_VS_DBG_BUF(9, "SIP template match %s %s->%s:%d %s\n",  		      ip_vs_proto_name(p->protocol), diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 33815f4fb45..069e8d4d5c0 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -30,6 +30,11 @@   * 
server is dead or overloaded, the load balancer can bypass the cache   * server and send requests to the original server directly.   * + * The weight destination attribute can be used to control the + * distribution of connections to the destinations in servernode. The + * greater the weight, the more connections the destination + * will receive. + *   */  #define KMSG_COMPONENT "IPVS" @@ -99,9 +104,11 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)  	struct ip_vs_sh_bucket *b;  	struct list_head *p;  	struct ip_vs_dest *dest; +	int d_count;  	b = tbl;  	p = &svc->destinations; +	d_count = 0;  	for (i=0; i<IP_VS_SH_TAB_SIZE; i++) {  		if (list_empty(p)) {  			b->dest = NULL; @@ -113,7 +120,16 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc)  			atomic_inc(&dest->refcnt);  			b->dest = dest; -			p = p->next; +			IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", +				      i, IP_VS_DBG_ADDR(svc->af, &dest->addr), +				      atomic_read(&dest->weight)); + +			/* Don't move to next dest until filling weight */ +			if (++d_count >= atomic_read(&dest->weight)) { +				p = p->next; +				d_count = 0; +			} +  		}  		b++;  	} diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 3cdd479f9b5..8a0d6d6889f 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -603,9 +603,9 @@ sloop:  #ifdef CONFIG_IP_VS_IPV6  	if (cp->af == AF_INET6) {  		p += sizeof(struct ip_vs_sync_v6); -		ipv6_addr_copy(&s->v6.caddr, &cp->caddr.in6); -		ipv6_addr_copy(&s->v6.vaddr, &cp->vaddr.in6); -		ipv6_addr_copy(&s->v6.daddr, &cp->daddr.in6); +		s->v6.caddr = cp->caddr.in6; +		s->v6.vaddr = cp->vaddr.in6; +		s->v6.daddr = cp->daddr.in6;  	} else  #endif  	{ @@ -740,7 +740,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,  		 * but still handled.  		 
*/  		dest = ip_vs_find_dest(net, type, daddr, dport, param->vaddr, -				       param->vport, protocol, fwmark); +				       param->vport, protocol, fwmark, flags);  		/*  Set the approprite ativity flag */  		if (protocol == IPPROTO_TCP) { diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index aa2d7206ee8..7fd66dec859 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -207,7 +207,7 @@ __ip_vs_reroute_locally(struct sk_buff *skb)  static inline int __ip_vs_is_local_route6(struct rt6_info *rt)  { -	return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK; +	return rt->dst.dev && rt->dst.dev->flags & IFF_LOOPBACK;  }  static struct dst_entry * @@ -235,7 +235,7 @@ __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr,  			goto out_err;  		}  	} -	ipv6_addr_copy(ret_saddr, &fl6.saddr); +	*ret_saddr = fl6.saddr;  	return dst;  out_err: @@ -279,7 +279,7 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest,  				  atomic_read(&rt->dst.__refcnt));  		}  		if (ret_saddr) -			ipv6_addr_copy(ret_saddr, &dest->dst_saddr.in6); +			*ret_saddr = dest->dst_saddr.in6;  		spin_unlock(&dest->dst_lock);  	} else {  		dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); @@ -541,7 +541,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	 * Avoid duplicate tuple in reply direction for NAT traffic  	 * to local address when connection is sync-ed  	 */ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK)  	if (cp->flags & IP_VS_CONN_F_SYNC && local) {  		enum ip_conntrack_info ctinfo;  		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); @@ -658,7 +658,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	 * Avoid duplicate tuple in reply direction for NAT traffic  	 * to local address when connection is sync-ed  	 */ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if 
IS_ENABLED(CONFIG_NF_CONNTRACK)  	if (cp->flags & IP_VS_CONN_F_SYNC && local) {  		enum ip_conntrack_info ctinfo;  		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); @@ -705,7 +705,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	/* mangle the packet */  	if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp))  		goto tx_error; -	ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6); +	ipv6_hdr(skb)->daddr = cp->daddr.in6;  	if (!local || !skb->dev) {  		/* drop the old route when skb is not shared */ @@ -967,8 +967,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	be16_add_cpu(&iph->payload_len, sizeof(*old_iph));  	iph->priority		=	old_iph->priority;  	memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); -	ipv6_addr_copy(&iph->daddr, &cp->daddr.in6); -	ipv6_addr_copy(&iph->saddr, &saddr); +	iph->daddr = cp->daddr.in6; +	iph->saddr = saddr;  	iph->hop_limit		=	old_iph->hop_limit;  	/* Another hack: avoid icmp_send in ip_fragment */ @@ -1173,7 +1173,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,  	 * Avoid duplicate tuple in reply direction for NAT traffic  	 * to local address when connection is sync-ed  	 */ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK)  	if (cp->flags & IP_VS_CONN_F_SYNC && local) {  		enum ip_conntrack_info ctinfo;  		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); @@ -1293,7 +1293,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,  	 * Avoid duplicate tuple in reply direction for NAT traffic  	 * to local address when connection is sync-ed  	 */ -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK)  	if (cp->flags & IP_VS_CONN_F_SYNC && local) {  		enum ip_conntrack_info ctinfo;  		struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index 369df3f08d4..f4f8cda0598 
100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -18,7 +18,7 @@  #include <net/netfilter/nf_conntrack_extend.h>  #include <net/netfilter/nf_conntrack_acct.h> -static int nf_ct_acct __read_mostly; +static bool nf_ct_acct __read_mostly;  module_param_named(acct, nf_ct_acct, bool, 0644);  MODULE_PARM_DESC(acct, "Enable connection tracking flow accounting."); @@ -46,8 +46,8 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)  		return 0;  	return seq_printf(s, "packets=%llu bytes=%llu ", -			  (unsigned long long)acct[dir].packets, -			  (unsigned long long)acct[dir].bytes); +			  (unsigned long long)atomic64_read(&acct[dir].packets), +			  (unsigned long long)atomic64_read(&acct[dir].bytes));  };  EXPORT_SYMBOL_GPL(seq_print_acct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 7202b0631cd..ed86a3be678 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -67,6 +67,7 @@ DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked);  EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked);  unsigned int nf_conntrack_hash_rnd __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd);  static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone)  { @@ -403,19 +404,49 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct,  			   &net->ct.hash[repl_hash]);  } -void nf_conntrack_hash_insert(struct nf_conn *ct) +int +nf_conntrack_hash_check_insert(struct nf_conn *ct)  {  	struct net *net = nf_ct_net(ct);  	unsigned int hash, repl_hash; +	struct nf_conntrack_tuple_hash *h; +	struct hlist_nulls_node *n;  	u16 zone;  	zone = nf_ct_zone(ct); -	hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); -	repl_hash = hash_conntrack(net, zone, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); +	hash = hash_conntrack(net, zone, +			      &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); +	repl_hash = hash_conntrack(net, zone, +				   
&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + +	spin_lock_bh(&nf_conntrack_lock); +	/* See if there's one in the list already, including reverse */ +	hlist_nulls_for_each_entry(h, n, &net->ct.hash[hash], hnnode) +		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, +				      &h->tuple) && +		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) +			goto out; +	hlist_nulls_for_each_entry(h, n, &net->ct.hash[repl_hash], hnnode) +		if (nf_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple, +				      &h->tuple) && +		    zone == nf_ct_zone(nf_ct_tuplehash_to_ctrack(h))) +			goto out; + +	add_timer(&ct->timeout); +	nf_conntrack_get(&ct->ct_general);  	__nf_conntrack_hash_insert(ct, hash, repl_hash); +	NF_CT_STAT_INC(net, insert); +	spin_unlock_bh(&nf_conntrack_lock); + +	return 0; + +out: +	NF_CT_STAT_INC(net, insert_failed); +	spin_unlock_bh(&nf_conntrack_lock); +	return -EEXIST;  } -EXPORT_SYMBOL_GPL(nf_conntrack_hash_insert); +EXPORT_SYMBOL_GPL(nf_conntrack_hash_check_insert);  /* Confirm a connection given skb; places it in hash table */  int @@ -776,7 +807,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,  		if (exp->helper) {  			help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);  			if (help) -				RCU_INIT_POINTER(help->helper, exp->helper); +				rcu_assign_pointer(help->helper, exp->helper);  		}  #ifdef CONFIG_NF_CONNTRACK_MARK @@ -1044,10 +1075,8 @@ acct:  		acct = nf_conn_acct_find(ct);  		if (acct) { -			spin_lock_bh(&ct->lock); -			acct[CTINFO2DIR(ctinfo)].packets++; -			acct[CTINFO2DIR(ctinfo)].bytes += skb->len; -			spin_unlock_bh(&ct->lock); +			atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); +			atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes);  		}  	}  } @@ -1063,11 +1092,9 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,  		acct = nf_conn_acct_find(ct);  		if (acct) { -			spin_lock_bh(&ct->lock); -			acct[CTINFO2DIR(ctinfo)].packets++; -			acct[CTINFO2DIR(ctinfo)].bytes += -				skb->len - skb_network_offset(skb); -			
spin_unlock_bh(&ct->lock); +			atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); +			atomic64_add(skb->len - skb_network_offset(skb), +				     &acct[CTINFO2DIR(ctinfo)].bytes);  		}  	} @@ -1087,7 +1114,7 @@ static struct nf_ct_ext_type nf_ct_zone_extend __read_mostly = {  };  #endif -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  #include <linux/netfilter/nfnetlink.h>  #include <linux/netfilter/nfnetlink_conntrack.h> @@ -1342,8 +1369,7 @@ void *nf_ct_alloc_hashtable(unsigned int *sizep, int nulls)  					get_order(sz));  	if (!hash) {  		printk(KERN_WARNING "nf_conntrack: falling back to vmalloc.\n"); -		hash = __vmalloc(sz, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, -				 PAGE_KERNEL); +		hash = vzalloc(sz);  	}  	if (hash && nulls) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index 6b368be937c..14af6329bdd 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -27,22 +27,17 @@  static DEFINE_MUTEX(nf_ct_ecache_mutex); -struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb __read_mostly; -EXPORT_SYMBOL_GPL(nf_conntrack_event_cb); - -struct nf_exp_event_notifier __rcu *nf_expect_event_cb __read_mostly; -EXPORT_SYMBOL_GPL(nf_expect_event_cb); -  /* deliver cached events and clear cache entry - must be called with locally   * disabled softirqs */  void nf_ct_deliver_cached_events(struct nf_conn *ct)  { +	struct net *net = nf_ct_net(ct);  	unsigned long events;  	struct nf_ct_event_notifier *notify;  	struct nf_conntrack_ecache *e;  	rcu_read_lock(); -	notify = rcu_dereference(nf_conntrack_event_cb); +	notify = rcu_dereference(net->ct.nf_conntrack_event_cb);  	if (notify == NULL)  		goto out_unlock; @@ -83,19 +78,20 @@ out_unlock:  }  EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); -int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new) +int nf_conntrack_register_notifier(struct net *net, +				   struct 
nf_ct_event_notifier *new)  {  	int ret = 0;  	struct nf_ct_event_notifier *notify;  	mutex_lock(&nf_ct_ecache_mutex); -	notify = rcu_dereference_protected(nf_conntrack_event_cb, +	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,  					   lockdep_is_held(&nf_ct_ecache_mutex));  	if (notify != NULL) {  		ret = -EBUSY;  		goto out_unlock;  	} -	RCU_INIT_POINTER(nf_conntrack_event_cb, new); +	rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new);  	mutex_unlock(&nf_ct_ecache_mutex);  	return ret; @@ -105,32 +101,34 @@ out_unlock:  }  EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); -void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *new) +void nf_conntrack_unregister_notifier(struct net *net, +				      struct nf_ct_event_notifier *new)  {  	struct nf_ct_event_notifier *notify;  	mutex_lock(&nf_ct_ecache_mutex); -	notify = rcu_dereference_protected(nf_conntrack_event_cb, +	notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb,  					   lockdep_is_held(&nf_ct_ecache_mutex));  	BUG_ON(notify != new); -	RCU_INIT_POINTER(nf_conntrack_event_cb, NULL); +	RCU_INIT_POINTER(net->ct.nf_conntrack_event_cb, NULL);  	mutex_unlock(&nf_ct_ecache_mutex);  }  EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); -int nf_ct_expect_register_notifier(struct nf_exp_event_notifier *new) +int nf_ct_expect_register_notifier(struct net *net, +				   struct nf_exp_event_notifier *new)  {  	int ret = 0;  	struct nf_exp_event_notifier *notify;  	mutex_lock(&nf_ct_ecache_mutex); -	notify = rcu_dereference_protected(nf_expect_event_cb, +	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,  					   lockdep_is_held(&nf_ct_ecache_mutex));  	if (notify != NULL) {  		ret = -EBUSY;  		goto out_unlock;  	} -	RCU_INIT_POINTER(nf_expect_event_cb, new); +	rcu_assign_pointer(net->ct.nf_expect_event_cb, new);  	mutex_unlock(&nf_ct_ecache_mutex);  	return ret; @@ -140,15 +138,16 @@ out_unlock:  }  EXPORT_SYMBOL_GPL(nf_ct_expect_register_notifier); -void 
nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new) +void nf_ct_expect_unregister_notifier(struct net *net, +				      struct nf_exp_event_notifier *new)  {  	struct nf_exp_event_notifier *notify;  	mutex_lock(&nf_ct_ecache_mutex); -	notify = rcu_dereference_protected(nf_expect_event_cb, +	notify = rcu_dereference_protected(net->ct.nf_expect_event_cb,  					   lockdep_is_held(&nf_ct_ecache_mutex));  	BUG_ON(notify != new); -	RCU_INIT_POINTER(nf_expect_event_cb, NULL); +	RCU_INIT_POINTER(net->ct.nf_expect_event_cb, NULL);  	mutex_unlock(&nf_ct_ecache_mutex);  }  EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 340c80d968d..4147ba3f653 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -38,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly;  static struct kmem_cache *nf_ct_expect_cachep __read_mostly; -static HLIST_HEAD(nf_ct_userspace_expect_list); -  /* nf_conntrack_expect helper functions */  void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,  				u32 pid, int report) @@ -47,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,  	struct nf_conn_help *master_help = nfct_help(exp->master);  	struct net *net = nf_ct_exp_net(exp); +	NF_CT_ASSERT(master_help);  	NF_CT_ASSERT(!timer_pending(&exp->timeout));  	hlist_del_rcu(&exp->hnode);  	net->ct.expect_count--;  	hlist_del(&exp->lnode); -	if (!(exp->flags & NF_CT_EXPECT_USERSPACE)) -		master_help->expecting[exp->class]--; +	master_help->expecting[exp->class]--;  	nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);  	nf_ct_expect_put(exp); @@ -314,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp)  }  EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) +static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)  {  	struct nf_conn_help 
*master_help = nfct_help(exp->master); +	struct nf_conntrack_helper *helper;  	struct net *net = nf_ct_exp_net(exp); -	const struct nf_conntrack_expect_policy *p;  	unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);  	/* two references : one for hash insert, one for the timer */  	atomic_add(2, &exp->use); -	if (master_help) { -		hlist_add_head(&exp->lnode, &master_help->expectations); -		master_help->expecting[exp->class]++; -	} else if (exp->flags & NF_CT_EXPECT_USERSPACE) -		hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list); +	hlist_add_head(&exp->lnode, &master_help->expectations); +	master_help->expecting[exp->class]++;  	hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);  	net->ct.expect_count++;  	setup_timer(&exp->timeout, nf_ct_expectation_timed_out,  		    (unsigned long)exp); -	if (master_help) { -		p = &rcu_dereference_protected( -				master_help->helper, -				lockdep_is_held(&nf_conntrack_lock) -				)->expect_policy[exp->class]; -		exp->timeout.expires = jiffies + p->timeout * HZ; +	helper = rcu_dereference_protected(master_help->helper, +					   lockdep_is_held(&nf_conntrack_lock)); +	if (helper) { +		exp->timeout.expires = jiffies + +			helper->expect_policy[exp->class].timeout * HZ;  	}  	add_timer(&exp->timeout);  	NF_CT_STAT_INC(net, expect_create); +	return 0;  }  /* Race with expectations being used means we could have none to find; OK. 
*/ @@ -389,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)  	struct nf_conntrack_expect *i;  	struct nf_conn *master = expect->master;  	struct nf_conn_help *master_help = nfct_help(master); +	struct nf_conntrack_helper *helper;  	struct net *net = nf_ct_exp_net(expect);  	struct hlist_node *n;  	unsigned int h;  	int ret = 1; -	/* Don't allow expectations created from kernel-space with no helper */ -	if (!(expect->flags & NF_CT_EXPECT_USERSPACE) && -	    (!master_help || (master_help && !master_help->helper))) { +	if (!master_help) {  		ret = -ESHUTDOWN;  		goto out;  	} @@ -414,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)  		}  	}  	/* Will be over limit? */ -	if (master_help) { -		p = &rcu_dereference_protected( -			master_help->helper, -			lockdep_is_held(&nf_conntrack_lock) -			)->expect_policy[expect->class]; +	helper = rcu_dereference_protected(master_help->helper, +					   lockdep_is_held(&nf_conntrack_lock)); +	if (helper) { +		p = &helper->expect_policy[expect->class];  		if (p->max_expected &&  		    master_help->expecting[expect->class] >= p->max_expected) {  			evict_oldest_expect(master, expect); @@ -450,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,  	if (ret <= 0)  		goto out; -	ret = 0; -	nf_ct_expect_insert(expect); +	ret = nf_ct_expect_insert(expect); +	if (ret < 0) +		goto out;  	spin_unlock_bh(&nf_conntrack_lock);  	nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);  	return ret; @@ -461,22 +455,7 @@ out:  }  EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); -void nf_ct_remove_userspace_expectations(void) -{ -	struct nf_conntrack_expect *exp; -	struct hlist_node *n, *next; - -	hlist_for_each_entry_safe(exp, n, next, -				  &nf_ct_userspace_expect_list, lnode) { -		if (del_timer(&exp->timeout)) { -			nf_ct_unlink_expect(exp); -			nf_ct_expect_put(exp); -		} -	} -} -EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations); - 
-#ifdef CONFIG_PROC_FS +#ifdef CONFIG_NF_CONNTRACK_PROCFS  struct ct_expect_iter_state {  	struct seq_net_private p;  	unsigned int bucket; @@ -604,25 +583,25 @@ static const struct file_operations exp_file_ops = {  	.llseek  = seq_lseek,  	.release = seq_release_net,  }; -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NF_CONNTRACK_PROCFS */  static int exp_proc_init(struct net *net)  { -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_NF_CONNTRACK_PROCFS  	struct proc_dir_entry *proc;  	proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);  	if (!proc)  		return -ENOMEM; -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NF_CONNTRACK_PROCFS */  	return 0;  }  static void exp_proc_remove(struct net *net)  { -#ifdef CONFIG_PROC_FS +#ifdef CONFIG_NF_CONNTRACK_PROCFS  	proc_net_remove(net, "nf_conntrack_expect"); -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NF_CONNTRACK_PROCFS */  }  module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400); diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 4605c947dcc..641ff5f9671 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -169,7 +169,7 @@ int nf_ct_extend_register(struct nf_ct_ext_type *type)  	   before updating alloc_size */  	type->alloc_size = ALIGN(sizeof(struct nf_ct_ext), type->align)  			   + type->len; -	RCU_INIT_POINTER(nf_ct_ext_types[type->id], type); +	rcu_assign_pointer(nf_ct_ext_types[type->id], type);  	update_alloc_size(type);  out:  	mutex_unlock(&nf_ct_ext_type_mutex); diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 6f5801eac99..8c5c95c6d34 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -42,7 +42,7 @@ static u_int16_t ports[MAX_PORTS];  static unsigned int ports_c;  module_param_array(ports, ushort, &ports_c, 0400); -static int loose; +static bool loose;  module_param(loose, bool, 0600);  unsigned int 
(*nf_nat_ftp_hook)(struct sk_buff *skb, diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c index f03c2d4539f..722291f8af7 100644 --- a/net/netfilter/nf_conntrack_h323_main.c +++ b/net/netfilter/nf_conntrack_h323_main.c @@ -42,7 +42,7 @@ static int gkrouted_only __read_mostly = 1;  module_param(gkrouted_only, int, 0600);  MODULE_PARM_DESC(gkrouted_only, "only accept calls from gatekeeper"); -static int callforward_filter __read_mostly = 1; +static bool callforward_filter __read_mostly = true;  module_param(callforward_filter, bool, 0600);  MODULE_PARM_DESC(callforward_filter, "only create call forwarding expectations "  				     "if both endpoints are on different sides " @@ -743,17 +743,16 @@ static int callforward_do_filter(const union nf_inet_addr *src,  		}  		break;  	} -#if defined(CONFIG_NF_CONNTRACK_IPV6) || \ -    defined(CONFIG_NF_CONNTRACK_IPV6_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK_IPV6)  	case AF_INET6: {  		struct flowi6 fl1, fl2;  		struct rt6_info *rt1, *rt2;  		memset(&fl1, 0, sizeof(fl1)); -		ipv6_addr_copy(&fl1.daddr, &src->in6); +		fl1.daddr = src->in6;  		memset(&fl2, 0, sizeof(fl2)); -		ipv6_addr_copy(&fl2.daddr, &dst->in6); +		fl2.daddr = dst->in6;  		if (!afinfo->route(&init_net, (struct dst_entry **)&rt1,  				   flowi6_to_flowi(&fl1), false)) {  			if (!afinfo->route(&init_net, (struct dst_entry **)&rt2, diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 93c4bdbfc1a..bbe23baa19b 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -145,7 +145,7 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,  		memset(&help->help, 0, sizeof(help->help));  	} -	RCU_INIT_POINTER(help->helper, helper); +	rcu_assign_pointer(help->helper, helper);  out:  	return ret;  } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index e58aa9b1fe8..30c9d4ca021 100644 --- 
a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -4,7 +4,7 @@   * (C) 2001 by Jay Schulist <jschlst@samba.org>   * (C) 2002-2006 by Harald Welte <laforge@gnumonks.org>   * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005-2008 by Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2005-2011 by Pablo Neira Ayuso <pablo@netfilter.org>   *   * Initial connection tracking via netlink development funded and   * generally made possible by Network Robots, Inc. (www.networkrobots.com) @@ -135,7 +135,7 @@ nla_put_failure:  static inline int  ctnetlink_dump_timeout(struct sk_buff *skb, const struct nf_conn *ct)  { -	long timeout = (ct->timeout.expires - jiffies) / HZ; +	long timeout = ((long)ct->timeout.expires - (long)jiffies) / HZ;  	if (timeout < 0)  		timeout = 0; @@ -203,25 +203,18 @@ nla_put_failure:  }  static int -ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, -			enum ip_conntrack_dir dir) +dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, +	      enum ip_conntrack_dir dir)  {  	enum ctattr_type type = dir ? 
CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;  	struct nlattr *nest_count; -	const struct nf_conn_counter *acct; - -	acct = nf_conn_acct_find(ct); -	if (!acct) -		return 0;  	nest_count = nla_nest_start(skb, type | NLA_F_NESTED);  	if (!nest_count)  		goto nla_put_failure; -	NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, -		     cpu_to_be64(acct[dir].packets)); -	NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, -		     cpu_to_be64(acct[dir].bytes)); +	NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts)); +	NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes));  	nla_nest_end(skb, nest_count); @@ -232,6 +225,27 @@ nla_put_failure:  }  static int +ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, +			enum ip_conntrack_dir dir, int type) +{ +	struct nf_conn_counter *acct; +	u64 pkts, bytes; + +	acct = nf_conn_acct_find(ct); +	if (!acct) +		return 0; + +	if (type == IPCTNL_MSG_CT_GET_CTRZERO) { +		pkts = atomic64_xchg(&acct[dir].packets, 0); +		bytes = atomic64_xchg(&acct[dir].bytes, 0); +	} else { +		pkts = atomic64_read(&acct[dir].packets); +		bytes = atomic64_read(&acct[dir].bytes); +	} +	return dump_counters(skb, pkts, bytes, dir); +} + +static int  ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct)  {  	struct nlattr *nest_count; @@ -393,15 +407,15 @@ nla_put_failure:  }  static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, -		    int event, struct nf_conn *ct) +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +		    struct nf_conn *ct)  {  	struct nlmsghdr *nlh;  	struct nfgenmsg *nfmsg;  	struct nlattr *nest_parms; -	unsigned int flags = pid ? NLM_F_MULTI : 0; +	unsigned int flags = pid ? 
NLM_F_MULTI : 0, event; -	event |= NFNL_SUBSYS_CTNETLINK << 8; +	event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW);  	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags);  	if (nlh == NULL)  		goto nlmsg_failure; @@ -430,8 +444,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,  	if (ctnetlink_dump_status(skb, ct) < 0 ||  	    ctnetlink_dump_timeout(skb, ct) < 0 || -	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || -	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || +	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 || +	    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 ||  	    ctnetlink_dump_timestamp(skb, ct) < 0 ||  	    ctnetlink_dump_protoinfo(skb, ct) < 0 ||  	    ctnetlink_dump_helpinfo(skb, ct) < 0 || @@ -612,8 +626,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)  		goto nla_put_failure;  	if (events & (1 << IPCT_DESTROY)) { -		if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || -		    ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || +		if (ctnetlink_dump_counters(skb, ct, +					    IP_CT_DIR_ORIGINAL, type) < 0 || +		    ctnetlink_dump_counters(skb, ct, +					    IP_CT_DIR_REPLY, type) < 0 ||  		    ctnetlink_dump_timestamp(skb, ct) < 0)  			goto nla_put_failure;  	} else { @@ -709,20 +725,13 @@ restart:  			}  			if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,  						cb->nlh->nlmsg_seq, -						IPCTNL_MSG_CT_NEW, ct) < 0) { +						NFNL_MSG_TYPE( +							cb->nlh->nlmsg_type), +						ct) < 0) {  				nf_conntrack_get(&ct->ct_general);  				cb->args[1] = (unsigned long)ct;  				goto out;  			} - -			if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == -						IPCTNL_MSG_CT_GET_CTRZERO) { -				struct nf_conn_counter *acct; - -				acct = nf_conn_acct_find(ct); -				if (acct) -					memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); -			}  		}  		if (cb->args[1]) {  			cb->args[1] = 0; @@ -1001,7 +1010,7 @@ 
ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,  	rcu_read_lock();  	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, -				  IPCTNL_MSG_CT_NEW, ct); +				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct);  	rcu_read_unlock();  	nf_ct_put(ct);  	if (err <= 0) @@ -1087,14 +1096,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[])  	if (cda[CTA_NAT_DST]) {  		ret = ctnetlink_parse_nat_setup(ct, -						IP_NAT_MANIP_DST, +						NF_NAT_MANIP_DST,  						cda[CTA_NAT_DST]);  		if (ret < 0)  			return ret;  	}  	if (cda[CTA_NAT_SRC]) {  		ret = ctnetlink_parse_nat_setup(ct, -						IP_NAT_MANIP_SRC, +						NF_NAT_MANIP_SRC,  						cda[CTA_NAT_SRC]);  		if (ret < 0)  			return ret; @@ -1163,7 +1172,7 @@ ctnetlink_change_helper(struct nf_conn *ct, const struct nlattr * const cda[])  		return -EOPNOTSUPP;  	} -	RCU_INIT_POINTER(help->helper, helper); +	rcu_assign_pointer(help->helper, helper);  	return 0;  } @@ -1456,8 +1465,10 @@ ctnetlink_create_conntrack(struct net *net, u16 zone,  	if (tstamp)  		tstamp->start = ktime_to_ns(ktime_get_real()); -	add_timer(&ct->timeout); -	nf_conntrack_hash_insert(ct); +	err = nf_conntrack_hash_check_insert(ct); +	if (err < 0) +		goto err2; +  	rcu_read_unlock();  	return ct; @@ -1478,6 +1489,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,  	struct nf_conntrack_tuple otuple, rtuple;  	struct nf_conntrack_tuple_hash *h = NULL;  	struct nfgenmsg *nfmsg = nlmsg_data(nlh); +	struct nf_conn *ct;  	u_int8_t u3 = nfmsg->nfgen_family;  	u16 zone;  	int err; @@ -1498,27 +1510,22 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,  			return err;  	} -	spin_lock_bh(&nf_conntrack_lock);  	if (cda[CTA_TUPLE_ORIG]) -		h = __nf_conntrack_find(net, zone, &otuple); +		h = nf_conntrack_find_get(net, zone, &otuple);  	else if (cda[CTA_TUPLE_REPLY]) -		h = __nf_conntrack_find(net, zone, &rtuple); +		h = nf_conntrack_find_get(net, zone, &rtuple);  	if (h == NULL) {  		err = 
-ENOENT;  		if (nlh->nlmsg_flags & NLM_F_CREATE) { -			struct nf_conn *ct;  			enum ip_conntrack_events events;  			ct = ctnetlink_create_conntrack(net, zone, cda, &otuple,  							&rtuple, u3); -			if (IS_ERR(ct)) { -				err = PTR_ERR(ct); -				goto out_unlock; -			} +			if (IS_ERR(ct)) +				return PTR_ERR(ct); +  			err = 0; -			nf_conntrack_get(&ct->ct_general); -			spin_unlock_bh(&nf_conntrack_lock);  			if (test_bit(IPS_EXPECTED_BIT, &ct->status))  				events = IPCT_RELATED;  			else @@ -1533,23 +1540,19 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,  						      ct, NETLINK_CB(skb).pid,  						      nlmsg_report(nlh));  			nf_ct_put(ct); -		} else -			spin_unlock_bh(&nf_conntrack_lock); +		}  		return err;  	}  	/* implicit 'else' */ -	/* We manipulate the conntrack inside the global conntrack table lock, -	 * so there's no need to increase the refcount */  	err = -EEXIST; +	ct = nf_ct_tuplehash_to_ctrack(h);  	if (!(nlh->nlmsg_flags & NLM_F_EXCL)) { -		struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - +		spin_lock_bh(&nf_conntrack_lock);  		err = ctnetlink_change_conntrack(ct, cda); +		spin_unlock_bh(&nf_conntrack_lock);  		if (err == 0) { -			nf_conntrack_get(&ct->ct_general); -			spin_unlock_bh(&nf_conntrack_lock);  			nf_conntrack_eventmask_report((1 << IPCT_REPLY) |  						      (1 << IPCT_ASSURED) |  						      (1 << IPCT_HELPER) | @@ -1558,15 +1561,10 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,  						      (1 << IPCT_MARK),  						      ct, NETLINK_CB(skb).pid,  						      nlmsg_report(nlh)); -			nf_ct_put(ct); -		} else -			spin_unlock_bh(&nf_conntrack_lock); - -		return err; +		}  	} -out_unlock: -	spin_unlock_bh(&nf_conntrack_lock); +	nf_ct_put(ct);  	return err;  } @@ -1638,7 +1636,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,  			  const struct nf_conntrack_expect *exp)  {  	struct nf_conn *master = exp->master; -	long timeout = (exp->timeout.expires - jiffies) / HZ; +	long timeout = 
((long)exp->timeout.expires - (long)jiffies) / HZ;  	struct nf_conn_help *help;  	if (timeout < 0) @@ -1847,7 +1845,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,  	if (err < 0)  		return err; -	if (cda[CTA_EXPECT_MASTER]) +	if (cda[CTA_EXPECT_TUPLE]) +		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); +	else if (cda[CTA_EXPECT_MASTER])  		err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3);  	else  		return -EINVAL; @@ -1869,25 +1869,30 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,  	err = -ENOMEM;  	skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); -	if (skb2 == NULL) +	if (skb2 == NULL) { +		nf_ct_expect_put(exp);  		goto out; +	}  	rcu_read_lock();  	err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,  				      nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp);  	rcu_read_unlock(); +	nf_ct_expect_put(exp);  	if (err <= 0)  		goto free; -	nf_ct_expect_put(exp); +	err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); +	if (err < 0) +		goto out; -	return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); +	return 0;  free:  	kfree_skb(skb2);  out: -	nf_ct_expect_put(exp); -	return err; +	/* this avoids a loop in nfnetlink. */ +	return err == -EAGAIN ? 
-ENOBUFS : err;  }  static int @@ -2163,6 +2168,54 @@ MODULE_ALIAS("ip_conntrack_netlink");  MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);  MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK_EXP); +static int __net_init ctnetlink_net_init(struct net *net) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS +	int ret; + +	ret = nf_conntrack_register_notifier(net, &ctnl_notifier); +	if (ret < 0) { +		pr_err("ctnetlink_init: cannot register notifier.\n"); +		goto err_out; +	} + +	ret = nf_ct_expect_register_notifier(net, &ctnl_notifier_exp); +	if (ret < 0) { +		pr_err("ctnetlink_init: cannot expect register notifier.\n"); +		goto err_unreg_notifier; +	} +#endif +	return 0; + +#ifdef CONFIG_NF_CONNTRACK_EVENTS +err_unreg_notifier: +	nf_conntrack_unregister_notifier(net, &ctnl_notifier); +err_out: +	return ret; +#endif +} + +static void ctnetlink_net_exit(struct net *net) +{ +#ifdef CONFIG_NF_CONNTRACK_EVENTS +	nf_ct_expect_unregister_notifier(net, &ctnl_notifier_exp); +	nf_conntrack_unregister_notifier(net, &ctnl_notifier); +#endif +} + +static void __net_exit ctnetlink_net_exit_batch(struct list_head *net_exit_list) +{ +	struct net *net; + +	list_for_each_entry(net, net_exit_list, exit_list) +		ctnetlink_net_exit(net); +} + +static struct pernet_operations ctnetlink_net_ops = { +	.init		= ctnetlink_net_init, +	.exit_batch	= ctnetlink_net_exit_batch, +}; +  static int __init ctnetlink_init(void)  {  	int ret; @@ -2180,28 +2233,15 @@ static int __init ctnetlink_init(void)  		goto err_unreg_subsys;  	} -#ifdef CONFIG_NF_CONNTRACK_EVENTS -	ret = nf_conntrack_register_notifier(&ctnl_notifier); -	if (ret < 0) { -		pr_err("ctnetlink_init: cannot register notifier.\n"); +	if (register_pernet_subsys(&ctnetlink_net_ops)) { +		pr_err("ctnetlink_init: cannot register pernet operations\n");  		goto err_unreg_exp_subsys;  	} -	ret = nf_ct_expect_register_notifier(&ctnl_notifier_exp); -	if (ret < 0) { -		pr_err("ctnetlink_init: cannot expect register notifier.\n"); -		goto err_unreg_notifier; -	} 
-#endif -  	return 0; -#ifdef CONFIG_NF_CONNTRACK_EVENTS -err_unreg_notifier: -	nf_conntrack_unregister_notifier(&ctnl_notifier);  err_unreg_exp_subsys:  	nfnetlink_subsys_unregister(&ctnl_exp_subsys); -#endif  err_unreg_subsys:  	nfnetlink_subsys_unregister(&ctnl_subsys);  err_out: @@ -2212,12 +2252,7 @@ static void __exit ctnetlink_exit(void)  {  	pr_info("ctnetlink: unregistering from nfnetlink.\n"); -	nf_ct_remove_userspace_expectations(); -#ifdef CONFIG_NF_CONNTRACK_EVENTS -	nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); -	nf_conntrack_unregister_notifier(&ctnl_notifier); -#endif - +	unregister_pernet_subsys(&ctnetlink_net_ops);  	nfnetlink_subsys_unregister(&ctnl_exp_subsys);  	nfnetlink_subsys_unregister(&ctnl_subsys);  } diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 2e664a69d7d..d6dde6dc09e 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -629,7 +629,7 @@ static int dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct)  	return seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]);  } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla,  			  struct nf_conn *ct)  { @@ -770,7 +770,7 @@ static struct nf_conntrack_l4proto dccp_proto4 __read_mostly = {  	.error			= dccp_error,  	.print_tuple		= dccp_print_tuple,  	.print_conntrack	= dccp_print_conntrack, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.to_nlattr		= dccp_to_nlattr,  	.nlattr_size		= dccp_nlattr_size,  	.from_nlattr		= nlattr_to_dccp, @@ -792,7 +792,7 @@ static struct nf_conntrack_l4proto dccp_proto6 __read_mostly = {  	.error			= dccp_error,  	.print_tuple		= dccp_print_tuple,  	.print_conntrack	= dccp_print_conntrack, -#if defined(CONFIG_NF_CT_NETLINK) || 
defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.to_nlattr		= dccp_to_nlattr,  	.nlattr_size		= dccp_nlattr_size,  	.from_nlattr		= nlattr_to_dccp, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index d69facdd9a7..f0338791b82 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -291,7 +291,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = {  	.new		 = gre_new,  	.destroy	 = gre_destroy,  	.me 		 = THIS_MODULE, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.tuple_to_nlattr = nf_ct_port_tuple_to_nlattr,  	.nlattr_tuple_size = nf_ct_port_nlattr_tuple_size,  	.nlattr_to_tuple = nf_ct_port_nlattr_to_tuple, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6772b115465..afa69136061 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -461,7 +461,7 @@ static bool sctp_new(struct nf_conn *ct, const struct sk_buff *skb,  	return true;  } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  #include <linux/netfilter/nfnetlink.h>  #include <linux/netfilter/nfnetlink_conntrack.h> @@ -666,7 +666,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = {  	.packet 		= sctp_packet,  	.new 			= sctp_new,  	.me 			= THIS_MODULE, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.to_nlattr		= sctp_to_nlattr,  	.nlattr_size		= sctp_nlattr_size,  	.from_nlattr		= nlattr_to_sctp, @@ -696,7 +696,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = {  	.packet 		= sctp_packet,  	.new 			= sctp_new,  	.me 			= THIS_MODULE, -#if defined(CONFIG_NF_CT_NETLINK) || 
defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.to_nlattr		= sctp_to_nlattr,  	.nlattr_size		= sctp_nlattr_size,  	.from_nlattr		= nlattr_to_sctp, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 8235b86b4e8..97b9f3ebf28 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -1126,7 +1126,7 @@ static bool tcp_new(struct nf_conn *ct, const struct sk_buff *skb,  	return true;  } -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  #include <linux/netfilter/nfnetlink.h>  #include <linux/netfilter/nfnetlink_conntrack.h> @@ -1447,7 +1447,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly =  	.packet 		= tcp_packet,  	.new 			= tcp_new,  	.error			= tcp_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.to_nlattr		= tcp_to_nlattr,  	.nlattr_size		= tcp_nlattr_size,  	.from_nlattr		= nlattr_to_tcp, @@ -1479,7 +1479,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly =  	.packet 		= tcp_packet,  	.new 			= tcp_new,  	.error			= tcp_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.to_nlattr		= tcp_to_nlattr,  	.nlattr_size		= tcp_nlattr_size,  	.from_nlattr		= nlattr_to_tcp, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index 8289088b821..5f35757fbff 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -188,7 +188,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly =  	.packet			= udp_packet,  	.new			= udp_new,  	.error			= udp_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.tuple_to_nlattr	= 
nf_ct_port_tuple_to_nlattr,  	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,  	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size, @@ -216,7 +216,7 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly =  	.packet			= udp_packet,  	.new			= udp_new,  	.error			= udp_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,  	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple,  	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size, diff --git a/net/netfilter/nf_conntrack_proto_udplite.c b/net/netfilter/nf_conntrack_proto_udplite.c index 263b5a72588..f52ca118101 100644 --- a/net/netfilter/nf_conntrack_proto_udplite.c +++ b/net/netfilter/nf_conntrack_proto_udplite.c @@ -174,7 +174,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly =  	.packet			= udplite_packet,  	.new			= udplite_new,  	.error			= udplite_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,  	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,  	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple, @@ -198,7 +198,7 @@ static struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly =  	.packet			= udplite_packet,  	.new			= udplite_new,  	.error			= udplite_error, -#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK)  	.tuple_to_nlattr	= nf_ct_port_tuple_to_nlattr,  	.nlattr_tuple_size	= nf_ct_port_nlattr_tuple_size,  	.nlattr_to_tuple	= nf_ct_port_nlattr_to_tuple, diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 05e9feb101c..885f5ab9bc2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -34,7 +34,7 @@  MODULE_LICENSE("GPL"); -#ifdef CONFIG_PROC_FS +#ifdef 
CONFIG_NF_CONNTRACK_PROCFS  int  print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple,              const struct nf_conntrack_l3proto *l3proto, @@ -396,7 +396,7 @@ static int nf_conntrack_standalone_init_proc(struct net *net)  static void nf_conntrack_standalone_fini_proc(struct net *net)  {  } -#endif /* CONFIG_PROC_FS */ +#endif /* CONFIG_NF_CONNTRACK_PROCFS */  /* Sysctl support */ diff --git a/net/netfilter/nf_conntrack_timestamp.c b/net/netfilter/nf_conntrack_timestamp.c index af7dd31af0a..e8d27afbbdb 100644 --- a/net/netfilter/nf_conntrack_timestamp.c +++ b/net/netfilter/nf_conntrack_timestamp.c @@ -15,7 +15,7 @@  #include <net/netfilter/nf_conntrack_extend.h>  #include <net/netfilter/nf_conntrack_timestamp.h> -static int nf_ct_tstamp __read_mostly; +static bool nf_ct_tstamp __read_mostly;  module_param_named(tstamp, nf_ct_tstamp, bool, 0644);  MODULE_PARM_DESC(tstamp, "Enable connection tracking flow timestamping."); diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index ce0c406f58a..957374a234d 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -55,7 +55,7 @@ int nf_log_register(u_int8_t pf, struct nf_logger *logger)  		llog = rcu_dereference_protected(nf_loggers[pf],  						 lockdep_is_held(&nf_log_mutex));  		if (llog == NULL) -			RCU_INIT_POINTER(nf_loggers[pf], logger); +			rcu_assign_pointer(nf_loggers[pf], logger);  	}  	mutex_unlock(&nf_log_mutex); @@ -92,7 +92,7 @@ int nf_log_bind_pf(u_int8_t pf, const struct nf_logger *logger)  		mutex_unlock(&nf_log_mutex);  		return -ENOENT;  	} -	RCU_INIT_POINTER(nf_loggers[pf], logger); +	rcu_assign_pointer(nf_loggers[pf], logger);  	mutex_unlock(&nf_log_mutex);  	return 0;  } @@ -250,7 +250,7 @@ static int nf_log_proc_dostring(ctl_table *table, int write,  			mutex_unlock(&nf_log_mutex);  			return -ENOENT;  		} -		RCU_INIT_POINTER(nf_loggers[tindex], logger); +		rcu_assign_pointer(nf_loggers[tindex], logger);  		mutex_unlock(&nf_log_mutex);  	} else {  		
mutex_lock(&nf_log_mutex); diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 99ffd288508..ce60cf0f6c1 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -40,7 +40,7 @@ int nf_register_queue_handler(u_int8_t pf, const struct nf_queue_handler *qh)  	else if (old)  		ret = -EBUSY;  	else { -		RCU_INIT_POINTER(queue_handler[pf], qh); +		rcu_assign_pointer(queue_handler[pf], qh);  		ret = 0;  	}  	mutex_unlock(&queue_handler_mutex); @@ -203,6 +203,27 @@ err:  	return status;  } +#ifdef CONFIG_BRIDGE_NETFILTER +/* When called from bridge netfilter, skb->data must point to MAC header + * before calling skb_gso_segment(). Else, original MAC header is lost + * and segmented skbs will be sent to wrong destination. + */ +static void nf_bridge_adjust_skb_data(struct sk_buff *skb) +{ +	if (skb->nf_bridge) +		__skb_push(skb, skb->network_header - skb->mac_header); +} + +static void nf_bridge_adjust_segmented_data(struct sk_buff *skb) +{ +	if (skb->nf_bridge) +		__skb_pull(skb, skb->network_header - skb->mac_header); +} +#else +#define nf_bridge_adjust_skb_data(s) do {} while (0) +#define nf_bridge_adjust_segmented_data(s) do {} while (0) +#endif +  int nf_queue(struct sk_buff *skb,  	     struct list_head *elem,  	     u_int8_t pf, unsigned int hook, @@ -212,7 +233,7 @@ int nf_queue(struct sk_buff *skb,  	     unsigned int queuenum)  {  	struct sk_buff *segs; -	int err; +	int err = -EINVAL;  	unsigned int queued;  	if (!skb_is_gso(skb)) @@ -228,23 +249,25 @@ int nf_queue(struct sk_buff *skb,  		break;  	} +	nf_bridge_adjust_skb_data(skb);  	segs = skb_gso_segment(skb, 0);  	/* Does not use PTR_ERR to limit the number of error codes that can be  	 * returned by nf_queue.  For instance, callers rely on -ECANCELED to mean  	 * 'ignore this hook'.  	 
*/  	if (IS_ERR(segs)) -		return -EINVAL; - +		goto out_err;  	queued = 0;  	err = 0;  	do {  		struct sk_buff *nskb = segs->next;  		segs->next = NULL; -		if (err == 0) +		if (err == 0) { +			nf_bridge_adjust_segmented_data(segs);  			err = __nf_queue(segs, elem, pf, hook, indev,  					   outdev, okfn, queuenum); +		}  		if (err == 0)  			queued++;  		else @@ -252,11 +275,12 @@ int nf_queue(struct sk_buff *skb,  		segs = nskb;  	} while (segs); -	/* also free orig skb if only some segments were queued */ -	if (unlikely(err && queued)) -		err = 0; -	if (err == 0) +	if (queued) {  		kfree_skb(skb); +		return 0; +	} +  out_err: +	nf_bridge_adjust_segmented_data(skb);  	return err;  } diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c index c879c1a2370..4d70785b953 100644 --- a/net/netfilter/nfnetlink.c +++ b/net/netfilter/nfnetlink.c @@ -59,7 +59,7 @@ int nfnetlink_subsys_register(const struct nfnetlink_subsystem *n)  		nfnl_unlock();  		return -EBUSY;  	} -	RCU_INIT_POINTER(subsys_table[n->subsys_id], n); +	rcu_assign_pointer(subsys_table[n->subsys_id], n);  	nfnl_unlock();  	return 0; @@ -130,7 +130,7 @@ static int nfnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  	const struct nfnetlink_subsystem *ss;  	int type, err; -	if (security_netlink_recv(skb, CAP_NET_ADMIN)) +	if (!capable(CAP_NET_ADMIN))  		return -EPERM;  	/* All the messages must at least contain nfgenmsg */ @@ -210,7 +210,7 @@ static int __net_init nfnetlink_net_init(struct net *net)  	if (!nfnl)  		return -ENOMEM;  	net->nfnl_stash = nfnl; -	RCU_INIT_POINTER(net->nfnl, nfnl); +	rcu_assign_pointer(net->nfnl, nfnl);  	return 0;  } diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c new file mode 100644 index 00000000000..11ba013e47f --- /dev/null +++ b/net/netfilter/nfnetlink_acct.c @@ -0,0 +1,361 @@ +/* + * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 Intra2net AG <http://www.intra2net.com> + * + * This program is free software; you 
can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/rculist.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <asm/atomic.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_acct.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); + +static LIST_HEAD(nfnl_acct_list); + +struct nf_acct { +	atomic64_t		pkts; +	atomic64_t		bytes; +	struct list_head	head; +	atomic_t		refcnt; +	char			name[NFACCT_NAME_MAX]; +	struct rcu_head		rcu_head; +}; + +static int +nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ +	struct nf_acct *nfacct, *matching = NULL; +	char *acct_name; + +	if (!tb[NFACCT_NAME]) +		return -EINVAL; + +	acct_name = nla_data(tb[NFACCT_NAME]); + +	list_for_each_entry(nfacct, &nfnl_acct_list, head) { +		if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) +			continue; + +                if (nlh->nlmsg_flags & NLM_F_EXCL) +			return -EEXIST; + +		matching = nfacct; +		break; +        } + +	if (matching) { +		if (nlh->nlmsg_flags & NLM_F_REPLACE) { +			/* reset counters if you request a replacement. 
*/ +			atomic64_set(&matching->pkts, 0); +			atomic64_set(&matching->bytes, 0); +			return 0; +		} +		return -EBUSY; +	} + +	nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); +	if (nfacct == NULL) +		return -ENOMEM; + +	strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); + +	if (tb[NFACCT_BYTES]) { +		atomic64_set(&nfacct->bytes, +			     be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]))); +	} +	if (tb[NFACCT_PKTS]) { +		atomic64_set(&nfacct->pkts, +			     be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]))); +	} +	atomic_set(&nfacct->refcnt, 1); +	list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); +	return 0; +} + +static int +nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +		   int event, struct nf_acct *acct) +{ +	struct nlmsghdr *nlh; +	struct nfgenmsg *nfmsg; +	unsigned int flags = pid ? NLM_F_MULTI : 0; +	u64 pkts, bytes; + +	event |= NFNL_SUBSYS_ACCT << 8; +	nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); +	if (nlh == NULL) +		goto nlmsg_failure; + +	nfmsg = nlmsg_data(nlh); +	nfmsg->nfgen_family = AF_UNSPEC; +	nfmsg->version = NFNETLINK_V0; +	nfmsg->res_id = 0; + +	NLA_PUT_STRING(skb, NFACCT_NAME, acct->name); + +	if (type == NFNL_MSG_ACCT_GET_CTRZERO) { +		pkts = atomic64_xchg(&acct->pkts, 0); +		bytes = atomic64_xchg(&acct->bytes, 0); +	} else { +		pkts = atomic64_read(&acct->pkts); +		bytes = atomic64_read(&acct->bytes); +	} +	NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts)); +	NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes)); +	NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))); + +	nlmsg_end(skb, nlh); +	return skb->len; + +nlmsg_failure: +nla_put_failure: +	nlmsg_cancel(skb, nlh); +	return -1; +} + +static int +nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ +	struct nf_acct *cur, *last; + +	if (cb->args[2]) +		return 0; + +	last = (struct nf_acct *)cb->args[1]; +	if (cb->args[1]) +		cb->args[1] = 0; + +	rcu_read_lock(); +	list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { 
+		if (last && cur != last) +			continue; + +		if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, +				       cb->nlh->nlmsg_seq, +				       NFNL_MSG_TYPE(cb->nlh->nlmsg_type), +				       NFNL_MSG_ACCT_NEW, cur) < 0) { +			cb->args[1] = (unsigned long)cur; +			break; +		} +	} +	if (!cb->args[1]) +		cb->args[2] = 1; +	rcu_read_unlock(); +	return skb->len; +} + +static int +nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ +	int ret = -ENOENT; +	struct nf_acct *cur; +	char *acct_name; + +	if (nlh->nlmsg_flags & NLM_F_DUMP) { +		return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump, +					  NULL, 0); +	} + +	if (!tb[NFACCT_NAME]) +		return -EINVAL; +	acct_name = nla_data(tb[NFACCT_NAME]); + +	list_for_each_entry(cur, &nfnl_acct_list, head) { +		struct sk_buff *skb2; + +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) +			continue; + +		skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); +		if (skb2 == NULL) { +			ret = -ENOMEM; +			break; +		} + +		ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, +					 nlh->nlmsg_seq, +					 NFNL_MSG_TYPE(nlh->nlmsg_type), +					 NFNL_MSG_ACCT_NEW, cur); +		if (ret <= 0) { +			kfree_skb(skb2); +			break; +		} +		ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, +					MSG_DONTWAIT); +		if (ret > 0) +			ret = 0; + +		/* this avoids a loop in nfnetlink. */ +		return ret == -EAGAIN ? -ENOBUFS : ret; +	} +	return ret; +} + +/* try to delete object, fail if it is still in use. */ +static int nfnl_acct_try_del(struct nf_acct *cur) +{ +	int ret = 0; + +	/* we want to avoid races with nfnl_acct_find_get. */ +	if (atomic_dec_and_test(&cur->refcnt)) { +		/* We are protected by nfnl mutex. */ +		list_del_rcu(&cur->head); +		kfree_rcu(cur, rcu_head); +	} else { +		/* still in use, restore reference counter. 
*/ +		atomic_inc(&cur->refcnt); +		ret = -EBUSY; +	} +	return ret; +} + +static int +nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, +	     const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ +	char *acct_name; +	struct nf_acct *cur; +	int ret = -ENOENT; + +	if (!tb[NFACCT_NAME]) { +		list_for_each_entry(cur, &nfnl_acct_list, head) +			nfnl_acct_try_del(cur); + +		return 0; +	} +	acct_name = nla_data(tb[NFACCT_NAME]); + +	list_for_each_entry(cur, &nfnl_acct_list, head) { +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) +			continue; + +		ret = nfnl_acct_try_del(cur); +		if (ret < 0) +			return ret; + +		break; +	} +	return ret; +} + +static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { +	[NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, +	[NFACCT_BYTES] = { .type = NLA_U64 }, +	[NFACCT_PKTS] = { .type = NLA_U64 }, +}; + +static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { +	[NFNL_MSG_ACCT_NEW]		= { .call = nfnl_acct_new, +					    .attr_count = NFACCT_MAX, +					    .policy = nfnl_acct_policy }, +	[NFNL_MSG_ACCT_GET] 		= { .call = nfnl_acct_get, +					    .attr_count = NFACCT_MAX, +					    .policy = nfnl_acct_policy }, +	[NFNL_MSG_ACCT_GET_CTRZERO] 	= { .call = nfnl_acct_get, +					    .attr_count = NFACCT_MAX, +					    .policy = nfnl_acct_policy }, +	[NFNL_MSG_ACCT_DEL]		= { .call = nfnl_acct_del, +					    .attr_count = NFACCT_MAX, +					    .policy = nfnl_acct_policy }, +}; + +static const struct nfnetlink_subsystem nfnl_acct_subsys = { +	.name				= "acct", +	.subsys_id			= NFNL_SUBSYS_ACCT, +	.cb_count			= NFNL_MSG_ACCT_MAX, +	.cb				= nfnl_acct_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); + +struct nf_acct *nfnl_acct_find_get(const char *acct_name) +{ +	struct nf_acct *cur, *acct = NULL; + +	rcu_read_lock(); +	list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { +		if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) +			continue; + +		if 
(!try_module_get(THIS_MODULE)) +			goto err; + +		if (!atomic_inc_not_zero(&cur->refcnt)) { +			module_put(THIS_MODULE); +			goto err; +		} + +		acct = cur; +		break; +	} +err: +	rcu_read_unlock(); +	return acct; +} +EXPORT_SYMBOL_GPL(nfnl_acct_find_get); + +void nfnl_acct_put(struct nf_acct *acct) +{ +	atomic_dec(&acct->refcnt); +	module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(nfnl_acct_put); + +void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) +{ +	atomic64_inc(&nfacct->pkts); +	atomic64_add(skb->len, &nfacct->bytes); +} +EXPORT_SYMBOL_GPL(nfnl_acct_update); + +static int __init nfnl_acct_init(void) +{ +	int ret; + +	pr_info("nfnl_acct: registering with nfnetlink.\n"); +	ret = nfnetlink_subsys_register(&nfnl_acct_subsys); +	if (ret < 0) { +		pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); +		goto err_out; +	} +	return 0; +err_out: +	return ret; +} + +static void __exit nfnl_acct_exit(void) +{ +	struct nf_acct *cur, *tmp; + +	pr_info("nfnl_acct: unregistering from nfnetlink.\n"); +	nfnetlink_subsys_unregister(&nfnl_acct_subsys); + +	list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) { +		list_del_rcu(&cur->head); +		/* We are sure that our objects have no clients at this point, +		 * it's safe to release them all without checking refcnt. 
*/ +		kfree_rcu(cur, rcu_head); +	} +} + +module_init(nfnl_acct_init); +module_exit(nfnl_acct_exit); diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index 4bca15a0c38..ba92824086f 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -98,6 +98,7 @@ static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)  	struct ipv6hdr _ip6h;  	const struct ipv6hdr *ih;  	u8 nexthdr; +	__be16 frag_off;  	int offset;  	ih = skb_header_pointer(skb, skb_network_offset(skb), sizeof(_ip6h), &_ip6h); @@ -108,7 +109,7 @@ static void audit_ip6(struct audit_buffer *ab, struct sk_buff *skb)  	nexthdr = ih->nexthdr;  	offset = ipv6_skip_exthdr(skb, skb_network_offset(skb) + sizeof(_ip6h), -				  &nexthdr); +				  &nexthdr, &frag_off);  	audit_log_format(ab, " saddr=%pI6c daddr=%pI6c proto=%hhu",  			 &ih->saddr, &ih->daddr, nexthdr); diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index d4f4b5d66b2..95237c89607 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -49,7 +49,7 @@ static u32 hash_v4(const struct sk_buff *skb)  	return jhash_2words((__force u32)ipaddr, iph->protocol, jhash_initval);  } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  static u32 hash_v6(const struct sk_buff *skb)  {  	const struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -74,7 +74,7 @@ nfqueue_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)  		if (par->family == NFPROTO_IPV4)  			queue = (((u64) hash_v4(skb) * info->queues_total) >>  				 32) + queue; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  		else if (par->family == NFPROTO_IPV6)  			queue = (((u64) hash_v6(skb) * info->queues_total) >>  				 32) + queue; diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 9e63b43faee..190ad37c5cf 100644 --- a/net/netfilter/xt_TCPMSS.c +++ 
b/net/netfilter/xt_TCPMSS.c @@ -161,7 +161,7 @@ static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,  		struct flowi6 *fl6 = &fl.u.ip6;  		memset(fl6, 0, sizeof(*fl6)); -		ipv6_addr_copy(&fl6->daddr, &ipv6_hdr(skb)->saddr); +		fl6->daddr = ipv6_hdr(skb)->saddr;  	}  	rcu_read_lock();  	ai = nf_get_afinfo(family); @@ -198,17 +198,18 @@ tcpmss_tg4(struct sk_buff *skb, const struct xt_action_param *par)  	return XT_CONTINUE;  } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  static unsigned int  tcpmss_tg6(struct sk_buff *skb, const struct xt_action_param *par)  {  	struct ipv6hdr *ipv6h = ipv6_hdr(skb);  	u8 nexthdr; +	__be16 frag_off;  	int tcphoff;  	int ret;  	nexthdr = ipv6h->nexthdr; -	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); +	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);  	if (tcphoff < 0)  		return NF_DROP;  	ret = tcpmss_mangle_packet(skb, par->targinfo, @@ -259,7 +260,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par)  	return -EINVAL;  } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  static int tcpmss_tg6_check(const struct xt_tgchk_param *par)  {  	const struct xt_tcpmss_info *info = par->targinfo; @@ -292,7 +293,7 @@ static struct xt_target tcpmss_tg_reg[] __read_mostly = {  		.proto		= IPPROTO_TCP,  		.me		= THIS_MODULE,  	}, -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	{  		.family		= NFPROTO_IPV6,  		.name		= "TCPMSS", diff --git a/net/netfilter/xt_TCPOPTSTRIP.c b/net/netfilter/xt_TCPOPTSTRIP.c index 9dc9ecfdd54..25fd1c4e1ee 100644 --- a/net/netfilter/xt_TCPOPTSTRIP.c +++ b/net/netfilter/xt_TCPOPTSTRIP.c @@ -80,16 +80,17 @@ tcpoptstrip_tg4(struct sk_buff *skb, const struct xt_action_param *par)  	       sizeof(struct iphdr) + sizeof(struct tcphdr));  } -#if 
defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)  static unsigned int  tcpoptstrip_tg6(struct sk_buff *skb, const struct xt_action_param *par)  {  	struct ipv6hdr *ipv6h = ipv6_hdr(skb);  	int tcphoff;  	u_int8_t nexthdr; +	__be16 frag_off;  	nexthdr = ipv6h->nexthdr; -	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr); +	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr, &frag_off);  	if (tcphoff < 0)  		return NF_DROP; @@ -108,7 +109,7 @@ static struct xt_target tcpoptstrip_tg_reg[] __read_mostly = {  		.targetsize = sizeof(struct xt_tcpoptstrip_target_info),  		.me         = THIS_MODULE,  	}, -#if defined(CONFIG_IP6_NF_MANGLE) || defined(CONFIG_IP6_NF_MANGLE_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_MANGLE)  	{  		.name       = "TCPOPTSTRIP",  		.family     = NFPROTO_IPV6, diff --git a/net/netfilter/xt_TEE.c b/net/netfilter/xt_TEE.c index 5f054a0dbbb..4d505790283 100644 --- a/net/netfilter/xt_TEE.c +++ b/net/netfilter/xt_TEE.c @@ -25,13 +25,10 @@  #include <linux/netfilter/x_tables.h>  #include <linux/netfilter/xt_TEE.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK)  #	define WITH_CONNTRACK 1  #	include <net/netfilter/nf_conntrack.h>  #endif -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) -#	define WITH_IPV6 1 -#endif  struct xt_tee_priv {  	struct notifier_block	notifier; @@ -136,7 +133,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)  	return XT_CONTINUE;  } -#ifdef WITH_IPV6 +#if IS_ENABLED(CONFIG_IPV6)  static bool  tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)  { @@ -155,9 +152,10 @@ tee_tg_route6(struct sk_buff *skb, const struct xt_tee_tginfo *info)  	fl6.flowlabel = ((iph->flow_lbl[0] & 0xF) << 16) |  			   (iph->flow_lbl[1] << 8) | iph->flow_lbl[2];  	dst = ip6_route_output(net, NULL, &fl6); -	if (dst == NULL) +	if (dst->error) { +		dst_release(dst);  		return false; - +	
}  	skb_dst_drop(skb);  	skb_dst_set(skb, dst);  	skb->dev      = dst->dev; @@ -196,7 +194,7 @@ tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)  	}  	return XT_CONTINUE;  } -#endif /* WITH_IPV6 */ +#endif  static int tee_netdev_event(struct notifier_block *this, unsigned long event,  			    void *ptr) @@ -276,7 +274,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {  		.destroy    = tee_tg_destroy,  		.me         = THIS_MODULE,  	}, -#ifdef WITH_IPV6 +#if IS_ENABLED(CONFIG_IPV6)  	{  		.name       = "TEE",  		.revision   = 1, diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index dcfd57eb9d0..35a959a096e 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -22,7 +22,7 @@  #include <net/netfilter/ipv4/nf_defrag_ipv4.h> -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  #define XT_TPROXY_HAVE_IPV6 1  #include <net/if_inet6.h>  #include <net/addrconf.h> diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index b77d383cec7..49c5ff7f6dd 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -16,7 +16,7 @@  #include <linux/ip.h>  #include <net/route.h> -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  #include <net/ipv6.h>  #include <net/ip6_route.h>  #include <net/ip6_fib.h> @@ -31,7 +31,7 @@ MODULE_DESCRIPTION("Xtables: address type match");  MODULE_ALIAS("ipt_addrtype");  MODULE_ALIAS("ip6t_addrtype"); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,  			    const struct in6_addr *addr)  { @@ -42,7 +42,7 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev,  	int route_err;  	memset(&flow, 0, sizeof(flow)); -	ipv6_addr_copy(&flow.daddr, addr); +	flow.daddr = 
*addr;  	if (dev)  		flow.flowi6_oif = dev->ifindex; @@ -149,7 +149,7 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)  	else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)  		dev = par->out; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	if (par->family == NFPROTO_IPV6)  		return addrtype_mt6(net, dev, skb, info);  #endif @@ -190,7 +190,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)  		return -EINVAL;  	} -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	if (par->family == NFPROTO_IPV6) {  		if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {  			pr_err("ipv6 BLACKHOLE matching not supported\n"); diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 5b138506690..e595e07a759 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -40,46 +40,46 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)  	case XT_CONNBYTES_PKTS:  		switch (sinfo->direction) {  		case XT_CONNBYTES_DIR_ORIGINAL: -			what = counters[IP_CT_DIR_ORIGINAL].packets; +			what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);  			break;  		case XT_CONNBYTES_DIR_REPLY: -			what = counters[IP_CT_DIR_REPLY].packets; +			what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);  			break;  		case XT_CONNBYTES_DIR_BOTH: -			what = counters[IP_CT_DIR_ORIGINAL].packets; -			what += counters[IP_CT_DIR_REPLY].packets; +			what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); +			what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets);  			break;  		}  		break;  	case XT_CONNBYTES_BYTES:  		switch (sinfo->direction) {  		case XT_CONNBYTES_DIR_ORIGINAL: -			what = counters[IP_CT_DIR_ORIGINAL].bytes; +			what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes);  			break;  		case XT_CONNBYTES_DIR_REPLY: -			what = 
counters[IP_CT_DIR_REPLY].bytes; +			what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);  			break;  		case XT_CONNBYTES_DIR_BOTH: -			what = counters[IP_CT_DIR_ORIGINAL].bytes; -			what += counters[IP_CT_DIR_REPLY].bytes; +			what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); +			what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes);  			break;  		}  		break;  	case XT_CONNBYTES_AVGPKT:  		switch (sinfo->direction) {  		case XT_CONNBYTES_DIR_ORIGINAL: -			bytes = counters[IP_CT_DIR_ORIGINAL].bytes; -			pkts  = counters[IP_CT_DIR_ORIGINAL].packets; +			bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); +			pkts  = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets);  			break;  		case XT_CONNBYTES_DIR_REPLY: -			bytes = counters[IP_CT_DIR_REPLY].bytes; -			pkts  = counters[IP_CT_DIR_REPLY].packets; +			bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); +			pkts  = atomic64_read(&counters[IP_CT_DIR_REPLY].packets);  			break;  		case XT_CONNBYTES_DIR_BOTH: -			bytes = counters[IP_CT_DIR_ORIGINAL].bytes + -				counters[IP_CT_DIR_REPLY].bytes; -			pkts  = counters[IP_CT_DIR_ORIGINAL].packets + -				counters[IP_CT_DIR_REPLY].packets; +			bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) + +				atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); +			pkts  = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) + +				atomic64_read(&counters[IP_CT_DIR_REPLY].packets);  			break;  		}  		if (pkts != 0) @@ -87,10 +87,10 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par)  		break;  	} -	if (sinfo->count.to) +	if (sinfo->count.to >= sinfo->count.from)  		return what <= sinfo->count.to && what >= sinfo->count.from; -	else -		return what >= sinfo->count.from; +	else /* inverted */ +		return what < sinfo->count.to || what > sinfo->count.from;  }  static int connbytes_mt_check(const struct xt_mtchk_param *par) diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c new file mode 100644 index 00000000000..3c831a8efeb 
--- /dev/null +++ b/net/netfilter/xt_ecn.c @@ -0,0 +1,179 @@ +/* + * Xtables module for matching the value of the IPv4/IPv6 and TCP ECN bits + * + * (C) 2002 by Harald Welte <laforge@gnumonks.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/tcp.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_ecn.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter_ipv6/ip6_tables.h> + +MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); +MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_ecn"); +MODULE_ALIAS("ip6t_ecn"); + +static bool match_tcp(const struct sk_buff *skb, struct xt_action_param *par) +{ +	const struct xt_ecn_info *einfo = par->matchinfo; +	struct tcphdr _tcph; +	const struct tcphdr *th; + +	/* In practice, TCP match does this, so can't fail.  But let's +	 * be good citizens. 
+	 */ +	th = skb_header_pointer(skb, par->thoff, sizeof(_tcph), &_tcph); +	if (th == NULL) +		return false; + +	if (einfo->operation & XT_ECN_OP_MATCH_ECE) { +		if (einfo->invert & XT_ECN_OP_MATCH_ECE) { +			if (th->ece == 1) +				return false; +		} else { +			if (th->ece == 0) +				return false; +		} +	} + +	if (einfo->operation & XT_ECN_OP_MATCH_CWR) { +		if (einfo->invert & XT_ECN_OP_MATCH_CWR) { +			if (th->cwr == 1) +				return false; +		} else { +			if (th->cwr == 0) +				return false; +		} +	} + +	return true; +} + +static inline bool match_ip(const struct sk_buff *skb, +			    const struct xt_ecn_info *einfo) +{ +	return ((ip_hdr(skb)->tos & XT_ECN_IP_MASK) == einfo->ip_ect) ^ +	       !!(einfo->invert & XT_ECN_OP_MATCH_IP); +} + +static bool ecn_mt4(const struct sk_buff *skb, struct xt_action_param *par) +{ +	const struct xt_ecn_info *info = par->matchinfo; + +	if (info->operation & XT_ECN_OP_MATCH_IP && !match_ip(skb, info)) +		return false; + +	if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && +	    !match_tcp(skb, par)) +		return false; + +	return true; +} + +static int ecn_mt_check4(const struct xt_mtchk_param *par) +{ +	const struct xt_ecn_info *info = par->matchinfo; +	const struct ipt_ip *ip = par->entryinfo; + +	if (info->operation & XT_ECN_OP_MATCH_MASK) +		return -EINVAL; + +	if (info->invert & XT_ECN_OP_MATCH_MASK) +		return -EINVAL; + +	if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && +	    (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { +		pr_info("cannot match TCP bits in rule for non-tcp packets\n"); +		return -EINVAL; +	} + +	return 0; +} + +static inline bool match_ipv6(const struct sk_buff *skb, +			      const struct xt_ecn_info *einfo) +{ +	return (((ipv6_hdr(skb)->flow_lbl[0] >> 4) & XT_ECN_IP_MASK) == +	        einfo->ip_ect) ^ +	       !!(einfo->invert & XT_ECN_OP_MATCH_IP); +} + +static bool ecn_mt6(const struct sk_buff *skb, struct xt_action_param *par) +{ +	const struct 
xt_ecn_info *info = par->matchinfo; + +	if (info->operation & XT_ECN_OP_MATCH_IP && !match_ipv6(skb, info)) +		return false; + +	if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && +	    !match_tcp(skb, par)) +		return false; + +	return true; +} + +static int ecn_mt_check6(const struct xt_mtchk_param *par) +{ +	const struct xt_ecn_info *info = par->matchinfo; +	const struct ip6t_ip6 *ip = par->entryinfo; + +	if (info->operation & XT_ECN_OP_MATCH_MASK) +		return -EINVAL; + +	if (info->invert & XT_ECN_OP_MATCH_MASK) +		return -EINVAL; + +	if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && +	    (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) { +		pr_info("cannot match TCP bits in rule for non-tcp packets\n"); +		return -EINVAL; +	} + +	return 0; +} + +static struct xt_match ecn_mt_reg[] __read_mostly = { +	{ +		.name		= "ecn", +		.family		= NFPROTO_IPV4, +		.match		= ecn_mt4, +		.matchsize	= sizeof(struct xt_ecn_info), +		.checkentry	= ecn_mt_check4, +		.me		= THIS_MODULE, +	}, +	{ +		.name		= "ecn", +		.family		= NFPROTO_IPV6, +		.match		= ecn_mt6, +		.matchsize	= sizeof(struct xt_ecn_info), +		.checkentry	= ecn_mt_check6, +		.me		= THIS_MODULE, +	}, +}; + +static int __init ecn_mt_init(void) +{ +	return xt_register_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg)); +} + +static void __exit ecn_mt_exit(void) +{ +	xt_unregister_matches(ecn_mt_reg, ARRAY_SIZE(ecn_mt_reg)); +} + +module_init(ecn_mt_init); +module_exit(ecn_mt_exit); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index dfd52bad152..d95f9c963cd 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -21,7 +21,7 @@  #include <linux/mm.h>  #include <linux/in.h>  #include <linux/ip.h> -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  #include <linux/ipv6.h>  #include <net/ipv6.h>  #endif @@ -64,7 +64,7 @@ struct dsthash_dst {  			__be32 src;  			
__be32 dst;  		} ip; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  		struct {  			__be32 src[4];  			__be32 dst[4]; @@ -413,7 +413,7 @@ static inline __be32 maskl(__be32 a, unsigned int l)  	return l ? htonl(ntohl(a) & ~0 << (32 - l)) : 0;  } -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  static void hashlimit_ipv6_mask(__be32 *i, unsigned int p)  {  	switch (p) { @@ -463,8 +463,11 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,  			return 0;  		nexthdr = ip_hdr(skb)->protocol;  		break; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	case NFPROTO_IPV6: +	{ +		__be16 frag_off; +  		if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP) {  			memcpy(&dst->ip6.dst, &ipv6_hdr(skb)->daddr,  			       sizeof(dst->ip6.dst)); @@ -480,10 +483,11 @@ hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,  		      (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))  			return 0;  		nexthdr = ipv6_hdr(skb)->nexthdr; -		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr); +		protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off);  		if ((int)protoff < 0)  			return -1;  		break; +	}  #endif  	default:  		BUG(); @@ -615,7 +619,7 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = {  		.destroy        = hashlimit_mt_destroy,  		.me             = THIS_MODULE,  	}, -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	{  		.name           = "hashlimit",  		.revision       = 1, @@ -692,7 +696,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family,  				 ent->rateinfo.credit, ent->rateinfo.credit_cap,  				 ent->rateinfo.cost);  		break; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if 
IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	case NFPROTO_IPV6:  		res = seq_printf(s, "%ld %pI6:%u->%pI6:%u %u %u %u\n",  				 (long)(ent->expires - jiffies)/HZ, @@ -760,7 +764,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net)  	hashlimit_net->ipt_hashlimit = proc_mkdir("ipt_hashlimit", net->proc_net);  	if (!hashlimit_net->ipt_hashlimit)  		return -ENOMEM; -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	hashlimit_net->ip6t_hashlimit = proc_mkdir("ip6t_hashlimit", net->proc_net);  	if (!hashlimit_net->ip6t_hashlimit) {  		proc_net_remove(net, "ipt_hashlimit"); @@ -773,7 +777,7 @@ static int __net_init hashlimit_proc_net_init(struct net *net)  static void __net_exit hashlimit_proc_net_exit(struct net *net)  {  	proc_net_remove(net, "ipt_hashlimit"); -#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  	proc_net_remove(net, "ip6t_hashlimit");  #endif  } diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c new file mode 100644 index 00000000000..b3be0ef21f1 --- /dev/null +++ b/net/netfilter/xt_nfacct.c @@ -0,0 +1,76 @@ +/* + * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 Intra2net AG <http://www.intra2net.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 (or any + * later at your option) as published by the Free Software Foundation. 
+ */ +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/nfnetlink_acct.h> +#include <linux/netfilter/xt_nfacct.h> + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_nfacct"); +MODULE_ALIAS("ip6t_nfacct"); + +static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ +	const struct xt_nfacct_match_info *info = par->targinfo; + +	nfnl_acct_update(skb, info->nfacct); + +	return true; +} + +static int +nfacct_mt_checkentry(const struct xt_mtchk_param *par) +{ +	struct xt_nfacct_match_info *info = par->matchinfo; +	struct nf_acct *nfacct; + +	nfacct = nfnl_acct_find_get(info->name); +	if (nfacct == NULL) { +		pr_info("xt_nfacct: accounting object with name `%s' " +			"does not exists\n", info->name); +		return -ENOENT; +	} +	info->nfacct = nfacct; +	return 0; +} + +static void +nfacct_mt_destroy(const struct xt_mtdtor_param *par) +{ +	const struct xt_nfacct_match_info *info = par->matchinfo; + +	nfnl_acct_put(info->nfacct); +} + +static struct xt_match nfacct_mt_reg __read_mostly = { +	.name       = "nfacct", +	.family     = NFPROTO_UNSPEC, +	.checkentry = nfacct_mt_checkentry, +	.match      = nfacct_mt, +	.destroy    = nfacct_mt_destroy, +	.matchsize  = sizeof(struct xt_nfacct_match_info), +	.me         = THIS_MODULE, +}; + +static int __init nfacct_mt_init(void) +{ +	return xt_register_match(&nfacct_mt_reg); +} + +static void __exit nfacct_mt_exit(void) +{ +	xt_unregister_match(&nfacct_mt_reg); +} + +module_init(nfacct_mt_init); +module_exit(nfacct_mt_exit); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index fe39f7e913d..72bb07f57f9 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -22,7 +22,7 @@  #include <net/netfilter/nf_tproxy_core.h>  #include <net/netfilter/ipv4/nf_defrag_ipv4.h> -#if 
defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE) +#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)  #define XT_SOCKET_HAVE_IPV6 1  #include <linux/netfilter_ipv6/ip6_tables.h>  #include <net/netfilter/ipv6/nf_defrag_ipv6.h> @@ -30,7 +30,7 @@  #include <linux/netfilter/xt_socket.h> -#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if IS_ENABLED(CONFIG_NF_CONNTRACK)  #define XT_SOCKET_HAVE_CONNTRACK 1  #include <net/netfilter/nf_conntrack.h>  #endif @@ -214,6 +214,7 @@ extract_icmp6_fields(const struct sk_buff *skb,  	struct icmp6hdr *icmph, _icmph;  	__be16 *ports, _ports[2];  	u8 inside_nexthdr; +	__be16 inside_fragoff;  	int inside_hdrlen;  	icmph = skb_header_pointer(skb, outside_hdrlen, @@ -229,7 +230,8 @@ extract_icmp6_fields(const struct sk_buff *skb,  		return 1;  	inside_nexthdr = inside_iph->nexthdr; -	inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), &inside_nexthdr); +	inside_hdrlen = ipv6_skip_exthdr(skb, outside_hdrlen + sizeof(_icmph) + sizeof(_inside_iph), +					 &inside_nexthdr, &inside_fragoff);  	if (inside_hdrlen < 0)  		return 1; /* hjm: Packet has no/incomplete transport layer headers. */ | 
