aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/netfilter')
-rw-r--r--net/ipv4/netfilter/Kconfig217
-rw-r--r--net/ipv4/netfilter/Makefile36
-rw-r--r--net/ipv4/netfilter/arp_tables.c825
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c12
-rw-r--r--net/ipv4/netfilter/arptable_filter.c105
-rw-r--r--net/ipv4/netfilter/ip_queue.c645
-rw-r--r--net/ipv4/netfilter/ip_tables.c1166
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c336
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c33
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c491
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c76
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c96
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c110
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c150
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c482
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c97
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c256
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c134
-rw-r--r--net/ipv4/netfilter/ipt_ah.c28
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c129
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c144
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c63
-rw-r--r--net/ipv4/netfilter/iptable_filter.c148
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c197
-rw-r--r--net/ipv4/netfilter/iptable_nat.c328
-rw-r--r--net/ipv4/netfilter/iptable_raw.c112
-rw-r--r--net/ipv4/netfilter/iptable_security.c129
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c290
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c133
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c222
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c56
-rw-r--r--net/ipv4/netfilter/nf_nat_amanda.c78
-rw-r--r--net/ipv4/netfilter/nf_nat_core.c773
-rw-r--r--net/ipv4/netfilter/nf_nat_ftp.c165
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c225
-rw-r--r--net/ipv4/netfilter/nf_nat_helper.c444
-rw-r--r--net/ipv4/netfilter/nf_nat_irc.c92
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c281
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c58
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_common.c124
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_dccp.c108
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c50
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c37
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_sctp.c96
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_tcp.c93
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udp.c84
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_udplite.c99
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_unknown.c53
-rw-r--r--net/ipv4/netfilter/nf_nat_rule.c237
-rw-r--r--net/ipv4/netfilter/nf_nat_sip.c500
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c290
-rw-r--r--net/ipv4/netfilter/nf_nat_standalone.c332
-rw-r--r--net/ipv4/netfilter/nf_nat_tftp.c52
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c104
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c129
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c199
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c90
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c75
58 files changed, 4389 insertions, 7725 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3816e1dc929..a26ce035e3f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -27,27 +27,50 @@ config NF_CONNTRACK_IPV4
config NF_CONNTRACK_PROC_COMPAT
bool "proc/sysctl compatibility with old connection tracking"
- depends on NF_CONNTRACK_IPV4
+ depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4
default y
help
This option enables /proc and sysctl compatibility with the old
- layer 3 dependant connection tracking. This is needed to keep
+ layer 3 dependent connection tracking. This is needed to keep
old programs that have not been adapted to the new names working.
If unsure, say Y.
-config IP_NF_QUEUE
- tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
- depends on NETFILTER_ADVANCED
+config NF_TABLES_IPV4
+ depends on NF_TABLES
+ tristate "IPv4 nf_tables support"
help
- Netfilter has the ability to queue packets to user space: the
- netlink device can be used to access them using this driver.
+ This option enables the IPv4 support for nf_tables.
- This option enables the old IPv4-only "ip_queue" implementation
- which has been obsoleted by the new "nfnetlink_queue" code (see
- CONFIG_NETFILTER_NETLINK_QUEUE).
+config NFT_CHAIN_ROUTE_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "IPv4 nf_tables route chain support"
+ help
+ This option enables the "route" chain for IPv4 in nf_tables. This
+ chain type is used to force packet re-routing after mangling header
+ fields such as the source, destination, type of service and
+ the packet mark.
+
+config NFT_CHAIN_NAT_IPV4
+ depends on NF_TABLES_IPV4
+ depends on NF_NAT_IPV4 && NFT_NAT
+ tristate "IPv4 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv4 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
+
+config NFT_REJECT_IPV4
+ depends on NF_TABLES_IPV4
+ default NFT_REJECT
+ tristate
- To compile it as a module, choose M here. If unsure, say N.
+config NF_TABLES_ARP
+ depends on NF_TABLES
+ tristate "ARP nf_tables support"
+ help
+ This option enables the ARP support for nf_tables.
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
@@ -64,16 +87,6 @@ config IP_NF_IPTABLES
if IP_NF_IPTABLES
# The matches.
-config IP_NF_MATCH_ADDRTYPE
- tristate '"addrtype" address type match support'
- depends on NETFILTER_ADVANCED
- help
- This option allows you to match what routing thinks of an address,
- eg. UNICAST, LOCAL, BROADCAST, ...
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
-
config IP_NF_MATCH_AH
tristate '"ah" match support'
depends on NETFILTER_ADVANCED
@@ -86,20 +99,30 @@ config IP_NF_MATCH_AH
config IP_NF_MATCH_ECN
tristate '"ecn" match support'
depends on NETFILTER_ADVANCED
- help
- This option adds a `ECN' match, which allows you to match against
- the IPv4 and TCP header ECN fields.
+ select NETFILTER_XT_MATCH_ECN
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_ECN.
+
+config IP_NF_MATCH_RPFILTER
+ tristate '"rpfilter" reverse path filter match support'
+ depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
+ ---help---
+ This option allows you to match packets whose replies would
+ go out via the interface the packet came in.
To compile it as a module, choose M here. If unsure, say N.
+ The module will be called ipt_rpfilter.
config IP_NF_MATCH_TTL
tristate '"ttl" match support'
depends on NETFILTER_ADVANCED
- help
- This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
- to match packets by their TTL value.
-
- To compile it as a module, choose M here. If unsure, say N.
+ select NETFILTER_XT_MATCH_HL
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_HL.
# `filter', generic and specific targets
config IP_NF_FILTER
@@ -123,17 +146,21 @@ config IP_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_LOG
- tristate "LOG target support"
- default m if NETFILTER_ADVANCED=n
+config IP_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
help
- This option adds a `LOG' target, which allows you to create rules in
- any iptables table which records the packet header to the syslog.
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This allows to avoid conntrack and server resource usage
+ during SYN-flood attacks.
- To compile it as a module, choose M here. If unsure, say N.
+ To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_ULOG
- tristate "ULOG target support"
+ tristate "ULOG target support (obsolete)"
default m if NETFILTER_ADVANCED=n
---help---
@@ -147,30 +174,27 @@ config IP_NF_TARGET_ULOG
which can only be viewed through syslog.
The appropriate userspace logging daemon (ulogd) may be obtained from
- <http://www.gnumonks.org/projects/ulogd/>
+ <http://www.netfilter.org/projects/ulogd/index.html>
To compile it as a module, choose M here. If unsure, say N.
# NAT + specific targets: nf_conntrack
-config NF_NAT
- tristate "Full NAT"
+config NF_NAT_IPV4
+ tristate "IPv4 NAT"
depends on NF_CONNTRACK_IPV4
default m if NETFILTER_ADVANCED=n
+ select NF_NAT
help
- The Full NAT option allows masquerading, port forwarding and other
+ The IPv4 NAT option allows masquerading, port forwarding and other
forms of full Network Address Port Translation. It is controlled by
the `nat' table in iptables: see the man page for iptables(8).
To compile it as a module, choose M here. If unsure, say N.
-config NF_NAT_NEEDED
- bool
- depends on NF_NAT
- default y
+if NF_NAT_IPV4
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- depends on NF_NAT
default m if NETFILTER_ADVANCED=n
help
Masquerading is a special case of NAT: all outgoing connections are
@@ -183,31 +207,29 @@ config IP_NF_TARGET_MASQUERADE
config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
- depends on NF_NAT
depends on NETFILTER_ADVANCED
- help
- NETMAP is an implementation of static 1:1 NAT mapping of network
- addresses. It maps the network address part, while keeping the host
- address part intact.
-
- To compile it as a module, choose M here. If unsure, say N.
+ select NETFILTER_XT_TARGET_NETMAP
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_NETMAP.
config IP_NF_TARGET_REDIRECT
tristate "REDIRECT target support"
- depends on NF_NAT
depends on NETFILTER_ADVANCED
- help
- REDIRECT is a special case of NAT: all incoming connections are
- mapped onto the incoming interface's address, causing the packets to
- come to the local machine instead of passing through. This is
- useful for transparent proxies.
+ select NETFILTER_XT_TARGET_REDIRECT
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_REDIRECT.
- To compile it as a module, choose M here. If unsure, say N.
+endif
config NF_NAT_SNMP_BASIC
tristate "Basic SNMP-ALG support"
- depends on NF_NAT
+ depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4
depends on NETFILTER_ADVANCED
+ default NF_NAT && NF_CONNTRACK_SNMP
---help---
This module implements an Application Layer Gateway (ALG) for
@@ -227,61 +249,21 @@ config NF_NAT_SNMP_BASIC
# <expr> '&&' <expr> (6)
#
# (6) Returns the result of min(/expr/, /expr/).
-config NF_NAT_PROTO_DCCP
- tristate
- depends on NF_NAT && NF_CT_PROTO_DCCP
- default NF_NAT && NF_CT_PROTO_DCCP
config NF_NAT_PROTO_GRE
tristate
- depends on NF_NAT && NF_CT_PROTO_GRE
-
-config NF_NAT_PROTO_UDPLITE
- tristate
- depends on NF_NAT && NF_CT_PROTO_UDPLITE
- default NF_NAT && NF_CT_PROTO_UDPLITE
-
-config NF_NAT_PROTO_SCTP
- tristate
- default NF_NAT && NF_CT_PROTO_SCTP
- depends on NF_NAT && NF_CT_PROTO_SCTP
- select LIBCRC32C
-
-config NF_NAT_FTP
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_FTP
-
-config NF_NAT_IRC
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_IRC
-
-config NF_NAT_TFTP
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_TFTP
-
-config NF_NAT_AMANDA
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_AMANDA
+ depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE
config NF_NAT_PPTP
tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_PPTP
+ depends on NF_CONNTRACK && NF_NAT_IPV4
+ default NF_NAT_IPV4 && NF_CONNTRACK_PPTP
select NF_NAT_PROTO_GRE
config NF_NAT_H323
tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_H323
-
-config NF_NAT_SIP
- tristate
- depends on NF_CONNTRACK && NF_NAT
- default NF_NAT && NF_CONNTRACK_SIP
+ depends on NF_CONNTRACK && NF_NAT_IPV4
+ default NF_NAT_IPV4 && NF_CONNTRACK_H323
# mangle + specific targets
config IP_NF_MANGLE
@@ -295,8 +277,8 @@ config IP_NF_MANGLE
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_CLUSTERIP
- tristate "CLUSTERIP target support (EXPERIMENTAL)"
- depends on IP_NF_MANGLE && EXPERIMENTAL
+ tristate "CLUSTERIP target support"
+ depends on IP_NF_MANGLE
depends on NF_CONNTRACK_IPV4
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_MARK
@@ -323,24 +305,17 @@ config IP_NF_TARGET_ECN
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_TTL
- tristate 'TTL target support'
- depends on IP_NF_MANGLE
- depends on NETFILTER_ADVANCED
- help
- This option adds a `TTL' target, which enables the user to modify
- the TTL value of the IP header.
-
- While it is safe to decrement/lower the TTL, this target also enables
- functionality to increment and set the TTL value of the IP header to
- arbitrary values. This is EXTREMELY DANGEROUS since you can easily
- create immortal packets that loop forever on the network.
-
- To compile it as a module, choose M here. If unsure, say N.
+ tristate '"TTL" target support'
+ depends on NETFILTER_ADVANCED && IP_NF_MANGLE
+ select NETFILTER_XT_TARGET_HL
+ ---help---
+ This is a backwards-compatible option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_HL.
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
- depends on NETFILTER_ADVANCED
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5f9b650d90f..90b82405331 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -3,39 +3,35 @@
#
# objects for l3 independent conntrack
-nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
ifeq ($(CONFIG_PROC_FS),y)
nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
endif
endif
-nf_nat-objs := nf_nat_core.o nf_nat_helper.o nf_nat_proto_unknown.o nf_nat_proto_common.o nf_nat_proto_tcp.o nf_nat_proto_udp.o nf_nat_proto_icmp.o
-iptable_nat-objs := nf_nat_rule.o nf_nat_standalone.o
-
# connection tracking
obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
-obj-$(CONFIG_NF_NAT) += nf_nat.o
+nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
# defrag
obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
# NAT helpers (nf_conntrack)
-obj-$(CONFIG_NF_NAT_AMANDA) += nf_nat_amanda.o
-obj-$(CONFIG_NF_NAT_FTP) += nf_nat_ftp.o
obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
-obj-$(CONFIG_NF_NAT_IRC) += nf_nat_irc.o
obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
-obj-$(CONFIG_NF_NAT_SIP) += nf_nat_sip.o
obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-obj-$(CONFIG_NF_NAT_TFTP) += nf_nat_tftp.o
# NAT protocols (nf_nat)
-obj-$(CONFIG_NF_NAT_PROTO_DCCP) += nf_nat_proto_dccp.o
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-obj-$(CONFIG_NF_NAT_PROTO_UDPLITE) += nf_nat_proto_udplite.o
-obj-$(CONFIG_NF_NAT_PROTO_SCTP) += nf_nat_proto_sctp.o
+
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
@@ -43,25 +39,20 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
# matches
-obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
-obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
+obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
# targets
obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
-obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
-obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
-obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
+obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
# generic ARP tables
@@ -70,6 +61,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
# just filtering instance of ARP tables for now
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
-
-obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
-
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7ea88b61cb0..f95b6f93814 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -6,9 +6,10 @@
* Some ARP specific bits are:
*
* Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net>
*
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
@@ -27,6 +28,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp/arp_tables.h>
+#include "../../netfilter/xt_repldata.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
@@ -48,16 +50,17 @@ MODULE_DESCRIPTION("arptables core");
#endif
#ifdef CONFIG_NETFILTER_DEBUG
-#define ARP_NF_ASSERT(x) \
-do { \
- if (!(x)) \
- printk("ARP_NF_ASSERT: %s:%s:%u\n", \
- __func__, __FILE__, __LINE__); \
-} while(0)
+#define ARP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define ARP_NF_ASSERT(x)
#endif
+void *arpt_alloc_initial_table(const struct xt_table *info)
+{
+ return xt_alloc_initial_table(arpt, ARPT);
+}
+EXPORT_SYMBOL_GPL(arpt_alloc_initial_table);
+
static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
const char *hdr_addr, int len)
{
@@ -70,7 +73,29 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
for (i = 0; i < len; i++)
ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
- return (ret != 0);
+ return ret != 0;
+}
+
+/*
+ * Unfortunately, _b and _mask are not aligned to an int (or long int)
+ * Some arches dont care, unrolling the loop is a win on them.
+ * For other arches, we only have a 16bit alignement.
+ */
+static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ unsigned long ret = ifname_compare_aligned(_a, _b, _mask);
+#else
+ unsigned long ret = 0;
+ const u16 *a = (const u16 *)_a;
+ const u16 *b = (const u16 *)_b;
+ const u16 *mask = (const u16 *)_mask;
+ int i;
+
+ for (i = 0; i < IFNAMSIZ/sizeof(u16); i++)
+ ret |= (a[i] ^ b[i]) & mask[i];
+#endif
+ return ret;
}
/* Returns whether packet matches rule or not. */
@@ -83,7 +108,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
const char *arpptr = (char *)(arphdr + 1);
const char *src_devaddr, *tgt_devaddr;
__be32 src_ipaddr, tgt_ipaddr;
- int i, ret;
+ long ret;
#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
@@ -156,10 +181,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
}
/* Look for ifname matches. */
- for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
- ret |= (indev[i] ^ arpinfo->iniface[i])
- & arpinfo->iniface_mask[i];
- }
+ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -168,10 +190,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
return 0;
}
- for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
- ret |= (outdev[i] ^ arpinfo->outiface[i])
- & arpinfo->outiface_mask[i];
- }
+ ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
@@ -201,35 +220,47 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
}
static unsigned int
-arpt_error(struct sk_buff *skb, const struct xt_target_param *par)
+arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
{
- if (net_ratelimit())
- printk("arp_tables: error: '%s'\n",
- (const char *)par->targinfo);
+ net_err_ratelimited("arp_tables: error: '%s'\n",
+ (const char *)par->targinfo);
return NF_DROP;
}
-static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
+static inline const struct xt_entry_target *
+arpt_get_target_c(const struct arpt_entry *e)
+{
+ return arpt_get_target((struct arpt_entry *)e);
+}
+
+static inline struct arpt_entry *
+get_entry(const void *base, unsigned int offset)
{
return (struct arpt_entry *)(base + offset);
}
+static inline __pure
+struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
unsigned int arpt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
struct xt_table *table)
{
- static const char nulldevname[IFNAMSIZ];
+ static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
unsigned int verdict = NF_DROP;
const struct arphdr *arp;
- bool hotdrop = false;
struct arpt_entry *e, *back;
const char *indev, *outdev;
void *table_base;
const struct xt_table_info *private;
- struct xt_target_param tgpar;
+ struct xt_action_param acpar;
+ unsigned int addend;
if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
return NF_DROP;
@@ -237,104 +268,104 @@ unsigned int arpt_do_table(struct sk_buff *skb,
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- read_lock_bh(&table->lock);
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
private = table->private;
- table_base = (void *)private->entries[smp_processor_id()];
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
+ table_base = private->entries[smp_processor_id()];
+
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
- tgpar.in = in;
- tgpar.out = out;
- tgpar.hooknum = hook;
- tgpar.family = NFPROTO_ARP;
+ acpar.in = in;
+ acpar.out = out;
+ acpar.hooknum = hook;
+ acpar.family = NFPROTO_ARP;
+ acpar.hotdrop = false;
arp = arp_hdr(skb);
do {
- if (arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
- struct arpt_entry_target *t;
- int hdr_len;
-
- hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
- (2 * skb->dev->addr_len);
- ADD_COUNTER(e->counters, hdr_len, 1);
-
- t = arpt_get_target(e);
-
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
-
- v = ((struct arpt_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != ARPT_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
- }
- if (table_base + v
- != (void *)e + e->next_offset) {
- /* Save old back ptr in next entry */
- struct arpt_entry *next
- = (void *)e + e->next_offset;
- next->comefrom =
- (void *)back - table_base;
-
- /* set back pointer to next entry */
- back = next;
- }
+ const struct xt_entry_target *t;
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- * abs. verdicts
- */
- tgpar.target = t->u.kernel.target;
- tgpar.targinfo = t->data;
- verdict = t->u.kernel.target->target(skb,
- &tgpar);
-
- /* Target might have changed stuff. */
- arp = arp_hdr(skb);
-
- if (verdict == ARPT_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
+ if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
+ e = arpt_next_entry(e);
+ continue;
+ }
+
+ ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
+
+ t = arpt_get_target_c(e);
+
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct xt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != XT_RETURN) {
+ verdict = (unsigned int)(-v) - 1;
break;
+ }
+ e = back;
+ back = get_entry(table_base, back->comefrom);
+ continue;
}
- } else {
- e = (void *)e + e->next_offset;
+ if (table_base + v
+ != arpt_next_entry(e)) {
+ /* Save old back ptr in next entry */
+ struct arpt_entry *next = arpt_next_entry(e);
+ next->comefrom = (void *)back - table_base;
+
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ continue;
}
- } while (!hotdrop);
- read_unlock_bh(&table->lock);
- if (hotdrop)
+ /* Targets which reenter must return
+ * abs. verdicts
+ */
+ acpar.target = t->u.kernel.target;
+ acpar.targinfo = t->data;
+ verdict = t->u.kernel.target->target(skb, &acpar);
+
+ /* Target might have changed stuff. */
+ arp = arp_hdr(skb);
+
+ if (verdict == XT_CONTINUE)
+ e = arpt_next_entry(e);
+ else
+ /* Verdict */
+ break;
+ } while (!acpar.hotdrop);
+ xt_write_recseq_end(addend);
+ local_bh_enable();
+
+ if (acpar.hotdrop)
return NF_DROP;
else
return verdict;
}
/* All zeroes == unconditional rule. */
-static inline int unconditional(const struct arpt_arp *arp)
+static inline bool unconditional(const struct arpt_arp *arp)
{
- unsigned int i;
+ static const struct arpt_arp uncond;
- for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++)
- if (((__u32 *)arp)[i])
- return 0;
-
- return 1;
+ return memcmp(arp, &uncond, sizeof(uncond)) == 0;
}
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
-static int mark_source_chains(struct xt_table_info *newinfo,
+static int mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
unsigned int hook;
@@ -354,12 +385,12 @@ static int mark_source_chains(struct xt_table_info *newinfo,
e->counters.pcnt = pos;
for (;;) {
- const struct arpt_standard_target *t
- = (void *)arpt_get_target(e);
+ const struct xt_standard_target *t
+ = (void *)arpt_get_target_c(e);
int visited = e->comefrom & (1 << hook);
if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
- printk("arptables: loop hook %u pos %u %08X.\n",
+ pr_notice("arptables: loop hook %u pos %u %08X.\n",
hook, pos, e->comefrom);
return 0;
}
@@ -367,14 +398,16 @@ static int mark_source_chains(struct xt_table_info *newinfo,
|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
/* Unconditional return/END. */
- if ((e->target_offset == sizeof(struct arpt_entry)
- && (strcmp(t->target.u.user.name,
- ARPT_STANDARD_TARGET) == 0)
- && t->verdict < 0
- && unconditional(&e->arp)) || visited) {
+ if ((e->target_offset == sizeof(struct arpt_entry) &&
+ (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < 0 && unconditional(&e->arp)) ||
+ visited) {
unsigned int oldpos, size;
- if (t->verdict < -NF_MAX_VERDICT - 1) {
+ if ((strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < -NF_MAX_VERDICT - 1) {
duprintf("mark_source_chains: bad "
"negative verdict (%i)\n",
t->verdict);
@@ -408,8 +441,8 @@ static int mark_source_chains(struct xt_table_info *newinfo,
int newpos = t->verdict;
if (strcmp(t->target.u.user.name,
- ARPT_STANDARD_TARGET) == 0
- && newpos >= 0) {
+ XT_STANDARD_TARGET) == 0 &&
+ newpos >= 0) {
if (newpos > newinfo->size -
sizeof(struct arpt_entry)) {
duprintf("mark_source_chains: "
@@ -437,19 +470,19 @@ static int mark_source_chains(struct xt_table_info *newinfo,
return 1;
}
-static inline int check_entry(struct arpt_entry *e, const char *name)
+static inline int check_entry(const struct arpt_entry *e, const char *name)
{
- const struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
if (!arp_checkentry(&e->arp)) {
duprintf("arp_tables: arp check failed %p %s.\n", e, name);
return -EINVAL;
}
- if (e->target_offset + sizeof(struct arpt_entry_target) > e->next_offset)
+ if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
return -EINVAL;
- t = arpt_get_target(e);
+ t = arpt_get_target_c(e);
if (e->target_offset + t->u.target_size > e->next_offset)
return -EINVAL;
@@ -458,7 +491,7 @@ static inline int check_entry(struct arpt_entry *e, const char *name)
static inline int check_target(struct arpt_entry *e, const char *name)
{
- struct arpt_entry_target *t = arpt_get_target(e);
+ struct xt_entry_target *t = arpt_get_target(e);
int ret;
struct xt_tgchk_param par = {
.table = name,
@@ -479,10 +512,9 @@ static inline int check_target(struct arpt_entry *e, const char *name)
}
static inline int
-find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
- unsigned int *i)
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
int ret;
@@ -491,13 +523,11 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
return ret;
t = arpt_get_target(e);
- target = try_then_request_module(xt_find_target(NFPROTO_ARP,
- t->u.user.name,
- t->u.user.revision),
- "arpt_%s", t->u.user.name);
- if (IS_ERR(target) || !target) {
+ target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
- ret = target ? PTR_ERR(target) : -ENOENT;
+ ret = PTR_ERR(target);
goto out;
}
t->u.kernel.target = target;
@@ -505,8 +535,6 @@ find_check_entry(struct arpt_entry *e, const char *name, unsigned int size,
ret = check_target(e, name);
if (ret)
goto err;
-
- (*i)++;
return 0;
err:
module_put(t->u.kernel.target->me);
@@ -514,24 +542,39 @@ out:
return ret;
}
+static bool check_underflow(const struct arpt_entry *e)
+{
+ const struct xt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->arp))
+ return false;
+ t = arpt_get_target_c(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct xt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
static inline int check_entry_size_and_hooks(struct arpt_entry *e,
struct xt_table_info *newinfo,
- unsigned char *base,
- unsigned char *limit,
+ const unsigned char *base,
+ const unsigned char *limit,
const unsigned int *hook_entries,
const unsigned int *underflows,
- unsigned int *i)
+ unsigned int valid_hooks)
{
unsigned int h;
- if ((unsigned long)e % __alignof__(struct arpt_entry) != 0
- || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
+ if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
if (e->next_offset
- < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) {
+ < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
duprintf("checking: element %p size %u\n",
e, e->next_offset);
return -EINVAL;
@@ -539,30 +582,31 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
/* Check hooks & underflows */
for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
newinfo->underflow[h] = underflows[h];
+ }
}
- /* FIXME: underflows must be unconditional, standard verdicts
- < 0 (not ARPT_RETURN). --RR */
-
/* Clear counters and comefrom */
e->counters = ((struct xt_counters) { 0, 0 });
e->comefrom = 0;
-
- (*i)++;
return 0;
}
-static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
+static inline void cleanup_entry(struct arpt_entry *e)
{
struct xt_tgdtor_param par;
- struct arpt_entry_target *t;
-
- if (i && (*i)-- == 0)
- return 1;
+ struct xt_entry_target *t;
t = arpt_get_target(e);
par.target = t->u.kernel.target;
@@ -571,26 +615,20 @@ static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
- return 0;
}
/* Checks and translates the user-supplied table segment (held in
* newinfo).
*/
-static int translate_table(const char *name,
- unsigned int valid_hooks,
- struct xt_table_info *newinfo,
- void *entry0,
- unsigned int size,
- unsigned int number,
- const unsigned int *hook_entries,
- const unsigned int *underflows)
+static int translate_table(struct xt_table_info *newinfo, void *entry0,
+ const struct arpt_replace *repl)
{
+ struct arpt_entry *iter;
unsigned int i;
- int ret;
+ int ret = 0;
- newinfo->size = size;
- newinfo->number = number;
+ newinfo->size = repl->size;
+ newinfo->number = repl->num_entries;
/* Init all hooks to impossible value. */
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
@@ -602,52 +640,66 @@ static int translate_table(const char *name,
i = 0;
/* Walk through entries, checking offsets. */
- ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
- check_entry_size_and_hooks,
- newinfo,
- entry0,
- entry0 + size,
- hook_entries, underflows, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+ entry0 + repl->size,
+ repl->hook_entry,
+ repl->underflow,
+ repl->valid_hooks);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(arpt_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
return ret;
- if (i != number) {
+ if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
- i, number);
+ i, repl->num_entries);
return -EINVAL;
}
/* Check hooks all assigned */
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
/* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
+ if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
+ i, repl->hook_entry[i]);
return -EINVAL;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
+ i, repl->underflow[i]);
return -EINVAL;
}
}
- if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) {
duprintf("Looping hook\n");
return -ELOOP;
}
/* Finally, each sanity check must pass */
i = 0;
- ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
- find_check_entry, name, size, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = find_check_entry(iter, repl->name, repl->size);
+ if (ret != 0)
+ break;
+ ++i;
+ }
if (ret != 0) {
- ARPT_ENTRY_ITERATE(entry0, newinfo->size,
- cleanup_entry, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter);
+ }
return ret;
}
@@ -660,61 +712,34 @@ static int translate_table(const char *name,
return ret;
}
-/* Gets counters. */
-static inline int add_entry_to_counter(const struct arpt_entry *e,
- struct xt_counters total[],
- unsigned int *i)
-{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
- (*i)++;
- return 0;
-}
-
-static inline int set_entry_to_counter(const struct arpt_entry *e,
- struct xt_counters total[],
- unsigned int *i)
-{
- SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
- (*i)++;
- return 0;
-}
-
static void get_counters(const struct xt_table_info *t,
struct xt_counters counters[])
{
+ struct arpt_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu;
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU
- * We dont care about preemption here.
- */
- curcpu = raw_smp_processor_id();
-
- i = 0;
- ARPT_ENTRY_ITERATE(t->entries[curcpu],
- t->size,
- set_entry_to_counter,
- counters,
- &i);
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+
i = 0;
- ARPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_entry_to_counter,
- counters,
- &i);
+ xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(s);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(s, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
+ ++i;
+ }
}
}
-static inline struct xt_counters *alloc_counters(struct xt_table *table)
+static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
@@ -725,25 +750,22 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table)
* about).
*/
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc_node(countersize, numa_node_id());
+ counters = vzalloc(countersize);
if (counters == NULL)
return ERR_PTR(-ENOMEM);
- /* First, sum counters... */
- write_lock_bh(&table->lock);
get_counters(private, counters);
- write_unlock_bh(&table->lock);
return counters;
}
static int copy_entries_to_user(unsigned int total_size,
- struct xt_table *table,
+ const struct xt_table *table,
void __user *userptr)
{
unsigned int off, num;
- struct arpt_entry *e;
+ const struct arpt_entry *e;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
int ret = 0;
@@ -763,7 +785,7 @@ static int copy_entries_to_user(unsigned int total_size,
/* FIXME: use iterator macros --RR */
/* ... then go back and fix counters and names */
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
- struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
e = (struct arpt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
@@ -774,9 +796,9 @@ static int copy_entries_to_user(unsigned int total_size,
goto free_counters;
}
- t = arpt_get_target(e);
+ t = arpt_get_target_c(e);
if (copy_to_user(userptr + off + e->target_offset
- + offsetof(struct arpt_entry_target,
+ + offsetof(struct xt_entry_target,
u.user.name),
t->u.kernel.target->name,
strlen(t->u.kernel.target->name)+1) != 0) {
@@ -791,7 +813,7 @@ static int copy_entries_to_user(unsigned int total_size,
}
#ifdef CONFIG_COMPAT
-static void compat_standard_from_user(void *dst, void *src)
+static void compat_standard_from_user(void *dst, const void *src)
{
int v = *(compat_int_t *)src;
@@ -800,7 +822,7 @@ static void compat_standard_from_user(void *dst, void *src)
memcpy(dst, &v, sizeof(v));
}
-static int compat_standard_to_user(void __user *dst, void *src)
+static int compat_standard_to_user(void __user *dst, const void *src)
{
compat_int_t cv = *(int *)src;
@@ -809,18 +831,18 @@ static int compat_standard_to_user(void __user *dst, void *src)
return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
}
-static int compat_calc_entry(struct arpt_entry *e,
+static int compat_calc_entry(const struct arpt_entry *e,
const struct xt_table_info *info,
- void *base, struct xt_table_info *newinfo)
+ const void *base, struct xt_table_info *newinfo)
{
- struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
unsigned int entry_offset;
int off, i, ret;
off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
entry_offset = (void *)e - base;
- t = arpt_get_target(e);
+ t = arpt_get_target_c(e);
off += xt_compat_target_offset(t->u.kernel.target);
newinfo->size -= off;
ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
@@ -841,7 +863,9 @@ static int compat_calc_entry(struct arpt_entry *e,
static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
+ struct arpt_entry *iter;
void *loc_cpu_entry;
+ int ret;
if (!newinfo || !info)
return -EINVAL;
@@ -850,15 +874,20 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
- return ARPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
- compat_calc_entry, info, loc_cpu_entry,
- newinfo);
+ xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+ ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+ if (ret != 0)
+ return ret;
+ }
+ return 0;
}
#endif
-static int get_info(struct net *net, void __user *user, int *len, int compat)
+static int get_info(struct net *net, void __user *user,
+ const int *len, int compat)
{
- char name[ARPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
int ret;
@@ -871,25 +900,26 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
- name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_lock(NFPROTO_ARP);
#endif
t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
"arptable_%s", name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
struct arpt_getinfo info;
const struct xt_table_info *private = t->private;
-
#ifdef CONFIG_COMPAT
+ struct xt_table_info tmp;
+
if (compat) {
- struct xt_table_info tmp;
ret = compat_table_info(private, &tmp);
xt_compat_flush_offsets(NFPROTO_ARP);
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
@@ -915,7 +945,7 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
}
static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
- int *len)
+ const int *len)
{
int ret;
struct arpt_get_entries get;
@@ -934,7 +964,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
}
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n",
@@ -966,10 +996,10 @@ static int __do_replace(struct net *net, const char *name,
struct xt_table_info *oldinfo;
struct xt_counters *counters;
void *loc_cpu_old_entry;
+ struct arpt_entry *iter;
ret = 0;
- counters = vmalloc_node(num_counters * sizeof(struct xt_counters),
- numa_node_id());
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -977,7 +1007,7 @@ static int __do_replace(struct net *net, const char *name,
t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
"arptable_%s", name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1004,17 +1034,20 @@ static int __do_replace(struct net *net, const char *name,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
- NULL);
+ xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ cleanup_entry(iter);
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
@@ -1028,12 +1061,14 @@ static int __do_replace(struct net *net, const char *name,
return ret;
}
-static int do_replace(struct net *net, void __user *user, unsigned int len)
+static int do_replace(struct net *net, const void __user *user,
+ unsigned int len)
{
int ret;
struct arpt_replace tmp;
struct xt_table_info *newinfo;
void *loc_cpu_entry;
+ struct arpt_entry *iter;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1041,6 +1076,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
@@ -1054,9 +1090,7 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_table(tmp.name, tmp.valid_hooks,
- newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
- tmp.hook_entry, tmp.underflow);
+ ret = translate_table(newinfo, loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
@@ -1069,30 +1103,17 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
return 0;
free_newinfo_untrans:
- ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter);
free_newinfo:
xt_free_table_info(newinfo);
return ret;
}
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK.
- */
-static inline int add_counter_to_entry(struct arpt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
+static int do_add_counters(struct net *net, const void __user *user,
+ unsigned int len, int compat)
{
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-static int do_add_counters(struct net *net, void __user *user, unsigned int len,
- int compat)
-{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1103,6 +1124,8 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
+ struct arpt_entry *iter;
+ unsigned int addend;
#ifdef CONFIG_COMPAT
struct compat_xt_counters_info compat_tmp;
@@ -1133,7 +1156,7 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
if (len != size + num_counters * sizeof(struct xt_counters))
return -EINVAL;
- paddc = vmalloc_node(len - size, numa_node_id());
+ paddc = vmalloc(len - size);
if (!paddc)
return -ENOMEM;
@@ -1143,12 +1166,12 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
}
t = xt_find_table_lock(net, NFPROTO_ARP, name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
- write_lock_bh(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
@@ -1157,14 +1180,16 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[smp_processor_id()];
- ARPT_ENTRY_ITERATE(loc_cpu_entry,
- private->size,
- add_counter_to_entry,
- paddc,
- &i);
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ addend = xt_write_recseq_begin();
+ xt_entry_foreach(iter, loc_cpu_entry, private->size) {
+ ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ ++i;
+ }
+ xt_write_recseq_end(addend);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
@@ -1174,38 +1199,32 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
}
#ifdef CONFIG_COMPAT
-static inline int
-compat_release_entry(struct compat_arpt_entry *e, unsigned int *i)
+static inline void compat_release_entry(struct compat_arpt_entry *e)
{
- struct arpt_entry_target *t;
-
- if (i && (*i)-- == 0)
- return 1;
+ struct xt_entry_target *t;
t = compat_arpt_get_target(e);
module_put(t->u.kernel.target->me);
- return 0;
}
static inline int
check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
- unsigned char *base,
- unsigned char *limit,
- unsigned int *hook_entries,
- unsigned int *underflows,
- unsigned int *i,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
const char *name)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
int ret, off, h;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
- if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0
- || (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
+ if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
duprintf("Bad offset %p, limit = %p\n", e, limit);
return -EINVAL;
}
@@ -1226,14 +1245,12 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
entry_offset = (void *)e - (void *)base;
t = compat_arpt_get_target(e);
- target = try_then_request_module(xt_find_target(NFPROTO_ARP,
- t->u.user.name,
- t->u.user.revision),
- "arpt_%s", t->u.user.name);
- if (IS_ERR(target) || !target) {
+ target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
t->u.user.name);
- ret = target ? PTR_ERR(target) : -ENOENT;
+ ret = PTR_ERR(target);
goto out;
}
t->u.kernel.target = target;
@@ -1255,8 +1272,6 @@ check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
/* Clear counters and comefrom */
memset(&e->counters, 0, sizeof(e->counters));
e->comefrom = 0;
-
- (*i)++;
return 0;
release_target:
@@ -1270,7 +1285,7 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
unsigned int *size, const char *name,
struct xt_table_info *newinfo, unsigned char *base)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
struct arpt_entry *de;
unsigned int origsize;
@@ -1300,19 +1315,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
return ret;
}
-static inline int compat_check_entry(struct arpt_entry *e, const char *name,
- unsigned int *i)
-{
- int ret;
-
- ret = check_target(e, name);
- if (ret)
- return ret;
-
- (*i)++;
- return 0;
-}
-
static int translate_compat_table(const char *name,
unsigned int valid_hooks,
struct xt_table_info **pinfo,
@@ -1325,8 +1327,10 @@ static int translate_compat_table(const char *name,
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
+ struct compat_arpt_entry *iter0;
+ struct arpt_entry *iter1;
unsigned int size;
- int ret;
+ int ret = 0;
info = *pinfo;
entry0 = *pentry0;
@@ -1342,14 +1346,19 @@ static int translate_compat_table(const char *name,
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(NFPROTO_ARP);
+ xt_compat_init_offsets(NFPROTO_ARP, number);
/* Walk through entries, checking offsets. */
- ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
- check_compat_entry_size_and_hooks,
- info, &size, entry0,
- entry0 + total_size,
- hook_entries, underflows, &j, name);
- if (ret != 0)
- goto out_unlock;
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+ entry0 + total_size,
+ hook_entries,
+ underflows,
+ name);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
ret = -EINVAL;
if (j != number) {
@@ -1388,9 +1397,12 @@ static int translate_compat_table(const char *name,
entry1 = newinfo->entries[raw_smp_processor_id()];
pos = entry1;
size = total_size;
- ret = COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size,
- compat_copy_entry_from_user,
- &pos, &size, name, newinfo, entry1);
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = compat_copy_entry_from_user(iter0, &pos, &size,
+ name, newinfo, entry1);
+ if (ret != 0)
+ break;
+ }
xt_compat_flush_offsets(NFPROTO_ARP);
xt_compat_unlock(NFPROTO_ARP);
if (ret)
@@ -1401,13 +1413,35 @@ static int translate_compat_table(const char *name,
goto free_newinfo;
i = 0;
- ret = ARPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
- name, &i);
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ ret = check_target(iter1, name);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(arpt_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
if (ret) {
+ /*
+ * The first i matches need cleanup_entry (calls ->destroy)
+ * because they had called ->check already. The other j-i
+ * entries need only release.
+ */
+ int skip = i;
j -= i;
- COMPAT_ARPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
- compat_release_entry, &j);
- ARPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+ xt_entry_foreach(iter0, entry0, newinfo->size) {
+ if (skip-- > 0)
+ continue;
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter1);
+ }
xt_free_table_info(newinfo);
return ret;
}
@@ -1425,7 +1459,11 @@ static int translate_compat_table(const char *name,
free_newinfo:
xt_free_table_info(newinfo);
out:
- COMPAT_ARPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+ xt_entry_foreach(iter0, entry0, total_size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
return ret;
out_unlock:
xt_compat_flush_offsets(NFPROTO_ARP);
@@ -1434,7 +1472,7 @@ out_unlock:
}
struct compat_arpt_replace {
- char name[ARPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
u32 valid_hooks;
u32 num_entries;
u32 size;
@@ -1452,6 +1490,7 @@ static int compat_do_replace(struct net *net, void __user *user,
struct compat_arpt_replace tmp;
struct xt_table_info *newinfo;
void *loc_cpu_entry;
+ struct arpt_entry *iter;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1461,6 +1500,7 @@ static int compat_do_replace(struct net *net, void __user *user,
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
@@ -1489,7 +1529,8 @@ static int compat_do_replace(struct net *net, void __user *user,
return 0;
free_newinfo_untrans:
- ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter);
free_newinfo:
xt_free_table_info(newinfo);
return ret;
@@ -1500,7 +1541,7 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1523,22 +1564,20 @@ static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
compat_uint_t *size,
struct xt_counters *counters,
- unsigned int *i)
+ unsigned int i)
{
- struct arpt_entry_target *t;
+ struct xt_entry_target *t;
struct compat_arpt_entry __user *ce;
u_int16_t target_offset, next_offset;
compat_uint_t origsize;
int ret;
- ret = -EFAULT;
origsize = *size;
ce = (struct compat_arpt_entry __user *)*dstptr;
- if (copy_to_user(ce, e, sizeof(struct arpt_entry)))
- goto out;
-
- if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
- goto out;
+ if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 ||
+ copy_to_user(&ce->counters, &counters[i],
+ sizeof(counters[i])) != 0)
+ return -EFAULT;
*dstptr += sizeof(struct compat_arpt_entry);
*size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
@@ -1548,18 +1587,12 @@ static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
t = arpt_get_target(e);
ret = xt_compat_target_to_user(t, dstptr, size);
if (ret)
- goto out;
- ret = -EFAULT;
+ return ret;
next_offset = e->next_offset - (origsize - *size);
- if (put_user(target_offset, &ce->target_offset))
- goto out;
- if (put_user(next_offset, &ce->next_offset))
- goto out;
-
- (*i)++;
+ if (put_user(target_offset, &ce->target_offset) != 0 ||
+ put_user(next_offset, &ce->next_offset) != 0)
+ return -EFAULT;
return 0;
-out:
- return ret;
}
static int compat_copy_entries_to_user(unsigned int total_size,
@@ -1573,6 +1606,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
int ret = 0;
void *loc_cpu_entry;
unsigned int i = 0;
+ struct arpt_entry *iter;
counters = alloc_counters(table);
if (IS_ERR(counters))
@@ -1582,15 +1616,18 @@ static int compat_copy_entries_to_user(unsigned int total_size,
loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- ret = ARPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
- compat_copy_entry_to_user,
- &pos, &size, counters, &i);
+ xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ ret = compat_copy_entry_to_user(iter, &pos,
+ &size, counters, i++);
+ if (ret != 0)
+ break;
+ }
vfree(counters);
return ret;
}
struct compat_arpt_get_entries {
- char name[ARPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
struct compat_arpt_entry entrytable[0];
};
@@ -1617,7 +1654,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
@@ -1648,7 +1685,7 @@ static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1669,7 +1706,7 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1693,7 +1730,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1716,6 +1753,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
ret = -EFAULT;
break;
}
+ rev.name[sizeof(rev.name)-1] = 0;
try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
rev.revision, 1, &ret),
@@ -1731,13 +1769,13 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
return ret;
}
-struct xt_table *arpt_register_table(struct net *net, struct xt_table *table,
+struct xt_table *arpt_register_table(struct net *net,
+ const struct xt_table *table,
const struct arpt_replace *repl)
{
int ret;
struct xt_table_info *newinfo;
- struct xt_table_info bootstrap
- = { 0, 0, 0, { 0 }, { 0 }, { } };
+ struct xt_table_info bootstrap = {0};
void *loc_cpu_entry;
struct xt_table *new_table;
@@ -1751,12 +1789,7 @@ struct xt_table *arpt_register_table(struct net *net, struct xt_table *table,
loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
memcpy(loc_cpu_entry, repl->entries, repl->size);
- ret = translate_table(table->name, table->valid_hooks,
- newinfo, loc_cpu_entry, repl->size,
- repl->num_entries,
- repl->hook_entry,
- repl->underflow);
-
+ ret = translate_table(newinfo, loc_cpu_entry, repl);
duprintf("arpt_register_table: translate table gives %d\n", ret);
if (ret != 0)
goto out_free;
@@ -1779,35 +1812,37 @@ void arpt_unregister_table(struct xt_table *table)
struct xt_table_info *private;
void *loc_cpu_entry;
struct module *table_owner = table->me;
+ struct arpt_entry *iter;
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries[raw_smp_processor_id()];
- ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
- cleanup_entry, NULL);
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter);
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
}
/* The built-in targets: standard (NULL) and error. */
-static struct xt_target arpt_standard_target __read_mostly = {
- .name = ARPT_STANDARD_TARGET,
- .targetsize = sizeof(int),
- .family = NFPROTO_ARP,
+static struct xt_target arpt_builtin_tg[] __read_mostly = {
+ {
+ .name = XT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = NFPROTO_ARP,
#ifdef CONFIG_COMPAT
- .compatsize = sizeof(compat_int_t),
- .compat_from_user = compat_standard_from_user,
- .compat_to_user = compat_standard_to_user,
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
#endif
-};
-
-static struct xt_target arpt_error_target __read_mostly = {
- .name = ARPT_ERROR_TARGET,
- .target = arpt_error,
- .targetsize = ARPT_FUNCTION_MAXNAMELEN,
- .family = NFPROTO_ARP,
+ },
+ {
+ .name = XT_ERROR_TARGET,
+ .target = arpt_error,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
+ .family = NFPROTO_ARP,
+ },
};
static struct nf_sockopt_ops arpt_sockopts = {
@@ -1850,13 +1885,10 @@ static int __init arp_tables_init(void)
if (ret < 0)
goto err1;
- /* Noone else will be downing sem now, so we won't sleep */
- ret = xt_register_target(&arpt_standard_target);
+ /* No one else will be downing sem now, so we won't sleep */
+ ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
if (ret < 0)
goto err2;
- ret = xt_register_target(&arpt_error_target);
- if (ret < 0)
- goto err3;
/* Register setsockopt */
ret = nf_register_sockopt(&arpt_sockopts);
@@ -1867,9 +1899,7 @@ static int __init arp_tables_init(void)
return 0;
err4:
- xt_unregister_target(&arpt_error_target);
-err3:
- xt_unregister_target(&arpt_standard_target);
+ xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
err2:
unregister_pernet_subsys(&arp_tables_net_ops);
err1:
@@ -1879,8 +1909,7 @@ err1:
static void __exit arp_tables_fini(void)
{
nf_unregister_sockopt(&arpt_sockopts);
- xt_unregister_target(&arpt_error_target);
- xt_unregister_target(&arpt_standard_target);
+ xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
unregister_pernet_subsys(&arp_tables_net_ops);
}
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index b0d5b1d0a76..a5e52a9f0a1 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -9,7 +9,7 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("arptables arp payload mangle target");
static unsigned int
-target(struct sk_buff *skb, const struct xt_target_param *par)
+target(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct arpt_mangle *mangle = par->targinfo;
const struct arphdr *arp;
@@ -54,18 +54,18 @@ target(struct sk_buff *skb, const struct xt_target_param *par)
return mangle->target;
}
-static bool checkentry(const struct xt_tgchk_param *par)
+static int checkentry(const struct xt_tgchk_param *par)
{
const struct arpt_mangle *mangle = par->targinfo;
if (mangle->flags & ~ARPT_MANGLE_MASK ||
!(mangle->flags & ARPT_MANGLE_MASK))
- return false;
+ return -EINVAL;
if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
- mangle->target != ARPT_CONTINUE)
- return false;
- return true;
+ mangle->target != XT_CONTINUE)
+ return -EINVAL;
+ return 0;
}
static struct xt_target arpt_mangle_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index e091187e864..802ddecb30b 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -6,7 +6,9 @@
*/
#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp/arp_tables.h>
+#include <linux/slab.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
@@ -15,98 +17,39 @@ MODULE_DESCRIPTION("arptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
(1 << NF_ARP_FORWARD))
-static struct
-{
- struct arpt_replace repl;
- struct arpt_standard entries[3];
- struct arpt_error term;
-} initial_table __net_initdata = {
- .repl = {
- .name = "filter",
- .valid_hooks = FILTER_VALID_HOOKS,
- .num_entries = 4,
- .size = sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error),
- .hook_entry = {
- [NF_ARP_IN] = 0,
- [NF_ARP_OUT] = sizeof(struct arpt_standard),
- [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard),
- },
- .underflow = {
- [NF_ARP_IN] = 0,
- [NF_ARP_OUT] = sizeof(struct arpt_standard),
- [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard),
- },
- },
- .entries = {
- ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_IN */
- ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_OUT */
- ARPT_STANDARD_INIT(NF_ACCEPT), /* ARP_FORWARD */
- },
- .term = ARPT_ERROR_INIT,
-};
-
-static struct xt_table packet_filter = {
+static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
- .private = NULL,
.me = THIS_MODULE,
.af = NFPROTO_ARP,
+ .priority = NF_IP_PRI_FILTER,
};
/* The work comes in here from netfilter.c */
-static unsigned int arpt_in_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int
+arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return arpt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.arptable_filter);
-}
+ const struct net *net = dev_net((in != NULL) ? in : out);
-static unsigned int arpt_out_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- return arpt_do_table(skb, hook, in, out,
- dev_net(out)->ipv4.arptable_filter);
+ return arpt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.arptable_filter);
}
-static struct nf_hook_ops arpt_ops[] __read_mostly = {
- {
- .hook = arpt_in_hook,
- .owner = THIS_MODULE,
- .pf = NFPROTO_ARP,
- .hooknum = NF_ARP_IN,
- .priority = NF_IP_PRI_FILTER,
- },
- {
- .hook = arpt_out_hook,
- .owner = THIS_MODULE,
- .pf = NFPROTO_ARP,
- .hooknum = NF_ARP_OUT,
- .priority = NF_IP_PRI_FILTER,
- },
- {
- .hook = arpt_in_hook,
- .owner = THIS_MODULE,
- .pf = NFPROTO_ARP,
- .hooknum = NF_ARP_FORWARD,
- .priority = NF_IP_PRI_FILTER,
- },
-};
+static struct nf_hook_ops *arpfilter_ops __read_mostly;
static int __net_init arptable_filter_net_init(struct net *net)
{
- /* Register table */
+ struct arpt_replace *repl;
+
+ repl = arpt_alloc_initial_table(&packet_filter);
+ if (repl == NULL)
+ return -ENOMEM;
net->ipv4.arptable_filter =
- arpt_register_table(net, &packet_filter, &initial_table.repl);
- if (IS_ERR(net->ipv4.arptable_filter))
- return PTR_ERR(net->ipv4.arptable_filter);
- return 0;
+ arpt_register_table(net, &packet_filter, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter);
}
static void __net_exit arptable_filter_net_exit(struct net *net)
@@ -127,9 +70,11 @@ static int __init arptable_filter_init(void)
if (ret < 0)
return ret;
- ret = nf_register_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
- if (ret < 0)
+ arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
+ if (IS_ERR(arpfilter_ops)) {
+ ret = PTR_ERR(arpfilter_ops);
goto cleanup_table;
+ }
return ret;
cleanup_table:
@@ -139,7 +84,7 @@ cleanup_table:
static void __exit arptable_filter_fini(void)
{
- nf_unregister_hooks(arpt_ops, ARRAY_SIZE(arpt_ops));
+ xt_hook_unlink(&packet_filter, arpfilter_ops);
unregister_pernet_subsys(&arptable_filter_net_ops);
}
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
deleted file mode 100644
index 432ce9d1c11..00000000000
--- a/net/ipv4/netfilter/ip_queue.c
+++ /dev/null
@@ -1,645 +0,0 @@
-/*
- * This is a module which is used for queueing IPv4 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
- * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/notifier.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_queue.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/security.h>
-#include <linux/mutex.h>
-#include <net/net_namespace.h>
-#include <net/sock.h>
-#include <net/route.h>
-#include <net/netfilter/nf_queue.h>
-#include <net/ip.h>
-
-#define IPQ_QMAX_DEFAULT 1024
-#define IPQ_PROC_FS_NAME "ip_queue"
-#define NET_IPQ_QMAX 2088
-#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
-
-typedef int (*ipq_cmpfn)(struct nf_queue_entry *, unsigned long);
-
-static unsigned char copy_mode __read_mostly = IPQ_COPY_NONE;
-static unsigned int queue_maxlen __read_mostly = IPQ_QMAX_DEFAULT;
-static DEFINE_RWLOCK(queue_lock);
-static int peer_pid __read_mostly;
-static unsigned int copy_range __read_mostly;
-static unsigned int queue_total;
-static unsigned int queue_dropped = 0;
-static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl __read_mostly;
-static LIST_HEAD(queue_list);
-static DEFINE_MUTEX(ipqnl_mutex);
-
-static inline void
-__ipq_enqueue_entry(struct nf_queue_entry *entry)
-{
- list_add_tail(&entry->list, &queue_list);
- queue_total++;
-}
-
-static inline int
-__ipq_set_mode(unsigned char mode, unsigned int range)
-{
- int status = 0;
-
- switch(mode) {
- case IPQ_COPY_NONE:
- case IPQ_COPY_META:
- copy_mode = mode;
- copy_range = 0;
- break;
-
- case IPQ_COPY_PACKET:
- copy_mode = mode;
- copy_range = range;
- if (copy_range > 0xFFFF)
- copy_range = 0xFFFF;
- break;
-
- default:
- status = -EINVAL;
-
- }
- return status;
-}
-
-static void __ipq_flush(ipq_cmpfn cmpfn, unsigned long data);
-
-static inline void
-__ipq_reset(void)
-{
- peer_pid = 0;
- net_disable_timestamp();
- __ipq_set_mode(IPQ_COPY_NONE, 0);
- __ipq_flush(NULL, 0);
-}
-
-static struct nf_queue_entry *
-ipq_find_dequeue_entry(unsigned long id)
-{
- struct nf_queue_entry *entry = NULL, *i;
-
- write_lock_bh(&queue_lock);
-
- list_for_each_entry(i, &queue_list, list) {
- if ((unsigned long)i == id) {
- entry = i;
- break;
- }
- }
-
- if (entry) {
- list_del(&entry->list);
- queue_total--;
- }
-
- write_unlock_bh(&queue_lock);
- return entry;
-}
-
-static void
-__ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
- struct nf_queue_entry *entry, *next;
-
- list_for_each_entry_safe(entry, next, &queue_list, list) {
- if (!cmpfn || cmpfn(entry, data)) {
- list_del(&entry->list);
- queue_total--;
- nf_reinject(entry, NF_DROP);
- }
- }
-}
-
-static void
-ipq_flush(ipq_cmpfn cmpfn, unsigned long data)
-{
- write_lock_bh(&queue_lock);
- __ipq_flush(cmpfn, data);
- write_unlock_bh(&queue_lock);
-}
-
-static struct sk_buff *
-ipq_build_packet_message(struct nf_queue_entry *entry, int *errp)
-{
- sk_buff_data_t old_tail;
- size_t size = 0;
- size_t data_len = 0;
- struct sk_buff *skb;
- struct ipq_packet_msg *pmsg;
- struct nlmsghdr *nlh;
- struct timeval tv;
-
- read_lock_bh(&queue_lock);
-
- switch (copy_mode) {
- case IPQ_COPY_META:
- case IPQ_COPY_NONE:
- size = NLMSG_SPACE(sizeof(*pmsg));
- break;
-
- case IPQ_COPY_PACKET:
- if ((entry->skb->ip_summed == CHECKSUM_PARTIAL ||
- entry->skb->ip_summed == CHECKSUM_COMPLETE) &&
- (*errp = skb_checksum_help(entry->skb))) {
- read_unlock_bh(&queue_lock);
- return NULL;
- }
- if (copy_range == 0 || copy_range > entry->skb->len)
- data_len = entry->skb->len;
- else
- data_len = copy_range;
-
- size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
- break;
-
- default:
- *errp = -EINVAL;
- read_unlock_bh(&queue_lock);
- return NULL;
- }
-
- read_unlock_bh(&queue_lock);
-
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb)
- goto nlmsg_failure;
-
- old_tail = skb->tail;
- nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
- pmsg = NLMSG_DATA(nlh);
- memset(pmsg, 0, sizeof(*pmsg));
-
- pmsg->packet_id = (unsigned long )entry;
- pmsg->data_len = data_len;
- tv = ktime_to_timeval(entry->skb->tstamp);
- pmsg->timestamp_sec = tv.tv_sec;
- pmsg->timestamp_usec = tv.tv_usec;
- pmsg->mark = entry->skb->mark;
- pmsg->hook = entry->hook;
- pmsg->hw_protocol = entry->skb->protocol;
-
- if (entry->indev)
- strcpy(pmsg->indev_name, entry->indev->name);
- else
- pmsg->indev_name[0] = '\0';
-
- if (entry->outdev)
- strcpy(pmsg->outdev_name, entry->outdev->name);
- else
- pmsg->outdev_name[0] = '\0';
-
- if (entry->indev && entry->skb->dev) {
- pmsg->hw_type = entry->skb->dev->type;
- pmsg->hw_addrlen = dev_parse_header(entry->skb,
- pmsg->hw_addr);
- }
-
- if (data_len)
- if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
- BUG();
-
- nlh->nlmsg_len = skb->tail - old_tail;
- return skb;
-
-nlmsg_failure:
- *errp = -EINVAL;
- printk(KERN_ERR "ip_queue: error creating packet message\n");
- return NULL;
-}
-
-static int
-ipq_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
-{
- int status = -EINVAL;
- struct sk_buff *nskb;
-
- if (copy_mode == IPQ_COPY_NONE)
- return -EAGAIN;
-
- nskb = ipq_build_packet_message(entry, &status);
- if (nskb == NULL)
- return status;
-
- write_lock_bh(&queue_lock);
-
- if (!peer_pid)
- goto err_out_free_nskb;
-
- if (queue_total >= queue_maxlen) {
- queue_dropped++;
- status = -ENOSPC;
- if (net_ratelimit())
- printk (KERN_WARNING "ip_queue: full at %d entries, "
- "dropping packets(s). Dropped: %d\n", queue_total,
- queue_dropped);
- goto err_out_free_nskb;
- }
-
- /* netlink_unicast will either free the nskb or attach it to a socket */
- status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
- if (status < 0) {
- queue_user_dropped++;
- goto err_out_unlock;
- }
-
- __ipq_enqueue_entry(entry);
-
- write_unlock_bh(&queue_lock);
- return status;
-
-err_out_free_nskb:
- kfree_skb(nskb);
-
-err_out_unlock:
- write_unlock_bh(&queue_lock);
- return status;
-}
-
-static int
-ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct nf_queue_entry *e)
-{
- int diff;
- struct iphdr *user_iph = (struct iphdr *)v->payload;
- struct sk_buff *nskb;
-
- if (v->data_len < sizeof(*user_iph))
- return 0;
- diff = v->data_len - e->skb->len;
- if (diff < 0) {
- if (pskb_trim(e->skb, v->data_len))
- return -ENOMEM;
- } else if (diff > 0) {
- if (v->data_len > 0xFFFF)
- return -EINVAL;
- if (diff > skb_tailroom(e->skb)) {
- nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
- diff, GFP_ATOMIC);
- if (!nskb) {
- printk(KERN_WARNING "ip_queue: error "
- "in mangle, dropping packet\n");
- return -ENOMEM;
- }
- kfree_skb(e->skb);
- e->skb = nskb;
- }
- skb_put(e->skb, diff);
- }
- if (!skb_make_writable(e->skb, v->data_len))
- return -ENOMEM;
- skb_copy_to_linear_data(e->skb, v->payload, v->data_len);
- e->skb->ip_summed = CHECKSUM_NONE;
-
- return 0;
-}
-
-static int
-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
-{
- struct nf_queue_entry *entry;
-
- if (vmsg->value > NF_MAX_VERDICT)
- return -EINVAL;
-
- entry = ipq_find_dequeue_entry(vmsg->id);
- if (entry == NULL)
- return -ENOENT;
- else {
- int verdict = vmsg->value;
-
- if (vmsg->data_len && vmsg->data_len == len)
- if (ipq_mangle_ipv4(vmsg, entry) < 0)
- verdict = NF_DROP;
-
- nf_reinject(entry, verdict);
- return 0;
- }
-}
-
-static int
-ipq_set_mode(unsigned char mode, unsigned int range)
-{
- int status;
-
- write_lock_bh(&queue_lock);
- status = __ipq_set_mode(mode, range);
- write_unlock_bh(&queue_lock);
- return status;
-}
-
-static int
-ipq_receive_peer(struct ipq_peer_msg *pmsg,
- unsigned char type, unsigned int len)
-{
- int status = 0;
-
- if (len < sizeof(*pmsg))
- return -EINVAL;
-
- switch (type) {
- case IPQM_MODE:
- status = ipq_set_mode(pmsg->msg.mode.value,
- pmsg->msg.mode.range);
- break;
-
- case IPQM_VERDICT:
- if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
- status = -EINVAL;
- else
- status = ipq_set_verdict(&pmsg->msg.verdict,
- len - sizeof(*pmsg));
- break;
- default:
- status = -EINVAL;
- }
- return status;
-}
-
-static int
-dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
-{
- if (entry->indev)
- if (entry->indev->ifindex == ifindex)
- return 1;
- if (entry->outdev)
- if (entry->outdev->ifindex == ifindex)
- return 1;
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (entry->skb->nf_bridge) {
- if (entry->skb->nf_bridge->physindev &&
- entry->skb->nf_bridge->physindev->ifindex == ifindex)
- return 1;
- if (entry->skb->nf_bridge->physoutdev &&
- entry->skb->nf_bridge->physoutdev->ifindex == ifindex)
- return 1;
- }
-#endif
- return 0;
-}
-
-static void
-ipq_dev_drop(int ifindex)
-{
- ipq_flush(dev_cmp, ifindex);
-}
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
-static inline void
-__ipq_rcv_skb(struct sk_buff *skb)
-{
- int status, type, pid, flags, nlmsglen, skblen;
- struct nlmsghdr *nlh;
-
- skblen = skb->len;
- if (skblen < sizeof(*nlh))
- return;
-
- nlh = nlmsg_hdr(skb);
- nlmsglen = nlh->nlmsg_len;
- if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
- return;
-
- pid = nlh->nlmsg_pid;
- flags = nlh->nlmsg_flags;
-
- if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
- RCV_SKB_FAIL(-EINVAL);
-
- if (flags & MSG_TRUNC)
- RCV_SKB_FAIL(-ECOMM);
-
- type = nlh->nlmsg_type;
- if (type < NLMSG_NOOP || type >= IPQM_MAX)
- RCV_SKB_FAIL(-EINVAL);
-
- if (type <= IPQM_BASE)
- return;
-
- if (security_netlink_recv(skb, CAP_NET_ADMIN))
- RCV_SKB_FAIL(-EPERM);
-
- write_lock_bh(&queue_lock);
-
- if (peer_pid) {
- if (peer_pid != pid) {
- write_unlock_bh(&queue_lock);
- RCV_SKB_FAIL(-EBUSY);
- }
- } else {
- net_enable_timestamp();
- peer_pid = pid;
- }
-
- write_unlock_bh(&queue_lock);
-
- status = ipq_receive_peer(NLMSG_DATA(nlh), type,
- nlmsglen - NLMSG_LENGTH(0));
- if (status < 0)
- RCV_SKB_FAIL(status);
-
- if (flags & NLM_F_ACK)
- netlink_ack(skb, nlh, 0);
- return;
-}
-
-static void
-ipq_rcv_skb(struct sk_buff *skb)
-{
- mutex_lock(&ipqnl_mutex);
- __ipq_rcv_skb(skb);
- mutex_unlock(&ipqnl_mutex);
-}
-
-static int
-ipq_rcv_dev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = ptr;
-
- if (!net_eq(dev_net(dev), &init_net))
- return NOTIFY_DONE;
-
- /* Drop any packets associated with the downed device */
- if (event == NETDEV_DOWN)
- ipq_dev_drop(dev->ifindex);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_dev_notifier = {
- .notifier_call = ipq_rcv_dev_event,
-};
-
-static int
-ipq_rcv_nl_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct netlink_notify *n = ptr;
-
- if (event == NETLINK_URELEASE &&
- n->protocol == NETLINK_FIREWALL && n->pid) {
- write_lock_bh(&queue_lock);
- if ((n->net == &init_net) && (n->pid == peer_pid))
- __ipq_reset();
- write_unlock_bh(&queue_lock);
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_nl_notifier = {
- .notifier_call = ipq_rcv_nl_event,
-};
-
-#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *ipq_sysctl_header;
-
-static ctl_table ipq_table[] = {
- {
- .ctl_name = NET_IPQ_QMAX,
- .procname = NET_IPQ_QMAX_NAME,
- .data = &queue_maxlen,
- .maxlen = sizeof(queue_maxlen),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- { .ctl_name = 0 }
-};
-#endif
-
-#ifdef CONFIG_PROC_FS
-static int ip_queue_show(struct seq_file *m, void *v)
-{
- read_lock_bh(&queue_lock);
-
- seq_printf(m,
- "Peer PID : %d\n"
- "Copy mode : %hu\n"
- "Copy range : %u\n"
- "Queue length : %u\n"
- "Queue max. length : %u\n"
- "Queue dropped : %u\n"
- "Netlink dropped : %u\n",
- peer_pid,
- copy_mode,
- copy_range,
- queue_total,
- queue_maxlen,
- queue_dropped,
- queue_user_dropped);
-
- read_unlock_bh(&queue_lock);
- return 0;
-}
-
-static int ip_queue_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ip_queue_show, NULL);
-}
-
-static const struct file_operations ip_queue_proc_fops = {
- .open = ip_queue_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
- .owner = THIS_MODULE,
-};
-#endif
-
-static const struct nf_queue_handler nfqh = {
- .name = "ip_queue",
- .outfn = &ipq_enqueue_packet,
-};
-
-static int __init ip_queue_init(void)
-{
- int status = -ENOMEM;
- struct proc_dir_entry *proc __maybe_unused;
-
- netlink_register_notifier(&ipq_nl_notifier);
- ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
- ipq_rcv_skb, NULL, THIS_MODULE);
- if (ipqnl == NULL) {
- printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
- goto cleanup_netlink_notifier;
- }
-
-#ifdef CONFIG_PROC_FS
- proc = proc_create(IPQ_PROC_FS_NAME, 0, init_net.proc_net,
- &ip_queue_proc_fops);
- if (!proc) {
- printk(KERN_ERR "ip_queue: failed to create proc entry\n");
- goto cleanup_ipqnl;
- }
-#endif
- register_netdevice_notifier(&ipq_dev_notifier);
-#ifdef CONFIG_SYSCTL
- ipq_sysctl_header = register_sysctl_paths(net_ipv4_ctl_path, ipq_table);
-#endif
- status = nf_register_queue_handler(PF_INET, &nfqh);
- if (status < 0) {
- printk(KERN_ERR "ip_queue: failed to register queue handler\n");
- goto cleanup_sysctl;
- }
- return status;
-
-cleanup_sysctl:
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ipq_sysctl_header);
-#endif
- unregister_netdevice_notifier(&ipq_dev_notifier);
- proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-cleanup_ipqnl: __maybe_unused
- netlink_kernel_release(ipqnl);
- mutex_lock(&ipqnl_mutex);
- mutex_unlock(&ipqnl_mutex);
-
-cleanup_netlink_notifier:
- netlink_unregister_notifier(&ipq_nl_notifier);
- return status;
-}
-
-static void __exit ip_queue_fini(void)
-{
- nf_unregister_queue_handlers(&nfqh);
- synchronize_net();
- ipq_flush(NULL, 0);
-
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ipq_sysctl_header);
-#endif
- unregister_netdevice_notifier(&ipq_dev_notifier);
- proc_net_remove(&init_net, IPQ_PROC_FS_NAME);
-
- netlink_kernel_release(ipqnl);
- mutex_lock(&ipqnl_mutex);
- mutex_unlock(&ipqnl_mutex);
-
- netlink_unregister_notifier(&ipq_nl_notifier);
-}
-
-MODULE_DESCRIPTION("IPv4 packet queue handler");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_LICENSE("GPL");
-
-module_init(ip_queue_init);
-module_exit(ip_queue_fini);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ef8b6ca068b..99e810f8467 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -3,11 +3,13 @@
*
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
* Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
@@ -27,6 +29,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <net/netfilter/nf_log.h>
+#include "../../netfilter/xt_repldata.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -37,24 +40,19 @@ MODULE_DESCRIPTION("IPv4 packet filter");
/*#define DEBUG_IP_FIREWALL_USER*/
#ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...) printk(format , ## args)
+#define dprintf(format, args...) pr_info(format , ## args)
#else
#define dprintf(format, args...)
#endif
#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
+#define duprintf(format, args...) pr_info(format , ## args)
#else
#define duprintf(format, args...)
#endif
#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x) \
-do { \
- if (!(x)) \
- printk("IP_NF_ASSERT: %s:%s:%u\n", \
- __func__, __FILE__, __LINE__); \
-} while(0)
+#define IP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define IP_NF_ASSERT(x)
#endif
@@ -65,14 +63,11 @@ do { \
#define inline
#endif
-/*
- We keep a set of rules for each CPU, so we can avoid write-locking
- them in the softirq when updating the counters and therefore
- only need to read-lock in the softirq; doing a write_lock_bh() in user
- context stops packets coming through and allows user context to read
- the counters or update the rules.
-
- Hence the start of any table is given by get_table() below. */
+void *ipt_alloc_initial_table(const struct xt_table *info)
+{
+ return xt_alloc_initial_table(ipt, IPT);
+}
+EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
/* Returns whether matches rule or not. */
/* Performance critical - called for every packet */
@@ -83,15 +78,14 @@ ip_packet_match(const struct iphdr *ip,
const struct ipt_ip *ipinfo,
int isfrag)
{
- size_t i;
unsigned long ret;
#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
- IPT_INV_SRCIP)
- || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
- IPT_INV_DSTIP)) {
+ IPT_INV_SRCIP) ||
+ FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+ IPT_INV_DSTIP)) {
dprintf("Source or dest mismatch.\n");
dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
@@ -103,12 +97,7 @@ ip_packet_match(const struct iphdr *ip,
return false;
}
- /* Look for ifname matches; this should unroll nicely. */
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
- ret |= (((const unsigned long *)indev)[i]
- ^ ((const unsigned long *)ipinfo->iniface)[i])
- & ((const unsigned long *)ipinfo->iniface_mask)[i];
- }
+ ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -117,11 +106,7 @@ ip_packet_match(const struct iphdr *ip,
return false;
}
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
- ret |= (((const unsigned long *)outdev)[i]
- ^ ((const unsigned long *)ipinfo->outiface)[i])
- & ((const unsigned long *)ipinfo->outiface_mask)[i];
- }
+ ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
@@ -131,8 +116,8 @@ ip_packet_match(const struct iphdr *ip,
}
/* Check specific protocol */
- if (ipinfo->proto
- && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
+ if (ipinfo->proto &&
+ FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
dprintf("Packet protocol %hi does not match %hi.%s\n",
ip->protocol, ipinfo->proto,
ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
@@ -167,54 +152,38 @@ ip_checkentry(const struct ipt_ip *ip)
}
static unsigned int
-ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
+ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
{
- if (net_ratelimit())
- printk("ip_tables: error: `%s'\n",
- (const char *)par->targinfo);
+ net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
return NF_DROP;
}
-/* Performance critical - called for every packet */
-static inline bool
-do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
- struct xt_match_param *par)
-{
- par->match = m->u.kernel.match;
- par->matchinfo = m->data;
-
- /* Stop iteration if it doesn't match */
- if (!m->u.kernel.match->match(skb, par))
- return true;
- else
- return false;
-}
-
/* Performance critical */
static inline struct ipt_entry *
-get_entry(void *base, unsigned int offset)
+get_entry(const void *base, unsigned int offset)
{
return (struct ipt_entry *)(base + offset);
}
/* All zeroes == unconditional rule. */
/* Mildly perf critical (only if packet tracing is on) */
-static inline int
-unconditional(const struct ipt_ip *ip)
+static inline bool unconditional(const struct ipt_ip *ip)
{
- unsigned int i;
-
- for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
- if (((__u32 *)ip)[i])
- return 0;
+ static const struct ipt_ip uncond;
- return 1;
+ return memcmp(ip, &uncond, sizeof(uncond)) == 0;
#undef FWINV
}
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+/* for const-correctness */
+static inline const struct xt_entry_target *
+ipt_get_target_c(const struct ipt_entry *e)
+{
+ return ipt_get_target((struct ipt_entry *)e);
+}
+
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
static const char *const hooknames[] = {
[NF_INET_PRE_ROUTING] = "PREROUTING",
[NF_INET_LOCAL_IN] = "INPUT",
@@ -247,28 +216,28 @@ static struct nf_loginfo trace_loginfo = {
/* Mildly perf critical (only if packet tracing is on) */
static inline int
-get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
- char *hookname, char **chainname,
- char **comment, unsigned int *rulenum)
+get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
+ const char *hookname, const char **chainname,
+ const char **comment, unsigned int *rulenum)
{
- struct ipt_standard_target *t = (void *)ipt_get_target(s);
+ const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
- if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
+ if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
/* Head of user chain: ERROR target with chainname */
*chainname = t->target.data;
(*rulenum) = 0;
} else if (s == e) {
(*rulenum)++;
- if (s->target_offset == sizeof(struct ipt_entry)
- && strcmp(t->target.u.kernel.target->name,
- IPT_STANDARD_TARGET) == 0
- && t->verdict < 0
- && unconditional(&s->ip)) {
+ if (s->target_offset == sizeof(struct ipt_entry) &&
+ strcmp(t->target.u.kernel.target->name,
+ XT_STANDARD_TARGET) == 0 &&
+ t->verdict < 0 &&
+ unconditional(&s->ip)) {
/* Tail of chains: STANDARD target (return/policy) */
*comment = *chainname == hookname
- ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
- : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
+ ? comments[NF_IP_TRACE_COMMENT_POLICY]
+ : comments[NF_IP_TRACE_COMMENT_RETURN];
}
return 1;
} else
@@ -277,36 +246,44 @@ get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
return 0;
}
-static void trace_packet(struct sk_buff *skb,
+static void trace_packet(const struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
const char *tablename,
- struct xt_table_info *private,
- struct ipt_entry *e)
+ const struct xt_table_info *private,
+ const struct ipt_entry *e)
{
- void *table_base;
+ const void *table_base;
const struct ipt_entry *root;
- char *hookname, *chainname, *comment;
+ const char *hookname, *chainname, *comment;
+ const struct ipt_entry *iter;
unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
- table_base = (void *)private->entries[smp_processor_id()];
+ table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
- hookname = chainname = (char *)hooknames[hook];
- comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
+ hookname = chainname = hooknames[hook];
+ comment = comments[NF_IP_TRACE_COMMENT_RULE];
- IPT_ENTRY_ITERATE(root,
- private->size - private->hook_entry[hook],
- get_chainname_rulenum,
- e, hookname, &chainname, &comment, &rulenum);
+ xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
+ if (get_chainname_rulenum(iter, e, hookname,
+ &chainname, &comment, &rulenum) != 0)
+ break;
- nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
+ nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
"TRACE: %s:%s:%s:%u ",
tablename, chainname, comment, rulenum);
}
#endif
+static inline __pure
+struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
ipt_do_table(struct sk_buff *skb,
@@ -317,20 +294,18 @@ ipt_do_table(struct sk_buff *skb,
{
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
const struct iphdr *ip;
- u_int16_t datalen;
- bool hotdrop = false;
/* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP;
const char *indev, *outdev;
- void *table_base;
- struct ipt_entry *e, *back;
- struct xt_table_info *private;
- struct xt_match_param mtpar;
- struct xt_target_param tgpar;
+ const void *table_base;
+ struct ipt_entry *e, **jumpstack;
+ unsigned int *stackptr, origptr, cpu;
+ const struct xt_table_info *private;
+ struct xt_action_param acpar;
+ unsigned int addend;
/* Initialization */
ip = ip_hdr(skb);
- datalen = skb->len - ip->ihl * 4;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -339,118 +314,126 @@ ipt_do_table(struct sk_buff *skb,
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
- mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
- mtpar.thoff = ip_hdrlen(skb);
- mtpar.hotdrop = &hotdrop;
- mtpar.in = tgpar.in = in;
- mtpar.out = tgpar.out = out;
- mtpar.family = tgpar.family = NFPROTO_IPV4;
- tgpar.hooknum = hook;
-
- read_lock_bh(&table->lock);
+ acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+ acpar.thoff = ip_hdrlen(skb);
+ acpar.hotdrop = false;
+ acpar.in = in;
+ acpar.out = out;
+ acpar.family = NFPROTO_IPV4;
+ acpar.hooknum = hook;
+
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
private = table->private;
- table_base = (void *)private->entries[smp_processor_id()];
+ cpu = smp_processor_id();
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
+ table_base = private->entries[cpu];
+ jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
+ stackptr = per_cpu_ptr(private->stackptr, cpu);
+ origptr = *stackptr;
+
e = get_entry(table_base, private->hook_entry[hook]);
- /* For return from builtin chain */
- back = get_entry(table_base, private->underflow[hook]);
+ pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
+ table->name, hook, origptr,
+ get_entry(table_base, private->underflow[hook]));
do {
+ const struct xt_entry_target *t;
+ const struct xt_entry_match *ematch;
+
IP_NF_ASSERT(e);
- IP_NF_ASSERT(back);
- if (ip_packet_match(ip, indev, outdev,
- &e->ip, mtpar.fragoff)) {
- struct ipt_entry_target *t;
+ if (!ip_packet_match(ip, indev, outdev,
+ &e->ip, acpar.fragoff)) {
+ no_match:
+ e = ipt_next_entry(e);
+ continue;
+ }
- if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
+ xt_ematch_foreach(ematch, e) {
+ acpar.match = ematch->u.kernel.match;
+ acpar.matchinfo = ematch->data;
+ if (!acpar.match->match(skb, &acpar))
goto no_match;
+ }
- ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+ ADD_COUNTER(e->counters, skb->len, 1);
- t = ipt_get_target(e);
- IP_NF_ASSERT(t->u.kernel.target);
+ t = ipt_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
- /* The packet is traced: log it */
- if (unlikely(skb->nf_trace))
- trace_packet(skb, hook, in, out,
- table->name, private, e);
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
+ /* The packet is traced: log it */
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
+ table->name, private, e);
#endif
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
-
- v = ((struct ipt_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != IPT_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct xt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != XT_RETURN) {
+ verdict = (unsigned int)(-v) - 1;
+ break;
}
- if (table_base + v != (void *)e + e->next_offset
- && !(e->ip.flags & IPT_F_GOTO)) {
- /* Save old back ptr in next entry */
- struct ipt_entry *next
- = (void *)e + e->next_offset;
- next->comefrom
- = (void *)back - table_base;
- /* set back pointer to next entry */
- back = next;
+ if (*stackptr <= origptr) {
+ e = get_entry(table_base,
+ private->underflow[hook]);
+ pr_debug("Underflow (this is normal) "
+ "to %p\n", e);
+ } else {
+ e = jumpstack[--*stackptr];
+ pr_debug("Pulled %p out from pos %u\n",
+ e, *stackptr);
+ e = ipt_next_entry(e);
}
-
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- abs. verdicts */
- tgpar.target = t->u.kernel.target;
- tgpar.targinfo = t->data;
-#ifdef CONFIG_NETFILTER_DEBUG
- ((struct ipt_entry *)table_base)->comefrom
- = 0xeeeeeeec;
-#endif
- verdict = t->u.kernel.target->target(skb,
- &tgpar);
-#ifdef CONFIG_NETFILTER_DEBUG
- if (((struct ipt_entry *)table_base)->comefrom
- != 0xeeeeeeec
- && verdict == IPT_CONTINUE) {
- printk("Target %s reentered!\n",
- t->u.kernel.target->name);
+ continue;
+ }
+ if (table_base + v != ipt_next_entry(e) &&
+ !(e->ip.flags & IPT_F_GOTO)) {
+ if (*stackptr >= private->stacksize) {
verdict = NF_DROP;
- }
- ((struct ipt_entry *)table_base)->comefrom
- = 0x57acc001;
-#endif
- /* Target might have changed stuff. */
- ip = ip_hdr(skb);
- datalen = skb->len - ip->ihl * 4;
-
- if (verdict == IPT_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
break;
+ }
+ jumpstack[(*stackptr)++] = e;
+ pr_debug("Pushed %p into pos %u\n",
+ e, *stackptr - 1);
}
- } else {
- no_match:
- e = (void *)e + e->next_offset;
+ e = get_entry(table_base, v);
+ continue;
}
- } while (!hotdrop);
- read_unlock_bh(&table->lock);
+ acpar.target = t->u.kernel.target;
+ acpar.targinfo = t->data;
+
+ verdict = t->u.kernel.target->target(skb, &acpar);
+ /* Target might have changed stuff. */
+ ip = ip_hdr(skb);
+ if (verdict == XT_CONTINUE)
+ e = ipt_next_entry(e);
+ else
+ /* Verdict */
+ break;
+ } while (!acpar.hotdrop);
+ pr_debug("Exiting %s; resetting sp from %u to %u\n",
+ __func__, *stackptr, origptr);
+ *stackptr = origptr;
+ xt_write_recseq_end(addend);
+ local_bh_enable();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
#else
- if (hotdrop)
+ if (acpar.hotdrop)
return NF_DROP;
else return verdict;
#endif
@@ -459,7 +442,7 @@ ipt_do_table(struct sk_buff *skb,
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
-mark_source_chains(struct xt_table_info *newinfo,
+mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
unsigned int hook;
@@ -477,26 +460,28 @@ mark_source_chains(struct xt_table_info *newinfo,
e->counters.pcnt = pos;
for (;;) {
- struct ipt_standard_target *t
- = (void *)ipt_get_target(e);
+ const struct xt_standard_target *t
+ = (void *)ipt_get_target_c(e);
int visited = e->comefrom & (1 << hook);
if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
- printk("iptables: loop hook %u pos %u %08X.\n",
+ pr_err("iptables: loop hook %u pos %u %08X.\n",
hook, pos, e->comefrom);
return 0;
}
e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
- if ((e->target_offset == sizeof(struct ipt_entry)
- && (strcmp(t->target.u.user.name,
- IPT_STANDARD_TARGET) == 0)
- && t->verdict < 0
- && unconditional(&e->ip)) || visited) {
+ if ((e->target_offset == sizeof(struct ipt_entry) &&
+ (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < 0 && unconditional(&e->ip)) ||
+ visited) {
unsigned int oldpos, size;
- if (t->verdict < -NF_MAX_VERDICT - 1) {
+ if ((strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < -NF_MAX_VERDICT - 1) {
duprintf("mark_source_chains: bad "
"negative verdict (%i)\n",
t->verdict);
@@ -538,8 +523,8 @@ mark_source_chains(struct xt_table_info *newinfo,
int newpos = t->verdict;
if (strcmp(t->target.u.user.name,
- IPT_STANDARD_TARGET) == 0
- && newpos >= 0) {
+ XT_STANDARD_TARGET) == 0 &&
+ newpos >= 0) {
if (newpos > newinfo->size -
sizeof(struct ipt_entry)) {
duprintf("mark_source_chains: "
@@ -566,38 +551,34 @@ mark_source_chains(struct xt_table_info *newinfo,
return 1;
}
-static int
-cleanup_match(struct ipt_entry_match *m, unsigned int *i)
+static void cleanup_match(struct xt_entry_match *m, struct net *net)
{
struct xt_mtdtor_param par;
- if (i && (*i)-- == 0)
- return 1;
-
+ par.net = net;
par.match = m->u.kernel.match;
par.matchinfo = m->data;
par.family = NFPROTO_IPV4;
if (par.match->destroy != NULL)
par.match->destroy(&par);
module_put(par.match->me);
- return 0;
}
static int
-check_entry(struct ipt_entry *e, const char *name)
+check_entry(const struct ipt_entry *e, const char *name)
{
- struct ipt_entry_target *t;
+ const struct xt_entry_target *t;
if (!ip_checkentry(&e->ip)) {
- duprintf("ip_tables: ip check failed %p %s.\n", e, name);
+ duprintf("ip check failed %p %s.\n", e, name);
return -EINVAL;
}
- if (e->target_offset + sizeof(struct ipt_entry_target) >
+ if (e->target_offset + sizeof(struct xt_entry_target) >
e->next_offset)
return -EINVAL;
- t = ipt_get_target(e);
+ t = ipt_get_target_c(e);
if (e->target_offset + t->u.target_size > e->next_offset)
return -EINVAL;
@@ -605,8 +586,7 @@ check_entry(struct ipt_entry *e, const char *name)
}
static int
-check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
- unsigned int *i)
+check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
const struct ipt_ip *ip = par->entryinfo;
int ret;
@@ -617,31 +597,27 @@ check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
ret = xt_check_match(par, m->u.match_size - sizeof(*m),
ip->proto, ip->invflags & IPT_INV_PROTO);
if (ret < 0) {
- duprintf("ip_tables: check failed for `%s'.\n",
- par.match->name);
+ duprintf("check failed for `%s'.\n", par->match->name);
return ret;
}
- ++*i;
return 0;
}
static int
-find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
- unsigned int *i)
+find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
struct xt_match *match;
int ret;
- match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
- m->u.user.revision),
- "ipt_%s", m->u.user.name);
- if (IS_ERR(match) || !match) {
+ match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+ m->u.user.revision);
+ if (IS_ERR(match)) {
duprintf("find_check_match: `%s' not found\n", m->u.user.name);
- return match ? PTR_ERR(match) : -ENOENT;
+ return PTR_ERR(match);
}
m->u.kernel.match = match;
- ret = check_match(m, par, i);
+ ret = check_match(m, par);
if (ret)
goto err;
@@ -651,10 +627,11 @@ err:
return ret;
}
-static int check_target(struct ipt_entry *e, const char *name)
+static int check_target(struct ipt_entry *e, struct net *net, const char *name)
{
- struct ipt_entry_target *t = ipt_get_target(e);
+ struct xt_entry_target *t = ipt_get_target(e);
struct xt_tgchk_param par = {
+ .net = net,
.table = name,
.entryinfo = e,
.target = t->u.kernel.target,
@@ -667,7 +644,7 @@ static int check_target(struct ipt_entry *e, const char *name)
ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
if (ret < 0) {
- duprintf("ip_tables: check failed for `%s'.\n",
+ duprintf("check failed for `%s'.\n",
t->u.kernel.target->name);
return ret;
}
@@ -675,72 +652,92 @@ static int check_target(struct ipt_entry *e, const char *name)
}
static int
-find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
- unsigned int *i)
+find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
+ unsigned int size)
{
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
int ret;
unsigned int j;
struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
ret = check_entry(e, name);
if (ret)
return ret;
j = 0;
+ mtpar.net = net;
mtpar.table = name;
mtpar.entryinfo = &e->ip;
mtpar.hook_mask = e->comefrom;
mtpar.family = NFPROTO_IPV4;
- ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
- if (ret != 0)
- goto cleanup_matches;
+ xt_ematch_foreach(ematch, e) {
+ ret = find_check_match(ematch, &mtpar);
+ if (ret != 0)
+ goto cleanup_matches;
+ ++j;
+ }
t = ipt_get_target(e);
- target = try_then_request_module(xt_find_target(AF_INET,
- t->u.user.name,
- t->u.user.revision),
- "ipt_%s", t->u.user.name);
- if (IS_ERR(target) || !target) {
+ target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
- ret = target ? PTR_ERR(target) : -ENOENT;
+ ret = PTR_ERR(target);
goto cleanup_matches;
}
t->u.kernel.target = target;
- ret = check_target(e, name);
+ ret = check_target(e, net, name);
if (ret)
goto err;
-
- (*i)++;
return 0;
err:
module_put(t->u.kernel.target->me);
cleanup_matches:
- IPT_MATCH_ITERATE(e, cleanup_match, &j);
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ cleanup_match(ematch, net);
+ }
return ret;
}
+static bool check_underflow(const struct ipt_entry *e)
+{
+ const struct xt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->ip))
+ return false;
+ t = ipt_get_target_c(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct xt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
static int
check_entry_size_and_hooks(struct ipt_entry *e,
struct xt_table_info *newinfo,
- unsigned char *base,
- unsigned char *limit,
+ const unsigned char *base,
+ const unsigned char *limit,
const unsigned int *hook_entries,
const unsigned int *underflows,
- unsigned int *i)
+ unsigned int valid_hooks)
{
unsigned int h;
- if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
- || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+ if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
if (e->next_offset
- < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
+ < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
duprintf("checking: element %p size %u\n",
e, e->next_offset);
return -EINVAL;
@@ -748,62 +745,60 @@ check_entry_size_and_hooks(struct ipt_entry *e,
/* Check hooks & underflows */
for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
newinfo->underflow[h] = underflows[h];
+ }
}
- /* FIXME: underflows must be unconditional, standard verdicts
- < 0 (not IPT_RETURN). --RR */
-
/* Clear counters and comefrom */
e->counters = ((struct xt_counters) { 0, 0 });
e->comefrom = 0;
-
- (*i)++;
return 0;
}
-static int
-cleanup_entry(struct ipt_entry *e, unsigned int *i)
+static void
+cleanup_entry(struct ipt_entry *e, struct net *net)
{
struct xt_tgdtor_param par;
- struct ipt_entry_target *t;
-
- if (i && (*i)-- == 0)
- return 1;
+ struct xt_entry_target *t;
+ struct xt_entry_match *ematch;
/* Cleanup all matches */
- IPT_MATCH_ITERATE(e, cleanup_match, NULL);
+ xt_ematch_foreach(ematch, e)
+ cleanup_match(ematch, net);
t = ipt_get_target(e);
+ par.net = net;
par.target = t->u.kernel.target;
par.targinfo = t->data;
par.family = NFPROTO_IPV4;
if (par.target->destroy != NULL)
par.target->destroy(&par);
module_put(par.target->me);
- return 0;
}
/* Checks and translates the user-supplied table segment (held in
newinfo) */
static int
-translate_table(const char *name,
- unsigned int valid_hooks,
- struct xt_table_info *newinfo,
- void *entry0,
- unsigned int size,
- unsigned int number,
- const unsigned int *hook_entries,
- const unsigned int *underflows)
+translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
+ const struct ipt_replace *repl)
{
+ struct ipt_entry *iter;
unsigned int i;
- int ret;
+ int ret = 0;
- newinfo->size = size;
- newinfo->number = number;
+ newinfo->size = repl->size;
+ newinfo->number = repl->num_entries;
/* Init all hooks to impossible value. */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
@@ -814,49 +809,61 @@ translate_table(const char *name,
duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
- ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
- check_entry_size_and_hooks,
- newinfo,
- entry0,
- entry0 + size,
- hook_entries, underflows, &i);
- if (ret != 0)
- return ret;
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+ entry0 + repl->size,
+ repl->hook_entry,
+ repl->underflow,
+ repl->valid_hooks);
+ if (ret != 0)
+ return ret;
+ ++i;
+ if (strcmp(ipt_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
- if (i != number) {
+ if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
- i, number);
+ i, repl->num_entries);
return -EINVAL;
}
/* Check hooks all assigned */
for (i = 0; i < NF_INET_NUMHOOKS; i++) {
/* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
+ if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
+ i, repl->hook_entry[i]);
return -EINVAL;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
+ i, repl->underflow[i]);
return -EINVAL;
}
}
- if (!mark_source_chains(newinfo, valid_hooks, entry0))
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
return -ELOOP;
/* Finally, each sanity check must pass */
i = 0;
- ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
- find_check_entry, name, size, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = find_check_entry(iter, net, repl->name, repl->size);
+ if (ret != 0)
+ break;
+ ++i;
+ }
if (ret != 0) {
- IPT_ENTRY_ITERATE(entry0, newinfo->size,
- cleanup_entry, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter, net);
+ }
return ret;
}
@@ -869,64 +876,35 @@ translate_table(const char *name,
return ret;
}
-/* Gets counters. */
-static inline int
-add_entry_to_counter(const struct ipt_entry *e,
- struct xt_counters total[],
- unsigned int *i)
-{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
- (*i)++;
- return 0;
-}
-
-static inline int
-set_entry_to_counter(const struct ipt_entry *e,
- struct ipt_counters total[],
- unsigned int *i)
-{
- SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
- (*i)++;
- return 0;
-}
-
static void
get_counters(const struct xt_table_info *t,
struct xt_counters counters[])
{
+ struct ipt_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu;
-
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU
- * We dont care about preemption here.
- */
- curcpu = raw_smp_processor_id();
-
- i = 0;
- IPT_ENTRY_ITERATE(t->entries[curcpu],
- t->size,
- set_entry_to_counter,
- counters,
- &i);
for_each_possible_cpu(cpu) {
- if (cpu == curcpu)
- continue;
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
+
i = 0;
- IPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_entry_to_counter,
- counters,
- &i);
+ xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(s);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(s, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
+ ++i; /* macro does multi eval of i */
+ }
}
}
-static struct xt_counters * alloc_counters(struct xt_table *table)
+static struct xt_counters *alloc_counters(const struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
@@ -936,26 +914,23 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
(other than comefrom, which userspace doesn't care
about). */
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc_node(countersize, numa_node_id());
+ counters = vzalloc(countersize);
if (counters == NULL)
return ERR_PTR(-ENOMEM);
- /* First, sum counters... */
- write_lock_bh(&table->lock);
get_counters(private, counters);
- write_unlock_bh(&table->lock);
return counters;
}
static int
copy_entries_to_user(unsigned int total_size,
- struct xt_table *table,
+ const struct xt_table *table,
void __user *userptr)
{
unsigned int off, num;
- struct ipt_entry *e;
+ const struct ipt_entry *e;
struct xt_counters *counters;
const struct xt_table_info *private = table->private;
int ret = 0;
@@ -979,8 +954,8 @@ copy_entries_to_user(unsigned int total_size,
/* ... then go back and fix counters and names */
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
unsigned int i;
- const struct ipt_entry_match *m;
- const struct ipt_entry_target *t;
+ const struct xt_entry_match *m;
+ const struct xt_entry_target *t;
e = (struct ipt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
@@ -997,7 +972,7 @@ copy_entries_to_user(unsigned int total_size,
m = (void *)e + i;
if (copy_to_user(userptr + off + i
- + offsetof(struct ipt_entry_match,
+ + offsetof(struct xt_entry_match,
u.user.name),
m->u.kernel.match->name,
strlen(m->u.kernel.match->name)+1)
@@ -1007,9 +982,9 @@ copy_entries_to_user(unsigned int total_size,
}
}
- t = ipt_get_target(e);
+ t = ipt_get_target_c(e);
if (copy_to_user(userptr + off + e->target_offset
- + offsetof(struct ipt_entry_target,
+ + offsetof(struct xt_entry_target,
u.user.name),
t->u.kernel.target->name,
strlen(t->u.kernel.target->name)+1) != 0) {
@@ -1024,7 +999,7 @@ copy_entries_to_user(unsigned int total_size,
}
#ifdef CONFIG_COMPAT
-static void compat_standard_from_user(void *dst, void *src)
+static void compat_standard_from_user(void *dst, const void *src)
{
int v = *(compat_int_t *)src;
@@ -1033,7 +1008,7 @@ static void compat_standard_from_user(void *dst, void *src)
memcpy(dst, &v, sizeof(v));
}
-static int compat_standard_to_user(void __user *dst, void *src)
+static int compat_standard_to_user(void __user *dst, const void *src)
{
compat_int_t cv = *(int *)src;
@@ -1042,25 +1017,20 @@ static int compat_standard_to_user(void __user *dst, void *src)
return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
}
-static inline int
-compat_calc_match(struct ipt_entry_match *m, int *size)
-{
- *size += xt_compat_match_offset(m->u.kernel.match);
- return 0;
-}
-
-static int compat_calc_entry(struct ipt_entry *e,
+static int compat_calc_entry(const struct ipt_entry *e,
const struct xt_table_info *info,
- void *base, struct xt_table_info *newinfo)
+ const void *base, struct xt_table_info *newinfo)
{
- struct ipt_entry_target *t;
+ const struct xt_entry_match *ematch;
+ const struct xt_entry_target *t;
unsigned int entry_offset;
int off, i, ret;
off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
entry_offset = (void *)e - base;
- IPT_MATCH_ITERATE(e, compat_calc_match, &off);
- t = ipt_get_target(e);
+ xt_ematch_foreach(ematch, e)
+ off += xt_compat_match_offset(ematch->u.kernel.match);
+ t = ipt_get_target_c(e);
off += xt_compat_target_offset(t->u.kernel.target);
newinfo->size -= off;
ret = xt_compat_add_offset(AF_INET, entry_offset, off);
@@ -1081,7 +1051,9 @@ static int compat_calc_entry(struct ipt_entry *e,
static int compat_table_info(const struct xt_table_info *info,
struct xt_table_info *newinfo)
{
+ struct ipt_entry *iter;
void *loc_cpu_entry;
+ int ret;
if (!newinfo || !info)
return -EINVAL;
@@ -1090,15 +1062,20 @@ static int compat_table_info(const struct xt_table_info *info,
memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
newinfo->initial_entries = 0;
loc_cpu_entry = info->entries[raw_smp_processor_id()];
- return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
- compat_calc_entry, info, loc_cpu_entry,
- newinfo);
+ xt_compat_init_offsets(AF_INET, info->number);
+ xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+ ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+ if (ret != 0)
+ return ret;
+ }
+ return 0;
}
#endif
-static int get_info(struct net *net, void __user *user, int *len, int compat)
+static int get_info(struct net *net, void __user *user,
+ const int *len, int compat)
{
- char name[IPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
struct xt_table *t;
int ret;
@@ -1111,25 +1088,26 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
if (copy_from_user(name, user, sizeof(name)) != 0)
return -EFAULT;
- name[IPT_TABLE_MAXNAMELEN-1] = '\0';
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
#ifdef CONFIG_COMPAT
if (compat)
xt_compat_lock(AF_INET);
#endif
t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
"iptable_%s", name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
struct ipt_getinfo info;
const struct xt_table_info *private = t->private;
-
#ifdef CONFIG_COMPAT
+ struct xt_table_info tmp;
+
if (compat) {
- struct xt_table_info tmp;
ret = compat_table_info(private, &tmp);
xt_compat_flush_offsets(AF_INET);
private = &tmp;
}
#endif
+ memset(&info, 0, sizeof(info));
info.valid_hooks = t->valid_hooks;
memcpy(info.hook_entry, private->hook_entry,
sizeof(info.hook_entry));
@@ -1156,7 +1134,8 @@ static int get_info(struct net *net, void __user *user, int *len, int compat)
}
static int
-get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
+get_entries(struct net *net, struct ipt_get_entries __user *uptr,
+ const int *len)
{
int ret;
struct ipt_get_entries get;
@@ -1175,7 +1154,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
}
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
@@ -1204,9 +1183,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_table_info *oldinfo;
struct xt_counters *counters;
void *loc_cpu_old_entry;
+ struct ipt_entry *iter;
ret = 0;
- counters = vmalloc(num_counters * sizeof(struct xt_counters));
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
goto out;
@@ -1214,7 +1194,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
"iptable_%s", name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1241,16 +1221,20 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
- NULL);
+ xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ cleanup_entry(iter, net);
+
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
@@ -1265,12 +1249,13 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
}
static int
-do_replace(struct net *net, void __user *user, unsigned int len)
+do_replace(struct net *net, const void __user *user, unsigned int len)
{
int ret;
struct ipt_replace tmp;
struct xt_table_info *newinfo;
void *loc_cpu_entry;
+ struct ipt_entry *iter;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1278,6 +1263,7 @@ do_replace(struct net *net, void __user *user, unsigned int len)
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
@@ -1291,13 +1277,11 @@ do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_table(tmp.name, tmp.valid_hooks,
- newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
- tmp.hook_entry, tmp.underflow);
+ ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
if (ret != 0)
goto free_newinfo;
- duprintf("ip_tables: Translated table\n");
+ duprintf("Translated table\n");
ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
tmp.num_counters, tmp.counters);
@@ -1306,38 +1290,18 @@ do_replace(struct net *net, void __user *user, unsigned int len)
return 0;
free_newinfo_untrans:
- IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
free_newinfo:
xt_free_table_info(newinfo);
return ret;
}
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
static int
-add_counter_to_entry(struct ipt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
+do_add_counters(struct net *net, const void __user *user,
+ unsigned int len, int compat)
{
-#if 0
- duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
- *i,
- (long unsigned int)e->counters.pcnt,
- (long unsigned int)e->counters.bcnt,
- (long unsigned int)addme[*i].pcnt,
- (long unsigned int)addme[*i].bcnt);
-#endif
-
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-static int
-do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
-{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1348,6 +1312,8 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
+ struct ipt_entry *iter;
+ unsigned int addend;
#ifdef CONFIG_COMPAT
struct compat_xt_counters_info compat_tmp;
@@ -1378,7 +1344,7 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
if (len != size + num_counters * sizeof(struct xt_counters))
return -EINVAL;
- paddc = vmalloc_node(len - size, numa_node_id());
+ paddc = vmalloc(len - size);
if (!paddc)
return -ENOMEM;
@@ -1388,12 +1354,12 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
}
t = xt_find_table_lock(net, AF_INET, name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
- write_lock_bh(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
@@ -1402,14 +1368,16 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
- IPT_ENTRY_ITERATE(loc_cpu_entry,
- private->size,
- add_counter_to_entry,
- paddc,
- &i);
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ addend = xt_write_recseq_begin();
+ xt_entry_foreach(iter, loc_cpu_entry, private->size) {
+ ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ ++i;
+ }
+ xt_write_recseq_end(addend);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
@@ -1420,130 +1388,109 @@ do_add_counters(struct net *net, void __user *user, unsigned int len, int compat
#ifdef CONFIG_COMPAT
struct compat_ipt_replace {
- char name[IPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
u32 valid_hooks;
u32 num_entries;
u32 size;
u32 hook_entry[NF_INET_NUMHOOKS];
u32 underflow[NF_INET_NUMHOOKS];
u32 num_counters;
- compat_uptr_t counters; /* struct ipt_counters * */
+ compat_uptr_t counters; /* struct xt_counters * */
struct compat_ipt_entry entries[0];
};
static int
compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
unsigned int *size, struct xt_counters *counters,
- unsigned int *i)
+ unsigned int i)
{
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct compat_ipt_entry __user *ce;
u_int16_t target_offset, next_offset;
compat_uint_t origsize;
- int ret;
+ const struct xt_entry_match *ematch;
+ int ret = 0;
- ret = -EFAULT;
origsize = *size;
ce = (struct compat_ipt_entry __user *)*dstptr;
- if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
- goto out;
-
- if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
- goto out;
+ if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
+ copy_to_user(&ce->counters, &counters[i],
+ sizeof(counters[i])) != 0)
+ return -EFAULT;
*dstptr += sizeof(struct compat_ipt_entry);
*size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
- ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
+ xt_ematch_foreach(ematch, e) {
+ ret = xt_compat_match_to_user(ematch, dstptr, size);
+ if (ret != 0)
+ return ret;
+ }
target_offset = e->target_offset - (origsize - *size);
- if (ret)
- goto out;
t = ipt_get_target(e);
ret = xt_compat_target_to_user(t, dstptr, size);
if (ret)
- goto out;
- ret = -EFAULT;
+ return ret;
next_offset = e->next_offset - (origsize - *size);
- if (put_user(target_offset, &ce->target_offset))
- goto out;
- if (put_user(next_offset, &ce->next_offset))
- goto out;
-
- (*i)++;
+ if (put_user(target_offset, &ce->target_offset) != 0 ||
+ put_user(next_offset, &ce->next_offset) != 0)
+ return -EFAULT;
return 0;
-out:
- return ret;
}
static int
-compat_find_calc_match(struct ipt_entry_match *m,
+compat_find_calc_match(struct xt_entry_match *m,
const char *name,
const struct ipt_ip *ip,
unsigned int hookmask,
- int *size, unsigned int *i)
+ int *size)
{
struct xt_match *match;
- match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
- m->u.user.revision),
- "ipt_%s", m->u.user.name);
- if (IS_ERR(match) || !match) {
+ match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+ m->u.user.revision);
+ if (IS_ERR(match)) {
duprintf("compat_check_calc_match: `%s' not found\n",
m->u.user.name);
- return match ? PTR_ERR(match) : -ENOENT;
+ return PTR_ERR(match);
}
m->u.kernel.match = match;
*size += xt_compat_match_offset(match);
-
- (*i)++;
- return 0;
-}
-
-static int
-compat_release_match(struct ipt_entry_match *m, unsigned int *i)
-{
- if (i && (*i)-- == 0)
- return 1;
-
- module_put(m->u.kernel.match->me);
return 0;
}
-static int
-compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
+static void compat_release_entry(struct compat_ipt_entry *e)
{
- struct ipt_entry_target *t;
-
- if (i && (*i)-- == 0)
- return 1;
+ struct xt_entry_target *t;
+ struct xt_entry_match *ematch;
/* Cleanup all matches */
- COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
+ xt_ematch_foreach(ematch, e)
+ module_put(ematch->u.kernel.match->me);
t = compat_ipt_get_target(e);
module_put(t->u.kernel.target->me);
- return 0;
}
static int
check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
struct xt_table_info *newinfo,
unsigned int *size,
- unsigned char *base,
- unsigned char *limit,
- unsigned int *hook_entries,
- unsigned int *underflows,
- unsigned int *i,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
const char *name)
{
- struct ipt_entry_target *t;
+ struct xt_entry_match *ematch;
+ struct xt_entry_target *t;
struct xt_target *target;
unsigned int entry_offset;
unsigned int j;
int ret, off, h;
duprintf("check_compat_entry_size_and_hooks %p\n", e);
- if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
- || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+ if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
duprintf("Bad offset %p, limit = %p\n", e, limit);
return -EINVAL;
}
@@ -1563,20 +1510,21 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
entry_offset = (void *)e - (void *)base;
j = 0;
- ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
- &e->ip, e->comefrom, &off, &j);
- if (ret != 0)
- goto release_matches;
+ xt_ematch_foreach(ematch, e) {
+ ret = compat_find_calc_match(ematch, name,
+ &e->ip, e->comefrom, &off);
+ if (ret != 0)
+ goto release_matches;
+ ++j;
+ }
t = compat_ipt_get_target(e);
- target = try_then_request_module(xt_find_target(AF_INET,
- t->u.user.name,
- t->u.user.revision),
- "ipt_%s", t->u.user.name);
- if (IS_ERR(target) || !target) {
+ target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
t->u.user.name);
- ret = target ? PTR_ERR(target) : -ENOENT;
+ ret = PTR_ERR(target);
goto release_matches;
}
t->u.kernel.target = target;
@@ -1598,14 +1546,16 @@ check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
/* Clear counters and comefrom */
memset(&e->counters, 0, sizeof(e->counters));
e->comefrom = 0;
-
- (*i)++;
return 0;
out:
module_put(t->u.kernel.target->me);
release_matches:
- IPT_MATCH_ITERATE(e, compat_release_match, &j);
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ module_put(ematch->u.kernel.match->me);
+ }
return ret;
}
@@ -1614,11 +1564,12 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
unsigned int *size, const char *name,
struct xt_table_info *newinfo, unsigned char *base)
{
- struct ipt_entry_target *t;
+ struct xt_entry_target *t;
struct xt_target *target;
struct ipt_entry *de;
unsigned int origsize;
int ret, h;
+ struct xt_entry_match *ematch;
ret = 0;
origsize = *size;
@@ -1629,10 +1580,11 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
*dstptr += sizeof(struct ipt_entry);
*size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
- ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
- dstptr, size);
- if (ret)
- return ret;
+ xt_ematch_foreach(ematch, e) {
+ ret = xt_compat_match_from_user(ematch, dstptr, size);
+ if (ret != 0)
+ return ret;
+ }
de->target_offset = e->target_offset - (origsize - *size);
t = compat_ipt_get_target(e);
target = t->u.kernel.target;
@@ -1649,36 +1601,43 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
}
static int
-compat_check_entry(struct ipt_entry *e, const char *name,
- unsigned int *i)
+compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
{
+ struct xt_entry_match *ematch;
struct xt_mtchk_param mtpar;
unsigned int j;
- int ret;
+ int ret = 0;
j = 0;
+ mtpar.net = net;
mtpar.table = name;
mtpar.entryinfo = &e->ip;
mtpar.hook_mask = e->comefrom;
mtpar.family = NFPROTO_IPV4;
- ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
- if (ret)
- goto cleanup_matches;
+ xt_ematch_foreach(ematch, e) {
+ ret = check_match(ematch, &mtpar);
+ if (ret != 0)
+ goto cleanup_matches;
+ ++j;
+ }
- ret = check_target(e, name);
+ ret = check_target(e, net, name);
if (ret)
goto cleanup_matches;
-
- (*i)++;
return 0;
cleanup_matches:
- IPT_MATCH_ITERATE(e, cleanup_match, &j);
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ cleanup_match(ematch, net);
+ }
return ret;
}
static int
-translate_compat_table(const char *name,
+translate_compat_table(struct net *net,
+ const char *name,
unsigned int valid_hooks,
struct xt_table_info **pinfo,
void **pentry0,
@@ -1690,6 +1649,8 @@ translate_compat_table(const char *name,
unsigned int i, j;
struct xt_table_info *newinfo, *info;
void *pos, *entry0, *entry1;
+ struct compat_ipt_entry *iter0;
+ struct ipt_entry *iter1;
unsigned int size;
int ret;
@@ -1707,14 +1668,19 @@ translate_compat_table(const char *name,
duprintf("translate_compat_table: size %u\n", info->size);
j = 0;
xt_compat_lock(AF_INET);
+ xt_compat_init_offsets(AF_INET, number);
/* Walk through entries, checking offsets. */
- ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
- check_compat_entry_size_and_hooks,
- info, &size, entry0,
- entry0 + total_size,
- hook_entries, underflows, &j, name);
- if (ret != 0)
- goto out_unlock;
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+ entry0 + total_size,
+ hook_entries,
+ underflows,
+ name);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
ret = -EINVAL;
if (j != number) {
@@ -1753,9 +1719,12 @@ translate_compat_table(const char *name,
entry1 = newinfo->entries[raw_smp_processor_id()];
pos = entry1;
size = total_size;
- ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
- compat_copy_entry_from_user,
- &pos, &size, name, newinfo, entry1);
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = compat_copy_entry_from_user(iter0, &pos, &size,
+ name, newinfo, entry1);
+ if (ret != 0)
+ break;
+ }
xt_compat_flush_offsets(AF_INET);
xt_compat_unlock(AF_INET);
if (ret)
@@ -1766,13 +1735,35 @@ translate_compat_table(const char *name,
goto free_newinfo;
i = 0;
- ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
- name, &i);
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ ret = compat_check_entry(iter1, net, name);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(ipt_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
if (ret) {
+ /*
+ * The first i matches need cleanup_entry (calls ->destroy)
+ * because they had called ->check already. The other j-i
+ * entries need only release.
+ */
+ int skip = i;
j -= i;
- COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
- compat_release_entry, &j);
- IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
+ xt_entry_foreach(iter0, entry0, newinfo->size) {
+ if (skip-- > 0)
+ continue;
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter1, net);
+ }
xt_free_table_info(newinfo);
return ret;
}
@@ -1790,7 +1781,11 @@ translate_compat_table(const char *name,
free_newinfo:
xt_free_table_info(newinfo);
out:
- COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
+ xt_entry_foreach(iter0, entry0, total_size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
return ret;
out_unlock:
xt_compat_flush_offsets(AF_INET);
@@ -1805,6 +1800,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
struct compat_ipt_replace tmp;
struct xt_table_info *newinfo;
void *loc_cpu_entry;
+ struct ipt_entry *iter;
if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
return -EFAULT;
@@ -1814,6 +1810,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
@@ -1827,7 +1824,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
goto free_newinfo;
}
- ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+ ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
&newinfo, &loc_cpu_entry, tmp.size,
tmp.num_entries, tmp.hook_entry,
tmp.underflow);
@@ -1843,7 +1840,8 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return 0;
free_newinfo_untrans:
- IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
free_newinfo:
xt_free_table_info(newinfo);
return ret;
@@ -1855,7 +1853,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1876,7 +1874,7 @@ compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
}
struct compat_ipt_get_entries {
- char name[IPT_TABLE_MAXNAMELEN];
+ char name[XT_TABLE_MAXNAMELEN];
compat_uint_t size;
struct compat_ipt_entry entrytable[0];
};
@@ -1892,6 +1890,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
int ret = 0;
const void *loc_cpu_entry;
unsigned int i = 0;
+ struct ipt_entry *iter;
counters = alloc_counters(table);
if (IS_ERR(counters))
@@ -1904,9 +1903,12 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
loc_cpu_entry = private->entries[raw_smp_processor_id()];
pos = userptr;
size = total_size;
- ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
- compat_copy_entry_to_user,
- &pos, &size, counters, &i);
+ xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ ret = compat_copy_entry_to_user(iter, &pos,
+ &size, counters, i++);
+ if (ret != 0)
+ break;
+ }
vfree(counters);
return ret;
@@ -1936,7 +1938,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
duprintf("t->private->number = %u\n", private->number);
@@ -1966,7 +1968,7 @@ compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -1988,7 +1990,7 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2013,7 +2015,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
@@ -2027,7 +2029,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
case IPT_SO_GET_REVISION_MATCH:
case IPT_SO_GET_REVISION_TARGET: {
- struct ipt_get_revision rev;
+ struct xt_get_revision rev;
int target;
if (*len != sizeof(rev)) {
@@ -2038,6 +2040,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EFAULT;
break;
}
+ rev.name[sizeof(rev.name)-1] = 0;
if (cmd == IPT_SO_GET_REVISION_TARGET)
target = 1;
@@ -2059,13 +2062,13 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
-struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
+struct xt_table *ipt_register_table(struct net *net,
+ const struct xt_table *table,
const struct ipt_replace *repl)
{
int ret;
struct xt_table_info *newinfo;
- struct xt_table_info bootstrap
- = { 0, 0, 0, { 0 }, { 0 }, { } };
+ struct xt_table_info bootstrap = {0};
void *loc_cpu_entry;
struct xt_table *new_table;
@@ -2079,11 +2082,7 @@ struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
memcpy(loc_cpu_entry, repl->entries, repl->size);
- ret = translate_table(table->name, table->valid_hooks,
- newinfo, loc_cpu_entry, repl->size,
- repl->num_entries,
- repl->hook_entry,
- repl->underflow);
+ ret = translate_table(net, newinfo, loc_cpu_entry, repl);
if (ret != 0)
goto out_free;
@@ -2101,17 +2100,19 @@ out:
return ERR_PTR(ret);
}
-void ipt_unregister_table(struct xt_table *table)
+void ipt_unregister_table(struct net *net, struct xt_table *table)
{
struct xt_table_info *private;
void *loc_cpu_entry;
struct module *table_owner = table->me;
+ struct ipt_entry *iter;
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries[raw_smp_processor_id()];
- IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter, net);
if (private->number > private->initial_entries)
module_put(table_owner);
xt_free_table_info(private);
@@ -2129,7 +2130,7 @@ icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
}
static bool
-icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
+icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct icmphdr *ic;
struct icmphdr _icmph;
@@ -2145,7 +2146,7 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
* can't. Hence, no choice but to drop.
*/
duprintf("Dropping evil ICMP tinygram.\n");
- *par->hotdrop = true;
+ par->hotdrop = true;
return false;
}
@@ -2156,31 +2157,31 @@ icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
!!(icmpinfo->invflags&IPT_ICMP_INV));
}
-static bool icmp_checkentry(const struct xt_mtchk_param *par)
+static int icmp_checkentry(const struct xt_mtchk_param *par)
{
const struct ipt_icmp *icmpinfo = par->matchinfo;
/* Must specify no unknown invflags */
- return !(icmpinfo->invflags & ~IPT_ICMP_INV);
+ return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
}
-/* The built-in targets: standard (NULL) and error. */
-static struct xt_target ipt_standard_target __read_mostly = {
- .name = IPT_STANDARD_TARGET,
- .targetsize = sizeof(int),
- .family = AF_INET,
+static struct xt_target ipt_builtin_tg[] __read_mostly = {
+ {
+ .name = XT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = NFPROTO_IPV4,
#ifdef CONFIG_COMPAT
- .compatsize = sizeof(compat_int_t),
- .compat_from_user = compat_standard_from_user,
- .compat_to_user = compat_standard_to_user,
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
#endif
-};
-
-static struct xt_target ipt_error_target __read_mostly = {
- .name = IPT_ERROR_TARGET,
- .target = ipt_error,
- .targetsize = IPT_FUNCTION_MAXNAMELEN,
- .family = AF_INET,
+ },
+ {
+ .name = XT_ERROR_TARGET,
+ .target = ipt_error,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
+ .family = NFPROTO_IPV4,
+ },
};
static struct nf_sockopt_ops ipt_sockopts = {
@@ -2200,23 +2201,25 @@ static struct nf_sockopt_ops ipt_sockopts = {
.owner = THIS_MODULE,
};
-static struct xt_match icmp_matchstruct __read_mostly = {
- .name = "icmp",
- .match = icmp_match,
- .matchsize = sizeof(struct ipt_icmp),
- .checkentry = icmp_checkentry,
- .proto = IPPROTO_ICMP,
- .family = AF_INET,
+static struct xt_match ipt_builtin_mt[] __read_mostly = {
+ {
+ .name = "icmp",
+ .match = icmp_match,
+ .matchsize = sizeof(struct ipt_icmp),
+ .checkentry = icmp_checkentry,
+ .proto = IPPROTO_ICMP,
+ .family = NFPROTO_IPV4,
+ },
};
static int __net_init ip_tables_net_init(struct net *net)
{
- return xt_proto_init(net, AF_INET);
+ return xt_proto_init(net, NFPROTO_IPV4);
}
static void __net_exit ip_tables_net_exit(struct net *net)
{
- xt_proto_fini(net, AF_INET);
+ xt_proto_fini(net, NFPROTO_IPV4);
}
static struct pernet_operations ip_tables_net_ops = {
@@ -2232,14 +2235,11 @@ static int __init ip_tables_init(void)
if (ret < 0)
goto err1;
- /* Noone else will be downing sem now, so we won't sleep */
- ret = xt_register_target(&ipt_standard_target);
+ /* No one else will be downing sem now, so we won't sleep */
+ ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
if (ret < 0)
goto err2;
- ret = xt_register_target(&ipt_error_target);
- if (ret < 0)
- goto err3;
- ret = xt_register_match(&icmp_matchstruct);
+ ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
if (ret < 0)
goto err4;
@@ -2248,15 +2248,13 @@ static int __init ip_tables_init(void)
if (ret < 0)
goto err5;
- printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
+ pr_info("(C) 2000-2006 Netfilter Core Team\n");
return 0;
err5:
- xt_unregister_match(&icmp_matchstruct);
+ xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
err4:
- xt_unregister_target(&ipt_error_target);
-err3:
- xt_unregister_target(&ipt_standard_target);
+ xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
err2:
unregister_pernet_subsys(&ip_tables_net_ops);
err1:
@@ -2267,10 +2265,8 @@ static void __exit ip_tables_fini(void)
{
nf_unregister_sockopt(&ipt_sockopts);
- xt_unregister_match(&icmp_matchstruct);
- xt_unregister_target(&ipt_error_target);
- xt_unregister_target(&ipt_standard_target);
-
+ xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
+ xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
unregister_pernet_subsys(&ip_tables_net_ops);
}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 2e4f98b8552..2510c02c2d2 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -9,11 +9,13 @@
* published by the Free Software Foundation.
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/jhash.h>
#include <linux/bitops.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
@@ -26,7 +28,9 @@
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/checksum.h>
+#include <net/ip.h>
#define CLUSTERIP_VERSION "0.8"
@@ -51,17 +55,24 @@ struct clusterip_config {
#endif
enum clusterip_hashmode hash_mode; /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
+ struct rcu_head rcu;
};
-static LIST_HEAD(clusterip_configs);
+#ifdef CONFIG_PROC_FS
+static const struct file_operations clusterip_proc_fops;
+#endif
-/* clusterip_lock protects the clusterip_configs list */
-static DEFINE_RWLOCK(clusterip_lock);
+static int clusterip_net_id __read_mostly;
+
+struct clusterip_net {
+ struct list_head configs;
+ /* lock protects the configs list */
+ spinlock_t lock;
#ifdef CONFIG_PROC_FS
-static const struct file_operations clusterip_proc_fops;
-static struct proc_dir_entry *clusterip_procdir;
+ struct proc_dir_entry *procdir;
#endif
+};
static inline void
clusterip_config_get(struct clusterip_config *c)
@@ -69,11 +80,17 @@ clusterip_config_get(struct clusterip_config *c)
atomic_inc(&c->refcount);
}
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
+{
+ kfree(container_of(head, struct clusterip_config, rcu));
+}
+
static inline void
clusterip_config_put(struct clusterip_config *c)
{
if (atomic_dec_and_test(&c->refcount))
- kfree(c);
+ call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
}
/* decrease the count of entries using/referencing this config. If last
@@ -82,31 +99,36 @@ clusterip_config_put(struct clusterip_config *c)
static inline void
clusterip_config_entry_put(struct clusterip_config *c)
{
- write_lock_bh(&clusterip_lock);
- if (atomic_dec_and_test(&c->entries)) {
- list_del(&c->list);
- write_unlock_bh(&clusterip_lock);
+ struct net *net = dev_net(c->dev);
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
+ local_bh_disable();
+ if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
+ list_del_rcu(&c->list);
+ spin_unlock(&cn->lock);
+ local_bh_enable();
- dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
+ dev_mc_del(c->dev, c->clustermac);
dev_put(c->dev);
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
- remove_proc_entry(c->pde->name, c->pde->parent);
+ proc_remove(c->pde);
#endif
return;
}
- write_unlock_bh(&clusterip_lock);
+ local_bh_enable();
}
static struct clusterip_config *
-__clusterip_config_find(__be32 clusterip)
+__clusterip_config_find(struct net *net, __be32 clusterip)
{
struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
- list_for_each_entry(c, &clusterip_configs, list) {
+ list_for_each_entry_rcu(c, &cn->configs, list) {
if (c->clusterip == clusterip)
return c;
}
@@ -115,20 +137,19 @@ __clusterip_config_find(__be32 clusterip)
}
static inline struct clusterip_config *
-clusterip_config_find_get(__be32 clusterip, int entry)
+clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
{
struct clusterip_config *c;
- read_lock_bh(&clusterip_lock);
- c = __clusterip_config_find(clusterip);
- if (!c) {
- read_unlock_bh(&clusterip_lock);
- return NULL;
+ rcu_read_lock_bh();
+ c = __clusterip_config_find(net, clusterip);
+ if (c) {
+ if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+ c = NULL;
+ else if (entry)
+ atomic_inc(&c->entries);
}
- atomic_inc(&c->refcount);
- if (entry)
- atomic_inc(&c->entries);
- read_unlock_bh(&clusterip_lock);
+ rcu_read_unlock_bh();
return c;
}
@@ -148,6 +169,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
struct net_device *dev)
{
struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
@@ -170,7 +192,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
- clusterip_procdir,
+ cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
kfree(c);
@@ -179,9 +201,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
}
#endif
- write_lock_bh(&clusterip_lock);
- list_add(&c->list, &clusterip_configs);
- write_unlock_bh(&clusterip_lock);
+ spin_lock_bh(&cn->lock);
+ list_add_rcu(&c->list, &cn->configs);
+ spin_unlock_bh(&cn->lock);
return c;
}
@@ -222,25 +244,21 @@ clusterip_hashfn(const struct sk_buff *skb,
{
const struct iphdr *iph = ip_hdr(skb);
unsigned long hashval;
- u_int16_t sport, dport;
- const u_int16_t *ports;
-
- switch (iph->protocol) {
- case IPPROTO_TCP:
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE:
- case IPPROTO_SCTP:
- case IPPROTO_DCCP:
- case IPPROTO_ICMP:
- ports = (const void *)iph+iph->ihl*4;
- sport = ports[0];
- dport = ports[1];
- break;
- default:
- if (net_ratelimit())
- printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n",
- iph->protocol);
- sport = dport = 0;
+ u_int16_t sport = 0, dport = 0;
+ int poff;
+
+ poff = proto_ports_offset(iph->protocol);
+ if (poff >= 0) {
+ const u_int16_t *ports;
+ u16 _ports[2];
+
+ ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
+ if (ports) {
+ sport = ports[0];
+ dport = ports[1];
+ }
+ } else {
+ net_info_ratelimited("unknown protocol %u\n", iph->protocol);
}
switch (config->hash_mode) {
@@ -261,7 +279,7 @@ clusterip_hashfn(const struct sk_buff *skb,
hashval = 0;
/* This cannot happen, unless the check function wasn't called
* at rule load time */
- printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode);
+ pr_info("unknown mode %u\n", config->hash_mode);
BUG();
break;
}
@@ -281,7 +299,7 @@ clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
***********************************************************************/
static unsigned int
-clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
+clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
struct nf_conn *ct;
@@ -293,19 +311,14 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
* that the ->target() function isn't called after ->destroy() */
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL) {
- printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
- /* FIXME: need to drop invalid ones, since replies
- * to outgoing connections of other nodes will be
- * marked as INVALID */
+ if (ct == NULL)
return NF_DROP;
- }
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */
- if (ip_hdr(skb)->protocol == IPPROTO_ICMP
- && (ctinfo == IP_CT_RELATED
- || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
+ (ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY))
return XT_CONTINUE;
/* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
@@ -315,19 +328,19 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
hash = clusterip_hashfn(skb, cipinfo->config);
switch (ctinfo) {
- case IP_CT_NEW:
- ct->mark = hash;
- break;
- case IP_CT_RELATED:
- case IP_CT_RELATED+IP_CT_IS_REPLY:
- /* FIXME: we don't handle expectations at the
- * moment. they can arrive on a different node than
- * the master connection (e.g. FTP passive mode) */
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED+IP_CT_IS_REPLY:
- break;
- default:
- break;
+ case IP_CT_NEW:
+ ct->mark = hash;
+ break;
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ /* FIXME: we don't handle expectations at the moment.
+ * They can arrive on a different node than
+ * the master connection (e.g. FTP passive mode) */
+ case IP_CT_ESTABLISHED:
+ case IP_CT_ESTABLISHED_REPLY:
+ break;
+ default: /* Prevent gcc warnings */
+ break;
}
#ifdef DEBUG
@@ -347,67 +360,65 @@ clusterip_tg(struct sk_buff *skb, const struct xt_target_param *par)
return XT_CONTINUE;
}
-static bool clusterip_tg_check(const struct xt_tgchk_param *par)
+static int clusterip_tg_check(const struct xt_tgchk_param *par)
{
struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
-
struct clusterip_config *config;
+ int ret;
if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
- printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n",
- cipinfo->hash_mode);
- return false;
+ pr_info("unknown mode %u\n", cipinfo->hash_mode);
+ return -EINVAL;
}
- if (e->ip.dmsk.s_addr != htonl(0xffffffff)
- || e->ip.dst.s_addr == 0) {
- printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
- return false;
+ if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
+ e->ip.dst.s_addr == 0) {
+ pr_info("Please specify destination IP\n");
+ return -EINVAL;
}
/* FIXME: further sanity checks */
- config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
+ config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
if (!config) {
if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
- printk(KERN_WARNING "CLUSTERIP: no config found for %pI4, need 'new'\n", &e->ip.dst.s_addr);
- return false;
+ pr_info("no config found for %pI4, need 'new'\n",
+ &e->ip.dst.s_addr);
+ return -EINVAL;
} else {
struct net_device *dev;
if (e->ip.iniface[0] == '\0') {
- printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n");
- return false;
+ pr_info("Please specify an interface name\n");
+ return -EINVAL;
}
- dev = dev_get_by_name(&init_net, e->ip.iniface);
+ dev = dev_get_by_name(par->net, e->ip.iniface);
if (!dev) {
- printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
- return false;
+ pr_info("no such interface %s\n",
+ e->ip.iniface);
+ return -ENOENT;
}
config = clusterip_config_init(cipinfo,
e->ip.dst.s_addr, dev);
if (!config) {
- printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n");
dev_put(dev);
- return false;
+ return -ENOMEM;
}
- dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
+ dev_mc_add(config->dev, config->clustermac);
}
}
cipinfo->config = config;
- if (nf_ct_l3proto_try_module_get(par->target->family) < 0) {
- printk(KERN_WARNING "can't load conntrack support for "
- "proto=%u\n", par->target->family);
- return false;
- }
-
- return true;
+ ret = nf_ct_l3proto_try_module_get(par->family);
+ if (ret < 0)
+ pr_info("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
}
/* drop reference count of cluster config when rule is deleted */
@@ -421,7 +432,7 @@ static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
clusterip_config_put(cipinfo->config);
- nf_ct_l3proto_module_put(par->target->family);
+ nf_ct_l3proto_module_put(par->family);
}
#ifdef CONFIG_COMPAT
@@ -462,7 +473,7 @@ struct arp_payload {
__be32 src_ip;
u_int8_t dst_hw[ETH_ALEN];
__be32 dst_ip;
-} __attribute__ ((packed));
+} __packed;
#ifdef DEBUG
static void arp_print(struct arp_payload *payload)
@@ -478,13 +489,13 @@ static void arp_print(struct arp_payload *payload)
}
hbuffer[--k]='\0';
- printk("src %pI4@%s, dst %pI4\n",
- &payload->src_ip, hbuffer, &payload->dst_ip);
+ pr_debug("src %pI4@%s, dst %pI4\n",
+ &payload->src_ip, hbuffer, &payload->dst_ip);
}
#endif
static unsigned int
-arp_mangle(unsigned int hook,
+arp_mangle(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -493,23 +504,24 @@ arp_mangle(unsigned int hook,
struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
struct clusterip_config *c;
+ struct net *net = dev_net(in ? in : out);
/* we don't care about non-ethernet and non-ipv4 ARP */
- if (arp->ar_hrd != htons(ARPHRD_ETHER)
- || arp->ar_pro != htons(ETH_P_IP)
- || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
return NF_ACCEPT;
/* we only want to mangle arp requests and replies */
- if (arp->ar_op != htons(ARPOP_REPLY)
- && arp->ar_op != htons(ARPOP_REQUEST))
+ if (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST))
return NF_ACCEPT;
payload = (void *)(arp+1);
/* if there is no clusterip configuration for the arp reply's
* source ip, we don't want to mangle it */
- c = clusterip_config_find_get(payload->src_ip, 0);
+ c = clusterip_config_find_get(net, payload->src_ip, 0);
if (!c)
return NF_ACCEPT;
@@ -518,7 +530,7 @@ arp_mangle(unsigned int hook,
* this wouldn't work, since we didn't subscribe the mcast group on
* other interfaces */
if (c->dev != out) {
- pr_debug("CLUSTERIP: not mangling arp reply on different "
+ pr_debug("not mangling arp reply on different "
"interface: cip'%s'-skb'%s'\n",
c->dev->name, out->name);
clusterip_config_put(c);
@@ -529,7 +541,7 @@ arp_mangle(unsigned int hook,
memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
#ifdef DEBUG
- pr_debug(KERN_DEBUG "CLUSTERIP mangled arp reply: ");
+ pr_debug("mangled arp reply: ");
arp_print(payload);
#endif
@@ -560,8 +572,7 @@ struct clusterip_seq_position {
static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
{
- const struct proc_dir_entry *pde = s->private;
- struct clusterip_config *c = pde->data;
+ struct clusterip_config *c = s->private;
unsigned int weight;
u_int32_t local_nodes;
struct clusterip_seq_position *idx;
@@ -601,7 +612,8 @@ static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
- kfree(v);
+ if (!IS_ERR(v))
+ kfree(v);
}
static int clusterip_seq_show(struct seq_file *s, void *v)
@@ -632,10 +644,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- struct proc_dir_entry *pde = PDE(inode);
- struct clusterip_config *c = pde->data;
+ struct clusterip_config *c = PDE_DATA(inode);
- sf->private = pde;
+ sf->private = c;
clusterip_config_get(c);
}
@@ -645,8 +656,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
- struct proc_dir_entry *pde = PDE(inode);
- struct clusterip_config *c = pde->data;
+ struct clusterip_config *c = PDE_DATA(inode);
int ret;
ret = seq_release(inode, file);
@@ -660,21 +670,28 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *ofs)
{
+ struct clusterip_config *c = PDE_DATA(file_inode(file));
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
- const struct proc_dir_entry *pde = PDE(file->f_path.dentry->d_inode);
- struct clusterip_config *c = pde->data;
unsigned long nodenum;
+ int rc;
- if (copy_from_user(buffer, input, PROC_WRITELEN))
+ if (size > PROC_WRITELEN)
+ return -EIO;
+ if (copy_from_user(buffer, input, size))
return -EFAULT;
+ buffer[size] = 0;
if (*buffer == '+') {
- nodenum = simple_strtoul(buffer+1, NULL, 10);
+ rc = kstrtoul(buffer+1, 10, &nodenum);
+ if (rc)
+ return rc;
if (clusterip_add_node(c, nodenum))
return -ENOMEM;
} else if (*buffer == '-') {
- nodenum = simple_strtoul(buffer+1, NULL,10);
+ rc = kstrtoul(buffer+1, 10, &nodenum);
+ if (rc)
+ return rc;
if (clusterip_del_node(c, nodenum))
return -ENOENT;
} else
@@ -694,49 +711,78 @@ static const struct file_operations clusterip_proc_fops = {
#endif /* CONFIG_PROC_FS */
+static int clusterip_net_init(struct net *net)
+{
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
+ INIT_LIST_HEAD(&cn->configs);
+
+ spin_lock_init(&cn->lock);
+
+#ifdef CONFIG_PROC_FS
+ cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
+ if (!cn->procdir) {
+ pr_err("Unable to proc dir entry\n");
+ return -ENOMEM;
+ }
+#endif /* CONFIG_PROC_FS */
+
+ return 0;
+}
+
+static void clusterip_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ proc_remove(cn->procdir);
+#endif
+}
+
+static struct pernet_operations clusterip_net_ops = {
+ .init = clusterip_net_init,
+ .exit = clusterip_net_exit,
+ .id = &clusterip_net_id,
+ .size = sizeof(struct clusterip_net),
+};
+
static int __init clusterip_tg_init(void)
{
int ret;
- ret = xt_register_target(&clusterip_tg_reg);
+ ret = register_pernet_subsys(&clusterip_net_ops);
if (ret < 0)
return ret;
+ ret = xt_register_target(&clusterip_tg_reg);
+ if (ret < 0)
+ goto cleanup_subsys;
+
ret = nf_register_hook(&cip_arp_ops);
if (ret < 0)
goto cleanup_target;
-#ifdef CONFIG_PROC_FS
- clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
- if (!clusterip_procdir) {
- printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
- ret = -ENOMEM;
- goto cleanup_hook;
- }
-#endif /* CONFIG_PROC_FS */
-
- printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n",
+ pr_info("ClusterIP Version %s loaded successfully\n",
CLUSTERIP_VERSION);
+
return 0;
-#ifdef CONFIG_PROC_FS
-cleanup_hook:
- nf_unregister_hook(&cip_arp_ops);
-#endif /* CONFIG_PROC_FS */
cleanup_target:
xt_unregister_target(&clusterip_tg_reg);
+cleanup_subsys:
+ unregister_pernet_subsys(&clusterip_net_ops);
return ret;
}
static void __exit clusterip_tg_exit(void)
{
- printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
- CLUSTERIP_VERSION);
-#ifdef CONFIG_PROC_FS
- remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
-#endif
+ pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
+
nf_unregister_hook(&cip_arp_ops);
xt_unregister_target(&clusterip_tg_reg);
+ unregister_pernet_subsys(&clusterip_net_ops);
+
+ /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+ rcu_barrier_bh();
}
module_init(clusterip_tg_init);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index f7e2fa0974d..4bf3dc49ad1 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -6,7 +6,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -50,7 +50,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
struct tcphdr _tcph, *tcph;
__be16 oldval;
- /* Not enought header? */
+ /* Not enough header? */
tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (!tcph)
return false;
@@ -77,7 +77,7 @@ set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
}
static unsigned int
-ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ecn_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ipt_ECN_info *einfo = par->targinfo;
@@ -85,36 +85,33 @@ ecn_tg(struct sk_buff *skb, const struct xt_target_param *par)
if (!set_ect_ip(skb, einfo))
return NF_DROP;
- if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
- && ip_hdr(skb)->protocol == IPPROTO_TCP)
+ if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) &&
+ ip_hdr(skb)->protocol == IPPROTO_TCP)
if (!set_ect_tcp(skb, einfo))
return NF_DROP;
return XT_CONTINUE;
}
-static bool ecn_tg_check(const struct xt_tgchk_param *par)
+static int ecn_tg_check(const struct xt_tgchk_param *par)
{
const struct ipt_ECN_info *einfo = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
if (einfo->operation & IPT_ECN_OP_MASK) {
- printk(KERN_WARNING "ECN: unsupported ECN operation %x\n",
- einfo->operation);
- return false;
+ pr_info("unsupported ECN operation %x\n", einfo->operation);
+ return -EINVAL;
}
if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
- printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n",
- einfo->ip_ect);
- return false;
+ pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
+ return -EINVAL;
}
- if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR))
- && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
- printk(KERN_WARNING "ECN: cannot use TCP operations on a "
- "non-tcp rule\n");
- return false;
+ if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
+ (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
+ pr_info("cannot use TCP operations on a non-tcp rule\n");
+ return -EINVAL;
}
- return true;
+ return 0;
}
static struct xt_target ecn_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
deleted file mode 100644
index 27a78fbbd92..00000000000
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * This is a module which is used for logging packets.
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/tcp.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ipt_LOG.h>
-#include <net/netfilter/nf_log.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: IPv4 packet logging to syslog");
-
-/* Use lock to serialize, so printks don't overlap */
-static DEFINE_SPINLOCK(log_lock);
-
-/* One level of recursion won't kill us */
-static void dump_packet(const struct nf_loginfo *info,
- const struct sk_buff *skb,
- unsigned int iphoff)
-{
- struct iphdr _iph;
- const struct iphdr *ih;
- unsigned int logflags;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
- else
- logflags = NF_LOG_MASK;
-
- ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
- if (ih == NULL) {
- printk("TRUNCATED");
- return;
- }
-
- /* Important fields:
- * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
- /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
- printk("SRC=%pI4 DST=%pI4 ",
- &ih->saddr, &ih->daddr);
-
- /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
- printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
- ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
- ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
-
- /* Max length: 6 "CE DF MF " */
- if (ntohs(ih->frag_off) & IP_CE)
- printk("CE ");
- if (ntohs(ih->frag_off) & IP_DF)
- printk("DF ");
- if (ntohs(ih->frag_off) & IP_MF)
- printk("MF ");
-
- /* Max length: 11 "FRAG:65535 " */
- if (ntohs(ih->frag_off) & IP_OFFSET)
- printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
-
- if ((logflags & IPT_LOG_IPOPT)
- && ih->ihl * 4 > sizeof(struct iphdr)) {
- const unsigned char *op;
- unsigned char _opt[4 * 15 - sizeof(struct iphdr)];
- unsigned int i, optsize;
-
- optsize = ih->ihl * 4 - sizeof(struct iphdr);
- op = skb_header_pointer(skb, iphoff+sizeof(_iph),
- optsize, _opt);
- if (op == NULL) {
- printk("TRUNCATED");
- return;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- printk("OPT (");
- for (i = 0; i < optsize; i++)
- printk("%02X", op[i]);
- printk(") ");
- }
-
- switch (ih->protocol) {
- case IPPROTO_TCP: {
- struct tcphdr _tcph;
- const struct tcphdr *th;
-
- /* Max length: 10 "PROTO=TCP " */
- printk("PROTO=TCP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- printk("SPT=%u DPT=%u ",
- ntohs(th->source), ntohs(th->dest));
- /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
- if (logflags & IPT_LOG_TCPSEQ)
- printk("SEQ=%u ACK=%u ",
- ntohl(th->seq), ntohl(th->ack_seq));
- /* Max length: 13 "WINDOW=65535 " */
- printk("WINDOW=%u ", ntohs(th->window));
- /* Max length: 9 "RES=0x3F " */
- printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
- /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
- if (th->cwr)
- printk("CWR ");
- if (th->ece)
- printk("ECE ");
- if (th->urg)
- printk("URG ");
- if (th->ack)
- printk("ACK ");
- if (th->psh)
- printk("PSH ");
- if (th->rst)
- printk("RST ");
- if (th->syn)
- printk("SYN ");
- if (th->fin)
- printk("FIN ");
- /* Max length: 11 "URGP=65535 " */
- printk("URGP=%u ", ntohs(th->urg_ptr));
-
- if ((logflags & IPT_LOG_TCPOPT)
- && th->doff * 4 > sizeof(struct tcphdr)) {
- unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
- const unsigned char *op;
- unsigned int i, optsize;
-
- optsize = th->doff * 4 - sizeof(struct tcphdr);
- op = skb_header_pointer(skb,
- iphoff+ih->ihl*4+sizeof(_tcph),
- optsize, _opt);
- if (op == NULL) {
- printk("TRUNCATED");
- return;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- printk("OPT (");
- for (i = 0; i < optsize; i++)
- printk("%02X", op[i]);
- printk(") ");
- }
- break;
- }
- case IPPROTO_UDP:
- case IPPROTO_UDPLITE: {
- struct udphdr _udph;
- const struct udphdr *uh;
-
- if (ih->protocol == IPPROTO_UDP)
- /* Max length: 10 "PROTO=UDP " */
- printk("PROTO=UDP " );
- else /* Max length: 14 "PROTO=UDPLITE " */
- printk("PROTO=UDPLITE ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_udph), &_udph);
- if (uh == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- printk("SPT=%u DPT=%u LEN=%u ",
- ntohs(uh->source), ntohs(uh->dest),
- ntohs(uh->len));
- break;
- }
- case IPPROTO_ICMP: {
- struct icmphdr _icmph;
- const struct icmphdr *ich;
- static const size_t required_len[NR_ICMP_TYPES+1]
- = { [ICMP_ECHOREPLY] = 4,
- [ICMP_DEST_UNREACH]
- = 8 + sizeof(struct iphdr),
- [ICMP_SOURCE_QUENCH]
- = 8 + sizeof(struct iphdr),
- [ICMP_REDIRECT]
- = 8 + sizeof(struct iphdr),
- [ICMP_ECHO] = 4,
- [ICMP_TIME_EXCEEDED]
- = 8 + sizeof(struct iphdr),
- [ICMP_PARAMETERPROB]
- = 8 + sizeof(struct iphdr),
- [ICMP_TIMESTAMP] = 20,
- [ICMP_TIMESTAMPREPLY] = 20,
- [ICMP_ADDRESS] = 12,
- [ICMP_ADDRESSREPLY] = 12 };
-
- /* Max length: 11 "PROTO=ICMP " */
- printk("PROTO=ICMP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
- sizeof(_icmph), &_icmph);
- if (ich == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Max length: 18 "TYPE=255 CODE=255 " */
- printk("TYPE=%u CODE=%u ", ich->type, ich->code);
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (ich->type <= NR_ICMP_TYPES
- && required_len[ich->type]
- && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- switch (ich->type) {
- case ICMP_ECHOREPLY:
- case ICMP_ECHO:
- /* Max length: 19 "ID=65535 SEQ=65535 " */
- printk("ID=%u SEQ=%u ",
- ntohs(ich->un.echo.id),
- ntohs(ich->un.echo.sequence));
- break;
-
- case ICMP_PARAMETERPROB:
- /* Max length: 14 "PARAMETER=255 " */
- printk("PARAMETER=%u ",
- ntohl(ich->un.gateway) >> 24);
- break;
- case ICMP_REDIRECT:
- /* Max length: 24 "GATEWAY=255.255.255.255 " */
- printk("GATEWAY=%pI4 ", &ich->un.gateway);
- /* Fall through */
- case ICMP_DEST_UNREACH:
- case ICMP_SOURCE_QUENCH:
- case ICMP_TIME_EXCEEDED:
- /* Max length: 3+maxlen */
- if (!iphoff) { /* Only recurse once. */
- printk("[");
- dump_packet(info, skb,
- iphoff + ih->ihl*4+sizeof(_icmph));
- printk("] ");
- }
-
- /* Max length: 10 "MTU=65535 " */
- if (ich->type == ICMP_DEST_UNREACH
- && ich->code == ICMP_FRAG_NEEDED)
- printk("MTU=%u ", ntohs(ich->un.frag.mtu));
- }
- break;
- }
- /* Max Length */
- case IPPROTO_AH: {
- struct ip_auth_hdr _ahdr;
- const struct ip_auth_hdr *ah;
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 9 "PROTO=AH " */
- printk("PROTO=AH ");
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_ahdr), &_ahdr);
- if (ah == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Length: 15 "SPI=0xF1234567 " */
- printk("SPI=0x%x ", ntohl(ah->spi));
- break;
- }
- case IPPROTO_ESP: {
- struct ip_esp_hdr _esph;
- const struct ip_esp_hdr *eh;
-
- /* Max length: 10 "PROTO=ESP " */
- printk("PROTO=ESP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_esph), &_esph);
- if (eh == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Length: 15 "SPI=0xF1234567 " */
- printk("SPI=0x%x ", ntohl(eh->spi));
- break;
- }
- /* Max length: 10 "PROTO 255 " */
- default:
- printk("PROTO=%u ", ih->protocol);
- }
-
- /* Max length: 15 "UID=4294967295 " */
- if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- printk("UID=%u GID=%u ",
- skb->sk->sk_socket->file->f_cred->fsuid,
- skb->sk->sk_socket->file->f_cred->fsgid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
- }
-
- /* Max length: 16 "MARK=0xFFFFFFFF " */
- if (!iphoff && skb->mark)
- printk("MARK=0x%x ", skb->mark);
-
- /* Proto Max log string length */
- /* IP: 40+46+6+11+127 = 230 */
- /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
- /* UDP: 10+max(25,20) = 35 */
- /* UDPLITE: 14+max(25,20) = 39 */
- /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
- /* ESP: 10+max(25)+15 = 50 */
- /* AH: 9+max(25)+15 = 49 */
- /* unknown: 10 */
-
- /* (ICMP allows recursion one level deep) */
- /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
- /* maxlen = 230+ 91 + 230 + 252 = 803 */
-}
-
-static struct nf_loginfo default_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 0,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static void
-ipt_log_packet(u_int8_t pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- if (!loginfo)
- loginfo = &default_loginfo;
-
- spin_lock_bh(&log_lock);
- printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
- prefix,
- in ? in->name : "",
- out ? out->name : "");
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- const struct net_device *physindev;
- const struct net_device *physoutdev;
-
- physindev = skb->nf_bridge->physindev;
- if (physindev && in != physindev)
- printk("PHYSIN=%s ", physindev->name);
- physoutdev = skb->nf_bridge->physoutdev;
- if (physoutdev && out != physoutdev)
- printk("PHYSOUT=%s ", physoutdev->name);
- }
-#endif
-
- if (in && !out) {
- /* MAC logging for input chain only. */
- printk("MAC=");
- if (skb->dev && skb->dev->hard_header_len
- && skb->mac_header != skb->network_header) {
- int i;
- const unsigned char *p = skb_mac_header(skb);
- for (i = 0; i < skb->dev->hard_header_len; i++,p++)
- printk("%02x%c", *p,
- i==skb->dev->hard_header_len - 1
- ? ' ':':');
- } else
- printk(" ");
- }
-
- dump_packet(loginfo, skb, 0);
- printk("\n");
- spin_unlock_bh(&log_lock);
-}
-
-static unsigned int
-log_tg(struct sk_buff *skb, const struct xt_target_param *par)
-{
- const struct ipt_log_info *loginfo = par->targinfo;
- struct nf_loginfo li;
-
- li.type = NF_LOG_TYPE_LOG;
- li.u.log.level = loginfo->level;
- li.u.log.logflags = loginfo->logflags;
-
- ipt_log_packet(NFPROTO_IPV4, par->hooknum, skb, par->in, par->out, &li,
- loginfo->prefix);
- return XT_CONTINUE;
-}
-
-static bool log_tg_check(const struct xt_tgchk_param *par)
-{
- const struct ipt_log_info *loginfo = par->targinfo;
-
- if (loginfo->level >= 8) {
- pr_debug("LOG: level %u >= 8\n", loginfo->level);
- return false;
- }
- if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
- pr_debug("LOG: prefix term %i\n",
- loginfo->prefix[sizeof(loginfo->prefix)-1]);
- return false;
- }
- return true;
-}
-
-static struct xt_target log_tg_reg __read_mostly = {
- .name = "LOG",
- .family = NFPROTO_IPV4,
- .target = log_tg,
- .targetsize = sizeof(struct ipt_log_info),
- .checkentry = log_tg_check,
- .me = THIS_MODULE,
-};
-
-static const struct nf_logger ipt_log_logger ={
- .name = "ipt_LOG",
- .logfn = &ipt_log_packet,
- .me = THIS_MODULE,
-};
-
-static int __init log_tg_init(void)
-{
- int ret;
-
- ret = xt_register_target(&log_tg_reg);
- if (ret < 0)
- return ret;
- nf_log_register(NFPROTO_IPV4, &ipt_log_logger);
- return 0;
-}
-
-static void __exit log_tg_exit(void)
-{
- nf_log_unregister(&ipt_log_logger);
- xt_unregister_target(&log_tg_reg);
-}
-
-module_init(log_tg_init);
-module_exit(log_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index f389f60cb10..00352ce0f0d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -8,7 +8,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/types.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -19,51 +19,48 @@
#include <net/ip.h>
#include <net/checksum.h>
#include <net/route.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
-/* Lock protects masq region inside conntrack */
-static DEFINE_RWLOCK(masq_lock);
-
/* FIXME: Multiple targets. --RR */
-static bool masquerade_tg_check(const struct xt_tgchk_param *par)
+static int masquerade_tg_check(const struct xt_tgchk_param *par)
{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
- if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
- pr_debug("masquerade_check: bad MAP_IPS.\n");
- return false;
+ if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
}
if (mr->rangesize != 1) {
- pr_debug("masquerade_check: bad rangesize %u\n", mr->rangesize);
- return false;
+ pr_debug("bad rangesize %u\n", mr->rangesize);
+ return -EINVAL;
}
- return true;
+ return 0;
}
static unsigned int
-masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
+masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
struct nf_conn *ct;
struct nf_conn_nat *nat;
enum ip_conntrack_info ctinfo;
struct nf_nat_range newrange;
- const struct nf_nat_multi_range_compat *mr;
+ const struct nf_nat_ipv4_multi_range_compat *mr;
const struct rtable *rt;
- __be32 newsrc;
+ __be32 newsrc, nh;
NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
ct = nf_ct_get(skb, &ctinfo);
nat = nfct_nat(ct);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
- || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
/* Source address is 0.0.0.0 - locally generated packet that is
* probably not supposed to be masqueraded.
@@ -72,48 +69,46 @@ masquerade_tg(struct sk_buff *skb, const struct xt_target_param *par)
return NF_ACCEPT;
mr = par->targinfo;
- rt = skb->rtable;
- newsrc = inet_select_addr(par->out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+ rt = skb_rtable(skb);
+ nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+ newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE);
if (!newsrc) {
- printk("MASQUERADE: %s ate my IP address\n", par->out->name);
+ pr_info("%s ate my IP address\n", par->out->name);
return NF_DROP;
}
- write_lock_bh(&masq_lock);
nat->masq_index = par->out->ifindex;
- write_unlock_bh(&masq_lock);
/* Transfer from original range. */
- newrange = ((struct nf_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
- newsrc, newsrc,
- mr->range[0].min, mr->range[0].max });
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = newsrc;
+ newrange.max_addr.ip = newsrc;
+ newrange.min_proto = mr->range[0].min;
+ newrange.max_proto = mr->range[0].max;
/* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC);
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
static int
device_cmp(struct nf_conn *i, void *ifindex)
{
const struct nf_conn_nat *nat = nfct_nat(i);
- int ret;
if (!nat)
return 0;
-
- read_lock_bh(&masq_lock);
- ret = (nat->masq_index == (int)(long)ifindex);
- read_unlock_bh(&masq_lock);
-
- return ret;
+ if (nf_ct_l3num(i) != NFPROTO_IPV4)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
}
static int masq_device_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- const struct net_device *dev = ptr;
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
if (event == NETDEV_DOWN) {
@@ -123,7 +118,7 @@ static int masq_device_event(struct notifier_block *this,
NF_CT_ASSERT(dev->ifindex != 0);
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
}
return NOTIFY_DONE;
@@ -134,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this,
void *ptr)
{
struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
- return masq_device_event(this, event, dev);
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, dev);
+ return masq_device_event(this, event, &info);
}
static struct notifier_block masq_dev_notifier = {
@@ -149,7 +147,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = {
.name = "MASQUERADE",
.family = NFPROTO_IPV4,
.target = masquerade_tg,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
.table = "nat",
.hooks = 1 << NF_INET_POST_ROUTING,
.checkentry = masquerade_tg_check,
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
deleted file mode 100644
index 7c29582d4ec..00000000000
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/* NETMAP - static NAT mapping of IP network addresses (1:1).
- * The mapping can be applied to source (POSTROUTING),
- * destination (PREROUTING), or both (with separate rules).
- */
-
-/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_rule.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
-MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets");
-
-static bool netmap_tg_check(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
-
- if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
- pr_debug("NETMAP:check: bad MAP_IPS.\n");
- return false;
- }
- if (mr->rangesize != 1) {
- pr_debug("NETMAP:check: bad rangesize %u.\n", mr->rangesize);
- return false;
- }
- return true;
-}
-
-static unsigned int
-netmap_tg(struct sk_buff *skb, const struct xt_target_param *par)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- __be32 new_ip, netmask;
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
- struct nf_nat_range newrange;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_POST_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT);
- ct = nf_ct_get(skb, &ctinfo);
-
- netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
-
- if (par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT)
- new_ip = ip_hdr(skb)->daddr & ~netmask;
- else
- new_ip = ip_hdr(skb)->saddr & ~netmask;
- new_ip |= mr->range[0].min_ip & netmask;
-
- newrange = ((struct nf_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
- new_ip, new_ip,
- mr->range[0].min, mr->range[0].max });
-
- /* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum));
-}
-
-static struct xt_target netmap_tg_reg __read_mostly = {
- .name = "NETMAP",
- .family = NFPROTO_IPV4,
- .target = netmap_tg,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_OUT),
- .checkentry = netmap_tg_check,
- .me = THIS_MODULE
-};
-
-static int __init netmap_tg_init(void)
-{
- return xt_register_target(&netmap_tg_reg);
-}
-
-static void __exit netmap_tg_exit(void)
-{
- xt_unregister_target(&netmap_tg_reg);
-}
-
-module_init(netmap_tg_init);
-module_exit(netmap_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
deleted file mode 100644
index 698e5e78685..00000000000
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Redirect. Simple mapping which alters dst to a local IP address. */
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-#include <linux/if.h>
-#include <linux/inetdevice.h>
-#include <net/protocol.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter/x_tables.h>
-#include <net/netfilter/nf_nat_rule.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: Connection redirection to localhost");
-
-/* FIXME: Take multiple ranges --RR */
-static bool redirect_tg_check(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
-
- if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
- pr_debug("redirect_check: bad MAP_IPS.\n");
- return false;
- }
- if (mr->rangesize != 1) {
- pr_debug("redirect_check: bad rangesize %u.\n", mr->rangesize);
- return false;
- }
- return true;
-}
-
-static unsigned int
-redirect_tg(struct sk_buff *skb, const struct xt_target_param *par)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- __be32 newdst;
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
- struct nf_nat_range newrange;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT);
-
- ct = nf_ct_get(skb, &ctinfo);
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
- /* Local packets: make them go to loopback */
- if (par->hooknum == NF_INET_LOCAL_OUT)
- newdst = htonl(0x7F000001);
- else {
- struct in_device *indev;
- struct in_ifaddr *ifa;
-
- newdst = 0;
-
- rcu_read_lock();
- indev = __in_dev_get_rcu(skb->dev);
- if (indev && (ifa = indev->ifa_list))
- newdst = ifa->ifa_local;
- rcu_read_unlock();
-
- if (!newdst)
- return NF_DROP;
- }
-
- /* Transfer from original range. */
- newrange = ((struct nf_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
- newdst, newdst,
- mr->range[0].min, mr->range[0].max });
-
- /* Hand modified range to generic setup. */
- return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST);
-}
-
-static struct xt_target redirect_tg_reg __read_mostly = {
- .name = "REDIRECT",
- .family = NFPROTO_IPV4,
- .target = redirect_tg,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
- .checkentry = redirect_tg_check,
- .me = THIS_MODULE,
-};
-
-static int __init redirect_tg_init(void)
-{
- return xt_register_target(&redirect_tg_reg);
-}
-
-static void __exit redirect_tg_exit(void)
-{
- xt_unregister_target(&redirect_tg_reg);
-}
-
-module_init(redirect_tg_init);
-module_exit(redirect_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 0b4b6e0ff2b..5b6e0df4ccf 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -9,17 +9,14 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/route.h>
-#include <net/dst.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_REJECT.h>
@@ -27,146 +24,41 @@
#include <linux/netfilter_bridge.h>
#endif
+#include <net/netfilter/ipv4/nf_reject.h>
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
-/* Send RST reply */
-static void send_reset(struct sk_buff *oldskb, int hook)
-{
- struct sk_buff *nskb;
- const struct iphdr *oiph;
- struct iphdr *niph;
- const struct tcphdr *oth;
- struct tcphdr _otcph, *tcph;
- unsigned int addr_type;
-
- /* IP header checks: fragment. */
- if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
- return;
-
- oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
- sizeof(_otcph), &_otcph);
- if (oth == NULL)
- return;
-
- /* No RST for RST. */
- if (oth->rst)
- return;
-
- /* Check checksum */
- if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
- return;
- oiph = ip_hdr(oldskb);
-
- nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
- LL_MAX_HEADER, GFP_ATOMIC);
- if (!nskb)
- return;
-
- skb_reserve(nskb, LL_MAX_HEADER);
-
- skb_reset_network_header(nskb);
- niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
- niph->version = 4;
- niph->ihl = sizeof(struct iphdr) / 4;
- niph->tos = 0;
- niph->id = 0;
- niph->frag_off = htons(IP_DF);
- niph->protocol = IPPROTO_TCP;
- niph->check = 0;
- niph->saddr = oiph->daddr;
- niph->daddr = oiph->saddr;
-
- tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
- memset(tcph, 0, sizeof(*tcph));
- tcph->source = oth->dest;
- tcph->dest = oth->source;
- tcph->doff = sizeof(struct tcphdr) / 4;
-
- if (oth->ack)
- tcph->seq = oth->ack_seq;
- else {
- tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
- oldskb->len - ip_hdrlen(oldskb) -
- (oth->doff << 2));
- tcph->ack = 1;
- }
-
- tcph->rst = 1;
- tcph->check = tcp_v4_check(sizeof(struct tcphdr),
- niph->saddr, niph->daddr,
- csum_partial(tcph,
- sizeof(struct tcphdr), 0));
-
- addr_type = RTN_UNSPEC;
- if (hook != NF_INET_FORWARD
-#ifdef CONFIG_BRIDGE_NETFILTER
- || (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
-#endif
- )
- addr_type = RTN_LOCAL;
-
- /* ip_route_me_harder expects skb->dst to be set */
- dst_hold(oldskb->dst);
- nskb->dst = oldskb->dst;
-
- if (ip_route_me_harder(nskb, addr_type))
- goto free_nskb;
-
- niph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
- nskb->ip_summed = CHECKSUM_NONE;
-
- /* "Never happens" */
- if (nskb->len > dst_mtu(nskb->dst))
- goto free_nskb;
-
- nf_ct_attach(nskb, oldskb);
-
- ip_local_out(nskb);
- return;
-
- free_nskb:
- kfree_skb(nskb);
-}
-
-static inline void send_unreach(struct sk_buff *skb_in, int code)
-{
- icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
-}
-
static unsigned int
-reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
+reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct ipt_reject_info *reject = par->targinfo;
- /* WARNING: This code causes reentry within iptables.
- This means that the iptables jump stack is now crap. We
- must return an absolute verdict. --RR */
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
- send_unreach(skb, ICMP_NET_UNREACH);
+ nf_send_unreach(skb, ICMP_NET_UNREACH);
break;
case IPT_ICMP_HOST_UNREACHABLE:
- send_unreach(skb, ICMP_HOST_UNREACH);
+ nf_send_unreach(skb, ICMP_HOST_UNREACH);
break;
case IPT_ICMP_PROT_UNREACHABLE:
- send_unreach(skb, ICMP_PROT_UNREACH);
+ nf_send_unreach(skb, ICMP_PROT_UNREACH);
break;
case IPT_ICMP_PORT_UNREACHABLE:
- send_unreach(skb, ICMP_PORT_UNREACH);
+ nf_send_unreach(skb, ICMP_PORT_UNREACH);
break;
case IPT_ICMP_NET_PROHIBITED:
- send_unreach(skb, ICMP_NET_ANO);
+ nf_send_unreach(skb, ICMP_NET_ANO);
break;
case IPT_ICMP_HOST_PROHIBITED:
- send_unreach(skb, ICMP_HOST_ANO);
+ nf_send_unreach(skb, ICMP_HOST_ANO);
break;
case IPT_ICMP_ADMIN_PROHIBITED:
- send_unreach(skb, ICMP_PKT_FILTERED);
+ nf_send_unreach(skb, ICMP_PKT_FILTERED);
break;
case IPT_TCP_RESET:
- send_reset(skb, par->hooknum);
+ nf_send_reset(skb, par->hooknum);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
@@ -175,23 +67,23 @@ reject_tg(struct sk_buff *skb, const struct xt_target_param *par)
return NF_DROP;
}
-static bool reject_tg_check(const struct xt_tgchk_param *par)
+static int reject_tg_check(const struct xt_tgchk_param *par)
{
const struct ipt_reject_info *rejinfo = par->targinfo;
const struct ipt_entry *e = par->entryinfo;
if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
- printk("ipt_REJECT: ECHOREPLY no longer supported.\n");
- return false;
+ pr_info("ECHOREPLY no longer supported.\n");
+ return -EINVAL;
} else if (rejinfo->with == IPT_TCP_RESET) {
/* Must specify that it's a TCP packet */
- if (e->ip.proto != IPPROTO_TCP
- || (e->ip.invflags & XT_INV_PROTO)) {
- printk("ipt_REJECT: TCP_RESET invalid for non-tcp\n");
- return false;
+ if (e->ip.proto != IPPROTO_TCP ||
+ (e->ip.invflags & XT_INV_PROTO)) {
+ pr_info("TCP_RESET invalid for non-tcp\n");
+ return -EINVAL;
}
}
- return true;
+ return 0;
}
static struct xt_target reject_tg_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
new file mode 100644
index 00000000000..a313c3fbeb4
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+static struct iphdr *
+synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
+{
+ struct iphdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) / 4;
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+ iph->ttl = sysctl_ip_default_ttl;
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct iphdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ skb_dst_set_noref(nskb, skb_dst(skb));
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int mss;
+
+ mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss = mss;
+ opts->options |= XT_SYNPROXY_OPT_MSS;
+
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ return true;
+}
+
+static unsigned int
+synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
+
+ if (th->syn && !(th->ack || th->fin || th->rst)) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ return NF_DROP;
+
+ } else if (th->ack && !(th->fin || th->rst || th->syn)) {
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+ return NF_DROP;
+ }
+
+ return XT_CONTINUE;
+}
+
+static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ unsigned int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ thoff = ip_hdrlen(skb);
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+
+static int synproxy_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (e->ip.proto != IPPROTO_TCP ||
+ e->ip.invflags & XT_INV_PROTO)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg4_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV4,
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
+ .target = synproxy_tg4,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg4_check,
+ .destroy = synproxy_tg4_destroy,
+ .me = THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+static int __init synproxy_tg4_init(void)
+{
+ int err;
+
+ err = nf_register_hooks(ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ if (err < 0)
+ goto err1;
+
+ err = xt_register_target(&synproxy_tg4_reg);
+ if (err < 0)
+ goto err2;
+
+ return 0;
+
+err2:
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+err1:
+ return err;
+}
+
+static void __exit synproxy_tg4_exit(void)
+{
+ xt_unregister_target(&synproxy_tg4_reg);
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+}
+
+module_init(synproxy_tg4_init);
+module_exit(synproxy_tg4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
deleted file mode 100644
index 6d76aae90cc..00000000000
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ /dev/null
@@ -1,97 +0,0 @@
-/* TTL modification target for IP tables
- * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ipt_TTL.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: IPv4 TTL field modification target");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-ttl_tg(struct sk_buff *skb, const struct xt_target_param *par)
-{
- struct iphdr *iph;
- const struct ipt_TTL_info *info = par->targinfo;
- int new_ttl;
-
- if (!skb_make_writable(skb, skb->len))
- return NF_DROP;
-
- iph = ip_hdr(skb);
-
- switch (info->mode) {
- case IPT_TTL_SET:
- new_ttl = info->ttl;
- break;
- case IPT_TTL_INC:
- new_ttl = iph->ttl + info->ttl;
- if (new_ttl > 255)
- new_ttl = 255;
- break;
- case IPT_TTL_DEC:
- new_ttl = iph->ttl - info->ttl;
- if (new_ttl < 0)
- new_ttl = 0;
- break;
- default:
- new_ttl = iph->ttl;
- break;
- }
-
- if (new_ttl != iph->ttl) {
- csum_replace2(&iph->check, htons(iph->ttl << 8),
- htons(new_ttl << 8));
- iph->ttl = new_ttl;
- }
-
- return XT_CONTINUE;
-}
-
-static bool ttl_tg_check(const struct xt_tgchk_param *par)
-{
- const struct ipt_TTL_info *info = par->targinfo;
-
- if (info->mode > IPT_TTL_MAXMODE) {
- printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
- info->mode);
- return false;
- }
- if (info->mode != IPT_TTL_SET && info->ttl == 0)
- return false;
- return true;
-}
-
-static struct xt_target ttl_tg_reg __read_mostly = {
- .name = "TTL",
- .family = NFPROTO_IPV4,
- .target = ttl_tg,
- .targetsize = sizeof(struct ipt_TTL_info),
- .table = "mangle",
- .checkentry = ttl_tg_check,
- .me = THIS_MODULE,
-};
-
-static int __init ttl_tg_init(void)
-{
- return xt_register_target(&ttl_tg_reg);
-}
-
-static void __exit ttl_tg_exit(void)
-{
- xt_unregister_target(&ttl_tg_reg);
-}
-
-module_init(ttl_tg_init);
-module_exit(ttl_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 18a2826b57c..9cb993cd224 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -4,6 +4,7 @@
* (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -29,14 +30,15 @@
* Specify, after how many hundredths of a second the queue should be
* flushed even if it is not full yet.
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/spinlock.h>
#include <linux/socket.h>
+#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/kernel.h>
#include <linux/timer.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
#include <linux/netdevice.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
@@ -44,6 +46,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>
#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <linux/bitops.h>
#include <asm/unaligned.h>
@@ -56,8 +59,6 @@ MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
#define ULOG_NL_EVENT 111 /* Harald's favorite number */
#define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */
-#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
-
static unsigned int nlbufsiz = NLMSG_GOODSIZE;
module_param(nlbufsiz, uint, 0400);
MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
@@ -66,7 +67,7 @@ static unsigned int flushtimeout = 10;
module_param(flushtimeout, uint, 0600);
MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
-static int nflog = 1;
+static bool nflog = true;
module_param(nflog, bool, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
@@ -79,23 +80,29 @@ typedef struct {
struct timer_list timer; /* the timer function */
} ulog_buff_t;
-static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
+static int ulog_net_id __read_mostly;
+struct ulog_net {
+ unsigned int nlgroup[ULOG_MAXNLGROUPS];
+ ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
+ struct sock *nflognl;
+ spinlock_t lock;
+};
-static struct sock *nflognl; /* our socket */
-static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
+static struct ulog_net *ulog_pernet(struct net *net)
+{
+ return net_generic(net, ulog_net_id);
+}
/* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroupnum)
+static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
{
- ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+ ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
- if (timer_pending(&ub->timer)) {
- pr_debug("ipt_ULOG: ulog_send: timer was pending, deleting\n");
- del_timer(&ub->timer);
- }
+ pr_debug("ulog_send: timer is deleting\n");
+ del_timer(&ub->timer);
if (!ub->skb) {
- pr_debug("ipt_ULOG: ulog_send: nothing to send\n");
+ pr_debug("ulog_send: nothing to send\n");
return;
}
@@ -104,9 +111,10 @@ static void ulog_send(unsigned int nlgroupnum)
ub->lastnlh->nlmsg_type = NLMSG_DONE;
NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
- pr_debug("ipt_ULOG: throwing %d packets to netlink group %u\n",
+ pr_debug("throwing %d packets to netlink group %u\n",
ub->qlen, nlgroupnum + 1);
- netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
+ netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
+ GFP_ATOMIC);
ub->qlen = 0;
ub->skb = NULL;
@@ -117,13 +125,17 @@ static void ulog_send(unsigned int nlgroupnum)
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
- pr_debug("ipt_ULOG: timer function called, calling ulog_send\n");
+ unsigned int groupnum = *((unsigned int *)data);
+ struct ulog_net *ulog = container_of((void *)data,
+ struct ulog_net,
+ nlgroup[groupnum]);
+ pr_debug("timer function called, calling ulog_send\n");
/* lock to protect against somebody modifying our structure
* from ipt_ulog_target at the same time */
- spin_lock_bh(&ulog_lock);
- ulog_send(data);
- spin_unlock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
+ ulog_send(ulog, groupnum);
+ spin_unlock_bh(&ulog->lock);
}
static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -136,25 +148,23 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
* due to slab allocator restrictions */
n = max(size, nlbufsiz);
- skb = alloc_skb(n, GFP_ATOMIC);
+ skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
if (!skb) {
- PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n", n);
-
if (n > size) {
/* try to allocate only as much as we need for
* current packet */
skb = alloc_skb(size, GFP_ATOMIC);
if (!skb)
- PRINTR("ipt_ULOG: can't even allocate %ub\n",
- size);
+ pr_debug("cannot even allocate %ub\n", size);
}
}
return skb;
}
-static void ipt_ulog_packet(unsigned int hooknum,
+static void ipt_ulog_packet(struct net *net,
+ unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -166,6 +176,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
size_t size, copy_len;
struct nlmsghdr *nlh;
struct timeval tv;
+ struct ulog_net *ulog = ulog_pernet(net);
/* ffs == find first bit set, necessary because userspace
* is already shifting groupnumber, but we need unshifted.
@@ -178,11 +189,11 @@ static void ipt_ulog_packet(unsigned int hooknum,
else
copy_len = loginfo->copy_range;
- size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+ size = nlmsg_total_size(sizeof(*pm) + copy_len);
- ub = &ulog_buffers[groupnum];
+ ub = &ulog->ulog_buffers[groupnum];
- spin_lock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
if (!ub->skb) {
if (!(ub->skb = ulog_alloc_skb(size)))
@@ -192,21 +203,24 @@ static void ipt_ulog_packet(unsigned int hooknum,
/* either the queue len is too high or we don't have
* enough room in nlskb left. send it to userspace. */
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
if (!(ub->skb = ulog_alloc_skb(size)))
goto alloc_failure;
}
- pr_debug("ipt_ULOG: qlen %d, qthreshold %Zu\n", ub->qlen,
- loginfo->qthreshold);
+ pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold);
- /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */
- nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
- sizeof(*pm)+copy_len);
+ nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
+ sizeof(*pm)+copy_len, 0);
+ if (!nlh) {
+ pr_debug("error during nlmsg_put\n");
+ goto out_unlock;
+ }
ub->qlen++;
- pm = NLMSG_DATA(nlh);
+ pm = nlmsg_data(nlh);
+ memset(pm, 0, sizeof(*pm));
/* We might not have a timestamp, get one */
if (skb->tstamp.tv64 == 0)
@@ -219,16 +233,16 @@ static void ipt_ulog_packet(unsigned int hooknum,
put_unaligned(tv.tv_usec, &pm->timestamp_usec);
put_unaligned(skb->mark, &pm->mark);
pm->hook = hooknum;
- if (prefix != NULL)
- strncpy(pm->prefix, prefix, sizeof(pm->prefix));
+ if (prefix != NULL) {
+ strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1);
+ pm->prefix[sizeof(pm->prefix) - 1] = '\0';
+ }
else if (loginfo->prefix[0] != '\0')
strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
- else
- *(pm->prefix) = '\0';
- if (in && in->hard_header_len > 0
- && skb->mac_header != skb->network_header
- && in->hard_header_len <= ULOG_MAC_LEN) {
+ if (in && in->hard_header_len > 0 &&
+ skb->mac_header != skb->network_header &&
+ in->hard_header_len <= ULOG_MAC_LEN) {
memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
pm->mac_len = in->hard_header_len;
} else
@@ -236,13 +250,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
if (in)
strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
- else
- pm->indev_name[0] = '\0';
if (out)
strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
- else
- pm->outdev_name[0] = '\0';
/* copy_len <= skb->len, so can't fail. */
if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
@@ -264,31 +274,30 @@ static void ipt_ulog_packet(unsigned int hooknum,
if (ub->qlen >= loginfo->qthreshold) {
if (loginfo->qthreshold > 1)
nlh->nlmsg_type = NLMSG_DONE;
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
}
-
- spin_unlock_bh(&ulog_lock);
+out_unlock:
+ spin_unlock_bh(&ulog->lock);
return;
-nlmsg_failure:
- PRINTR("ipt_ULOG: error during NLMSG_PUT\n");
-
alloc_failure:
- PRINTR("ipt_ULOG: Error building netlink message\n");
-
- spin_unlock_bh(&ulog_lock);
+ pr_debug("Error building netlink message\n");
+ spin_unlock_bh(&ulog->lock);
}
static unsigned int
-ulog_tg(struct sk_buff *skb, const struct xt_target_param *par)
+ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
+ struct net *net = dev_net(par->in ? par->in : par->out);
+
+ ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
par->targinfo, NULL);
return XT_CONTINUE;
}
-static void ipt_logfn(u_int8_t pf,
+static void ipt_logfn(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -310,24 +319,29 @@ static void ipt_logfn(u_int8_t pf,
strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
}
- ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
}
-static bool ulog_tg_check(const struct xt_tgchk_param *par)
+static int ulog_tg_check(const struct xt_tgchk_param *par)
{
const struct ipt_ulog_info *loginfo = par->targinfo;
+ if (!par->net->xt.ulog_warn_deprecated) {
+ pr_info("ULOG is deprecated and it will be removed soon, "
+ "use NFLOG instead\n");
+ par->net->xt.ulog_warn_deprecated = true;
+ }
+
if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
- pr_debug("ipt_ULOG: prefix term %i\n",
- loginfo->prefix[sizeof(loginfo->prefix) - 1]);
- return false;
+ pr_debug("prefix not null-terminated\n");
+ return -EINVAL;
}
if (loginfo->qthreshold > ULOG_MAX_QLEN) {
- pr_debug("ipt_ULOG: queue threshold %Zu > MAX_QLEN\n",
+ pr_debug("queue threshold %Zu > MAX_QLEN\n",
loginfo->qthreshold);
- return false;
+ return -EINVAL;
}
- return true;
+ return 0;
}
#ifdef CONFIG_COMPAT
@@ -338,7 +352,7 @@ struct compat_ipt_ulog_info {
char prefix[ULOG_PREFIX_LEN];
};
-static void ulog_tg_compat_from_user(void *dst, void *src)
+static void ulog_tg_compat_from_user(void *dst, const void *src)
{
const struct compat_ipt_ulog_info *cl = src;
struct ipt_ulog_info l = {
@@ -351,7 +365,7 @@ static void ulog_tg_compat_from_user(void *dst, void *src)
memcpy(dst, &l, sizeof(l));
}
-static int ulog_tg_compat_to_user(void __user *dst, void *src)
+static int ulog_tg_compat_to_user(void __user *dst, const void *src)
{
const struct ipt_ulog_info *l = src;
struct compat_ipt_ulog_info cl = {
@@ -379,63 +393,54 @@ static struct xt_target ulog_tg_reg __read_mostly = {
.me = THIS_MODULE,
};
-static struct nf_logger ipt_ulog_logger = {
+static struct nf_logger ipt_ulog_logger __read_mostly = {
.name = "ipt_ULOG",
.logfn = ipt_logfn,
.me = THIS_MODULE,
};
-static int __init ulog_tg_init(void)
+static int __net_init ulog_tg_net_init(struct net *net)
{
- int ret, i;
-
- pr_debug("ipt_ULOG: init module\n");
-
- if (nlbufsiz > 128*1024) {
- printk("Netlink buffer has to be <= 128kB\n");
- return -EINVAL;
- }
+ int i;
+ struct ulog_net *ulog = ulog_pernet(net);
+ struct netlink_kernel_cfg cfg = {
+ .groups = ULOG_MAXNLGROUPS,
+ };
+ spin_lock_init(&ulog->lock);
/* initialize ulog_buffers */
- for (i = 0; i < ULOG_MAXNLGROUPS; i++)
- setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
+ for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
+ ulog->nlgroup[i] = i;
+ setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
+ (unsigned long)&ulog->nlgroup[i]);
+ }
- nflognl = netlink_kernel_create(&init_net,
- NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
- NULL, THIS_MODULE);
- if (!nflognl)
+ ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+ if (!ulog->nflognl)
return -ENOMEM;
- ret = xt_register_target(&ulog_tg_reg);
- if (ret < 0) {
- netlink_kernel_release(nflognl);
- return ret;
- }
if (nflog)
- nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+ nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
return 0;
}
-static void __exit ulog_tg_exit(void)
+static void __net_exit ulog_tg_net_exit(struct net *net)
{
ulog_buff_t *ub;
int i;
-
- pr_debug("ipt_ULOG: cleanup_module\n");
+ struct ulog_net *ulog = ulog_pernet(net);
if (nflog)
- nf_log_unregister(&ipt_ulog_logger);
- xt_unregister_target(&ulog_tg_reg);
- netlink_kernel_release(nflognl);
+ nf_log_unset(net, &ipt_ulog_logger);
+
+ netlink_kernel_release(ulog->nflognl);
/* remove pending timers and free allocated skb's */
for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- ub = &ulog_buffers[i];
- if (timer_pending(&ub->timer)) {
- pr_debug("timer was pending, deleting\n");
- del_timer(&ub->timer);
- }
+ ub = &ulog->ulog_buffers[i];
+ pr_debug("timer is deleting\n");
+ del_timer(&ub->timer);
if (ub->skb) {
kfree_skb(ub->skb);
@@ -444,5 +449,50 @@ static void __exit ulog_tg_exit(void)
}
}
+static struct pernet_operations ulog_tg_net_ops = {
+ .init = ulog_tg_net_init,
+ .exit = ulog_tg_net_exit,
+ .id = &ulog_net_id,
+ .size = sizeof(struct ulog_net),
+};
+
+static int __init ulog_tg_init(void)
+{
+ int ret;
+ pr_debug("init module\n");
+
+ if (nlbufsiz > 128*1024) {
+ pr_warn("Netlink buffer has to be <= 128kB\n");
+ return -EINVAL;
+ }
+
+ ret = register_pernet_subsys(&ulog_tg_net_ops);
+ if (ret)
+ goto out_pernet;
+
+ ret = xt_register_target(&ulog_tg_reg);
+ if (ret < 0)
+ goto out_target;
+
+ if (nflog)
+ nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+
+ return 0;
+
+out_target:
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+out_pernet:
+ return ret;
+}
+
+static void __exit ulog_tg_exit(void)
+{
+ pr_debug("cleanup_module\n");
+ if (nflog)
+ nf_log_unregister(&ipt_ulog_logger);
+ xt_unregister_target(&ulog_tg_reg);
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+}
+
module_init(ulog_tg_init);
module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
deleted file mode 100644
index 3b216be3bc9..00000000000
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ /dev/null
@@ -1,134 +0,0 @@
-/*
- * iptables module to match inet_addr_type() of an ip.
- *
- * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
- * (C) 2007 Laszlo Attila Toth <panther@balabit.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ipt_addrtype.h>
-#include <linux/netfilter/x_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("Xtables: address type match for IPv4");
-
-static inline bool match_type(struct net *net, const struct net_device *dev,
- __be32 addr, u_int16_t mask)
-{
- return !!(mask & (1 << inet_dev_addr_type(net, dev, addr)));
-}
-
-static bool
-addrtype_mt_v0(const struct sk_buff *skb, const struct xt_match_param *par)
-{
- struct net *net = dev_net(par->in ? par->in : par->out);
- const struct ipt_addrtype_info *info = par->matchinfo;
- const struct iphdr *iph = ip_hdr(skb);
- bool ret = true;
-
- if (info->source)
- ret &= match_type(net, NULL, iph->saddr, info->source) ^
- info->invert_source;
- if (info->dest)
- ret &= match_type(net, NULL, iph->daddr, info->dest) ^
- info->invert_dest;
-
- return ret;
-}
-
-static bool
-addrtype_mt_v1(const struct sk_buff *skb, const struct xt_match_param *par)
-{
- struct net *net = dev_net(par->in ? par->in : par->out);
- const struct ipt_addrtype_info_v1 *info = par->matchinfo;
- const struct iphdr *iph = ip_hdr(skb);
- const struct net_device *dev = NULL;
- bool ret = true;
-
- if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
- dev = par->in;
- else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
- dev = par->out;
-
- if (info->source)
- ret &= match_type(net, dev, iph->saddr, info->source) ^
- (info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
- if (ret && info->dest)
- ret &= match_type(net, dev, iph->daddr, info->dest) ^
- !!(info->flags & IPT_ADDRTYPE_INVERT_DEST);
- return ret;
-}
-
-static bool addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
-{
- struct ipt_addrtype_info_v1 *info = par->matchinfo;
-
- if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
- info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
- printk(KERN_ERR "ipt_addrtype: both incoming and outgoing "
- "interface limitation cannot be selected\n");
- return false;
- }
-
- if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
- (1 << NF_INET_LOCAL_IN)) &&
- info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
- printk(KERN_ERR "ipt_addrtype: output interface limitation "
- "not valid in PRE_ROUTING and INPUT\n");
- return false;
- }
-
- if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
- (1 << NF_INET_LOCAL_OUT)) &&
- info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
- printk(KERN_ERR "ipt_addrtype: input interface limitation "
- "not valid in POST_ROUTING and OUTPUT\n");
- return false;
- }
-
- return true;
-}
-
-static struct xt_match addrtype_mt_reg[] __read_mostly = {
- {
- .name = "addrtype",
- .family = NFPROTO_IPV4,
- .match = addrtype_mt_v0,
- .matchsize = sizeof(struct ipt_addrtype_info),
- .me = THIS_MODULE
- },
- {
- .name = "addrtype",
- .family = NFPROTO_IPV4,
- .revision = 1,
- .match = addrtype_mt_v1,
- .checkentry = addrtype_mt_checkentry_v1,
- .matchsize = sizeof(struct ipt_addrtype_info_v1),
- .me = THIS_MODULE
- }
-};
-
-static int __init addrtype_mt_init(void)
-{
- return xt_register_matches(addrtype_mt_reg,
- ARRAY_SIZE(addrtype_mt_reg));
-}
-
-static void __exit addrtype_mt_exit(void)
-{
- xt_unregister_matches(addrtype_mt_reg, ARRAY_SIZE(addrtype_mt_reg));
-}
-
-module_init(addrtype_mt_init);
-module_exit(addrtype_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 0104c0b399d..14a2aa8b8a1 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -5,7 +5,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
@@ -18,25 +18,19 @@ MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");
-#ifdef DEBUG_CONNTRACK
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
/* Returns 1 if the spi is matched by the range, 0 otherwise */
static inline bool
spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
{
bool r;
- duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
- min,spi,max);
+ pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+ invert ? '!' : ' ', min, spi, max);
r=(spi >= min && spi <= max) ^ invert;
- duprintf(" result %s\n",r? "PASS" : "FAILED");
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
return r;
}
-static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
+static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
struct ip_auth_hdr _ahdr;
const struct ip_auth_hdr *ah;
@@ -51,8 +45,8 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
- duprintf("Dropping evil AH tinygram.\n");
- *par->hotdrop = true;
+ pr_debug("Dropping evil AH tinygram.\n");
+ par->hotdrop = true;
return 0;
}
@@ -61,16 +55,16 @@ static bool ah_mt(const struct sk_buff *skb, const struct xt_match_param *par)
!!(ahinfo->invflags & IPT_AH_INV_SPI));
}
-static bool ah_mt_check(const struct xt_mtchk_param *par)
+static int ah_mt_check(const struct xt_mtchk_param *par)
{
const struct ipt_ah *ahinfo = par->matchinfo;
/* Must specify no unknown invflags */
if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
- duprintf("ipt_ah: unknown flags %X\n", ahinfo->invflags);
- return false;
+ pr_debug("unknown flags %X\n", ahinfo->invflags);
+ return -EINVAL;
}
- return true;
+ return 0;
}
static struct xt_match ah_mt_reg __read_mostly = {
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
deleted file mode 100644
index 6289b64144c..00000000000
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ /dev/null
@@ -1,129 +0,0 @@
-/* IP tables module for matching the value of the IPv4 and TCP ECN bits
- *
- * (C) 2002 by Harald Welte <laforge@gnumonks.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_ecn.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag match for IPv4");
-MODULE_LICENSE("GPL");
-
-static inline bool match_ip(const struct sk_buff *skb,
- const struct ipt_ecn_info *einfo)
-{
- return (ip_hdr(skb)->tos & IPT_ECN_IP_MASK) == einfo->ip_ect;
-}
-
-static inline bool match_tcp(const struct sk_buff *skb,
- const struct ipt_ecn_info *einfo,
- bool *hotdrop)
-{
- struct tcphdr _tcph;
- const struct tcphdr *th;
-
- /* In practice, TCP match does this, so can't fail. But let's
- * be good citizens.
- */
- th = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
- if (th == NULL) {
- *hotdrop = false;
- return false;
- }
-
- if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
- if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
- if (th->ece == 1)
- return false;
- } else {
- if (th->ece == 0)
- return false;
- }
- }
-
- if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
- if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
- if (th->cwr == 1)
- return false;
- } else {
- if (th->cwr == 0)
- return false;
- }
- }
-
- return true;
-}
-
-static bool ecn_mt(const struct sk_buff *skb, const struct xt_match_param *par)
-{
- const struct ipt_ecn_info *info = par->matchinfo;
-
- if (info->operation & IPT_ECN_OP_MATCH_IP)
- if (!match_ip(skb, info))
- return false;
-
- if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
- if (ip_hdr(skb)->protocol != IPPROTO_TCP)
- return false;
- if (!match_tcp(skb, info, par->hotdrop))
- return false;
- }
-
- return true;
-}
-
-static bool ecn_mt_check(const struct xt_mtchk_param *par)
-{
- const struct ipt_ecn_info *info = par->matchinfo;
- const struct ipt_ip *ip = par->entryinfo;
-
- if (info->operation & IPT_ECN_OP_MATCH_MASK)
- return false;
-
- if (info->invert & IPT_ECN_OP_MATCH_MASK)
- return false;
-
- if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)
- && ip->proto != IPPROTO_TCP) {
- printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for"
- " non-tcp packets\n");
- return false;
- }
-
- return true;
-}
-
-static struct xt_match ecn_mt_reg __read_mostly = {
- .name = "ecn",
- .family = NFPROTO_IPV4,
- .match = ecn_mt,
- .matchsize = sizeof(struct ipt_ecn_info),
- .checkentry = ecn_mt_check,
- .me = THIS_MODULE,
-};
-
-static int __init ecn_mt_init(void)
-{
- return xt_register_match(&ecn_mt_reg);
-}
-
-static void __exit ecn_mt_exit(void)
-{
- xt_unregister_match(&ecn_mt_reg);
-}
-
-module_init(ecn_mt_init);
-module_exit(ecn_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
new file mode 100644
index 00000000000..4bfaedf9b34
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match");
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 rpfilter_get_saddr(__be32 addr)
+{
+ if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+ ipv4_is_zeronet(addr))
+ return 0;
+ return addr;
+}
+
+static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+ const struct net_device *dev, u8 flags)
+{
+ struct fib_result res;
+ bool dev_match;
+ struct net *net = dev_net(dev);
+ int ret __maybe_unused;
+
+ if (fib_lookup(net, fl4, &res))
+ return false;
+
+ if (res.type != RTN_UNICAST) {
+ if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
+ return false;
+ }
+ dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ for (ret = 0; ret < res.fi->fib_nhs; ret++) {
+ struct fib_nh *nh = &res.fi->fib_nh[ret];
+
+ if (nh->nh_dev == dev) {
+ dev_match = true;
+ break;
+ }
+ }
+#else
+ if (FIB_RES_DEV(res) == dev)
+ dev_match = true;
+#endif
+ if (dev_match || flags & XT_RPFILTER_LOOSE)
+ return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
+ return dev_match;
+}
+
+static bool rpfilter_is_local(const struct sk_buff *skb)
+{
+ const struct rtable *rt = skb_rtable(skb);
+ return rt && (rt->rt_flags & RTCF_LOCAL);
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rpfilter_info *info;
+ const struct iphdr *iph;
+ struct flowi4 flow;
+ bool invert;
+
+ info = par->matchinfo;
+ invert = info->flags & XT_RPFILTER_INVERT;
+
+ if (rpfilter_is_local(skb))
+ return true ^ invert;
+
+ iph = ip_hdr(skb);
+ if (ipv4_is_multicast(iph->daddr)) {
+ if (ipv4_is_zeronet(iph->saddr))
+ return ipv4_is_local_multicast(iph->daddr) ^ invert;
+ }
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
+ flow.daddr = iph->saddr;
+ flow.saddr = rpfilter_get_saddr(iph->daddr);
+ flow.flowi4_oif = 0;
+ flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+ flow.flowi4_tos = RT_TOS(iph->tos);
+ flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+
+ return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+ if (info->flags & options) {
+ pr_info("unknown options encountered");
+ return -EINVAL;
+ }
+
+ if (strcmp(par->table, "mangle") != 0 &&
+ strcmp(par->table, "raw") != 0) {
+ pr_info("match only valid in the \'raw\' "
+ "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+ .name = "rpfilter",
+ .family = NFPROTO_IPV4,
+ .checkentry = rpfilter_check,
+ .match = rpfilter_mt,
+ .matchsize = sizeof(struct xt_rpfilter_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING),
+ .me = THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+ return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+ xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
deleted file mode 100644
index 297f1cbf4ff..00000000000
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* IP tables module for matching the value of the TTL
- *
- * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_ttl.h>
-#include <linux/netfilter/x_tables.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: IPv4 TTL field match");
-MODULE_LICENSE("GPL");
-
-static bool ttl_mt(const struct sk_buff *skb, const struct xt_match_param *par)
-{
- const struct ipt_ttl_info *info = par->matchinfo;
- const u8 ttl = ip_hdr(skb)->ttl;
-
- switch (info->mode) {
- case IPT_TTL_EQ:
- return ttl == info->ttl;
- case IPT_TTL_NE:
- return ttl != info->ttl;
- case IPT_TTL_LT:
- return ttl < info->ttl;
- case IPT_TTL_GT:
- return ttl > info->ttl;
- default:
- printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
- info->mode);
- return false;
- }
-
- return false;
-}
-
-static struct xt_match ttl_mt_reg __read_mostly = {
- .name = "ttl",
- .family = NFPROTO_IPV4,
- .match = ttl_mt,
- .matchsize = sizeof(struct ipt_ttl_info),
- .me = THIS_MODULE,
-};
-
-static int __init ttl_mt_init(void)
-{
- return xt_register_match(&ttl_mt_reg);
-}
-
-static void __exit ttl_mt_exit(void)
-{
- xt_unregister_match(&ttl_mt_reg);
-}
-
-module_init(ttl_mt_init);
-module_exit(ttl_mt_exit);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 52cb6939d09..e08a74a243a 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -13,6 +13,7 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
#include <net/ip.h>
MODULE_LICENSE("GPL");
@@ -23,123 +24,58 @@ MODULE_DESCRIPTION("iptables filter table");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT))
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
-} initial_table __net_initdata = {
- .repl = {
- .name = "filter",
- .valid_hooks = FILTER_VALID_HOOKS,
- .num_entries = 4,
- .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- .hook_entry = {
- [NF_INET_LOCAL_IN] = 0,
- [NF_INET_FORWARD] = sizeof(struct ipt_standard),
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
- },
- .underflow = {
- [NF_INET_LOCAL_IN] = 0,
- [NF_INET_FORWARD] = sizeof(struct ipt_standard),
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
- },
- },
- .entries = {
- IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
- IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
- IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
- },
- .term = IPT_ERROR_INIT, /* ERROR */
-};
-
-static struct xt_table packet_filter = {
+static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_FILTER,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ipt_local_in_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- return ipt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.iptable_filter);
-}
-
static unsigned int
-ipt_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.iptable_filter);
-}
+ const struct net *net;
-static unsigned int
-ipt_local_out_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* root is playing with raw sockets. */
return NF_ACCEPT;
- return ipt_do_table(skb, hook, in, out,
- dev_net(out)->ipv4.iptable_filter);
+
+ net = dev_net((in != NULL) ? in : out);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_filter);
}
-static struct nf_hook_ops ipt_ops[] __read_mostly = {
- {
- .hook = ipt_local_in_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_FILTER,
- },
- {
- .hook = ipt_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_FORWARD,
- .priority = NF_IP_PRI_FILTER,
- },
- {
- .hook = ipt_local_out_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_OUT,
- .priority = NF_IP_PRI_FILTER,
- },
-};
+static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
-static int forward = NF_ACCEPT;
+static bool forward = true;
module_param(forward, bool, 0000);
static int __net_init iptable_filter_net_init(struct net *net)
{
- /* Register table */
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&packet_filter);
+ if (repl == NULL)
+ return -ENOMEM;
+ /* Entry 1 is the FORWARD hook */
+ ((struct ipt_standard *)repl->entries)[1].target.verdict =
+ forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
+
net->ipv4.iptable_filter =
- ipt_register_table(net, &packet_filter, &initial_table.repl);
- if (IS_ERR(net->ipv4.iptable_filter))
- return PTR_ERR(net->ipv4.iptable_filter);
- return 0;
+ ipt_register_table(net, &packet_filter, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
}
static void __net_exit iptable_filter_net_exit(struct net *net)
{
- ipt_unregister_table(net->ipv4.iptable_filter);
+ ipt_unregister_table(net, net->ipv4.iptable_filter);
}
static struct pernet_operations iptable_filter_net_ops = {
@@ -151,33 +87,23 @@ static int __init iptable_filter_init(void)
{
int ret;
- if (forward < 0 || forward > NF_MAX_VERDICT) {
- printk("iptables forward must be 0 or 1\n");
- return -EINVAL;
- }
-
- /* Entry 1 is the FORWARD hook */
- initial_table.entries[1].target.verdict = -forward - 1;
-
ret = register_pernet_subsys(&iptable_filter_net_ops);
if (ret < 0)
return ret;
/* Register hooks */
- ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
- if (ret < 0)
- goto cleanup_table;
-
- return ret;
+ filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
+ if (IS_ERR(filter_ops)) {
+ ret = PTR_ERR(filter_ops);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
+ }
- cleanup_table:
- unregister_pernet_subsys(&iptable_filter_net_ops);
return ret;
}
static void __exit iptable_filter_fini(void)
{
- nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ xt_hook_unlink(&packet_filter, filter_ops);
unregister_pernet_subsys(&iptable_filter_net_ops);
}
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 3929d20b9e4..6a5079c34bb 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -12,6 +12,7 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <net/sock.h>
#include <net/route.h>
#include <linux/ip.h>
@@ -27,112 +28,27 @@ MODULE_DESCRIPTION("iptables mangle table");
(1 << NF_INET_LOCAL_OUT) | \
(1 << NF_INET_POST_ROUTING))
-/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[5];
- struct ipt_error term;
-} initial_table __net_initdata = {
- .repl = {
- .name = "mangle",
- .valid_hooks = MANGLE_VALID_HOOKS,
- .num_entries = 6,
- .size = sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
- .hook_entry = {
- [NF_INET_PRE_ROUTING] = 0,
- [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
- [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
- [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
- },
- .underflow = {
- [NF_INET_PRE_ROUTING] = 0,
- [NF_INET_LOCAL_IN] = sizeof(struct ipt_standard),
- [NF_INET_FORWARD] = sizeof(struct ipt_standard) * 2,
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
- [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard) * 4,
- },
- },
- .entries = {
- IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
- IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
- IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
- IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
- IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */
- },
- .term = IPT_ERROR_INIT, /* ERROR */
-};
-
-static struct xt_table packet_mangler = {
+static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_mangler.lock),
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_MANGLE,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ipt_pre_routing_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- return ipt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.iptable_mangle);
-}
-
-static unsigned int
-ipt_post_routing_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- return ipt_do_table(skb, hook, in, out,
- dev_net(out)->ipv4.iptable_mangle);
-}
-
-static unsigned int
-ipt_local_in_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- return ipt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.iptable_mangle);
-}
-
static unsigned int
-ipt_forward_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- return ipt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.iptable_mangle);
-}
-
-static unsigned int
-ipt_local_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
{
unsigned int ret;
const struct iphdr *iph;
u_int8_t tos;
__be32 saddr, daddr;
u_int32_t mark;
+ int err;
/* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr)
- || ip_hdrlen(skb) < sizeof(struct iphdr))
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
/* Save things which could affect route */
@@ -142,74 +58,61 @@ ipt_local_hook(unsigned int hook,
daddr = iph->daddr;
tos = iph->tos;
- ret = ipt_do_table(skb, hook, in, out,
+ ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
dev_net(out)->ipv4.iptable_mangle);
/* Reroute for ANY change. */
- if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE) {
+ if (ret != NF_DROP && ret != NF_STOLEN) {
iph = ip_hdr(skb);
if (iph->saddr != saddr ||
iph->daddr != daddr ||
skb->mark != mark ||
- iph->tos != tos)
- if (ip_route_me_harder(skb, RTN_UNSPEC))
- ret = NF_DROP;
+ iph->tos != tos) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
}
return ret;
}
-static struct nf_hook_ops ipt_ops[] __read_mostly = {
- {
- .hook = ipt_pre_routing_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_PRE_ROUTING,
- .priority = NF_IP_PRI_MANGLE,
- },
- {
- .hook = ipt_local_in_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_MANGLE,
- },
- {
- .hook = ipt_forward_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_FORWARD,
- .priority = NF_IP_PRI_MANGLE,
- },
- {
- .hook = ipt_local_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_OUT,
- .priority = NF_IP_PRI_MANGLE,
- },
- {
- .hook = ipt_post_routing_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_MANGLE,
- },
-};
+/* The work comes in here from netfilter.c. */
+static unsigned int
+iptable_mangle_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
+ return ipt_mangle_out(skb, out);
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ dev_net(out)->ipv4.iptable_mangle);
+ /* PREROUTING/INPUT/FORWARD: */
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ dev_net(in)->ipv4.iptable_mangle);
+}
+
+static struct nf_hook_ops *mangle_ops __read_mostly;
static int __net_init iptable_mangle_net_init(struct net *net)
{
- /* Register table */
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&packet_mangler);
+ if (repl == NULL)
+ return -ENOMEM;
net->ipv4.iptable_mangle =
- ipt_register_table(net, &packet_mangler, &initial_table.repl);
- if (IS_ERR(net->ipv4.iptable_mangle))
- return PTR_ERR(net->ipv4.iptable_mangle);
- return 0;
+ ipt_register_table(net, &packet_mangler, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle);
}
static void __net_exit iptable_mangle_net_exit(struct net *net)
{
- ipt_unregister_table(net->ipv4.iptable_mangle);
+ ipt_unregister_table(net, net->ipv4.iptable_mangle);
}
static struct pernet_operations iptable_mangle_net_ops = {
@@ -226,20 +129,18 @@ static int __init iptable_mangle_init(void)
return ret;
/* Register hooks */
- ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
- if (ret < 0)
- goto cleanup_table;
-
- return ret;
+ mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
+ if (IS_ERR(mangle_ops)) {
+ ret = PTR_ERR(mangle_ops);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
+ }
- cleanup_table:
- unregister_pernet_subsys(&iptable_mangle_net_ops);
return ret;
}
static void __exit iptable_mangle_fini(void)
{
- nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ xt_hook_unlink(&packet_mangler, mangle_ops);
unregister_pernet_subsys(&iptable_mangle_net_ops);
}
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
new file mode 100644
index 00000000000..f1787c04a4d
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -0,0 +1,328 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv4_table = {
+ .name = "nat",
+ .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+};
+
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ /* Force range to this IP; let proto decide mapping for
+ * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ */
+ struct nf_nat_range range;
+
+ range.flags = 0;
+ pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
+ HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
+
+ return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ unsigned int ret;
+
+ ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
+ if (ret == NF_ACCEPT) {
+ if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+ ret = alloc_null_binding(ct, hooknum);
+ }
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ /* maniptype == SRC for postrouting. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+
+ /* We never see fragments: conntrack defrags on pre-routing
+ * and local-out, and nf_nat_out protects post-routing.
+ */
+ NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else {
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
+}
+
+static unsigned int
+nf_nat_ipv4_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ __be32 daddr = ip_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ daddr != ip_hdr(skb)->daddr)
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if ((ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+
+static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv4_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv4_out,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv4_local_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv4_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+};
+
+static int __net_init iptable_nat_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
+}
+
+static void __net_exit iptable_nat_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.nat_table);
+}
+
+static struct pernet_operations iptable_nat_net_ops = {
+ .init = iptable_nat_net_init,
+ .exit = iptable_nat_net_exit,
+};
+
+static int __init iptable_nat_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&iptable_nat_net_ops);
+ if (err < 0)
+ goto err1;
+
+ err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ unregister_pernet_subsys(&iptable_nat_net_ops);
+err1:
+ return err;
+}
+
+static void __exit iptable_nat_exit(void)
+{
+ nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ unregister_pernet_subsys(&iptable_nat_net_ops);
+}
+
+module_init(iptable_nat_init);
+module_exit(iptable_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 7f65d18333e..b2f7e8f9831 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -5,103 +5,55 @@
*/
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
#include <net/ip.h>
#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[2];
- struct ipt_error term;
-} initial_table __net_initdata = {
- .repl = {
- .name = "raw",
- .valid_hooks = RAW_VALID_HOOKS,
- .num_entries = 3,
- .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
- .hook_entry = {
- [NF_INET_PRE_ROUTING] = 0,
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
- },
- .underflow = {
- [NF_INET_PRE_ROUTING] = 0,
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard)
- },
- },
- .entries = {
- IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
- IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
- },
- .term = IPT_ERROR_INIT, /* ERROR */
-};
-
-static struct xt_table packet_raw = {
+static const struct xt_table packet_raw = {
.name = "raw",
.valid_hooks = RAW_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(packet_raw.lock),
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_RAW,
};
/* The work comes in here from netfilter.c. */
static unsigned int
-ipt_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.iptable_raw);
-}
+ const struct net *net;
-static unsigned int
-ipt_local_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* root is playing with raw sockets. */
return NF_ACCEPT;
- return ipt_do_table(skb, hook, in, out,
- dev_net(out)->ipv4.iptable_raw);
+
+ net = dev_net((in != NULL) ? in : out);
+ return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
}
-/* 'raw' is the very first table. */
-static struct nf_hook_ops ipt_ops[] __read_mostly = {
- {
- .hook = ipt_hook,
- .pf = PF_INET,
- .hooknum = NF_INET_PRE_ROUTING,
- .priority = NF_IP_PRI_RAW,
- .owner = THIS_MODULE,
- },
- {
- .hook = ipt_local_hook,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_OUT,
- .priority = NF_IP_PRI_RAW,
- .owner = THIS_MODULE,
- },
-};
+static struct nf_hook_ops *rawtable_ops __read_mostly;
static int __net_init iptable_raw_net_init(struct net *net)
{
- /* Register table */
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&packet_raw);
+ if (repl == NULL)
+ return -ENOMEM;
net->ipv4.iptable_raw =
- ipt_register_table(net, &packet_raw, &initial_table.repl);
- if (IS_ERR(net->ipv4.iptable_raw))
- return PTR_ERR(net->ipv4.iptable_raw);
- return 0;
+ ipt_register_table(net, &packet_raw, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw);
}
static void __net_exit iptable_raw_net_exit(struct net *net)
{
- ipt_unregister_table(net->ipv4.iptable_raw);
+ ipt_unregister_table(net, net->ipv4.iptable_raw);
}
static struct pernet_operations iptable_raw_net_ops = {
@@ -118,20 +70,18 @@ static int __init iptable_raw_init(void)
return ret;
/* Register hooks */
- ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
- if (ret < 0)
- goto cleanup_table;
+ rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
+ if (IS_ERR(rawtable_ops)) {
+ ret = PTR_ERR(rawtable_ops);
+ unregister_pernet_subsys(&iptable_raw_net_ops);
+ }
return ret;
-
- cleanup_table:
- unregister_pernet_subsys(&iptable_raw_net_ops);
- return ret;
}
static void __exit iptable_raw_fini(void)
{
- nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ xt_hook_unlink(&packet_raw, rawtable_ops);
unregister_pernet_subsys(&iptable_raw_net_ops);
}
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index a52a35f4a58..c86647ed207 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -17,6 +17,7 @@
*/
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
#include <net/ip.h>
MODULE_LICENSE("GPL");
@@ -27,119 +28,51 @@ MODULE_DESCRIPTION("iptables security table, for MAC rules");
(1 << NF_INET_FORWARD) | \
(1 << NF_INET_LOCAL_OUT)
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
-} initial_table __net_initdata = {
- .repl = {
- .name = "security",
- .valid_hooks = SECURITY_VALID_HOOKS,
- .num_entries = 4,
- .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- .hook_entry = {
- [NF_INET_LOCAL_IN] = 0,
- [NF_INET_FORWARD] = sizeof(struct ipt_standard),
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
- },
- .underflow = {
- [NF_INET_LOCAL_IN] = 0,
- [NF_INET_FORWARD] = sizeof(struct ipt_standard),
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2,
- },
- },
- .entries = {
- IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_IN */
- IPT_STANDARD_INIT(NF_ACCEPT), /* FORWARD */
- IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
- },
- .term = IPT_ERROR_INIT, /* ERROR */
-};
-
-static struct xt_table security_table = {
+static const struct xt_table security_table = {
.name = "security",
.valid_hooks = SECURITY_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(security_table.lock),
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_SECURITY,
};
static unsigned int
-ipt_local_in_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- return ipt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.iptable_security);
-}
-
-static unsigned int
-ipt_forward_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(skb, hook, in, out,
- dev_net(in)->ipv4.iptable_security);
-}
+ const struct net *net;
-static unsigned int
-ipt_local_out_hook(unsigned int hook,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* Somebody is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr)
- || ip_hdrlen(skb) < sizeof(struct iphdr))
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* Somebody is playing with raw sockets. */
return NF_ACCEPT;
- return ipt_do_table(skb, hook, in, out,
- dev_net(out)->ipv4.iptable_security);
+
+ net = dev_net((in != NULL) ? in : out);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_security);
}
-static struct nf_hook_ops ipt_ops[] __read_mostly = {
- {
- .hook = ipt_local_in_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_SECURITY,
- },
- {
- .hook = ipt_forward_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_FORWARD,
- .priority = NF_IP_PRI_SECURITY,
- },
- {
- .hook = ipt_local_out_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_OUT,
- .priority = NF_IP_PRI_SECURITY,
- },
-};
+static struct nf_hook_ops *sectbl_ops __read_mostly;
static int __net_init iptable_security_net_init(struct net *net)
{
- net->ipv4.iptable_security =
- ipt_register_table(net, &security_table, &initial_table.repl);
-
- if (IS_ERR(net->ipv4.iptable_security))
- return PTR_ERR(net->ipv4.iptable_security);
+ struct ipt_replace *repl;
- return 0;
+ repl = ipt_alloc_initial_table(&security_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.iptable_security =
+ ipt_register_table(net, &security_table, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_security);
}
static void __net_exit iptable_security_net_exit(struct net *net)
{
- ipt_unregister_table(net->ipv4.iptable_security);
+ ipt_unregister_table(net, net->ipv4.iptable_security);
}
static struct pernet_operations iptable_security_net_ops = {
@@ -155,9 +88,11 @@ static int __init iptable_security_init(void)
if (ret < 0)
return ret;
- ret = nf_register_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
- if (ret < 0)
+ sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
+ if (IS_ERR(sectbl_ops)) {
+ ret = PTR_ERR(sectbl_ops);
goto cleanup_table;
+ }
return ret;
@@ -168,7 +103,7 @@ cleanup_table:
static void __exit iptable_security_fini(void)
{
- nf_unregister_hooks(ipt_ops, ARRAY_SIZE(ipt_ops));
+ xt_hook_unlink(&security_table, sectbl_ops);
unregister_pernet_subsys(&iptable_security_net_ops);
}
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 4beb04fac58..8127dc80286 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -1,6 +1,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -22,15 +23,13 @@
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
-
-int (*nf_nat_seq_adjust_hook)(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo);
-EXPORT_SYMBOL_GPL(nf_nat_seq_adjust_hook);
+#include <net/netfilter/nf_log.h>
static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
struct nf_conntrack_tuple *tuple)
@@ -72,72 +71,92 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
if (iph == NULL)
- return -NF_DROP;
+ return -NF_ACCEPT;
/* Conntrack defragments packets, we might still see fragments
* inside ICMP packets though. */
if (iph->frag_off & htons(IP_OFFSET))
- return -NF_DROP;
+ return -NF_ACCEPT;
*dataoff = nhoff + (iph->ihl << 2);
*protonum = iph->protocol;
+ /* Check bogus IP headers */
+ if (*dataoff > skb->len) {
+ pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
+ "nhoff %u, ihl %u, skblen %u\n",
+ nhoff, iph->ihl << 2, skb->len);
+ return -NF_ACCEPT;
+ }
+
return NF_ACCEPT;
}
-static unsigned int ipv4_confirm(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
- unsigned int ret;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
- if (!ct || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)
- goto out;
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
help = nfct_help(ct);
if (!help)
- goto out;
+ return NF_ACCEPT;
/* rcu_read_lock()ed by nf_hook_slow */
helper = rcu_dereference(help->helper);
if (!helper)
- goto out;
+ return NF_ACCEPT;
- ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
- ct, ctinfo);
- if (ret != NF_ACCEPT)
- return ret;
+ return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+ ct, ctinfo);
+}
+
+static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
- if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
- typeof(nf_nat_seq_adjust_hook) seq_adjust;
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
- seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust || !seq_adjust(skb, ct, ctinfo))
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
+ }
}
out:
/* We've seen it coming out the other side: confirm it */
return nf_conntrack_confirm(skb);
}
-static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
}
-static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -147,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
@@ -156,28 +175,42 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
{
.hook = ipv4_conntrack_in,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK,
},
{
.hook = ipv4_conntrack_local,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK,
},
{
+ .hook = ipv4_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+ },
+ {
.hook = ipv4_confirm,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_POST_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
{
+ .hook = ipv4_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+ },
+ {
.hook = ipv4_confirm,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_IN,
.priority = NF_IP_PRI_CONNTRACK_CONFIRM,
},
@@ -187,53 +220,40 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-static ctl_table ip_ct_sysctl_table[] = {
+static struct ctl_table ip_ct_sysctl_table[] = {
{
- .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
.procname = "ip_conntrack_max",
- .data = &nf_conntrack_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
.procname = "ip_conntrack_count",
- .data = &init_net.ct.count,
.maxlen = sizeof(int),
.mode = 0444,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
.procname = "ip_conntrack_buckets",
- .data = &nf_conntrack_htable_size,
.maxlen = sizeof(unsigned int),
.mode = 0444,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_IPV4_NF_CONNTRACK_CHECKSUM,
.procname = "ip_conntrack_checksum",
- .data = &init_net.ct.sysctl_checksum,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
- .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
.procname = "ip_conntrack_log_invalid",
- .data = &init_net.ct.sysctl_log_invalid,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .strategy = sysctl_intvec,
.extra1 = &log_invalid_proto_min,
.extra2 = &log_invalid_proto_max,
},
- {
- .ctl_name = 0
- }
+ { }
};
#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
@@ -249,16 +269,16 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
struct nf_conntrack_tuple tuple;
memset(&tuple, 0, sizeof(tuple));
- tuple.src.u3.ip = inet->rcv_saddr;
- tuple.src.u.tcp.port = inet->sport;
- tuple.dst.u3.ip = inet->daddr;
- tuple.dst.u.tcp.port = inet->dport;
+ tuple.src.u3.ip = inet->inet_rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.ip = inet->inet_daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
tuple.src.l3num = PF_INET;
- tuple.dst.protonum = IPPROTO_TCP;
+ tuple.dst.protonum = sk->sk_protocol;
- /* We only do TCP at the moment: is there a better way? */
- if (strcmp(sk->sk_prot->name, "TCP")) {
- pr_debug("SO_ORIGINAL_DST: Not a TCP socket\n");
+ /* We only do TCP and SCTP at the moment: is there a better way? */
+ if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
+ pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
return -ENOPROTOOPT;
}
@@ -268,7 +288,7 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -EINVAL;
}
- h = nf_conntrack_find_get(sock_net(sk), &tuple);
+ h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
if (h) {
struct sockaddr_in sin;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
@@ -302,8 +322,9 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
- NLA_PUT_BE32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip);
- NLA_PUT_BE32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip);
+ if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+ nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -326,6 +347,11 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
return 0;
}
+
+static int ipv4_nlattr_tuple_size(void)
+{
+ return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
+}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -336,6 +362,25 @@ static struct nf_sockopt_ops so_getorigdst = {
.owner = THIS_MODULE,
};
+static int ipv4_init_net(struct net *net)
+{
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ struct nf_ip_net *in = &net->ct.nf_ct_proto;
+ in->ctl_table = kmemdup(ip_ct_sysctl_table,
+ sizeof(ip_ct_sysctl_table),
+ GFP_KERNEL);
+ if (!in->ctl_table)
+ return -ENOMEM;
+
+ in->ctl_table[0].data = &nf_conntrack_max;
+ in->ctl_table[1].data = &net->ct.count;
+ in->ctl_table[2].data = &net->ct.htable_size;
+ in->ctl_table[3].data = &net->ct.sysctl_checksum;
+ in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
+#endif
+ return 0;
+}
+
struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.l3proto = PF_INET,
.name = "ipv4",
@@ -345,13 +390,14 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.get_l4proto = ipv4_get_l4proto,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = ipv4_tuple_to_nlattr,
+ .nlattr_tuple_size = ipv4_nlattr_tuple_size,
.nlattr_to_tuple = ipv4_nlattr_to_tuple,
.nla_policy = ipv4_nla_policy,
#endif
#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
- .ctl_table_path = nf_net_ipv4_netfilter_sysctl_path,
- .ctl_table = ip_ct_sysctl_table,
+ .ctl_table_path = "net/ipv4/netfilter",
#endif
+ .init_net = ipv4_init_net,
.me = THIS_MODULE,
};
@@ -362,6 +408,54 @@ MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
MODULE_ALIAS("ip_conntrack");
MODULE_LICENSE("GPL");
+static int ipv4_net_init(struct net *net)
+{
+ int ret = 0;
+
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_tcp4: pernet registration failed\n");
+ goto out_tcp;
+ }
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_udp4: pernet registration failed\n");
+ goto out_udp;
+ }
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
+ if (ret < 0) {
+ pr_err("nf_conntrack_icmp4: pernet registration failed\n");
+ goto out_icmp;
+ }
+ ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: pernet registration failed\n");
+ goto out_ipv4;
+ }
+ return 0;
+out_ipv4:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
+out_icmp:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
+out_udp:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+out_tcp:
+ return ret;
+}
+
+static void ipv4_net_exit(struct net *net)
+{
+ nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+}
+
+static struct pernet_operations ipv4_net_ops = {
+ .init = ipv4_net_init,
+ .exit = ipv4_net_exit,
+};
+
static int __init nf_conntrack_l3proto_ipv4_init(void)
{
int ret = 0;
@@ -375,54 +469,63 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
return ret;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp4);
+ ret = register_pernet_subsys(&ipv4_net_ops);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register tcp.\n");
+ pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
goto cleanup_sockopt;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp4);
+ ret = nf_register_hooks(ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register udp.\n");
- goto cleanup_tcp;
+ pr_err("nf_conntrack_ipv4: can't register hooks.\n");
+ goto cleanup_pernet;
}
- ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_icmp);
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register icmp.\n");
- goto cleanup_udp;
+ pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
+ goto cleanup_hooks;
}
- ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register ipv4\n");
- goto cleanup_icmp;
+ pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
+ goto cleanup_tcp4;
}
- ret = nf_register_hooks(ipv4_conntrack_ops,
- ARRAY_SIZE(ipv4_conntrack_ops));
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register hooks.\n");
- goto cleanup_ipv4;
+ pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
+ goto cleanup_udp4;
}
+
+ ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
+ goto cleanup_icmpv4;
+ }
+
#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
ret = nf_conntrack_ipv4_compat_init();
if (ret < 0)
- goto cleanup_hooks;
+ goto cleanup_proto;
#endif
return ret;
#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ cleanup_proto:
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+#endif
+ cleanup_icmpv4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ cleanup_udp4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ cleanup_tcp4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
cleanup_hooks:
nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
-#endif
- cleanup_ipv4:
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
- cleanup_icmp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
- cleanup_udp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
- cleanup_tcp:
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ cleanup_pernet:
+ unregister_pernet_subsys(&ipv4_net_ops);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst);
return ret;
@@ -434,19 +537,14 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
nf_conntrack_ipv4_compat_fini();
#endif
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmp);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp4);
- nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ unregister_pernet_subsys(&ipv4_net_ops);
nf_unregister_sockopt(&so_getorigdst);
}
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
-
-void need_ipv4_conntrack(void)
-{
- return;
-}
-EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 6ba5c557690..4c48e434bb1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -2,6 +2,7 @@
*
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -11,6 +12,7 @@
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/percpu.h>
+#include <linux/security.h>
#include <net/net_namespace.h>
#include <linux/netfilter.h>
@@ -19,46 +21,52 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
+#include <linux/export.h>
struct ct_iter_state {
struct seq_net_private p;
unsigned int bucket;
};
-static struct hlist_node *ct_get_first(struct seq_file *seq)
+static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
{
struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
- struct hlist_node *n;
+ struct hlist_nulls_node *n;
for (st->bucket = 0;
- st->bucket < nf_conntrack_htable_size;
+ st->bucket < net->ct.htable_size;
st->bucket++) {
- n = rcu_dereference(net->ct.hash[st->bucket].first);
- if (n)
+ n = rcu_dereference(
+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ if (!is_a_nulls(n))
return n;
}
return NULL;
}
-static struct hlist_node *ct_get_next(struct seq_file *seq,
- struct hlist_node *head)
+static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
+ struct hlist_nulls_node *head)
{
struct net *net = seq_file_net(seq);
struct ct_iter_state *st = seq->private;
- head = rcu_dereference(head->next);
- while (head == NULL) {
- if (++st->bucket >= nf_conntrack_htable_size)
- return NULL;
- head = rcu_dereference(net->ct.hash[st->bucket].first);
+ head = rcu_dereference(hlist_nulls_next_rcu(head));
+ while (is_a_nulls(head)) {
+ if (likely(get_nulls_value(head) == st->bucket)) {
+ if (++st->bucket >= net->ct.htable_size)
+ return NULL;
+ }
+ head = rcu_dereference(
+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
}
return head;
}
-static struct hlist_node *ct_get_idx(struct seq_file *seq, loff_t pos)
+static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
{
- struct hlist_node *head = ct_get_first(seq);
+ struct hlist_nulls_node *head = ct_get_first(seq);
if (head)
while (pos && (head = ct_get_next(seq, head)))
@@ -85,71 +93,99 @@ static void ct_seq_stop(struct seq_file *s, void *v)
rcu_read_unlock();
}
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+{
+ int ret;
+ u32 len;
+ char *secctx;
+
+ ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
+ if (ret)
+ return 0;
+
+ ret = seq_printf(s, "secctx=%s ", secctx);
+
+ security_release_secctx(secctx, len);
+ return ret;
+}
+#else
+static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+{
+ return 0;
+}
+#endif
+
static int ct_seq_show(struct seq_file *s, void *v)
{
- const struct nf_conntrack_tuple_hash *hash = v;
- const struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+ struct nf_conntrack_tuple_hash *hash = v;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
const struct nf_conntrack_l3proto *l3proto;
const struct nf_conntrack_l4proto *l4proto;
+ int ret = 0;
NF_CT_ASSERT(ct);
+ if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ return 0;
+
/* we only want to print DIR_ORIGINAL */
if (NF_CT_DIRECTION(hash))
- return 0;
+ goto release;
if (nf_ct_l3num(ct) != AF_INET)
- return 0;
+ goto release;
l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
NF_CT_ASSERT(l3proto);
l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
NF_CT_ASSERT(l4proto);
+ ret = -ENOSPC;
if (seq_printf(s, "%-8s %u %ld ",
l4proto->name, nf_ct_protonum(ct),
timer_pending(&ct->timeout)
? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
- return -ENOSPC;
+ goto release;
if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
- return -ENOSPC;
+ goto release;
if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
l3proto, l4proto))
- return -ENOSPC;
+ goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
- return -ENOSPC;
+ goto release;
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
if (seq_printf(s, "[UNREPLIED] "))
- return -ENOSPC;
+ goto release;
if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
l3proto, l4proto))
- return -ENOSPC;
+ goto release;
if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
- return -ENOSPC;
+ goto release;
if (test_bit(IPS_ASSURED_BIT, &ct->status))
if (seq_printf(s, "[ASSURED] "))
- return -ENOSPC;
+ goto release;
#ifdef CONFIG_NF_CONNTRACK_MARK
if (seq_printf(s, "mark=%u ", ct->mark))
- return -ENOSPC;
+ goto release;
#endif
-#ifdef CONFIG_NF_CONNTRACK_SECMARK
- if (seq_printf(s, "secmark=%u ", ct->secmark))
- return -ENOSPC;
-#endif
+ if (ct_show_secctx(s, ct))
+ goto release;
if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
- return -ENOSPC;
-
- return 0;
+ goto release;
+ ret = 0;
+release:
+ nf_ct_put(ct);
+ return ret;
}
static const struct seq_operations ct_seq_ops = {
@@ -186,7 +222,8 @@ static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
struct hlist_node *n;
for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
- n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ n = rcu_dereference(
+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
if (n)
return n;
}
@@ -199,11 +236,12 @@ static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
struct net *net = seq_file_net(seq);
struct ct_expect_iter_state *st = seq->private;
- head = rcu_dereference(head->next);
+ head = rcu_dereference(hlist_next_rcu(head));
while (head == NULL) {
if (++st->bucket >= nf_ct_expect_hsize)
return NULL;
- head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
+ head = rcu_dereference(
+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
}
return head;
}
@@ -327,12 +365,12 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
const struct ip_conntrack_stat *st = v;
if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
+ seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
return 0;
}
seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
- "%08x %08x %08x %08x %08x %08x %08x %08x \n",
+ "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
nr_conntracks,
st->searched,
st->found,
@@ -349,7 +387,8 @@ static int ct_cpu_seq_show(struct seq_file *seq, void *v)
st->expect_new,
st->expect_create,
- st->expect_delete
+ st->expect_delete,
+ st->search_restart
);
return 0;
}
@@ -379,12 +418,12 @@ static int __net_init ip_conntrack_net_init(struct net *net)
{
struct proc_dir_entry *proc, *proc_exp, *proc_stat;
- proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops);
+ proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops);
if (!proc)
goto err1;
- proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440,
- &ip_exp_file_ops);
+ proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net,
+ &ip_exp_file_ops);
if (!proc_exp)
goto err2;
@@ -395,9 +434,9 @@ static int __net_init ip_conntrack_net_init(struct net *net)
return 0;
err3:
- proc_net_remove(net, "ip_conntrack_expect");
+ remove_proc_entry("ip_conntrack_expect", net->proc_net);
err2:
- proc_net_remove(net, "ip_conntrack");
+ remove_proc_entry("ip_conntrack", net->proc_net);
err1:
return -ENOMEM;
}
@@ -405,8 +444,8 @@ err1:
static void __net_exit ip_conntrack_net_exit(struct net *net)
{
remove_proc_entry("ip_conntrack", net->proc_net_stat);
- proc_net_remove(net, "ip_conntrack_expect");
- proc_net_remove(net, "ip_conntrack");
+ remove_proc_entry("ip_conntrack_expect", net->proc_net);
+ remove_proc_entry("ip_conntrack", net->proc_net);
}
static struct pernet_operations ip_conntrack_net_ops = {
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 2a8bee26f43..a338dad41b7 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -1,5 +1,6 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -18,10 +19,16 @@
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_log.h>
static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
+static inline struct nf_icmp_net *icmp_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp;
+}
+
static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
struct nf_conntrack_tuple *tuple)
{
@@ -54,8 +61,8 @@ static const u_int8_t invmap[] = {
static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig)
{
- if (orig->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[orig->dst.u.icmp.type])
+ if (orig->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[orig->dst.u.icmp.type])
return false;
tuple->src.u.icmp.id = orig->src.u.icmp.id;
@@ -74,33 +81,31 @@ static int icmp_print_tuple(struct seq_file *s,
ntohs(tuple->src.u.icmp.id));
}
+static unsigned int *icmp_get_timeouts(struct net *net)
+{
+ return &icmp_pernet(net)->timeout;
+}
+
/* Returns verdict for packet, or -1 for invalid. */
static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
u_int8_t pf,
- unsigned int hooknum)
+ unsigned int hooknum,
+ unsigned int *timeout)
{
- /* Try to delete connection immediately after all replies:
- won't actually vanish as we still have skb, and del_timer
- means this will only run once even if count hits zero twice
- (theoretically possible with SMP) */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count))
- nf_ct_kill_acct(ct, ctinfo, skb);
- } else {
- atomic_inc(&ct->proto.icmp.count);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, ct);
- nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
- }
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
- unsigned int dataoff)
+ unsigned int dataoff, unsigned int *timeouts)
{
static const u_int8_t valid_new[] = {
[ICMP_ECHO] = 1,
@@ -109,27 +114,27 @@ static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
[ICMP_ADDRESS] = 1
};
- if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
- || !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
+ if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
+ !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
/* Can't create a new ICMP `conn' with this. */
pr_debug("icmp: can't create new conn with type %u\n",
ct->tuplehash[0].tuple.dst.u.icmp.type);
nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
return false;
}
- atomic_set(&ct->proto.icmp.count, 0);
return true;
}
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
static int
-icmp_error_message(struct net *net, struct sk_buff *skb,
+icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
enum ip_conntrack_info *ctinfo,
unsigned int hooknum)
{
struct nf_conntrack_tuple innertuple, origtuple;
const struct nf_conntrack_l4proto *innerproto;
const struct nf_conntrack_tuple_hash *h;
+ u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
NF_CT_ASSERT(skb->nfct == NULL);
@@ -155,7 +160,7 @@ icmp_error_message(struct net *net, struct sk_buff *skb,
*ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(net, &innertuple);
+ h = nf_conntrack_find_get(net, zone, &innertuple);
if (!h) {
pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
@@ -167,12 +172,13 @@ icmp_error_message(struct net *net, struct sk_buff *skb,
/* Update skb to refer to this connection */
skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
skb->nfctinfo = *ctinfo;
- return -NF_ACCEPT;
+ return NF_ACCEPT;
}
/* Small and modified version of icmp_rcv */
static int
-icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
+icmp_error(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
{
const struct icmphdr *icmph;
@@ -182,8 +188,8 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: short packet ");
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
+ NULL, "nf_ct_icmp: short packet ");
return -NF_ACCEPT;
}
@@ -191,7 +197,7 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
}
@@ -204,20 +210,20 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
*/
if (icmph->type > NR_ICMP_TYPES) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
}
/* Need to track icmp error message? */
- if (icmph->type != ICMP_DEST_UNREACH
- && icmph->type != ICMP_SOURCE_QUENCH
- && icmph->type != ICMP_TIME_EXCEEDED
- && icmph->type != ICMP_PARAMETERPROB
- && icmph->type != ICMP_REDIRECT)
+ if (icmph->type != ICMP_DEST_UNREACH &&
+ icmph->type != ICMP_SOURCE_QUENCH &&
+ icmph->type != ICMP_TIME_EXCEEDED &&
+ icmph->type != ICMP_PARAMETERPROB &&
+ icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;
- return icmp_error_message(net, skb, ctinfo, hooknum);
+ return icmp_error_message(net, tmpl, skb, ctinfo, hooknum);
}
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
@@ -228,10 +234,10 @@ icmp_error(struct net *net, struct sk_buff *skb, unsigned int dataoff,
static int icmp_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *t)
{
- NLA_PUT_BE16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id);
- NLA_PUT_U8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type);
- NLA_PUT_U8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code);
-
+ if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
return 0;
nla_put_failure:
@@ -247,53 +253,147 @@ static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
static int icmp_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *tuple)
{
- if (!tb[CTA_PROTO_ICMP_TYPE]
- || !tb[CTA_PROTO_ICMP_CODE]
- || !tb[CTA_PROTO_ICMP_ID])
+ if (!tb[CTA_PROTO_ICMP_TYPE] ||
+ !tb[CTA_PROTO_ICMP_CODE] ||
+ !tb[CTA_PROTO_ICMP_ID])
return -EINVAL;
tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
- if (tuple->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[tuple->dst.u.icmp.type])
+ if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[tuple->dst.u.icmp.type])
return -EINVAL;
return 0;
}
+
+static int icmp_nlattr_tuple_size(void)
+{
+ return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+}
#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmp_pernet(net);
+
+ if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+ } else {
+ /* Set default ICMP timeout. */
+ *timeout = in->timeout;
+ }
+ return 0;
+}
+
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
+ return 0;
+
+nla_put_failure:
+ return -ENOSPC;
+}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+ [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
#ifdef CONFIG_SYSCTL
-static struct ctl_table_header *icmp_sysctl_header;
static struct ctl_table icmp_sysctl_table[] = {
{
.procname = "nf_conntrack_icmp_timeout",
- .data = &nf_ct_icmp_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
- .ctl_name = 0
- }
+ { }
};
#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
static struct ctl_table icmp_compat_sysctl_table[] = {
{
.procname = "ip_conntrack_icmp_timeout",
- .data = &nf_ct_icmp_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- {
- .ctl_name = 0
- }
+ { }
};
#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
#endif /* CONFIG_SYSCTL */
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+ pn->ctl_table = kmemdup(icmp_sysctl_table,
+ sizeof(icmp_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &in->timeout;
+#endif
+ return 0;
+}
+
+static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
+ sizeof(icmp_compat_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_compat_table)
+ return -ENOMEM;
+
+ pn->ctl_compat_table[0].data = &in->timeout;
+#endif
+#endif
+ return 0;
+}
+
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+ int ret;
+ struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmp_timeout;
+
+ ret = icmp_kmemdup_compat_sysctl_table(pn, in);
+ if (ret < 0)
+ return ret;
+
+ ret = icmp_kmemdup_sysctl_table(pn, in);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+
+ return ret;
+}
+
+static struct nf_proto_net *icmp_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp.pn;
+}
+
struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
.l3proto = PF_INET,
@@ -303,20 +403,26 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
.invert_tuple = icmp_invert_tuple,
.print_tuple = icmp_print_tuple,
.packet = icmp_packet,
+ .get_timeouts = icmp_get_timeouts,
.new = icmp_new,
.error = icmp_error,
.destroy = NULL,
.me = NULL,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
.tuple_to_nlattr = icmp_tuple_to_nlattr,
+ .nlattr_tuple_size = icmp_nlattr_tuple_size,
.nlattr_to_tuple = icmp_nlattr_to_tuple,
.nla_policy = icmp_nla_policy,
#endif
-#ifdef CONFIG_SYSCTL
- .ctl_table_header = &icmp_sysctl_header,
- .ctl_table = icmp_sysctl_table,
-#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
- .ctl_compat_table = icmp_compat_sysctl_table,
-#endif
-#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+ .init_net = icmp_init_net,
+ .get_net_proto = icmp_get_net_proto,
};
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index fa2d6b6fc3e..b8f6381c7d0 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -14,10 +14,14 @@
#include <net/route.h>
#include <net/ip.h>
+#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
-/* Returns new sk_buff, or NULL */
static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
int err;
@@ -28,32 +32,62 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
err = ip_defrag(skb, user);
local_bh_enable();
- if (!err)
+ if (!err) {
ip_send_check(ip_hdr(skb));
+ skb->ignore_df = 1;
+ }
return err;
}
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
+ struct sk_buff *skb)
+{
+ u16 zone = NF_CT_DEFAULT_ZONE;
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ if (skb->nfct)
+ zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (skb->nf_bridge &&
+ skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+#endif
+ if (hooknum == NF_INET_PRE_ROUTING)
+ return IP_DEFRAG_CONNTRACK_IN + zone;
+ else
+ return IP_DEFRAG_CONNTRACK_OUT + zone;
+}
+
+static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
+ struct sock *sk = skb->sk;
+ struct inet_sock *inet = inet_sk(skb->sk);
+
+ if (sk && (sk->sk_family == PF_INET) &&
+ inet->nodefrag)
+ return NF_ACCEPT;
+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
/* Previously seen (loopback)? Ignore. Do this before
fragment check. */
- if (skb->nfct)
+ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
return NF_ACCEPT;
#endif
#endif
/* Gather fragments. */
- if (ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)) {
- if (nf_ct_ipv4_gather_frags(skb,
- hooknum == NF_INET_PRE_ROUTING ?
- IP_DEFRAG_CONNTRACK_IN :
- IP_DEFRAG_CONNTRACK_OUT))
+ if (ip_is_fragment(ip_hdr(skb))) {
+ enum ip_defrag_users user =
+ nf_ct_defrag_user(ops->hooknum, skb);
+
+ if (nf_ct_ipv4_gather_frags(skb, user))
return NF_STOLEN;
}
return NF_ACCEPT;
@@ -63,14 +97,14 @@ static struct nf_hook_ops ipv4_defrag_ops[] = {
{
.hook = ipv4_conntrack_defrag,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_PRE_ROUTING,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
},
{
.hook = ipv4_conntrack_defrag,
.owner = THIS_MODULE,
- .pf = PF_INET,
+ .pf = NFPROTO_IPV4,
.hooknum = NF_INET_LOCAL_OUT,
.priority = NF_IP_PRI_CONNTRACK_DEFRAG,
},
diff --git a/net/ipv4/netfilter/nf_nat_amanda.c b/net/ipv4/netfilter/nf_nat_amanda.c
deleted file mode 100644
index c31b8766825..00000000000
--- a/net/ipv4/netfilter/nf_nat_amanda.c
+++ /dev/null
@@ -1,78 +0,0 @@
-/* Amanda extension for TCP NAT alteration.
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on a copy of HW's ip_nat_irc.c as well as other modules
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/udp.h>
-
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <linux/netfilter/nf_conntrack_amanda.h>
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda NAT helper");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat_amanda");
-
-static unsigned int help(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct nf_conntrack_expect *exp)
-{
- char buffer[sizeof("65535")];
- u_int16_t port;
- unsigned int ret;
-
- /* Connection comes from client. */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = IP_CT_DIR_ORIGINAL;
-
- /* When you see the packet, we need to NAT it the same as the
- * this one (ie. same IP: it will be TCP and master is UDP). */
- exp->expectfn = nf_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (nf_ct_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- sprintf(buffer, "%u", port);
- ret = nf_nat_mangle_udp_packet(skb, exp->master, ctinfo,
- matchoff, matchlen,
- buffer, strlen(buffer));
- if (ret != NF_ACCEPT)
- nf_ct_unexpect_related(exp);
- return ret;
-}
-
-static void __exit nf_nat_amanda_fini(void)
-{
- rcu_assign_pointer(nf_nat_amanda_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init nf_nat_amanda_init(void)
-{
- BUG_ON(nf_nat_amanda_hook != NULL);
- rcu_assign_pointer(nf_nat_amanda_hook, help);
- return 0;
-}
-
-module_init(nf_nat_amanda_init);
-module_exit(nf_nat_amanda_fini);
diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
deleted file mode 100644
index a65cf692359..00000000000
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ /dev/null
@@ -1,773 +0,0 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h> /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
-#include <linux/jhash.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_l3proto.h>
-#include <net/netfilter/nf_conntrack_l4proto.h>
-
-static DEFINE_SPINLOCK(nf_nat_lock);
-
-static struct nf_conntrack_l3proto *l3proto __read_mostly;
-
-/* Calculated at init based on memory size */
-static unsigned int nf_nat_htable_size __read_mostly;
-
-#define MAX_IP_NAT_PROTO 256
-static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO]
- __read_mostly;
-
-static inline const struct nf_nat_protocol *
-__nf_nat_proto_find(u_int8_t protonum)
-{
- return rcu_dereference(nf_nat_protos[protonum]);
-}
-
-const struct nf_nat_protocol *
-nf_nat_proto_find_get(u_int8_t protonum)
-{
- const struct nf_nat_protocol *p;
-
- rcu_read_lock();
- p = __nf_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &nf_nat_unknown_protocol;
- rcu_read_unlock();
-
- return p;
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_find_get);
-
-void
-nf_nat_proto_put(const struct nf_nat_protocol *p)
-{
- module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_put);
-
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct nf_conntrack_tuple *tuple)
-{
- unsigned int hash;
-
- /* Original src, to ensure we map it consistently if poss. */
- hash = jhash_3words((__force u32)tuple->src.u3.ip,
- (__force u32)tuple->src.u.all,
- tuple->dst.protonum, 0);
- return ((u64)hash * nf_nat_htable_size) >> 32;
-}
-
-/* Is this tuple already taken? (not by us) */
-int
-nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
- const struct nf_conn *ignored_conntrack)
-{
- /* Conntrack tracking doesn't keep track of outgoing tuples; only
- incoming ones. NAT means they don't have a fixed mapping,
- so we invert the tuple and look for the incoming reply.
-
- We could keep a separate hash if this proves too slow. */
- struct nf_conntrack_tuple reply;
-
- nf_ct_invert_tuplepr(&reply, tuple);
- return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
-}
-EXPORT_SYMBOL(nf_nat_used_tuple);
-
-/* If we source map this tuple so reply looks like reply_tuple, will
- * that meet the constraints of range. */
-static int
-in_range(const struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range)
-{
- const struct nf_nat_protocol *proto;
- int ret = 0;
-
- /* If we are supposed to map IPs, then we must be in the
- range specified, otherwise let this drag us onto a new src IP. */
- if (range->flags & IP_NAT_RANGE_MAP_IPS) {
- if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) ||
- ntohl(tuple->src.u3.ip) > ntohl(range->max_ip))
- return 0;
- }
-
- rcu_read_lock();
- proto = __nf_nat_proto_find(tuple->dst.protonum);
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
- proto->in_range(tuple, IP_NAT_MANIP_SRC,
- &range->min, &range->max))
- ret = 1;
- rcu_read_unlock();
-
- return ret;
-}
-
-static inline int
-same_src(const struct nf_conn *ct,
- const struct nf_conntrack_tuple *tuple)
-{
- const struct nf_conntrack_tuple *t;
-
- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- return (t->dst.protonum == tuple->dst.protonum &&
- t->src.u3.ip == tuple->src.u3.ip &&
- t->src.u.all == tuple->src.u.all);
-}
-
-/* Only called for SRC manip */
-static int
-find_appropriate_src(struct net *net,
- const struct nf_conntrack_tuple *tuple,
- struct nf_conntrack_tuple *result,
- const struct nf_nat_range *range)
-{
- unsigned int h = hash_by_src(tuple);
- const struct nf_conn_nat *nat;
- const struct nf_conn *ct;
- const struct hlist_node *n;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(nat, n, &net->ipv4.nat_bysource[h], bysource) {
- ct = nat->ct;
- if (same_src(ct, tuple)) {
- /* Copy source part from reply tuple. */
- nf_ct_invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(result, range)) {
- rcu_read_unlock();
- return 1;
- }
- }
- }
- rcu_read_unlock();
- return 0;
-}
-
-/* For [FUTURE] fragmentation handling, we want the least-used
- src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
- if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
- 1-65535, we don't do pro-rata allocation based on ports; we choose
- the ip with the lowest src-ip/dst-ip/proto usage.
-*/
-static void
-find_best_ips_proto(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- const struct nf_conn *ct,
- enum nf_nat_manip_type maniptype)
-{
- __be32 *var_ipp;
- /* Host order */
- u_int32_t minip, maxip, j;
-
- /* No IP mapping? Do nothing. */
- if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
- return;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- var_ipp = &tuple->src.u3.ip;
- else
- var_ipp = &tuple->dst.u3.ip;
-
- /* Fast path: only one choice. */
- if (range->min_ip == range->max_ip) {
- *var_ipp = range->min_ip;
- return;
- }
-
- /* Hashing source and destination IPs gives a fairly even
- * spread in practice (if there are a small number of IPs
- * involved, there usually aren't that many connections
- * anyway). The consistency means that servers see the same
- * client coming from the same IP (some Internet Banking sites
- * like this), even across reboots. */
- minip = ntohl(range->min_ip);
- maxip = ntohl(range->max_ip);
- j = jhash_2words((__force u32)tuple->src.u3.ip,
- (__force u32)tuple->dst.u3.ip, 0);
- j = ((u64)j * (maxip - minip + 1)) >> 32;
- *var_ipp = htonl(minip + j);
-}
-
-/* Manipulate the tuple into the range given. For NF_INET_POST_ROUTING,
- * we change the source to map into the range. For NF_INET_PRE_ROUTING
- * and NF_INET_LOCAL_OUT, we change the destination to map into the
- * range. It might not be possible to get a unique tuple, but we try.
- * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
-static void
-get_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig_tuple,
- const struct nf_nat_range *range,
- struct nf_conn *ct,
- enum nf_nat_manip_type maniptype)
-{
- struct net *net = nf_ct_net(ct);
- const struct nf_nat_protocol *proto;
-
- /* 1) If this srcip/proto/src-proto-part is currently mapped,
- and that same mapping gives a unique tuple within the given
- range, use that.
-
- This is only required for source (ie. NAT/masq) mappings.
- So far, we don't do local source mappings, so multiple
- manips not an issue. */
- if (maniptype == IP_NAT_MANIP_SRC &&
- !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) {
- if (find_appropriate_src(net, orig_tuple, tuple, range)) {
- pr_debug("get_unique_tuple: Found current src map\n");
- if (!nf_nat_used_tuple(tuple, ct))
- return;
- }
- }
-
- /* 2) Select the least-used IP/proto combination in the given
- range. */
- *tuple = *orig_tuple;
- find_best_ips_proto(tuple, range, ct, maniptype);
-
- /* 3) The per-protocol part of the manip is made to map into
- the range to make a unique tuple. */
-
- rcu_read_lock();
- proto = __nf_nat_proto_find(orig_tuple->dst.protonum);
-
- /* Change protocol info to have some randomization */
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) {
- proto->unique_tuple(tuple, range, maniptype, ct);
- goto out;
- }
-
- /* Only bother mapping if it's not already in range and unique */
- if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) ||
- proto->in_range(tuple, maniptype, &range->min, &range->max)) &&
- !nf_nat_used_tuple(tuple, ct))
- goto out;
-
- /* Last change: get protocol to try to obtain unique tuple. */
- proto->unique_tuple(tuple, range, maniptype, ct);
-out:
- rcu_read_unlock();
-}
-
-unsigned int
-nf_nat_setup_info(struct nf_conn *ct,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype)
-{
- struct net *net = nf_ct_net(ct);
- struct nf_conntrack_tuple curr_tuple, new_tuple;
- struct nf_conn_nat *nat;
- int have_to_hash = !(ct->status & IPS_NAT_DONE_MASK);
-
- /* nat helper or nfctnetlink also setup binding */
- nat = nfct_nat(ct);
- if (!nat) {
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
-
- NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC ||
- maniptype == IP_NAT_MANIP_DST);
- BUG_ON(nf_nat_initialized(ct, maniptype));
-
- /* What we've got will look like inverse of reply. Normally
- this is what is in the conntrack, except for prior
- manipulations (future optimization: if num_manips == 0,
- orig_tp =
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
- nf_ct_invert_tuplepr(&curr_tuple,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- get_unique_tuple(&new_tuple, &curr_tuple, range, ct, maniptype);
-
- if (!nf_ct_tuple_equal(&new_tuple, &curr_tuple)) {
- struct nf_conntrack_tuple reply;
-
- /* Alter conntrack table so will recognize replies. */
- nf_ct_invert_tuplepr(&reply, &new_tuple);
- nf_conntrack_alter_reply(ct, &reply);
-
- /* Non-atomic: we own this at the moment. */
- if (maniptype == IP_NAT_MANIP_SRC)
- ct->status |= IPS_SRC_NAT;
- else
- ct->status |= IPS_DST_NAT;
- }
-
- /* Place in source hash if this is the first time. */
- if (have_to_hash) {
- unsigned int srchash;
-
- srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- spin_lock_bh(&nf_nat_lock);
- /* nf_conntrack_alter_reply might re-allocate exntension aera */
- nat = nfct_nat(ct);
- nat->ct = ct;
- hlist_add_head_rcu(&nat->bysource,
- &net->ipv4.nat_bysource[srchash]);
- spin_unlock_bh(&nf_nat_lock);
- }
-
- /* It's done. */
- if (maniptype == IP_NAT_MANIP_DST)
- set_bit(IPS_DST_NAT_DONE_BIT, &ct->status);
- else
- set_bit(IPS_SRC_NAT_DONE_BIT, &ct->status);
-
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL(nf_nat_setup_info);
-
-/* Returns true if succeeded. */
-static bool
-manip_pkt(u_int16_t proto,
- struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *target,
- enum nf_nat_manip_type maniptype)
-{
- struct iphdr *iph;
- const struct nf_nat_protocol *p;
-
- if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
- return false;
-
- iph = (void *)skb->data + iphdroff;
-
- /* Manipulate protcol part. */
-
- /* rcu_read_lock()ed by nf_hook_slow */
- p = __nf_nat_proto_find(proto);
- if (!p->manip_pkt(skb, iphdroff, target, maniptype))
- return false;
-
- iph = (void *)skb->data + iphdroff;
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
- iph->saddr = target->src.u3.ip;
- } else {
- csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
- iph->daddr = target->dst.u3.ip;
- }
- return true;
-}
-
-/* Do packet manipulations according to nf_nat_setup_info. */
-unsigned int nf_nat_packet(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff *skb)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned long statusbit;
- enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum);
-
- if (mtype == IP_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- /* Non-atomic: these bits don't change. */
- if (ct->status & statusbit) {
- struct nf_conntrack_tuple target;
-
- /* We are aiming to look like inverse of other direction. */
- nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
- if (!manip_pkt(target.dst.protonum, skb, 0, &target, mtype))
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(nf_nat_packet);
-
-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int nf_nat_icmp_reply_translation(struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff *skb)
-{
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } *inside;
- const struct nf_conntrack_l4proto *l4proto;
- struct nf_conntrack_tuple inner, target;
- int hdrlen = ip_hdrlen(skb);
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned long statusbit;
- enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
-
- if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
- return 0;
-
- inside = (void *)skb->data + ip_hdrlen(skb);
-
- /* We're actually going to mangle it beyond trivial checksum
- adjustment, so make sure the current checksum is correct. */
- if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
- return 0;
-
- /* Must be RELATED */
- NF_CT_ASSERT(skb->nfctinfo == IP_CT_RELATED ||
- skb->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
-
- /* Redirects on non-null nats must be dropped, else they'll
- start talking to each other without our translation, and be
- confused... --RR */
- if (inside->icmp.type == ICMP_REDIRECT) {
- /* If NAT isn't finished, assume it and drop. */
- if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
- return 0;
-
- if (ct->status & IPS_NAT_MASK)
- return 0;
- }
-
- pr_debug("icmp_reply_translation: translating error %p manip %u "
- "dir %s\n", skb, manip,
- dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
- /* rcu_read_lock()ed by nf_hook_slow */
- l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol);
-
- if (!nf_ct_get_tuple(skb,
- ip_hdrlen(skb) + sizeof(struct icmphdr),
- (ip_hdrlen(skb) +
- sizeof(struct icmphdr) + inside->ip.ihl * 4),
- (u_int16_t)AF_INET,
- inside->ip.protocol,
- &inner, l3proto, l4proto))
- return 0;
-
- /* Change inner back to look like incoming packet. We do the
- opposite manip on this hook to normal, because it might not
- pass all hooks (locally-generated ICMP). Consider incoming
- packet: PREROUTING (DST manip), routing produces ICMP, goes
- through POSTROUTING (which must correct the DST manip). */
- if (!manip_pkt(inside->ip.protocol, skb,
- ip_hdrlen(skb) + sizeof(inside->icmp),
- &ct->tuplehash[!dir].tuple,
- !manip))
- return 0;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- /* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)skb->data + ip_hdrlen(skb);
- inside->icmp.checksum = 0;
- inside->icmp.checksum =
- csum_fold(skb_checksum(skb, hdrlen,
- skb->len - hdrlen, 0));
- }
-
- /* Change outer to look the reply to an incoming packet
- * (proto 0 means don't invert per-proto part). */
- if (manip == IP_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(0, skb, 0, &target, manip))
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
-
-/* Protocol registration. */
-int nf_nat_protocol_register(const struct nf_nat_protocol *proto)
-{
- int ret = 0;
-
- spin_lock_bh(&nf_nat_lock);
- if (nf_nat_protos[proto->protonum] != &nf_nat_unknown_protocol) {
- ret = -EBUSY;
- goto out;
- }
- rcu_assign_pointer(nf_nat_protos[proto->protonum], proto);
- out:
- spin_unlock_bh(&nf_nat_lock);
- return ret;
-}
-EXPORT_SYMBOL(nf_nat_protocol_register);
-
-/* Noone stores the protocol anywhere; simply delete it. */
-void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto)
-{
- spin_lock_bh(&nf_nat_lock);
- rcu_assign_pointer(nf_nat_protos[proto->protonum],
- &nf_nat_unknown_protocol);
- spin_unlock_bh(&nf_nat_lock);
- synchronize_rcu();
-}
-EXPORT_SYMBOL(nf_nat_protocol_unregister);
-
-/* Noone using conntrack by the time this called. */
-static void nf_nat_cleanup_conntrack(struct nf_conn *ct)
-{
- struct nf_conn_nat *nat = nf_ct_ext_find(ct, NF_CT_EXT_NAT);
-
- if (nat == NULL || nat->ct == NULL)
- return;
-
- NF_CT_ASSERT(nat->ct->status & IPS_NAT_DONE_MASK);
-
- spin_lock_bh(&nf_nat_lock);
- hlist_del_rcu(&nat->bysource);
- spin_unlock_bh(&nf_nat_lock);
-}
-
-static void nf_nat_move_storage(void *new, void *old)
-{
- struct nf_conn_nat *new_nat = new;
- struct nf_conn_nat *old_nat = old;
- struct nf_conn *ct = old_nat->ct;
-
- if (!ct || !(ct->status & IPS_NAT_DONE_MASK))
- return;
-
- spin_lock_bh(&nf_nat_lock);
- new_nat->ct = ct;
- hlist_replace_rcu(&old_nat->bysource, &new_nat->bysource);
- spin_unlock_bh(&nf_nat_lock);
-}
-
-static struct nf_ct_ext_type nat_extend __read_mostly = {
- .len = sizeof(struct nf_conn_nat),
- .align = __alignof__(struct nf_conn_nat),
- .destroy = nf_nat_cleanup_conntrack,
- .move = nf_nat_move_storage,
- .id = NF_CT_EXT_NAT,
- .flags = NF_CT_EXT_F_PREALLOC,
-};
-
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = {
- [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 },
- [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 },
-};
-
-static int nfnetlink_parse_nat_proto(struct nlattr *attr,
- const struct nf_conn *ct,
- struct nf_nat_range *range)
-{
- struct nlattr *tb[CTA_PROTONAT_MAX+1];
- const struct nf_nat_protocol *npt;
- int err;
-
- err = nla_parse_nested(tb, CTA_PROTONAT_MAX, attr, protonat_nla_policy);
- if (err < 0)
- return err;
-
- npt = nf_nat_proto_find_get(nf_ct_protonum(ct));
- if (npt->nlattr_to_range)
- err = npt->nlattr_to_range(tb, range);
- nf_nat_proto_put(npt);
- return err;
-}
-
-static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = {
- [CTA_NAT_MINIP] = { .type = NLA_U32 },
- [CTA_NAT_MAXIP] = { .type = NLA_U32 },
-};
-
-static int
-nfnetlink_parse_nat(struct nlattr *nat,
- const struct nf_conn *ct, struct nf_nat_range *range)
-{
- struct nlattr *tb[CTA_NAT_MAX+1];
- int err;
-
- memset(range, 0, sizeof(*range));
-
- err = nla_parse_nested(tb, CTA_NAT_MAX, nat, nat_nla_policy);
- if (err < 0)
- return err;
-
- if (tb[CTA_NAT_MINIP])
- range->min_ip = nla_get_be32(tb[CTA_NAT_MINIP]);
-
- if (!tb[CTA_NAT_MAXIP])
- range->max_ip = range->min_ip;
- else
- range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]);
-
- if (range->min_ip)
- range->flags |= IP_NAT_RANGE_MAP_IPS;
-
- if (!tb[CTA_NAT_PROTO])
- return 0;
-
- err = nfnetlink_parse_nat_proto(tb[CTA_NAT_PROTO], ct, range);
- if (err < 0)
- return err;
-
- return 0;
-}
-
-static int
-nfnetlink_parse_nat_setup(struct nf_conn *ct,
- enum nf_nat_manip_type manip,
- struct nlattr *attr)
-{
- struct nf_nat_range range;
-
- if (nfnetlink_parse_nat(attr, ct, &range) < 0)
- return -EINVAL;
- if (nf_nat_initialized(ct, manip))
- return -EEXIST;
-
- return nf_nat_setup_info(ct, &range, manip);
-}
-#else
-static int
-nfnetlink_parse_nat_setup(struct nf_conn *ct,
- enum nf_nat_manip_type manip,
- struct nlattr *attr)
-{
- return -EOPNOTSUPP;
-}
-#endif
-
-static int __net_init nf_nat_net_init(struct net *net)
-{
- net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size,
- &net->ipv4.nat_vmalloced);
- if (!net->ipv4.nat_bysource)
- return -ENOMEM;
- return 0;
-}
-
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct nf_conn *i, void *data)
-{
- struct nf_conn_nat *nat = nfct_nat(i);
-
- if (!nat)
- return 0;
- memset(nat, 0, sizeof(*nat));
- i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
- return 0;
-}
-
-static void __net_exit nf_nat_net_exit(struct net *net)
-{
- nf_ct_iterate_cleanup(net, &clean_nat, NULL);
- synchronize_rcu();
- nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced,
- nf_nat_htable_size);
-}
-
-static struct pernet_operations nf_nat_net_ops = {
- .init = nf_nat_net_init,
- .exit = nf_nat_net_exit,
-};
-
-static int __init nf_nat_init(void)
-{
- size_t i;
- int ret;
-
- need_ipv4_conntrack();
-
- ret = nf_ct_extend_register(&nat_extend);
- if (ret < 0) {
- printk(KERN_ERR "nf_nat_core: Unable to register extension\n");
- return ret;
- }
-
- /* Leave them the same for the moment. */
- nf_nat_htable_size = nf_conntrack_htable_size;
-
- ret = register_pernet_subsys(&nf_nat_net_ops);
- if (ret < 0)
- goto cleanup_extend;
-
- /* Sew in builtin protocols. */
- spin_lock_bh(&nf_nat_lock);
- for (i = 0; i < MAX_IP_NAT_PROTO; i++)
- rcu_assign_pointer(nf_nat_protos[i], &nf_nat_unknown_protocol);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_TCP], &nf_nat_protocol_tcp);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_UDP], &nf_nat_protocol_udp);
- rcu_assign_pointer(nf_nat_protos[IPPROTO_ICMP], &nf_nat_protocol_icmp);
- spin_unlock_bh(&nf_nat_lock);
-
- /* Initialize fake conntrack so that NAT will skip it */
- nf_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
-
- l3proto = nf_ct_l3proto_find_get((u_int16_t)AF_INET);
-
- BUG_ON(nf_nat_seq_adjust_hook != NULL);
- rcu_assign_pointer(nf_nat_seq_adjust_hook, nf_nat_seq_adjust);
- BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
- rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
- nfnetlink_parse_nat_setup);
- return 0;
-
- cleanup_extend:
- nf_ct_extend_unregister(&nat_extend);
- return ret;
-}
-
-static void __exit nf_nat_cleanup(void)
-{
- unregister_pernet_subsys(&nf_nat_net_ops);
- nf_ct_l3proto_put(l3proto);
- nf_ct_extend_unregister(&nat_extend);
- rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
- rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
- synchronize_net();
-}
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("nf-nat-ipv4");
-
-module_init(nf_nat_init);
-module_exit(nf_nat_cleanup);
diff --git a/net/ipv4/netfilter/nf_nat_ftp.c b/net/ipv4/netfilter/nf_nat_ftp.c
deleted file mode 100644
index a1d5d58a58b..00000000000
--- a/net/ipv4/netfilter/nf_nat_ftp.c
+++ /dev/null
@@ -1,165 +0,0 @@
-/* FTP extension for TCP NAT alteration. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <linux/netfilter/nf_conntrack_ftp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp NAT helper");
-MODULE_ALIAS("ip_nat_ftp");
-
-/* FIXME: Time out? --RR */
-
-static int
-mangle_rfc959_packet(struct sk_buff *skb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
-
- sprintf(buffer, "%u,%u,%u,%u,%u,%u",
- NIPQUAD(newip), port>>8, port&0xFF);
-
- pr_debug("calling nf_nat_mangle_tcp_packet\n");
-
- return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_eprt_packet(struct sk_buff *skb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- char buffer[sizeof("|1|255.255.255.255|65535|")];
-
- sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
-
- pr_debug("calling nf_nat_mangle_tcp_packet\n");
-
- return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_epsv_packet(struct sk_buff *skb,
- __be32 newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- char buffer[sizeof("|||65535|")];
-
- sprintf(buffer, "|||%u|", port);
-
- pr_debug("calling nf_nat_mangle_tcp_packet\n");
-
- return nf_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-static int (*mangle[])(struct sk_buff *, __be32, u_int16_t,
- unsigned int, unsigned int, struct nf_conn *,
- enum ip_conntrack_info)
-= {
- [NF_CT_FTP_PORT] = mangle_rfc959_packet,
- [NF_CT_FTP_PASV] = mangle_rfc959_packet,
- [NF_CT_FTP_EPRT] = mangle_eprt_packet,
- [NF_CT_FTP_EPSV] = mangle_epsv_packet
-};
-
-/* So, this packet has hit the connection tracking matching code.
- Mangle it, and change the expectation to match the new version. */
-static unsigned int nf_nat_ftp(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- enum nf_ct_ftp_type type,
- unsigned int matchoff,
- unsigned int matchlen,
- struct nf_conntrack_expect *exp)
-{
- __be32 newip;
- u_int16_t port;
- int dir = CTINFO2DIR(ctinfo);
- struct nf_conn *ct = exp->master;
-
- pr_debug("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
-
- /* Connection will come from wherever this packet goes, hence !dir */
- newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = !dir;
-
- /* When you see the packet, we need to NAT it the same as the
- * this one. */
- exp->expectfn = nf_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (nf_ct_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- if (!mangle[type](skb, newip, port, matchoff, matchlen, ct, ctinfo)) {
- nf_ct_unexpect_related(exp);
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-static void __exit nf_nat_ftp_fini(void)
-{
- rcu_assign_pointer(nf_nat_ftp_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init nf_nat_ftp_init(void)
-{
- BUG_ON(nf_nat_ftp_hook != NULL);
- rcu_assign_pointer(nf_nat_ftp_hook, nf_nat_ftp);
- return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
- return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(nf_nat_ftp_init);
-module_exit(nf_nat_ftp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 7e8e6fc7541..574f7ebba0b 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -2,6 +2,7 @@
* H.323 extension for NAT alteration.
*
* Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This source code is licensed under General Public License version 2.
*
@@ -10,19 +11,17 @@
*/
#include <linux/module.h>
-#include <linux/moduleparam.h>
#include <linux/tcp.h>
#include <net/tcp.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <linux/netfilter/nf_conntrack_h323.h>
/****************************************************************************/
-static int set_addr(struct sk_buff *skb,
+static int set_addr(struct sk_buff *skb, unsigned int protoff,
unsigned char **data, int dataoff,
unsigned int addroff, __be32 ip, __be16 port)
{
@@ -41,11 +40,9 @@ static int set_addr(struct sk_buff *skb,
if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
- addroff, sizeof(buf),
+ protoff, addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
- if (net_ratelimit())
- printk("nf_nat_h323: nf_nat_mangle_tcp_packet"
- " error\n");
+ net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n");
return -1;
}
@@ -57,11 +54,9 @@ static int set_addr(struct sk_buff *skb,
*data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
} else {
if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
- addroff, sizeof(buf),
+ protoff, addroff, sizeof(buf),
(char *) &buf, sizeof(buf))) {
- if (net_ratelimit())
- printk("nf_nat_h323: nf_nat_mangle_udp_packet"
- " error\n");
+ net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
return -1;
}
/* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
@@ -74,22 +69,22 @@ static int set_addr(struct sk_buff *skb,
}
/****************************************************************************/
-static int set_h225_addr(struct sk_buff *skb,
+static int set_h225_addr(struct sk_buff *skb, unsigned int protoff,
unsigned char **data, int dataoff,
TransportAddress *taddr,
union nf_inet_addr *addr, __be16 port)
{
- return set_addr(skb, data, dataoff, taddr->ipAddress.ip,
+ return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip,
addr->ip, port);
}
/****************************************************************************/
-static int set_h245_addr(struct sk_buff *skb,
+static int set_h245_addr(struct sk_buff *skb, unsigned protoff,
unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
union nf_inet_addr *addr, __be16 port)
{
- return set_addr(skb, data, dataoff,
+ return set_addr(skb, protoff, data, dataoff,
taddr->unicastAddress.iPAddress.network,
addr->ip, port);
}
@@ -97,10 +92,10 @@ static int set_h245_addr(struct sk_buff *skb,
/****************************************************************************/
static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data,
+ unsigned int protoff, unsigned char **data,
TransportAddress *taddr, int count)
{
- const struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ const struct nf_ct_h323_master *info = nfct_help_data(ct);
int dir = CTINFO2DIR(ctinfo);
int i;
__be16 port;
@@ -123,7 +118,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
&addr.ip, port,
&ct->tuplehash[!dir].tuple.dst.u3.ip,
info->sig_port[!dir]);
- return set_h225_addr(skb, data, 0, &taddr[i],
+ return set_h225_addr(skb, protoff, data, 0,
+ &taddr[i],
&ct->tuplehash[!dir].
tuple.dst.u3,
info->sig_port[!dir]);
@@ -134,7 +130,8 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
&addr.ip, port,
&ct->tuplehash[!dir].tuple.src.u3.ip,
info->sig_port[!dir]);
- return set_h225_addr(skb, data, 0, &taddr[i],
+ return set_h225_addr(skb, protoff, data, 0,
+ &taddr[i],
&ct->tuplehash[!dir].
tuple.src.u3,
info->sig_port[!dir]);
@@ -148,7 +145,7 @@ static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data,
+ unsigned int protoff, unsigned char **data,
TransportAddress *taddr, int count)
{
int dir = CTINFO2DIR(ctinfo);
@@ -164,7 +161,7 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
&addr.ip, ntohs(port),
&ct->tuplehash[!dir].tuple.dst.u3.ip,
ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
- return set_h225_addr(skb, data, 0, &taddr[i],
+ return set_h225_addr(skb, protoff, data, 0, &taddr[i],
&ct->tuplehash[!dir].tuple.dst.u3,
ct->tuplehash[!dir].tuple.
dst.u.udp.port);
@@ -177,13 +174,13 @@ static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
H245_TransportAddress *taddr,
__be16 port, __be16 rtp_port,
struct nf_conntrack_expect *rtp_exp,
struct nf_conntrack_expect *rtcp_exp)
{
- struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
int dir = CTINFO2DIR(ctinfo);
int i;
u_int16_t nated_port;
@@ -215,32 +212,44 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
/* Run out of expectations */
if (i >= H323_RTP_CHANNEL_MAX) {
- if (net_ratelimit())
- printk("nf_nat_h323: out of expectations\n");
+ net_notice_ratelimited("nf_nat_h323: out of expectations\n");
return 0;
}
/* Try to get a pair of ports. */
for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
nated_port != 0; nated_port += 2) {
+ int ret;
+
rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
- if (nf_ct_expect_related(rtp_exp) == 0) {
+ ret = nf_ct_expect_related(rtp_exp);
+ if (ret == 0) {
rtcp_exp->tuple.dst.u.udp.port =
htons(nated_port + 1);
- if (nf_ct_expect_related(rtcp_exp) == 0)
+ ret = nf_ct_expect_related(rtcp_exp);
+ if (ret == 0)
+ break;
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
+ nf_ct_unexpect_related(rtp_exp);
+ nated_port = 0;
break;
- nf_ct_unexpect_related(rtp_exp);
+ }
+ } else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
}
}
if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("nf_nat_h323: out of RTP ports\n");
+ net_notice_ratelimited("nf_nat_h323: out of RTP ports\n");
return 0;
}
/* Modify signal */
- if (set_h245_addr(skb, data, dataoff, taddr,
+ if (set_h245_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons((port & htons(1)) ? nated_port + 1 :
nated_port)) == 0) {
@@ -271,7 +280,7 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
H245_TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp)
{
@@ -285,19 +294,25 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
/* Try to get same port: if not, try to change it. */
for (; nated_port != 0; nated_port++) {
+ int ret;
+
exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (nf_ct_expect_related(exp) == 0)
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
break;
+ }
}
if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("nf_nat_h323: out of TCP ports\n");
+ net_notice_ratelimited("nf_nat_h323: out of TCP ports\n");
return 0;
}
/* Modify signal */
- if (set_h245_addr(skb, data, dataoff, taddr,
+ if (set_h245_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) < 0) {
nf_ct_unexpect_related(exp);
@@ -316,11 +331,11 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
/****************************************************************************/
static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, int dataoff,
+ unsigned int protoff, unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp)
{
- struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
int dir = CTINFO2DIR(ctinfo);
u_int16_t nated_port = ntohs(port);
@@ -335,19 +350,25 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
/* Try to get same port: if not, try to change it. */
for (; nated_port != 0; nated_port++) {
+ int ret;
+
exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (nf_ct_expect_related(exp) == 0)
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
}
if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("nf_nat_q931: out of TCP ports\n");
+ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
return 0;
}
/* Modify signal */
- if (set_h225_addr(skb, data, dataoff, taddr,
+ if (set_h225_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
/* Save ports */
@@ -385,25 +406,27 @@ static void ip_nat_q931_expect(struct nf_conn *new,
BUG_ON(new->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
- nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr =
+ new->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = this->saved_proto;
- range.min_ip = range.max_ip =
- new->master->tuplehash[!this->dir].tuple.src.u3.ip;
- nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto = range.max_proto = this->saved_proto;
+ range.min_addr = range.max_addr =
+ new->master->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
}
/****************************************************************************/
static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
- unsigned char **data, TransportAddress *taddr, int idx,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int idx,
__be16 port, struct nf_conntrack_expect *exp)
{
- struct nf_ct_h323_master *info = &nfct_help(ct)->help.ct_h323_info;
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
int dir = CTINFO2DIR(ctinfo);
u_int16_t nated_port = ntohs(port);
union nf_inet_addr addr;
@@ -419,19 +442,25 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
/* Try to get same port: if not, try to change it. */
for (; nated_port != 0; nated_port++) {
+ int ret;
+
exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (nf_ct_expect_related(exp) == 0)
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
}
if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("nf_nat_ras: out of TCP ports\n");
+ net_notice_ratelimited("nf_nat_ras: out of TCP ports\n");
return 0;
}
/* Modify signal */
- if (set_h225_addr(skb, data, 0, &taddr[idx],
+ if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
/* Save ports */
@@ -442,7 +471,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
if (idx > 0 &&
get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
(ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
- set_h225_addr(skb, data, 0, &taddr[0],
+ set_h225_addr(skb, protoff, data, 0, &taddr[0],
&ct->tuplehash[!dir].tuple.dst.u3,
info->sig_port[!dir]);
}
@@ -471,20 +500,22 @@ static void ip_nat_callforwarding_expect(struct nf_conn *new,
BUG_ON(new->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip;
- nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC);
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr =
+ new->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = this->saved_proto;
- range.min_ip = range.max_ip = this->saved_ip;
- nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST);
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto = range.max_proto = this->saved_proto;
+ range.min_addr = range.max_addr = this->saved_addr;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
}
/****************************************************************************/
static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
unsigned char **data, int dataoff,
TransportAddress *taddr, __be16 port,
struct nf_conntrack_expect *exp)
@@ -493,7 +524,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
u_int16_t nated_port;
/* Set expectations for NAT */
- exp->saved_ip = exp->tuple.dst.u3.ip;
+ exp->saved_addr = exp->tuple.dst.u3;
exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
exp->expectfn = ip_nat_callforwarding_expect;
@@ -501,19 +532,25 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
/* Try to get same port: if not, try to change it. */
for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
+ int ret;
+
exp->tuple.dst.u.tcp.port = htons(nated_port);
- if (nf_ct_expect_related(exp) == 0)
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
break;
+ }
}
if (nated_port == 0) { /* No port available */
- if (net_ratelimit())
- printk("nf_nat_q931: out of TCP ports\n");
+ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
return 0;
}
/* Modify signal */
- if (!set_h225_addr(skb, data, dataoff, taddr,
+ if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
&ct->tuplehash[!dir].tuple.dst.u3,
htons(nated_port)) == 0) {
nf_ct_unexpect_related(exp);
@@ -530,6 +567,16 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
return 0;
}
+static struct nf_ct_helper_expectfn q931_nat = {
+ .name = "Q.931",
+ .expectfn = ip_nat_q931_expect,
+};
+
+static struct nf_ct_helper_expectfn callforwarding_nat = {
+ .name = "callforwarding",
+ .expectfn = ip_nat_callforwarding_expect,
+};
+
/****************************************************************************/
static int __init init(void)
{
@@ -543,30 +590,34 @@ static int __init init(void)
BUG_ON(nat_callforwarding_hook != NULL);
BUG_ON(nat_q931_hook != NULL);
- rcu_assign_pointer(set_h245_addr_hook, set_h245_addr);
- rcu_assign_pointer(set_h225_addr_hook, set_h225_addr);
- rcu_assign_pointer(set_sig_addr_hook, set_sig_addr);
- rcu_assign_pointer(set_ras_addr_hook, set_ras_addr);
- rcu_assign_pointer(nat_rtp_rtcp_hook, nat_rtp_rtcp);
- rcu_assign_pointer(nat_t120_hook, nat_t120);
- rcu_assign_pointer(nat_h245_hook, nat_h245);
- rcu_assign_pointer(nat_callforwarding_hook, nat_callforwarding);
- rcu_assign_pointer(nat_q931_hook, nat_q931);
+ RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr);
+ RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr);
+ RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr);
+ RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp);
+ RCU_INIT_POINTER(nat_t120_hook, nat_t120);
+ RCU_INIT_POINTER(nat_h245_hook, nat_h245);
+ RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding);
+ RCU_INIT_POINTER(nat_q931_hook, nat_q931);
+ nf_ct_helper_expectfn_register(&q931_nat);
+ nf_ct_helper_expectfn_register(&callforwarding_nat);
return 0;
}
/****************************************************************************/
static void __exit fini(void)
{
- rcu_assign_pointer(set_h245_addr_hook, NULL);
- rcu_assign_pointer(set_h225_addr_hook, NULL);
- rcu_assign_pointer(set_sig_addr_hook, NULL);
- rcu_assign_pointer(set_ras_addr_hook, NULL);
- rcu_assign_pointer(nat_rtp_rtcp_hook, NULL);
- rcu_assign_pointer(nat_t120_hook, NULL);
- rcu_assign_pointer(nat_h245_hook, NULL);
- rcu_assign_pointer(nat_callforwarding_hook, NULL);
- rcu_assign_pointer(nat_q931_hook, NULL);
+ RCU_INIT_POINTER(set_h245_addr_hook, NULL);
+ RCU_INIT_POINTER(set_h225_addr_hook, NULL);
+ RCU_INIT_POINTER(set_sig_addr_hook, NULL);
+ RCU_INIT_POINTER(set_ras_addr_hook, NULL);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL);
+ RCU_INIT_POINTER(nat_t120_hook, NULL);
+ RCU_INIT_POINTER(nat_h245_hook, NULL);
+ RCU_INIT_POINTER(nat_callforwarding_hook, NULL);
+ RCU_INIT_POINTER(nat_q931_hook, NULL);
+ nf_ct_helper_expectfn_unregister(&q931_nat);
+ nf_ct_helper_expectfn_unregister(&callforwarding_nat);
synchronize_rcu();
}
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
deleted file mode 100644
index cf7a42bf982..00000000000
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ /dev/null
@@ -1,444 +0,0 @@
-/* ip_nat_helper.c - generic support functions for NAT helpers
- *
- * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
- * (C) 2003-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_ecache.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_helper.h>
-
-#define DUMP_OFFSET(x) \
- pr_debug("offset_before=%d, offset_after=%d, correction_pos=%u\n", \
- x->offset_before, x->offset_after, x->correction_pos);
-
-static DEFINE_SPINLOCK(nf_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
- int sizediff,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- int dir;
- struct nf_nat_seq *this_way, *other_way;
- struct nf_conn_nat *nat = nfct_nat(ct);
-
- pr_debug("adjust_tcp_sequence: seq = %u, sizediff = %d\n", seq, seq);
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &nat->seq[dir];
- other_way = &nat->seq[!dir];
-
- pr_debug("nf_nat_resize_packet: Seq_offset before: ");
- DUMP_OFFSET(this_way);
-
- spin_lock_bh(&nf_nat_seqofs_lock);
-
- /* SYN adjust. If it's uninitialized, or this is after last
- * correction, record it: we don't handle more than one
- * adjustment in the window, but do deal with common case of a
- * retransmit */
- if (this_way->offset_before == this_way->offset_after ||
- before(this_way->correction_pos, seq)) {
- this_way->correction_pos = seq;
- this_way->offset_before = this_way->offset_after;
- this_way->offset_after += sizediff;
- }
- spin_unlock_bh(&nf_nat_seqofs_lock);
-
- pr_debug("nf_nat_resize_packet: Seq_offset after: ");
- DUMP_OFFSET(this_way);
-}
-
-/* Frobs data inside this packet, which is linear. */
-static void mangle_contents(struct sk_buff *skb,
- unsigned int dataoff,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- unsigned char *data;
-
- BUG_ON(skb_is_nonlinear(skb));
- data = skb_network_header(skb) + dataoff;
-
- /* move post-replacement */
- memmove(data + match_offset + rep_len,
- data + match_offset + match_len,
- skb->tail - (skb->network_header + dataoff +
- match_offset + match_len));
-
- /* insert data from buffer */
- memcpy(data + match_offset, rep_buffer, rep_len);
-
- /* update skb info */
- if (rep_len > match_len) {
- pr_debug("nf_nat_mangle_packet: Extending packet by "
- "%u from %u bytes\n", rep_len - match_len, skb->len);
- skb_put(skb, rep_len - match_len);
- } else {
- pr_debug("nf_nat_mangle_packet: Shrinking packet from "
- "%u from %u bytes\n", match_len - rep_len, skb->len);
- __skb_trim(skb, skb->len + rep_len - match_len);
- }
-
- /* fix IP hdr checksum information */
- ip_hdr(skb)->tot_len = htons(skb->len);
- ip_send_check(ip_hdr(skb));
-}
-
-/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff *skb, unsigned int extra)
-{
- if (skb->len + extra > 65535)
- return 0;
-
- if (pskb_expand_head(skb, 0, extra - skb_tailroom(skb), GFP_ATOMIC))
- return 0;
-
- return 1;
-}
-
-/* Generic function for mangling variable-length address changes inside
- * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
- * command in FTP).
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * */
-int
-nf_nat_mangle_tcp_packet(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- struct rtable *rt = skb->rtable;
- struct iphdr *iph;
- struct tcphdr *tcph;
- int oldlen, datalen;
-
- if (!skb_make_writable(skb, skb->len))
- return 0;
-
- if (rep_len > match_len &&
- rep_len - match_len > skb_tailroom(skb) &&
- !enlarge_skb(skb, rep_len - match_len))
- return 0;
-
- SKB_LINEAR_ASSERT(skb);
-
- iph = ip_hdr(skb);
- tcph = (void *)iph + iph->ihl*4;
-
- oldlen = skb->len - iph->ihl*4;
- mangle_contents(skb, iph->ihl*4 + tcph->doff*4,
- match_offset, match_len, rep_buffer, rep_len);
-
- datalen = skb->len - iph->ihl*4;
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- if (!(rt->rt_flags & RTCF_LOCAL) &&
- skb->dev->features & NETIF_F_V4_CSUM) {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_headroom(skb) +
- skb_network_offset(skb) +
- iph->ihl * 4;
- skb->csum_offset = offsetof(struct tcphdr, check);
- tcph->check = ~tcp_v4_check(datalen,
- iph->saddr, iph->daddr, 0);
- } else {
- tcph->check = 0;
- tcph->check = tcp_v4_check(datalen,
- iph->saddr, iph->daddr,
- csum_partial(tcph,
- datalen, 0));
- }
- } else
- inet_proto_csum_replace2(&tcph->check, skb,
- htons(oldlen), htons(datalen), 1);
-
- if (rep_len != match_len) {
- set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
- adjust_tcp_sequence(ntohl(tcph->seq),
- (int)rep_len - (int)match_len,
- ct, ctinfo);
- /* Tell TCP window tracking about seq change */
- nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
- ct, CTINFO2DIR(ctinfo));
-
- nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
- }
- return 1;
-}
-EXPORT_SYMBOL(nf_nat_mangle_tcp_packet);
-
-/* Generic function for mangling variable-length address changes inside
- * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
- * command in the Amanda protocol)
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * XXX - This function could be merged with nf_nat_mangle_tcp_packet which
- * should be fairly easy to do.
- */
-int
-nf_nat_mangle_udp_packet(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- struct rtable *rt = skb->rtable;
- struct iphdr *iph;
- struct udphdr *udph;
- int datalen, oldlen;
-
- /* UDP helpers might accidentally mangle the wrong packet */
- iph = ip_hdr(skb);
- if (skb->len < iph->ihl*4 + sizeof(*udph) +
- match_offset + match_len)
- return 0;
-
- if (!skb_make_writable(skb, skb->len))
- return 0;
-
- if (rep_len > match_len &&
- rep_len - match_len > skb_tailroom(skb) &&
- !enlarge_skb(skb, rep_len - match_len))
- return 0;
-
- iph = ip_hdr(skb);
- udph = (void *)iph + iph->ihl*4;
-
- oldlen = skb->len - iph->ihl*4;
- mangle_contents(skb, iph->ihl*4 + sizeof(*udph),
- match_offset, match_len, rep_buffer, rep_len);
-
- /* update the length of the UDP packet */
- datalen = skb->len - iph->ihl*4;
- udph->len = htons(datalen);
-
- /* fix udp checksum if udp checksum was previously calculated */
- if (!udph->check && skb->ip_summed != CHECKSUM_PARTIAL)
- return 1;
-
- if (skb->ip_summed != CHECKSUM_PARTIAL) {
- if (!(rt->rt_flags & RTCF_LOCAL) &&
- skb->dev->features & NETIF_F_V4_CSUM) {
- skb->ip_summed = CHECKSUM_PARTIAL;
- skb->csum_start = skb_headroom(skb) +
- skb_network_offset(skb) +
- iph->ihl * 4;
- skb->csum_offset = offsetof(struct udphdr, check);
- udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, IPPROTO_UDP,
- 0);
- } else {
- udph->check = 0;
- udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, IPPROTO_UDP,
- csum_partial(udph,
- datalen, 0));
- if (!udph->check)
- udph->check = CSUM_MANGLED_0;
- }
- } else
- inet_proto_csum_replace2(&udph->check, skb,
- htons(oldlen), htons(datalen), 1);
-
- return 1;
-}
-EXPORT_SYMBOL(nf_nat_mangle_udp_packet);
-
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
- struct tcphdr *tcph,
- unsigned int sackoff,
- unsigned int sackend,
- struct nf_nat_seq *natseq)
-{
- while (sackoff < sackend) {
- struct tcp_sack_block_wire *sack;
- __be32 new_start_seq, new_end_seq;
-
- sack = (void *)skb->data + sackoff;
- if (after(ntohl(sack->start_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_after);
- else
- new_start_seq = htonl(ntohl(sack->start_seq)
- - natseq->offset_before);
-
- if (after(ntohl(sack->end_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_after);
- else
- new_end_seq = htonl(ntohl(sack->end_seq)
- - natseq->offset_before);
-
- pr_debug("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
- ntohl(sack->start_seq), new_start_seq,
- ntohl(sack->end_seq), new_end_seq);
-
- inet_proto_csum_replace4(&tcph->check, skb,
- sack->start_seq, new_start_seq, 0);
- inet_proto_csum_replace4(&tcph->check, skb,
- sack->end_seq, new_end_seq, 0);
- sack->start_seq = new_start_seq;
- sack->end_seq = new_end_seq;
- sackoff += sizeof(*sack);
- }
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-nf_nat_sack_adjust(struct sk_buff *skb,
- struct tcphdr *tcph,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dir, optoff, optend;
- struct nf_conn_nat *nat = nfct_nat(ct);
-
- optoff = ip_hdrlen(skb) + sizeof(struct tcphdr);
- optend = ip_hdrlen(skb) + tcph->doff * 4;
-
- if (!skb_make_writable(skb, optend))
- return 0;
-
- dir = CTINFO2DIR(ctinfo);
-
- while (optoff < optend) {
- /* Usually: option, length. */
- unsigned char *op = skb->data + optoff;
-
- switch (op[0]) {
- case TCPOPT_EOL:
- return 1;
- case TCPOPT_NOP:
- optoff++;
- continue;
- default:
- /* no partial options */
- if (optoff + 1 == optend ||
- optoff + op[1] > optend ||
- op[1] < 2)
- return 0;
- if (op[0] == TCPOPT_SACK &&
- op[1] >= 2+TCPOLEN_SACK_PERBLOCK &&
- ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
- sack_adjust(skb, tcph, optoff+2,
- optoff+op[1], &nat->seq[!dir]);
- optoff += op[1];
- }
- }
- return 1;
-}
-
-/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
-int
-nf_nat_seq_adjust(struct sk_buff *skb,
- struct nf_conn *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct tcphdr *tcph;
- int dir;
- __be32 newseq, newack;
- struct nf_conn_nat *nat = nfct_nat(ct);
- struct nf_nat_seq *this_way, *other_way;
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &nat->seq[dir];
- other_way = &nat->seq[!dir];
-
- if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
- return 0;
-
- tcph = (void *)skb->data + ip_hdrlen(skb);
- if (after(ntohl(tcph->seq), this_way->correction_pos))
- newseq = htonl(ntohl(tcph->seq) + this_way->offset_after);
- else
- newseq = htonl(ntohl(tcph->seq) + this_way->offset_before);
-
- if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
- newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_after);
- else
- newack = htonl(ntohl(tcph->ack_seq) - other_way->offset_before);
-
- inet_proto_csum_replace4(&tcph->check, skb, tcph->seq, newseq, 0);
- inet_proto_csum_replace4(&tcph->check, skb, tcph->ack_seq, newack, 0);
-
- pr_debug("Adjusting sequence number from %u->%u, ack from %u->%u\n",
- ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
- ntohl(newack));
-
- tcph->seq = newseq;
- tcph->ack_seq = newack;
-
- if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo))
- return 0;
-
- nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir);
-
- return 1;
-}
-
-/* Setup NAT on this expected conntrack so it follows master. */
-/* If we fail to get a free NAT slot, we'll get dropped on confirm */
-void nf_nat_follow_master(struct nf_conn *ct,
- struct nf_conntrack_expect *exp)
-{
- struct nf_nat_range range;
-
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = exp->saved_proto;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
-}
-EXPORT_SYMBOL(nf_nat_follow_master);
diff --git a/net/ipv4/netfilter/nf_nat_irc.c b/net/ipv4/netfilter/nf_nat_irc.c
deleted file mode 100644
index ea83a886b03..00000000000
--- a/net/ipv4/netfilter/nf_nat_irc.c
+++ /dev/null
@@ -1,92 +0,0 @@
-/* IRC extension for TCP NAT alteration.
- *
- * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
- * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * based on a copy of RR's ip_nat_ftp.c
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/tcp.h>
-#include <linux/kernel.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <linux/netfilter/nf_conntrack_irc.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("IRC (DCC) NAT helper");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat_irc");
-
-static unsigned int help(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct nf_conntrack_expect *exp)
-{
- char buffer[sizeof("4294967296 65635")];
- u_int32_t ip;
- u_int16_t port;
- unsigned int ret;
-
- /* Reply comes from server. */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = IP_CT_DIR_REPLY;
- exp->expectfn = nf_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (nf_ct_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- ip = ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip);
- sprintf(buffer, "%u %u", ip, port);
- pr_debug("nf_nat_irc: inserting '%s' == %pI4, port %u\n",
- buffer, &ip, port);
-
- ret = nf_nat_mangle_tcp_packet(skb, exp->master, ctinfo,
- matchoff, matchlen, buffer,
- strlen(buffer));
- if (ret != NF_ACCEPT)
- nf_ct_unexpect_related(exp);
- return ret;
-}
-
-static void __exit nf_nat_irc_fini(void)
-{
- rcu_assign_pointer(nf_nat_irc_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init nf_nat_irc_init(void)
-{
- BUG_ON(nf_nat_irc_hook != NULL);
- rcu_assign_pointer(nf_nat_irc_hook, help);
- return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
- return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(nf_nat_irc_init);
-module_exit(nf_nat_irc_fini);
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
new file mode 100644
index 00000000000..d8b2e14efdd
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -0,0 +1,281 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (ct->status & statusbit) {
+ fl4->daddr = t->dst.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl4->saddr = t->src.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_sport = t->src.u.all;
+ }
+}
+#endif /* CONFIG_XFRM */
+
+static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
+ const struct nf_nat_range *range)
+{
+ return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
+ ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
+}
+
+static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
+ __be16 dport)
+{
+ return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
+}
+
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_nat_l4proto *l4proto,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph;
+ unsigned int hdroff;
+
+ if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+ return false;
+
+ iph = (void *)skb->data + iphdroff;
+ hdroff = iphdroff + iph->ihl * 4;
+
+ if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
+ target, maniptype))
+ return false;
+ iph = (void *)skb->data + iphdroff;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+ iph->saddr = target->src.u3.ip;
+ } else {
+ csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+ iph->daddr = target->dst.u3.ip;
+ }
+ return true;
+}
+
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ __be32 oldip, newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = iph->saddr;
+ newip = t->src.u3.ip;
+ } else {
+ oldip = iph->daddr;
+ newip = t->dst.u3.ip;
+ }
+ inet_proto_csum_replace4(check, skb, oldip, newip, 1);
+}
+
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb_rtable(skb);
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ ip_hdrlen(skb);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, proto, 0);
+ } else {
+ *check = 0;
+ *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, proto,
+ csum_partial(data, datalen,
+ 0));
+ if (proto == IPPROTO_UDP && !*check)
+ *check = CSUM_MANGLED_0;
+ }
+ } else
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), 1);
+}
+
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range *range)
+{
+ if (tb[CTA_NAT_V4_MINIP]) {
+ range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V4_MAXIP])
+ range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+ else
+ range->max_addr.ip = range->min_addr.ip;
+
+ return 0;
+}
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
+ .l3proto = NFPROTO_IPV4,
+ .in_range = nf_nat_ipv4_in_range,
+ .secure_port = nf_nat_ipv4_secure_port,
+ .manip_pkt = nf_nat_ipv4_manip_pkt,
+ .csum_update = nf_nat_ipv4_csum_update,
+ .csum_recalc = nf_nat_ipv4_csum_recalc,
+ .nlattr_to_range = nf_nat_ipv4_nlattr_to_range,
+#ifdef CONFIG_XFRM
+ .decode_session = nf_nat_ipv4_decode_session,
+#endif
+};
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum)
+{
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ unsigned int hdrlen = ip_hdrlen(skb);
+ const struct nf_nat_l4proto *l4proto;
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply direction */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
+ if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
+ l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt may reallocate */
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum =
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ /* Change outer to look like the reply to an incoming packet */
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
+ if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+static int __init nf_nat_l3proto_ipv4_init(void)
+{
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
+ if (err < 0)
+ goto err2;
+ return err;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+err1:
+ return err;
+}
+
+static void __exit nf_nat_l3proto_ipv4_exit(void)
+{
+ nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
+
+module_init(nf_nat_l3proto_ipv4_init);
+module_exit(nf_nat_l3proto_ipv4_exit);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index 9eb171056c6..657d2307f03 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -13,6 +13,8 @@
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
* TODO: - NAT to a unique tuple, not to TCP source port
* (needs netfilter tuple reservation)
*/
@@ -22,9 +24,9 @@
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
#include <linux/netfilter/nf_conntrack_pptp.h>
@@ -48,7 +50,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
const struct nf_nat_pptp *nat_pptp_info;
struct nf_nat_range range;
- ct_pptp_info = &nfct_help(master)->help.ct_pptp_info;
+ ct_pptp_info = nfct_help_data(master);
nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
/* And here goes the grand finale of corrosion... */
@@ -74,7 +76,7 @@ static void pptp_nat_expected(struct nf_conn *ct,
pr_debug("trying to unexpect other dir: ");
nf_ct_dump_tuple_ip(&t);
- other_exp = nf_ct_expect_find_get(net, &t);
+ other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t);
if (other_exp) {
nf_ct_unexpect_related(other_exp);
nf_ct_expect_put(other_exp);
@@ -87,24 +89,24 @@ static void pptp_nat_expected(struct nf_conn *ct,
BUG_ON(ct->status & IPS_NAT_DONE_MASK);
/* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
if (exp->dir == IP_CT_DIR_ORIGINAL) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto = range.max_proto = exp->saved_proto;
}
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
/* For DST manip, map port here to where it's expected. */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip;
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.src.u3;
if (exp->dir == IP_CT_DIR_REPLY) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto = range.max_proto = exp->saved_proto;
}
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
}
/* outbound packets == from PNS to PAC */
@@ -112,6 +114,7 @@ static int
pptp_outbound_pkt(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq)
@@ -122,7 +125,7 @@ pptp_outbound_pkt(struct sk_buff *skb,
__be16 new_callid;
unsigned int cid_off;
- ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info;
+ ct_pptp_info = nfct_help_data(ct);
nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
new_callid = ct_pptp_info->pns_call_id;
@@ -174,7 +177,7 @@ pptp_outbound_pkt(struct sk_buff *skb,
ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
/* mangle packet */
- if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
cid_off + sizeof(struct pptp_pkt_hdr) +
sizeof(struct PptpControlHeader),
sizeof(new_callid), (char *)&new_callid,
@@ -191,7 +194,7 @@ pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
struct nf_ct_pptp_master *ct_pptp_info;
struct nf_nat_pptp *nat_pptp_info;
- ct_pptp_info = &nfct_help(ct)->help.ct_pptp_info;
+ ct_pptp_info = nfct_help_data(ct);
nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
/* save original PAC call ID in nat_info */
@@ -215,6 +218,7 @@ static int
pptp_inbound_pkt(struct sk_buff *skb,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
struct PptpControlHeader *ctlh,
union pptp_ctrl_union *pptpReq)
{
@@ -267,7 +271,7 @@ pptp_inbound_pkt(struct sk_buff *skb,
pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
- if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
pcid_off + sizeof(struct pptp_pkt_hdr) +
sizeof(struct PptpControlHeader),
sizeof(new_pcid), (char *)&new_pcid,
@@ -281,25 +285,25 @@ static int __init nf_nat_helper_pptp_init(void)
nf_nat_need_gre();
BUG_ON(nf_nat_pptp_hook_outbound != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
BUG_ON(nf_nat_pptp_hook_inbound != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
return 0;
}
static void __exit nf_nat_helper_pptp_fini(void)
{
- rcu_assign_pointer(nf_nat_pptp_hook_expectfn, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_exp_gre, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_inbound, NULL);
- rcu_assign_pointer(nf_nat_pptp_hook_outbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL);
synchronize_rcu();
}
diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c
deleted file mode 100644
index 6c4f11f5144..00000000000
--- a/net/ipv4/netfilter/nf_nat_proto_common.c
+++ /dev/null
@@ -1,124 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/random.h>
-#include <linux/ip.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- __be16 port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.all;
- else
- port = tuple->dst.u.all;
-
- return ntohs(port) >= ntohs(min->all) &&
- ntohs(port) <= ntohs(max->all);
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_in_range);
-
-bool nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct,
- u_int16_t *rover)
-{
- unsigned int range_size, min, i;
- __be16 *portptr;
- u_int16_t off;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.all;
- else
- portptr = &tuple->dst.u.all;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return false;
-
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr) < 512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.all);
- range_size = ntohs(range->max.all) - min + 1;
- }
-
- if (range->flags & IP_NAT_RANGE_PROTO_RANDOM)
- off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip,
- maniptype == IP_NAT_MANIP_SRC
- ? tuple->dst.u.all
- : tuple->src.u.all);
- else
- off = *rover;
-
- for (i = 0; i < range_size; i++, off++) {
- *portptr = htons(min + off % range_size);
- if (nf_nat_used_tuple(tuple, ct))
- continue;
- if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM))
- *rover = off;
- return true;
- }
- return false;
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple);
-
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
-int nf_nat_proto_range_to_nlattr(struct sk_buff *skb,
- const struct nf_nat_range *range)
-{
- NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all);
- NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all);
- return 0;
-
-nla_put_failure:
- return -1;
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range);
-
-int nf_nat_proto_nlattr_to_range(struct nlattr *tb[],
- struct nf_nat_range *range)
-{
- if (tb[CTA_PROTONAT_PORT_MIN]) {
- range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]);
- range->max.all = range->min.tcp.port;
- range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- }
- if (tb[CTA_PROTONAT_PORT_MAX]) {
- range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]);
- range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- }
- return 0;
-}
-EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr);
-#endif
diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c
deleted file mode 100644
index 22485ce306d..00000000000
--- a/net/ipv4/netfilter/nf_nat_proto_dccp.c
+++ /dev/null
@@ -1,108 +0,0 @@
-/*
- * DCCP NAT protocol helper
- *
- * Copyright (c) 2005, 2006. 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/dccp.h>
-
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-static u_int16_t dccp_port_rover;
-
-static bool
-dccp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
- &dccp_port_rover);
-}
-
-static bool
-dccp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- const struct iphdr *iph = (const void *)(skb->data + iphdroff);
- struct dccp_hdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl * 4;
- __be32 oldip, newip;
- __be16 *portptr, oldport, newport;
- int hdrsize = 8; /* DCCP connection tracking guarantees this much */
-
- if (skb->len >= hdroff + sizeof(struct dccp_hdr))
- hdrsize = sizeof(struct dccp_hdr);
-
- if (!skb_make_writable(skb, hdroff + hdrsize))
- return false;
-
- iph = (struct iphdr *)(skb->data + iphdroff);
- hdr = (struct dccp_hdr *)(skb->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
- newport = tuple->src.u.dccp.port;
- portptr = &hdr->dccph_sport;
- } else {
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
- newport = tuple->dst.u.dccp.port;
- portptr = &hdr->dccph_dport;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return true;
-
- inet_proto_csum_replace4(&hdr->dccph_checksum, skb, oldip, newip, 1);
- inet_proto_csum_replace2(&hdr->dccph_checksum, skb, oldport, newport,
- 0);
- return true;
-}
-
-static const struct nf_nat_protocol nf_nat_protocol_dccp = {
- .protonum = IPPROTO_DCCP,
- .me = THIS_MODULE,
- .manip_pkt = dccp_manip_pkt,
- .in_range = nf_nat_proto_in_range,
- .unique_tuple = dccp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_dccp_init(void)
-{
- return nf_nat_protocol_register(&nf_nat_protocol_dccp);
-}
-
-static void __exit nf_nat_proto_dccp_fini(void)
-{
- nf_nat_protocol_unregister(&nf_nat_protocol_dccp);
-}
-
-module_init(nf_nat_proto_dccp_init);
-module_exit(nf_nat_proto_dccp_fini);
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("DCCP NAT protocol helper");
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index d7e89201351..690d890111b 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -21,6 +21,8 @@
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
*/
#include <linux/module.h>
@@ -28,8 +30,7 @@
#include <linux/ip.h>
#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
#include <linux/netfilter/nf_conntrack_proto_gre.h>
MODULE_LICENSE("GPL");
@@ -37,8 +38,9 @@ MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
/* generate unique tuple ... */
-static bool
-gre_unique_tuple(struct nf_conntrack_tuple *tuple,
+static void
+gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
@@ -50,44 +52,44 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple,
/* If there is no master conntrack we are not PPTP,
do not change tuples */
if (!ct->master)
- return false;
+ return;
- if (maniptype == IP_NAT_MANIP_SRC)
+ if (maniptype == NF_NAT_MANIP_SRC)
keyptr = &tuple->src.u.gre.key;
else
keyptr = &tuple->dst.u.gre.key;
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
pr_debug("%p: NATing GRE PPTP\n", ct);
min = 1;
range_size = 0xffff;
} else {
- min = ntohs(range->min.gre.key);
- range_size = ntohs(range->max.gre.key) - min + 1;
+ min = ntohs(range->min_proto.gre.key);
+ range_size = ntohs(range->max_proto.gre.key) - min + 1;
}
pr_debug("min = %u, range_size = %u\n", min, range_size);
- for (i = 0; i < range_size; i++, key++) {
+ for (i = 0; ; ++key) {
*keyptr = htons(min + key % range_size);
- if (!nf_nat_used_tuple(tuple, ct))
- return true;
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ return;
}
pr_debug("%p: no NAT mapping\n", ct);
- return false;
+ return;
}
/* manipulate a GRE packet according to maniptype */
static bool
-gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
+gre_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
const struct gre_hdr *greh;
struct gre_hdr_pptp *pgreh;
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
- unsigned int hdroff = iphdroff + iph->ihl * 4;
/* pgreh includes two optional 32bit fields which are not required
* to be there. That's where the magic '8' comes from */
@@ -99,7 +101,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
/* we only have destination manip of a packet, since 'source key'
* is not present in the packet itself */
- if (maniptype != IP_NAT_MANIP_DST)
+ if (maniptype != NF_NAT_MANIP_DST)
return true;
switch (greh->version) {
case GRE_VERSION_1701:
@@ -117,26 +119,24 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff,
return true;
}
-static const struct nf_nat_protocol gre = {
- .protonum = IPPROTO_GRE,
- .me = THIS_MODULE,
+static const struct nf_nat_l4proto gre = {
+ .l4proto = IPPROTO_GRE,
.manip_pkt = gre_manip_pkt,
- .in_range = nf_nat_proto_in_range,
+ .in_range = nf_nat_l4proto_in_range,
.unique_tuple = gre_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
static int __init nf_nat_proto_gre_init(void)
{
- return nf_nat_protocol_register(&gre);
+ return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
}
static void __exit nf_nat_proto_gre_fini(void)
{
- nf_nat_protocol_unregister(&gre);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
}
module_init(nf_nat_proto_gre_init);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
index 19a8b0b07d8..eb303471bcf 100644
--- a/net/ipv4/netfilter/nf_nat_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -8,14 +8,14 @@
#include <linux/types.h>
#include <linux/init.h>
+#include <linux/export.h>
#include <linux/ip.h>
#include <linux/icmp.h>
#include <linux/netfilter.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
+#include <net/netfilter/nf_nat_l4proto.h>
static bool
icmp_in_range(const struct nf_conntrack_tuple *tuple,
@@ -27,8 +27,9 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple,
ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
}
-static bool
-icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
+static void
+icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
const struct nf_nat_range *range,
enum nf_nat_manip_type maniptype,
const struct nf_conn *ct)
@@ -37,29 +38,29 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple,
unsigned int range_size;
unsigned int i;
- range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
/* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
range_size = 0xFFFF;
- for (i = 0; i < range_size; i++, id++) {
- tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
+ for (i = 0; ; ++id) {
+ tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
(id % range_size));
- if (!nf_nat_used_tuple(tuple, ct))
- return true;
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ return;
}
- return false;
+ return;
}
static bool
icmp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
const struct nf_conntrack_tuple *tuple,
enum nf_nat_manip_type maniptype)
{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
struct icmphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
return false;
@@ -71,14 +72,12 @@ icmp_manip_pkt(struct sk_buff *skb,
return true;
}
-const struct nf_nat_protocol nf_nat_protocol_icmp = {
- .protonum = IPPROTO_ICMP,
- .me = THIS_MODULE,
+const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
+ .l4proto = IPPROTO_ICMP,
.manip_pkt = icmp_manip_pkt,
.in_range = icmp_in_range,
.unique_tuple = icmp_unique_tuple,
#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
#endif
};
diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c
deleted file mode 100644
index 65e470bc612..00000000000
--- a/net/ipv4/netfilter/nf_nat_proto_sctp.c
+++ /dev/null
@@ -1,96 +0,0 @@
-/*
- * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/sctp.h>
-#include <net/sctp/checksum.h>
-
-#include <net/netfilter/nf_nat_protocol.h>
-
-static u_int16_t nf_sctp_port_rover;
-
-static bool
-sctp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
- &nf_sctp_port_rover);
-}
-
-static bool
-sctp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
- sctp_sctphdr_t *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
- __be32 crc32;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- iph = (struct iphdr *)(skb->data + iphdroff);
- hdr = (struct sctphdr *)(skb->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
- hdr->source = tuple->src.u.sctp.port;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
- hdr->dest = tuple->dst.u.sctp.port;
- }
-
- crc32 = sctp_start_cksum((u8 *)hdr, skb_headlen(skb) - hdroff);
- for (skb = skb_shinfo(skb)->frag_list; skb; skb = skb->next)
- crc32 = sctp_update_cksum((u8 *)skb->data, skb_headlen(skb),
- crc32);
- crc32 = sctp_end_cksum(crc32);
- hdr->checksum = crc32;
-
- return true;
-}
-
-static const struct nf_nat_protocol nf_nat_protocol_sctp = {
- .protonum = IPPROTO_SCTP,
- .me = THIS_MODULE,
- .manip_pkt = sctp_manip_pkt,
- .in_range = nf_nat_proto_in_range,
- .unique_tuple = sctp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_sctp_init(void)
-{
- return nf_nat_protocol_register(&nf_nat_protocol_sctp);
-}
-
-static void __exit nf_nat_proto_sctp_exit(void)
-{
- nf_nat_protocol_unregister(&nf_nat_protocol_sctp);
-}
-
-module_init(nf_nat_proto_sctp_init);
-module_exit(nf_nat_proto_sctp_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("SCTP NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c
deleted file mode 100644
index 399e2cfa263..00000000000
--- a/net/ipv4/netfilter/nf_nat_proto_tcp.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-#include <net/netfilter/nf_nat_core.h>
-
-static u_int16_t tcp_port_rover;
-
-static bool
-tcp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
- &tcp_port_rover);
-}
-
-static bool
-tcp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
- struct tcphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
- __be16 *portptr, newport, oldport;
- int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
- /* this could be a inner header returned in icmp packet; in such
- cases we cannot update the checksum field since it is outside of
- the 8 bytes of transport layer headers we are guaranteed */
- if (skb->len >= hdroff + sizeof(struct tcphdr))
- hdrsize = sizeof(struct tcphdr);
-
- if (!skb_make_writable(skb, hdroff + hdrsize))
- return false;
-
- iph = (struct iphdr *)(skb->data + iphdroff);
- hdr = (struct tcphdr *)(skb->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
- newport = tuple->src.u.tcp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
- newport = tuple->dst.u.tcp.port;
- portptr = &hdr->dest;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return true;
-
- inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
- inet_proto_csum_replace2(&hdr->check, skb, oldport, newport, 0);
- return true;
-}
-
-const struct nf_nat_protocol nf_nat_protocol_tcp = {
- .protonum = IPPROTO_TCP,
- .me = THIS_MODULE,
- .manip_pkt = tcp_manip_pkt,
- .in_range = nf_nat_proto_in_range,
- .unique_tuple = tcp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c
deleted file mode 100644
index 9e61c79492e..00000000000
--- a/net/ipv4/netfilter/nf_nat_proto_udp.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-static u_int16_t udp_port_rover;
-
-static bool
-udp_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
- &udp_port_rover);
-}
-
-static bool
-udp_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
- struct udphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
- __be16 *portptr, newport;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- iph = (struct iphdr *)(skb->data + iphdroff);
- hdr = (struct udphdr *)(skb->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
- newport = tuple->src.u.udp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
- if (hdr->check || skb->ip_summed == CHECKSUM_PARTIAL) {
- inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
- inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport,
- 0);
- if (!hdr->check)
- hdr->check = CSUM_MANGLED_0;
- }
- *portptr = newport;
- return true;
-}
-
-const struct nf_nat_protocol nf_nat_protocol_udp = {
- .protonum = IPPROTO_UDP,
- .me = THIS_MODULE,
- .manip_pkt = udp_manip_pkt,
- .in_range = nf_nat_proto_in_range,
- .unique_tuple = udp_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c
deleted file mode 100644
index 440a229bbd8..00000000000
--- a/net/ipv4/netfilter/nf_nat_proto_udplite.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- * (C) 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-static u_int16_t udplite_port_rover;
-
-static bool
-udplite_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- return nf_nat_proto_unique_tuple(tuple, range, maniptype, ct,
- &udplite_port_rover);
-}
-
-static bool
-udplite_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- const struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
- struct udphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- __be32 oldip, newip;
- __be16 *portptr, newport;
-
- if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
- return false;
-
- iph = (struct iphdr *)(skb->data + iphdroff);
- hdr = (struct udphdr *)(skb->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.u3.ip;
- newport = tuple->src.u.udp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.u3.ip;
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
-
- inet_proto_csum_replace4(&hdr->check, skb, oldip, newip, 1);
- inet_proto_csum_replace2(&hdr->check, skb, *portptr, newport, 0);
- if (!hdr->check)
- hdr->check = CSUM_MANGLED_0;
-
- *portptr = newport;
- return true;
-}
-
-static const struct nf_nat_protocol nf_nat_protocol_udplite = {
- .protonum = IPPROTO_UDPLITE,
- .me = THIS_MODULE,
- .manip_pkt = udplite_manip_pkt,
- .in_range = nf_nat_proto_in_range,
- .unique_tuple = udplite_unique_tuple,
-#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
- .range_to_nlattr = nf_nat_proto_range_to_nlattr,
- .nlattr_to_range = nf_nat_proto_nlattr_to_range,
-#endif
-};
-
-static int __init nf_nat_proto_udplite_init(void)
-{
- return nf_nat_protocol_register(&nf_nat_protocol_udplite);
-}
-
-static void __exit nf_nat_proto_udplite_fini(void)
-{
- nf_nat_protocol_unregister(&nf_nat_protocol_udplite);
-}
-
-module_init(nf_nat_proto_udplite_init);
-module_exit(nf_nat_proto_udplite_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("UDP-Lite NAT protocol helper");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c
deleted file mode 100644
index 14381c62ace..00000000000
--- a/net/ipv4/netfilter/nf_nat_proto_unknown.c
+++ /dev/null
@@ -1,53 +0,0 @@
-/* The "unknown" protocol. This is what is used for protocols we
- * don't understand. It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-
-#include <linux/netfilter.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-
-static bool unknown_in_range(const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type manip_type,
- const union nf_conntrack_man_proto *min,
- const union nf_conntrack_man_proto *max)
-{
- return true;
-}
-
-static bool unknown_unique_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_nat_range *range,
- enum nf_nat_manip_type maniptype,
- const struct nf_conn *ct)
-{
- /* Sorry: we can't help you; if it's not unique, we can't frob
- anything. */
- return false;
-}
-
-static bool
-unknown_manip_pkt(struct sk_buff *skb,
- unsigned int iphdroff,
- const struct nf_conntrack_tuple *tuple,
- enum nf_nat_manip_type maniptype)
-{
- return true;
-}
-
-const struct nf_nat_protocol nf_nat_unknown_protocol = {
- /* .me isn't set: getting a ref to this cannot fail. */
- .manip_pkt = unknown_manip_pkt,
- .in_range = unknown_in_range,
- .unique_tuple = unknown_unique_tuple,
-};
diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
deleted file mode 100644
index a7eb0471904..00000000000
--- a/net/ipv4/netfilter/nf_nat_rule.c
+++ /dev/null
@@ -1,237 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Everything about the rules for NAT. */
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/bitops.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_rule.h>
-
-#define NAT_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
- (1 << NF_INET_POST_ROUTING) | \
- (1 << NF_INET_LOCAL_OUT))
-
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
-} nat_initial_table __net_initdata = {
- .repl = {
- .name = "nat",
- .valid_hooks = NAT_VALID_HOOKS,
- .num_entries = 4,
- .size = sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- .hook_entry = {
- [NF_INET_PRE_ROUTING] = 0,
- [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
- },
- .underflow = {
- [NF_INET_PRE_ROUTING] = 0,
- [NF_INET_POST_ROUTING] = sizeof(struct ipt_standard),
- [NF_INET_LOCAL_OUT] = sizeof(struct ipt_standard) * 2
- },
- },
- .entries = {
- IPT_STANDARD_INIT(NF_ACCEPT), /* PRE_ROUTING */
- IPT_STANDARD_INIT(NF_ACCEPT), /* POST_ROUTING */
- IPT_STANDARD_INIT(NF_ACCEPT), /* LOCAL_OUT */
- },
- .term = IPT_ERROR_INIT, /* ERROR */
-};
-
-static struct xt_table nat_table = {
- .name = "nat",
- .valid_hooks = NAT_VALID_HOOKS,
- .lock = __RW_LOCK_UNLOCKED(nat_table.lock),
- .me = THIS_MODULE,
- .af = AF_INET,
-};
-
-/* Source NAT */
-static unsigned int
-ipt_snat_target(struct sk_buff *skb, const struct xt_target_param *par)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
-
- ct = nf_ct_get(skb, &ctinfo);
-
- /* Connection must be valid and new. */
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
- NF_CT_ASSERT(par->out != NULL);
-
- return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC);
-}
-
-static unsigned int
-ipt_dnat_target(struct sk_buff *skb, const struct xt_target_param *par)
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
-
- NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING ||
- par->hooknum == NF_INET_LOCAL_OUT);
-
- ct = nf_ct_get(skb, &ctinfo);
-
- /* Connection must be valid and new. */
- NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
- return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST);
-}
-
-static bool ipt_snat_checkentry(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- printk("SNAT: multiple ranges no longer supported\n");
- return false;
- }
- return true;
-}
-
-static bool ipt_dnat_checkentry(const struct xt_tgchk_param *par)
-{
- const struct nf_nat_multi_range_compat *mr = par->targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- printk("DNAT: multiple ranges no longer supported\n");
- return false;
- }
- return true;
-}
-
-unsigned int
-alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
- Use reply in case it's already been mangled (eg local packet).
- */
- __be32 ip
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip
- : ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
- struct nf_nat_range range
- = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
-
- pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, &ip);
- return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
-}
-
-int nf_nat_rule_find(struct sk_buff *skb,
- unsigned int hooknum,
- const struct net_device *in,
- const struct net_device *out,
- struct nf_conn *ct)
-{
- struct net *net = nf_ct_net(ct);
- int ret;
-
- ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
-
- if (ret == NF_ACCEPT) {
- if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
- /* NUL mapping */
- ret = alloc_null_binding(ct, hooknum);
- }
- return ret;
-}
-
-static struct xt_target ipt_snat_reg __read_mostly = {
- .name = "SNAT",
- .target = ipt_snat_target,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
- .table = "nat",
- .hooks = 1 << NF_INET_POST_ROUTING,
- .checkentry = ipt_snat_checkentry,
- .family = AF_INET,
-};
-
-static struct xt_target ipt_dnat_reg __read_mostly = {
- .name = "DNAT",
- .target = ipt_dnat_target,
- .targetsize = sizeof(struct nf_nat_multi_range_compat),
- .table = "nat",
- .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT),
- .checkentry = ipt_dnat_checkentry,
- .family = AF_INET,
-};
-
-static int __net_init nf_nat_rule_net_init(struct net *net)
-{
- net->ipv4.nat_table = ipt_register_table(net, &nat_table,
- &nat_initial_table.repl);
- if (IS_ERR(net->ipv4.nat_table))
- return PTR_ERR(net->ipv4.nat_table);
- return 0;
-}
-
-static void __net_exit nf_nat_rule_net_exit(struct net *net)
-{
- ipt_unregister_table(net->ipv4.nat_table);
-}
-
-static struct pernet_operations nf_nat_rule_net_ops = {
- .init = nf_nat_rule_net_init,
- .exit = nf_nat_rule_net_exit,
-};
-
-int __init nf_nat_rule_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&nf_nat_rule_net_ops);
- if (ret != 0)
- goto out;
- ret = xt_register_target(&ipt_snat_reg);
- if (ret != 0)
- goto unregister_table;
-
- ret = xt_register_target(&ipt_dnat_reg);
- if (ret != 0)
- goto unregister_snat;
-
- return ret;
-
- unregister_snat:
- xt_unregister_target(&ipt_snat_reg);
- unregister_table:
- unregister_pernet_subsys(&nf_nat_rule_net_ops);
- out:
- return ret;
-}
-
-void nf_nat_rule_cleanup(void)
-{
- xt_unregister_target(&ipt_dnat_reg);
- xt_unregister_target(&ipt_snat_reg);
- unregister_pernet_subsys(&nf_nat_rule_net_ops);
-}
diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c
deleted file mode 100644
index 07d61a57613..00000000000
--- a/net/ipv4/netfilter/nf_nat_sip.c
+++ /dev/null
@@ -1,500 +0,0 @@
-/* SIP extension for UDP NAT alteration.
- *
- * (C) 2005 by Christian Hentschel <chentschel@arnet.com.ar>
- * based on RR's ip_nat_ftp.c and other modules.
- * (C) 2007 United Security Providers
- * (C) 2007, 2008 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/ip.h>
-#include <linux/udp.h>
-
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <linux/netfilter/nf_conntrack_sip.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Christian Hentschel <chentschel@arnet.com.ar>");
-MODULE_DESCRIPTION("SIP NAT helper");
-MODULE_ALIAS("ip_nat_sip");
-
-
-static unsigned int mangle_packet(struct sk_buff *skb,
- const char **dptr, unsigned int *datalen,
- unsigned int matchoff, unsigned int matchlen,
- const char *buffer, unsigned int buflen)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
-
- if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, matchoff, matchlen,
- buffer, buflen))
- return 0;
-
- /* Reload data pointer and adjust datalen value */
- *dptr = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
- *datalen += buflen - matchlen;
- return 1;
-}
-
-static int map_addr(struct sk_buff *skb,
- const char **dptr, unsigned int *datalen,
- unsigned int matchoff, unsigned int matchlen,
- union nf_inet_addr *addr, __be16 port)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- unsigned int buflen;
- __be32 newaddr;
- __be16 newport;
-
- if (ct->tuplehash[dir].tuple.src.u3.ip == addr->ip &&
- ct->tuplehash[dir].tuple.src.u.udp.port == port) {
- newaddr = ct->tuplehash[!dir].tuple.dst.u3.ip;
- newport = ct->tuplehash[!dir].tuple.dst.u.udp.port;
- } else if (ct->tuplehash[dir].tuple.dst.u3.ip == addr->ip &&
- ct->tuplehash[dir].tuple.dst.u.udp.port == port) {
- newaddr = ct->tuplehash[!dir].tuple.src.u3.ip;
- newport = ct->tuplehash[!dir].tuple.src.u.udp.port;
- } else
- return 1;
-
- if (newaddr == addr->ip && newport == port)
- return 1;
-
- buflen = sprintf(buffer, "%pI4:%u", &newaddr, ntohs(newport));
-
- return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
- buffer, buflen);
-}
-
-static int map_sip_addr(struct sk_buff *skb,
- const char **dptr, unsigned int *datalen,
- enum sip_header_types type)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- unsigned int matchlen, matchoff;
- union nf_inet_addr addr;
- __be16 port;
-
- if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen, type, NULL,
- &matchoff, &matchlen, &addr, &port) <= 0)
- return 1;
- return map_addr(skb, dptr, datalen, matchoff, matchlen, &addr, port);
-}
-
-static unsigned int ip_nat_sip(struct sk_buff *skb,
- const char **dptr, unsigned int *datalen)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned int dataoff, matchoff, matchlen;
- union nf_inet_addr addr;
- __be16 port;
- int request, in_header;
-
- /* Basic rules: requests and responses. */
- if (strnicmp(*dptr, "SIP/2.0", strlen("SIP/2.0")) != 0) {
- if (ct_sip_parse_request(ct, *dptr, *datalen,
- &matchoff, &matchlen,
- &addr, &port) > 0 &&
- !map_addr(skb, dptr, datalen, matchoff, matchlen,
- &addr, port))
- return NF_DROP;
- request = 1;
- } else
- request = 0;
-
- /* Translate topmost Via header and parameters */
- if (ct_sip_parse_header_uri(ct, *dptr, NULL, *datalen,
- SIP_HDR_VIA, NULL, &matchoff, &matchlen,
- &addr, &port) > 0) {
- unsigned int matchend, poff, plen, buflen, n;
- char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
-
- /* We're only interested in headers related to this
- * connection */
- if (request) {
- if (addr.ip != ct->tuplehash[dir].tuple.src.u3.ip ||
- port != ct->tuplehash[dir].tuple.src.u.udp.port)
- goto next;
- } else {
- if (addr.ip != ct->tuplehash[dir].tuple.dst.u3.ip ||
- port != ct->tuplehash[dir].tuple.dst.u.udp.port)
- goto next;
- }
-
- if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
- &addr, port))
- return NF_DROP;
-
- matchend = matchoff + matchlen;
-
- /* The maddr= parameter (RFC 2361) specifies where to send
- * the reply. */
- if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
- "maddr=", &poff, &plen,
- &addr) > 0 &&
- addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
- addr.ip != ct->tuplehash[!dir].tuple.dst.u3.ip) {
- buflen = sprintf(buffer, "%pI4",
- &ct->tuplehash[!dir].tuple.dst.u3.ip);
- if (!mangle_packet(skb, dptr, datalen, poff, plen,
- buffer, buflen))
- return NF_DROP;
- }
-
- /* The received= parameter (RFC 2361) contains the address
- * from which the server received the request. */
- if (ct_sip_parse_address_param(ct, *dptr, matchend, *datalen,
- "received=", &poff, &plen,
- &addr) > 0 &&
- addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
- addr.ip != ct->tuplehash[!dir].tuple.src.u3.ip) {
- buflen = sprintf(buffer, "%pI4",
- &ct->tuplehash[!dir].tuple.src.u3.ip);
- if (!mangle_packet(skb, dptr, datalen, poff, plen,
- buffer, buflen))
- return NF_DROP;
- }
-
- /* The rport= parameter (RFC 3581) contains the port number
- * from which the server received the request. */
- if (ct_sip_parse_numerical_param(ct, *dptr, matchend, *datalen,
- "rport=", &poff, &plen,
- &n) > 0 &&
- htons(n) == ct->tuplehash[dir].tuple.dst.u.udp.port &&
- htons(n) != ct->tuplehash[!dir].tuple.src.u.udp.port) {
- __be16 p = ct->tuplehash[!dir].tuple.src.u.udp.port;
- buflen = sprintf(buffer, "%u", ntohs(p));
- if (!mangle_packet(skb, dptr, datalen, poff, plen,
- buffer, buflen))
- return NF_DROP;
- }
- }
-
-next:
- /* Translate Contact headers */
- dataoff = 0;
- in_header = 0;
- while (ct_sip_parse_header_uri(ct, *dptr, &dataoff, *datalen,
- SIP_HDR_CONTACT, &in_header,
- &matchoff, &matchlen,
- &addr, &port) > 0) {
- if (!map_addr(skb, dptr, datalen, matchoff, matchlen,
- &addr, port))
- return NF_DROP;
- }
-
- if (!map_sip_addr(skb, dptr, datalen, SIP_HDR_FROM) ||
- !map_sip_addr(skb, dptr, datalen, SIP_HDR_TO))
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-/* Handles expected signalling connections and media streams */
-static void ip_nat_sip_expected(struct nf_conn *ct,
- struct nf_conntrack_expect *exp)
-{
- struct nf_nat_range range;
-
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = exp->saved_proto;
- range.min_ip = range.max_ip = exp->saved_ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST);
-
- /* Change src to where master sends to, but only if the connection
- * actually came from the same source. */
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip ==
- ct->master->tuplehash[exp->dir].tuple.src.u3.ip) {
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip;
- nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC);
- }
-}
-
-static unsigned int ip_nat_sip_expect(struct sk_buff *skb,
- const char **dptr, unsigned int *datalen,
- struct nf_conntrack_expect *exp,
- unsigned int matchoff,
- unsigned int matchlen)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- __be32 newip;
- u_int16_t port;
- char buffer[sizeof("nnn.nnn.nnn.nnn:nnnnn")];
- unsigned buflen;
-
- /* Connection will come from reply */
- if (ct->tuplehash[dir].tuple.src.u3.ip == ct->tuplehash[!dir].tuple.dst.u3.ip)
- newip = exp->tuple.dst.u3.ip;
- else
- newip = ct->tuplehash[!dir].tuple.dst.u3.ip;
-
- /* If the signalling port matches the connection's source port in the
- * original direction, try to use the destination port in the opposite
- * direction. */
- if (exp->tuple.dst.u.udp.port ==
- ct->tuplehash[dir].tuple.src.u.udp.port)
- port = ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port);
- else
- port = ntohs(exp->tuple.dst.u.udp.port);
-
- exp->saved_ip = exp->tuple.dst.u3.ip;
- exp->tuple.dst.u3.ip = newip;
- exp->saved_proto.udp.port = exp->tuple.dst.u.udp.port;
- exp->dir = !dir;
- exp->expectfn = ip_nat_sip_expected;
-
- for (; port != 0; port++) {
- exp->tuple.dst.u.udp.port = htons(port);
- if (nf_ct_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- if (exp->tuple.dst.u3.ip != exp->saved_ip ||
- exp->tuple.dst.u.udp.port != exp->saved_proto.udp.port) {
- buflen = sprintf(buffer, "%pI4:%u", &newip, port);
- if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
- buffer, buflen))
- goto err;
- }
- return NF_ACCEPT;
-
-err:
- nf_ct_unexpect_related(exp);
- return NF_DROP;
-}
-
-static int mangle_content_len(struct sk_buff *skb,
- const char **dptr, unsigned int *datalen)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- unsigned int matchoff, matchlen;
- char buffer[sizeof("65536")];
- int buflen, c_len;
-
- /* Get actual SDP length */
- if (ct_sip_get_sdp_header(ct, *dptr, 0, *datalen,
- SDP_HDR_VERSION, SDP_HDR_UNSPEC,
- &matchoff, &matchlen) <= 0)
- return 0;
- c_len = *datalen - matchoff + strlen("v=");
-
- /* Now, update SDP length */
- if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CONTENT_LENGTH,
- &matchoff, &matchlen) <= 0)
- return 0;
-
- buflen = sprintf(buffer, "%u", c_len);
- return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
- buffer, buflen);
-}
-
-static int mangle_sdp_packet(struct sk_buff *skb, const char **dptr,
- unsigned int dataoff, unsigned int *datalen,
- enum sdp_header_types type,
- enum sdp_header_types term,
- char *buffer, int buflen)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- unsigned int matchlen, matchoff;
-
- if (ct_sip_get_sdp_header(ct, *dptr, dataoff, *datalen, type, term,
- &matchoff, &matchlen) <= 0)
- return -ENOENT;
- return mangle_packet(skb, dptr, datalen, matchoff, matchlen,
- buffer, buflen) ? 0 : -EINVAL;
-}
-
-static unsigned int ip_nat_sdp_addr(struct sk_buff *skb, const char **dptr,
- unsigned int dataoff,
- unsigned int *datalen,
- enum sdp_header_types type,
- enum sdp_header_types term,
- const union nf_inet_addr *addr)
-{
- char buffer[sizeof("nnn.nnn.nnn.nnn")];
- unsigned int buflen;
-
- buflen = sprintf(buffer, "%pI4", &addr->ip);
- if (mangle_sdp_packet(skb, dptr, dataoff, datalen, type, term,
- buffer, buflen))
- return 0;
-
- return mangle_content_len(skb, dptr, datalen);
-}
-
-static unsigned int ip_nat_sdp_port(struct sk_buff *skb,
- const char **dptr,
- unsigned int *datalen,
- unsigned int matchoff,
- unsigned int matchlen,
- u_int16_t port)
-{
- char buffer[sizeof("nnnnn")];
- unsigned int buflen;
-
- buflen = sprintf(buffer, "%u", port);
- if (!mangle_packet(skb, dptr, datalen, matchoff, matchlen,
- buffer, buflen))
- return 0;
-
- return mangle_content_len(skb, dptr, datalen);
-}
-
-static unsigned int ip_nat_sdp_session(struct sk_buff *skb, const char **dptr,
- unsigned int dataoff,
- unsigned int *datalen,
- const union nf_inet_addr *addr)
-{
- char buffer[sizeof("nnn.nnn.nnn.nnn")];
- unsigned int buflen;
-
- /* Mangle session description owner and contact addresses */
- buflen = sprintf(buffer, "%pI4", &addr->ip);
- if (mangle_sdp_packet(skb, dptr, dataoff, datalen,
- SDP_HDR_OWNER_IP4, SDP_HDR_MEDIA,
- buffer, buflen))
- return 0;
-
- switch (mangle_sdp_packet(skb, dptr, dataoff, datalen,
- SDP_HDR_CONNECTION_IP4, SDP_HDR_MEDIA,
- buffer, buflen)) {
- case 0:
- /*
- * RFC 2327:
- *
- * Session description
- *
- * c=* (connection information - not required if included in all media)
- */
- case -ENOENT:
- break;
- default:
- return 0;
- }
-
- return mangle_content_len(skb, dptr, datalen);
-}
-
-/* So, this packet has hit the connection tracking matching code.
- Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_sdp_media(struct sk_buff *skb,
- const char **dptr,
- unsigned int *datalen,
- struct nf_conntrack_expect *rtp_exp,
- struct nf_conntrack_expect *rtcp_exp,
- unsigned int mediaoff,
- unsigned int medialen,
- union nf_inet_addr *rtp_addr)
-{
- enum ip_conntrack_info ctinfo;
- struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- u_int16_t port;
-
- /* Connection will come from reply */
- if (ct->tuplehash[dir].tuple.src.u3.ip ==
- ct->tuplehash[!dir].tuple.dst.u3.ip)
- rtp_addr->ip = rtp_exp->tuple.dst.u3.ip;
- else
- rtp_addr->ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
-
- rtp_exp->saved_ip = rtp_exp->tuple.dst.u3.ip;
- rtp_exp->tuple.dst.u3.ip = rtp_addr->ip;
- rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
- rtp_exp->dir = !dir;
- rtp_exp->expectfn = ip_nat_sip_expected;
-
- rtcp_exp->saved_ip = rtcp_exp->tuple.dst.u3.ip;
- rtcp_exp->tuple.dst.u3.ip = rtp_addr->ip;
- rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
- rtcp_exp->dir = !dir;
- rtcp_exp->expectfn = ip_nat_sip_expected;
-
- /* Try to get same pair of ports: if not, try to change them. */
- for (port = ntohs(rtp_exp->tuple.dst.u.udp.port);
- port != 0; port += 2) {
- rtp_exp->tuple.dst.u.udp.port = htons(port);
- if (nf_ct_expect_related(rtp_exp) != 0)
- continue;
- rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
- if (nf_ct_expect_related(rtcp_exp) == 0)
- break;
- nf_ct_unexpect_related(rtp_exp);
- }
-
- if (port == 0)
- goto err1;
-
- /* Update media port. */
- if (rtp_exp->tuple.dst.u.udp.port != rtp_exp->saved_proto.udp.port &&
- !ip_nat_sdp_port(skb, dptr, datalen, mediaoff, medialen, port))
- goto err2;
-
- return NF_ACCEPT;
-
-err2:
- nf_ct_unexpect_related(rtp_exp);
- nf_ct_unexpect_related(rtcp_exp);
-err1:
- return NF_DROP;
-}
-
-static void __exit nf_nat_sip_fini(void)
-{
- rcu_assign_pointer(nf_nat_sip_hook, NULL);
- rcu_assign_pointer(nf_nat_sip_expect_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_addr_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_port_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_session_hook, NULL);
- rcu_assign_pointer(nf_nat_sdp_media_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init nf_nat_sip_init(void)
-{
- BUG_ON(nf_nat_sip_hook != NULL);
- BUG_ON(nf_nat_sip_expect_hook != NULL);
- BUG_ON(nf_nat_sdp_addr_hook != NULL);
- BUG_ON(nf_nat_sdp_port_hook != NULL);
- BUG_ON(nf_nat_sdp_session_hook != NULL);
- BUG_ON(nf_nat_sdp_media_hook != NULL);
- rcu_assign_pointer(nf_nat_sip_hook, ip_nat_sip);
- rcu_assign_pointer(nf_nat_sip_expect_hook, ip_nat_sip_expect);
- rcu_assign_pointer(nf_nat_sdp_addr_hook, ip_nat_sdp_addr);
- rcu_assign_pointer(nf_nat_sdp_port_hook, ip_nat_sdp_port);
- rcu_assign_pointer(nf_nat_sdp_session_hook, ip_nat_sdp_session);
- rcu_assign_pointer(nf_nat_sdp_media_hook, ip_nat_sdp_media);
- return 0;
-}
-
-module_init(nf_nat_sip_init);
-module_exit(nf_nat_sip_fini);
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 182f845de92..7c676671329 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -34,15 +34,17 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
+#include <linux/slab.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/udp.h>
@@ -53,6 +55,7 @@
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
@@ -398,15 +401,12 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
*len = 0;
*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
- if (*octets == NULL) {
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
+ if (*octets == NULL)
return 0;
- }
ptr = *octets;
while (ctx->pointer < eoc) {
- if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
+ if (!asn1_octet_decode(ctx, ptr++)) {
kfree(*octets);
*octets = NULL;
return 0;
@@ -449,11 +449,8 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
return 0;
*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
- if (*oid == NULL) {
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
+ if (*oid == NULL)
return 0;
- }
optr = *oid;
@@ -464,14 +461,14 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
}
if (subid < 40) {
- optr [0] = 0;
- optr [1] = subid;
+ optr[0] = 0;
+ optr[1] = subid;
} else if (subid < 80) {
- optr [0] = 1;
- optr [1] = subid - 40;
+ optr[0] = 1;
+ optr[1] = subid - 40;
} else {
- optr [0] = 2;
- optr [1] = subid - 80;
+ optr[0] = 2;
+ optr[1] = subid - 80;
}
*len = 2;
@@ -717,117 +714,103 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
l = 0;
switch (type) {
- case SNMP_INTEGER:
- len = sizeof(long);
- if (!asn1_long_decode(ctx, end, &l)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len,
- GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- (*obj)->syntax.l[0] = l;
- break;
- case SNMP_OCTETSTR:
- case SNMP_OPAQUE:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len,
- GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.c, p, len);
+ case SNMP_INTEGER:
+ len = sizeof(long);
+ if (!asn1_long_decode(ctx, end, &l)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ (*obj)->syntax.l[0] = l;
+ break;
+ case SNMP_OCTETSTR:
+ case SNMP_OPAQUE:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
kfree(p);
- break;
- case SNMP_NULL:
- case SNMP_NOSUCHOBJECT:
- case SNMP_NOSUCHINSTANCE:
- case SNMP_ENDOFMIBVIEW:
- len = 0;
- *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- if (!asn1_null_decode(ctx, end)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- break;
- case SNMP_OBJECTID:
- if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
- kfree(id);
- return 0;
- }
- len *= sizeof(unsigned long);
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(lp);
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.ul, lp, len);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.c, p, len);
+ kfree(p);
+ break;
+ case SNMP_NULL:
+ case SNMP_NOSUCHOBJECT:
+ case SNMP_NOSUCHINSTANCE:
+ case SNMP_ENDOFMIBVIEW:
+ len = 0;
+ *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ if (!asn1_null_decode(ctx, end)) {
+ kfree(id);
+ kfree(*obj);
+ *obj = NULL;
+ return 0;
+ }
+ break;
+ case SNMP_OBJECTID:
+ if (!asn1_oid_decode(ctx, end, &lp, &len)) {
+ kfree(id);
+ return 0;
+ }
+ len *= sizeof(unsigned long);
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
kfree(lp);
- break;
- case SNMP_IPADDR:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- if (len != 4) {
- kfree(p);
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.uc, p, len);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.ul, lp, len);
+ kfree(lp);
+ break;
+ case SNMP_IPADDR:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ if (len != 4) {
kfree(p);
- break;
- case SNMP_COUNTER:
- case SNMP_GAUGE:
- case SNMP_TIMETICKS:
- len = sizeof(unsigned long);
- if (!asn1_ulong_decode(ctx, end, &ul)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- (*obj)->syntax.ul[0] = ul;
- break;
- default:
kfree(id);
return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(p);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.uc, p, len);
+ kfree(p);
+ break;
+ case SNMP_COUNTER:
+ case SNMP_GAUGE:
+ case SNMP_TIMETICKS:
+ len = sizeof(unsigned long);
+ if (!asn1_ulong_decode(ctx, end, &ul)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ (*obj)->syntax.ul[0] = ul;
+ break;
+ default:
+ kfree(id);
+ return 0;
}
(*obj)->syntax_len = len;
@@ -892,13 +875,15 @@ static void fast_csum(__sum16 *csum,
unsigned char s[4];
if (offset & 1) {
- s[0] = s[2] = 0;
+ s[0] = ~0;
s[1] = ~*optr;
+ s[2] = 0;
s[3] = *nptr;
} else {
- s[1] = s[3] = 0;
s[0] = ~*optr;
+ s[1] = ~0;
s[2] = *nptr;
+ s[3] = 0;
}
*csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
@@ -1038,7 +1023,7 @@ static int snmp_parse_mangle(unsigned char *msg,
unsigned int cls, con, tag, vers, pdutype;
struct asn1_ctx ctx;
struct asn1_octstr comm;
- struct snmp_object **obj;
+ struct snmp_object *obj;
if (debug > 1)
hex_dump(msg, len);
@@ -1148,43 +1133,34 @@ static int snmp_parse_mangle(unsigned char *msg,
if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
return 0;
- obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (obj == NULL) {
- if (net_ratelimit())
- printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
- return 0;
- }
-
while (!asn1_eoc_decode(&ctx, eoc)) {
unsigned int i;
- if (!snmp_object_decode(&ctx, obj)) {
- if (*obj) {
- kfree((*obj)->id);
- kfree(*obj);
+ if (!snmp_object_decode(&ctx, &obj)) {
+ if (obj) {
+ kfree(obj->id);
+ kfree(obj);
}
- kfree(obj);
return 0;
}
if (debug > 1) {
printk(KERN_DEBUG "bsalg: object: ");
- for (i = 0; i < (*obj)->id_len; i++) {
+ for (i = 0; i < obj->id_len; i++) {
if (i > 0)
printk(".");
- printk("%lu", (*obj)->id[i]);
+ printk("%lu", obj->id[i]);
}
- printk(": type=%u\n", (*obj)->type);
+ printk(": type=%u\n", obj->type);
}
- if ((*obj)->type == SNMP_IPADDR)
+ if (obj->type == SNMP_IPADDR)
mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
- kfree((*obj)->id);
- kfree(*obj);
+ kfree(obj->id);
+ kfree(obj);
}
- kfree(obj);
if (!asn1_eoc_decode(&ctx, eoc))
return 0;
@@ -1222,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct,
map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
} else {
/* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+ map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
}
if (map.from == map.to)
@@ -1231,8 +1207,7 @@ static int snmp_translate(struct nf_conn *ct,
if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
paylen, &map, &udph->check)) {
- if (net_ratelimit())
- printk(KERN_WARNING "bsalg: parser failed\n");
+ net_warn_ratelimited("bsalg: parser failed\n");
return NF_DROP;
}
return NF_ACCEPT;
@@ -1266,9 +1241,8 @@ static int help(struct sk_buff *skb, unsigned int protoff,
* can mess around with the payload.
*/
if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
- if (net_ratelimit())
- printk(KERN_WARNING "SNMP: dropping malformed packet src=%pI4 dst=%pI4\n",
- &iph->saddr, &iph->daddr);
+ net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n",
+ &iph->saddr, &iph->daddr);
return NF_DROP;
}
@@ -1292,7 +1266,7 @@ static struct nf_conntrack_helper snmp_helper __read_mostly = {
.expect_policy = &snmp_exp_policy,
.name = "snmp",
.tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = __constant_htons(SNMP_PORT),
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
};
@@ -1302,7 +1276,7 @@ static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
.expect_policy = &snmp_exp_policy,
.name = "snmp_trap",
.tuple.src.l3num = AF_INET,
- .tuple.src.u.udp.port = __constant_htons(SNMP_TRAP_PORT),
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
.tuple.dst.protonum = IPPROTO_UDP,
};
@@ -1316,9 +1290,9 @@ static int __init nf_nat_snmp_basic_init(void)
{
int ret = 0;
- ret = nf_conntrack_helper_register(&snmp_helper);
- if (ret < 0)
- return ret;
+ BUG_ON(nf_nat_snmp_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, help);
+
ret = nf_conntrack_helper_register(&snmp_trap_helper);
if (ret < 0) {
nf_conntrack_helper_unregister(&snmp_helper);
@@ -1329,7 +1303,7 @@ static int __init nf_nat_snmp_basic_init(void)
static void __exit nf_nat_snmp_basic_fini(void)
{
- nf_conntrack_helper_unregister(&snmp_helper);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
nf_conntrack_helper_unregister(&snmp_trap_helper);
}
diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c
deleted file mode 100644
index b7dd695691a..00000000000
--- a/net/ipv4/netfilter/nf_nat_standalone.c
+++ /dev/null
@@ -1,332 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/spinlock.h>
-
-#include <net/netfilter/nf_conntrack.h>
-#include <net/netfilter/nf_conntrack_core.h>
-#include <net/netfilter/nf_conntrack_extend.h>
-#include <net/netfilter/nf_nat.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_nat_protocol.h>
-#include <net/netfilter/nf_nat_core.h>
-#include <net/netfilter/nf_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-#ifdef CONFIG_XFRM
-static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
-{
- const struct nf_conn *ct;
- const struct nf_conntrack_tuple *t;
- enum ip_conntrack_info ctinfo;
- enum ip_conntrack_dir dir;
- unsigned long statusbit;
-
- ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
- return;
- dir = CTINFO2DIR(ctinfo);
- t = &ct->tuplehash[dir].tuple;
-
- if (dir == IP_CT_DIR_ORIGINAL)
- statusbit = IPS_DST_NAT;
- else
- statusbit = IPS_SRC_NAT;
-
- if (ct->status & statusbit) {
- fl->fl4_dst = t->dst.u3.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP ||
- t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
- t->dst.protonum == IPPROTO_SCTP)
- fl->fl_ip_dport = t->dst.u.tcp.port;
- }
-
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- fl->fl4_src = t->src.u3.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP ||
- t->dst.protonum == IPPROTO_UDPLITE ||
- t->dst.protonum == IPPROTO_DCCP ||
- t->dst.protonum == IPPROTO_SCTP)
- fl->fl_ip_sport = t->src.u.tcp.port;
- }
-}
-#endif
-
-static unsigned int
-nf_nat_fn(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- struct nf_conn_nat *nat;
- /* maniptype == SRC for postrouting. */
- enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
- /* We never see fragments: conntrack defrags on pre-routing
- and local-out, and nf_nat_out protects post-routing. */
- NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
-
- ct = nf_ct_get(skb, &ctinfo);
- /* Can't track? It's not due to stress, or conntrack would
- have dropped it. Hence it's the user's responsibilty to
- packet filter it out, or implement conntrack/NAT for that
- protocol. 8) --RR */
- if (!ct)
- return NF_ACCEPT;
-
- /* Don't try to NAT if this packet is not conntracked */
- if (ct == &nf_conntrack_untracked)
- return NF_ACCEPT;
-
- nat = nfct_nat(ct);
- if (!nat) {
- /* NAT module was loaded late. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED+IP_CT_IS_REPLY:
- if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
- if (!nf_nat_icmp_reply_translation(ct, ctinfo,
- hooknum, skb))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
- case IP_CT_NEW:
-
- /* Seen it before? This can happen for loopback, retrans,
- or local packets.. */
- if (!nf_nat_initialized(ct, maniptype)) {
- unsigned int ret;
-
- if (hooknum == NF_INET_LOCAL_IN)
- /* LOCAL_IN hook doesn't have a chain! */
- ret = alloc_null_binding(ct, hooknum);
- else
- ret = nf_nat_rule_find(skb, hooknum, in, out,
- ct);
-
- if (ret != NF_ACCEPT) {
- return ret;
- }
- } else
- pr_debug("Already setup manip %s for ct %p\n",
- maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
- ct);
- break;
-
- default:
- /* ESTABLISHED */
- NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
- ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
- }
-
- return nf_nat_packet(ct, ctinfo, hooknum, skb);
-}
-
-static unsigned int
-nf_nat_in(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- unsigned int ret;
- __be32 daddr = ip_hdr(skb)->daddr;
-
- ret = nf_nat_fn(hooknum, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- daddr != ip_hdr(skb)->daddr) {
- dst_release(skb->dst);
- skb->dst = NULL;
- }
- return ret;
-}
-
-static unsigned int
-nf_nat_out(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
-#ifdef CONFIG_XFRM
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
-#endif
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = nf_nat_fn(hooknum, skb, in, out, okfn);
-#ifdef CONFIG_XFRM
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.src.u3.ip !=
- ct->tuplehash[!dir].tuple.dst.u3.ip
- || ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all
- )
- return ip_xfrm_me_harder(skb) == 0 ? ret : NF_DROP;
- }
-#endif
- return ret;
-}
-
-static unsigned int
-nf_nat_local_fn(unsigned int hooknum,
- struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- const struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if (skb->len < sizeof(struct iphdr) ||
- ip_hdrlen(skb) < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = nf_nat_fn(hooknum, skb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN &&
- (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.dst.u3.ip !=
- ct->tuplehash[!dir].tuple.src.u3.ip) {
- if (ip_route_me_harder(skb, RTN_UNSPEC))
- ret = NF_DROP;
- }
-#ifdef CONFIG_XFRM
- else if (ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all)
- if (ip_xfrm_me_harder(skb))
- ret = NF_DROP;
-#endif
- }
- return ret;
-}
-
-/* We must be after connection tracking and before packet filtering. */
-
-static struct nf_hook_ops nf_nat_ops[] __read_mostly = {
- /* Before packet filtering, change destination */
- {
- .hook = nf_nat_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_PRE_ROUTING,
- .priority = NF_IP_PRI_NAT_DST,
- },
- /* After packet filtering, change source */
- {
- .hook = nf_nat_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SRC,
- },
- /* Before packet filtering, change destination */
- {
- .hook = nf_nat_local_fn,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_OUT,
- .priority = NF_IP_PRI_NAT_DST,
- },
- /* After packet filtering, change source */
- {
- .hook = nf_nat_fn,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_INET_LOCAL_IN,
- .priority = NF_IP_PRI_NAT_SRC,
- },
-};
-
-static int __init nf_nat_standalone_init(void)
-{
- int ret = 0;
-
- need_ipv4_conntrack();
-
-#ifdef CONFIG_XFRM
- BUG_ON(ip_nat_decode_session != NULL);
- rcu_assign_pointer(ip_nat_decode_session, nat_decode_session);
-#endif
- ret = nf_nat_rule_init();
- if (ret < 0) {
- printk("nf_nat_init: can't setup rules.\n");
- goto cleanup_decode_session;
- }
- ret = nf_register_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
- if (ret < 0) {
- printk("nf_nat_init: can't register hooks.\n");
- goto cleanup_rule_init;
- }
- return ret;
-
- cleanup_rule_init:
- nf_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
- rcu_assign_pointer(ip_nat_decode_session, NULL);
- synchronize_net();
-#endif
- return ret;
-}
-
-static void __exit nf_nat_standalone_fini(void)
-{
- nf_unregister_hooks(nf_nat_ops, ARRAY_SIZE(nf_nat_ops));
- nf_nat_rule_cleanup();
-#ifdef CONFIG_XFRM
- rcu_assign_pointer(ip_nat_decode_session, NULL);
- synchronize_net();
-#endif
- /* Conntrack caches are unregistered in nf_conntrack_cleanup */
-}
-
-module_init(nf_nat_standalone_init);
-module_exit(nf_nat_standalone_fini);
-
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat");
diff --git a/net/ipv4/netfilter/nf_nat_tftp.c b/net/ipv4/netfilter/nf_nat_tftp.c
deleted file mode 100644
index b096e81500a..00000000000
--- a/net/ipv4/netfilter/nf_nat_tftp.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/moduleparam.h>
-#include <linux/udp.h>
-
-#include <net/netfilter/nf_nat_helper.h>
-#include <net/netfilter/nf_nat_rule.h>
-#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_expect.h>
-#include <linux/netfilter/nf_conntrack_tftp.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("TFTP NAT helper");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("ip_nat_tftp");
-
-static unsigned int help(struct sk_buff *skb,
- enum ip_conntrack_info ctinfo,
- struct nf_conntrack_expect *exp)
-{
- const struct nf_conn *ct = exp->master;
-
- exp->saved_proto.udp.port
- = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
- exp->dir = IP_CT_DIR_REPLY;
- exp->expectfn = nf_nat_follow_master;
- if (nf_ct_expect_related(exp) != 0)
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-static void __exit nf_nat_tftp_fini(void)
-{
- rcu_assign_pointer(nf_nat_tftp_hook, NULL);
- synchronize_rcu();
-}
-
-static int __init nf_nat_tftp_init(void)
-{
- BUG_ON(nf_nat_tftp_hook != NULL);
- rcu_assign_pointer(nf_nat_tftp_hook, help);
- return 0;
-}
-
-module_init(nf_nat_tftp_init);
-module_exit(nf_nat_tftp_fini);
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
new file mode 100644
index 00000000000..19412a4063f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int
+nft_do_chain_arp(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, ops, skb, in, out);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static struct nft_af_info nft_af_arp __read_mostly = {
+ .family = NFPROTO_ARP,
+ .nhooks = NF_ARP_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ [NF_ARP_FORWARD] = nft_do_chain_arp,
+ },
+};
+
+static int nf_tables_arp_init_net(struct net *net)
+{
+ net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.arp== NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
+
+ if (nft_register_afinfo(net, net->nft.arp) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.arp);
+ return -ENOMEM;
+}
+
+static void nf_tables_arp_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.arp);
+ kfree(net->nft.arp);
+}
+
+static struct pernet_operations nf_tables_arp_net_ops = {
+ .init = nf_tables_arp_init_net,
+ .exit = nf_tables_arp_exit_net,
+};
+
+static const struct nf_chain_type filter_arp = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_ARP,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT) |
+ (1 << NF_ARP_FORWARD),
+};
+
+static int __init nf_tables_arp_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_arp);
+ ret = register_pernet_subsys(&nf_tables_arp_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_arp);
+
+ return ret;
+}
+
+static void __exit nf_tables_arp_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_arp_net_ops);
+ nft_unregister_chain_type(&filter_arp);
+}
+
+module_init(nf_tables_arp_init);
+module_exit(nf_tables_arp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 00000000000..6820c8c4084
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+
+static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if (unlikely(skb->len < sizeof(struct iphdr) ||
+ ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+ if (net_ratelimit())
+ pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
+ "packet\n");
+ return NF_ACCEPT;
+ }
+
+ return nft_do_chain_ipv4(ops, skb, in, out, okfn);
+}
+
+struct nft_af_info nft_af_ipv4 __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+};
+EXPORT_SYMBOL_GPL(nft_af_ipv4);
+
+static int nf_tables_ipv4_init_net(struct net *net)
+{
+ net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.ipv4 == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
+
+ if (nft_register_afinfo(net, net->nft.ipv4) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.ipv4);
+ return -ENOMEM;
+}
+
+static void nf_tables_ipv4_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv4);
+ kfree(net->nft.ipv4);
+}
+
+static struct pernet_operations nf_tables_ipv4_net_ops = {
+ .init = nf_tables_ipv4_init_net,
+ .exit = nf_tables_ipv4_exit_net,
+};
+
+static const struct nf_chain_type filter_ipv4 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+};
+
+static int __init nf_tables_ipv4_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_ipv4);
+ ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_ipv4);
+
+ return ret;
+}
+
+static void __exit nf_tables_ipv4_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
+ nft_unregister_chain_type(&filter_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
new file mode 100644
index 00000000000..3964157d826
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+/*
+ * NAT chains
+ */
+
+static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ __be32 daddr = ip_hdr(skb)->daddr;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ip_hdr(skb)->daddr != daddr) {
+ skb_dst_drop(skb);
+ }
+ return ret;
+}
+
+static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo __maybe_unused;
+ const struct nf_conn *ct __maybe_unused;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip ||
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)
+ return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
+ ret : NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_nat_ipv4 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nf_nat_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_output,
+ [NF_INET_LOCAL_IN] = nf_nat_fn,
+ },
+};
+
+static int __init nft_chain_nat_init(void)
+{
+ int err;
+
+ err = nft_register_chain_type(&nft_chain_nat_ipv4);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv4);
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
new file mode 100644
index 00000000000..125b66766c0
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct nft_pktinfo pkt;
+ u32 mark;
+ __be32 saddr, daddr;
+ u_int8_t tos;
+ const struct iphdr *iph;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_DROP && ret != NF_QUEUE) {
+ iph = ip_hdr(skb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ skb->mark != mark ||
+ iph->tos != tos)
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_route_ipv4 = {
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+};
+
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv4);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv4);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 00000000000..e79718a382f
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
+
+static struct nft_expr_type nft_reject_ipv4_type;
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+ .type = &nft_reject_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv4_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "reject",
+ .ops = &nft_reject_ipv4_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");