aboutsummaryrefslogtreecommitdiff
path: root/net/ipv4/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/netfilter')
-rw-r--r--net/ipv4/netfilter/Kconfig581
-rw-r--r--net/ipv4/netfilter/Makefile88
-rw-r--r--net/ipv4/netfilter/arp_tables.c1734
-rw-r--r--net/ipv4/netfilter/arpt_mangle.c76
-rw-r--r--net/ipv4/netfilter/arptable_filter.c223
-rw-r--r--net/ipv4/netfilter/ip_conntrack_amanda.c181
-rw-r--r--net/ipv4/netfilter/ip_conntrack_core.c1546
-rw-r--r--net/ipv4/netfilter/ip_conntrack_ftp.c510
-rw-r--r--net/ipv4/netfilter/ip_conntrack_helper_pptp.c802
-rw-r--r--net/ipv4/netfilter/ip_conntrack_irc.c312
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netbios_ns.c142
-rw-r--r--net/ipv4/netfilter/ip_conntrack_netlink.c1631
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_generic.c75
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_gre.c333
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_icmp.c327
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_sctp.c657
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_tcp.c1176
-rw-r--r--net/ipv4/netfilter/ip_conntrack_proto_udp.c153
-rw-r--r--net/ipv4/netfilter/ip_conntrack_standalone.c1011
-rw-r--r--net/ipv4/netfilter/ip_conntrack_tftp.c158
-rw-r--r--net/ipv4/netfilter/ip_nat_amanda.c86
-rw-r--r--net/ipv4/netfilter/ip_nat_core.c644
-rw-r--r--net/ipv4/netfilter/ip_nat_ftp.c181
-rw-r--r--net/ipv4/netfilter/ip_nat_helper.c431
-rw-r--r--net/ipv4/netfilter/ip_nat_helper_pptp.c414
-rw-r--r--net/ipv4/netfilter/ip_nat_irc.c123
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_gre.c176
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_icmp.c89
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_tcp.c151
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_udp.c137
-rw-r--r--net/ipv4/netfilter/ip_nat_proto_unknown.c55
-rw-r--r--net/ipv4/netfilter/ip_nat_rule.c341
-rw-r--r--net/ipv4/netfilter/ip_nat_standalone.c447
-rw-r--r--net/ipv4/netfilter/ip_nat_tftp.c71
-rw-r--r--net/ipv4/netfilter/ip_queue.c735
-rw-r--r--net/ipv4/netfilter/ip_tables.c2234
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c610
-rw-r--r--net/ipv4/netfilter/ipt_DSCP.c105
-rw-r--r--net/ipv4/netfilter/ipt_ECN.c181
-rw-r--r--net/ipv4/netfilter/ipt_LOG.c495
-rw-r--r--net/ipv4/netfilter/ipt_MASQUERADE.c171
-rw-r--r--net/ipv4/netfilter/ipt_NETMAP.c119
-rw-r--r--net/ipv4/netfilter/ipt_REDIRECT.c133
-rw-r--r--net/ipv4/netfilter/ipt_REJECT.c339
-rw-r--r--net/ipv4/netfilter/ipt_SAME.c211
-rw-r--r--net/ipv4/netfilter/ipt_SYNPROXY.c482
-rw-r--r--net/ipv4/netfilter/ipt_TCPMSS.c263
-rw-r--r--net/ipv4/netfilter/ipt_TOS.c104
-rw-r--r--net/ipv4/netfilter/ipt_TTL.c119
-rw-r--r--net/ipv4/netfilter/ipt_ULOG.c405
-rw-r--r--net/ipv4/netfilter/ipt_addrtype.c77
-rw-r--r--net/ipv4/netfilter/ipt_ah.c102
-rw-r--r--net/ipv4/netfilter/ipt_dscp.c63
-rw-r--r--net/ipv4/netfilter/ipt_ecn.c132
-rw-r--r--net/ipv4/netfilter/ipt_esp.c120
-rw-r--r--net/ipv4/netfilter/ipt_hashlimit.c731
-rw-r--r--net/ipv4/netfilter/ipt_iprange.c99
-rw-r--r--net/ipv4/netfilter/ipt_multiport.c214
-rw-r--r--net/ipv4/netfilter/ipt_owner.c101
-rw-r--r--net/ipv4/netfilter/ipt_policy.c173
-rw-r--r--net/ipv4/netfilter/ipt_recent.c1005
-rw-r--r--net/ipv4/netfilter/ipt_rpfilter.c144
-rw-r--r--net/ipv4/netfilter/ipt_tos.c65
-rw-r--r--net/ipv4/netfilter/ipt_ttl.c79
-rw-r--r--net/ipv4/netfilter/iptable_filter.c200
-rw-r--r--net/ipv4/netfilter/iptable_mangle.c293
-rw-r--r--net/ipv4/netfilter/iptable_nat.c328
-rw-r--r--net/ipv4/netfilter/iptable_raw.c183
-rw-r--r--net/ipv4/netfilter/iptable_security.c111
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c716
-rw-r--r--net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c464
-rw-r--r--net/ipv4/netfilter/nf_conntrack_proto_icmp.c434
-rw-r--r--net/ipv4/netfilter/nf_defrag_ipv4.c131
-rw-r--r--net/ipv4/netfilter/nf_nat_h323.c631
-rw-r--r--net/ipv4/netfilter/nf_nat_l3proto_ipv4.c281
-rw-r--r--net/ipv4/netfilter/nf_nat_pptp.c311
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_gre.c149
-rw-r--r--net/ipv4/netfilter/nf_nat_proto_icmp.c83
-rw-r--r--net/ipv4/netfilter/nf_nat_snmp_basic.c (renamed from net/ipv4/netfilter/ip_nat_snmp_basic.c)849
-rw-r--r--net/ipv4/netfilter/nf_tables_arp.c104
-rw-r--r--net/ipv4/netfilter/nf_tables_ipv4.c129
-rw-r--r--net/ipv4/netfilter/nft_chain_nat_ipv4.c199
-rw-r--r--net/ipv4/netfilter/nft_chain_route_ipv4.c90
-rw-r--r--net/ipv4/netfilter/nft_reject_ipv4.c75
84 files changed, 8769 insertions, 21865 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index db783036e4d..a26ce035e3f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -5,9 +5,15 @@
menu "IP: Netfilter Configuration"
depends on INET && NETFILTER
+config NF_DEFRAG_IPV4
+ tristate
+ default n
+
config NF_CONNTRACK_IPV4
- tristate "IPv4 support for new connection tracking (EXPERIMENTAL)"
- depends on EXPERIMENTAL && NF_CONNTRACK
+ tristate "IPv4 connection tracking support (required for NAT)"
+ depends on NF_CONNTRACK
+ default m if NETFILTER_ADVANCED=n
+ select NF_DEFRAG_IPV4
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
@@ -19,170 +25,57 @@ config NF_CONNTRACK_IPV4
To compile it as a module, choose M here. If unsure, say N.
-# connection tracking, helpers and protocols
-config IP_NF_CONNTRACK
- tristate "Connection tracking (required for masq/NAT)"
- ---help---
- Connection tracking keeps a record of what packets have passed
- through your machine, in order to figure out how they are related
- into connections.
-
- This is required to do Masquerading or other kinds of Network
- Address Translation (except for Fast NAT). It can also be used to
- enhance packet filtering (see `Connection state match support'
- below).
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_CT_ACCT
- bool "Connection tracking flow accounting"
- depends on IP_NF_CONNTRACK
- help
- If this option is enabled, the connection tracking code will
- keep per-flow packet and byte counters.
-
- Those counters can be used for flow-based accounting or the
- `connbytes' match.
-
- If unsure, say `N'.
-
-config IP_NF_CONNTRACK_MARK
- bool 'Connection mark tracking support'
- depends on IP_NF_CONNTRACK
- help
- This option enables support for connection marks, used by the
- `CONNMARK' target and `connmark' match. Similar to the mark value
- of packets, but this mark value is kept in the conntrack session
- instead of the individual packets.
-
-config IP_NF_CONNTRACK_EVENTS
- bool "Connection tracking events (EXPERIMENTAL)"
- depends on EXPERIMENTAL && IP_NF_CONNTRACK
- help
- If this option is enabled, the connection tracking code will
- provide a notifier chain that can be used by other kernel code
- to get notified about changes in the connection tracking state.
-
- IF unsure, say `N'.
-
-config IP_NF_CONNTRACK_NETLINK
- tristate 'Connection tracking netlink interface (EXPERIMENTAL)'
- depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK
- depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m
- help
- This option enables support for a netlink-based userspace interface
-
-
-config IP_NF_CT_PROTO_SCTP
- tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
+config NF_CONNTRACK_PROC_COMPAT
+ bool "proc/sysctl compatibility with old connection tracking"
+ depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4
+ default y
help
- With this option enabled, the connection tracking code will
- be able to do state tracking on SCTP connections.
+ This option enables /proc and sysctl compatibility with the old
+ layer 3 dependent connection tracking. This is needed to keep
+ old programs that have not been adapted to the new names working.
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ If unsure, say Y.
-config IP_NF_FTP
- tristate "FTP protocol support"
- depends on IP_NF_CONNTRACK
+config NF_TABLES_IPV4
+ depends on NF_TABLES
+ tristate "IPv4 nf_tables support"
help
- Tracking FTP connections is problematic: special helpers are
- required for tracking them, and doing masquerading and other forms
- of Network Address Translation on them.
+ This option enables the IPv4 support for nf_tables.
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_IRC
- tristate "IRC protocol support"
- depends on IP_NF_CONNTRACK
- ---help---
- There is a commonly-used extension to IRC called
- Direct Client-to-Client Protocol (DCC). This enables users to send
- files to each other, and also chat to each other without the need
- of a server. DCC Sending is used anywhere you send files over IRC,
- and DCC Chat is most commonly used by Eggdrop bots. If you are
- using NAT, this extension will enable you to send files and initiate
- chats. Note that you do NOT need this extension to get files or
- have others initiate chats, or everything else in IRC.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_NETBIOS_NS
- tristate "NetBIOS name service protocol support (EXPERIMENTAL)"
- depends on IP_NF_CONNTRACK && EXPERIMENTAL
+config NFT_CHAIN_ROUTE_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "IPv4 nf_tables route chain support"
help
- NetBIOS name service requests are sent as broadcast messages from an
- unprivileged port and responded to with unicast messages to the
- same port. This make them hard to firewall properly because connection
- tracking doesn't deal with broadcasts. This helper tracks locally
- originating NetBIOS name service requests and the corresponding
- responses. It relies on correct IP address configuration, specifically
- netmask and broadcast address. When properly configured, the output
- of "ip address show" should look similar to this:
-
- $ ip -4 address show eth0
- 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000
- inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0
-
- To compile it as a module, choose M here. If unsure, say N.
+ This option enables the "route" chain for IPv4 in nf_tables. This
+ chain type is used to force packet re-routing after mangling header
+ fields such as the source, destination, type of service and
+ the packet mark.
-config IP_NF_TFTP
- tristate "TFTP protocol support"
- depends on IP_NF_CONNTRACK
+config NFT_CHAIN_NAT_IPV4
+ depends on NF_TABLES_IPV4
+ depends on NF_NAT_IPV4 && NFT_NAT
+ tristate "IPv4 nf_tables nat chain support"
help
- TFTP connection tracking helper, this is required depending
- on how restrictive your ruleset is.
- If you are using a tftp client behind -j SNAT or -j MASQUERADING
- you will need this.
-
- To compile it as a module, choose M here. If unsure, say Y.
+ This option enables the "nat" chain for IPv4 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as the source, destination address and
+ source and destination ports.
-config IP_NF_AMANDA
- tristate "Amanda backup protocol support"
- depends on IP_NF_CONNTRACK
- help
- If you are running the Amanda backup package <http://www.amanda.org/>
- on this machine or machines that will be MASQUERADED through this
- machine, then you may want to enable this feature. This allows the
- connection tracking and natting code to allow the sub-channels that
- Amanda requires for communication of the backup data, messages and
- index.
-
- To compile it as a module, choose M here. If unsure, say Y.
-
-config IP_NF_PPTP
- tristate 'PPTP protocol support'
- depends on IP_NF_CONNTRACK
- help
- This module adds support for PPTP (Point to Point Tunnelling
- Protocol, RFC2637) connection tracking and NAT.
-
- If you are running PPTP sessions over a stateful firewall or NAT
- box, you may want to enable this feature.
-
- Please note that not all PPTP modes of operation are supported yet.
- For more info, read top of the file
- net/ipv4/netfilter/ip_conntrack_pptp.c
-
- If you want to compile it as a module, say M here and read
- Documentation/modules.txt. If unsure, say `N'.
+config NFT_REJECT_IPV4
+ depends on NF_TABLES_IPV4
+ default NFT_REJECT
+ tristate
-config IP_NF_QUEUE
- tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
+config NF_TABLES_ARP
+ depends on NF_TABLES
+ tristate "ARP nf_tables support"
help
- Netfilter has the ability to queue packets to user space: the
- netlink device can be used to access them using this driver.
-
- This option enables the old IPv4-only "ip_queue" implementation
- which has been obsoleted by the new "nfnetlink_queue" code (see
- CONFIG_NETFILTER_NETLINK_QUEUE).
-
- To compile it as a module, choose M here. If unsure, say N.
+ This option enables the ARP support for nf_tables.
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
- depends on NETFILTER_XTABLES
+ default m if NETFILTER_ADVANCED=n
+ select NETFILTER_XTABLES
help
iptables is a general, extensible packet identification framework.
The packet filtering and full NAT (masquerading, port forwarding,
@@ -191,132 +84,50 @@ config IP_NF_IPTABLES
To compile it as a module, choose M here. If unsure, say N.
-# The matches.
-config IP_NF_MATCH_IPRANGE
- tristate "IP range match support"
- depends on IP_NF_IPTABLES
- help
- This option makes possible to match IP addresses against IP address
- ranges.
+if IP_NF_IPTABLES
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_MULTIPORT
- tristate "Multiple port match support"
- depends on IP_NF_IPTABLES
- help
- Multiport matching allows you to match TCP or UDP packets based on
- a series of source or destination ports: normally a rule can only
- match a single range of ports.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_TOS
- tristate "TOS match support"
- depends on IP_NF_IPTABLES
- help
- TOS matching allows you to match packets based on the Type Of
- Service fields of the IP packet.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_RECENT
- tristate "recent match support"
- depends on IP_NF_IPTABLES
+# The matches.
+config IP_NF_MATCH_AH
+ tristate '"ah" match support'
+ depends on NETFILTER_ADVANCED
help
- This match is used for creating one or many lists of recently
- used addresses and then matching against that/those list(s).
-
- Short options are available by using 'iptables -m recent -h'
- Official Website: <http://snowman.net/projects/ipt_recent/>
+ This match extension allows you to match a range of SPIs
+ inside AH header of IPSec packets.
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_MATCH_ECN
- tristate "ECN match support"
- depends on IP_NF_IPTABLES
- help
- This option adds a `ECN' match, which allows you to match against
- the IPv4 and TCP header ECN fields.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_DSCP
- tristate "DSCP match support"
- depends on IP_NF_IPTABLES
- help
- This option adds a `DSCP' match, which allows you to match against
- the IPv4 header DSCP field (DSCP codepoint).
-
- The DSCP codepoint can have any value between 0x0 and 0x4f.
-
- To compile it as a module, choose M here. If unsure, say N.
+ tristate '"ecn" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_MATCH_ECN
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_ECN.
-config IP_NF_MATCH_AH_ESP
- tristate "AH/ESP match support"
- depends on IP_NF_IPTABLES
- help
- These two match extensions (`ah' and `esp') allow you to match a
- range of SPIs inside AH or ESP headers of IPSec packets.
+config IP_NF_MATCH_RPFILTER
+ tristate '"rpfilter" reverse path filter match support'
+ depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
+ ---help---
+ This option allows you to match packets whose replies would
+ go out via the interface the packet came in.
To compile it as a module, choose M here. If unsure, say N.
+ The module will be called ipt_rpfilter.
config IP_NF_MATCH_TTL
- tristate "TTL match support"
- depends on IP_NF_IPTABLES
- help
- This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
- to match packets by their TTL value.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_OWNER
- tristate "Owner match support"
- depends on IP_NF_IPTABLES
- help
- Packet owner matching allows you to match locally-generated packets
- based on who created them: the user, group, process or session.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_MATCH_ADDRTYPE
- tristate 'address type match support'
- depends on IP_NF_IPTABLES
- help
- This option allows you to match what routing thinks of an address,
- eg. UNICAST, LOCAL, BROADCAST, ...
-
- If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
-
-config IP_NF_MATCH_HASHLIMIT
- tristate 'hashlimit match support'
- depends on IP_NF_IPTABLES
- help
- This option adds a new iptables `hashlimit' match.
-
- As opposed to `limit', this match dynamically crates a hash table
- of limit buckets, based on your selection of source/destination
- ip addresses and/or ports.
-
- It enables you to express policies like `10kpps for any given
- destination IP' or `500pps from any given source IP' with a single
- IPtables rule.
-
-config IP_NF_MATCH_POLICY
- tristate "IPsec policy match support"
- depends on IP_NF_IPTABLES && XFRM
- help
- Policy matching allows you to match packets based on the
- IPsec policy that was used during decapsulation/will
- be used during encapsulation.
-
- To compile it as a module, choose M here. If unsure, say N.
+ tristate '"ttl" match support'
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_MATCH_HL
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_MATCH_HL.
# `filter', generic and specific targets
config IP_NF_FILTER
tristate "Packet filtering"
- depends on IP_NF_IPTABLES
+ default m if NETFILTER_ADVANCED=n
help
Packet filtering defines a table `filter', which has a series of
rules for simple packet filtering at local input, forwarding and
@@ -327,6 +138,7 @@ config IP_NF_FILTER
config IP_NF_TARGET_REJECT
tristate "REJECT target support"
depends on IP_NF_FILTER
+ default m if NETFILTER_ADVANCED=n
help
The REJECT target allows a filtering rule to specify that an ICMP
error should be issued in response to an incoming packet, rather
@@ -334,18 +146,22 @@ config IP_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_LOG
- tristate "LOG target support"
- depends on IP_NF_IPTABLES
+config IP_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
help
- This option adds a `LOG' target, which allows you to create rules in
- any iptables table which records the packet header to the syslog.
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This allows to avoid conntrack and server resource usage
+ during SYN-flood attacks.
- To compile it as a module, choose M here. If unsure, say N.
+ To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_ULOG
- tristate "ULOG target support (OBSOLETE)"
- depends on IP_NF_IPTABLES
+ tristate "ULOG target support (obsolete)"
+ default m if NETFILTER_ADVANCED=n
---help---
This option enables the old IPv4-only "ipt_ULOG" implementation
@@ -357,56 +173,29 @@ config IP_NF_TARGET_ULOG
daemon using netlink multicast sockets; unlike the LOG target
which can only be viewed through syslog.
- The apropriate userspace logging daemon (ulogd) may be obtained from
- <http://www.gnumonks.org/projects/ulogd/>
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_TARGET_TCPMSS
- tristate "TCPMSS target support"
- depends on IP_NF_IPTABLES
- ---help---
- This option adds a `TCPMSS' target, which allows you to alter the
- MSS value of TCP SYN packets, to control the maximum size for that
- connection (usually limiting it to your outgoing interface's MTU
- minus 40).
-
- This is used to overcome criminally braindead ISPs or servers which
- block ICMP Fragmentation Needed packets. The symptoms of this
- problem are that everything works fine from your Linux
- firewall/router, but machines behind it can never exchange large
- packets:
- 1) Web browsers connect, then hang with no data received.
- 2) Small mail works fine, but large emails hang.
- 3) ssh works fine, but scp hangs after initial handshaking.
-
- Workaround: activate this option and add a rule to your firewall
- configuration like:
-
- iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \
- -j TCPMSS --clamp-mss-to-pmtu
+ The appropriate userspace logging daemon (ulogd) may be obtained from
+ <http://www.netfilter.org/projects/ulogd/index.html>
To compile it as a module, choose M here. If unsure, say N.
-# NAT + specific targets
-config IP_NF_NAT
- tristate "Full NAT"
- depends on IP_NF_IPTABLES && IP_NF_CONNTRACK
+# NAT + specific targets: nf_conntrack
+config NF_NAT_IPV4
+ tristate "IPv4 NAT"
+ depends on NF_CONNTRACK_IPV4
+ default m if NETFILTER_ADVANCED=n
+ select NF_NAT
help
- The Full NAT option allows masquerading, port forwarding and other
+ The IPv4 NAT option allows masquerading, port forwarding and other
forms of full Network Address Port Translation. It is controlled by
the `nat' table in iptables: see the man page for iptables(8).
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_NAT_NEEDED
- bool
- depends on IP_NF_NAT != n
- default y
+if NF_NAT_IPV4
config IP_NF_TARGET_MASQUERADE
tristate "MASQUERADE target support"
- depends on IP_NF_NAT
+ default m if NETFILTER_ADVANCED=n
help
Masquerading is a special case of NAT: all outgoing connections are
changed to seem to come from a particular interface's address, and
@@ -416,40 +205,31 @@ config IP_NF_TARGET_MASQUERADE
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_REDIRECT
- tristate "REDIRECT target support"
- depends on IP_NF_NAT
- help
- REDIRECT is a special case of NAT: all incoming connections are
- mapped onto the incoming interface's address, causing the packets to
- come to the local machine instead of passing through. This is
- useful for transparent proxies.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_TARGET_NETMAP
tristate "NETMAP target support"
- depends on IP_NF_NAT
- help
- NETMAP is an implementation of static 1:1 NAT mapping of network
- addresses. It maps the network address part, while keeping the host
- address part intact. It is similar to Fast NAT, except that
- Netfilter's connection tracking doesn't work well with Fast NAT.
-
- To compile it as a module, choose M here. If unsure, say N.
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_TARGET_NETMAP
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_NETMAP.
-config IP_NF_TARGET_SAME
- tristate "SAME target support"
- depends on IP_NF_NAT
- help
- This option adds a `SAME' target, which works like the standard SNAT
- target, but attempts to give clients the same IP for all connections.
+config IP_NF_TARGET_REDIRECT
+ tristate "REDIRECT target support"
+ depends on NETFILTER_ADVANCED
+ select NETFILTER_XT_TARGET_REDIRECT
+ ---help---
+ This is a backwards-compat option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_REDIRECT.
- To compile it as a module, choose M here. If unsure, say N.
+endif
-config IP_NF_NAT_SNMP_BASIC
- tristate "Basic SNMP-ALG support (EXPERIMENTAL)"
- depends on EXPERIMENTAL && IP_NF_NAT
+config NF_NAT_SNMP_BASIC
+ tristate "Basic SNMP-ALG support"
+ depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4
+ depends on NETFILTER_ADVANCED
+ default NF_NAT && NF_CONNTRACK_SNMP
---help---
This module implements an Application Layer Gateway (ALG) for
@@ -462,42 +242,33 @@ config IP_NF_NAT_SNMP_BASIC
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_NAT_IRC
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_IRC=y
- default m if IP_NF_IRC=m
-
-# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y),
-# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. Argh.
-config IP_NF_NAT_FTP
- tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_FTP=y
- default m if IP_NF_FTP=m
+# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y),
+# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker.
+# From kconfig-language.txt:
+#
+# <expr> '&&' <expr> (6)
+#
+# (6) Returns the result of min(/expr/, /expr/).
-config IP_NF_NAT_TFTP
+config NF_NAT_PROTO_GRE
tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_TFTP=y
- default m if IP_NF_TFTP=m
+ depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE
-config IP_NF_NAT_AMANDA
+config NF_NAT_PPTP
tristate
- depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n
- default IP_NF_NAT if IP_NF_AMANDA=y
- default m if IP_NF_AMANDA=m
+ depends on NF_CONNTRACK && NF_NAT_IPV4
+ default NF_NAT_IPV4 && NF_CONNTRACK_PPTP
+ select NF_NAT_PROTO_GRE
-config IP_NF_NAT_PPTP
+config NF_NAT_H323
tristate
- depends on IP_NF_NAT!=n && IP_NF_PPTP!=n
- default IP_NF_NAT if IP_NF_PPTP=y
- default m if IP_NF_PPTP=m
+ depends on NF_CONNTRACK && NF_NAT_IPV4
+ default NF_NAT_IPV4 && NF_CONNTRACK_H323
# mangle + specific targets
config IP_NF_MANGLE
tristate "Packet mangling"
- depends on IP_NF_IPTABLES
+ default m if NETFILTER_ADVANCED=n
help
This option adds a `mangle' table to iptables: see the man page for
iptables(8). This table is used for various packet alterations
@@ -505,19 +276,23 @@ config IP_NF_MANGLE
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_TOS
- tristate "TOS target support"
+config IP_NF_TARGET_CLUSTERIP
+ tristate "CLUSTERIP target support"
depends on IP_NF_MANGLE
+ depends on NF_CONNTRACK_IPV4
+ depends on NETFILTER_ADVANCED
+ select NF_CONNTRACK_MARK
help
- This option adds a `TOS' target, which allows you to create rules in
- the `mangle' table which alter the Type Of Service field of an IP
- packet prior to routing.
-
+ The CLUSTERIP target allows you to build load-balancing clusters of
+ network servers without having a dedicated load-balancing
+ router/server/switch.
+
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_ECN
tristate "ECN target support"
depends on IP_NF_MANGLE
+ depends on NETFILTER_ADVANCED
---help---
This option adds a `ECN' target, which can be used in the iptables mangle
table.
@@ -529,58 +304,44 @@ config IP_NF_TARGET_ECN
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_DSCP
- tristate "DSCP target support"
- depends on IP_NF_MANGLE
- help
- This option adds a `DSCP' match, which allows you to match against
- the IPv4 header DSCP field (DSCP codepoint).
-
- The DSCP codepoint can have any value between 0x0 and 0x4f.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_TARGET_TTL
- tristate 'TTL target support'
- depends on IP_NF_MANGLE
- help
- This option adds a `TTL' target, which enables the user to modify
- the TTL value of the IP header.
-
- While it is safe to decrement/lower the TTL, this target also enables
- functionality to increment and set the TTL value of the IP header to
- arbitrary values. This is EXTREMELY DANGEROUS since you can easily
- create immortal packets that loop forever on the network.
-
- To compile it as a module, choose M here. If unsure, say N.
-
-config IP_NF_TARGET_CLUSTERIP
- tristate "CLUSTERIP target support (EXPERIMENTAL)"
- depends on IP_NF_MANGLE && EXPERIMENTAL
- depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4)
- help
- The CLUSTERIP target allows you to build load-balancing clusters of
- network servers without having a dedicated load-balancing
- router/server/switch.
-
- To compile it as a module, choose M here. If unsure, say N.
+ tristate '"TTL" target support'
+ depends on NETFILTER_ADVANCED && IP_NF_MANGLE
+ select NETFILTER_XT_TARGET_HL
+ ---help---
+ This is a backwards-compatible option for the user's convenience
+ (e.g. when running oldconfig). It selects
+ CONFIG_NETFILTER_XT_TARGET_HL.
# raw + specific targets
config IP_NF_RAW
tristate 'raw table support (required for NOTRACK/TRACE)'
- depends on IP_NF_IPTABLES
help
This option adds a `raw' table to iptables. This table is the very
first in the netfilter framework and hooks in at the PREROUTING
and OUTPUT chains.
If you want to compile it as a module, say M here and read
- <file:Documentation/modules.txt>. If unsure, say `N'.
+ <file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
+
+# security table for MAC policy
+config IP_NF_SECURITY
+ tristate "Security table"
+ depends on SECURITY
+ depends on NETFILTER_ADVANCED
+ help
+ This option adds a `security' table to iptables, for use
+ with Mandatory Access Control (MAC) policy.
+
+ If unsure, say N.
+
+endif # IP_NF_IPTABLES
# ARP tables
config IP_NF_ARPTABLES
tristate "ARP tables support"
- depends on NETFILTER_XTABLES
+ select NETFILTER_XTABLES
+ depends on NETFILTER_ADVANCED
help
arptables is a general, extensible packet identification framework.
The ARP packet filtering and mangling (manipulation)subsystems
@@ -588,9 +349,10 @@ config IP_NF_ARPTABLES
To compile it as a module, choose M here. If unsure, say N.
+if IP_NF_ARPTABLES
+
config IP_NF_ARPFILTER
tristate "ARP packet filtering"
- depends on IP_NF_ARPTABLES
help
ARP packet filtering defines a table `filter', which has a series of
rules for simple ARP packet filtering at local input and
@@ -601,10 +363,11 @@ config IP_NF_ARPFILTER
config IP_NF_ARP_MANGLE
tristate "ARP payload mangling"
- depends on IP_NF_ARPTABLES
help
Allows altering the ARP packet payload: source and destination
hardware and network addresses.
+endif # IP_NF_ARPTABLES
+
endmenu
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index e5c5b3202f0..90b82405331 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -2,39 +2,36 @@
# Makefile for the netfilter modules on top of IPv4.
#
-# objects for the standalone - connection tracking / NAT
-ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o
-ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o
-iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o
-
-ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o
-ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o
+# objects for l3 independent conntrack
+nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
+ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y)
+ifeq ($(CONFIG_PROC_FS),y)
+nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o
+endif
+endif
# connection tracking
-obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o
-obj-$(CONFIG_IP_NF_NAT) += ip_nat.o
+obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
-# conntrack netlink interface
-obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o
+nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o
+obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o
+# defrag
+obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o
-# SCTP protocol connection tracking
-obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o
+# NAT helpers (nf_conntrack)
+obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o
+obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o
+obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
-# connection tracking helpers
-obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o
-obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o
-obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o
-obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o
-obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o
-obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o
+# NAT protocols (nf_nat)
+obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
-# NAT helpers
-obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o
-obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o
-obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o
-obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o
-obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
@@ -42,38 +39,21 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
# the three instances of ip_tables
obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o
obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o
-obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o
+obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o
obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
+obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
# matches
-obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o
-obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o
-obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o
-obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o
-obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o
-obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o
-obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o
-obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o
-obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
-obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
-obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o
+obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
+obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
# targets
-obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
-obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o
+obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
-obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
-obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
-obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
-obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o
-obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o
-obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o
+obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
-obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o
-obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
-obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
# generic ARP tables
obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o
@@ -81,11 +61,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o
# just filtering instance of ARP tables for now
obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o
-
-obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o
-
-# objects for l3 independent conntrack
-nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o
-
-# l3 independent conntrack
-obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index afe3d8f8177..f95b6f93814 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -6,10 +6,10 @@
* Some ARP specific bits are:
*
* Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net>
*
*/
-
-#include <linux/config.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
@@ -20,12 +20,15 @@
#include <linux/proc_fs.h>
#include <linux/module.h>
#include <linux/init.h>
-
+#include <linux/mutex.h>
+#include <linux/err.h>
+#include <net/compat.h>
+#include <net/sock.h>
#include <asm/uaccess.h>
-#include <asm/semaphore.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp/arp_tables.h>
+#include "../../netfilter/xt_repldata.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
@@ -47,20 +50,19 @@ MODULE_DESCRIPTION("arptables core");
#endif
#ifdef CONFIG_NETFILTER_DEBUG
-#define ARP_NF_ASSERT(x) \
-do { \
- if (!(x)) \
- printk("ARP_NF_ASSERT: %s:%s:%u\n", \
- __FUNCTION__, __FILE__, __LINE__); \
-} while(0)
+#define ARP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define ARP_NF_ASSERT(x)
#endif
-#include <linux/netfilter_ipv4/listhelp.h>
+void *arpt_alloc_initial_table(const struct xt_table *info)
+{
+ return xt_alloc_initial_table(arpt, ARPT);
+}
+EXPORT_SYMBOL_GPL(arpt_alloc_initial_table);
static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
- char *hdr_addr, int len)
+ const char *hdr_addr, int len)
{
int i, ret;
@@ -71,7 +73,29 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
for (i = 0; i < len; i++)
ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i];
- return (ret != 0);
+ return ret != 0;
+}
+
+/*
+ * Unfortunately, _b and _mask are not aligned to an int (or long int)
+ * Some arches dont care, unrolling the loop is a win on them.
+ * For other arches, we only have a 16bit alignement.
+ */
+static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ unsigned long ret = ifname_compare_aligned(_a, _b, _mask);
+#else
+ unsigned long ret = 0;
+ const u16 *a = (const u16 *)_a;
+ const u16 *b = (const u16 *)_b;
+ const u16 *mask = (const u16 *)_mask;
+ int i;
+
+ for (i = 0; i < IFNAMSIZ/sizeof(u16); i++)
+ ret |= (a[i] ^ b[i]) & mask[i];
+#endif
+ return ret;
}
/* Returns whether packet matches rule or not. */
@@ -81,12 +105,12 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
const char *outdev,
const struct arpt_arp *arpinfo)
{
- char *arpptr = (char *)(arphdr + 1);
- char *src_devaddr, *tgt_devaddr;
- u32 src_ipaddr, tgt_ipaddr;
- int i, ret;
+ const char *arpptr = (char *)(arphdr + 1);
+ const char *src_devaddr, *tgt_devaddr;
+ __be32 src_ipaddr, tgt_ipaddr;
+ long ret;
-#define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop,
ARPT_INV_ARPOP)) {
@@ -143,24 +167,21 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
ARPT_INV_TGTIP)) {
dprintf("Source or target IP address mismatch.\n");
- dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
- NIPQUAD(src_ipaddr),
- NIPQUAD(arpinfo->smsk.s_addr),
- NIPQUAD(arpinfo->src.s_addr),
+ dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
+ &src_ipaddr,
+ &arpinfo->smsk.s_addr,
+ &arpinfo->src.s_addr,
arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : "");
- dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
- NIPQUAD(tgt_ipaddr),
- NIPQUAD(arpinfo->tmsk.s_addr),
- NIPQUAD(arpinfo->tgt.s_addr),
+ dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n",
+ &tgt_ipaddr,
+ &arpinfo->tmsk.s_addr,
+ &arpinfo->tgt.s_addr,
arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : "");
return 0;
}
/* Look for ifname matches. */
- for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
- ret |= (indev[i] ^ arpinfo->iniface[i])
- & arpinfo->iniface_mask[i];
- }
+ ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -169,14 +190,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
return 0;
}
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
- unsigned long odev;
- memcpy(&odev, outdev + i*sizeof(unsigned long),
- sizeof(unsigned long));
- ret |= (odev
- ^ ((const unsigned long *)arpinfo->outiface)[i])
- & ((const unsigned long *)arpinfo->outiface_mask)[i];
- }
+ ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
@@ -186,6 +200,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
}
return 1;
+#undef FWINV
}
static inline int arp_checkentry(const struct arpt_arp *arp)
@@ -204,142 +219,153 @@ static inline int arp_checkentry(const struct arpt_arp *arp)
return 1;
}
-static unsigned int arpt_error(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
+static unsigned int
+arpt_error(struct sk_buff *skb, const struct xt_action_param *par)
{
- if (net_ratelimit())
- printk("arp_tables: error: '%s'\n", (char *)targinfo);
+ net_err_ratelimited("arp_tables: error: '%s'\n",
+ (const char *)par->targinfo);
return NF_DROP;
}
-static inline struct arpt_entry *get_entry(void *base, unsigned int offset)
+static inline const struct xt_entry_target *
+arpt_get_target_c(const struct arpt_entry *e)
+{
+ return arpt_get_target((struct arpt_entry *)e);
+}
+
+static inline struct arpt_entry *
+get_entry(const void *base, unsigned int offset)
{
return (struct arpt_entry *)(base + offset);
}
-unsigned int arpt_do_table(struct sk_buff **pskb,
+static inline __pure
+struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
+}
+
+unsigned int arpt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- struct arpt_table *table,
- void *userdata)
+ struct xt_table *table)
{
- static const char nulldevname[IFNAMSIZ];
+ static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
unsigned int verdict = NF_DROP;
- struct arphdr *arp;
- int hotdrop = 0;
+ const struct arphdr *arp;
struct arpt_entry *e, *back;
const char *indev, *outdev;
void *table_base;
- struct xt_table_info *private = table->private;
+ const struct xt_table_info *private;
+ struct xt_action_param acpar;
+ unsigned int addend;
- /* ARP header, plus 2 device addresses, plus 2 IP addresses. */
- if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) +
- (2 * (*pskb)->dev->addr_len) +
- (2 * sizeof(u32)))))
+ if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
return NF_DROP;
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- read_lock_bh(&table->lock);
- table_base = (void *)private->entries[smp_processor_id()];
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
+ private = table->private;
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
+ table_base = private->entries[smp_processor_id()];
+
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
- arp = (*pskb)->nh.arph;
+ acpar.in = in;
+ acpar.out = out;
+ acpar.hooknum = hook;
+ acpar.family = NFPROTO_ARP;
+ acpar.hotdrop = false;
+
+ arp = arp_hdr(skb);
do {
- if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) {
- struct arpt_entry_target *t;
- int hdr_len;
-
- hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
- (2 * (*pskb)->dev->addr_len);
- ADD_COUNTER(e->counters, hdr_len, 1);
-
- t = arpt_get_target(e);
-
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
-
- v = ((struct arpt_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != ARPT_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
- }
- if (table_base + v
- != (void *)e + e->next_offset) {
- /* Save old back ptr in next entry */
- struct arpt_entry *next
- = (void *)e + e->next_offset;
- next->comefrom =
- (void *)back - table_base;
-
- /* set back pointer to next entry */
- back = next;
- }
+ const struct xt_entry_target *t;
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- * abs. verdicts
- */
- verdict = t->u.kernel.target->target(pskb,
- in, out,
- hook,
- t->data,
- userdata);
-
- /* Target might have changed stuff. */
- arp = (*pskb)->nh.arph;
-
- if (verdict == ARPT_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
+ if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) {
+ e = arpt_next_entry(e);
+ continue;
+ }
+
+ ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1);
+
+ t = arpt_get_target_c(e);
+
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct xt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != XT_RETURN) {
+ verdict = (unsigned int)(-v) - 1;
break;
+ }
+ e = back;
+ back = get_entry(table_base, back->comefrom);
+ continue;
}
- } else {
- e = (void *)e + e->next_offset;
+ if (table_base + v
+ != arpt_next_entry(e)) {
+ /* Save old back ptr in next entry */
+ struct arpt_entry *next = arpt_next_entry(e);
+ next->comefrom = (void *)back - table_base;
+
+ /* set back pointer to next entry */
+ back = next;
+ }
+
+ e = get_entry(table_base, v);
+ continue;
}
- } while (!hotdrop);
- read_unlock_bh(&table->lock);
- if (hotdrop)
+ /* Targets which reenter must return
+ * abs. verdicts
+ */
+ acpar.target = t->u.kernel.target;
+ acpar.targinfo = t->data;
+ verdict = t->u.kernel.target->target(skb, &acpar);
+
+ /* Target might have changed stuff. */
+ arp = arp_hdr(skb);
+
+ if (verdict == XT_CONTINUE)
+ e = arpt_next_entry(e);
+ else
+ /* Verdict */
+ break;
+ } while (!acpar.hotdrop);
+ xt_write_recseq_end(addend);
+ local_bh_enable();
+
+ if (acpar.hotdrop)
return NF_DROP;
else
return verdict;
}
/* All zeroes == unconditional rule. */
-static inline int unconditional(const struct arpt_arp *arp)
+static inline bool unconditional(const struct arpt_arp *arp)
{
- unsigned int i;
+ static const struct arpt_arp uncond;
- for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++)
- if (((__u32 *)arp)[i])
- return 0;
-
- return 1;
+ return memcmp(arp, &uncond, sizeof(uncond)) == 0;
}
/* Figures out from what hook each rule can be called: returns 0 if
* there are loops. Puts hook bitmask in comefrom.
*/
-static int mark_source_chains(struct xt_table_info *newinfo,
+static int mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
unsigned int hook;
@@ -359,11 +385,12 @@ static int mark_source_chains(struct xt_table_info *newinfo,
e->counters.pcnt = pos;
for (;;) {
- struct arpt_standard_target *t
- = (void *)arpt_get_target(e);
+ const struct xt_standard_target *t
+ = (void *)arpt_get_target_c(e);
+ int visited = e->comefrom & (1 << hook);
if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) {
- printk("arptables: loop hook %u pos %u %08X.\n",
+ pr_notice("arptables: loop hook %u pos %u %08X.\n",
hook, pos, e->comefrom);
return 0;
}
@@ -371,13 +398,22 @@ static int mark_source_chains(struct xt_table_info *newinfo,
|= ((1 << hook) | (1 << NF_ARP_NUMHOOKS));
/* Unconditional return/END. */
- if (e->target_offset == sizeof(struct arpt_entry)
- && (strcmp(t->target.u.user.name,
- ARPT_STANDARD_TARGET) == 0)
- && t->verdict < 0
- && unconditional(&e->arp)) {
+ if ((e->target_offset == sizeof(struct arpt_entry) &&
+ (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < 0 && unconditional(&e->arp)) ||
+ visited) {
unsigned int oldpos, size;
+ if ((strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
/* Return: backtrack through the last
* big jump.
*/
@@ -405,8 +441,16 @@ static int mark_source_chains(struct xt_table_info *newinfo,
int newpos = t->verdict;
if (strcmp(t->target.u.user.name,
- ARPT_STANDARD_TARGET) == 0
- && newpos >= 0) {
+ XT_STANDARD_TARGET) == 0 &&
+ newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct arpt_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
+
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -426,102 +470,111 @@ static int mark_source_chains(struct xt_table_info *newinfo,
return 1;
}
-static inline int standard_check(const struct arpt_entry_target *t,
- unsigned int max_offset)
+static inline int check_entry(const struct arpt_entry *e, const char *name)
{
- struct arpt_standard_target *targ = (void *)t;
-
- /* Check standard info. */
- if (t->u.target_size
- != ARPT_ALIGN(sizeof(struct arpt_standard_target))) {
- duprintf("arpt_standard_check: target size %u != %Zu\n",
- t->u.target_size,
- ARPT_ALIGN(sizeof(struct arpt_standard_target)));
- return 0;
- }
+ const struct xt_entry_target *t;
- if (targ->verdict >= 0
- && targ->verdict > max_offset - sizeof(struct arpt_entry)) {
- duprintf("arpt_standard_check: bad verdict (%i)\n",
- targ->verdict);
- return 0;
+ if (!arp_checkentry(&e->arp)) {
+ duprintf("arp_tables: arp check failed %p %s.\n", e, name);
+ return -EINVAL;
}
- if (targ->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("arpt_standard_check: bad negative verdict (%i)\n",
- targ->verdict);
- return 0;
- }
- return 1;
+ if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset)
+ return -EINVAL;
+
+ t = arpt_get_target_c(e);
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ return -EINVAL;
+
+ return 0;
}
-static struct arpt_target arpt_standard_target;
+static inline int check_target(struct arpt_entry *e, const char *name)
+{
+ struct xt_entry_target *t = arpt_get_target(e);
+ int ret;
+ struct xt_tgchk_param par = {
+ .table = name,
+ .entryinfo = e,
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ .hook_mask = e->comefrom,
+ .family = NFPROTO_ARP,
+ };
+
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false);
+ if (ret < 0) {
+ duprintf("arp_tables: check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ return ret;
+ }
+ return 0;
+}
-static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size,
- unsigned int *i)
+static inline int
+find_check_entry(struct arpt_entry *e, const char *name, unsigned int size)
{
- struct arpt_entry_target *t;
- struct arpt_target *target;
+ struct xt_entry_target *t;
+ struct xt_target *target;
int ret;
- if (!arp_checkentry(&e->arp)) {
- duprintf("arp_tables: arp check failed %p %s.\n", e, name);
- return -EINVAL;
- }
+ ret = check_entry(e, name);
+ if (ret)
+ return ret;
t = arpt_get_target(e);
- target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name,
- t->u.user.revision),
- "arpt_%s", t->u.user.name);
- if (IS_ERR(target) || !target) {
- duprintf("check_entry: `%s' not found\n", t->u.user.name);
- ret = target ? PTR_ERR(target) : -ENOENT;
+ target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
+ ret = PTR_ERR(target);
goto out;
}
t->u.kernel.target = target;
- if (t->u.kernel.target == &arpt_standard_target) {
- if (!standard_check(t, size)) {
- ret = -EINVAL;
- goto out;
- }
- } else if (t->u.kernel.target->checkentry
- && !t->u.kernel.target->checkentry(name, e, t->data,
- t->u.target_size
- - sizeof(*t),
- e->comefrom)) {
- module_put(t->u.kernel.target->me);
- duprintf("arp_tables: check failed for `%s'.\n",
- t->u.kernel.target->name);
- ret = -EINVAL;
- goto out;
- }
-
- (*i)++;
+ ret = check_target(e, name);
+ if (ret)
+ goto err;
return 0;
-
+err:
+ module_put(t->u.kernel.target->me);
out:
return ret;
}
+static bool check_underflow(const struct arpt_entry *e)
+{
+ const struct xt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->arp))
+ return false;
+ t = arpt_get_target_c(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct xt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
static inline int check_entry_size_and_hooks(struct arpt_entry *e,
struct xt_table_info *newinfo,
- unsigned char *base,
- unsigned char *limit,
+ const unsigned char *base,
+ const unsigned char *limit,
const unsigned int *hook_entries,
const unsigned int *underflows,
- unsigned int *i)
+ unsigned int valid_hooks)
{
unsigned int h;
- if ((unsigned long)e % __alignof__(struct arpt_entry) != 0
- || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
+ if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct arpt_entry) >= limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
if (e->next_offset
- < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) {
+ < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) {
duprintf("checking: element %p size %u\n",
e, e->next_offset);
return -EINVAL;
@@ -529,55 +582,53 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e,
/* Check hooks & underflows */
for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
newinfo->underflow[h] = underflows[h];
+ }
}
- /* FIXME: underflows must be unconditional, standard verdicts
- < 0 (not ARPT_RETURN). --RR */
-
/* Clear counters and comefrom */
e->counters = ((struct xt_counters) { 0, 0 });
e->comefrom = 0;
-
- (*i)++;
return 0;
}
-static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i)
+static inline void cleanup_entry(struct arpt_entry *e)
{
- struct arpt_entry_target *t;
-
- if (i && (*i)-- == 0)
- return 1;
+ struct xt_tgdtor_param par;
+ struct xt_entry_target *t;
t = arpt_get_target(e);
- if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->data,
- t->u.target_size - sizeof(*t));
- module_put(t->u.kernel.target->me);
- return 0;
+ par.target = t->u.kernel.target;
+ par.targinfo = t->data;
+ par.family = NFPROTO_ARP;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
}
/* Checks and translates the user-supplied table segment (held in
* newinfo).
*/
-static int translate_table(const char *name,
- unsigned int valid_hooks,
- struct xt_table_info *newinfo,
- void *entry0,
- unsigned int size,
- unsigned int number,
- const unsigned int *hook_entries,
- const unsigned int *underflows)
+static int translate_table(struct xt_table_info *newinfo, void *entry0,
+ const struct arpt_replace *repl)
{
+ struct arpt_entry *iter;
unsigned int i;
- int ret;
+ int ret = 0;
- newinfo->size = size;
- newinfo->number = number;
+ newinfo->size = repl->size;
+ newinfo->number = repl->num_entries;
/* Init all hooks to impossible value. */
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
@@ -589,57 +640,71 @@ static int translate_table(const char *name,
i = 0;
/* Walk through entries, checking offsets. */
- ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
- check_entry_size_and_hooks,
- newinfo,
- entry0,
- entry0 + size,
- hook_entries, underflows, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+ entry0 + repl->size,
+ repl->hook_entry,
+ repl->underflow,
+ repl->valid_hooks);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(arpt_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret);
if (ret != 0)
return ret;
- if (i != number) {
+ if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
- i, number);
+ i, repl->num_entries);
return -EINVAL;
}
/* Check hooks all assigned */
for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
/* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
+ if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
+ i, repl->hook_entry[i]);
return -EINVAL;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
+ i, repl->underflow[i]);
return -EINVAL;
}
}
- if (!mark_source_chains(newinfo, valid_hooks, entry0)) {
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) {
duprintf("Looping hook\n");
return -ELOOP;
}
/* Finally, each sanity check must pass */
i = 0;
- ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size,
- check_entry, name, size, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = find_check_entry(iter, repl->name, repl->size);
+ if (ret != 0)
+ break;
+ ++i;
+ }
if (ret != 0) {
- ARPT_ENTRY_ITERATE(entry0, newinfo->size,
- cleanup_entry, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter);
+ }
return ret;
}
/* And one copy for every other CPU */
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
if (newinfo->entries[i] && newinfo->entries[i] != entry0)
memcpy(newinfo->entries[i], entry0, newinfo->size);
}
@@ -647,85 +712,68 @@ static int translate_table(const char *name,
return ret;
}
-/* Gets counters. */
-static inline int add_entry_to_counter(const struct arpt_entry *e,
- struct xt_counters total[],
- unsigned int *i)
-{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
- (*i)++;
- return 0;
-}
-
-static inline int set_entry_to_counter(const struct arpt_entry *e,
- struct xt_counters total[],
- unsigned int *i)
-{
- SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
- (*i)++;
- return 0;
-}
-
static void get_counters(const struct xt_table_info *t,
struct xt_counters counters[])
{
+ struct arpt_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu;
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU
- * We dont care about preemption here.
- */
- curcpu = raw_smp_processor_id();
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
- i = 0;
- ARPT_ENTRY_ITERATE(t->entries[curcpu],
- t->size,
- set_entry_to_counter,
- counters,
- &i);
-
- for_each_cpu(cpu) {
- if (cpu == curcpu)
- continue;
i = 0;
- ARPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_entry_to_counter,
- counters,
- &i);
+ xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(s);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(s, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
+ ++i;
+ }
}
}
-static int copy_entries_to_user(unsigned int total_size,
- struct arpt_table *table,
- void __user *userptr)
+static struct xt_counters *alloc_counters(const struct xt_table *table)
{
- unsigned int off, num, countersize;
- struct arpt_entry *e;
+ unsigned int countersize;
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
- int ret = 0;
- void *loc_cpu_entry;
+ const struct xt_table_info *private = table->private;
/* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care
* about).
*/
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc_node(countersize, numa_node_id());
+ counters = vzalloc(countersize);
if (counters == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- /* First, sum counters... */
- write_lock_bh(&table->lock);
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+
+ return counters;
+}
+
+static int copy_entries_to_user(unsigned int total_size,
+ const struct xt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num;
+ const struct arpt_entry *e;
+ struct xt_counters *counters;
+ struct xt_table_info *private = table->private;
+ int ret = 0;
+ void *loc_cpu_entry;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
loc_cpu_entry = private->entries[raw_smp_processor_id()];
/* ... then copy entire thing ... */
@@ -737,7 +785,7 @@ static int copy_entries_to_user(unsigned int total_size,
/* FIXME: use iterator macros --RR */
/* ... then go back and fix counters and names */
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
- struct arpt_entry_target *t;
+ const struct xt_entry_target *t;
e = (struct arpt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
@@ -748,9 +796,9 @@ static int copy_entries_to_user(unsigned int total_size,
goto free_counters;
}
- t = arpt_get_target(e);
+ t = arpt_get_target_c(e);
if (copy_to_user(userptr + off + e->target_offset
- + offsetof(struct arpt_entry_target,
+ + offsetof(struct xt_entry_target,
u.user.name),
t->u.kernel.target->name,
strlen(t->u.kernel.target->name)+1) != 0) {
@@ -764,24 +812,170 @@ static int copy_entries_to_user(unsigned int total_size,
return ret;
}
-static int get_entries(const struct arpt_get_entries *entries,
- struct arpt_get_entries __user *uptr)
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, const void *src)
+{
+ int v = *(compat_int_t *)src;
+
+ if (v > 0)
+ v += xt_compat_calc_jump(NFPROTO_ARP, v);
+ memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, const void *src)
+{
+ compat_int_t cv = *(int *)src;
+
+ if (cv > 0)
+ cv -= xt_compat_calc_jump(NFPROTO_ARP, cv);
+ return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static int compat_calc_entry(const struct arpt_entry *e,
+ const struct xt_table_info *info,
+ const void *base, struct xt_table_info *newinfo)
{
+ const struct xt_entry_target *t;
+ unsigned int entry_offset;
+ int off, i, ret;
+
+ off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+ entry_offset = (void *)e - base;
+
+ t = arpt_get_target_c(e);
+ off += xt_compat_target_offset(t->u.kernel.target);
+ newinfo->size -= off;
+ ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ if (info->hook_entry[i] &&
+ (e < (struct arpt_entry *)(base + info->hook_entry[i])))
+ newinfo->hook_entry[i] -= off;
+ if (info->underflow[i] &&
+ (e < (struct arpt_entry *)(base + info->underflow[i])))
+ newinfo->underflow[i] -= off;
+ }
+ return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+ struct xt_table_info *newinfo)
+{
+ struct arpt_entry *iter;
+ void *loc_cpu_entry;
int ret;
- struct arpt_table *t;
- t = xt_find_table_lock(NF_ARP, entries->name);
- if (t || !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
+ if (!newinfo || !info)
+ return -EINVAL;
+
+ /* we dont care about newinfo->entries[] */
+ memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+ newinfo->initial_entries = 0;
+ loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(NFPROTO_ARP, info->number);
+ xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+ ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+ if (ret != 0)
+ return ret;
+ }
+ return 0;
+}
+#endif
+
+static int get_info(struct net *net, void __user *user,
+ const int *len, int compat)
+{
+ char name[XT_TABLE_MAXNAMELEN];
+ struct xt_table *t;
+ int ret;
+
+ if (*len != sizeof(struct arpt_getinfo)) {
+ duprintf("length %u != %Zu\n", *len,
+ sizeof(struct arpt_getinfo));
+ return -EINVAL;
+ }
+
+ if (copy_from_user(name, user, sizeof(name)) != 0)
+ return -EFAULT;
+
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_lock(NFPROTO_ARP);
+#endif
+ t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
+ "arptable_%s", name);
+ if (!IS_ERR_OR_NULL(t)) {
+ struct arpt_getinfo info;
+ const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+ struct xt_table_info tmp;
+
+ if (compat) {
+ ret = compat_table_info(private, &tmp);
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ private = &tmp;
+ }
+#endif
+ memset(&info, 0, sizeof(info));
+ info.valid_hooks = t->valid_hooks;
+ memcpy(info.hook_entry, private->hook_entry,
+ sizeof(info.hook_entry));
+ memcpy(info.underflow, private->underflow,
+ sizeof(info.underflow));
+ info.num_entries = private->number;
+ info.size = private->size;
+ strcpy(info.name, name);
+
+ if (copy_to_user(user, &info, *len) != 0)
+ ret = -EFAULT;
+ else
+ ret = 0;
+ xt_table_unlock(t);
+ module_put(t->me);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_unlock(NFPROTO_ARP);
+#endif
+ return ret;
+}
+
+static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
+ const int *len)
+{
+ int ret;
+ struct arpt_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+ if (*len != sizeof(struct arpt_get_entries) + get.size) {
+ duprintf("get_entries: %u != %Zu\n", *len,
+ sizeof(struct arpt_get_entries) + get.size);
+ return -EINVAL;
+ }
+
+ t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
+ if (!IS_ERR_OR_NULL(t)) {
+ const struct xt_table_info *private = t->private;
+
duprintf("t->private->number = %u\n",
private->number);
- if (entries->size == private->size)
+ if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
else {
duprintf("get_entries: I've got %u not %u!\n",
- private->size, entries->size);
- ret = -EINVAL;
+ private->size, get.size);
+ ret = -EAGAIN;
}
module_put(t->me);
xt_table_unlock(t);
@@ -791,87 +985,69 @@ static int get_entries(const struct arpt_get_entries *entries,
return ret;
}
-static int do_replace(void __user *user, unsigned int len)
+static int __do_replace(struct net *net, const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info *newinfo,
+ unsigned int num_counters,
+ void __user *counters_ptr)
{
int ret;
- struct arpt_replace tmp;
- struct arpt_table *t;
- struct xt_table_info *newinfo, *oldinfo;
+ struct xt_table *t;
+ struct xt_table_info *oldinfo;
struct xt_counters *counters;
- void *loc_cpu_entry, *loc_cpu_old_entry;
-
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
- return -EFAULT;
-
- /* Hack: Causes ipchains to give correct error msg --RR */
- if (len != sizeof(tmp) + tmp.size)
- return -ENOPROTOOPT;
+ void *loc_cpu_old_entry;
+ struct arpt_entry *iter;
- newinfo = xt_alloc_table_info(tmp.size);
- if (!newinfo)
- return -ENOMEM;
-
- /* choose the copy that is on our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
- ret = -EFAULT;
- goto free_newinfo;
- }
-
- counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
+ ret = 0;
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
- goto free_newinfo;
+ goto out;
}
- ret = translate_table(tmp.name, tmp.valid_hooks,
- newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
- tmp.hook_entry, tmp.underflow);
- if (ret != 0)
- goto free_newinfo_counters;
-
- duprintf("arp_tables: Translated table\n");
-
- t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name),
- "arptable_%s", tmp.name);
- if (!t || IS_ERR(t)) {
+ t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
+ "arptable_%s", name);
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
}
/* You lied! */
- if (tmp.valid_hooks != t->valid_hooks) {
+ if (valid_hooks != t->valid_hooks) {
duprintf("Valid hook crap: %08X vs %08X\n",
- tmp.valid_hooks, t->valid_hooks);
+ valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
- oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
+ oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
if (!oldinfo)
goto put_module;
/* Update module usage count based on number of rules */
duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
oldinfo->number, oldinfo->initial_entries, newinfo->number);
- if ((oldinfo->number > oldinfo->initial_entries) ||
- (newinfo->number <= oldinfo->initial_entries))
+ if ((oldinfo->number > oldinfo->initial_entries) ||
+ (newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
if ((oldinfo->number > oldinfo->initial_entries) &&
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+ xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ cleanup_entry(iter);
xt_free_table_info(oldinfo);
- if (copy_to_user(tmp.counters, counters,
- sizeof(struct xt_counters) * tmp.num_counters) != 0)
- ret = -EFAULT;
+ if (copy_to_user(counters_ptr, counters,
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
@@ -880,75 +1056,140 @@ static int do_replace(void __user *user, unsigned int len)
module_put(t->me);
xt_table_unlock(t);
free_newinfo_counters_untrans:
- ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
- free_newinfo_counters:
vfree(counters);
- free_newinfo:
- xt_free_table_info(newinfo);
+ out:
return ret;
}
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK.
- */
-static inline int add_counter_to_entry(struct arpt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
+static int do_replace(struct net *net, const void __user *user,
+ unsigned int len)
{
+ int ret;
+ struct arpt_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct arpt_entry *iter;
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
- (*i)++;
+ /* overflow check */
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_table(newinfo, loc_cpu_entry, &tmp);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("arp_tables: Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, tmp.counters);
+ if (ret)
+ goto free_newinfo_untrans;
return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
}
-static int do_add_counters(void __user *user, unsigned int len)
+static int do_add_counters(struct net *net, const void __user *user,
+ unsigned int len, int compat)
{
- unsigned int i;
- struct xt_counters_info tmp, *paddc;
- struct arpt_table *t;
- struct xt_table_info *private;
+ unsigned int i, curcpu;
+ struct xt_counters_info tmp;
+ struct xt_counters *paddc;
+ unsigned int num_counters;
+ const char *name;
+ int size;
+ void *ptmp;
+ struct xt_table *t;
+ const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
+ struct arpt_entry *iter;
+ unsigned int addend;
+#ifdef CONFIG_COMPAT
+ struct compat_xt_counters_info compat_tmp;
+
+ if (compat) {
+ ptmp = &compat_tmp;
+ size = sizeof(struct compat_xt_counters_info);
+ } else
+#endif
+ {
+ ptmp = &tmp;
+ size = sizeof(struct xt_counters_info);
+ }
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_user(ptmp, user, size) != 0)
return -EFAULT;
- if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ num_counters = compat_tmp.num_counters;
+ name = compat_tmp.name;
+ } else
+#endif
+ {
+ num_counters = tmp.num_counters;
+ name = tmp.name;
+ }
+
+ if (len != size + num_counters * sizeof(struct xt_counters))
return -EINVAL;
- paddc = vmalloc(len);
+ paddc = vmalloc(len - size);
if (!paddc)
return -ENOMEM;
- if (copy_from_user(paddc, user, len) != 0) {
+ if (copy_from_user(paddc, user + size, len - size) != 0) {
ret = -EFAULT;
goto free;
}
- t = xt_find_table_lock(NF_ARP, tmp.name);
- if (!t || IS_ERR(t)) {
+ t = xt_find_table_lock(net, NFPROTO_ARP, name);
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
- write_lock_bh(&t->lock);
+ local_bh_disable();
private = t->private;
- if (private->number != paddc->num_counters) {
+ if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[smp_processor_id()];
- ARPT_ENTRY_ITERATE(loc_cpu_entry,
- private->size,
- add_counter_to_entry,
- paddc->counters,
- &i);
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ addend = xt_write_recseq_begin();
+ xt_entry_foreach(iter, loc_cpu_entry, private->size) {
+ ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ ++i;
+ }
+ xt_write_recseq_end(addend);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
@@ -957,20 +1198,359 @@ static int do_add_counters(void __user *user, unsigned int len)
return ret;
}
-static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+#ifdef CONFIG_COMPAT
+static inline void compat_release_entry(struct compat_arpt_entry *e)
+{
+ struct xt_entry_target *t;
+
+ t = compat_arpt_get_target(e);
+ module_put(t->u.kernel.target->me);
+}
+
+static inline int
+check_compat_entry_size_and_hooks(struct compat_arpt_entry *e,
+ struct xt_table_info *newinfo,
+ unsigned int *size,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ const char *name)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ unsigned int entry_offset;
+ int ret, off, h;
+
+ duprintf("check_compat_entry_size_and_hooks %p\n", e);
+ if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) {
+ duprintf("Bad offset %p, limit = %p\n", e, limit);
+ return -EINVAL;
+ }
+
+ if (e->next_offset < sizeof(struct compat_arpt_entry) +
+ sizeof(struct compat_xt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* For purposes of check_entry casting the compat entry is fine */
+ ret = check_entry((struct arpt_entry *)e, name);
+ if (ret)
+ return ret;
+
+ off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+ entry_offset = (void *)e - (void *)base;
+
+ t = compat_arpt_get_target(e);
+ target = xt_request_find_target(NFPROTO_ARP, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+ t->u.user.name);
+ ret = PTR_ERR(target);
+ goto out;
+ }
+ t->u.kernel.target = target;
+
+ off += xt_compat_target_offset(target);
+ *size += off;
+ ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off);
+ if (ret)
+ goto release_target;
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h])
+ newinfo->underflow[h] = underflows[h];
+ }
+
+ /* Clear counters and comefrom */
+ memset(&e->counters, 0, sizeof(e->counters));
+ e->comefrom = 0;
+ return 0;
+
+release_target:
+ module_put(t->u.kernel.target->me);
+out:
+ return ret;
+}
+
+static int
+compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr,
+ unsigned int *size, const char *name,
+ struct xt_table_info *newinfo, unsigned char *base)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ struct arpt_entry *de;
+ unsigned int origsize;
+ int ret, h;
+
+ ret = 0;
+ origsize = *size;
+ de = (struct arpt_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct arpt_entry));
+ memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+ *dstptr += sizeof(struct arpt_entry);
+ *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
+
+ de->target_offset = e->target_offset - (origsize - *size);
+ t = compat_arpt_get_target(e);
+ target = t->u.kernel.target;
+ xt_compat_target_from_user(t, dstptr, size);
+
+ de->next_offset = e->next_offset - (origsize - *size);
+ for (h = 0; h < NF_ARP_NUMHOOKS; h++) {
+ if ((unsigned char *)de - base < newinfo->hook_entry[h])
+ newinfo->hook_entry[h] -= origsize - *size;
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+ return ret;
+}
+
+static int translate_compat_table(const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+ unsigned int total_size,
+ unsigned int number,
+ unsigned int *hook_entries,
+ unsigned int *underflows)
+{
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ struct compat_arpt_entry *iter0;
+ struct arpt_entry *iter1;
+ unsigned int size;
+ int ret = 0;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+ size = total_size;
+ info->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ info->hook_entry[i] = 0xFFFFFFFF;
+ info->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(NFPROTO_ARP);
+ xt_compat_init_offsets(NFPROTO_ARP, number);
+ /* Walk through entries, checking offsets. */
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+ entry0 + total_size,
+ hook_entries,
+ underflows,
+ name);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
+
+ ret = -EINVAL;
+ if (j != number) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+ j, number);
+ goto out_unlock;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (info->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ goto out_unlock;
+ }
+ if (info->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ goto out_unlock;
+ }
+ }
+
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+ newinfo->number = number;
+ for (i = 0; i < NF_ARP_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = info->hook_entry[i];
+ newinfo->underflow[i] = info->underflow[i];
+ }
+ entry1 = newinfo->entries[raw_smp_processor_id()];
+ pos = entry1;
+ size = total_size;
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = compat_copy_entry_from_user(iter0, &pos, &size,
+ name, newinfo, entry1);
+ if (ret != 0)
+ break;
+ }
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ if (ret)
+ goto free_newinfo;
+
+ ret = -ELOOP;
+ if (!mark_source_chains(newinfo, valid_hooks, entry1))
+ goto free_newinfo;
+
+ i = 0;
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ ret = check_target(iter1, name);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(arpt_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
+ if (ret) {
+ /*
+ * The first i matches need cleanup_entry (calls ->destroy)
+ * because they had called ->check already. The other j-i
+ * entries need only release.
+ */
+ int skip = i;
+ j -= i;
+ xt_entry_foreach(iter0, entry0, newinfo->size) {
+ if (skip-- > 0)
+ continue;
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter1);
+ }
+ xt_free_table_info(newinfo);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i)
+ if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+ memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+ return 0;
+
+free_newinfo:
+ xt_free_table_info(newinfo);
+out:
+ xt_entry_foreach(iter0, entry0, total_size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ xt_compat_unlock(NFPROTO_ARP);
+ goto out;
+}
+
+struct compat_arpt_replace {
+ char name[XT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_ARP_NUMHOOKS];
+ u32 underflow[NF_ARP_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters;
+ struct compat_arpt_entry entries[0];
+};
+
+static int compat_do_replace(struct net *net, void __user *user,
+ unsigned int len)
{
int ret;
+ struct compat_arpt_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct arpt_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
- if (!capable(CAP_NET_ADMIN))
+ /* overflow check */
+ if (tmp.size >= INT_MAX / num_possible_cpus())
+ return -ENOMEM;
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_compat_table(tmp.name, tmp.valid_hooks,
+ &newinfo, &loc_cpu_entry, tmp.size,
+ tmp.num_entries, tmp.hook_entry,
+ tmp.underflow);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("compat_do_replace: Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, compat_ptr(tmp.counters));
+ if (ret)
+ goto free_newinfo_untrans;
+ return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
+}
+
+static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user,
+ unsigned int len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
case ARPT_SO_SET_REPLACE:
- ret = do_replace(user, len);
+ ret = compat_do_replace(sock_net(sk), user, len);
break;
case ARPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(user, len);
+ ret = do_add_counters(sock_net(sk), user, len, 1);
break;
default:
@@ -981,73 +1561,186 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned
return ret;
}
-static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr,
+ compat_uint_t *size,
+ struct xt_counters *counters,
+ unsigned int i)
{
+ struct xt_entry_target *t;
+ struct compat_arpt_entry __user *ce;
+ u_int16_t target_offset, next_offset;
+ compat_uint_t origsize;
int ret;
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
+ origsize = *size;
+ ce = (struct compat_arpt_entry __user *)*dstptr;
+ if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 ||
+ copy_to_user(&ce->counters, &counters[i],
+ sizeof(counters[i])) != 0)
+ return -EFAULT;
- switch (cmd) {
- case ARPT_SO_GET_INFO: {
- char name[ARPT_TABLE_MAXNAMELEN];
- struct arpt_table *t;
+ *dstptr += sizeof(struct compat_arpt_entry);
+ *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry);
- if (*len != sizeof(struct arpt_getinfo)) {
- duprintf("length %u != %Zu\n", *len,
- sizeof(struct arpt_getinfo));
- ret = -EINVAL;
- break;
- }
+ target_offset = e->target_offset - (origsize - *size);
- if (copy_from_user(name, user, sizeof(name)) != 0) {
- ret = -EFAULT;
+ t = arpt_get_target(e);
+ ret = xt_compat_target_to_user(t, dstptr, size);
+ if (ret)
+ return ret;
+ next_offset = e->next_offset - (origsize - *size);
+ if (put_user(target_offset, &ce->target_offset) != 0 ||
+ put_user(next_offset, &ce->next_offset) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int compat_copy_entries_to_user(unsigned int total_size,
+ struct xt_table *table,
+ void __user *userptr)
+{
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ void __user *pos;
+ unsigned int size;
+ int ret = 0;
+ void *loc_cpu_entry;
+ unsigned int i = 0;
+ struct arpt_entry *iter;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ /* choose the copy on our node/cpu */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ pos = userptr;
+ size = total_size;
+ xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ ret = compat_copy_entry_to_user(iter, &pos,
+ &size, counters, i++);
+ if (ret != 0)
break;
+ }
+ vfree(counters);
+ return ret;
+}
+
+struct compat_arpt_get_entries {
+ char name[XT_TABLE_MAXNAMELEN];
+ compat_uint_t size;
+ struct compat_arpt_entry entrytable[0];
+};
+
+static int compat_get_entries(struct net *net,
+ struct compat_arpt_get_entries __user *uptr,
+ int *len)
+{
+ int ret;
+ struct compat_arpt_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+ if (*len != sizeof(struct compat_arpt_get_entries) + get.size) {
+ duprintf("compat_get_entries: %u != %zu\n",
+ *len, sizeof(get) + get.size);
+ return -EINVAL;
+ }
+
+ xt_compat_lock(NFPROTO_ARP);
+ t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
+ if (!IS_ERR_OR_NULL(t)) {
+ const struct xt_table_info *private = t->private;
+ struct xt_table_info info;
+
+ duprintf("t->private->number = %u\n", private->number);
+ ret = compat_table_info(private, &info);
+ if (!ret && get.size == info.size) {
+ ret = compat_copy_entries_to_user(private->size,
+ t, uptr->entrytable);
+ } else if (!ret) {
+ duprintf("compat_get_entries: I've got %u not %u!\n",
+ private->size, get.size);
+ ret = -EAGAIN;
}
- name[ARPT_TABLE_MAXNAMELEN-1] = '\0';
-
- t = try_then_request_module(xt_find_table_lock(NF_ARP, name),
- "arptable_%s", name);
- if (t && !IS_ERR(t)) {
- struct arpt_getinfo info;
- struct xt_table_info *private = t->private;
-
- info.valid_hooks = t->valid_hooks;
- memcpy(info.hook_entry, private->hook_entry,
- sizeof(info.hook_entry));
- memcpy(info.underflow, private->underflow,
- sizeof(info.underflow));
- info.num_entries = private->number;
- info.size = private->size;
- strcpy(info.name, name);
-
- if (copy_to_user(user, &info, *len) != 0)
- ret = -EFAULT;
- else
- ret = 0;
- xt_table_unlock(t);
- module_put(t->me);
- } else
- ret = t ? PTR_ERR(t) : -ENOENT;
- }
- break;
-
- case ARPT_SO_GET_ENTRIES: {
- struct arpt_get_entries get;
-
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %Zu\n", *len, sizeof(get));
- ret = -EINVAL;
- } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
- ret = -EFAULT;
- } else if (*len != sizeof(struct arpt_get_entries) + get.size) {
- duprintf("get_entries: %u != %Zu\n", *len,
- sizeof(struct arpt_get_entries) + get.size);
- ret = -EINVAL;
- } else
- ret = get_entries(&get, user);
+ xt_compat_flush_offsets(NFPROTO_ARP);
+ module_put(t->me);
+ xt_table_unlock(t);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ xt_compat_unlock(NFPROTO_ARP);
+ return ret;
+}
+
+static int do_arpt_get_ctl(struct sock *, int, void __user *, int *);
+
+static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user,
+ int *len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 1);
break;
+ case ARPT_SO_GET_ENTRIES:
+ ret = compat_get_entries(sock_net(sk), user, len);
+ break;
+ default:
+ ret = do_arpt_get_ctl(sk, cmd, user, len);
}
+ return ret;
+}
+#endif
+
+static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_SET_REPLACE:
+ ret = do_replace(sock_net(sk), user, len);
+ break;
+
+ case ARPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(sock_net(sk), user, len, 0);
+ break;
+
+ default:
+ duprintf("do_arpt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
+ }
+
+ return ret;
+}
+
+static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case ARPT_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 0);
+ break;
+
+ case ARPT_SO_GET_ENTRIES:
+ ret = get_entries(sock_net(sk), user, len);
+ break;
case ARPT_SO_GET_REVISION_TARGET: {
struct xt_get_revision rev;
@@ -1060,8 +1753,9 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
ret = -EFAULT;
break;
}
+ rev.name[sizeof(rev.name)-1] = 0;
- try_then_request_module(xt_find_revision(NF_ARP, rev.name,
+ try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
rev.revision, 1, &ret),
"arpt_%s", rev.name);
break;
@@ -1075,67 +1769,80 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
return ret;
}
-int arpt_register_table(struct arpt_table *table,
- const struct arpt_replace *repl)
+struct xt_table *arpt_register_table(struct net *net,
+ const struct xt_table *table,
+ const struct arpt_replace *repl)
{
int ret;
struct xt_table_info *newinfo;
- static struct xt_table_info bootstrap
- = { 0, 0, 0, { 0 }, { 0 }, { } };
+ struct xt_table_info bootstrap = {0};
void *loc_cpu_entry;
+ struct xt_table *new_table;
newinfo = xt_alloc_table_info(repl->size);
if (!newinfo) {
ret = -ENOMEM;
- return ret;
+ goto out;
}
/* choose the copy on our node/cpu */
loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
memcpy(loc_cpu_entry, repl->entries, repl->size);
- ret = translate_table(table->name, table->valid_hooks,
- newinfo, loc_cpu_entry, repl->size,
- repl->num_entries,
- repl->hook_entry,
- repl->underflow);
-
+ ret = translate_table(newinfo, loc_cpu_entry, repl);
duprintf("arpt_register_table: translate table gives %d\n", ret);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
- }
+ if (ret != 0)
+ goto out_free;
- if (xt_register_table(table, &bootstrap, newinfo) != 0) {
- xt_free_table_info(newinfo);
- return ret;
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
+ ret = PTR_ERR(new_table);
+ goto out_free;
}
+ return new_table;
- return 0;
+out_free:
+ xt_free_table_info(newinfo);
+out:
+ return ERR_PTR(ret);
}
-void arpt_unregister_table(struct arpt_table *table)
+void arpt_unregister_table(struct xt_table *table)
{
struct xt_table_info *private;
void *loc_cpu_entry;
+ struct module *table_owner = table->me;
+ struct arpt_entry *iter;
private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries[raw_smp_processor_id()];
- ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size,
- cleanup_entry, NULL);
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
xt_free_table_info(private);
}
/* The built-in targets: standard (NULL) and error. */
-static struct arpt_target arpt_standard_target = {
- .name = ARPT_STANDARD_TARGET,
-};
-
-static struct arpt_target arpt_error_target = {
- .name = ARPT_ERROR_TARGET,
- .target = arpt_error,
+static struct xt_target arpt_builtin_tg[] __read_mostly = {
+ {
+ .name = XT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = NFPROTO_ARP,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
+#endif
+ },
+ {
+ .name = XT_ERROR_TARGET,
+ .target = arpt_error,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
+ .family = NFPROTO_ARP,
+ },
};
static struct nf_sockopt_ops arpt_sockopts = {
@@ -1143,41 +1850,72 @@ static struct nf_sockopt_ops arpt_sockopts = {
.set_optmin = ARPT_BASE_CTL,
.set_optmax = ARPT_SO_SET_MAX+1,
.set = do_arpt_set_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_set = compat_do_arpt_set_ctl,
+#endif
.get_optmin = ARPT_BASE_CTL,
.get_optmax = ARPT_SO_GET_MAX+1,
.get = do_arpt_get_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_get = compat_do_arpt_get_ctl,
+#endif
+ .owner = THIS_MODULE,
+};
+
+static int __net_init arp_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, NFPROTO_ARP);
+}
+
+static void __net_exit arp_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, NFPROTO_ARP);
+}
+
+static struct pernet_operations arp_tables_net_ops = {
+ .init = arp_tables_net_init,
+ .exit = arp_tables_net_exit,
};
-static int __init init(void)
+static int __init arp_tables_init(void)
{
int ret;
- xt_proto_init(NF_ARP);
+ ret = register_pernet_subsys(&arp_tables_net_ops);
+ if (ret < 0)
+ goto err1;
- /* Noone else will be downing sem now, so we won't sleep */
- xt_register_target(NF_ARP, &arpt_standard_target);
- xt_register_target(NF_ARP, &arpt_error_target);
+ /* No one else will be downing sem now, so we won't sleep */
+ ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
+ if (ret < 0)
+ goto err2;
/* Register setsockopt */
ret = nf_register_sockopt(&arpt_sockopts);
- if (ret < 0) {
- duprintf("Unable to register sockopts.\n");
- return ret;
- }
+ if (ret < 0)
+ goto err4;
- printk("arp_tables: (C) 2002 David S. Miller\n");
+ printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n");
return 0;
+
+err4:
+ xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
+err2:
+ unregister_pernet_subsys(&arp_tables_net_ops);
+err1:
+ return ret;
}
-static void __exit fini(void)
+static void __exit arp_tables_fini(void)
{
nf_unregister_sockopt(&arpt_sockopts);
- xt_proto_fini(NF_ARP);
+ xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg));
+ unregister_pernet_subsys(&arp_tables_net_ops);
}
EXPORT_SYMBOL(arpt_register_table);
EXPORT_SYMBOL(arpt_unregister_table);
EXPORT_SYMBOL(arpt_do_table);
-module_init(init);
-module_exit(fini);
+module_init(arp_tables_init);
+module_exit(arp_tables_fini);
diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c
index c97650a16a5..a5e52a9f0a1 100644
--- a/net/ipv4/netfilter/arpt_mangle.c
+++ b/net/ipv4/netfilter/arpt_mangle.c
@@ -1,5 +1,6 @@
/* module that allows mangling of the arp payload */
#include <linux/module.h>
+#include <linux/netfilter.h>
#include <linux/netfilter_arp/arpt_mangle.h>
#include <net/sock.h>
@@ -8,98 +9,83 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>");
MODULE_DESCRIPTION("arptables arp payload mangle target");
static unsigned int
-target(struct sk_buff **pskb, const struct net_device *in,
- const struct net_device *out, unsigned int hooknum, const void *targinfo,
- void *userinfo)
+target(struct sk_buff *skb, const struct xt_action_param *par)
{
- const struct arpt_mangle *mangle = targinfo;
- struct arphdr *arp;
+ const struct arpt_mangle *mangle = par->targinfo;
+ const struct arphdr *arp;
unsigned char *arpptr;
int pln, hln;
- if (skb_shared(*pskb) || skb_cloned(*pskb)) {
- struct sk_buff *nskb;
+ if (!skb_make_writable(skb, skb->len))
+ return NF_DROP;
- nskb = skb_copy(*pskb, GFP_ATOMIC);
- if (!nskb)
- return NF_DROP;
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- }
-
- arp = (*pskb)->nh.arph;
- arpptr = (*pskb)->nh.raw + sizeof(*arp);
+ arp = arp_hdr(skb);
+ arpptr = skb_network_header(skb) + sizeof(*arp);
pln = arp->ar_pln;
hln = arp->ar_hln;
/* We assume that pln and hln were checked in the match */
if (mangle->flags & ARPT_MANGLE_SDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > (**pskb).tail))
+ (arpptr + hln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, mangle->src_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_SIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > (**pskb).tail))
+ (arpptr + pln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_s.src_ip, pln);
}
arpptr += pln;
if (mangle->flags & ARPT_MANGLE_TDEV) {
if (ARPT_DEV_ADDR_LEN_MAX < hln ||
- (arpptr + hln > (**pskb).tail))
+ (arpptr + hln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, mangle->tgt_devaddr, hln);
}
arpptr += hln;
if (mangle->flags & ARPT_MANGLE_TIP) {
if (ARPT_MANGLE_ADDR_LEN_MAX < pln ||
- (arpptr + pln > (**pskb).tail))
+ (arpptr + pln > skb_tail_pointer(skb)))
return NF_DROP;
memcpy(arpptr, &mangle->u_t.tgt_ip, pln);
}
return mangle->target;
}
-static int
-checkentry(const char *tablename, const void *e, void *targinfo,
- unsigned int targinfosize, unsigned int hook_mask)
+static int checkentry(const struct xt_tgchk_param *par)
{
- const struct arpt_mangle *mangle = targinfo;
+ const struct arpt_mangle *mangle = par->targinfo;
if (mangle->flags & ~ARPT_MANGLE_MASK ||
!(mangle->flags & ARPT_MANGLE_MASK))
- return 0;
+ return -EINVAL;
if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT &&
- mangle->target != ARPT_CONTINUE)
- return 0;
- return 1;
+ mangle->target != XT_CONTINUE)
+ return -EINVAL;
+ return 0;
}
-static struct arpt_target arpt_mangle_reg
-= {
- .name = "mangle",
- .target = target,
- .checkentry = checkentry,
- .me = THIS_MODULE,
+static struct xt_target arpt_mangle_reg __read_mostly = {
+ .name = "mangle",
+ .family = NFPROTO_ARP,
+ .target = target,
+ .targetsize = sizeof(struct arpt_mangle),
+ .checkentry = checkentry,
+ .me = THIS_MODULE,
};
-static int __init init(void)
+static int __init arpt_mangle_init(void)
{
- if (arpt_register_target(&arpt_mangle_reg))
- return -EINVAL;
-
- return 0;
+ return xt_register_target(&arpt_mangle_reg);
}
-static void __exit fini(void)
+static void __exit arpt_mangle_fini(void)
{
- arpt_unregister_target(&arpt_mangle_reg);
+ xt_unregister_target(&arpt_mangle_reg);
}
-module_init(init);
-module_exit(fini);
+module_init(arpt_mangle_init);
+module_exit(arpt_mangle_fini);
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index f6ab45f4868..802ddecb30b 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -6,7 +6,9 @@
*/
#include <linux/module.h>
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_arp/arp_tables.h>
+#include <linux/slab.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("David S. Miller <davem@redhat.com>");
@@ -15,201 +17,76 @@ MODULE_DESCRIPTION("arptables filter table");
#define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \
(1 << NF_ARP_FORWARD))
-/* Standard entry. */
-struct arpt_standard
-{
- struct arpt_entry entry;
- struct arpt_standard_target target;
+static const struct xt_table packet_filter = {
+ .name = "filter",
+ .valid_hooks = FILTER_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_ARP,
+ .priority = NF_IP_PRI_FILTER,
};
-struct arpt_error_target
+/* The work comes in here from netfilter.c */
+static unsigned int
+arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- struct arpt_entry_target target;
- char errorname[ARPT_FUNCTION_MAXNAMELEN];
-};
+ const struct net *net = dev_net((in != NULL) ? in : out);
-struct arpt_error
-{
- struct arpt_entry entry;
- struct arpt_error_target target;
-};
+ return arpt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.arptable_filter);
+}
-static struct
-{
- struct arpt_replace repl;
- struct arpt_standard entries[3];
- struct arpt_error term;
-} initial_table __initdata
-= { { "filter", FILTER_VALID_HOOKS, 4,
- sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error),
- { [NF_ARP_IN] = 0,
- [NF_ARP_OUT] = sizeof(struct arpt_standard),
- [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), },
- { [NF_ARP_IN] = 0,
- [NF_ARP_OUT] = sizeof(struct arpt_standard),
- [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), },
- 0, NULL, { } },
- {
- /* ARP_IN */
- {
- {
- {
- { 0 }, { 0 }, { 0 }, { 0 },
- 0, 0,
- { { 0, }, { 0, } },
- { { 0, }, { 0, } },
- 0, 0,
- 0, 0,
- 0, 0,
- "", "", { 0 }, { 0 },
- 0, 0
- },
- sizeof(struct arpt_entry),
- sizeof(struct arpt_standard),
- 0,
- { 0, 0 }, { } },
- { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 }
- },
- /* ARP_OUT */
- {
- {
- {
- { 0 }, { 0 }, { 0 }, { 0 },
- 0, 0,
- { { 0, }, { 0, } },
- { { 0, }, { 0, } },
- 0, 0,
- 0, 0,
- 0, 0,
- "", "", { 0 }, { 0 },
- 0, 0
- },
- sizeof(struct arpt_entry),
- sizeof(struct arpt_standard),
- 0,
- { 0, 0 }, { } },
- { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 }
- },
- /* ARP_FORWARD */
- {
- {
- {
- { 0 }, { 0 }, { 0 }, { 0 },
- 0, 0,
- { { 0, }, { 0, } },
- { { 0, }, { 0, } },
- 0, 0,
- 0, 0,
- 0, 0,
- "", "", { 0 }, { 0 },
- 0, 0
- },
- sizeof(struct arpt_entry),
- sizeof(struct arpt_standard),
- 0,
- { 0, 0 }, { } },
- { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 }
- }
- },
- /* ERROR */
- {
- {
- {
- { 0 }, { 0 }, { 0 }, { 0 },
- 0, 0,
- { { 0, }, { 0, } },
- { { 0, }, { 0, } },
- 0, 0,
- 0, 0,
- 0, 0,
- "", "", { 0 }, { 0 },
- 0, 0
- },
- sizeof(struct arpt_entry),
- sizeof(struct arpt_error),
- 0,
- { 0, 0 }, { } },
- { { { { ARPT_ALIGN(sizeof(struct arpt_error_target)), ARPT_ERROR_TARGET } },
- { } },
- "ERROR"
- }
- }
-};
+static struct nf_hook_ops *arpfilter_ops __read_mostly;
-static struct arpt_table packet_filter = {
- .name = "filter",
- .valid_hooks = FILTER_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
- .private = NULL,
- .me = THIS_MODULE,
- .af = NF_ARP,
-};
+static int __net_init arptable_filter_net_init(struct net *net)
+{
+ struct arpt_replace *repl;
+
+ repl = arpt_alloc_initial_table(&packet_filter);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.arptable_filter =
+ arpt_register_table(net, &packet_filter, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter);
+}
-/* The work comes in here from netfilter.c */
-static unsigned int arpt_hook(unsigned int hook,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static void __net_exit arptable_filter_net_exit(struct net *net)
{
- return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ arpt_unregister_table(net->ipv4.arptable_filter);
}
-static struct nf_hook_ops arpt_ops[] = {
- {
- .hook = arpt_hook,
- .owner = THIS_MODULE,
- .pf = NF_ARP,
- .hooknum = NF_ARP_IN,
- },
- {
- .hook = arpt_hook,
- .owner = THIS_MODULE,
- .pf = NF_ARP,
- .hooknum = NF_ARP_OUT,
- },
- {
- .hook = arpt_hook,
- .owner = THIS_MODULE,
- .pf = NF_ARP,
- .hooknum = NF_ARP_FORWARD,
- },
+static struct pernet_operations arptable_filter_net_ops = {
+ .init = arptable_filter_net_init,
+ .exit = arptable_filter_net_exit,
};
-static int __init init(void)
+static int __init arptable_filter_init(void)
{
- int ret, i;
+ int ret;
- /* Register table */
- ret = arpt_register_table(&packet_filter, &initial_table.repl);
+ ret = register_pernet_subsys(&arptable_filter_net_ops);
if (ret < 0)
return ret;
- for (i = 0; i < ARRAY_SIZE(arpt_ops); i++)
- if ((ret = nf_register_hook(&arpt_ops[i])) < 0)
- goto cleanup_hooks;
+ arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook);
+ if (IS_ERR(arpfilter_ops)) {
+ ret = PTR_ERR(arpfilter_ops);
+ goto cleanup_table;
+ }
return ret;
-cleanup_hooks:
- while (--i >= 0)
- nf_unregister_hook(&arpt_ops[i]);
-
- arpt_unregister_table(&packet_filter);
+cleanup_table:
+ unregister_pernet_subsys(&arptable_filter_net_ops);
return ret;
}
-static void __exit fini(void)
+static void __exit arptable_filter_fini(void)
{
- unsigned int i;
-
- for (i = 0; i < ARRAY_SIZE(arpt_ops); i++)
- nf_unregister_hook(&arpt_ops[i]);
-
- arpt_unregister_table(&packet_filter);
+ xt_hook_unlink(&packet_filter, arpfilter_ops);
+ unregister_pernet_subsys(&arptable_filter_net_ops);
}
-module_init(init);
-module_exit(fini);
+module_init(arptable_filter_init);
+module_exit(arptable_filter_fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c
deleted file mode 100644
index 84e4f79b7ff..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_amanda.c
+++ /dev/null
@@ -1,181 +0,0 @@
-/* Amanda extension for IP connection tracking, Version 0.2
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on HW's ip_conntrack_irc.c as well as other modules
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Module load syntax:
- * insmod ip_conntrack_amanda.o [master_timeout=n]
- *
- * Where master_timeout is the timeout (in seconds) of the master
- * connection (port 10080). This defaults to 5 minutes but if
- * your clients take longer than 5 minutes to do their work
- * before getting back to the Amanda server, you can increase
- * this value.
- *
- */
-
-#include <linux/in.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/moduleparam.h>
-#include <linux/udp.h>
-#include <net/checksum.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-static unsigned int master_timeout = 300;
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda connection tracking module");
-MODULE_LICENSE("GPL");
-module_param(master_timeout, uint, 0600);
-MODULE_PARM_DESC(master_timeout, "timeout for the master connection");
-
-static const char *conns[] = { "DATA ", "MESG ", "INDEX " };
-
-/* This is slow, but it's simple. --RR */
-static char *amanda_buffer;
-static DEFINE_SPINLOCK(amanda_buffer_lock);
-
-unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_amanda_hook);
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- struct ip_conntrack_expect *exp;
- char *data, *data_limit, *tmp;
- unsigned int dataoff, i;
- u_int16_t port, len;
- int ret = NF_ACCEPT;
-
- /* Only look at packets from the Amanda server */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- /* increase the UDP timeout of the master connection as replies from
- * Amanda clients to the server can be quite delayed */
- ip_ct_refresh(ct, *pskb, master_timeout * HZ);
-
- /* No data? */
- dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr);
- if (dataoff >= (*pskb)->len) {
- if (net_ratelimit())
- printk("amanda_help: skblen = %u\n", (*pskb)->len);
- return NF_ACCEPT;
- }
-
- spin_lock_bh(&amanda_buffer_lock);
- skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff);
- data = amanda_buffer;
- data_limit = amanda_buffer + (*pskb)->len - dataoff;
- *data_limit = '\0';
-
- /* Search for the CONNECT string */
- data = strstr(data, "CONNECT ");
- if (!data)
- goto out;
- data += strlen("CONNECT ");
-
- /* Only search first line. */
- if ((tmp = strchr(data, '\n')))
- *tmp = '\0';
-
- for (i = 0; i < ARRAY_SIZE(conns); i++) {
- char *match = strstr(data, conns[i]);
- if (!match)
- continue;
- tmp = data = match + strlen(conns[i]);
- port = simple_strtoul(data, &data, 10);
- len = data - tmp;
- if (port == 0 || len > 5)
- break;
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- exp->expectfn = NULL;
- exp->flags = 0;
-
- exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- exp->tuple.src.u.tcp.port = 0;
- exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->tuple.dst.u.tcp.port = htons(port);
-
- exp->mask.src.ip = 0xFFFFFFFF;
- exp->mask.src.u.tcp.port = 0;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.protonum = 0xFF;
- exp->mask.dst.u.tcp.port = 0xFFFF;
-
- if (ip_nat_amanda_hook)
- ret = ip_nat_amanda_hook(pskb, ctinfo,
- tmp - amanda_buffer,
- len, exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- }
-
-out:
- spin_unlock_bh(&amanda_buffer_lock);
- return ret;
-}
-
-static struct ip_conntrack_helper amanda_helper = {
- .max_expected = ARRAY_SIZE(conns),
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "amanda",
-
- .tuple = { .src = { .u = { __constant_htons(10080) } },
- .dst = { .protonum = IPPROTO_UDP },
- },
- .mask = { .src = { .u = { 0xFFFF } },
- .dst = { .protonum = 0xFF },
- },
-};
-
-static void __exit fini(void)
-{
- ip_conntrack_helper_unregister(&amanda_helper);
- kfree(amanda_buffer);
-}
-
-static int __init init(void)
-{
- int ret;
-
- amanda_buffer = kmalloc(65536, GFP_KERNEL);
- if (!amanda_buffer)
- return -ENOMEM;
-
- ret = ip_conntrack_helper_register(&amanda_helper);
- if (ret < 0) {
- kfree(amanda_buffer);
- return ret;
- }
- return 0;
-
-
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c
deleted file mode 100644
index 84c66dbfeda..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_core.c
+++ /dev/null
@@ -1,1546 +0,0 @@
-/* Connection state tracking for netfilter. This is separated from,
- but required by, the NAT layer; it can also be used by an iptables
- extension. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * - new API and handling of conntrack/nat helpers
- * - now capable of multiple expectations for one master
- * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
- * - add usage/reference counts to ip_conntrack_expect
- * - export ip_conntrack[_expect]_{find_get,put} functions
- * */
-
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <linux/stddef.h>
-#include <linux/sysctl.h>
-#include <linux/slab.h>
-#include <linux/random.h>
-#include <linux/jhash.h>
-#include <linux/err.h>
-#include <linux/percpu.h>
-#include <linux/moduleparam.h>
-#include <linux/notifier.h>
-
-/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
- registrations, conntrack timers*/
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
-
-#define IP_CONNTRACK_VERSION "2.4"
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_conntrack_lock);
-
-/* ip_conntrack_standalone needs this */
-atomic_t ip_conntrack_count = ATOMIC_INIT(0);
-
-void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
-LIST_HEAD(ip_conntrack_expect_list);
-struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO];
-static LIST_HEAD(helpers);
-unsigned int ip_conntrack_htable_size = 0;
-int ip_conntrack_max;
-struct list_head *ip_conntrack_hash;
-static kmem_cache_t *ip_conntrack_cachep __read_mostly;
-static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly;
-struct ip_conntrack ip_conntrack_untracked;
-unsigned int ip_ct_log_invalid;
-static LIST_HEAD(unconfirmed);
-static int ip_conntrack_vmalloc;
-
-static unsigned int ip_conntrack_next_id = 1;
-static unsigned int ip_conntrack_expect_next_id = 1;
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-struct notifier_block *ip_conntrack_chain;
-struct notifier_block *ip_conntrack_expect_chain;
-
-DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);
-
-/* deliver cached events and clear cache entry - must be called with locally
- * disabled softirqs */
-static inline void
-__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
-{
- DEBUGP("ecache: delivering events for %p\n", ecache->ct);
- if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
- notifier_call_chain(&ip_conntrack_chain, ecache->events,
- ecache->ct);
- ecache->events = 0;
- ip_conntrack_put(ecache->ct);
- ecache->ct = NULL;
-}
-
-/* Deliver all cached events for a particular conntrack. This is called
- * by code prior to async packet handling or freeing the skb */
-void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
-{
- struct ip_conntrack_ecache *ecache;
-
- local_bh_disable();
- ecache = &__get_cpu_var(ip_conntrack_ecache);
- if (ecache->ct == ct)
- __ip_ct_deliver_cached_events(ecache);
- local_bh_enable();
-}
-
-void __ip_ct_event_cache_init(struct ip_conntrack *ct)
-{
- struct ip_conntrack_ecache *ecache;
-
- /* take care of delivering potentially old events */
- ecache = &__get_cpu_var(ip_conntrack_ecache);
- BUG_ON(ecache->ct == ct);
- if (ecache->ct)
- __ip_ct_deliver_cached_events(ecache);
- /* initialize for this conntrack/packet */
- ecache->ct = ct;
- nf_conntrack_get(&ct->ct_general);
-}
-
-/* flush the event cache - touches other CPU's data and must not be called while
- * packets are still passing through the code */
-static void ip_ct_event_cache_flush(void)
-{
- struct ip_conntrack_ecache *ecache;
- int cpu;
-
- for_each_cpu(cpu) {
- ecache = &per_cpu(ip_conntrack_ecache, cpu);
- if (ecache->ct)
- ip_conntrack_put(ecache->ct);
- }
-}
-#else
-static inline void ip_ct_event_cache_flush(void) {}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int ip_conntrack_hash_rnd_initted;
-static unsigned int ip_conntrack_hash_rnd;
-
-static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
- unsigned int size, unsigned int rnd)
-{
- return (jhash_3words(tuple->src.ip,
- (tuple->dst.ip ^ tuple->dst.protonum),
- (tuple->src.u.all | (tuple->dst.u.all << 16)),
- rnd) % size);
-}
-
-static u_int32_t
-hash_conntrack(const struct ip_conntrack_tuple *tuple)
-{
- return __hash_conntrack(tuple, ip_conntrack_htable_size,
- ip_conntrack_hash_rnd);
-}
-
-int
-ip_ct_get_tuple(const struct iphdr *iph,
- const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_protocol *protocol)
-{
- /* Never happen */
- if (iph->frag_off & htons(IP_OFFSET)) {
- printk("ip_conntrack_core: Frag of proto %u.\n",
- iph->protocol);
- return 0;
- }
-
- tuple->src.ip = iph->saddr;
- tuple->dst.ip = iph->daddr;
- tuple->dst.protonum = iph->protocol;
- tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
- return protocol->pkt_to_tuple(skb, dataoff, tuple);
-}
-
-int
-ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
- const struct ip_conntrack_tuple *orig,
- const struct ip_conntrack_protocol *protocol)
-{
- inverse->src.ip = orig->dst.ip;
- inverse->dst.ip = orig->src.ip;
- inverse->dst.protonum = orig->dst.protonum;
- inverse->dst.dir = !orig->dst.dir;
-
- return protocol->invert_tuple(inverse, orig);
-}
-
-
-/* ip_conntrack_expect helper functions */
-void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
-{
- ASSERT_WRITE_LOCK(&ip_conntrack_lock);
- IP_NF_ASSERT(!timer_pending(&exp->timeout));
- list_del(&exp->list);
- CONNTRACK_STAT_INC(expect_delete);
- exp->master->expecting--;
- ip_conntrack_expect_put(exp);
-}
-
-static void expectation_timed_out(unsigned long ul_expect)
-{
- struct ip_conntrack_expect *exp = (void *)ul_expect;
-
- write_lock_bh(&ip_conntrack_lock);
- ip_ct_unlink_expect(exp);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_expect_put(exp);
-}
-
-struct ip_conntrack_expect *
-__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
- atomic_inc(&i->use);
- return i;
- }
- }
- return NULL;
-}
-
-/* Just find a expectation corresponding to a tuple. */
-struct ip_conntrack_expect *
-ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- read_lock_bh(&ip_conntrack_lock);
- i = __ip_conntrack_expect_find(tuple);
- read_unlock_bh(&ip_conntrack_lock);
-
- return i;
-}
-
-/* If an expectation for this connection is found, it gets delete from
- * global list then returned. */
-static struct ip_conntrack_expect *
-find_expectation(const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- /* If master is not in hash table yet (ie. packet hasn't left
- this machine yet), how can other end know about expected?
- Hence these are not the droids you are looking for (if
- master ct never got confirmed, we'd hold a reference to it
- and weird things would happen to future packets). */
- if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
- && is_confirmed(i->master)) {
- if (i->flags & IP_CT_EXPECT_PERMANENT) {
- atomic_inc(&i->use);
- return i;
- } else if (del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- return i;
- }
- }
- }
- return NULL;
-}
-
-/* delete all expectations for this conntrack */
-void ip_ct_remove_expectations(struct ip_conntrack *ct)
-{
- struct ip_conntrack_expect *i, *tmp;
-
- /* Optimization: most connection never expect any others. */
- if (ct->expecting == 0)
- return;
-
- list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
- if (i->master == ct && del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- ip_conntrack_expect_put(i);
- }
- }
-}
-
-static void
-clean_from_lists(struct ip_conntrack *ct)
-{
- unsigned int ho, hr;
-
- DEBUGP("clean_from_lists(%p)\n", ct);
- ASSERT_WRITE_LOCK(&ip_conntrack_lock);
-
- ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]);
- LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]);
-
- /* Destroy all pending expectations */
- ip_ct_remove_expectations(ct);
-}
-
-static void
-destroy_conntrack(struct nf_conntrack *nfct)
-{
- struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
- struct ip_conntrack_protocol *proto;
-
- DEBUGP("destroy_conntrack(%p)\n", ct);
- IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
- IP_NF_ASSERT(!timer_pending(&ct->timeout));
-
- ip_conntrack_event(IPCT_DESTROY, ct);
- set_bit(IPS_DYING_BIT, &ct->status);
-
- /* To make sure we don't get any weird locking issues here:
- * destroy_conntrack() MUST NOT be called with a write lock
- * to ip_conntrack_lock!!! -HW */
- proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
- if (proto && proto->destroy)
- proto->destroy(ct);
-
- if (ip_conntrack_destroyed)
- ip_conntrack_destroyed(ct);
-
- write_lock_bh(&ip_conntrack_lock);
- /* Expectations will have been removed in clean_from_lists,
- * except TFTP can create an expectation on the first packet,
- * before connection is in the list, so we need to clean here,
- * too. */
- ip_ct_remove_expectations(ct);
-
- /* We overload first tuple to link into unconfirmed list. */
- if (!is_confirmed(ct)) {
- BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- }
-
- CONNTRACK_STAT_INC(delete);
- write_unlock_bh(&ip_conntrack_lock);
-
- if (ct->master)
- ip_conntrack_put(ct->master);
-
- DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
- ip_conntrack_free(ct);
-}
-
-static void death_by_timeout(unsigned long ul_conntrack)
-{
- struct ip_conntrack *ct = (void *)ul_conntrack;
-
- write_lock_bh(&ip_conntrack_lock);
- /* Inside lock so preempt is disabled on module removal path.
- * Otherwise we can get spurious warnings. */
- CONNTRACK_STAT_INC(delete_list);
- clean_from_lists(ct);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_put(ct);
-}
-
-static inline int
-conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
- const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- ASSERT_READ_LOCK(&ip_conntrack_lock);
- return tuplehash_to_ctrack(i) != ignored_conntrack
- && ip_ct_tuple_equal(tuple, &i->tuple);
-}
-
-struct ip_conntrack_tuple_hash *
-__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
- unsigned int hash = hash_conntrack(tuple);
-
- ASSERT_READ_LOCK(&ip_conntrack_lock);
- list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
- if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
- CONNTRACK_STAT_INC(found);
- return h;
- }
- CONNTRACK_STAT_INC(searched);
- }
-
- return NULL;
-}
-
-/* Find a connection corresponding to a tuple. */
-struct ip_conntrack_tuple_hash *
-ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
-
- read_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_find(tuple, ignored_conntrack);
- if (h)
- atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
- read_unlock_bh(&ip_conntrack_lock);
-
- return h;
-}
-
-static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
- unsigned int hash,
- unsigned int repl_hash)
-{
- ct->id = ++ip_conntrack_next_id;
- list_prepend(&ip_conntrack_hash[hash],
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
- list_prepend(&ip_conntrack_hash[repl_hash],
- &ct->tuplehash[IP_CT_DIR_REPLY].list);
-}
-
-void ip_conntrack_hash_insert(struct ip_conntrack *ct)
-{
- unsigned int hash, repl_hash;
-
- hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- write_lock_bh(&ip_conntrack_lock);
- __ip_conntrack_hash_insert(ct, hash, repl_hash);
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* Confirm a connection given skb; places it in hash table */
-int
-__ip_conntrack_confirm(struct sk_buff **pskb)
-{
- unsigned int hash, repl_hash;
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* ipt_REJECT uses ip_conntrack_attach to attach related
- ICMP/TCP RST packets in other direction. Actual packet
- which created connection will be IP_CT_NEW or for an
- expected connection, IP_CT_RELATED. */
- if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
- return NF_ACCEPT;
-
- hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- /* We're not in hash table, and we refuse to set up related
- connections for unconfirmed conns. But packet copies and
- REJECT will give spurious warnings here. */
- /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */
-
- /* No external references means noone else could have
- confirmed us. */
- IP_NF_ASSERT(!is_confirmed(ct));
- DEBUGP("Confirming conntrack %p\n", ct);
-
- write_lock_bh(&ip_conntrack_lock);
-
- /* See if there's one in the list already, including reverse:
- NAT could have grabbed it without realizing, since we're
- not in the hash. If there is, we lost race. */
- if (!LIST_FIND(&ip_conntrack_hash[hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL)
- && !LIST_FIND(&ip_conntrack_hash[repl_hash],
- conntrack_tuple_cmp,
- struct ip_conntrack_tuple_hash *,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) {
- /* Remove from unconfirmed list */
- list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
-
- __ip_conntrack_hash_insert(ct, hash, repl_hash);
- /* Timer relative to confirmation time, not original
- setting time, otherwise we'd get timer wrap in
- weird delay cases. */
- ct->timeout.expires += jiffies;
- add_timer(&ct->timeout);
- atomic_inc(&ct->ct_general.use);
- set_bit(IPS_CONFIRMED_BIT, &ct->status);
- CONNTRACK_STAT_INC(insert);
- write_unlock_bh(&ip_conntrack_lock);
- if (ct->helper)
- ip_conntrack_event_cache(IPCT_HELPER, *pskb);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
- if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
- test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
- ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
-#endif
- ip_conntrack_event_cache(master_ct(ct) ?
- IPCT_RELATED : IPCT_NEW, *pskb);
-
- return NF_ACCEPT;
- }
-
- CONNTRACK_STAT_INC(insert_failed);
- write_unlock_bh(&ip_conntrack_lock);
-
- return NF_DROP;
-}
-
-/* Returns true if a connection correspondings to the tuple (required
- for NAT). */
-int
-ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- struct ip_conntrack_tuple_hash *h;
-
- read_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_find(tuple, ignored_conntrack);
- read_unlock_bh(&ip_conntrack_lock);
-
- return h != NULL;
-}
-
-/* There's a small race here where we may free a just-assured
- connection. Too bad: we're in trouble anyway. */
-static inline int unreplied(const struct ip_conntrack_tuple_hash *i)
-{
- return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status));
-}
-
-static int early_drop(struct list_head *chain)
-{
- /* Traverse backwards: gives us oldest, which is roughly LRU */
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct = NULL;
- int dropped = 0;
-
- read_lock_bh(&ip_conntrack_lock);
- h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
- if (h) {
- ct = tuplehash_to_ctrack(h);
- atomic_inc(&ct->ct_general.use);
- }
- read_unlock_bh(&ip_conntrack_lock);
-
- if (!ct)
- return dropped;
-
- if (del_timer(&ct->timeout)) {
- death_by_timeout((unsigned long)ct);
- dropped = 1;
- CONNTRACK_STAT_INC(early_drop);
- }
- ip_conntrack_put(ct);
- return dropped;
-}
-
-static inline int helper_cmp(const struct ip_conntrack_helper *i,
- const struct ip_conntrack_tuple *rtuple)
-{
- return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask);
-}
-
-static struct ip_conntrack_helper *
-__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple)
-{
- return LIST_FIND(&helpers, helper_cmp,
- struct ip_conntrack_helper *,
- tuple);
-}
-
-struct ip_conntrack_helper *
-ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_helper *helper;
-
- /* need ip_conntrack_lock to assure that helper exists until
- * try_module_get() is called */
- read_lock_bh(&ip_conntrack_lock);
-
- helper = __ip_conntrack_helper_find(tuple);
- if (helper) {
- /* need to increase module usage count to assure helper will
- * not go away while the caller is e.g. busy putting a
- * conntrack in the hash that uses the helper */
- if (!try_module_get(helper->me))
- helper = NULL;
- }
-
- read_unlock_bh(&ip_conntrack_lock);
-
- return helper;
-}
-
-void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
-{
- module_put(helper->me);
-}
-
-struct ip_conntrack_protocol *
-__ip_conntrack_proto_find(u_int8_t protocol)
-{
- return ip_ct_protos[protocol];
-}
-
-/* this is guaranteed to always return a valid protocol helper, since
- * it falls back to generic_protocol */
-struct ip_conntrack_protocol *
-ip_conntrack_proto_find_get(u_int8_t protocol)
-{
- struct ip_conntrack_protocol *p;
-
- preempt_disable();
- p = __ip_conntrack_proto_find(protocol);
- if (p) {
- if (!try_module_get(p->me))
- p = &ip_conntrack_generic_protocol;
- }
- preempt_enable();
-
- return p;
-}
-
-void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
-{
- module_put(p->me);
-}
-
-struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
- struct ip_conntrack_tuple *repl)
-{
- struct ip_conntrack *conntrack;
-
- if (!ip_conntrack_hash_rnd_initted) {
- get_random_bytes(&ip_conntrack_hash_rnd, 4);
- ip_conntrack_hash_rnd_initted = 1;
- }
-
- if (ip_conntrack_max
- && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) {
- unsigned int hash = hash_conntrack(orig);
- /* Try dropping from this hash chain. */
- if (!early_drop(&ip_conntrack_hash[hash])) {
- if (net_ratelimit())
- printk(KERN_WARNING
- "ip_conntrack: table full, dropping"
- " packet.\n");
- return ERR_PTR(-ENOMEM);
- }
- }
-
- conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC);
- if (!conntrack) {
- DEBUGP("Can't allocate conntrack.\n");
- return ERR_PTR(-ENOMEM);
- }
-
- memset(conntrack, 0, sizeof(*conntrack));
- atomic_set(&conntrack->ct_general.use, 1);
- conntrack->ct_general.destroy = destroy_conntrack;
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
- conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
- /* Don't set timer yet: wait for confirmation */
- init_timer(&conntrack->timeout);
- conntrack->timeout.data = (unsigned long)conntrack;
- conntrack->timeout.function = death_by_timeout;
-
- atomic_inc(&ip_conntrack_count);
-
- return conntrack;
-}
-
-void
-ip_conntrack_free(struct ip_conntrack *conntrack)
-{
- atomic_dec(&ip_conntrack_count);
- kmem_cache_free(ip_conntrack_cachep, conntrack);
-}
-
-/* Allocate a new conntrack: we return -ENOMEM if classification
- * failed due to stress. Otherwise it really is unclassifiable */
-static struct ip_conntrack_tuple_hash *
-init_conntrack(struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_protocol *protocol,
- struct sk_buff *skb)
-{
- struct ip_conntrack *conntrack;
- struct ip_conntrack_tuple repl_tuple;
- struct ip_conntrack_expect *exp;
-
- if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
- DEBUGP("Can't invert tuple.\n");
- return NULL;
- }
-
- conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
- if (conntrack == NULL || IS_ERR(conntrack))
- return (struct ip_conntrack_tuple_hash *)conntrack;
-
- if (!protocol->new(conntrack, skb)) {
- ip_conntrack_free(conntrack);
- return NULL;
- }
-
- write_lock_bh(&ip_conntrack_lock);
- exp = find_expectation(tuple);
-
- if (exp) {
- DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
- conntrack, exp);
- /* Welcome, Mr. Bond. We've been expecting you... */
- __set_bit(IPS_EXPECTED_BIT, &conntrack->status);
- conntrack->master = exp->master;
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
- conntrack->mark = exp->master->mark;
-#endif
-#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
- defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
- /* this is ugly, but there is no other place where to put it */
- conntrack->nat.masq_index = exp->master->nat.masq_index;
-#endif
- nf_conntrack_get(&conntrack->master->ct_general);
- CONNTRACK_STAT_INC(expect_new);
- } else {
- conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);
-
- CONNTRACK_STAT_INC(new);
- }
-
- /* Overload tuple linked list to put us in unconfirmed list. */
- list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
-
- write_unlock_bh(&ip_conntrack_lock);
-
- if (exp) {
- if (exp->expectfn)
- exp->expectfn(conntrack, exp);
- ip_conntrack_expect_put(exp);
- }
-
- return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
-}
-
-/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
-static inline struct ip_conntrack *
-resolve_normal_ct(struct sk_buff *skb,
- struct ip_conntrack_protocol *proto,
- int *set_reply,
- unsigned int hooknum,
- enum ip_conntrack_info *ctinfo)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack *ct;
-
- IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);
-
- if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
- &tuple,proto))
- return NULL;
-
- /* look for tuple match */
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h) {
- h = init_conntrack(&tuple, proto, skb);
- if (!h)
- return NULL;
- if (IS_ERR(h))
- return (void *)h;
- }
- ct = tuplehash_to_ctrack(h);
-
- /* It exists; we have (non-exclusive) reference. */
- if (DIRECTION(h) == IP_CT_DIR_REPLY) {
- *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
- /* Please set reply bit if this packet OK */
- *set_reply = 1;
- } else {
- /* Once we've had two way comms, always ESTABLISHED. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
- DEBUGP("ip_conntrack_in: normal packet for %p\n",
- ct);
- *ctinfo = IP_CT_ESTABLISHED;
- } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
- DEBUGP("ip_conntrack_in: related packet for %p\n",
- ct);
- *ctinfo = IP_CT_RELATED;
- } else {
- DEBUGP("ip_conntrack_in: new packet for %p\n",
- ct);
- *ctinfo = IP_CT_NEW;
- }
- *set_reply = 0;
- }
- skb->nfct = &ct->ct_general;
- skb->nfctinfo = *ctinfo;
- return ct;
-}
-
-/* Netfilter hook itself. */
-unsigned int ip_conntrack_in(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- struct ip_conntrack_protocol *proto;
- int set_reply = 0;
- int ret;
-
- /* Previously seen (loopback or untracked)? Ignore. */
- if ((*pskb)->nfct) {
- CONNTRACK_STAT_INC(ignore);
- return NF_ACCEPT;
- }
-
- /* Never happen */
- if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
- if (net_ratelimit()) {
- printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
- (*pskb)->nh.iph->protocol, hooknum);
- }
- return NF_DROP;
- }
-
-/* Doesn't cover locally-generated broadcast, so not worth it. */
-#if 0
- /* Ignore broadcast: no `connection'. */
- if ((*pskb)->pkt_type == PACKET_BROADCAST) {
- printk("Broadcast packet!\n");
- return NF_ACCEPT;
- } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
- == htonl(0x000000FF)) {
- printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- NIPQUAD((*pskb)->nh.iph->daddr),
- (*pskb)->sk, (*pskb)->pkt_type);
- }
-#endif
-
- proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);
-
- /* It may be an special packet, error, unclean...
- * inverse of the return code tells to the netfilter
- * core what to do with the packet. */
- if (proto->error != NULL
- && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
- CONNTRACK_STAT_INC(error);
- CONNTRACK_STAT_INC(invalid);
- return -ret;
- }
-
- if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) {
- /* Not valid part of a connection */
- CONNTRACK_STAT_INC(invalid);
- return NF_ACCEPT;
- }
-
- if (IS_ERR(ct)) {
- /* Too stressed to deal. */
- CONNTRACK_STAT_INC(drop);
- return NF_DROP;
- }
-
- IP_NF_ASSERT((*pskb)->nfct);
-
- ret = proto->packet(ct, *pskb, ctinfo);
- if (ret < 0) {
- /* Invalid: inverse of the return code tells
- * the netfilter core what to do*/
- nf_conntrack_put((*pskb)->nfct);
- (*pskb)->nfct = NULL;
- CONNTRACK_STAT_INC(invalid);
- return -ret;
- }
-
- if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
- ip_conntrack_event_cache(IPCT_STATUS, *pskb);
-
- return ret;
-}
-
-int invert_tuplepr(struct ip_conntrack_tuple *inverse,
- const struct ip_conntrack_tuple *orig)
-{
- return ip_ct_invert_tuple(inverse, orig,
- __ip_conntrack_proto_find(orig->dst.protonum));
-}
-
-/* Would two expected things clash? */
-static inline int expect_clash(const struct ip_conntrack_expect *a,
- const struct ip_conntrack_expect *b)
-{
- /* Part covered by intersection of masks must be unequal,
- otherwise they clash */
- struct ip_conntrack_tuple intersect_mask
- = { { a->mask.src.ip & b->mask.src.ip,
- { a->mask.src.u.all & b->mask.src.u.all } },
- { a->mask.dst.ip & b->mask.dst.ip,
- { a->mask.dst.u.all & b->mask.dst.u.all },
- a->mask.dst.protonum & b->mask.dst.protonum } };
-
- return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
-}
-
-static inline int expect_matches(const struct ip_conntrack_expect *a,
- const struct ip_conntrack_expect *b)
-{
- return a->master == b->master
- && ip_ct_tuple_equal(&a->tuple, &b->tuple)
- && ip_ct_tuple_equal(&a->mask, &b->mask);
-}
-
-/* Generally a bad idea to call this: could have matched already. */
-void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack_expect *i;
-
- write_lock_bh(&ip_conntrack_lock);
- /* choose the the oldest expectation to evict */
- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
- if (expect_matches(i, exp) && del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- write_unlock_bh(&ip_conntrack_lock);
- ip_conntrack_expect_put(i);
- return;
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-/* We don't increase the master conntrack refcount for non-fulfilled
- * conntracks. During the conntrack destruction, the expectations are
- * always killed before the conntrack itself */
-struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
-{
- struct ip_conntrack_expect *new;
-
- new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
- if (!new) {
- DEBUGP("expect_related: OOM allocating expect\n");
- return NULL;
- }
- new->master = me;
- atomic_set(&new->use, 1);
- return new;
-}
-
-void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
-{
- if (atomic_dec_and_test(&exp->use))
- kmem_cache_free(ip_conntrack_expect_cachep, exp);
-}
-
-static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
-{
- atomic_inc(&exp->use);
- exp->master->expecting++;
- list_add(&exp->list, &ip_conntrack_expect_list);
-
- init_timer(&exp->timeout);
- exp->timeout.data = (unsigned long)exp;
- exp->timeout.function = expectation_timed_out;
- exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
- add_timer(&exp->timeout);
-
- exp->id = ++ip_conntrack_expect_next_id;
- atomic_inc(&exp->use);
- CONNTRACK_STAT_INC(expect_create);
-}
-
-/* Race with expectations being used means we could have none to find; OK. */
-static void evict_oldest_expect(struct ip_conntrack *master)
-{
- struct ip_conntrack_expect *i;
-
- list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
- if (i->master == master) {
- if (del_timer(&i->timeout)) {
- ip_ct_unlink_expect(i);
- ip_conntrack_expect_put(i);
- }
- break;
- }
- }
-}
-
-static inline int refresh_timer(struct ip_conntrack_expect *i)
-{
- if (!del_timer(&i->timeout))
- return 0;
-
- i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
- add_timer(&i->timeout);
- return 1;
-}
-
-int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
-{
- struct ip_conntrack_expect *i;
- int ret;
-
- DEBUGP("ip_conntrack_expect_related %p\n", related_to);
- DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
- DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
-
- write_lock_bh(&ip_conntrack_lock);
- list_for_each_entry(i, &ip_conntrack_expect_list, list) {
- if (expect_matches(i, expect)) {
- /* Refresh timer: if it's dying, ignore.. */
- if (refresh_timer(i)) {
- ret = 0;
- goto out;
- }
- } else if (expect_clash(i, expect)) {
- ret = -EBUSY;
- goto out;
- }
- }
-
- /* Will be over limit? */
- if (expect->master->helper->max_expected &&
- expect->master->expecting >= expect->master->helper->max_expected)
- evict_oldest_expect(expect->master);
-
- ip_conntrack_expect_insert(expect);
- ip_conntrack_expect_event(IPEXP_NEW, expect);
- ret = 0;
-out:
- write_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-/* Alter reply tuple (maybe alter helper). This is for NAT, and is
- implicitly racy: see __ip_conntrack_confirm */
-void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
- const struct ip_conntrack_tuple *newreply)
-{
- write_lock_bh(&ip_conntrack_lock);
- /* Should be unconfirmed, so not in hash table yet */
- IP_NF_ASSERT(!is_confirmed(conntrack));
-
- DEBUGP("Altering reply tuple of %p to ", conntrack);
- DUMP_TUPLE(newreply);
-
- conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
- if (!conntrack->master && conntrack->expecting == 0)
- conntrack->helper = __ip_conntrack_helper_find(newreply);
- write_unlock_bh(&ip_conntrack_lock);
-}
-
-int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
-{
- BUG_ON(me->timeout == 0);
- write_lock_bh(&ip_conntrack_lock);
- list_prepend(&helpers, me);
- write_unlock_bh(&ip_conntrack_lock);
-
- return 0;
-}
-
-struct ip_conntrack_helper *
-__ip_conntrack_helper_find_byname(const char *name)
-{
- struct ip_conntrack_helper *h;
-
- list_for_each_entry(h, &helpers, list) {
- if (!strcmp(h->name, name))
- return h;
- }
-
- return NULL;
-}
-
-static inline int unhelp(struct ip_conntrack_tuple_hash *i,
- const struct ip_conntrack_helper *me)
-{
- if (tuplehash_to_ctrack(i)->helper == me) {
- ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
- tuplehash_to_ctrack(i)->helper = NULL;
- }
- return 0;
-}
-
-void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
-{
- unsigned int i;
- struct ip_conntrack_expect *exp, *tmp;
-
- /* Need write lock here, to delete helper. */
- write_lock_bh(&ip_conntrack_lock);
- LIST_DELETE(&helpers, me);
-
- /* Get rid of expectations */
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
- if (exp->master->helper == me && del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- /* Get rid of expecteds, set helpers to NULL. */
- LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me);
- for (i = 0; i < ip_conntrack_htable_size; i++)
- LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
- struct ip_conntrack_tuple_hash *, me);
- write_unlock_bh(&ip_conntrack_lock);
-
- /* Someone could be still looking at the helper in a bh. */
- synchronize_net();
-}
-
-/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
-void __ip_ct_refresh_acct(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- const struct sk_buff *skb,
- unsigned long extra_jiffies,
- int do_acct)
-{
- int event = 0;
-
- IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
- IP_NF_ASSERT(skb);
-
- write_lock_bh(&ip_conntrack_lock);
-
- /* If not in hash table, timer will not be active yet */
- if (!is_confirmed(ct)) {
- ct->timeout.expires = extra_jiffies;
- event = IPCT_REFRESH;
- } else {
- /* Need del_timer for race avoidance (may already be dying). */
- if (del_timer(&ct->timeout)) {
- ct->timeout.expires = jiffies + extra_jiffies;
- add_timer(&ct->timeout);
- event = IPCT_REFRESH;
- }
- }
-
-#ifdef CONFIG_IP_NF_CT_ACCT
- if (do_acct) {
- ct->counters[CTINFO2DIR(ctinfo)].packets++;
- ct->counters[CTINFO2DIR(ctinfo)].bytes +=
- ntohs(skb->nh.iph->tot_len);
- if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
- || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
- event |= IPCT_COUNTER_FILLING;
- }
-#endif
-
- write_unlock_bh(&ip_conntrack_lock);
-
- /* must be unlocked when calling event cache */
- if (event)
- ip_conntrack_event_cache(event, skb);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
-int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t),
- &tuple->src.u.tcp.port);
- NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t),
- &tuple->dst.u.tcp.port);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
- struct ip_conntrack_tuple *t)
-{
- if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
- return -EINVAL;
-
- t->src.u.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
- t->dst.u.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);
-
- return 0;
-}
-#endif
-
-/* Returns new sk_buff, or NULL */
-struct sk_buff *
-ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
-{
- skb_orphan(skb);
-
- local_bh_disable();
- skb = ip_defrag(skb, user);
- local_bh_enable();
-
- if (skb)
- ip_send_check(skb->nh.iph);
- return skb;
-}
-
-/* Used by ipt_REJECT. */
-static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- /* This ICMP is in reverse direction to the packet which caused it */
- ct = ip_conntrack_get(skb, &ctinfo);
-
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
- ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
- else
- ctinfo = IP_CT_RELATED;
-
- /* Attach to new skbuff, and increment count */
- nskb->nfct = &ct->ct_general;
- nskb->nfctinfo = ctinfo;
- nf_conntrack_get(nskb->nfct);
-}
-
-static inline int
-do_iter(const struct ip_conntrack_tuple_hash *i,
- int (*iter)(struct ip_conntrack *i, void *data),
- void *data)
-{
- return iter(tuplehash_to_ctrack(i), data);
-}
-
-/* Bring out ya dead! */
-static struct ip_conntrack_tuple_hash *
-get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
- void *data, unsigned int *bucket)
-{
- struct ip_conntrack_tuple_hash *h = NULL;
-
- write_lock_bh(&ip_conntrack_lock);
- for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
- h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
- struct ip_conntrack_tuple_hash *, iter, data);
- if (h)
- break;
- }
- if (!h)
- h = LIST_FIND_W(&unconfirmed, do_iter,
- struct ip_conntrack_tuple_hash *, iter, data);
- if (h)
- atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
- write_unlock_bh(&ip_conntrack_lock);
-
- return h;
-}
-
-void
-ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
-{
- struct ip_conntrack_tuple_hash *h;
- unsigned int bucket = 0;
-
- while ((h = get_next_corpse(iter, data, &bucket)) != NULL) {
- struct ip_conntrack *ct = tuplehash_to_ctrack(h);
- /* Time to push up daises... */
- if (del_timer(&ct->timeout))
- death_by_timeout((unsigned long)ct);
- /* ... else the timer will get him soon. */
-
- ip_conntrack_put(ct);
- }
-}
-
-/* Fast function for those who don't want to parse /proc (and I don't
- blame them). */
-/* Reversing the socket's dst/src point of view gives us the reply
- mapping. */
-static int
-getorigdst(struct sock *sk, int optval, void __user *user, int *len)
-{
- struct inet_sock *inet = inet_sk(sk);
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
-
- IP_CT_TUPLE_U_BLANK(&tuple);
- tuple.src.ip = inet->rcv_saddr;
- tuple.src.u.tcp.port = inet->sport;
- tuple.dst.ip = inet->daddr;
- tuple.dst.u.tcp.port = inet->dport;
- tuple.dst.protonum = IPPROTO_TCP;
-
- /* We only do TCP at the moment: is there a better way? */
- if (strcmp(sk->sk_prot->name, "TCP")) {
- DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
- return -ENOPROTOOPT;
- }
-
- if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
- DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
- *len, sizeof(struct sockaddr_in));
- return -EINVAL;
- }
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (h) {
- struct sockaddr_in sin;
- struct ip_conntrack *ct = tuplehash_to_ctrack(h);
-
- sin.sin_family = AF_INET;
- sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.u.tcp.port;
- sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple.dst.ip;
-
- DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
- ip_conntrack_put(ct);
- if (copy_to_user(user, &sin, sizeof(sin)) != 0)
- return -EFAULT;
- else
- return 0;
- }
- DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
- NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
- NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
- return -ENOENT;
-}
-
-static struct nf_sockopt_ops so_getorigdst = {
- .pf = PF_INET,
- .get_optmin = SO_ORIGINAL_DST,
- .get_optmax = SO_ORIGINAL_DST+1,
- .get = &getorigdst,
-};
-
-static int kill_all(struct ip_conntrack *i, void *data)
-{
- return 1;
-}
-
-void ip_conntrack_flush(void)
-{
- ip_ct_iterate_cleanup(kill_all, NULL);
-}
-
-static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size)
-{
- if (vmalloced)
- vfree(hash);
- else
- free_pages((unsigned long)hash,
- get_order(sizeof(struct list_head) * size));
-}
-
-/* Mishearing the voices in his head, our hero wonders how he's
- supposed to kill the mall. */
-void ip_conntrack_cleanup(void)
-{
- ip_ct_attach = NULL;
-
- /* This makes sure all current packets have passed through
- netfilter framework. Roll on, two-stage module
- delete... */
- synchronize_net();
-
- ip_ct_event_cache_flush();
- i_see_dead_people:
- ip_conntrack_flush();
- if (atomic_read(&ip_conntrack_count) != 0) {
- schedule();
- goto i_see_dead_people;
- }
- /* wait until all references to ip_conntrack_untracked are dropped */
- while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
- schedule();
-
- kmem_cache_destroy(ip_conntrack_cachep);
- kmem_cache_destroy(ip_conntrack_expect_cachep);
- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
- ip_conntrack_htable_size);
- nf_unregister_sockopt(&so_getorigdst);
-}
-
-static struct list_head *alloc_hashtable(int size, int *vmalloced)
-{
- struct list_head *hash;
- unsigned int i;
-
- *vmalloced = 0;
- hash = (void*)__get_free_pages(GFP_KERNEL,
- get_order(sizeof(struct list_head)
- * size));
- if (!hash) {
- *vmalloced = 1;
- printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
- hash = vmalloc(sizeof(struct list_head) * size);
- }
-
- if (hash)
- for (i = 0; i < size; i++)
- INIT_LIST_HEAD(&hash[i]);
-
- return hash;
-}
-
-static int set_hashsize(const char *val, struct kernel_param *kp)
-{
- int i, bucket, hashsize, vmalloced;
- int old_vmalloced, old_size;
- int rnd;
- struct list_head *hash, *old_hash;
- struct ip_conntrack_tuple_hash *h;
-
- /* On boot, we can set this without any fancy locking. */
- if (!ip_conntrack_htable_size)
- return param_set_int(val, kp);
-
- hashsize = simple_strtol(val, NULL, 0);
- if (!hashsize)
- return -EINVAL;
-
- hash = alloc_hashtable(hashsize, &vmalloced);
- if (!hash)
- return -ENOMEM;
-
- /* We have to rehash for the new table anyway, so we also can
- * use a new random seed */
- get_random_bytes(&rnd, 4);
-
- write_lock_bh(&ip_conntrack_lock);
- for (i = 0; i < ip_conntrack_htable_size; i++) {
- while (!list_empty(&ip_conntrack_hash[i])) {
- h = list_entry(ip_conntrack_hash[i].next,
- struct ip_conntrack_tuple_hash, list);
- list_del(&h->list);
- bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
- list_add_tail(&h->list, &hash[bucket]);
- }
- }
- old_size = ip_conntrack_htable_size;
- old_vmalloced = ip_conntrack_vmalloc;
- old_hash = ip_conntrack_hash;
-
- ip_conntrack_htable_size = hashsize;
- ip_conntrack_vmalloc = vmalloced;
- ip_conntrack_hash = hash;
- ip_conntrack_hash_rnd = rnd;
- write_unlock_bh(&ip_conntrack_lock);
-
- free_conntrack_hash(old_hash, old_vmalloced, old_size);
- return 0;
-}
-
-module_param_call(hashsize, set_hashsize, param_get_uint,
- &ip_conntrack_htable_size, 0600);
-
-int __init ip_conntrack_init(void)
-{
- unsigned int i;
- int ret;
-
- /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB
- * machine has 256 buckets. >= 1GB machines have 8192 buckets. */
- if (!ip_conntrack_htable_size) {
- ip_conntrack_htable_size
- = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct list_head));
- if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
- ip_conntrack_htable_size = 8192;
- if (ip_conntrack_htable_size < 16)
- ip_conntrack_htable_size = 16;
- }
- ip_conntrack_max = 8 * ip_conntrack_htable_size;
-
- printk("ip_conntrack version %s (%u buckets, %d max)"
- " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
- ip_conntrack_htable_size, ip_conntrack_max,
- sizeof(struct ip_conntrack));
-
- ret = nf_register_sockopt(&so_getorigdst);
- if (ret != 0) {
- printk(KERN_ERR "Unable to register netfilter socket option\n");
- return ret;
- }
-
- ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
- &ip_conntrack_vmalloc);
- if (!ip_conntrack_hash) {
- printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
- goto err_unreg_sockopt;
- }
-
- ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
- sizeof(struct ip_conntrack), 0,
- 0, NULL, NULL);
- if (!ip_conntrack_cachep) {
- printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
- goto err_free_hash;
- }
-
- ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
- sizeof(struct ip_conntrack_expect),
- 0, 0, NULL, NULL);
- if (!ip_conntrack_expect_cachep) {
- printk(KERN_ERR "Unable to create ip_expect slab cache\n");
- goto err_free_conntrack_slab;
- }
-
- /* Don't NEED lock here, but good form anyway. */
- write_lock_bh(&ip_conntrack_lock);
- for (i = 0; i < MAX_IP_CT_PROTO; i++)
- ip_ct_protos[i] = &ip_conntrack_generic_protocol;
- /* Sew in builtin protocols. */
- ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
- ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
- ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
- write_unlock_bh(&ip_conntrack_lock);
-
- /* For use by ipt_REJECT */
- ip_ct_attach = ip_conntrack_attach;
-
- /* Set up fake conntrack:
- - to never be deleted, not in any hashes */
- atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
- /* - and look it like as a confirmed connection */
- set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);
-
- return ret;
-
-err_free_conntrack_slab:
- kmem_cache_destroy(ip_conntrack_cachep);
-err_free_hash:
- free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
- ip_conntrack_htable_size);
-err_unreg_sockopt:
- nf_unregister_sockopt(&so_getorigdst);
-
- return -ENOMEM;
-}
diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c
deleted file mode 100644
index e627e585617..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_ftp.c
+++ /dev/null
@@ -1,510 +0,0 @@
-/* FTP extension for IP connection tracking. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/ctype.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp connection tracking helper");
-
-/* This is slow, but it's simple. --RR */
-static char *ftp_buffer;
-static DEFINE_SPINLOCK(ip_ftp_lock);
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-
-static int loose;
-module_param(loose, bool, 0600);
-
-unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- enum ip_ct_ftp_type type,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp,
- u32 *seq);
-EXPORT_SYMBOL_GPL(ip_nat_ftp_hook);
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int try_rfc959(const char *, size_t, u_int32_t [], char);
-static int try_eprt(const char *, size_t, u_int32_t [], char);
-static int try_epsv_response(const char *, size_t, u_int32_t [], char);
-
-static const struct ftp_search {
- enum ip_conntrack_dir dir;
- const char *pattern;
- size_t plen;
- char skip;
- char term;
- enum ip_ct_ftp_type ftptype;
- int (*getnum)(const char *, size_t, u_int32_t[], char);
-} search[] = {
- {
- IP_CT_DIR_ORIGINAL,
- "PORT", sizeof("PORT") - 1, ' ', '\r',
- IP_CT_FTP_PORT,
- try_rfc959,
- },
- {
- IP_CT_DIR_REPLY,
- "227 ", sizeof("227 ") - 1, '(', ')',
- IP_CT_FTP_PASV,
- try_rfc959,
- },
- {
- IP_CT_DIR_ORIGINAL,
- "EPRT", sizeof("EPRT") - 1, ' ', '\r',
- IP_CT_FTP_EPRT,
- try_eprt,
- },
- {
- IP_CT_DIR_REPLY,
- "229 ", sizeof("229 ") - 1, '(', ')',
- IP_CT_FTP_EPSV,
- try_epsv_response,
- },
-};
-
-static int try_number(const char *data, size_t dlen, u_int32_t array[],
- int array_size, char sep, char term)
-{
- u_int32_t i, len;
-
- memset(array, 0, sizeof(array[0])*array_size);
-
- /* Keep data pointing at next char. */
- for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) {
- if (*data >= '0' && *data <= '9') {
- array[i] = array[i]*10 + *data - '0';
- }
- else if (*data == sep)
- i++;
- else {
- /* Unexpected character; true if it's the
- terminator and we're finished. */
- if (*data == term && i == array_size - 1)
- return len;
-
- DEBUGP("Char %u (got %u nums) `%u' unexpected\n",
- len, i, *data);
- return 0;
- }
- }
- DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep);
-
- return 0;
-}
-
-/* Returns 0, or length of numbers: 192,168,1,1,5,6 */
-static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- return try_number(data, dlen, array, 6, ',', term);
-}
-
-/* Grab port: number up to delimiter */
-static int get_port(const char *data, int start, size_t dlen, char delim,
- u_int32_t array[2])
-{
- u_int16_t port = 0;
- int i;
-
- for (i = start; i < dlen; i++) {
- /* Finished? */
- if (data[i] == delim) {
- if (port == 0)
- break;
- array[0] = port >> 8;
- array[1] = port;
- return i + 1;
- }
- else if (data[i] >= '0' && data[i] <= '9')
- port = port*10 + data[i] - '0';
- else /* Some other crap */
- break;
- }
- return 0;
-}
-
-/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */
-static int try_eprt(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- char delim;
- int length;
-
- /* First character is delimiter, then "1" for IPv4, then
- delimiter again. */
- if (dlen <= 3) return 0;
- delim = data[0];
- if (isdigit(delim) || delim < 33 || delim > 126
- || data[1] != '1' || data[2] != delim)
- return 0;
-
- DEBUGP("EPRT: Got |1|!\n");
- /* Now we have IP address. */
- length = try_number(data + 3, dlen - 3, array, 4, '.', delim);
- if (length == 0)
- return 0;
-
- DEBUGP("EPRT: Got IP address!\n");
- /* Start offset includes initial "|1|", and trailing delimiter */
- return get_port(data, 3 + length + 1, dlen, delim, array+4);
-}
-
-/* Returns 0, or length of numbers: |||6446| */
-static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6],
- char term)
-{
- char delim;
-
- /* Three delimiters. */
- if (dlen <= 3) return 0;
- delim = data[0];
- if (isdigit(delim) || delim < 33 || delim > 126
- || data[1] != delim || data[2] != delim)
- return 0;
-
- return get_port(data, 3, dlen, delim, array+4);
-}
-
-/* Return 1 for match, 0 for accept, -1 for partial. */
-static int find_pattern(const char *data, size_t dlen,
- const char *pattern, size_t plen,
- char skip, char term,
- unsigned int *numoff,
- unsigned int *numlen,
- u_int32_t array[6],
- int (*getnum)(const char *, size_t, u_int32_t[], char))
-{
- size_t i;
-
- DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen);
- if (dlen == 0)
- return 0;
-
- if (dlen <= plen) {
- /* Short packet: try for partial? */
- if (strnicmp(data, pattern, dlen) == 0)
- return -1;
- else return 0;
- }
-
- if (strnicmp(data, pattern, plen) != 0) {
-#if 0
- size_t i;
-
- DEBUGP("ftp: string mismatch\n");
- for (i = 0; i < plen; i++) {
- DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n",
- i, data[i], data[i],
- pattern[i], pattern[i]);
- }
-#endif
- return 0;
- }
-
- DEBUGP("Pattern matches!\n");
- /* Now we've found the constant string, try to skip
- to the 'skip' character */
- for (i = plen; data[i] != skip; i++)
- if (i == dlen - 1) return -1;
-
- /* Skip over the last character */
- i++;
-
- DEBUGP("Skipped up to `%c'!\n", skip);
-
- *numoff = i;
- *numlen = getnum(data + i, dlen - i, array, term);
- if (!*numlen)
- return -1;
-
- DEBUGP("Match succeeded!\n");
- return 1;
-}
-
-/* Look up to see if we're just after a \n. */
-static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir)
-{
- unsigned int i;
-
- for (i = 0; i < info->seq_aft_nl_num[dir]; i++)
- if (info->seq_aft_nl[dir][i] == seq)
- return 1;
- return 0;
-}
-
-/* We don't update if it's older than what we have. */
-static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir,
- struct sk_buff *skb)
-{
- unsigned int i, oldest = NUM_SEQ_TO_REMEMBER;
-
- /* Look for oldest: if we find exact match, we're done. */
- for (i = 0; i < info->seq_aft_nl_num[dir]; i++) {
- if (info->seq_aft_nl[dir][i] == nl_seq)
- return;
-
- if (oldest == info->seq_aft_nl_num[dir]
- || before(info->seq_aft_nl[dir][i], oldest))
- oldest = i;
- }
-
- if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) {
- info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq;
- ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
- } else if (oldest != NUM_SEQ_TO_REMEMBER) {
- info->seq_aft_nl[dir][oldest] = nl_seq;
- ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb);
- }
-}
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dataoff, datalen;
- struct tcphdr _tcph, *th;
- char *fb_ptr;
- int ret;
- u32 seq, array[6] = { 0 };
- int dir = CTINFO2DIR(ctinfo);
- unsigned int matchlen, matchoff;
- struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info;
- struct ip_conntrack_expect *exp;
- unsigned int i;
- int found = 0, ends_in_nl;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
- DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo);
- return NF_ACCEPT;
- }
-
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return NF_ACCEPT;
-
- dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
- /* No data? */
- if (dataoff >= (*pskb)->len) {
- DEBUGP("ftp: pskblen = %u\n", (*pskb)->len);
- return NF_ACCEPT;
- }
- datalen = (*pskb)->len - dataoff;
-
- spin_lock_bh(&ip_ftp_lock);
- fb_ptr = skb_header_pointer(*pskb, dataoff,
- (*pskb)->len - dataoff, ftp_buffer);
- BUG_ON(fb_ptr == NULL);
-
- ends_in_nl = (fb_ptr[datalen - 1] == '\n');
- seq = ntohl(th->seq) + datalen;
-
- /* Look up to see if we're just after a \n. */
- if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) {
- /* Now if this ends in \n, update ftp info. */
- DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n",
- ct_ftp_info->seq_aft_nl[0][dir]
- old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl);
- ret = NF_ACCEPT;
- goto out_update_nl;
- }
-
- /* Initialize IP array to expected address (it's not mentioned
- in EPSV responses) */
- array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF;
- array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF;
- array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF;
- array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF;
-
- for (i = 0; i < ARRAY_SIZE(search); i++) {
- if (search[i].dir != dir) continue;
-
- found = find_pattern(fb_ptr, (*pskb)->len - dataoff,
- search[i].pattern,
- search[i].plen,
- search[i].skip,
- search[i].term,
- &matchoff, &matchlen,
- array,
- search[i].getnum);
- if (found) break;
- }
- if (found == -1) {
- /* We don't usually drop packets. After all, this is
- connection tracking, not packet filtering.
- However, it is necessary for accurate tracking in
- this case. */
- if (net_ratelimit())
- printk("conntrack_ftp: partial %s %u+%u\n",
- search[i].pattern,
- ntohl(th->seq), datalen);
- ret = NF_DROP;
- goto out;
- } else if (found == 0) { /* No match */
- ret = NF_ACCEPT;
- goto out_update_nl;
- }
-
- DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n",
- fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff);
-
- /* Allocate expectation which will be inserted */
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- /* We refer to the reverse direction ("!dir") tuples here,
- * because we're expecting something in the other direction.
- * Doesn't matter unless NAT is happening. */
- exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip;
-
- if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3])
- != ct->tuplehash[dir].tuple.src.ip) {
- /* Enrico Scholz's passive FTP to partially RNAT'd ftp
- server: it really wants us to connect to a
- different IP address. Simply don't record it for
- NAT. */
- DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n",
- array[0], array[1], array[2], array[3],
- NIPQUAD(ct->tuplehash[dir].tuple.src.ip));
-
- /* Thanks to Cristiano Lincoln Mattos
- <lincoln@cesar.org.br> for reporting this potential
- problem (DMZ machines opening holes to internal
- networks, or the packet filter itself). */
- if (!loose) {
- ret = NF_ACCEPT;
- goto out_put_expect;
- }
- exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16)
- | (array[2] << 8) | array[3]);
- }
-
- exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip;
- exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]);
- exp->tuple.src.u.tcp.port = 0; /* Don't care. */
- exp->tuple.dst.protonum = IPPROTO_TCP;
- exp->mask = ((struct ip_conntrack_tuple)
- { { 0xFFFFFFFF, { 0 } },
- { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
-
- exp->expectfn = NULL;
- exp->flags = 0;
-
- /* Now, NAT might want to mangle the packet, and register the
- * (possibly changed) expectation itself. */
- if (ip_nat_ftp_hook)
- ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype,
- matchoff, matchlen, exp, &seq);
- else {
- /* Can't expect this? Best to drop packet now. */
- if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- else
- ret = NF_ACCEPT;
- }
-
-out_put_expect:
- ip_conntrack_expect_put(exp);
-
-out_update_nl:
- /* Now if this ends in \n, update ftp info. Seq may have been
- * adjusted by NAT code. */
- if (ends_in_nl)
- update_nl_seq(seq, ct_ftp_info,dir, *pskb);
- out:
- spin_unlock_bh(&ip_ftp_lock);
- return ret;
-}
-
-static struct ip_conntrack_helper ftp[MAX_PORTS];
-static char ftp_names[MAX_PORTS][sizeof("ftp-65535")];
-
-/* Not __exit: called from init() */
-static void fini(void)
-{
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("ip_ct_ftp: unregistering helper for port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&ftp[i]);
- }
-
- kfree(ftp_buffer);
-}
-
-static int __init init(void)
-{
- int i, ret;
- char *tmpname;
-
- ftp_buffer = kmalloc(65536, GFP_KERNEL);
- if (!ftp_buffer)
- return -ENOMEM;
-
- if (ports_c == 0)
- ports[ports_c++] = FTP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- ftp[i].tuple.src.u.tcp.port = htons(ports[i]);
- ftp[i].tuple.dst.protonum = IPPROTO_TCP;
- ftp[i].mask.src.u.tcp.port = 0xFFFF;
- ftp[i].mask.dst.protonum = 0xFF;
- ftp[i].max_expected = 1;
- ftp[i].timeout = 5 * 60; /* 5 minutes */
- ftp[i].me = THIS_MODULE;
- ftp[i].help = help;
-
- tmpname = &ftp_names[i][0];
- if (ports[i] == FTP_PORT)
- sprintf(tmpname, "ftp");
- else
- sprintf(tmpname, "ftp-%d", ports[i]);
- ftp[i].name = tmpname;
-
- DEBUGP("ip_ct_ftp: registering helper for port %d\n",
- ports[i]);
- ret = ip_conntrack_helper_register(&ftp[i]);
-
- if (ret) {
- fini();
- return ret;
- }
- }
- return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
deleted file mode 100644
index d716bba798f..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c
+++ /dev/null
@@ -1,802 +0,0 @@
-/*
- * ip_conntrack_pptp.c - Version 3.0
- *
- * Connection tracking support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft. PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702. Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * Limitations:
- * - We blindly assume that control connections are always
- * established in PNS->PAC direction. This is a violation
- * of RFFC2673
- * - We can only support one single call within each session
- *
- * TODO:
- * - testing of incoming PPTP calls
- *
- * Changes:
- * 2002-02-05 - Version 1.3
- * - Call ip_conntrack_unexpect_related() from
- * pptp_destroy_siblings() to destroy expectations in case
- * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen
- * (Philip Craig <philipc@snapgear.com>)
- * - Add Version information at module loadtime
- * 2002-02-10 - Version 1.6
- * - move to C99 style initializers
- * - remove second expectation if first arrives
- * 2004-10-22 - Version 2.0
- * - merge Mandrake's 2.6.x port with recent 2.6.x API changes
- * - fix lots of linear skb assumptions from Mandrake's port
- * 2005-06-10 - Version 2.1
- * - use ip_conntrack_expect_free() instead of kfree() on the
- * expect's (which are from the slab for quite some time)
- * 2005-06-10 - Version 3.0
- * - port helper to post-2.6.11 API changes,
- * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- * 2005-07-30 - Version 3.1
- * - port helper to 2.6.13 API changes
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_CT_PPTP_VERSION "3.1"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP");
-
-static DEFINE_SPINLOCK(ip_pptp_lock);
-
-int
-(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq);
-
-int
-(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq);
-
-int
-(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig,
- struct ip_conntrack_expect *expect_reply);
-
-void
-(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp);
-
-#if 0
-/* PptpControlMessageType names */
-const char *pptp_msg_name[] = {
- "UNKNOWN_MESSAGE",
- "START_SESSION_REQUEST",
- "START_SESSION_REPLY",
- "STOP_SESSION_REQUEST",
- "STOP_SESSION_REPLY",
- "ECHO_REQUEST",
- "ECHO_REPLY",
- "OUT_CALL_REQUEST",
- "OUT_CALL_REPLY",
- "IN_CALL_REQUEST",
- "IN_CALL_REPLY",
- "IN_CALL_CONNECT",
- "CALL_CLEAR_REQUEST",
- "CALL_DISCONNECT_NOTIFY",
- "WAN_ERROR_NOTIFY",
- "SET_LINK_INFO"
-};
-EXPORT_SYMBOL(pptp_msg_name);
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define SECS *HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-
-#define PPTP_GRE_TIMEOUT (10 MINS)
-#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS)
-
-static void pptp_expectfn(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- DEBUGP("increasing timeouts\n");
-
- /* increase timeout of GRE data channel conntrack entry */
- ct->proto.gre.timeout = PPTP_GRE_TIMEOUT;
- ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT;
-
- /* Can you see how rusty this code is, compared with the pre-2.6.11
- * one? That's what happened to my shiny newnat of 2002 ;( -HW */
-
- if (!ip_nat_pptp_hook_expectfn) {
- struct ip_conntrack_tuple inv_t;
- struct ip_conntrack_expect *exp_other;
-
- /* obviously this tuple inversion only works until you do NAT */
- invert_tuplepr(&inv_t, &exp->tuple);
- DEBUGP("trying to unexpect other dir: ");
- DUMP_TUPLE(&inv_t);
-
- exp_other = ip_conntrack_expect_find(&inv_t);
- if (exp_other) {
- /* delete other expectation. */
- DEBUGP("found\n");
- ip_conntrack_unexpect_related(exp_other);
- ip_conntrack_expect_put(exp_other);
- } else {
- DEBUGP("not found\n");
- }
- } else {
- /* we need more than simple inversion */
- ip_nat_pptp_hook_expectfn(ct, exp);
- }
-}
-
-static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_expect *exp;
-
- DEBUGP("trying to timeout ct or exp for tuple ");
- DUMP_TUPLE(t);
-
- h = ip_conntrack_find_get(t, NULL);
- if (h) {
- struct ip_conntrack *sibling = tuplehash_to_ctrack(h);
- DEBUGP("setting timeout of conntrack %p to 0\n", sibling);
- sibling->proto.gre.timeout = 0;
- sibling->proto.gre.stream_timeout = 0;
- if (del_timer(&sibling->timeout))
- sibling->timeout.function((unsigned long)sibling);
- ip_conntrack_put(sibling);
- return 1;
- } else {
- exp = ip_conntrack_expect_find(t);
- if (exp) {
- DEBUGP("unexpect_related of expect %p\n", exp);
- ip_conntrack_unexpect_related(exp);
- ip_conntrack_expect_put(exp);
- return 1;
- }
- }
-
- return 0;
-}
-
-
-/* timeout GRE data connections */
-static void pptp_destroy_siblings(struct ip_conntrack *ct)
-{
- struct ip_conntrack_tuple t;
-
- /* Since ct->sibling_list has literally rusted away in 2.6.11,
- * we now need another way to find out about our sibling
- * contrack and expects... -HW */
-
- /* try original (pns->pac) tuple */
- memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t));
- t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
- t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
-
- if (!destroy_sibling_or_exp(&t))
- DEBUGP("failed to timeout original pns->pac ct/exp\n");
-
- /* try reply (pac->pns) tuple */
- memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t));
- t.dst.protonum = IPPROTO_GRE;
- t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id);
- t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id);
-
- if (!destroy_sibling_or_exp(&t))
- DEBUGP("failed to timeout reply pac->pns ct/exp\n");
-}
-
-/* expect GRE connections (PNS->PAC and PAC->PNS direction) */
-static inline int
-exp_gre(struct ip_conntrack *master,
- u_int32_t seq,
- __be16 callid,
- __be16 peer_callid)
-{
- struct ip_conntrack_tuple inv_tuple;
- struct ip_conntrack_tuple exp_tuples[] = {
- /* tuple in original direction, PNS->PAC */
- { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip,
- .u = { .gre = { .key = peer_callid } }
- },
- .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip,
- .u = { .gre = { .key = callid } },
- .protonum = IPPROTO_GRE
- },
- },
- /* tuple in reply direction, PAC->PNS */
- { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip,
- .u = { .gre = { .key = callid } }
- },
- .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip,
- .u = { .gre = { .key = peer_callid } },
- .protonum = IPPROTO_GRE
- },
- }
- };
- struct ip_conntrack_expect *exp_orig, *exp_reply;
- int ret = 1;
-
- exp_orig = ip_conntrack_expect_alloc(master);
- if (exp_orig == NULL)
- goto out;
-
- exp_reply = ip_conntrack_expect_alloc(master);
- if (exp_reply == NULL)
- goto out_put_orig;
-
- memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple));
-
- exp_orig->mask.src.ip = 0xffffffff;
- exp_orig->mask.src.u.all = 0;
- exp_orig->mask.dst.u.all = 0;
- exp_orig->mask.dst.u.gre.key = htons(0xffff);
- exp_orig->mask.dst.ip = 0xffffffff;
- exp_orig->mask.dst.protonum = 0xff;
-
- exp_orig->master = master;
- exp_orig->expectfn = pptp_expectfn;
- exp_orig->flags = 0;
-
- /* both expectations are identical apart from tuple */
- memcpy(exp_reply, exp_orig, sizeof(*exp_reply));
- memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple));
-
- if (ip_nat_pptp_hook_exp_gre)
- ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply);
- else {
-
- DEBUGP("calling expect_related PNS->PAC");
- DUMP_TUPLE(&exp_orig->tuple);
-
- if (ip_conntrack_expect_related(exp_orig) != 0) {
- DEBUGP("cannot expect_related()\n");
- goto out_put_both;
- }
-
- DEBUGP("calling expect_related PAC->PNS");
- DUMP_TUPLE(&exp_reply->tuple);
-
- if (ip_conntrack_expect_related(exp_reply) != 0) {
- DEBUGP("cannot expect_related()\n");
- goto out_unexpect_orig;
- }
-
- /* Add GRE keymap entries */
- if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) {
- DEBUGP("cannot keymap_add() exp\n");
- goto out_unexpect_both;
- }
-
- invert_tuplepr(&inv_tuple, &exp_reply->tuple);
- if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) {
- ip_ct_gre_keymap_destroy(master);
- DEBUGP("cannot keymap_add() exp_inv\n");
- goto out_unexpect_both;
- }
- ret = 0;
- }
-
-out_put_both:
- ip_conntrack_expect_put(exp_reply);
-out_put_orig:
- ip_conntrack_expect_put(exp_orig);
-out:
- return ret;
-
-out_unexpect_both:
- ip_conntrack_unexpect_related(exp_reply);
-out_unexpect_orig:
- ip_conntrack_unexpect_related(exp_orig);
- goto out_put_both;
-}
-
-static inline int
-pptp_inbound_pkt(struct sk_buff **pskb,
- struct tcphdr *tcph,
- unsigned int nexthdr_off,
- unsigned int datalen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct PptpControlHeader _ctlh, *ctlh;
- unsigned int reqlen;
- union pptp_ctrl_union _pptpReq, *pptpReq;
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg;
- __be16 *cid, *pcid;
- u_int32_t seq;
-
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
- if (!ctlh) {
- DEBUGP("error during skb_header_pointer\n");
- return NF_ACCEPT;
- }
- nexthdr_off += sizeof(_ctlh);
- datalen -= sizeof(_ctlh);
-
- reqlen = datalen;
- if (reqlen > sizeof(*pptpReq))
- reqlen = sizeof(*pptpReq);
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
- if (!pptpReq) {
- DEBUGP("error during skb_header_pointer\n");
- return NF_ACCEPT;
- }
-
- msg = ntohs(ctlh->messageType);
- DEBUGP("inbound control message %s\n", pptp_msg_name[msg]);
-
- switch (msg) {
- case PPTP_START_SESSION_REPLY:
- if (reqlen < sizeof(_pptpReq.srep)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
- /* server confirms new control session */
- if (info->sstate < PPTP_SESSION_REQUESTED) {
- DEBUGP("%s without START_SESS_REQUEST\n",
- pptp_msg_name[msg]);
- break;
- }
- if (pptpReq->srep.resultCode == PPTP_START_OK)
- info->sstate = PPTP_SESSION_CONFIRMED;
- else
- info->sstate = PPTP_SESSION_ERROR;
- break;
-
- case PPTP_STOP_SESSION_REPLY:
- if (reqlen < sizeof(_pptpReq.strep)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
- /* server confirms end of control session */
- if (info->sstate > PPTP_SESSION_STOPREQ) {
- DEBUGP("%s without STOP_SESS_REQUEST\n",
- pptp_msg_name[msg]);
- break;
- }
- if (pptpReq->strep.resultCode == PPTP_STOP_OK)
- info->sstate = PPTP_SESSION_NONE;
- else
- info->sstate = PPTP_SESSION_ERROR;
- break;
-
- case PPTP_OUT_CALL_REPLY:
- if (reqlen < sizeof(_pptpReq.ocack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
- /* server accepted call, we now expect GRE frames */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
- if (info->cstate != PPTP_CALL_OUT_REQ &&
- info->cstate != PPTP_CALL_OUT_CONF) {
- DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]);
- break;
- }
- if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) {
- info->cstate = PPTP_CALL_NONE;
- break;
- }
-
- cid = &pptpReq->ocack.callID;
- pcid = &pptpReq->ocack.peersCallID;
-
- info->pac_call_id = ntohs(*cid);
-
- if (htons(info->pns_call_id) != *pcid) {
- DEBUGP("%s for unknown callid %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
-
- DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg],
- ntohs(*cid), ntohs(*pcid));
-
- info->cstate = PPTP_CALL_OUT_CONF;
-
- seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
- + sizeof(struct PptpControlHeader)
- + ((void *)pcid - (void *)pptpReq);
-
- if (exp_gre(ct, seq, *cid, *pcid) != 0)
- printk("ip_conntrack_pptp: error during exp_gre\n");
- break;
-
- case PPTP_IN_CALL_REQUEST:
- if (reqlen < sizeof(_pptpReq.icack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
- /* server tells us about incoming call request */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
- pcid = &pptpReq->icack.peersCallID;
- DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
- info->cstate = PPTP_CALL_IN_REQ;
- info->pac_call_id = ntohs(*pcid);
- break;
-
- case PPTP_IN_CALL_CONNECT:
- if (reqlen < sizeof(_pptpReq.iccon)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
- /* server tells us about incoming call established */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n", pptp_msg_name[msg]);
- break;
- }
- if (info->sstate != PPTP_CALL_IN_REP
- && info->sstate != PPTP_CALL_IN_CONF) {
- DEBUGP("%s but never sent IN_CALL_REPLY\n",
- pptp_msg_name[msg]);
- break;
- }
-
- pcid = &pptpReq->iccon.peersCallID;
- cid = &info->pac_call_id;
-
- if (info->pns_call_id != ntohs(*pcid)) {
- DEBUGP("%s for unknown CallID %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
-
- DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
- info->cstate = PPTP_CALL_IN_CONF;
-
- /* we expect a GRE connection from PAC to PNS */
- seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr)
- + sizeof(struct PptpControlHeader)
- + ((void *)pcid - (void *)pptpReq);
-
- if (exp_gre(ct, seq, *cid, *pcid) != 0)
- printk("ip_conntrack_pptp: error during exp_gre\n");
-
- break;
-
- case PPTP_CALL_DISCONNECT_NOTIFY:
- if (reqlen < sizeof(_pptpReq.disc)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
- /* server confirms disconnect */
- cid = &pptpReq->disc.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
- info->cstate = PPTP_CALL_NONE;
-
- /* untrack this call id, unexpect GRE packets */
- pptp_destroy_siblings(ct);
- break;
-
- case PPTP_WAN_ERROR_NOTIFY:
- break;
-
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* I don't have to explain these ;) */
- break;
- default:
- DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)
- ? pptp_msg_name[msg]:pptp_msg_name[0], msg);
- break;
- }
-
-
- if (ip_nat_pptp_hook_inbound)
- return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh,
- pptpReq);
-
- return NF_ACCEPT;
-
-}
-
-static inline int
-pptp_outbound_pkt(struct sk_buff **pskb,
- struct tcphdr *tcph,
- unsigned int nexthdr_off,
- unsigned int datalen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct PptpControlHeader _ctlh, *ctlh;
- unsigned int reqlen;
- union pptp_ctrl_union _pptpReq, *pptpReq;
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- u_int16_t msg;
- __be16 *cid, *pcid;
-
- ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh);
- if (!ctlh)
- return NF_ACCEPT;
- nexthdr_off += sizeof(_ctlh);
- datalen -= sizeof(_ctlh);
-
- reqlen = datalen;
- if (reqlen > sizeof(*pptpReq))
- reqlen = sizeof(*pptpReq);
- pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq);
- if (!pptpReq)
- return NF_ACCEPT;
-
- msg = ntohs(ctlh->messageType);
- DEBUGP("outbound control message %s\n", pptp_msg_name[msg]);
-
- switch (msg) {
- case PPTP_START_SESSION_REQUEST:
- /* client requests for new control session */
- if (info->sstate != PPTP_SESSION_NONE) {
- DEBUGP("%s but we already have one",
- pptp_msg_name[msg]);
- }
- info->sstate = PPTP_SESSION_REQUESTED;
- break;
- case PPTP_STOP_SESSION_REQUEST:
- /* client requests end of control session */
- info->sstate = PPTP_SESSION_STOPREQ;
- break;
-
- case PPTP_OUT_CALL_REQUEST:
- if (reqlen < sizeof(_pptpReq.ocreq)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- /* FIXME: break; */
- }
-
- /* client initiating connection to server */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("%s but no session\n",
- pptp_msg_name[msg]);
- break;
- }
- info->cstate = PPTP_CALL_OUT_REQ;
- /* track PNS call id */
- cid = &pptpReq->ocreq.callID;
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid));
- info->pns_call_id = ntohs(*cid);
- break;
- case PPTP_IN_CALL_REPLY:
- if (reqlen < sizeof(_pptpReq.icack)) {
- DEBUGP("%s: short packet\n", pptp_msg_name[msg]);
- break;
- }
-
- /* client answers incoming call */
- if (info->cstate != PPTP_CALL_IN_REQ
- && info->cstate != PPTP_CALL_IN_REP) {
- DEBUGP("%s without incall_req\n",
- pptp_msg_name[msg]);
- break;
- }
- if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) {
- info->cstate = PPTP_CALL_NONE;
- break;
- }
- pcid = &pptpReq->icack.peersCallID;
- if (info->pac_call_id != ntohs(*pcid)) {
- DEBUGP("%s for unknown call %u\n",
- pptp_msg_name[msg], ntohs(*pcid));
- break;
- }
- DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid));
- /* part two of the three-way handshake */
- info->cstate = PPTP_CALL_IN_REP;
- info->pns_call_id = ntohs(pptpReq->icack.callID);
- break;
-
- case PPTP_CALL_CLEAR_REQUEST:
- /* client requests hangup of call */
- if (info->sstate != PPTP_SESSION_CONFIRMED) {
- DEBUGP("CLEAR_CALL but no session\n");
- break;
- }
- /* FUTURE: iterate over all calls and check if
- * call ID is valid. We don't do this without newnat,
- * because we only know about last call */
- info->cstate = PPTP_CALL_CLEAR_REQ;
- break;
- case PPTP_SET_LINK_INFO:
- break;
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* I don't have to explain these ;) */
- break;
- default:
- DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0], msg);
- /* unknown: no need to create GRE masq table entry */
- break;
- }
-
- if (ip_nat_pptp_hook_outbound)
- return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh,
- pptpReq);
-
- return NF_ACCEPT;
-}
-
-
-/* track caller id inside control connection, call expect_related */
-static int
-conntrack_pptp_help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-
-{
- struct pptp_pkt_hdr _pptph, *pptph;
- struct tcphdr _tcph, *tcph;
- u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4;
- u_int32_t datalen;
- int dir = CTINFO2DIR(ctinfo);
- struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info;
- unsigned int nexthdr_off;
-
- int oldsstate, oldcstate;
- int ret;
-
- /* don't do any tracking before tcp handshake complete */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) {
- DEBUGP("ctinfo = %u, skipping\n", ctinfo);
- return NF_ACCEPT;
- }
-
- nexthdr_off = (*pskb)->nh.iph->ihl*4;
- tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph);
- BUG_ON(!tcph);
- nexthdr_off += tcph->doff * 4;
- datalen = tcplen - tcph->doff * 4;
-
- if (tcph->fin || tcph->rst) {
- DEBUGP("RST/FIN received, timeouting GRE\n");
- /* can't do this after real newnat */
- info->cstate = PPTP_CALL_NONE;
-
- /* untrack this call id, unexpect GRE packets */
- pptp_destroy_siblings(ct);
- }
-
- pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph);
- if (!pptph) {
- DEBUGP("no full PPTP header, can't track\n");
- return NF_ACCEPT;
- }
- nexthdr_off += sizeof(_pptph);
- datalen -= sizeof(_pptph);
-
- /* if it's not a control message we can't do anything with it */
- if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL ||
- ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) {
- DEBUGP("not a control packet\n");
- return NF_ACCEPT;
- }
-
- oldsstate = info->sstate;
- oldcstate = info->cstate;
-
- spin_lock_bh(&ip_pptp_lock);
-
- /* FIXME: We just blindly assume that the control connection is always
- * established from PNS->PAC. However, RFC makes no guarantee */
- if (dir == IP_CT_DIR_ORIGINAL)
- /* client -> server (PNS -> PAC) */
- ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
- ctinfo);
- else
- /* server -> client (PAC -> PNS) */
- ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct,
- ctinfo);
- DEBUGP("sstate: %d->%d, cstate: %d->%d\n",
- oldsstate, info->sstate, oldcstate, info->cstate);
- spin_unlock_bh(&ip_pptp_lock);
-
- return ret;
-}
-
-/* control protocol helper */
-static struct ip_conntrack_helper pptp = {
- .list = { NULL, NULL },
- .name = "pptp",
- .me = THIS_MODULE,
- .max_expected = 2,
- .timeout = 5 * 60,
- .tuple = { .src = { .ip = 0,
- .u = { .tcp = { .port =
- __constant_htons(PPTP_CONTROL_PORT) } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = IPPROTO_TCP
- }
- },
- .mask = { .src = { .ip = 0,
- .u = { .tcp = { .port = __constant_htons(0xffff) } }
- },
- .dst = { .ip = 0,
- .u = { .all = 0 },
- .protonum = 0xff
- }
- },
- .help = conntrack_pptp_help
-};
-
-extern void ip_ct_proto_gre_fini(void);
-extern int __init ip_ct_proto_gre_init(void);
-
-/* ip_conntrack_pptp initialization */
-static int __init init(void)
-{
- int retcode;
-
- retcode = ip_ct_proto_gre_init();
- if (retcode < 0)
- return retcode;
-
- DEBUGP(" registering helper\n");
- if ((retcode = ip_conntrack_helper_register(&pptp))) {
- printk(KERN_ERR "Unable to register conntrack application "
- "helper for pptp: %d\n", retcode);
- ip_ct_proto_gre_fini();
- return retcode;
- }
-
- printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION);
- return 0;
-}
-
-static void __exit fini(void)
-{
- ip_conntrack_helper_unregister(&pptp);
- ip_ct_proto_gre_fini();
- printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION);
-}
-
-module_init(init);
-module_exit(fini);
-
-EXPORT_SYMBOL(ip_nat_pptp_hook_outbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_inbound);
-EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre);
-EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn);
diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c
deleted file mode 100644
index c51a2cf71b4..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_irc.c
+++ /dev/null
@@ -1,312 +0,0 @@
-/* IRC extension for IP connection tracking, Version 1.21
- * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org>
- * based on RR's ip_conntrack_ftp.c
- *
- * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- **
- * Module load syntax:
- * insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS>
- * max_dcc_channels=n dcc_timeout=secs
- *
- * please give the ports of all IRC servers You wish to connect to.
- * If You don't specify ports, the default will be port 6667.
- * With max_dcc_channels you can define the maximum number of not
- * yet answered DCC channels per IRC session (default 8).
- * With dcc_timeout you can specify how long the system waits for
- * an expected DCC channel (default 300 seconds).
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/moduleparam.h>
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-static unsigned int max_dcc_channels = 8;
-static unsigned int dcc_timeout = 300;
-/* This is slow, but it's simple. --RR */
-static char *irc_buffer;
-static DEFINE_SPINLOCK(irc_buffer_lock);
-
-unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_irc_hook);
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IRC (DCC) connection tracking helper");
-MODULE_LICENSE("GPL");
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of IRC servers");
-module_param(max_dcc_channels, uint, 0400);
-MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session");
-module_param(dcc_timeout, uint, 0400);
-MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels");
-
-static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " };
-#define MINMATCHLEN 5
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \
- __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int parse_dcc(char *data, char *data_end, u_int32_t *ip,
- u_int16_t *port, char **ad_beg_p, char **ad_end_p)
-/* tries to get the ip_addr and port out of a dcc command
- return value: -1 on failure, 0 on success
- data pointer to first byte of DCC command data
- data_end pointer to last byte of dcc command data
- ip returns parsed ip of dcc command
- port returns parsed port of dcc command
- ad_beg_p returns pointer to first byte of addr data
- ad_end_p returns pointer to last byte of addr data */
-{
-
- /* at least 12: "AAAAAAAA P\1\n" */
- while (*data++ != ' ')
- if (data > data_end - 12)
- return -1;
-
- *ad_beg_p = data;
- *ip = simple_strtoul(data, &data, 10);
-
- /* skip blanks between ip and port */
- while (*data == ' ') {
- if (data >= data_end)
- return -1;
- data++;
- }
-
- *port = simple_strtoul(data, &data, 10);
- *ad_end_p = data;
-
- return 0;
-}
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- unsigned int dataoff;
- struct tcphdr _tcph, *th;
- char *data, *data_limit, *ib_ptr;
- int dir = CTINFO2DIR(ctinfo);
- struct ip_conntrack_expect *exp;
- u32 seq;
- u_int32_t dcc_ip;
- u_int16_t dcc_port;
- int i, ret = NF_ACCEPT;
- char *addr_beg_p, *addr_end_p;
-
- DEBUGP("entered\n");
-
- /* If packet is coming from IRC server */
- if (dir == IP_CT_DIR_REPLY)
- return NF_ACCEPT;
-
- /* Until there's been traffic both ways, don't look in packets. */
- if (ctinfo != IP_CT_ESTABLISHED
- && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) {
- DEBUGP("Conntrackinfo = %u\n", ctinfo);
- return NF_ACCEPT;
- }
-
- /* Not a full tcp header? */
- th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
- if (th == NULL)
- return NF_ACCEPT;
-
- /* No data? */
- dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4;
- if (dataoff >= (*pskb)->len)
- return NF_ACCEPT;
-
- spin_lock_bh(&irc_buffer_lock);
- ib_ptr = skb_header_pointer(*pskb, dataoff,
- (*pskb)->len - dataoff, irc_buffer);
- BUG_ON(ib_ptr == NULL);
-
- data = ib_ptr;
- data_limit = ib_ptr + (*pskb)->len - dataoff;
-
- /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24
- * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */
- while (data < (data_limit - (19 + MINMATCHLEN))) {
- if (memcmp(data, "\1DCC ", 5)) {
- data++;
- continue;
- }
-
- data += 5;
- /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */
-
- DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n",
- NIPQUAD(iph->saddr), ntohs(th->source),
- NIPQUAD(iph->daddr), ntohs(th->dest));
-
- for (i = 0; i < ARRAY_SIZE(dccprotos); i++) {
- if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) {
- /* no match */
- continue;
- }
-
- DEBUGP("DCC %s detected\n", dccprotos[i]);
- data += strlen(dccprotos[i]);
- /* we have at least
- * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid
- * data left (== 14/13 bytes) */
- if (parse_dcc((char *)data, data_limit, &dcc_ip,
- &dcc_port, &addr_beg_p, &addr_end_p)) {
- /* unable to parse */
- DEBUGP("unable to parse dcc command\n");
- continue;
- }
- DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n",
- HIPQUAD(dcc_ip), dcc_port);
-
- /* dcc_ip can be the internal OR external (NAT'ed) IP
- * Tiago Sousa <mirage@kaotik.org> */
- if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip)
- && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) {
- if (net_ratelimit())
- printk(KERN_WARNING
- "Forged DCC command from "
- "%u.%u.%u.%u: %u.%u.%u.%u:%u\n",
- NIPQUAD(ct->tuplehash[dir].tuple.src.ip),
- HIPQUAD(dcc_ip), dcc_port);
-
- continue;
- }
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL) {
- ret = NF_DROP;
- goto out;
- }
-
- /* save position of address in dcc string,
- * necessary for NAT */
- DEBUGP("tcph->seq = %u\n", th->seq);
- seq = ntohl(th->seq) + (addr_beg_p - ib_ptr);
-
- /* We refer to the reverse direction ("!dir")
- * tuples here, because we're expecting
- * something in the other * direction.
- * Doesn't matter unless NAT is happening. */
- exp->tuple = ((struct ip_conntrack_tuple)
- { { 0, { 0 } },
- { ct->tuplehash[!dir].tuple.dst.ip,
- { .tcp = { htons(dcc_port) } },
- IPPROTO_TCP }});
- exp->mask = ((struct ip_conntrack_tuple)
- { { 0, { 0 } },
- { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }});
- exp->expectfn = NULL;
- exp->flags = 0;
- if (ip_nat_irc_hook)
- ret = ip_nat_irc_hook(pskb, ctinfo,
- addr_beg_p - ib_ptr,
- addr_end_p - addr_beg_p,
- exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- goto out;
- } /* for .. NUM_DCCPROTO */
- } /* while data < ... */
-
- out:
- spin_unlock_bh(&irc_buffer_lock);
- return ret;
-}
-
-static struct ip_conntrack_helper irc_helpers[MAX_PORTS];
-static char irc_names[MAX_PORTS][sizeof("irc-65535")];
-
-static void fini(void);
-
-static int __init init(void)
-{
- int i, ret;
- struct ip_conntrack_helper *hlpr;
- char *tmpname;
-
- if (max_dcc_channels < 1) {
- printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n");
- return -EBUSY;
- }
-
- irc_buffer = kmalloc(65536, GFP_KERNEL);
- if (!irc_buffer)
- return -ENOMEM;
-
- /* If no port given, default to standard irc port */
- if (ports_c == 0)
- ports[ports_c++] = IRC_PORT;
-
- for (i = 0; i < ports_c; i++) {
- hlpr = &irc_helpers[i];
- hlpr->tuple.src.u.tcp.port = htons(ports[i]);
- hlpr->tuple.dst.protonum = IPPROTO_TCP;
- hlpr->mask.src.u.tcp.port = 0xFFFF;
- hlpr->mask.dst.protonum = 0xFF;
- hlpr->max_expected = max_dcc_channels;
- hlpr->timeout = dcc_timeout;
- hlpr->me = THIS_MODULE;
- hlpr->help = help;
-
- tmpname = &irc_names[i][0];
- if (ports[i] == IRC_PORT)
- sprintf(tmpname, "irc");
- else
- sprintf(tmpname, "irc-%d", i);
- hlpr->name = tmpname;
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
- ret = ip_conntrack_helper_register(hlpr);
-
- if (ret) {
- printk("ip_conntrack_irc: ERROR registering port %d\n",
- ports[i]);
- fini();
- return -EBUSY;
- }
- }
- return 0;
-}
-
-/* This function is intentionally _NOT_ defined as __exit, because
- * it is needed by the init function */
-static void fini(void)
-{
- int i;
- for (i = 0; i < ports_c; i++) {
- DEBUGP("unregistering port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&irc_helpers[i]);
- }
- kfree(irc_buffer);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
deleted file mode 100644
index 4e68e16a261..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c
+++ /dev/null
@@ -1,142 +0,0 @@
-/*
- * NetBIOS name service broadcast connection tracking helper
- *
- * (c) 2005 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-/*
- * This helper tracks locally originating NetBIOS name service
- * requests by issuing permanent expectations (valid until
- * timing out) matching all reply connections from the
- * destination network. The only NetBIOS specific thing is
- * actually the port number.
- */
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/inetdevice.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-#define NMBD_PORT 137
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int timeout = 3;
-module_param(timeout, uint, 0400);
-MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds");
-
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct, enum ip_conntrack_info ctinfo)
-{
- struct ip_conntrack_expect *exp;
- struct iphdr *iph = (*pskb)->nh.iph;
- struct rtable *rt = (struct rtable *)(*pskb)->dst;
- struct in_device *in_dev;
- u_int32_t mask = 0;
-
- /* we're only interested in locally generated packets */
- if ((*pskb)->sk == NULL)
- goto out;
- if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST))
- goto out;
- if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
- goto out;
-
- rcu_read_lock();
- in_dev = __in_dev_get_rcu(rt->u.dst.dev);
- if (in_dev != NULL) {
- for_primary_ifa(in_dev) {
- if (ifa->ifa_broadcast == iph->daddr) {
- mask = ifa->ifa_mask;
- break;
- }
- } endfor_ifa(in_dev);
- }
- rcu_read_unlock();
-
- if (mask == 0)
- goto out;
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL)
- goto out;
-
- exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->tuple.src.u.udp.port = ntohs(NMBD_PORT);
-
- exp->mask.src.ip = mask;
- exp->mask.src.u.udp.port = 0xFFFF;
- exp->mask.dst.ip = 0xFFFFFFFF;
- exp->mask.dst.u.udp.port = 0xFFFF;
- exp->mask.dst.protonum = 0xFF;
-
- exp->expectfn = NULL;
- exp->flags = IP_CT_EXPECT_PERMANENT;
-
- ip_conntrack_expect_related(exp);
- ip_conntrack_expect_put(exp);
-
- ip_ct_refresh(ct, *pskb, timeout * HZ);
-out:
- return NF_ACCEPT;
-}
-
-static struct ip_conntrack_helper helper = {
- .name = "netbios-ns",
- .tuple = {
- .src = {
- .u = {
- .udp = {
- .port = __constant_htons(NMBD_PORT),
- }
- }
- },
- .dst = {
- .protonum = IPPROTO_UDP,
- },
- },
- .mask = {
- .src = {
- .u = {
- .udp = {
- .port = 0xFFFF,
- }
- }
- },
- .dst = {
- .protonum = 0xFF,
- },
- },
- .max_expected = 1,
- .me = THIS_MODULE,
- .help = help,
-};
-
-static int __init init(void)
-{
- helper.timeout = timeout;
- return ip_conntrack_helper_register(&helper);
-}
-
-static void __exit fini(void)
-{
- ip_conntrack_helper_unregister(&helper);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c
deleted file mode 100644
index c9ebbe0d2d9..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_netlink.c
+++ /dev/null
@@ -1,1631 +0,0 @@
-/* Connection tracking via netlink socket. Allows for user space
- * protocol helpers and general trouble making from userspace.
- *
- * (C) 2001 by Jay Schulist <jschlst@samba.org>
- * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org>
- * (C) 2003 by Patrick Mchardy <kaber@trash.net>
- * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net>
- *
- * I've reworked this stuff to use attributes instead of conntrack
- * structures. 5.44 am. I need more tea. --pablo 05/07/11.
- *
- * Initial connection tracking via netlink development funded and
- * generally made possible by Network Robots, Inc. (www.networkrobots.com)
- *
- * Further development of this code funded by Astaro AG (http://www.astaro.com)
- *
- * This software may be used and distributed according to the terms
- * of the GNU General Public License, incorporated herein by reference.
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <linux/notifier.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-#include <linux/netfilter/nfnetlink.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-
-MODULE_LICENSE("GPL");
-
-static char __initdata version[] = "0.90";
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-
-static inline int
-ctnetlink_dump_tuples_proto(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- struct ip_conntrack_protocol *proto;
- int ret = 0;
-
- NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum);
-
- /* If no protocol helper is found, this function will return the
- * generic protocol helper, so proto won't *ever* be NULL */
- proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
- if (likely(proto->tuple_to_nfattr))
- ret = proto->tuple_to_nfattr(skb, tuple);
-
- ip_conntrack_proto_put(proto);
-
- return ret;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_tuples(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *nest_parms;
- int ret;
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_IP);
- NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip);
- NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip);
- NFA_NEST_END(skb, nest_parms);
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO);
- ret = ctnetlink_dump_tuples_proto(skb, tuple);
- NFA_NEST_END(skb, nest_parms);
-
- return ret;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- u_int32_t status = htonl((u_int32_t) ct->status);
- NFA_PUT(skb, CTA_STATUS, sizeof(status), &status);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- long timeout_l = ct->timeout.expires - jiffies;
- u_int32_t timeout;
-
- if (timeout_l < 0)
- timeout = 0;
- else
- timeout = htonl(timeout_l / HZ);
-
- NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
- struct nfattr *nest_proto;
- int ret;
-
- if (!proto->to_nfattr) {
- ip_conntrack_proto_put(proto);
- return 0;
- }
-
- nest_proto = NFA_NEST(skb, CTA_PROTOINFO);
-
- ret = proto->to_nfattr(skb, nest_proto, ct);
-
- ip_conntrack_proto_put(proto);
-
- NFA_NEST_END(skb, nest_proto);
-
- return ret;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- struct nfattr *nest_helper;
-
- if (!ct->helper)
- return 0;
-
- nest_helper = NFA_NEST(skb, CTA_HELP);
- NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name);
-
- if (ct->helper->to_nfattr)
- ct->helper->to_nfattr(skb, ct);
-
- NFA_NEST_END(skb, nest_helper);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static inline int
-ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct,
- enum ip_conntrack_dir dir)
-{
- enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG;
- struct nfattr *nest_count = NFA_NEST(skb, type);
- u_int32_t tmp;
-
- tmp = htonl(ct->counters[dir].packets);
- NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp);
-
- tmp = htonl(ct->counters[dir].bytes);
- NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp);
-
- NFA_NEST_END(skb, nest_count);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-#else
-#define ctnetlink_dump_counters(a, b, c) (0)
-#endif
-
-#ifdef CONFIG_IP_NF_CONNTRACK_MARK
-static inline int
-ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- u_int32_t mark = htonl(ct->mark);
-
- NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-#else
-#define ctnetlink_dump_mark(a, b) (0)
-#endif
-
-static inline int
-ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- u_int32_t id = htonl(ct->id);
- NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct)
-{
- u_int32_t use = htonl(atomic_read(&ct->ct_general.use));
-
- NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use);
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple)
-
-static int
-ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event, int nowait,
- const struct ip_conntrack *ct)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct nfattr *nest_parms;
- unsigned char *b;
-
- b = skb->tail;
-
- event |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- if (ctnetlink_dump_status(skb, ct) < 0 ||
- ctnetlink_dump_timeout(skb, ct) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 ||
- ctnetlink_dump_protoinfo(skb, ct) < 0 ||
- ctnetlink_dump_helpinfo(skb, ct) < 0 ||
- ctnetlink_dump_mark(skb, ct) < 0 ||
- ctnetlink_dump_id(skb, ct) < 0 ||
- ctnetlink_dump_use(skb, ct) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_conntrack_event(struct notifier_block *this,
- unsigned long events, void *ptr)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct nfattr *nest_parms;
- struct ip_conntrack *ct = (struct ip_conntrack *)ptr;
- struct sk_buff *skb;
- unsigned int type;
- unsigned char *b;
- unsigned int flags = 0, group;
-
- /* ignore our fake conntrack entry */
- if (ct == &ip_conntrack_untracked)
- return NOTIFY_DONE;
-
- if (events & IPCT_DESTROY) {
- type = IPCTNL_MSG_CT_DELETE;
- group = NFNLGRP_CONNTRACK_DESTROY;
- } else if (events & (IPCT_NEW | IPCT_RELATED)) {
- type = IPCTNL_MSG_CT_NEW;
- flags = NLM_F_CREATE|NLM_F_EXCL;
- /* dump everything */
- events = ~0UL;
- group = NFNLGRP_CONNTRACK_NEW;
- } else if (events & (IPCT_STATUS |
- IPCT_PROTOINFO |
- IPCT_HELPER |
- IPCT_HELPINFO |
- IPCT_NATINFO)) {
- type = IPCTNL_MSG_CT_NEW;
- group = NFNLGRP_CONNTRACK_UPDATE;
- } else
- return NOTIFY_DONE;
-
- /* FIXME: Check if there are any listeners before, don't hurt performance */
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
- if (!skb)
- return NOTIFY_DONE;
-
- b = skb->tail;
-
- type |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = flags;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY);
- if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0)
- goto nfattr_failure;
- NFA_NEST_END(skb, nest_parms);
-
- /* NAT stuff is now a status flag */
- if ((events & IPCT_STATUS || events & IPCT_NATINFO)
- && ctnetlink_dump_status(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_REFRESH
- && ctnetlink_dump_timeout(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_PROTOINFO
- && ctnetlink_dump_protoinfo(skb, ct) < 0)
- goto nfattr_failure;
- if (events & IPCT_HELPINFO
- && ctnetlink_dump_helpinfo(skb, ct) < 0)
- goto nfattr_failure;
-
- if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
- ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, 0, group, 0);
- return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
- kfree_skb(skb);
- return NOTIFY_DONE;
-}
-#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
-
-static int ctnetlink_done(struct netlink_callback *cb)
-{
- DEBUGP("entered %s\n", __FUNCTION__);
- return 0;
-}
-
-static int
-ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack *ct = NULL;
- struct ip_conntrack_tuple_hash *h;
- struct list_head *i;
- u_int32_t *id = (u_int32_t *) &cb->args[1];
-
- DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__,
- cb->args[0], *id);
-
- read_lock_bh(&ip_conntrack_lock);
- for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
- h = (struct ip_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
- ct = tuplehash_to_ctrack(h);
- if (ct->id <= *id)
- continue;
- if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0)
- goto out;
- *id = ct->id;
- }
- }
-out:
- read_unlock_bh(&ip_conntrack_lock);
-
- DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
- return skb->len;
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static int
-ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack *ct = NULL;
- struct ip_conntrack_tuple_hash *h;
- struct list_head *i;
- u_int32_t *id = (u_int32_t *) &cb->args[1];
-
- DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__,
- cb->args[0], *id);
-
- write_lock_bh(&ip_conntrack_lock);
- for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) {
- list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) {
- h = (struct ip_conntrack_tuple_hash *) i;
- if (DIRECTION(h) != IP_CT_DIR_ORIGINAL)
- continue;
- ct = tuplehash_to_ctrack(h);
- if (ct->id <= *id)
- continue;
- if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW,
- 1, ct) < 0)
- goto out;
- *id = ct->id;
-
- memset(&ct->counters, 0, sizeof(ct->counters));
- }
- }
-out:
- write_unlock_bh(&ip_conntrack_lock);
-
- DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id);
-
- return skb->len;
-}
-#endif
-
-static const size_t cta_min_ip[CTA_IP_MAX] = {
- [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
- [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
-};
-
-static inline int
-ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *tb[CTA_IP_MAX];
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- nfattr_parse_nested(tb, CTA_IP_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
- return -EINVAL;
-
- if (!tb[CTA_IP_V4_SRC-1])
- return -EINVAL;
- tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
-
- if (!tb[CTA_IP_V4_DST-1])
- return -EINVAL;
- tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
-
- DEBUGP("leaving\n");
-
- return 0;
-}
-
-static const size_t cta_min_proto[CTA_PROTO_MAX] = {
- [CTA_PROTO_NUM-1] = sizeof(u_int8_t),
- [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t),
- [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t),
- [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
- [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
- [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t),
-};
-
-static inline int
-ctnetlink_parse_tuple_proto(struct nfattr *attr,
- struct ip_conntrack_tuple *tuple)
-{
- struct nfattr *tb[CTA_PROTO_MAX];
- struct ip_conntrack_protocol *proto;
- int ret = 0;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- nfattr_parse_nested(tb, CTA_PROTO_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
- return -EINVAL;
-
- if (!tb[CTA_PROTO_NUM-1])
- return -EINVAL;
- tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]);
-
- proto = ip_conntrack_proto_find_get(tuple->dst.protonum);
-
- if (likely(proto->nfattr_to_tuple))
- ret = proto->nfattr_to_tuple(tb, tuple);
-
- ip_conntrack_proto_put(proto);
-
- return ret;
-}
-
-static inline int
-ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple,
- enum ctattr_tuple type)
-{
- struct nfattr *tb[CTA_TUPLE_MAX];
- int err;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- memset(tuple, 0, sizeof(*tuple));
-
- nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]);
-
- if (!tb[CTA_TUPLE_IP-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple);
- if (err < 0)
- return err;
-
- if (!tb[CTA_TUPLE_PROTO-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple);
- if (err < 0)
- return err;
-
- /* orig and expect tuples get DIR_ORIGINAL */
- if (type == CTA_TUPLE_REPLY)
- tuple->dst.dir = IP_CT_DIR_REPLY;
- else
- tuple->dst.dir = IP_CT_DIR_ORIGINAL;
-
- DUMP_TUPLE(tuple);
-
- DEBUGP("leaving\n");
-
- return 0;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = {
- [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t),
- [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t),
-};
-
-static int ctnetlink_parse_nat_proto(struct nfattr *attr,
- const struct ip_conntrack *ct,
- struct ip_nat_range *range)
-{
- struct nfattr *tb[CTA_PROTONAT_MAX];
- struct ip_nat_protocol *npt;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat))
- return -EINVAL;
-
- npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
-
- if (!npt->nfattr_to_range) {
- ip_nat_proto_put(npt);
- return 0;
- }
-
- /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */
- if (npt->nfattr_to_range(tb, range) > 0)
- range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
-
- ip_nat_proto_put(npt);
-
- DEBUGP("leaving\n");
- return 0;
-}
-
-static const size_t cta_min_nat[CTA_NAT_MAX] = {
- [CTA_NAT_MINIP-1] = sizeof(u_int32_t),
- [CTA_NAT_MAXIP-1] = sizeof(u_int32_t),
-};
-
-static inline int
-ctnetlink_parse_nat(struct nfattr *cda[],
- const struct ip_conntrack *ct, struct ip_nat_range *range)
-{
- struct nfattr *tb[CTA_NAT_MAX];
- int err;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- memset(range, 0, sizeof(*range));
-
- nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]);
-
- if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat))
- return -EINVAL;
-
- if (tb[CTA_NAT_MINIP-1])
- range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]);
-
- if (!tb[CTA_NAT_MAXIP-1])
- range->max_ip = range->min_ip;
- else
- range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]);
-
- if (range->min_ip)
- range->flags |= IP_NAT_RANGE_MAP_IPS;
-
- if (!tb[CTA_NAT_PROTO-1])
- return 0;
-
- err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range);
- if (err < 0)
- return err;
-
- DEBUGP("leaving\n");
- return 0;
-}
-#endif
-
-static inline int
-ctnetlink_parse_help(struct nfattr *attr, char **helper_name)
-{
- struct nfattr *tb[CTA_HELP_MAX];
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- nfattr_parse_nested(tb, CTA_HELP_MAX, attr);
-
- if (!tb[CTA_HELP_NAME-1])
- return -EINVAL;
-
- *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]);
-
- return 0;
-}
-
-static const size_t cta_min[CTA_MAX] = {
- [CTA_STATUS-1] = sizeof(u_int32_t),
- [CTA_TIMEOUT-1] = sizeof(u_int32_t),
- [CTA_MARK-1] = sizeof(u_int32_t),
- [CTA_USE-1] = sizeof(u_int32_t),
- [CTA_ID-1] = sizeof(u_int32_t)
-};
-
-static int
-ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack *ct;
- int err = 0;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
- else if (cda[CTA_TUPLE_REPLY-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
- else {
- /* Flush the whole table */
- ip_conntrack_flush();
- return 0;
- }
-
- if (err < 0)
- return err;
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h) {
- DEBUGP("tuple not found in conntrack hash\n");
- return -ENOENT;
- }
-
- ct = tuplehash_to_ctrack(h);
-
- if (cda[CTA_ID-1]) {
- u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1]));
- if (ct->id != id) {
- ip_conntrack_put(ct);
- return -ENOENT;
- }
- }
- if (del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
-
- ip_conntrack_put(ct);
- DEBUGP("leaving\n");
-
- return 0;
-}
-
-static int
-ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple_hash *h;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack *ct;
- struct sk_buff *skb2 = NULL;
- int err = 0;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct nfgenmsg *msg = NLMSG_DATA(nlh);
- u32 rlen;
-
- if (msg->nfgen_family != AF_INET)
- return -EAFNOSUPPORT;
-
- if (NFNL_MSG_TYPE(nlh->nlmsg_type) ==
- IPCTNL_MSG_CT_GET_CTRZERO) {
-#ifdef CONFIG_IP_NF_CT_ACCT
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table_w,
- ctnetlink_done)) != 0)
- return -EINVAL;
-#else
- return -ENOTSUPP;
-#endif
- } else {
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
- }
-
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
- }
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG);
- else if (cda[CTA_TUPLE_REPLY-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY);
- else
- return -EINVAL;
-
- if (err < 0)
- return err;
-
- h = ip_conntrack_find_get(&tuple, NULL);
- if (!h) {
- DEBUGP("tuple not found in conntrack hash");
- return -ENOENT;
- }
- DEBUGP("tuple found\n");
- ct = tuplehash_to_ctrack(h);
-
- err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2) {
- ip_conntrack_put(ct);
- return -ENOMEM;
- }
- NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
-
- err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq,
- IPCTNL_MSG_CT_NEW, 1, ct);
- ip_conntrack_put(ct);
- if (err <= 0)
- goto free;
-
- err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
- if (err < 0)
- goto out;
-
- DEBUGP("leaving\n");
- return 0;
-
-free:
- kfree_skb(skb2);
-out:
- return err;
-}
-
-static inline int
-ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- unsigned long d;
- unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1]));
- d = ct->status ^ status;
-
- if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING))
- /* unchangeable */
- return -EINVAL;
-
- if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY))
- /* SEEN_REPLY bit can only be set */
- return -EINVAL;
-
-
- if (d & IPS_ASSURED && !(status & IPS_ASSURED))
- /* ASSURED bit can only be set */
- return -EINVAL;
-
- if (cda[CTA_NAT-1]) {
-#ifndef CONFIG_IP_NF_NAT_NEEDED
- return -EINVAL;
-#else
- unsigned int hooknum;
- struct ip_nat_range range;
-
- if (ctnetlink_parse_nat(cda, ct, &range) < 0)
- return -EINVAL;
-
- DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n",
- NIPQUAD(range.min_ip), NIPQUAD(range.max_ip),
- htons(range.min.all), htons(range.max.all));
-
- /* This is tricky but it works. ip_nat_setup_info needs the
- * hook number as parameter, so let's do the correct
- * conversion and run away */
- if (status & IPS_SRC_NAT_DONE)
- hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */
- else if (status & IPS_DST_NAT_DONE)
- hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */
- else
- return -EINVAL; /* Missing NAT flags */
-
- DEBUGP("NAT status: %lu\n",
- status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
-
- if (ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
- return -EEXIST;
- ip_nat_setup_info(ct, &range, hooknum);
-
- DEBUGP("NAT status after setup_info: %lu\n",
- ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK));
-#endif
- }
-
- /* Be careful here, modifying NAT bits can screw up things,
- * so don't let users modify them directly if they don't pass
- * ip_nat_range. */
- ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK);
- return 0;
-}
-
-
-static inline int
-ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- struct ip_conntrack_helper *helper;
- char *helpname;
- int err;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- /* don't change helper of sibling connections */
- if (ct->master)
- return -EINVAL;
-
- err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname);
- if (err < 0)
- return err;
-
- helper = __ip_conntrack_helper_find_byname(helpname);
- if (!helper) {
- if (!strcmp(helpname, ""))
- helper = NULL;
- else
- return -EINVAL;
- }
-
- if (ct->helper) {
- if (!helper) {
- /* we had a helper before ... */
- ip_ct_remove_expectations(ct);
- ct->helper = NULL;
- } else {
- /* need to zero data of old helper */
- memset(&ct->help, 0, sizeof(ct->help));
- }
- }
-
- ct->helper = helper;
-
- return 0;
-}
-
-static inline int
-ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
- if (!del_timer(&ct->timeout))
- return -ETIME;
-
- ct->timeout.expires = jiffies + timeout * HZ;
- add_timer(&ct->timeout);
-
- return 0;
-}
-
-static inline int
-ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1];
- struct ip_conntrack_protocol *proto;
- u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum;
- int err = 0;
-
- nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr);
-
- proto = ip_conntrack_proto_find_get(npt);
-
- if (proto->from_nfattr)
- err = proto->from_nfattr(tb, ct);
- ip_conntrack_proto_put(proto);
-
- return err;
-}
-
-static int
-ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[])
-{
- int err;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- if (cda[CTA_HELP-1]) {
- err = ctnetlink_change_helper(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_TIMEOUT-1]) {
- err = ctnetlink_change_timeout(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_STATUS-1]) {
- err = ctnetlink_change_status(ct, cda);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_PROTOINFO-1]) {
- err = ctnetlink_change_protoinfo(ct, cda);
- if (err < 0)
- return err;
- }
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
- DEBUGP("all done\n");
- return 0;
-}
-
-static int
-ctnetlink_create_conntrack(struct nfattr *cda[],
- struct ip_conntrack_tuple *otuple,
- struct ip_conntrack_tuple *rtuple)
-{
- struct ip_conntrack *ct;
- int err = -EINVAL;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- ct = ip_conntrack_alloc(otuple, rtuple);
- if (ct == NULL || IS_ERR(ct))
- return -ENOMEM;
-
- if (!cda[CTA_TIMEOUT-1])
- goto err;
- ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1]));
-
- ct->timeout.expires = jiffies + ct->timeout.expires * HZ;
- ct->status |= IPS_CONFIRMED;
-
- err = ctnetlink_change_status(ct, cda);
- if (err < 0)
- goto err;
-
- if (cda[CTA_PROTOINFO-1]) {
- err = ctnetlink_change_protoinfo(ct, cda);
- if (err < 0)
- return err;
- }
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (cda[CTA_MARK-1])
- ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1]));
-#endif
-
- ct->helper = ip_conntrack_helper_find_get(rtuple);
-
- add_timer(&ct->timeout);
- ip_conntrack_hash_insert(ct);
-
- if (ct->helper)
- ip_conntrack_helper_put(ct->helper);
-
- DEBUGP("conntrack with id %u inserted\n", ct->id);
- return 0;
-
-err:
- ip_conntrack_free(ct);
- return err;
-}
-
-static int
-ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple otuple, rtuple;
- struct ip_conntrack_tuple_hash *h = NULL;
- int err = 0;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- if (nfattr_bad_size(cda, CTA_MAX, cta_min))
- return -EINVAL;
-
- if (cda[CTA_TUPLE_ORIG-1]) {
- err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG);
- if (err < 0)
- return err;
- }
-
- if (cda[CTA_TUPLE_REPLY-1]) {
- err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY);
- if (err < 0)
- return err;
- }
-
- write_lock_bh(&ip_conntrack_lock);
- if (cda[CTA_TUPLE_ORIG-1])
- h = __ip_conntrack_find(&otuple, NULL);
- else if (cda[CTA_TUPLE_REPLY-1])
- h = __ip_conntrack_find(&rtuple, NULL);
-
- if (h == NULL) {
- write_unlock_bh(&ip_conntrack_lock);
- DEBUGP("no such conntrack, create new\n");
- err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE)
- err = ctnetlink_create_conntrack(cda, &otuple, &rtuple);
- return err;
- }
- /* implicit 'else' */
-
- /* we only allow nat config for new conntracks */
- if (cda[CTA_NAT-1]) {
- err = -EINVAL;
- goto out_unlock;
- }
-
- /* We manipulate the conntrack inside the global conntrack table lock,
- * so there's no need to increase the refcount */
- DEBUGP("conntrack found\n");
- err = -EEXIST;
- if (!(nlh->nlmsg_flags & NLM_F_EXCL))
- err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda);
-
-out_unlock:
- write_unlock_bh(&ip_conntrack_lock);
- return err;
-}
-
-/***********************************************************************
- * EXPECT
- ***********************************************************************/
-
-static inline int
-ctnetlink_exp_dump_tuple(struct sk_buff *skb,
- const struct ip_conntrack_tuple *tuple,
- enum ctattr_expect type)
-{
- struct nfattr *nest_parms = NFA_NEST(skb, type);
-
- if (ctnetlink_dump_tuples(skb, tuple) < 0)
- goto nfattr_failure;
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static inline int
-ctnetlink_exp_dump_expect(struct sk_buff *skb,
- const struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *master = exp->master;
- u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ);
- u_int32_t id = htonl(exp->id);
-
- if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0)
- goto nfattr_failure;
- if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0)
- goto nfattr_failure;
- if (ctnetlink_exp_dump_tuple(skb,
- &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- CTA_EXPECT_MASTER) < 0)
- goto nfattr_failure;
-
- NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout);
- NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static int
-ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq,
- int event,
- int nowait,
- const struct ip_conntrack_expect *exp)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- unsigned char *b;
-
- b = skb->tail;
-
- event |= NFNL_SUBSYS_CTNETLINK_EXP << 8;
- nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- if (ctnetlink_exp_dump_expect(skb, exp) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- return skb->len;
-
-nlmsg_failure:
-nfattr_failure:
- skb_trim(skb, b - skb->data);
- return -1;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static int ctnetlink_expect_event(struct notifier_block *this,
- unsigned long events, void *ptr)
-{
- struct nlmsghdr *nlh;
- struct nfgenmsg *nfmsg;
- struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr;
- struct sk_buff *skb;
- unsigned int type;
- unsigned char *b;
- int flags = 0;
-
- if (events & IPEXP_NEW) {
- type = IPCTNL_MSG_EXP_NEW;
- flags = NLM_F_CREATE|NLM_F_EXCL;
- } else
- return NOTIFY_DONE;
-
- skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
- if (!skb)
- return NOTIFY_DONE;
-
- b = skb->tail;
-
- type |= NFNL_SUBSYS_CTNETLINK << 8;
- nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg));
- nfmsg = NLMSG_DATA(nlh);
-
- nlh->nlmsg_flags = flags;
- nfmsg->nfgen_family = AF_INET;
- nfmsg->version = NFNETLINK_V0;
- nfmsg->res_id = 0;
-
- if (ctnetlink_exp_dump_expect(skb, exp) < 0)
- goto nfattr_failure;
-
- nlh->nlmsg_len = skb->tail - b;
- nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0);
- return NOTIFY_DONE;
-
-nlmsg_failure:
-nfattr_failure:
- kfree_skb(skb);
- return NOTIFY_DONE;
-}
-#endif
-
-static int
-ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
-{
- struct ip_conntrack_expect *exp = NULL;
- struct list_head *i;
- u_int32_t *id = (u_int32_t *) &cb->args[0];
-
- DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id);
-
- read_lock_bh(&ip_conntrack_lock);
- list_for_each_prev(i, &ip_conntrack_expect_list) {
- exp = (struct ip_conntrack_expect *) i;
- if (exp->id <= *id)
- continue;
- if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid,
- cb->nlh->nlmsg_seq,
- IPCTNL_MSG_EXP_NEW,
- 1, exp) < 0)
- goto out;
- *id = exp->id;
- }
-out:
- read_unlock_bh(&ip_conntrack_lock);
-
- DEBUGP("leaving, last id=%llu\n", *id);
-
- return skb->len;
-}
-
-static const size_t cta_min_exp[CTA_EXPECT_MAX] = {
- [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t),
- [CTA_EXPECT_ID-1] = sizeof(u_int32_t)
-};
-
-static int
-ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_expect *exp;
- struct sk_buff *skb2;
- int err = 0;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (nlh->nlmsg_flags & NLM_F_DUMP) {
- struct nfgenmsg *msg = NLMSG_DATA(nlh);
- u32 rlen;
-
- if (msg->nfgen_family != AF_INET)
- return -EAFNOSUPPORT;
-
- if ((*errp = netlink_dump_start(ctnl, skb, nlh,
- ctnetlink_exp_dump_table,
- ctnetlink_done)) != 0)
- return -EINVAL;
- rlen = NLMSG_ALIGN(nlh->nlmsg_len);
- if (rlen > skb->len)
- rlen = skb->len;
- skb_pull(skb, rlen);
- return 0;
- }
-
- if (cda[CTA_EXPECT_MASTER-1])
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER);
- else
- return -EINVAL;
-
- if (err < 0)
- return err;
-
- exp = ip_conntrack_expect_find(&tuple);
- if (!exp)
- return -ENOENT;
-
- if (cda[CTA_EXPECT_ID-1]) {
- u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
- if (exp->id != ntohl(id)) {
- ip_conntrack_expect_put(exp);
- return -ENOENT;
- }
- }
-
- err = -ENOMEM;
- skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
- if (!skb2)
- goto out;
- NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid;
-
- err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid,
- nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW,
- 1, exp);
- if (err <= 0)
- goto free;
-
- ip_conntrack_expect_put(exp);
-
- return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT);
-
-free:
- kfree_skb(skb2);
-out:
- ip_conntrack_expect_put(exp);
- return err;
-}
-
-static int
-ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_expect *exp, *tmp;
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_helper *h;
- int err;
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (cda[CTA_EXPECT_TUPLE-1]) {
- /* delete a single expect by tuple */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
-
- /* bump usage count to 2 */
- exp = ip_conntrack_expect_find(&tuple);
- if (!exp)
- return -ENOENT;
-
- if (cda[CTA_EXPECT_ID-1]) {
- u_int32_t id =
- *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]);
- if (exp->id != ntohl(id)) {
- ip_conntrack_expect_put(exp);
- return -ENOENT;
- }
- }
-
- /* after list removal, usage count == 1 */
- ip_conntrack_unexpect_related(exp);
- /* have to put what we 'get' above.
- * after this line usage count == 0 */
- ip_conntrack_expect_put(exp);
- } else if (cda[CTA_EXPECT_HELP_NAME-1]) {
- char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]);
-
- /* delete all expectations for this helper */
- write_lock_bh(&ip_conntrack_lock);
- h = __ip_conntrack_helper_find_byname(name);
- if (!h) {
- write_unlock_bh(&ip_conntrack_lock);
- return -EINVAL;
- }
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
- list) {
- if (exp->master->helper == h
- && del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
- } else {
- /* This basically means we have to flush everything*/
- write_lock_bh(&ip_conntrack_lock);
- list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list,
- list) {
- if (del_timer(&exp->timeout)) {
- ip_ct_unlink_expect(exp);
- ip_conntrack_expect_put(exp);
- }
- }
- write_unlock_bh(&ip_conntrack_lock);
- }
-
- return 0;
-}
-static int
-ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[])
-{
- return -EOPNOTSUPP;
-}
-
-static int
-ctnetlink_create_expect(struct nfattr *cda[])
-{
- struct ip_conntrack_tuple tuple, mask, master_tuple;
- struct ip_conntrack_tuple_hash *h = NULL;
- struct ip_conntrack_expect *exp;
- struct ip_conntrack *ct;
- int err = 0;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- /* caller guarantees that those three CTA_EXPECT_* exist */
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK);
- if (err < 0)
- return err;
- err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER);
- if (err < 0)
- return err;
-
- /* Look for master conntrack of this expectation */
- h = ip_conntrack_find_get(&master_tuple, NULL);
- if (!h)
- return -ENOENT;
- ct = tuplehash_to_ctrack(h);
-
- if (!ct->helper) {
- /* such conntrack hasn't got any helper, abort */
- err = -EINVAL;
- goto out;
- }
-
- exp = ip_conntrack_expect_alloc(ct);
- if (!exp) {
- err = -ENOMEM;
- goto out;
- }
-
- exp->expectfn = NULL;
- exp->flags = 0;
- exp->master = ct;
- memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple));
- memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple));
-
- err = ip_conntrack_expect_related(exp);
- ip_conntrack_expect_put(exp);
-
-out:
- ip_conntrack_put(tuplehash_to_ctrack(h));
- return err;
-}
-
-static int
-ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb,
- struct nlmsghdr *nlh, struct nfattr *cda[], int *errp)
-{
- struct ip_conntrack_tuple tuple;
- struct ip_conntrack_expect *exp;
- int err = 0;
-
- DEBUGP("entered %s\n", __FUNCTION__);
-
- if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp))
- return -EINVAL;
-
- if (!cda[CTA_EXPECT_TUPLE-1]
- || !cda[CTA_EXPECT_MASK-1]
- || !cda[CTA_EXPECT_MASTER-1])
- return -EINVAL;
-
- err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE);
- if (err < 0)
- return err;
-
- write_lock_bh(&ip_conntrack_lock);
- exp = __ip_conntrack_expect_find(&tuple);
-
- if (!exp) {
- write_unlock_bh(&ip_conntrack_lock);
- err = -ENOENT;
- if (nlh->nlmsg_flags & NLM_F_CREATE)
- err = ctnetlink_create_expect(cda);
- return err;
- }
-
- err = -EEXIST;
- if (!(nlh->nlmsg_flags & NLM_F_EXCL))
- err = ctnetlink_change_expect(exp, cda);
- write_unlock_bh(&ip_conntrack_lock);
-
- DEBUGP("leaving\n");
-
- return err;
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-static struct notifier_block ctnl_notifier = {
- .notifier_call = ctnetlink_conntrack_event,
-};
-
-static struct notifier_block ctnl_notifier_exp = {
- .notifier_call = ctnetlink_expect_event,
-};
-#endif
-
-static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = {
- [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack,
- .attr_count = CTA_MAX, },
- [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack,
- .attr_count = CTA_MAX, },
-};
-
-static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = {
- [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect,
- .attr_count = CTA_EXPECT_MAX, },
- [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect,
- .attr_count = CTA_EXPECT_MAX, },
- [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect,
- .attr_count = CTA_EXPECT_MAX, },
-};
-
-static struct nfnetlink_subsystem ctnl_subsys = {
- .name = "conntrack",
- .subsys_id = NFNL_SUBSYS_CTNETLINK,
- .cb_count = IPCTNL_MSG_MAX,
- .cb = ctnl_cb,
-};
-
-static struct nfnetlink_subsystem ctnl_exp_subsys = {
- .name = "conntrack_expect",
- .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP,
- .cb_count = IPCTNL_MSG_EXP_MAX,
- .cb = ctnl_exp_cb,
-};
-
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK);
-
-static int __init ctnetlink_init(void)
-{
- int ret;
-
- printk("ctnetlink v%s: registering with nfnetlink.\n", version);
- ret = nfnetlink_subsys_register(&ctnl_subsys);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register with nfnetlink.\n");
- goto err_out;
- }
-
- ret = nfnetlink_subsys_register(&ctnl_exp_subsys);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register exp with nfnetlink.\n");
- goto err_unreg_subsys;
- }
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
- ret = ip_conntrack_register_notifier(&ctnl_notifier);
- if (ret < 0) {
- printk("ctnetlink_init: cannot register notifier.\n");
- goto err_unreg_exp_subsys;
- }
-
- ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp);
- if (ret < 0) {
- printk("ctnetlink_init: cannot expect register notifier.\n");
- goto err_unreg_notifier;
- }
-#endif
-
- return 0;
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-err_unreg_notifier:
- ip_conntrack_unregister_notifier(&ctnl_notifier);
-err_unreg_exp_subsys:
- nfnetlink_subsys_unregister(&ctnl_exp_subsys);
-#endif
-err_unreg_subsys:
- nfnetlink_subsys_unregister(&ctnl_subsys);
-err_out:
- return ret;
-}
-
-static void __exit ctnetlink_exit(void)
-{
- printk("ctnetlink: unregistering from nfnetlink.\n");
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
- ip_conntrack_unregister_notifier(&ctnl_notifier_exp);
- ip_conntrack_unregister_notifier(&ctnl_notifier);
-#endif
-
- nfnetlink_subsys_unregister(&ctnl_exp_subsys);
- nfnetlink_subsys_unregister(&ctnl_subsys);
- return;
-}
-
-module_init(ctnetlink_init);
-module_exit(ctnetlink_exit);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c
deleted file mode 100644
index f891308b5e4..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c
+++ /dev/null
@@ -1,75 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_generic_timeout = 600*HZ;
-
-static int generic_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
-
- return 1;
-}
-
-static int generic_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
-
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int generic_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return 0;
-}
-
-/* Print out the private part of the conntrack. */
-static int generic_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *state)
-{
- return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout);
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
- return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_generic_protocol =
-{
- .proto = 0,
- .name = "unknown",
- .pkt_to_tuple = generic_pkt_to_tuple,
- .invert_tuple = generic_invert_tuple,
- .print_tuple = generic_print_tuple,
- .print_conntrack = generic_print_conntrack,
- .packet = packet,
- .new = new,
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c
deleted file mode 100644
index 56794797d55..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
- * Connection tracking protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/in.h>
-#include <linux/list.h>
-#include <linux/seq_file.h>
-#include <linux/interrupt.h>
-
-static DEFINE_RWLOCK(ip_ct_gre_lock);
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
-#include <linux/netfilter_ipv4/listhelp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE");
-
-/* shamelessly stolen from ip_conntrack_proto_udp.c */
-#define GRE_TIMEOUT (30*HZ)
-#define GRE_STREAM_TIMEOUT (180*HZ)
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args)
-#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \
- NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \
- NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key))
-#else
-#define DEBUGP(x, args...)
-#define DUMP_TUPLE_GRE(x)
-#endif
-
-/* GRE KEYMAP HANDLING FUNCTIONS */
-static LIST_HEAD(gre_keymap_list);
-
-static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km,
- const struct ip_conntrack_tuple *t)
-{
- return ((km->tuple.src.ip == t->src.ip) &&
- (km->tuple.dst.ip == t->dst.ip) &&
- (km->tuple.dst.protonum == t->dst.protonum) &&
- (km->tuple.dst.u.all == t->dst.u.all));
-}
-
-/* look up the source key for a given tuple */
-static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t)
-{
- struct ip_ct_gre_keymap *km;
- u_int32_t key = 0;
-
- read_lock_bh(&ip_ct_gre_lock);
- km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
- struct ip_ct_gre_keymap *, t);
- if (km)
- key = km->tuple.src.u.gre.key;
- read_unlock_bh(&ip_ct_gre_lock);
-
- DEBUGP("lookup src key 0x%x up key for ", key);
- DUMP_TUPLE_GRE(t);
-
- return key;
-}
-
-/* add a single keymap entry, associate with specified master ct */
-int
-ip_ct_gre_keymap_add(struct ip_conntrack *ct,
- struct ip_conntrack_tuple *t, int reply)
-{
- struct ip_ct_gre_keymap **exist_km, *km, *old;
-
- if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
- DEBUGP("refusing to add GRE keymap to non-pptp session\n");
- return -1;
- }
-
- if (!reply)
- exist_km = &ct->help.ct_pptp_info.keymap_orig;
- else
- exist_km = &ct->help.ct_pptp_info.keymap_reply;
-
- if (*exist_km) {
- /* check whether it's a retransmission */
- old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn,
- struct ip_ct_gre_keymap *, t);
- if (old == *exist_km) {
- DEBUGP("retransmission\n");
- return 0;
- }
-
- DEBUGP("trying to override keymap_%s for ct %p\n",
- reply? "reply":"orig", ct);
- return -EEXIST;
- }
-
- km = kmalloc(sizeof(*km), GFP_ATOMIC);
- if (!km)
- return -ENOMEM;
-
- memcpy(&km->tuple, t, sizeof(*t));
- *exist_km = km;
-
- DEBUGP("adding new entry %p: ", km);
- DUMP_TUPLE_GRE(&km->tuple);
-
- write_lock_bh(&ip_ct_gre_lock);
- list_append(&gre_keymap_list, km);
- write_unlock_bh(&ip_ct_gre_lock);
-
- return 0;
-}
-
-/* destroy the keymap entries associated with specified master ct */
-void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct)
-{
- DEBUGP("entering for ct %p\n", ct);
-
- if (!ct->helper || strcmp(ct->helper->name, "pptp")) {
- DEBUGP("refusing to destroy GRE keymap to non-pptp session\n");
- return;
- }
-
- write_lock_bh(&ip_ct_gre_lock);
- if (ct->help.ct_pptp_info.keymap_orig) {
- DEBUGP("removing %p from list\n",
- ct->help.ct_pptp_info.keymap_orig);
- list_del(&ct->help.ct_pptp_info.keymap_orig->list);
- kfree(ct->help.ct_pptp_info.keymap_orig);
- ct->help.ct_pptp_info.keymap_orig = NULL;
- }
- if (ct->help.ct_pptp_info.keymap_reply) {
- DEBUGP("removing %p from list\n",
- ct->help.ct_pptp_info.keymap_reply);
- list_del(&ct->help.ct_pptp_info.keymap_reply->list);
- kfree(ct->help.ct_pptp_info.keymap_reply);
- ct->help.ct_pptp_info.keymap_reply = NULL;
- }
- write_unlock_bh(&ip_ct_gre_lock);
-}
-
-
-/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */
-
-/* invert gre part of tuple */
-static int gre_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->dst.u.gre.key = orig->src.u.gre.key;
- tuple->src.u.gre.key = orig->dst.u.gre.key;
-
- return 1;
-}
-
-/* gre hdr info to tuple */
-static int gre_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct gre_hdr_pptp _pgrehdr, *pgrehdr;
- u_int32_t srckey;
- struct gre_hdr _grehdr, *grehdr;
-
- /* first only delinearize old RFC1701 GRE header */
- grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr);
- if (!grehdr || grehdr->version != GRE_VERSION_PPTP) {
- /* try to behave like "ip_conntrack_proto_generic" */
- tuple->src.u.all = 0;
- tuple->dst.u.all = 0;
- return 1;
- }
-
- /* PPTP header is variable length, only need up to the call_id field */
- pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr);
- if (!pgrehdr)
- return 1;
-
- if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) {
- DEBUGP("GRE_VERSION_PPTP but unknown proto\n");
- return 0;
- }
-
- tuple->dst.u.gre.key = pgrehdr->call_id;
- srckey = gre_keymap_lookup(tuple);
- tuple->src.u.gre.key = srckey;
-
- return 1;
-}
-
-/* print gre part of tuple */
-static int gre_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "srckey=0x%x dstkey=0x%x ",
- ntohs(tuple->src.u.gre.key),
- ntohs(tuple->dst.u.gre.key));
-}
-
-/* print private data for conntrack */
-static int gre_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *ct)
-{
- return seq_printf(s, "timeout=%u, stream_timeout=%u ",
- (ct->proto.gre.timeout / HZ),
- (ct->proto.gre.stream_timeout / HZ));
-}
-
-/* Returns verdict for packet, and may modify conntrack */
-static int gre_packet(struct ip_conntrack *ct,
- const struct sk_buff *skb,
- enum ip_conntrack_info conntrackinfo)
-{
- /* If we've seen traffic both ways, this is a GRE connection.
- * Extend timeout. */
- if (ct->status & IPS_SEEN_REPLY) {
- ip_ct_refresh_acct(ct, conntrackinfo, skb,
- ct->proto.gre.stream_timeout);
- /* Also, more likely to be important, and not a probe. */
- set_bit(IPS_ASSURED_BIT, &ct->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- } else
- ip_ct_refresh_acct(ct, conntrackinfo, skb,
- ct->proto.gre.timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int gre_new(struct ip_conntrack *ct,
- const struct sk_buff *skb)
-{
- DEBUGP(": ");
- DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-
- /* initialize to sane value. Ideally a conntrack helper
- * (e.g. in case of pptp) is increasing them */
- ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT;
- ct->proto.gre.timeout = GRE_TIMEOUT;
-
- return 1;
-}
-
-/* Called when a conntrack entry has already been removed from the hashes
- * and is about to be deleted from memory */
-static void gre_destroy(struct ip_conntrack *ct)
-{
- struct ip_conntrack *master = ct->master;
- DEBUGP(" entering\n");
-
- if (!master)
- DEBUGP("no master !?!\n");
- else
- ip_ct_gre_keymap_destroy(master);
-}
-
-/* protocol helper struct */
-static struct ip_conntrack_protocol gre = {
- .proto = IPPROTO_GRE,
- .name = "gre",
- .pkt_to_tuple = gre_pkt_to_tuple,
- .invert_tuple = gre_invert_tuple,
- .print_tuple = gre_print_tuple,
- .print_conntrack = gre_print_conntrack,
- .packet = gre_packet,
- .new = gre_new,
- .destroy = gre_destroy,
- .me = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-/* ip_conntrack_proto_gre initialization */
-int __init ip_ct_proto_gre_init(void)
-{
- return ip_conntrack_protocol_register(&gre);
-}
-
-/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's
- * init() code on errors.
- */
-void ip_ct_proto_gre_fini(void)
-{
- struct list_head *pos, *n;
-
- /* delete all keymap entries */
- write_lock_bh(&ip_ct_gre_lock);
- list_for_each_safe(pos, n, &gre_keymap_list) {
- DEBUGP("deleting keymap %p at module unload time\n", pos);
- list_del(pos);
- kfree(pos);
- }
- write_unlock_bh(&ip_ct_gre_lock);
-
- ip_conntrack_protocol_unregister(&gre);
-}
-
-EXPORT_SYMBOL(ip_ct_gre_keymap_add);
-EXPORT_SYMBOL(ip_ct_gre_keymap_destroy);
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
deleted file mode 100644
index 3021af0910f..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c
+++ /dev/null
@@ -1,327 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/icmp.h>
-#include <linux/seq_file.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_icmp_timeout = 30*HZ;
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int icmp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct icmphdr _hdr, *hp;
-
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->dst.u.icmp.type = hp->type;
- tuple->src.u.icmp.id = hp->un.echo.id;
- tuple->dst.u.icmp.code = hp->code;
-
- return 1;
-}
-
-/* Add 1; spaces filled with 0. */
-static const u_int8_t invmap[] = {
- [ICMP_ECHO] = ICMP_ECHOREPLY + 1,
- [ICMP_ECHOREPLY] = ICMP_ECHO + 1,
- [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1,
- [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1,
- [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1,
- [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1,
- [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1,
- [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
-};
-
-static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- if (orig->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[orig->dst.u.icmp.type])
- return 0;
-
- tuple->src.u.icmp.id = orig->src.u.icmp.id;
- tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
- tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int icmp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "type=%u code=%u id=%u ",
- tuple->dst.u.icmp.type,
- tuple->dst.u.icmp.code,
- ntohs(tuple->src.u.icmp.id));
-}
-
-/* Print out the private part of the conntrack. */
-static int icmp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- return 0;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int icmp_packet(struct ip_conntrack *ct,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- /* Try to delete connection immediately after all replies:
- won't actually vanish as we still have skb, and del_timer
- means this will only run once even if count hits zero twice
- (theoretically possible with SMP) */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count)
- && del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
- } else {
- atomic_inc(&ct->proto.icmp.count);
- ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
- ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout);
- }
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int icmp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- static const u_int8_t valid_new[] = {
- [ICMP_ECHO] = 1,
- [ICMP_TIMESTAMP] = 1,
- [ICMP_INFO_REQUEST] = 1,
- [ICMP_ADDRESS] = 1
- };
-
- if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
- || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
- /* Can't create a new ICMP `conn' with this. */
- DEBUGP("icmp: can't create new conn with type %u\n",
- conntrack->tuplehash[0].tuple.dst.u.icmp.type);
- DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
- return 0;
- }
- atomic_set(&conntrack->proto.icmp.count, 0);
- return 1;
-}
-
-static int
-icmp_error_message(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct ip_conntrack_tuple innertuple, origtuple;
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } _in, *inside;
- struct ip_conntrack_protocol *innerproto;
- struct ip_conntrack_tuple_hash *h;
- int dataoff;
-
- IP_NF_ASSERT(skb->nfct == NULL);
-
- /* Not enough header? */
- inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
- if (inside == NULL)
- return -NF_ACCEPT;
-
- /* Ignore ICMP's containing fragments (shouldn't happen) */
- if (inside->ip.frag_off & htons(IP_OFFSET)) {
- DEBUGP("icmp_error_track: fragment of proto %u\n",
- inside->ip.protocol);
- return -NF_ACCEPT;
- }
-
- innerproto = ip_conntrack_proto_find_get(inside->ip.protocol);
- dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4;
- /* Are they talking about one of our connections? */
- if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) {
- DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol);
- ip_conntrack_proto_put(innerproto);
- return -NF_ACCEPT;
- }
-
- /* Ordinarily, we'd expect the inverted tupleproto, but it's
- been preserved inside the ICMP. */
- if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) {
- DEBUGP("icmp_error_track: Can't invert tuple\n");
- ip_conntrack_proto_put(innerproto);
- return -NF_ACCEPT;
- }
- ip_conntrack_proto_put(innerproto);
-
- *ctinfo = IP_CT_RELATED;
-
- h = ip_conntrack_find_get(&innertuple, NULL);
- if (!h) {
- /* Locally generated ICMPs will match inverted if they
- haven't been SNAT'ed yet */
- /* FIXME: NAT code has to handle half-done double NAT --RR */
- if (hooknum == NF_IP_LOCAL_OUT)
- h = ip_conntrack_find_get(&origtuple, NULL);
-
- if (!h) {
- DEBUGP("icmp_error_track: no match\n");
- return -NF_ACCEPT;
- }
- /* Reverse direction from that found */
- if (DIRECTION(h) != IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- } else {
- if (DIRECTION(h) == IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- }
-
- /* Update skb to refer to this connection */
- skb->nfct = &tuplehash_to_ctrack(h)->ct_general;
- skb->nfctinfo = *ctinfo;
- return -NF_ACCEPT;
-}
-
-/* Small and modified version of icmp_rcv */
-static int
-icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct icmphdr _ih, *icmph;
-
- /* Not enough header? */
- icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
- if (icmph == NULL) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* See ip_conntrack_proto_tcp.c */
- if (hooknum != NF_IP_PRE_ROUTING)
- goto checksum_skipped;
-
- switch (skb->ip_summed) {
- case CHECKSUM_HW:
- if (!(u16)csum_fold(skb->csum))
- break;
- /* fall through */
- case CHECKSUM_NONE:
- skb->csum = 0;
- if (__skb_checksum_complete(skb)) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: bad ICMP checksum ");
- return -NF_ACCEPT;
- }
- }
-
-checksum_skipped:
- /*
- * 18 is the highest 'known' ICMP type. Anything else is a mystery
- *
- * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently
- * discarded.
- */
- if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_icmp: invalid ICMP type ");
- return -NF_ACCEPT;
- }
-
- /* Need to track icmp error message? */
- if (icmph->type != ICMP_DEST_UNREACH
- && icmph->type != ICMP_SOURCE_QUENCH
- && icmph->type != ICMP_TIME_EXCEEDED
- && icmph->type != ICMP_PARAMETERPROB
- && icmph->type != ICMP_REDIRECT)
- return NF_ACCEPT;
-
- return icmp_error_message(skb, ctinfo, hooknum);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int icmp_tuple_to_nfattr(struct sk_buff *skb,
- const struct ip_conntrack_tuple *t)
-{
- NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
- &t->src.u.icmp.id);
- NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
- &t->dst.u.icmp.type);
- NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
- &t->dst.u.icmp.code);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-static int icmp_nfattr_to_tuple(struct nfattr *tb[],
- struct ip_conntrack_tuple *tuple)
-{
- if (!tb[CTA_PROTO_ICMP_TYPE-1]
- || !tb[CTA_PROTO_ICMP_CODE-1]
- || !tb[CTA_PROTO_ICMP_ID-1])
- return -EINVAL;
-
- tuple->dst.u.icmp.type =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
- tuple->dst.u.icmp.code =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
- tuple->src.u.icmp.id =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
-
- if (tuple->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[tuple->dst.u.icmp.type])
- return -EINVAL;
-
- return 0;
-}
-#endif
-
-struct ip_conntrack_protocol ip_conntrack_protocol_icmp =
-{
- .proto = IPPROTO_ICMP,
- .name = "icmp",
- .pkt_to_tuple = icmp_pkt_to_tuple,
- .invert_tuple = icmp_invert_tuple,
- .print_tuple = icmp_print_tuple,
- .print_conntrack = icmp_print_conntrack,
- .packet = icmp_packet,
- .new = icmp_new,
- .error = icmp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = icmp_tuple_to_nfattr,
- .nfattr_to_tuple = icmp_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
deleted file mode 100644
index be602e8aeab..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c
+++ /dev/null
@@ -1,657 +0,0 @@
-/*
- * Connection tracking protocol helper module for SCTP.
- *
- * SCTP is defined in RFC 2960. References to various sections in this code
- * are to this RFC.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * Added support for proc manipulation of timeouts.
- */
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/timer.h>
-#include <linux/interrupt.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/sctp.h>
-#include <linux/string.h>
-#include <linux/seq_file.h>
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.sctp */
-static DEFINE_RWLOCK(sctp_lock);
-
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR
-
- And so for me for SCTP :D -Kiran */
-
-static const char *sctp_conntrack_names[] = {
- "NONE",
- "CLOSED",
- "COOKIE_WAIT",
- "COOKIE_ECHOED",
- "ESTABLISHED",
- "SHUTDOWN_SENT",
- "SHUTDOWN_RECD",
- "SHUTDOWN_ACK_SENT",
-};
-
-#define SECS * HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-static unsigned int ip_ct_sctp_timeout_closed = 10 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS;
-static unsigned int ip_ct_sctp_timeout_established = 5 DAYS;
-static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000;
-static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS;
-
-static const unsigned int * sctp_timeouts[]
-= { NULL, /* SCTP_CONNTRACK_NONE */
- &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */
- &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */
- &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */
- &ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */
- &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */
- &ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */
- &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */
- };
-
-#define sNO SCTP_CONNTRACK_NONE
-#define sCL SCTP_CONNTRACK_CLOSED
-#define sCW SCTP_CONNTRACK_COOKIE_WAIT
-#define sCE SCTP_CONNTRACK_COOKIE_ECHOED
-#define sES SCTP_CONNTRACK_ESTABLISHED
-#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT
-#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
-#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
-#define sIV SCTP_CONNTRACK_MAX
-
-/*
- These are the descriptions of the states:
-
-NOTE: These state names are tantalizingly similar to the states of an
-SCTP endpoint. But the interpretation of the states is a little different,
-considering that these are the states of the connection and not of an end
-point. Please note the subtleties. -Kiran
-
-NONE - Nothing so far.
-COOKIE WAIT - We have seen an INIT chunk in the original direction, or also
- an INIT_ACK chunk in the reply direction.
-COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction.
-ESTABLISHED - We have seen a COOKIE_ACK in the reply direction.
-SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction.
-SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin.
-SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
- to that of the SHUTDOWN chunk.
-CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
- the SHUTDOWN chunk. Connection is closed.
-*/
-
-/* TODO
- - I have assumed that the first INIT is in the original direction.
- This messes things when an INIT comes in the reply direction in CLOSED
- state.
- - Check the error type in the reply dir before transitioning from
-cookie echoed to closed.
- - Sec 5.2.4 of RFC 2960
- - Multi Homing support.
-*/
-
-/* SCTP conntrack state transitions */
-static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = {
- {
-/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL}
- },
- {
-/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL}
- }
-};
-
-static int sctp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- sctp_sctphdr_t _hdr, *hp;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.sctp.port = hp->source;
- tuple->dst.u.sctp.port = hp->dest;
- return 1;
-}
-
-static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- tuple->src.u.sctp.port = orig->dst.u.sctp.port;
- tuple->dst.u.sctp.port = orig->src.u.sctp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int sctp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.sctp.port),
- ntohs(tuple->dst.u.sctp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int sctp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- enum sctp_conntrack state;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- read_lock_bh(&sctp_lock);
- state = conntrack->proto.sctp.state;
- read_unlock_bh(&sctp_lock);
-
- return seq_printf(s, "%s ", sctp_conntrack_names[state]);
-}
-
-#define for_each_sctp_chunk(skb, sch, _sch, offset, count) \
-for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \
- offset < skb->len && \
- (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \
- offset += (htons(sch->length) + 3) & ~3, count++)
-
-/* Some validity checks to make sure the chunks are fine */
-static int do_basic_checks(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- char *map)
-{
- u_int32_t offset, count;
- sctp_chunkhdr_t _sch, *sch;
- int flag;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- flag = 0;
-
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
- DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type);
-
- if (sch->type == SCTP_CID_INIT
- || sch->type == SCTP_CID_INIT_ACK
- || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- flag = 1;
- }
-
- /* Cookie Ack/Echo chunks not the first OR
- Init / Init Ack / Shutdown compl chunks not the only chunks */
- if ((sch->type == SCTP_CID_COOKIE_ACK
- || sch->type == SCTP_CID_COOKIE_ECHO
- || flag)
- && count !=0 ) {
- DEBUGP("Basic checks failed\n");
- return 1;
- }
-
- if (map) {
- set_bit(sch->type, (void *)map);
- }
- }
-
- DEBUGP("Basic checks passed\n");
- return 0;
-}
-
-static int new_state(enum ip_conntrack_dir dir,
- enum sctp_conntrack cur_state,
- int chunk_type)
-{
- int i;
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- DEBUGP("Chunk type: %d\n", chunk_type);
-
- switch (chunk_type) {
- case SCTP_CID_INIT:
- DEBUGP("SCTP_CID_INIT\n");
- i = 0; break;
- case SCTP_CID_INIT_ACK:
- DEBUGP("SCTP_CID_INIT_ACK\n");
- i = 1; break;
- case SCTP_CID_ABORT:
- DEBUGP("SCTP_CID_ABORT\n");
- i = 2; break;
- case SCTP_CID_SHUTDOWN:
- DEBUGP("SCTP_CID_SHUTDOWN\n");
- i = 3; break;
- case SCTP_CID_SHUTDOWN_ACK:
- DEBUGP("SCTP_CID_SHUTDOWN_ACK\n");
- i = 4; break;
- case SCTP_CID_ERROR:
- DEBUGP("SCTP_CID_ERROR\n");
- i = 5; break;
- case SCTP_CID_COOKIE_ECHO:
- DEBUGP("SCTP_CID_COOKIE_ECHO\n");
- i = 6; break;
- case SCTP_CID_COOKIE_ACK:
- DEBUGP("SCTP_CID_COOKIE_ACK\n");
- i = 7; break;
- case SCTP_CID_SHUTDOWN_COMPLETE:
- DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n");
- i = 8; break;
- default:
- /* Other chunks like DATA, SACK, HEARTBEAT and
- its ACK do not cause a change in state */
- DEBUGP("Unknown chunk type, Will stay in %s\n",
- sctp_conntrack_names[cur_state]);
- return cur_state;
- }
-
- DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
- dir, sctp_conntrack_names[cur_state], chunk_type,
- sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
-
- return sctp_conntracks[dir][i][cur_state];
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int sctp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- enum sctp_conntrack newconntrack, oldsctpstate;
- struct iphdr *iph = skb->nh.iph;
- sctp_sctphdr_t _sctph, *sh;
- sctp_chunkhdr_t _sch, *sch;
- u_int32_t offset, count;
- char map[256 / sizeof (char)] = {0};
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
- if (sh == NULL)
- return -1;
-
- if (do_basic_checks(conntrack, skb, map) != 0)
- return -1;
-
- /* Check the verification tag (Sec 8.5) */
- if (!test_bit(SCTP_CID_INIT, (void *)map)
- && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)
- && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map)
- && !test_bit(SCTP_CID_ABORT, (void *)map)
- && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map)
- && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
- DEBUGP("Verification tag check failed\n");
- return -1;
- }
-
- oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
- write_lock_bh(&sctp_lock);
-
- /* Special cases of Verification tag check (Sec 8.5.1) */
- if (sch->type == SCTP_CID_INIT) {
- /* Sec 8.5.1 (A) */
- if (sh->vtag != 0) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_ABORT) {
- /* Sec 8.5.1 (B) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
- && !(sh->vtag == conntrack->proto.sctp.vtag
- [1 - CTINFO2DIR(ctinfo)])) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- /* Sec 8.5.1 (C) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
- && !(sh->vtag == conntrack->proto.sctp.vtag
- [1 - CTINFO2DIR(ctinfo)]
- && (sch->flags & 1))) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- } else if (sch->type == SCTP_CID_COOKIE_ECHO) {
- /* Sec 8.5.1 (D) */
- if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- }
-
- oldsctpstate = conntrack->proto.sctp.state;
- newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type);
-
- /* Invalid */
- if (newconntrack == SCTP_CONNTRACK_MAX) {
- DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
- CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
- write_unlock_bh(&sctp_lock);
- return -1;
- }
-
- /* If it is an INIT or an INIT ACK note down the vtag */
- if (sch->type == SCTP_CID_INIT
- || sch->type == SCTP_CID_INIT_ACK) {
- sctp_inithdr_t _inithdr, *ih;
-
- ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL) {
- write_unlock_bh(&sctp_lock);
- return -1;
- }
- DEBUGP("Setting vtag %x for dir %d\n",
- ih->init_tag, !CTINFO2DIR(ctinfo));
- conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag;
- }
-
- conntrack->proto.sctp.state = newconntrack;
- if (oldsctpstate != newconntrack)
- ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
- write_unlock_bh(&sctp_lock);
- }
-
- ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
-
- if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED
- && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY
- && newconntrack == SCTP_CONNTRACK_ESTABLISHED) {
- DEBUGP("Setting assured bit\n");
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- }
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int sctp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- enum sctp_conntrack newconntrack;
- struct iphdr *iph = skb->nh.iph;
- sctp_sctphdr_t _sctph, *sh;
- sctp_chunkhdr_t _sch, *sch;
- u_int32_t offset, count;
- char map[256 / sizeof (char)] = {0};
-
- DEBUGP(__FUNCTION__);
- DEBUGP("\n");
-
- sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph);
- if (sh == NULL)
- return 0;
-
- if (do_basic_checks(conntrack, skb, map) != 0)
- return 0;
-
- /* If an OOTB packet has any of these chunks discard (Sec 8.4) */
- if ((test_bit (SCTP_CID_ABORT, (void *)map))
- || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map))
- || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) {
- return 0;
- }
-
- newconntrack = SCTP_CONNTRACK_MAX;
- for_each_sctp_chunk (skb, sch, _sch, offset, count) {
- /* Don't need lock here: this conntrack not in circulation yet */
- newconntrack = new_state (IP_CT_DIR_ORIGINAL,
- SCTP_CONNTRACK_NONE, sch->type);
-
- /* Invalid: delete conntrack */
- if (newconntrack == SCTP_CONNTRACK_MAX) {
- DEBUGP("ip_conntrack_sctp: invalid new deleting.\n");
- return 0;
- }
-
- /* Copy the vtag into the state info */
- if (sch->type == SCTP_CID_INIT) {
- if (sh->vtag == 0) {
- sctp_inithdr_t _inithdr, *ih;
-
- ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
- sizeof(_inithdr), &_inithdr);
- if (ih == NULL)
- return 0;
-
- DEBUGP("Setting vtag %x for new conn\n",
- ih->init_tag);
-
- conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] =
- ih->init_tag;
- } else {
- /* Sec 8.5.1 (A) */
- return 0;
- }
- }
- /* If it is a shutdown ack OOTB packet, we expect a return
- shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
- else {
- DEBUGP("Setting vtag %x for new conn OOTB\n",
- sh->vtag);
- conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag;
- }
-
- conntrack->proto.sctp.state = newconntrack;
- }
-
- return 1;
-}
-
-static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = {
- .proto = IPPROTO_SCTP,
- .name = "sctp",
- .pkt_to_tuple = sctp_pkt_to_tuple,
- .invert_tuple = sctp_invert_tuple,
- .print_tuple = sctp_print_tuple,
- .print_conntrack = sctp_print_conntrack,
- .packet = sctp_packet,
- .new = sctp_new,
- .destroy = NULL,
- .me = THIS_MODULE,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
-
-#ifdef CONFIG_SYSCTL
-static ctl_table ip_ct_sysctl_table[] = {
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED,
- .procname = "ip_conntrack_sctp_timeout_closed",
- .data = &ip_ct_sctp_timeout_closed,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT,
- .procname = "ip_conntrack_sctp_timeout_cookie_wait",
- .data = &ip_ct_sctp_timeout_cookie_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED,
- .procname = "ip_conntrack_sctp_timeout_cookie_echoed",
- .data = &ip_ct_sctp_timeout_cookie_echoed,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED,
- .procname = "ip_conntrack_sctp_timeout_established",
- .data = &ip_ct_sctp_timeout_established,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT,
- .procname = "ip_conntrack_sctp_timeout_shutdown_sent",
- .data = &ip_ct_sctp_timeout_shutdown_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD,
- .procname = "ip_conntrack_sctp_timeout_shutdown_recd",
- .data = &ip_ct_sctp_timeout_shutdown_recd,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT,
- .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent",
- .data = &ip_ct_sctp_timeout_shutdown_ack_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_netfilter_table[] = {
- {
- .ctl_name = NET_IPV4_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = ip_ct_sysctl_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = ip_ct_netfilter_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ip_ct_ipv4_table,
- },
- { .ctl_name = 0 }
-};
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-#endif
-
-static int __init init(void)
-{
- int ret;
-
- ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp);
- if (ret) {
- printk("ip_conntrack_proto_sctp: protocol register failed\n");
- goto out;
- }
-
-#ifdef CONFIG_SYSCTL
- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
- if (ip_ct_sysctl_header == NULL) {
- ret = -ENOMEM;
- printk("ip_conntrack_proto_sctp: can't register to sysctl.\n");
- goto cleanup;
- }
-#endif
-
- return ret;
-
-#ifdef CONFIG_SYSCTL
- cleanup:
- ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#endif
- out:
- DEBUGP("SCTP conntrack module loading %s\n",
- ret ? "failed": "succeeded");
- return ret;
-}
-
-static void __exit fini(void)
-{
- ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp);
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ip_ct_sysctl_header);
-#endif
- DEBUGP("SCTP conntrack module unloaded\n");
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kiran Kumar Immidi");
-MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP");
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
deleted file mode 100644
index e0dc3706354..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c
+++ /dev/null
@@ -1,1176 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>:
- * - Real stateful connection tracking
- * - Modified state transitions table
- * - Window scaling support added
- * - SACK support added
- *
- * Willy Tarreau:
- * - State table bugfixes
- * - More robust state changes
- * - Tuning timer parameters
- *
- * version 2.2
- */
-
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/module.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/spinlock.h>
-
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-#if 0
-#define DEBUGP printk
-#define DEBUGP_VARS
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Protects conntrack->proto.tcp */
-static DEFINE_RWLOCK(tcp_lock);
-
-/* "Be conservative in what you do,
- be liberal in what you accept from others."
- If it's non-zero, we mark only out of window RST segments as INVALID. */
-int ip_ct_tcp_be_liberal = 0;
-
-/* When connection is picked up from the middle, how many packets are required
- to pass in each direction when we assume we are in sync - if any side uses
- window scaling, we lost the game.
- If it is set to zero, we disable picking up already established
- connections. */
-int ip_ct_tcp_loose = 3;
-
-/* Max number of the retransmitted packets without receiving an (acceptable)
- ACK from the destination. If this number is reached, a shorter timer
- will be started. */
-int ip_ct_tcp_max_retrans = 3;
-
- /* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR */
-
-static const char *tcp_conntrack_names[] = {
- "NONE",
- "SYN_SENT",
- "SYN_RECV",
- "ESTABLISHED",
- "FIN_WAIT",
- "CLOSE_WAIT",
- "LAST_ACK",
- "TIME_WAIT",
- "CLOSE",
- "LISTEN"
-};
-
-#define SECS * HZ
-#define MINS * 60 SECS
-#define HOURS * 60 MINS
-#define DAYS * 24 HOURS
-
-unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS;
-unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS;
-unsigned int ip_ct_tcp_timeout_established = 5 DAYS;
-unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS;
-unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS;
-unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS;
-unsigned int ip_ct_tcp_timeout_close = 10 SECS;
-
-/* RFC1122 says the R2 limit should be at least 100 seconds.
- Linux uses 15 packets as limit, which corresponds
- to ~13-30min depending on RTO. */
-unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS;
-
-static const unsigned int * tcp_timeouts[]
-= { NULL, /* TCP_CONNTRACK_NONE */
- &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */
- &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */
- &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */
- &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */
- &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */
- &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */
- &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */
- &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */
- NULL, /* TCP_CONNTRACK_LISTEN */
- };
-
-#define sNO TCP_CONNTRACK_NONE
-#define sSS TCP_CONNTRACK_SYN_SENT
-#define sSR TCP_CONNTRACK_SYN_RECV
-#define sES TCP_CONNTRACK_ESTABLISHED
-#define sFW TCP_CONNTRACK_FIN_WAIT
-#define sCW TCP_CONNTRACK_CLOSE_WAIT
-#define sLA TCP_CONNTRACK_LAST_ACK
-#define sTW TCP_CONNTRACK_TIME_WAIT
-#define sCL TCP_CONNTRACK_CLOSE
-#define sLI TCP_CONNTRACK_LISTEN
-#define sIV TCP_CONNTRACK_MAX
-#define sIG TCP_CONNTRACK_IGNORE
-
-/* What TCP flags are set from RST/SYN/FIN/ACK. */
-enum tcp_bit_set {
- TCP_SYN_SET,
- TCP_SYNACK_SET,
- TCP_FIN_SET,
- TCP_ACK_SET,
- TCP_RST_SET,
- TCP_NONE_SET,
-};
-
-/*
- * The TCP state transition table needs a few words...
- *
- * We are the man in the middle. All the packets go through us
- * but might get lost in transit to the destination.
- * It is assumed that the destinations can't receive segments
- * we haven't seen.
- *
- * The checked segment is in window, but our windows are *not*
- * equivalent with the ones of the sender/receiver. We always
- * try to guess the state of the current sender.
- *
- * The meaning of the states are:
- *
- * NONE: initial state
- * SYN_SENT: SYN-only packet seen
- * SYN_RECV: SYN-ACK packet seen
- * ESTABLISHED: ACK packet seen
- * FIN_WAIT: FIN packet seen
- * CLOSE_WAIT: ACK seen (after FIN)
- * LAST_ACK: FIN seen (after FIN)
- * TIME_WAIT: last ACK seen
- * CLOSE: closed connection
- *
- * LISTEN state is not used.
- *
- * Packets marked as IGNORED (sIG):
- * if they may be either invalid or valid
- * and the receiver may send back a connection
- * closing RST or a SYN/ACK.
- *
- * Packets marked as INVALID (sIV):
- * if they are invalid
- * or we do not support the request (simultaneous open)
- */
-static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = {
- {
-/* ORIGINAL */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV },
-/*
- * sNO -> sSS Initialize a new connection
- * sSS -> sSS Retransmitted SYN
- * sSR -> sIG Late retransmitted SYN?
- * sES -> sIG Error: SYNs in window outside the SYN_SENT state
- * are errors. Receiver will reply with RST
- * and close the connection.
- * Or we are not in sync and hold a dead connection.
- * sFW -> sIG
- * sCW -> sIG
- * sLA -> sIG
- * sTW -> sSS Reopened connection (RFC 1122).
- * sCL -> sSS
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- * A SYN/ACK from the client is always invalid:
- * - either it tries to set up a simultaneous open, which is
- * not supported;
- * - or the firewall has just been inserted between the two hosts
- * during the session set-up. The SYN will be retransmitted
- * by the true client (or it'll time out).
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- * sNO -> sIV Too late and no reason to do anything...
- * sSS -> sIV Client migth not send FIN in this state:
- * we enforce waiting for a SYN/ACK reply first.
- * sSR -> sFW Close started.
- * sES -> sFW
- * sFW -> sLA FIN seen in both directions, waiting for
- * the last ACK.
- * Migth be a retransmitted FIN as well...
- * sCW -> sLA
- * sLA -> sLA Retransmitted FIN. Remain in the same state.
- * sTW -> sTW
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- * sNO -> sES Assumed.
- * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet.
- * sSR -> sES Established state is reached.
- * sES -> sES :-)
- * sFW -> sCW Normal close request answered by ACK.
- * sCW -> sCW
- * sLA -> sTW Last ACK detected.
- * sTW -> sTW Retransmitted last ACK. Remain in the same state.
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
- },
- {
-/* REPLY */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV },
-/*
- * sNO -> sIV Never reached.
- * sSS -> sIV Simultaneous open, not supported
- * sSR -> sIV Simultaneous open, not supported.
- * sES -> sIV Server may not initiate a connection.
- * sFW -> sIV
- * sCW -> sIV
- * sLA -> sIV
- * sTW -> sIV Reopened connection, but server may not do it.
- * sCL -> sIV
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV },
-/*
- * sSS -> sSR Standard open.
- * sSR -> sSR Retransmitted SYN/ACK.
- * sES -> sIG Late retransmitted SYN/ACK?
- * sFW -> sIG Might be SYN/ACK answering ignored SYN
- * sCW -> sIG
- * sLA -> sIG
- * sTW -> sIG
- * sCL -> sIG
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV },
-/*
- * sSS -> sIV Server might not send FIN in this state.
- * sSR -> sFW Close started.
- * sES -> sFW
- * sFW -> sLA FIN seen in both directions.
- * sCW -> sLA
- * sLA -> sLA Retransmitted FIN.
- * sTW -> sTW
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV },
-/*
- * sSS -> sIG Might be a half-open connection.
- * sSR -> sSR Might answer late resent SYN.
- * sES -> sES :-)
- * sFW -> sCW Normal close request answered by ACK.
- * sCW -> sCW
- * sLA -> sTW Last ACK detected.
- * sTW -> sTW Retransmitted last ACK.
- * sCL -> sCL
- */
-/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */
-/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV },
-/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }
- }
-};
-
-static int tcp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct tcphdr _hdr, *hp;
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, 8, &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.tcp.port = hp->source;
- tuple->dst.u.tcp.port = hp->dest;
-
- return 1;
-}
-
-static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.tcp.port = orig->dst.u.tcp.port;
- tuple->dst.u.tcp.port = orig->src.u.tcp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int tcp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.tcp.port),
- ntohs(tuple->dst.u.tcp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int tcp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- enum tcp_conntrack state;
-
- read_lock_bh(&tcp_lock);
- state = conntrack->proto.tcp.state;
- read_unlock_bh(&tcp_lock);
-
- return seq_printf(s, "%s ", tcp_conntrack_names[state]);
-}
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa,
- const struct ip_conntrack *ct)
-{
- struct nfattr *nest_parms;
-
- read_lock_bh(&tcp_lock);
- nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP);
- NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t),
- &ct->proto.tcp.state);
- read_unlock_bh(&tcp_lock);
-
- NFA_NEST_END(skb, nest_parms);
-
- return 0;
-
-nfattr_failure:
- read_unlock_bh(&tcp_lock);
- return -1;
-}
-
-static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = {
- [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t),
-};
-
-static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct)
-{
- struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1];
- struct nfattr *tb[CTA_PROTOINFO_TCP_MAX];
-
- /* updates could not contain anything about the private
- * protocol info, in that case skip the parsing */
- if (!attr)
- return 0;
-
- nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr);
-
- if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp))
- return -EINVAL;
-
- if (!tb[CTA_PROTOINFO_TCP_STATE-1])
- return -EINVAL;
-
- write_lock_bh(&tcp_lock);
- ct->proto.tcp.state =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]);
- write_unlock_bh(&tcp_lock);
-
- return 0;
-}
-#endif
-
-static unsigned int get_conntrack_index(const struct tcphdr *tcph)
-{
- if (tcph->rst) return TCP_RST_SET;
- else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET);
- else if (tcph->fin) return TCP_FIN_SET;
- else if (tcph->ack) return TCP_ACK_SET;
- else return TCP_NONE_SET;
-}
-
-/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering
- in IP Filter' by Guido van Rooij.
-
- http://www.nluug.nl/events/sane2000/papers.html
- http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz
-
- The boundaries and the conditions are changed according to RFC793:
- the packet must intersect the window (i.e. segments may be
- after the right or before the left edge) and thus receivers may ACK
- segments after the right edge of the window.
-
- td_maxend = max(sack + max(win,1)) seen in reply packets
- td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets
- td_maxwin += seq + len - sender.td_maxend
- if seq + len > sender.td_maxend
- td_end = max(seq + len) seen in sent packets
-
- I. Upper bound for valid data: seq <= sender.td_maxend
- II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin
- III. Upper bound for valid ack: sack <= receiver.td_end
- IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW
-
- where sack is the highest right edge of sack block found in the packet.
-
- The upper bound limit for a valid ack is not ignored -
- we doesn't have to deal with fragments.
-*/
-
-static inline __u32 segment_seq_plus_len(__u32 seq,
- size_t len,
- struct iphdr *iph,
- struct tcphdr *tcph)
-{
- return (seq + len - (iph->ihl + tcph->doff)*4
- + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0));
-}
-
-/* Fixme: what about big packets? */
-#define MAXACKWINCONST 66000
-#define MAXACKWINDOW(sender) \
- ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \
- : MAXACKWINCONST)
-
-/*
- * Simplified tcp_parse_options routine from tcp_input.c
- */
-static void tcp_options(const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph,
- struct ip_ct_tcp_state *state)
-{
- unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
- unsigned char *ptr;
- int length = (tcph->doff*4) - sizeof(struct tcphdr);
-
- if (!length)
- return;
-
- ptr = skb_header_pointer(skb,
- (iph->ihl * 4) + sizeof(struct tcphdr),
- length, buff);
- BUG_ON(ptr == NULL);
-
- state->td_scale =
- state->flags = 0;
-
- while (length > 0) {
- int opcode=*ptr++;
- int opsize;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- break; /* don't parse partial options */
-
- if (opcode == TCPOPT_SACK_PERM
- && opsize == TCPOLEN_SACK_PERM)
- state->flags |= IP_CT_TCP_FLAG_SACK_PERM;
- else if (opcode == TCPOPT_WINDOW
- && opsize == TCPOLEN_WINDOW) {
- state->td_scale = *(u_int8_t *)ptr;
-
- if (state->td_scale > 14) {
- /* See RFC1323 */
- state->td_scale = 14;
- }
- state->flags |=
- IP_CT_TCP_FLAG_WINDOW_SCALE;
- }
- ptr += opsize - 2;
- length -= opsize;
- }
- }
-}
-
-static void tcp_sack(const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph,
- __u32 *sack)
-{
- unsigned char buff[(15 * 4) - sizeof(struct tcphdr)];
- unsigned char *ptr;
- int length = (tcph->doff*4) - sizeof(struct tcphdr);
- __u32 tmp;
-
- if (!length)
- return;
-
- ptr = skb_header_pointer(skb,
- (iph->ihl * 4) + sizeof(struct tcphdr),
- length, buff);
- BUG_ON(ptr == NULL);
-
- /* Fast path for timestamp-only option */
- if (length == TCPOLEN_TSTAMP_ALIGNED*4
- && *(__u32 *)ptr ==
- __constant_ntohl((TCPOPT_NOP << 24)
- | (TCPOPT_NOP << 16)
- | (TCPOPT_TIMESTAMP << 8)
- | TCPOLEN_TIMESTAMP))
- return;
-
- while (length > 0) {
- int opcode=*ptr++;
- int opsize, i;
-
- switch (opcode) {
- case TCPOPT_EOL:
- return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- break; /* don't parse partial options */
-
- if (opcode == TCPOPT_SACK
- && opsize >= (TCPOLEN_SACK_BASE
- + TCPOLEN_SACK_PERBLOCK)
- && !((opsize - TCPOLEN_SACK_BASE)
- % TCPOLEN_SACK_PERBLOCK)) {
- for (i = 0;
- i < (opsize - TCPOLEN_SACK_BASE);
- i += TCPOLEN_SACK_PERBLOCK) {
- tmp = ntohl(*((u_int32_t *)(ptr+i)+1));
-
- if (after(tmp, *sack))
- *sack = tmp;
- }
- return;
- }
- ptr += opsize - 2;
- length -= opsize;
- }
- }
-}
-
-static int tcp_in_window(struct ip_ct_tcp *state,
- enum ip_conntrack_dir dir,
- unsigned int index,
- const struct sk_buff *skb,
- struct iphdr *iph,
- struct tcphdr *tcph)
-{
- struct ip_ct_tcp_state *sender = &state->seen[dir];
- struct ip_ct_tcp_state *receiver = &state->seen[!dir];
- __u32 seq, ack, sack, end, win, swin;
- int res;
-
- /*
- * Get the required data from the packet.
- */
- seq = ntohl(tcph->seq);
- ack = sack = ntohl(tcph->ack_seq);
- win = ntohs(tcph->window);
- end = segment_seq_plus_len(seq, skb->len, iph, tcph);
-
- if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
- tcp_sack(skb, iph, tcph, &sack);
-
- DEBUGP("tcp_in_window: START\n");
- DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "seq=%u ack=%u sack=%u win=%u end=%u\n",
- NIPQUAD(iph->saddr), ntohs(tcph->source),
- NIPQUAD(iph->daddr), ntohs(tcph->dest),
- seq, ack, sack, win, end);
- DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
- if (sender->td_end == 0) {
- /*
- * Initialize sender data.
- */
- if (tcph->syn && tcph->ack) {
- /*
- * Outgoing SYN-ACK in reply to a SYN.
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, iph, tcph, sender);
- /*
- * RFC 1323:
- * Both sides must send the Window Scale option
- * to enable window scaling in either direction.
- */
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE
- && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE))
- sender->td_scale =
- receiver->td_scale = 0;
- } else {
- /*
- * We are in the middle of a connection,
- * its history is lost for us.
- * Let's try to use the data from the packet.
- */
- sender->td_end = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
- sender->td_maxend = end + sender->td_maxwin;
- }
- } else if (((state->state == TCP_CONNTRACK_SYN_SENT
- && dir == IP_CT_DIR_ORIGINAL)
- || (state->state == TCP_CONNTRACK_SYN_RECV
- && dir == IP_CT_DIR_REPLY))
- && after(end, sender->td_end)) {
- /*
- * RFC 793: "if a TCP is reinitialized ... then it need
- * not wait at all; it must only be sure to use sequence
- * numbers larger than those recently used."
- */
- sender->td_end =
- sender->td_maxend = end;
- sender->td_maxwin = (win == 0 ? 1 : win);
-
- tcp_options(skb, iph, tcph, sender);
- }
-
- if (!(tcph->ack)) {
- /*
- * If there is no ACK, just pretend it was set and OK.
- */
- ack = sack = receiver->td_end;
- } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) ==
- (TCP_FLAG_ACK|TCP_FLAG_RST))
- && (ack == 0)) {
- /*
- * Broken TCP stacks, that set ACK in RST packets as well
- * with zero ack value.
- */
- ack = sack = receiver->td_end;
- }
-
- if (seq == end
- && (!tcph->rst
- || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT)))
- /*
- * Packets contains no data: we assume it is valid
- * and check the ack value only.
- * However RST segments are always validated by their
- * SEQ number, except when seq == 0 (reset sent answering
- * SYN.
- */
- seq = end = sender->td_end;
-
- DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "seq=%u ack=%u sack =%u win=%u end=%u\n",
- NIPQUAD(iph->saddr), ntohs(tcph->source),
- NIPQUAD(iph->daddr), ntohs(tcph->dest),
- seq, ack, sack, win, end);
- DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-
- DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n",
- before(seq, sender->td_maxend + 1),
- after(end, sender->td_end - receiver->td_maxwin - 1),
- before(sack, receiver->td_end + 1),
- after(ack, receiver->td_end - MAXACKWINDOW(sender)));
-
- if (sender->loose || receiver->loose ||
- (before(seq, sender->td_maxend + 1) &&
- after(end, sender->td_end - receiver->td_maxwin - 1) &&
- before(sack, receiver->td_end + 1) &&
- after(ack, receiver->td_end - MAXACKWINDOW(sender)))) {
- /*
- * Take into account window scaling (RFC 1323).
- */
- if (!tcph->syn)
- win <<= sender->td_scale;
-
- /*
- * Update sender data.
- */
- swin = win + (sack - ack);
- if (sender->td_maxwin < swin)
- sender->td_maxwin = swin;
- if (after(end, sender->td_end))
- sender->td_end = end;
- /*
- * Update receiver data.
- */
- if (after(end, sender->td_maxend))
- receiver->td_maxwin += end - sender->td_maxend;
- if (after(sack + win, receiver->td_maxend - 1)) {
- receiver->td_maxend = sack + win;
- if (win == 0)
- receiver->td_maxend++;
- }
-
- /*
- * Check retransmissions.
- */
- if (index == TCP_ACK_SET) {
- if (state->last_dir == dir
- && state->last_seq == seq
- && state->last_ack == ack
- && state->last_end == end)
- state->retrans++;
- else {
- state->last_dir = dir;
- state->last_seq = seq;
- state->last_ack = ack;
- state->last_end = end;
- state->retrans = 0;
- }
- }
- /*
- * Close the window of disabled window tracking :-)
- */
- if (sender->loose)
- sender->loose--;
-
- res = 1;
- } else {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: %s ",
- before(seq, sender->td_maxend + 1) ?
- after(end, sender->td_end - receiver->td_maxwin - 1) ?
- before(sack, receiver->td_end + 1) ?
- after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG"
- : "ACK is under the lower bound (possible overly delayed ACK)"
- : "ACK is over the upper bound (ACKed data not seen yet)"
- : "SEQ is under the lower bound (already ACKed data retransmitted)"
- : "SEQ is over the upper bound (over the window of the receiver)");
-
- res = ip_ct_tcp_be_liberal;
- }
-
- DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u "
- "receiver end=%u maxend=%u maxwin=%u\n",
- res, sender->td_end, sender->td_maxend, sender->td_maxwin,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin);
-
- return res;
-}
-
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-/* Update sender->td_end after NAT successfully mangled the packet */
-void ip_conntrack_tcp_update(struct sk_buff *skb,
- struct ip_conntrack *conntrack,
- enum ip_conntrack_dir dir)
-{
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4;
- __u32 end;
-#ifdef DEBUGP_VARS
- struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir];
- struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir];
-#endif
-
- end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
-
- write_lock_bh(&tcp_lock);
- /*
- * We have to worry for the ack in the reply packet only...
- */
- if (after(end, conntrack->proto.tcp.seen[dir].td_end))
- conntrack->proto.tcp.seen[dir].td_end = end;
- conntrack->proto.tcp.last_end = end;
- write_unlock_bh(&tcp_lock);
- DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
-}
-
-#endif
-
-#define TH_FIN 0x01
-#define TH_SYN 0x02
-#define TH_RST 0x04
-#define TH_PUSH 0x08
-#define TH_ACK 0x10
-#define TH_URG 0x20
-#define TH_ECE 0x40
-#define TH_CWR 0x80
-
-/* table of valid flag combinations - ECE and CWR are always valid */
-static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] =
-{
- [TH_SYN] = 1,
- [TH_SYN|TH_ACK] = 1,
- [TH_SYN|TH_PUSH] = 1,
- [TH_SYN|TH_ACK|TH_PUSH] = 1,
- [TH_RST] = 1,
- [TH_RST|TH_ACK] = 1,
- [TH_RST|TH_ACK|TH_PUSH] = 1,
- [TH_FIN|TH_ACK] = 1,
- [TH_ACK] = 1,
- [TH_ACK|TH_PUSH] = 1,
- [TH_ACK|TH_URG] = 1,
- [TH_ACK|TH_URG|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_PUSH] = 1,
- [TH_FIN|TH_ACK|TH_URG] = 1,
- [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1,
-};
-
-/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */
-static int tcp_error(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr _tcph, *th;
- unsigned int tcplen = skb->len - iph->ihl * 4;
- u_int8_t tcpflags;
-
- /* Smaller that minimal TCP header? */
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* Not whole TCP header or malformed packet */
- if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: truncated/malformed packet ");
- return -NF_ACCEPT;
- }
-
- /* Checksum invalid? Ignore.
- * We skip checking packets on the outgoing path
- * because the semantic of CHECKSUM_HW is different there
- * and moreover root might send raw packets.
- */
- /* FIXME: Source route IP option packets --RR */
- if (hooknum == NF_IP_PRE_ROUTING
- && skb->ip_summed != CHECKSUM_UNNECESSARY
- && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
- skb->ip_summed == CHECKSUM_HW ? skb->csum
- : skb_checksum(skb, iph->ihl*4, tcplen, 0))) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: bad TCP checksum ");
- return -NF_ACCEPT;
- }
-
- /* Check TCP flags. */
- tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR));
- if (!tcp_valid_flags[tcpflags]) {
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid TCP flag combination ");
- return -NF_ACCEPT;
- }
-
- return NF_ACCEPT;
-}
-
-/* Returns verdict for packet, or -1 for invalid. */
-static int tcp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- enum tcp_conntrack new_state, old_state;
- enum ip_conntrack_dir dir;
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th, _tcph;
- unsigned long timeout;
- unsigned int index;
-
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- BUG_ON(th == NULL);
-
- write_lock_bh(&tcp_lock);
- old_state = conntrack->proto.tcp.state;
- dir = CTINFO2DIR(ctinfo);
- index = get_conntrack_index(th);
- new_state = tcp_conntracks[dir][index][old_state];
-
- switch (new_state) {
- case TCP_CONNTRACK_IGNORE:
- /* Ignored packets:
- *
- * a) SYN in ORIGINAL
- * b) SYN/ACK in REPLY
- * c) ACK in reply direction after initial SYN in original.
- */
- if (index == TCP_SYNACK_SET
- && conntrack->proto.tcp.last_index == TCP_SYN_SET
- && conntrack->proto.tcp.last_dir != dir
- && ntohl(th->ack_seq) ==
- conntrack->proto.tcp.last_end) {
- /* This SYN/ACK acknowledges a SYN that we earlier
- * ignored as invalid. This means that the client and
- * the server are both in sync, while the firewall is
- * not. We kill this session and block the SYN/ACK so
- * that the client cannot but retransmit its SYN and
- * thus initiate a clean new session.
- */
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- NULL, "ip_ct_tcp: "
- "killing out of sync session ");
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return -NF_DROP;
- }
- conntrack->proto.tcp.last_index = index;
- conntrack->proto.tcp.last_dir = dir;
- conntrack->proto.tcp.last_seq = ntohl(th->seq);
- conntrack->proto.tcp.last_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
-
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid packet ignored ");
- return NF_ACCEPT;
- case TCP_CONNTRACK_MAX:
- /* Invalid packet */
- DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
- dir, get_conntrack_index(th),
- old_state);
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_tcp: invalid state ");
- return -NF_ACCEPT;
- case TCP_CONNTRACK_SYN_SENT:
- if (old_state < TCP_CONNTRACK_TIME_WAIT)
- break;
- if ((conntrack->proto.tcp.seen[dir].flags &
- IP_CT_TCP_FLAG_CLOSE_INIT)
- || after(ntohl(th->seq),
- conntrack->proto.tcp.seen[dir].td_end)) {
- /* Attempt to reopen a closed connection.
- * Delete this connection and look up again. */
- write_unlock_bh(&tcp_lock);
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return -NF_REPEAT;
- } else {
- write_unlock_bh(&tcp_lock);
- if (LOG_INVALID(IPPROTO_TCP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- NULL, "ip_ct_tcp: invalid SYN");
- return -NF_ACCEPT;
- }
- case TCP_CONNTRACK_CLOSE:
- if (index == TCP_RST_SET
- && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_SYN_SET)
- || (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
- && conntrack->proto.tcp.last_index == TCP_ACK_SET))
- && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) {
- /* RST sent to invalid SYN or ACK we had let through
- * at a) and c) above:
- *
- * a) SYN was in window then
- * c) we hold a half-open connection.
- *
- * Delete our connection entry.
- * We skip window checking, because packet might ACK
- * segments we ignored. */
- goto in_window;
- }
- /* Just fall through */
- default:
- /* Keep compilers happy. */
- break;
- }
-
- if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
- skb, iph, th)) {
- write_unlock_bh(&tcp_lock);
- return -NF_ACCEPT;
- }
- in_window:
- /* From now on we have got in-window packets */
- conntrack->proto.tcp.last_index = index;
-
- DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu "
- "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
- NIPQUAD(iph->saddr), ntohs(th->source),
- NIPQUAD(iph->daddr), ntohs(th->dest),
- (th->syn ? 1 : 0), (th->ack ? 1 : 0),
- (th->fin ? 1 : 0), (th->rst ? 1 : 0),
- old_state, new_state);
-
- conntrack->proto.tcp.state = new_state;
- if (old_state != new_state
- && (new_state == TCP_CONNTRACK_FIN_WAIT
- || new_state == TCP_CONNTRACK_CLOSE))
- conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT;
- timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
- && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
- ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
- write_unlock_bh(&tcp_lock);
-
- ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
- if (new_state != old_state)
- ip_conntrack_event_cache(IPCT_PROTOINFO, skb);
-
- if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- /* If only reply is a RST, we can consider ourselves not to
- have an established connection: this is a fairly common
- problem case, so we can delete the conntrack
- immediately. --RR */
- if (th->rst) {
- if (del_timer(&conntrack->timeout))
- conntrack->timeout.function((unsigned long)
- conntrack);
- return NF_ACCEPT;
- }
- } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status)
- && (old_state == TCP_CONNTRACK_SYN_RECV
- || old_state == TCP_CONNTRACK_ESTABLISHED)
- && new_state == TCP_CONNTRACK_ESTABLISHED) {
- /* Set ASSURED if we see see valid ack in ESTABLISHED
- after SYN_RECV or a valid answer for a picked up
- connection. */
- set_bit(IPS_ASSURED_BIT, &conntrack->status);
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- }
- ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int tcp_new(struct ip_conntrack *conntrack,
- const struct sk_buff *skb)
-{
- enum tcp_conntrack new_state;
- struct iphdr *iph = skb->nh.iph;
- struct tcphdr *th, _tcph;
-#ifdef DEBUGP_VARS
- struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0];
- struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1];
-#endif
-
- th = skb_header_pointer(skb, iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- BUG_ON(th == NULL);
-
- /* Don't need lock here: this conntrack not in circulation yet */
- new_state
- = tcp_conntracks[0][get_conntrack_index(th)]
- [TCP_CONNTRACK_NONE];
-
- /* Invalid: delete conntrack */
- if (new_state >= TCP_CONNTRACK_MAX) {
- DEBUGP("ip_ct_tcp: invalid new deleting.\n");
- return 0;
- }
-
- if (new_state == TCP_CONNTRACK_SYN_SENT) {
- /* SYN packet */
- conntrack->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- iph, th);
- conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
- conntrack->proto.tcp.seen[0].td_maxwin = 1;
- conntrack->proto.tcp.seen[0].td_maxend =
- conntrack->proto.tcp.seen[0].td_end;
-
- tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]);
- conntrack->proto.tcp.seen[1].flags = 0;
- conntrack->proto.tcp.seen[0].loose =
- conntrack->proto.tcp.seen[1].loose = 0;
- } else if (ip_ct_tcp_loose == 0) {
- /* Don't try to pick up connections. */
- return 0;
- } else {
- /*
- * We are in the middle of a connection,
- * its history is lost for us.
- * Let's try to use the data from the packet.
- */
- conntrack->proto.tcp.seen[0].td_end =
- segment_seq_plus_len(ntohl(th->seq), skb->len,
- iph, th);
- conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window);
- if (conntrack->proto.tcp.seen[0].td_maxwin == 0)
- conntrack->proto.tcp.seen[0].td_maxwin = 1;
- conntrack->proto.tcp.seen[0].td_maxend =
- conntrack->proto.tcp.seen[0].td_end +
- conntrack->proto.tcp.seen[0].td_maxwin;
- conntrack->proto.tcp.seen[0].td_scale = 0;
-
- /* We assume SACK. Should we assume window scaling too? */
- conntrack->proto.tcp.seen[0].flags =
- conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM;
- conntrack->proto.tcp.seen[0].loose =
- conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose;
- }
-
- conntrack->proto.tcp.seen[1].td_end = 0;
- conntrack->proto.tcp.seen[1].td_maxend = 0;
- conntrack->proto.tcp.seen[1].td_maxwin = 1;
- conntrack->proto.tcp.seen[1].td_scale = 0;
-
- /* tcp_packet will set them */
- conntrack->proto.tcp.state = TCP_CONNTRACK_NONE;
- conntrack->proto.tcp.last_index = TCP_NONE_SET;
-
- DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i "
- "receiver end=%u maxend=%u maxwin=%u scale=%i\n",
- sender->td_end, sender->td_maxend, sender->td_maxwin,
- sender->td_scale,
- receiver->td_end, receiver->td_maxend, receiver->td_maxwin,
- receiver->td_scale);
- return 1;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_tcp =
-{
- .proto = IPPROTO_TCP,
- .name = "tcp",
- .pkt_to_tuple = tcp_pkt_to_tuple,
- .invert_tuple = tcp_invert_tuple,
- .print_tuple = tcp_print_tuple,
- .print_conntrack = tcp_print_conntrack,
- .packet = tcp_packet,
- .new = tcp_new,
- .error = tcp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .to_nfattr = tcp_to_nfattr,
- .from_nfattr = nfattr_to_tcp,
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c
deleted file mode 100644
index 55b7d3210ad..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c
+++ /dev/null
@@ -1,153 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/sched.h>
-#include <linux/timer.h>
-#include <linux/netfilter.h>
-#include <linux/in.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/seq_file.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-
-unsigned int ip_ct_udp_timeout = 30*HZ;
-unsigned int ip_ct_udp_timeout_stream = 180*HZ;
-
-static int udp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct ip_conntrack_tuple *tuple)
-{
- struct udphdr _hdr, *hp;
-
- /* Actually only need first 8 bytes. */
- hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
- if (hp == NULL)
- return 0;
-
- tuple->src.u.udp.port = hp->source;
- tuple->dst.u.udp.port = hp->dest;
-
- return 1;
-}
-
-static int udp_invert_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig)
-{
- tuple->src.u.udp.port = orig->dst.u.udp.port;
- tuple->dst.u.udp.port = orig->src.u.udp.port;
- return 1;
-}
-
-/* Print out the per-protocol part of the tuple. */
-static int udp_print_tuple(struct seq_file *s,
- const struct ip_conntrack_tuple *tuple)
-{
- return seq_printf(s, "sport=%hu dport=%hu ",
- ntohs(tuple->src.u.udp.port),
- ntohs(tuple->dst.u.udp.port));
-}
-
-/* Print out the private part of the conntrack. */
-static int udp_print_conntrack(struct seq_file *s,
- const struct ip_conntrack *conntrack)
-{
- return 0;
-}
-
-/* Returns verdict for packet, and may modify conntracktype */
-static int udp_packet(struct ip_conntrack *conntrack,
- const struct sk_buff *skb,
- enum ip_conntrack_info ctinfo)
-{
- /* If we've seen traffic both ways, this is some kind of UDP
- stream. Extend timeout. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
- ip_ct_refresh_acct(conntrack, ctinfo, skb,
- ip_ct_udp_timeout_stream);
- /* Also, more likely to be important, and not a probe */
- if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status))
- ip_conntrack_event_cache(IPCT_STATUS, skb);
- } else
- ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout);
-
- return NF_ACCEPT;
-}
-
-/* Called when a new connection for this protocol found. */
-static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb)
-{
- return 1;
-}
-
-static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
-{
- struct iphdr *iph = skb->nh.iph;
- unsigned int udplen = skb->len - iph->ihl * 4;
- struct udphdr _hdr, *hdr;
-
- /* Header is too small? */
- hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr);
- if (hdr == NULL) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: short packet ");
- return -NF_ACCEPT;
- }
-
- /* Truncated/malformed packets */
- if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: truncated/malformed packet ");
- return -NF_ACCEPT;
- }
-
- /* Packet with no checksum */
- if (!hdr->check)
- return NF_ACCEPT;
-
- /* Checksum invalid? Ignore.
- * We skip checking packets on the outgoing path
- * because the semantic of CHECKSUM_HW is different there
- * and moreover root might send raw packets.
- * FIXME: Source route IP option packets --RR */
- if (hooknum == NF_IP_PRE_ROUTING
- && skb->ip_summed != CHECKSUM_UNNECESSARY
- && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
- skb->ip_summed == CHECKSUM_HW ? skb->csum
- : skb_checksum(skb, iph->ihl*4, udplen, 0))) {
- if (LOG_INVALID(IPPROTO_UDP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "ip_ct_udp: bad UDP checksum ");
- return -NF_ACCEPT;
- }
-
- return NF_ACCEPT;
-}
-
-struct ip_conntrack_protocol ip_conntrack_protocol_udp =
-{
- .proto = IPPROTO_UDP,
- .name = "udp",
- .pkt_to_tuple = udp_pkt_to_tuple,
- .invert_tuple = udp_invert_tuple,
- .print_tuple = udp_print_tuple,
- .print_conntrack = udp_print_conntrack,
- .packet = udp_packet,
- .new = udp_new,
- .error = udp_error,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr,
- .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
deleted file mode 100644
index 833fcb4be5e..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_standalone.c
+++ /dev/null
@@ -1,1011 +0,0 @@
-/* This file contains all the functions required for the standalone
- ip_conntrack module.
-
- These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/percpu.h>
-#ifdef CONFIG_SYSCTL
-#include <linux/sysctl.h>
-#endif
-#include <net/checksum.h>
-#include <net/ip.h>
-#include <net/route.h>
-
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_LICENSE("GPL");
-
-extern atomic_t ip_conntrack_count;
-DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);
-
-static int kill_proto(struct ip_conntrack *i, void *data)
-{
- return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum ==
- *((u_int8_t *) data));
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_protocol *proto)
-{
- seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
- NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip));
- return proto->print_tuple(s, tuple);
-}
-
-#ifdef CONFIG_IP_NF_CT_ACCT
-static unsigned int
-seq_print_counters(struct seq_file *s,
- const struct ip_conntrack_counter *counter)
-{
- return seq_printf(s, "packets=%llu bytes=%llu ",
- (unsigned long long)counter->packets,
- (unsigned long long)counter->bytes);
-}
-#else
-#define seq_print_counters(x, y) 0
-#endif
-
-struct ct_iter_state {
- unsigned int bucket;
-};
-
-static struct list_head *ct_get_first(struct seq_file *seq)
-{
- struct ct_iter_state *st = seq->private;
-
- for (st->bucket = 0;
- st->bucket < ip_conntrack_htable_size;
- st->bucket++) {
- if (!list_empty(&ip_conntrack_hash[st->bucket]))
- return ip_conntrack_hash[st->bucket].next;
- }
- return NULL;
-}
-
-static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head)
-{
- struct ct_iter_state *st = seq->private;
-
- head = head->next;
- while (head == &ip_conntrack_hash[st->bucket]) {
- if (++st->bucket >= ip_conntrack_htable_size)
- return NULL;
- head = ip_conntrack_hash[st->bucket].next;
- }
- return head;
-}
-
-static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
-{
- struct list_head *head = ct_get_first(seq);
-
- if (head)
- while (pos && (head = ct_get_next(seq, head)))
- pos--;
- return pos ? NULL : head;
-}
-
-static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
-{
- read_lock_bh(&ip_conntrack_lock);
- return ct_get_idx(seq, *pos);
-}
-
-static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- (*pos)++;
- return ct_get_next(s, v);
-}
-
-static void ct_seq_stop(struct seq_file *s, void *v)
-{
- read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int ct_seq_show(struct seq_file *s, void *v)
-{
- const struct ip_conntrack_tuple_hash *hash = v;
- const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
- struct ip_conntrack_protocol *proto;
-
- ASSERT_READ_LOCK(&ip_conntrack_lock);
- IP_NF_ASSERT(conntrack);
-
- /* we only want to print DIR_ORIGINAL */
- if (DIRECTION(hash))
- return 0;
-
- proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum);
- IP_NF_ASSERT(proto);
-
- if (seq_printf(s, "%-8s %u %ld ",
- proto->name,
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum,
- timer_pending(&conntrack->timeout)
- ? (long)(conntrack->timeout.expires - jiffies)/HZ
- : 0) != 0)
- return -ENOSPC;
-
- if (proto->print_conntrack(s, conntrack))
- return -ENOSPC;
-
- if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- proto))
- return -ENOSPC;
-
- if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL]))
- return -ENOSPC;
-
- if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)))
- if (seq_printf(s, "[UNREPLIED] "))
- return -ENOSPC;
-
- if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple,
- proto))
- return -ENOSPC;
-
- if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY]))
- return -ENOSPC;
-
- if (test_bit(IPS_ASSURED_BIT, &conntrack->status))
- if (seq_printf(s, "[ASSURED] "))
- return -ENOSPC;
-
-#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
- if (seq_printf(s, "mark=%u ", conntrack->mark))
- return -ENOSPC;
-#endif
-
- if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use)))
- return -ENOSPC;
-
- return 0;
-}
-
-static struct seq_operations ct_seq_ops = {
- .start = ct_seq_start,
- .next = ct_seq_next,
- .stop = ct_seq_stop,
- .show = ct_seq_show
-};
-
-static int ct_open(struct inode *inode, struct file *file)
-{
- struct seq_file *seq;
- struct ct_iter_state *st;
- int ret;
-
- st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL);
- if (st == NULL)
- return -ENOMEM;
- ret = seq_open(file, &ct_seq_ops);
- if (ret)
- goto out_free;
- seq = file->private_data;
- seq->private = st;
- memset(st, 0, sizeof(struct ct_iter_state));
- return ret;
-out_free:
- kfree(st);
- return ret;
-}
-
-static struct file_operations ct_file_ops = {
- .owner = THIS_MODULE,
- .open = ct_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-
-/* expects */
-static void *exp_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct list_head *e = &ip_conntrack_expect_list;
- loff_t i;
-
- /* strange seq_file api calls stop even if we fail,
- * thus we need to grab lock since stop unlocks */
- read_lock_bh(&ip_conntrack_lock);
-
- if (list_empty(e))
- return NULL;
-
- for (i = 0; i <= *pos; i++) {
- e = e->next;
- if (e == &ip_conntrack_expect_list)
- return NULL;
- }
- return e;
-}
-
-static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct list_head *e = v;
-
- ++*pos;
- e = e->next;
-
- if (e == &ip_conntrack_expect_list)
- return NULL;
-
- return e;
-}
-
-static void exp_seq_stop(struct seq_file *s, void *v)
-{
- read_unlock_bh(&ip_conntrack_lock);
-}
-
-static int exp_seq_show(struct seq_file *s, void *v)
-{
- struct ip_conntrack_expect *expect = v;
-
- if (expect->timeout.function)
- seq_printf(s, "%ld ", timer_pending(&expect->timeout)
- ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
- else
- seq_printf(s, "- ");
-
- seq_printf(s, "proto=%u ", expect->tuple.dst.protonum);
-
- print_tuple(s, &expect->tuple,
- __ip_conntrack_proto_find(expect->tuple.dst.protonum));
- return seq_putc(s, '\n');
-}
-
-static struct seq_operations exp_seq_ops = {
- .start = exp_seq_start,
- .next = exp_seq_next,
- .stop = exp_seq_stop,
- .show = exp_seq_show
-};
-
-static int exp_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &exp_seq_ops);
-}
-
-static struct file_operations exp_file_ops = {
- .owner = THIS_MODULE,
- .open = exp_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
-static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
-{
- int cpu;
-
- if (*pos == 0)
- return SEQ_START_TOKEN;
-
- for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) {
- if (!cpu_possible(cpu))
- continue;
- *pos = cpu+1;
- return &per_cpu(ip_conntrack_stat, cpu);
- }
-
- return NULL;
-}
-
-static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
-{
- int cpu;
-
- for (cpu = *pos; cpu < NR_CPUS; ++cpu) {
- if (!cpu_possible(cpu))
- continue;
- *pos = cpu+1;
- return &per_cpu(ip_conntrack_stat, cpu);
- }
-
- return NULL;
-}
-
-static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
-{
-}
-
-static int ct_cpu_seq_show(struct seq_file *seq, void *v)
-{
- unsigned int nr_conntracks = atomic_read(&ip_conntrack_count);
- struct ip_conntrack_stat *st = v;
-
- if (v == SEQ_START_TOKEN) {
- seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n");
- return 0;
- }
-
- seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
- "%08x %08x %08x %08x %08x %08x %08x %08x \n",
- nr_conntracks,
- st->searched,
- st->found,
- st->new,
- st->invalid,
- st->ignore,
- st->delete,
- st->delete_list,
- st->insert,
- st->insert_failed,
- st->drop,
- st->early_drop,
- st->error,
-
- st->expect_new,
- st->expect_create,
- st->expect_delete
- );
- return 0;
-}
-
-static struct seq_operations ct_cpu_seq_ops = {
- .start = ct_cpu_seq_start,
- .next = ct_cpu_seq_next,
- .stop = ct_cpu_seq_stop,
- .show = ct_cpu_seq_show,
-};
-
-static int ct_cpu_seq_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &ct_cpu_seq_ops);
-}
-
-static struct file_operations ct_cpu_seq_fops = {
- .owner = THIS_MODULE,
- .open = ct_cpu_seq_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release_private,
-};
-#endif
-
-static unsigned int ip_confirm(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* We've seen it coming out the other side: confirm it */
- return ip_conntrack_confirm(pskb);
-}
-
-static unsigned int ip_conntrack_help(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- /* This is where we call the helper: as the packet goes out. */
- ct = ip_conntrack_get(*pskb, &ctinfo);
- if (ct && ct->helper) {
- unsigned int ret;
- ret = ct->helper->help(pskb, ct, ctinfo);
- if (ret != NF_ACCEPT)
- return ret;
- }
- return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_defrag(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
-#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
- /* Previously seen (loopback)? Ignore. Do this before
- fragment check. */
- if ((*pskb)->nfct)
- return NF_ACCEPT;
-#endif
-
- /* Gather fragments. */
- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
- *pskb = ip_ct_gather_frags(*pskb,
- hooknum == NF_IP_PRE_ROUTING ?
- IP_DEFRAG_CONNTRACK_IN :
- IP_DEFRAG_CONNTRACK_OUT);
- if (!*pskb)
- return NF_STOLEN;
- }
- return NF_ACCEPT;
-}
-
-static unsigned int ip_conntrack_local(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
- if (net_ratelimit())
- printk("ipt_hook: happy cracking.\n");
- return NF_ACCEPT;
- }
- return ip_conntrack_in(hooknum, pskb, in, out, okfn);
-}
-
-/* Connection tracking may drop packets, but never alters them, so
- make it the first hook. */
-static struct nf_hook_ops ip_conntrack_defrag_ops = {
- .hook = ip_conntrack_defrag,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
-};
-
-static struct nf_hook_ops ip_conntrack_in_ops = {
- .hook = ip_conntrack_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK,
-};
-
-static struct nf_hook_ops ip_conntrack_defrag_local_out_ops = {
- .hook = ip_conntrack_defrag,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
-};
-
-static struct nf_hook_ops ip_conntrack_local_out_ops = {
- .hook = ip_conntrack_local,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_CONNTRACK,
-};
-
-/* helpers */
-static struct nf_hook_ops ip_conntrack_helper_out_ops = {
- .hook = ip_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
-};
-
-static struct nf_hook_ops ip_conntrack_helper_in_ops = {
- .hook = ip_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
-};
-
-/* Refragmenter; last chance. */
-static struct nf_hook_ops ip_conntrack_out_ops = {
- .hook = ip_confirm,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
-};
-
-static struct nf_hook_ops ip_conntrack_local_in_ops = {
- .hook = ip_confirm,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
-};
-
-/* Sysctl support */
-
-#ifdef CONFIG_SYSCTL
-
-/* From ip_conntrack_core.c */
-extern int ip_conntrack_max;
-extern unsigned int ip_conntrack_htable_size;
-
-/* From ip_conntrack_proto_tcp.c */
-extern unsigned int ip_ct_tcp_timeout_syn_sent;
-extern unsigned int ip_ct_tcp_timeout_syn_recv;
-extern unsigned int ip_ct_tcp_timeout_established;
-extern unsigned int ip_ct_tcp_timeout_fin_wait;
-extern unsigned int ip_ct_tcp_timeout_close_wait;
-extern unsigned int ip_ct_tcp_timeout_last_ack;
-extern unsigned int ip_ct_tcp_timeout_time_wait;
-extern unsigned int ip_ct_tcp_timeout_close;
-extern unsigned int ip_ct_tcp_timeout_max_retrans;
-extern int ip_ct_tcp_loose;
-extern int ip_ct_tcp_be_liberal;
-extern int ip_ct_tcp_max_retrans;
-
-/* From ip_conntrack_proto_udp.c */
-extern unsigned int ip_ct_udp_timeout;
-extern unsigned int ip_ct_udp_timeout_stream;
-
-/* From ip_conntrack_proto_icmp.c */
-extern unsigned int ip_ct_icmp_timeout;
-
-/* From ip_conntrack_proto_icmp.c */
-extern unsigned int ip_ct_generic_timeout;
-
-/* Log invalid packets of a given protocol */
-static int log_invalid_proto_min = 0;
-static int log_invalid_proto_max = 255;
-
-static struct ctl_table_header *ip_ct_sysctl_header;
-
-static ctl_table ip_ct_sysctl_table[] = {
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_MAX,
- .procname = "ip_conntrack_max",
- .data = &ip_conntrack_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT,
- .procname = "ip_conntrack_count",
- .data = &ip_conntrack_count,
- .maxlen = sizeof(int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS,
- .procname = "ip_conntrack_buckets",
- .data = &ip_conntrack_htable_size,
- .maxlen = sizeof(unsigned int),
- .mode = 0444,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT,
- .procname = "ip_conntrack_tcp_timeout_syn_sent",
- .data = &ip_ct_tcp_timeout_syn_sent,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV,
- .procname = "ip_conntrack_tcp_timeout_syn_recv",
- .data = &ip_ct_tcp_timeout_syn_recv,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED,
- .procname = "ip_conntrack_tcp_timeout_established",
- .data = &ip_ct_tcp_timeout_established,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT,
- .procname = "ip_conntrack_tcp_timeout_fin_wait",
- .data = &ip_ct_tcp_timeout_fin_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT,
- .procname = "ip_conntrack_tcp_timeout_close_wait",
- .data = &ip_ct_tcp_timeout_close_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK,
- .procname = "ip_conntrack_tcp_timeout_last_ack",
- .data = &ip_ct_tcp_timeout_last_ack,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT,
- .procname = "ip_conntrack_tcp_timeout_time_wait",
- .data = &ip_ct_tcp_timeout_time_wait,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE,
- .procname = "ip_conntrack_tcp_timeout_close",
- .data = &ip_ct_tcp_timeout_close,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT,
- .procname = "ip_conntrack_udp_timeout",
- .data = &ip_ct_udp_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM,
- .procname = "ip_conntrack_udp_timeout_stream",
- .data = &ip_ct_udp_timeout_stream,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT,
- .procname = "ip_conntrack_icmp_timeout",
- .data = &ip_ct_icmp_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT,
- .procname = "ip_conntrack_generic_timeout",
- .data = &ip_ct_generic_timeout,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID,
- .procname = "ip_conntrack_log_invalid",
- .data = &ip_ct_log_invalid,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_minmax,
- .strategy = &sysctl_intvec,
- .extra1 = &log_invalid_proto_min,
- .extra2 = &log_invalid_proto_max,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS,
- .procname = "ip_conntrack_tcp_timeout_max_retrans",
- .data = &ip_ct_tcp_timeout_max_retrans,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE,
- .procname = "ip_conntrack_tcp_loose",
- .data = &ip_ct_tcp_loose,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL,
- .procname = "ip_conntrack_tcp_be_liberal",
- .data = &ip_ct_tcp_be_liberal,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- {
- .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS,
- .procname = "ip_conntrack_tcp_max_retrans",
- .data = &ip_ct_tcp_max_retrans,
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = &proc_dointvec,
- },
- { .ctl_name = 0 }
-};
-
-#define NET_IP_CONNTRACK_MAX 2089
-
-static ctl_table ip_ct_netfilter_table[] = {
- {
- .ctl_name = NET_IPV4_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = ip_ct_sysctl_table,
- },
- {
- .ctl_name = NET_IP_CONNTRACK_MAX,
- .procname = "ip_conntrack_max",
- .data = &ip_conntrack_max,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = &proc_dointvec
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_ipv4_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = ip_ct_netfilter_table,
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ip_ct_net_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ip_ct_ipv4_table,
- },
- { .ctl_name = 0 }
-};
-
-EXPORT_SYMBOL(ip_ct_log_invalid);
-#endif /* CONFIG_SYSCTL */
-
-static int init_or_cleanup(int init)
-{
-#ifdef CONFIG_PROC_FS
- struct proc_dir_entry *proc, *proc_exp, *proc_stat;
-#endif
- int ret = 0;
-
- if (!init) goto cleanup;
-
- ret = ip_conntrack_init();
- if (ret < 0)
- goto cleanup_nothing;
-
-#ifdef CONFIG_PROC_FS
- ret = -ENOMEM;
- proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops);
- if (!proc) goto cleanup_init;
-
- proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440,
- &exp_file_ops);
- if (!proc_exp) goto cleanup_proc;
-
- proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat);
- if (!proc_stat)
- goto cleanup_proc_exp;
-
- proc_stat->proc_fops = &ct_cpu_seq_fops;
- proc_stat->owner = THIS_MODULE;
-#endif
-
- ret = nf_register_hook(&ip_conntrack_defrag_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register pre-routing defrag hook.\n");
- goto cleanup_proc_stat;
- }
- ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register local_out defrag hook.\n");
- goto cleanup_defragops;
- }
- ret = nf_register_hook(&ip_conntrack_in_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register pre-routing hook.\n");
- goto cleanup_defraglocalops;
- }
- ret = nf_register_hook(&ip_conntrack_local_out_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register local out hook.\n");
- goto cleanup_inops;
- }
- ret = nf_register_hook(&ip_conntrack_helper_in_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register local in helper hook.\n");
- goto cleanup_inandlocalops;
- }
- ret = nf_register_hook(&ip_conntrack_helper_out_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register postrouting helper hook.\n");
- goto cleanup_helperinops;
- }
- ret = nf_register_hook(&ip_conntrack_out_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register post-routing hook.\n");
- goto cleanup_helperoutops;
- }
- ret = nf_register_hook(&ip_conntrack_local_in_ops);
- if (ret < 0) {
- printk("ip_conntrack: can't register local in hook.\n");
- goto cleanup_inoutandlocalops;
- }
-#ifdef CONFIG_SYSCTL
- ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0);
- if (ip_ct_sysctl_header == NULL) {
- printk("ip_conntrack: can't register to sysctl.\n");
- ret = -ENOMEM;
- goto cleanup_localinops;
- }
-#endif
-
- return ret;
-
- cleanup:
- synchronize_net();
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(ip_ct_sysctl_header);
- cleanup_localinops:
-#endif
- nf_unregister_hook(&ip_conntrack_local_in_ops);
- cleanup_inoutandlocalops:
- nf_unregister_hook(&ip_conntrack_out_ops);
- cleanup_helperoutops:
- nf_unregister_hook(&ip_conntrack_helper_out_ops);
- cleanup_helperinops:
- nf_unregister_hook(&ip_conntrack_helper_in_ops);
- cleanup_inandlocalops:
- nf_unregister_hook(&ip_conntrack_local_out_ops);
- cleanup_inops:
- nf_unregister_hook(&ip_conntrack_in_ops);
- cleanup_defraglocalops:
- nf_unregister_hook(&ip_conntrack_defrag_local_out_ops);
- cleanup_defragops:
- nf_unregister_hook(&ip_conntrack_defrag_ops);
- cleanup_proc_stat:
-#ifdef CONFIG_PROC_FS
- remove_proc_entry("ip_conntrack", proc_net_stat);
- cleanup_proc_exp:
- proc_net_remove("ip_conntrack_expect");
- cleanup_proc:
- proc_net_remove("ip_conntrack");
- cleanup_init:
-#endif /* CONFIG_PROC_FS */
- ip_conntrack_cleanup();
- cleanup_nothing:
- return ret;
-}
-
-/* FIXME: Allow NULL functions and sub in pointers to generic for
- them. --RR */
-int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
-{
- int ret = 0;
-
- write_lock_bh(&ip_conntrack_lock);
- if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
- ret = -EBUSY;
- goto out;
- }
- ip_ct_protos[proto->proto] = proto;
- out:
- write_unlock_bh(&ip_conntrack_lock);
- return ret;
-}
-
-void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
-{
- write_lock_bh(&ip_conntrack_lock);
- ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
- write_unlock_bh(&ip_conntrack_lock);
-
- /* Somebody could be still looking at the proto in bh. */
- synchronize_net();
-
- /* Remove all contrack entries for this protocol */
- ip_ct_iterate_cleanup(kill_proto, &proto->proto);
-}
-
-static int __init init(void)
-{
- return init_or_cleanup(1);
-}
-
-static void __exit fini(void)
-{
- init_or_cleanup(0);
-}
-
-module_init(init);
-module_exit(fini);
-
-/* Some modules need us, but don't depend directly on any symbol.
- They should call this. */
-void need_conntrack(void)
-{
-}
-
-#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
-EXPORT_SYMBOL_GPL(ip_conntrack_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain);
-EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier);
-EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier);
-EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init);
-EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache);
-#endif
-EXPORT_SYMBOL(ip_conntrack_protocol_register);
-EXPORT_SYMBOL(ip_conntrack_protocol_unregister);
-EXPORT_SYMBOL(ip_ct_get_tuple);
-EXPORT_SYMBOL(invert_tuplepr);
-EXPORT_SYMBOL(ip_conntrack_alter_reply);
-EXPORT_SYMBOL(ip_conntrack_destroyed);
-EXPORT_SYMBOL(need_conntrack);
-EXPORT_SYMBOL(ip_conntrack_helper_register);
-EXPORT_SYMBOL(ip_conntrack_helper_unregister);
-EXPORT_SYMBOL(ip_ct_iterate_cleanup);
-EXPORT_SYMBOL(__ip_ct_refresh_acct);
-
-EXPORT_SYMBOL(ip_conntrack_expect_alloc);
-EXPORT_SYMBOL(ip_conntrack_expect_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_find);
-EXPORT_SYMBOL(ip_conntrack_expect_related);
-EXPORT_SYMBOL(ip_conntrack_unexpect_related);
-EXPORT_SYMBOL_GPL(ip_conntrack_expect_list);
-EXPORT_SYMBOL_GPL(ip_ct_unlink_expect);
-
-EXPORT_SYMBOL(ip_conntrack_tuple_taken);
-EXPORT_SYMBOL(ip_ct_gather_frags);
-EXPORT_SYMBOL(ip_conntrack_htable_size);
-EXPORT_SYMBOL(ip_conntrack_lock);
-EXPORT_SYMBOL(ip_conntrack_hash);
-EXPORT_SYMBOL(ip_conntrack_untracked);
-EXPORT_SYMBOL_GPL(ip_conntrack_find_get);
-#ifdef CONFIG_IP_NF_NAT_NEEDED
-EXPORT_SYMBOL(ip_conntrack_tcp_update);
-#endif
-
-EXPORT_SYMBOL_GPL(ip_conntrack_flush);
-EXPORT_SYMBOL_GPL(__ip_conntrack_find);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_alloc);
-EXPORT_SYMBOL_GPL(ip_conntrack_free);
-EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert);
-
-EXPORT_SYMBOL_GPL(ip_ct_remove_expectations);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_helper_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname);
-
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get);
-EXPORT_SYMBOL_GPL(ip_conntrack_proto_put);
-EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find);
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr);
-EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple);
-#endif
diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c
deleted file mode 100644
index d3c5a371f99..00000000000
--- a/net/ipv4/netfilter/ip_conntrack_tftp.c
+++ /dev/null
@@ -1,158 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * - port to newnat API
- *
- */
-
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp connection tracking helper");
-MODULE_LICENSE("GPL");
-
-#define MAX_PORTS 8
-static unsigned short ports[MAX_PORTS];
-static int ports_c;
-module_param_array(ports, ushort, &ports_c, 0400);
-MODULE_PARM_DESC(ports, "port numbers of tftp servers");
-
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
- __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp);
-EXPORT_SYMBOL_GPL(ip_nat_tftp_hook);
-
-static int tftp_help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct tftphdr _tftph, *tfh;
- struct ip_conntrack_expect *exp;
- unsigned int ret = NF_ACCEPT;
-
- tfh = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr),
- sizeof(_tftph), &_tftph);
- if (tfh == NULL)
- return NF_ACCEPT;
-
- switch (ntohs(tfh->opcode)) {
- /* RRQ and WRQ works the same way */
- case TFTP_OPCODE_READ:
- case TFTP_OPCODE_WRITE:
- DEBUGP("");
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- exp = ip_conntrack_expect_alloc(ct);
- if (exp == NULL)
- return NF_DROP;
-
- exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
- exp->mask.src.ip = 0xffffffff;
- exp->mask.dst.ip = 0xffffffff;
- exp->mask.dst.u.udp.port = 0xffff;
- exp->mask.dst.protonum = 0xff;
- exp->expectfn = NULL;
- exp->flags = 0;
-
- DEBUGP("expect: ");
- DUMP_TUPLE(&exp->tuple);
- DUMP_TUPLE(&exp->mask);
- if (ip_nat_tftp_hook)
- ret = ip_nat_tftp_hook(pskb, ctinfo, exp);
- else if (ip_conntrack_expect_related(exp) != 0)
- ret = NF_DROP;
- ip_conntrack_expect_put(exp);
- break;
- case TFTP_OPCODE_DATA:
- case TFTP_OPCODE_ACK:
- DEBUGP("Data/ACK opcode\n");
- break;
- case TFTP_OPCODE_ERROR:
- DEBUGP("Error opcode\n");
- break;
- default:
- DEBUGP("Unknown opcode\n");
- }
- return NF_ACCEPT;
-}
-
-static struct ip_conntrack_helper tftp[MAX_PORTS];
-static char tftp_names[MAX_PORTS][sizeof("tftp-65535")];
-
-static void fini(void)
-{
- int i;
-
- for (i = 0 ; i < ports_c; i++) {
- DEBUGP("unregistering helper for port %d\n",
- ports[i]);
- ip_conntrack_helper_unregister(&tftp[i]);
- }
-}
-
-static int __init init(void)
-{
- int i, ret;
- char *tmpname;
-
- if (ports_c == 0)
- ports[ports_c++] = TFTP_PORT;
-
- for (i = 0; i < ports_c; i++) {
- /* Create helper structure */
- memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper));
-
- tftp[i].tuple.dst.protonum = IPPROTO_UDP;
- tftp[i].tuple.src.u.udp.port = htons(ports[i]);
- tftp[i].mask.dst.protonum = 0xFF;
- tftp[i].mask.src.u.udp.port = 0xFFFF;
- tftp[i].max_expected = 1;
- tftp[i].timeout = 5 * 60; /* 5 minutes */
- tftp[i].me = THIS_MODULE;
- tftp[i].help = tftp_help;
-
- tmpname = &tftp_names[i][0];
- if (ports[i] == TFTP_PORT)
- sprintf(tmpname, "tftp");
- else
- sprintf(tmpname, "tftp-%d", i);
- tftp[i].name = tmpname;
-
- DEBUGP("port #%d: %d\n", i, ports[i]);
-
- ret=ip_conntrack_helper_register(&tftp[i]);
- if (ret) {
- printk("ERROR registering helper for port %d\n",
- ports[i]);
- fini();
- return(ret);
- }
- }
- return(0);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c
deleted file mode 100644
index 706c8074f42..00000000000
--- a/net/ipv4/netfilter/ip_nat_amanda.c
+++ /dev/null
@@ -1,86 +0,0 @@
-/* Amanda extension for TCP NAT alteration.
- * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca>
- * based on a copy of HW's ip_nat_irc.c as well as other modules
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- *
- * Module load syntax:
- * insmod ip_nat_amanda.o
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
-
-
-MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>");
-MODULE_DESCRIPTION("Amanda NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp)
-{
- char buffer[sizeof("65535")];
- u_int16_t port;
- unsigned int ret;
-
- /* Connection comes from client. */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = IP_CT_DIR_ORIGINAL;
-
- /* When you see the packet, we need to NAT it the same as the
- * this one (ie. same IP: it will be TCP and master is UDP). */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- sprintf(buffer, "%u", port);
- ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo,
- matchoff, matchlen,
- buffer, strlen(buffer));
- if (ret != NF_ACCEPT)
- ip_conntrack_unexpect_related(exp);
- return ret;
-}
-
-static void __exit fini(void)
-{
- ip_nat_amanda_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
-}
-
-static int __init init(void)
-{
- BUG_ON(ip_nat_amanda_hook);
- ip_nat_amanda_hook = help;
- return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
deleted file mode 100644
index c1a61462507..00000000000
--- a/net/ipv4/netfilter/ip_nat_core.c
+++ /dev/null
@@ -1,644 +0,0 @@
-/* NAT for netfilter; shared with compatibility layer. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/vmalloc.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h> /* For tcp_prot in getorigdst */
-#include <linux/icmp.h>
-#include <linux/udp.h>
-#include <linux/jhash.h>
-
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DEFINE_RWLOCK(ip_nat_lock);
-
-/* Calculated at init based on memory size */
-static unsigned int ip_nat_htable_size;
-
-static struct list_head *bysource;
-
-#define MAX_IP_NAT_PROTO 256
-static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO];
-
-static inline struct ip_nat_protocol *
-__ip_nat_proto_find(u_int8_t protonum)
-{
- return ip_nat_protos[protonum];
-}
-
-struct ip_nat_protocol *
-ip_nat_proto_find_get(u_int8_t protonum)
-{
- struct ip_nat_protocol *p;
-
- /* we need to disable preemption to make sure 'p' doesn't get
- * removed until we've grabbed the reference */
- preempt_disable();
- p = __ip_nat_proto_find(protonum);
- if (!try_module_get(p->me))
- p = &ip_nat_unknown_protocol;
- preempt_enable();
-
- return p;
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_find_get);
-
-void
-ip_nat_proto_put(struct ip_nat_protocol *p)
-{
- module_put(p->me);
-}
-EXPORT_SYMBOL_GPL(ip_nat_proto_put);
-
-/* We keep an extra hash for each conntrack, for fast searching. */
-static inline unsigned int
-hash_by_src(const struct ip_conntrack_tuple *tuple)
-{
- /* Original src, to ensure we map it consistently if poss. */
- return jhash_3words(tuple->src.ip, tuple->src.u.all,
- tuple->dst.protonum, 0) % ip_nat_htable_size;
-}
-
-/* Noone using conntrack by the time this called. */
-static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
-{
- if (!(conn->status & IPS_NAT_DONE_MASK))
- return;
-
- write_lock_bh(&ip_nat_lock);
- list_del(&conn->nat.info.bysource);
- write_unlock_bh(&ip_nat_lock);
-}
-
-/* We do checksum mangling, so if they were wrong before they're still
- * wrong. Also works for incomplete packets (eg. ICMP dest
- * unreachables.) */
-u_int16_t
-ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
- u_int32_t diffs[] = { oldvalinv, newval };
- return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
- oldcheck^0xFFFF));
-}
-EXPORT_SYMBOL(ip_nat_cheat_check);
-
-/* Is this tuple already taken? (not by us) */
-int
-ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack *ignored_conntrack)
-{
- /* Conntrack tracking doesn't keep track of outgoing tuples; only
- incoming ones. NAT means they don't have a fixed mapping,
- so we invert the tuple and look for the incoming reply.
-
- We could keep a separate hash if this proves too slow. */
- struct ip_conntrack_tuple reply;
-
- invert_tuplepr(&reply, tuple);
- return ip_conntrack_tuple_taken(&reply, ignored_conntrack);
-}
-EXPORT_SYMBOL(ip_nat_used_tuple);
-
-/* If we source map this tuple so reply looks like reply_tuple, will
- * that meet the constraints of range. */
-static int
-in_range(const struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range)
-{
- struct ip_nat_protocol *proto =
- __ip_nat_proto_find(tuple->dst.protonum);
-
- /* If we are supposed to map IPs, then we must be in the
- range specified, otherwise let this drag us onto a new src IP. */
- if (range->flags & IP_NAT_RANGE_MAP_IPS) {
- if (ntohl(tuple->src.ip) < ntohl(range->min_ip)
- || ntohl(tuple->src.ip) > ntohl(range->max_ip))
- return 0;
- }
-
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
- || proto->in_range(tuple, IP_NAT_MANIP_SRC,
- &range->min, &range->max))
- return 1;
-
- return 0;
-}
-
-static inline int
-same_src(const struct ip_conntrack *ct,
- const struct ip_conntrack_tuple *tuple)
-{
- return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum
- == tuple->dst.protonum
- && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip
- == tuple->src.ip
- && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all
- == tuple->src.u.all);
-}
-
-/* Only called for SRC manip */
-static int
-find_appropriate_src(const struct ip_conntrack_tuple *tuple,
- struct ip_conntrack_tuple *result,
- const struct ip_nat_range *range)
-{
- unsigned int h = hash_by_src(tuple);
- struct ip_conntrack *ct;
-
- read_lock_bh(&ip_nat_lock);
- list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
- if (same_src(ct, tuple)) {
- /* Copy source part from reply tuple. */
- invert_tuplepr(result,
- &ct->tuplehash[IP_CT_DIR_REPLY].tuple);
- result->dst = tuple->dst;
-
- if (in_range(result, range)) {
- read_unlock_bh(&ip_nat_lock);
- return 1;
- }
- }
- }
- read_unlock_bh(&ip_nat_lock);
- return 0;
-}
-
-/* For [FUTURE] fragmentation handling, we want the least-used
- src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus
- if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports
- 1-65535, we don't do pro-rata allocation based on ports; we choose
- the ip with the lowest src-ip/dst-ip/proto usage.
-*/
-static void
-find_best_ips_proto(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- const struct ip_conntrack *conntrack,
- enum ip_nat_manip_type maniptype)
-{
- u_int32_t *var_ipp;
- /* Host order */
- u_int32_t minip, maxip, j;
-
- /* No IP mapping? Do nothing. */
- if (!(range->flags & IP_NAT_RANGE_MAP_IPS))
- return;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- var_ipp = &tuple->src.ip;
- else
- var_ipp = &tuple->dst.ip;
-
- /* Fast path: only one choice. */
- if (range->min_ip == range->max_ip) {
- *var_ipp = range->min_ip;
- return;
- }
-
- /* Hashing source and destination IPs gives a fairly even
- * spread in practice (if there are a small number of IPs
- * involved, there usually aren't that many connections
- * anyway). The consistency means that servers see the same
- * client coming from the same IP (some Internet Banking sites
- * like this), even across reboots. */
- minip = ntohl(range->min_ip);
- maxip = ntohl(range->max_ip);
- j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0);
- *var_ipp = htonl(minip + j % (maxip - minip + 1));
-}
-
-/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING,
- * we change the source to map into the range. For NF_IP_PRE_ROUTING
- * and NF_IP_LOCAL_OUT, we change the destination to map into the
- * range. It might not be possible to get a unique tuple, but we try.
- * At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
-static void
-get_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_conntrack_tuple *orig_tuple,
- const struct ip_nat_range *range,
- struct ip_conntrack *conntrack,
- enum ip_nat_manip_type maniptype)
-{
- struct ip_nat_protocol *proto;
-
- /* 1) If this srcip/proto/src-proto-part is currently mapped,
- and that same mapping gives a unique tuple within the given
- range, use that.
-
- This is only required for source (ie. NAT/masq) mappings.
- So far, we don't do local source mappings, so multiple
- manips not an issue. */
- if (maniptype == IP_NAT_MANIP_SRC) {
- if (find_appropriate_src(orig_tuple, tuple, range)) {
- DEBUGP("get_unique_tuple: Found current src map\n");
- if (!ip_nat_used_tuple(tuple, conntrack))
- return;
- }
- }
-
- /* 2) Select the least-used IP/proto combination in the given
- range. */
- *tuple = *orig_tuple;
- find_best_ips_proto(tuple, range, conntrack, maniptype);
-
- /* 3) The per-protocol part of the manip is made to map into
- the range to make a unique tuple. */
-
- proto = ip_nat_proto_find_get(orig_tuple->dst.protonum);
-
- /* Only bother mapping if it's not already in range and unique */
- if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)
- || proto->in_range(tuple, maniptype, &range->min, &range->max))
- && !ip_nat_used_tuple(tuple, conntrack)) {
- ip_nat_proto_put(proto);
- return;
- }
-
- /* Last change: get protocol to try to obtain unique tuple. */
- proto->unique_tuple(tuple, range, maniptype, conntrack);
-
- ip_nat_proto_put(proto);
-}
-
-unsigned int
-ip_nat_setup_info(struct ip_conntrack *conntrack,
- const struct ip_nat_range *range,
- unsigned int hooknum)
-{
- struct ip_conntrack_tuple curr_tuple, new_tuple;
- struct ip_nat_info *info = &conntrack->nat.info;
- int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK);
- enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
- || hooknum == NF_IP_POST_ROUTING
- || hooknum == NF_IP_LOCAL_IN
- || hooknum == NF_IP_LOCAL_OUT);
- BUG_ON(ip_nat_initialized(conntrack, maniptype));
-
- /* What we've got will look like inverse of reply. Normally
- this is what is in the conntrack, except for prior
- manipulations (future optimization: if num_manips == 0,
- orig_tp =
- conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */
- invert_tuplepr(&curr_tuple,
- &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple);
-
- get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype);
-
- if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) {
- struct ip_conntrack_tuple reply;
-
- /* Alter conntrack table so will recognize replies. */
- invert_tuplepr(&reply, &new_tuple);
- ip_conntrack_alter_reply(conntrack, &reply);
-
- /* Non-atomic: we own this at the moment. */
- if (maniptype == IP_NAT_MANIP_SRC)
- conntrack->status |= IPS_SRC_NAT;
- else
- conntrack->status |= IPS_DST_NAT;
- }
-
- /* Place in source hash if this is the first time. */
- if (have_to_hash) {
- unsigned int srchash
- = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
- .tuple);
- write_lock_bh(&ip_nat_lock);
- list_add(&info->bysource, &bysource[srchash]);
- write_unlock_bh(&ip_nat_lock);
- }
-
- /* It's done. */
- if (maniptype == IP_NAT_MANIP_DST)
- set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status);
- else
- set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status);
-
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL(ip_nat_setup_info);
-
-/* Returns true if succeeded. */
-static int
-manip_pkt(u_int16_t proto,
- struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *target,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph;
- struct ip_nat_protocol *p;
-
- if (!skb_make_writable(pskb, iphdroff + sizeof(*iph)))
- return 0;
-
- iph = (void *)(*pskb)->data + iphdroff;
-
- /* Manipulate protcol part. */
- p = ip_nat_proto_find_get(proto);
- if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) {
- ip_nat_proto_put(p);
- return 0;
- }
- ip_nat_proto_put(p);
-
- iph = (void *)(*pskb)->data + iphdroff;
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip,
- iph->check);
- iph->saddr = target->src.ip;
- } else {
- iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip,
- iph->check);
- iph->daddr = target->dst.ip;
- }
- return 1;
-}
-
-/* Do packet manipulations according to ip_nat_setup_info. */
-unsigned int ip_nat_packet(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int hooknum,
- struct sk_buff **pskb)
-{
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- unsigned long statusbit;
- enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum);
-
- if (mtype == IP_NAT_MANIP_SRC)
- statusbit = IPS_SRC_NAT;
- else
- statusbit = IPS_DST_NAT;
-
- /* Invert if this is reply dir. */
- if (dir == IP_CT_DIR_REPLY)
- statusbit ^= IPS_NAT_MASK;
-
- /* Non-atomic: these bits don't change. */
- if (ct->status & statusbit) {
- struct ip_conntrack_tuple target;
-
- /* We are aiming to look like inverse of other direction. */
- invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
-
- if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype))
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(ip_nat_packet);
-
-/* Dir is direction ICMP is coming from (opposite to packet it contains) */
-int ip_nat_icmp_reply_translation(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_nat_manip_type manip,
- enum ip_conntrack_dir dir)
-{
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } *inside;
- struct ip_conntrack_tuple inner, target;
- int hdrlen = (*pskb)->nh.iph->ihl * 4;
-
- if (!skb_make_writable(pskb, hdrlen + sizeof(*inside)))
- return 0;
-
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
-
- /* We're actually going to mangle it beyond trivial checksum
- adjustment, so make sure the current checksum is correct. */
- if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) {
- hdrlen = (*pskb)->nh.iph->ihl * 4;
- if ((u16)csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen, 0)))
- return 0;
- }
-
- /* Must be RELATED */
- IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED ||
- (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY);
-
- /* Redirects on non-null nats must be dropped, else they'll
- start talking to each other without our translation, and be
- confused... --RR */
- if (inside->icmp.type == ICMP_REDIRECT) {
- /* If NAT isn't finished, assume it and drop. */
- if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
- return 0;
-
- if (ct->status & IPS_NAT_MASK)
- return 0;
- }
-
- DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n",
- *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY");
-
- if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 +
- sizeof(struct icmphdr) + inside->ip.ihl*4,
- &inner,
- __ip_conntrack_proto_find(inside->ip.protocol)))
- return 0;
-
- /* Change inner back to look like incoming packet. We do the
- opposite manip on this hook to normal, because it might not
- pass all hooks (locally-generated ICMP). Consider incoming
- packet: PREROUTING (DST manip), routing produces ICMP, goes
- through POSTROUTING (which must correct the DST manip). */
- if (!manip_pkt(inside->ip.protocol, pskb,
- (*pskb)->nh.iph->ihl*4
- + sizeof(inside->icmp),
- &ct->tuplehash[!dir].tuple,
- !manip))
- return 0;
-
- /* Reloading "inside" here since manip_pkt inner. */
- inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- inside->icmp.checksum = 0;
- inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen,
- (*pskb)->len - hdrlen,
- 0));
-
- /* Change outer to look the reply to an incoming packet
- * (proto 0 means don't invert per-proto part). */
-
- /* Obviously, we need to NAT destination IP, but source IP
- should be NAT'ed only if it is from a NAT'd host.
-
- Explanation: some people use NAT for anonymizing. Also,
- CERT recommends dropping all packets from private IP
- addresses (although ICMP errors from internal links with
- such addresses are not too uncommon, as Alan Cox points
- out) */
- if (manip != IP_NAT_MANIP_SRC
- || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) {
- invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
- if (!manip_pkt(0, pskb, 0, &target, manip))
- return 0;
- }
-
- return 1;
-}
-EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation);
-
-/* Protocol registration. */
-int ip_nat_protocol_register(struct ip_nat_protocol *proto)
-{
- int ret = 0;
-
- write_lock_bh(&ip_nat_lock);
- if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
- ret = -EBUSY;
- goto out;
- }
- ip_nat_protos[proto->protonum] = proto;
- out:
- write_unlock_bh(&ip_nat_lock);
- return ret;
-}
-EXPORT_SYMBOL(ip_nat_protocol_register);
-
-/* Noone stores the protocol anywhere; simply delete it. */
-void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
-{
- write_lock_bh(&ip_nat_lock);
- ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
- write_unlock_bh(&ip_nat_lock);
-
- /* Someone could be still looking at the proto in a bh. */
- synchronize_net();
-}
-EXPORT_SYMBOL(ip_nat_protocol_unregister);
-
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
-int
-ip_nat_port_range_to_nfattr(struct sk_buff *skb,
- const struct ip_nat_range *range)
-{
- NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t),
- &range->min.tcp.port);
- NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t),
- &range->max.tcp.port);
-
- return 0;
-
-nfattr_failure:
- return -1;
-}
-
-int
-ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range)
-{
- int ret = 0;
-
- /* we have to return whether we actually parsed something or not */
-
- if (tb[CTA_PROTONAT_PORT_MIN-1]) {
- ret = 1;
- range->min.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]);
- }
-
- if (!tb[CTA_PROTONAT_PORT_MAX-1]) {
- if (ret)
- range->max.tcp.port = range->min.tcp.port;
- } else {
- ret = 1;
- range->max.tcp.port =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]);
- }
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range);
-EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr);
-#endif
-
-static int __init ip_nat_init(void)
-{
- size_t i;
-
- /* Leave them the same for the moment. */
- ip_nat_htable_size = ip_conntrack_htable_size;
-
- /* One vmalloc for both hash tables */
- bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size);
- if (!bysource)
- return -ENOMEM;
-
- /* Sew in builtin protocols. */
- write_lock_bh(&ip_nat_lock);
- for (i = 0; i < MAX_IP_NAT_PROTO; i++)
- ip_nat_protos[i] = &ip_nat_unknown_protocol;
- ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
- ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
- ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
- write_unlock_bh(&ip_nat_lock);
-
- for (i = 0; i < ip_nat_htable_size; i++) {
- INIT_LIST_HEAD(&bysource[i]);
- }
-
- /* FIXME: Man, this is a hack. <SIGH> */
- IP_NF_ASSERT(ip_conntrack_destroyed == NULL);
- ip_conntrack_destroyed = &ip_nat_cleanup_conntrack;
-
- /* Initialize fake conntrack so that NAT will skip it */
- ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK;
- return 0;
-}
-
-/* Clear NAT section of all conntracks, in case we're loaded again. */
-static int clean_nat(struct ip_conntrack *i, void *data)
-{
- memset(&i->nat, 0, sizeof(i->nat));
- i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST);
- return 0;
-}
-
-static void __exit ip_nat_cleanup(void)
-{
- ip_ct_iterate_cleanup(&clean_nat, NULL);
- ip_conntrack_destroyed = NULL;
- vfree(bysource);
-}
-
-MODULE_LICENSE("GPL");
-
-module_init(ip_nat_init);
-module_exit(ip_nat_cleanup);
diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c
deleted file mode 100644
index b8daab3c64a..00000000000
--- a/net/ipv4/netfilter/ip_nat_ftp.c
+++ /dev/null
@@ -1,181 +0,0 @@
-/* FTP extension for TCP NAT alteration. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/moduleparam.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>");
-MODULE_DESCRIPTION("ftp NAT helper");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* FIXME: Time out? --RR */
-
-static int
-mangle_rfc959_packet(struct sk_buff **pskb,
- u_int32_t newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];
-
- sprintf(buffer, "%u,%u,%u,%u,%u,%u",
- NIPQUAD(newip), port>>8, port&0xFF);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_eprt_packet(struct sk_buff **pskb,
- u_int32_t newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("|1|255.255.255.255|65535|")];
-
- sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-/* |1|132.235.1.2|6275| */
-static int
-mangle_epsv_packet(struct sk_buff **pskb,
- u_int32_t newip,
- u_int16_t port,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- u32 *seq)
-{
- char buffer[sizeof("|||65535|")];
-
- sprintf(buffer, "|||%u|", port);
-
- DEBUGP("calling ip_nat_mangle_tcp_packet\n");
-
- *seq += strlen(buffer) - matchlen;
- return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff,
- matchlen, buffer, strlen(buffer));
-}
-
-static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t,
- unsigned int,
- unsigned int,
- struct ip_conntrack *,
- enum ip_conntrack_info,
- u32 *seq)
-= { [IP_CT_FTP_PORT] = mangle_rfc959_packet,
- [IP_CT_FTP_PASV] = mangle_rfc959_packet,
- [IP_CT_FTP_EPRT] = mangle_eprt_packet,
- [IP_CT_FTP_EPSV] = mangle_epsv_packet
-};
-
-/* So, this packet has hit the connection tracking matching code.
- Mangle it, and change the expectation to match the new version. */
-static unsigned int ip_nat_ftp(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- enum ip_ct_ftp_type type,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp,
- u32 *seq)
-{
- u_int32_t newip;
- u_int16_t port;
- int dir = CTINFO2DIR(ctinfo);
- struct ip_conntrack *ct = exp->master;
-
- DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen);
-
- /* Connection will come from wherever this packet goes, hence !dir */
- newip = ct->tuplehash[!dir].tuple.dst.ip;
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = !dir;
-
- /* When you see the packet, we need to NAT it the same as the
- * this one. */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo,
- seq)) {
- ip_conntrack_unexpect_related(exp);
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-static void __exit fini(void)
-{
- ip_nat_ftp_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
-}
-
-static int __init init(void)
-{
- BUG_ON(ip_nat_ftp_hook);
- ip_nat_ftp_hook = ip_nat_ftp;
- return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
- return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c
deleted file mode 100644
index 5d506e0564d..00000000000
--- a/net/ipv4/netfilter/ip_nat_helper.c
+++ /dev/null
@@ -1,431 +0,0 @@
-/* ip_nat_helper.c - generic support functions for NAT helpers
- *
- * (C) 2000-2002 Harald Welte <laforge@netfilter.org>
- * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 14 Jan 2002 Harald Welte <laforge@gnumonks.org>:
- * - add support for SACK adjustment
- * 14 Mar 2002 Harald Welte <laforge@gnumonks.org>:
- * - merge SACK support into newnat API
- * 16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>:
- * - make ip_nat_resize_packet more generic (TCP and UDP)
- * - add ip_nat_mangle_udp_packet
- */
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/types.h>
-#include <linux/timer.h>
-#include <linux/skbuff.h>
-#include <linux/netfilter_ipv4.h>
-#include <net/checksum.h>
-#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/udp.h>
-
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/listhelp.h>
-
-#if 0
-#define DEBUGP printk
-#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
-#else
-#define DEBUGP(format, args...)
-#define DUMP_OFFSET(x)
-#endif
-
-static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
-
-/* Setup TCP sequence correction given this change at this sequence */
-static inline void
-adjust_tcp_sequence(u32 seq,
- int sizediff,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- int dir;
- struct ip_nat_seq *this_way, *other_way;
-
- DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
- (*skb)->len, new_size);
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &ct->nat.info.seq[dir];
- other_way = &ct->nat.info.seq[!dir];
-
- DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
- DUMP_OFFSET(this_way);
-
- spin_lock_bh(&ip_nat_seqofs_lock);
-
- /* SYN adjust. If it's uninitialized, or this is after last
- * correction, record it: we don't handle more than one
- * adjustment in the window, but do deal with common case of a
- * retransmit */
- if (this_way->offset_before == this_way->offset_after
- || before(this_way->correction_pos, seq)) {
- this_way->correction_pos = seq;
- this_way->offset_before = this_way->offset_after;
- this_way->offset_after += sizediff;
- }
- spin_unlock_bh(&ip_nat_seqofs_lock);
-
- DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
- DUMP_OFFSET(this_way);
-}
-
-/* Frobs data inside this packet, which is linear. */
-static void mangle_contents(struct sk_buff *skb,
- unsigned int dataoff,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- unsigned char *data;
-
- BUG_ON(skb_is_nonlinear(skb));
- data = (unsigned char *)skb->nh.iph + dataoff;
-
- /* move post-replacement */
- memmove(data + match_offset + rep_len,
- data + match_offset + match_len,
- skb->tail - (data + match_offset + match_len));
-
- /* insert data from buffer */
- memcpy(data + match_offset, rep_buffer, rep_len);
-
- /* update skb info */
- if (rep_len > match_len) {
- DEBUGP("ip_nat_mangle_packet: Extending packet by "
- "%u from %u bytes\n", rep_len - match_len,
- skb->len);
- skb_put(skb, rep_len - match_len);
- } else {
- DEBUGP("ip_nat_mangle_packet: Shrinking packet from "
- "%u from %u bytes\n", match_len - rep_len,
- skb->len);
- __skb_trim(skb, skb->len + rep_len - match_len);
- }
-
- /* fix IP hdr checksum information */
- skb->nh.iph->tot_len = htons(skb->len);
- ip_send_check(skb->nh.iph);
-}
-
-/* Unusual, but possible case. */
-static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
-{
- struct sk_buff *nskb;
-
- if ((*pskb)->len + extra > 65535)
- return 0;
-
- nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC);
- if (!nskb)
- return 0;
-
- /* Transfer socket to new skb. */
- if ((*pskb)->sk)
- skb_set_owner_w(nskb, (*pskb)->sk);
- kfree_skb(*pskb);
- *pskb = nskb;
- return 1;
-}
-
-/* Generic function for mangling variable-length address changes inside
- * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX
- * command in FTP).
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * */
-int
-ip_nat_mangle_tcp_packet(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- struct iphdr *iph;
- struct tcphdr *tcph;
- int datalen;
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return 0;
-
- if (rep_len > match_len
- && rep_len - match_len > skb_tailroom(*pskb)
- && !enlarge_skb(pskb, rep_len - match_len))
- return 0;
-
- SKB_LINEAR_ASSERT(*pskb);
-
- iph = (*pskb)->nh.iph;
- tcph = (void *)iph + iph->ihl*4;
-
- mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4,
- match_offset, match_len, rep_buffer, rep_len);
-
- datalen = (*pskb)->len - iph->ihl*4;
- tcph->check = 0;
- tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr,
- csum_partial((char *)tcph, datalen, 0));
-
- if (rep_len != match_len) {
- set_bit(IPS_SEQ_ADJUST_BIT, &ct->status);
- adjust_tcp_sequence(ntohl(tcph->seq),
- (int)rep_len - (int)match_len,
- ct, ctinfo);
- /* Tell TCP window tracking about seq change */
- ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo));
- }
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_tcp_packet);
-
-/* Generic function for mangling variable-length address changes inside
- * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX
- * command in the Amanda protocol)
- *
- * Takes care about all the nasty sequence number changes, checksumming,
- * skb enlargement, ...
- *
- * XXX - This function could be merged with ip_nat_mangle_tcp_packet which
- * should be fairly easy to do.
- */
-int
-ip_nat_mangle_udp_packet(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- unsigned int match_offset,
- unsigned int match_len,
- const char *rep_buffer,
- unsigned int rep_len)
-{
- struct iphdr *iph;
- struct udphdr *udph;
-
- /* UDP helpers might accidentally mangle the wrong packet */
- iph = (*pskb)->nh.iph;
- if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) +
- match_offset + match_len)
- return 0;
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return 0;
-
- if (rep_len > match_len
- && rep_len - match_len > skb_tailroom(*pskb)
- && !enlarge_skb(pskb, rep_len - match_len))
- return 0;
-
- iph = (*pskb)->nh.iph;
- udph = (void *)iph + iph->ihl*4;
- mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph),
- match_offset, match_len, rep_buffer, rep_len);
-
- /* update the length of the UDP packet */
- udph->len = htons((*pskb)->len - iph->ihl*4);
-
- /* fix udp checksum if udp checksum was previously calculated */
- if (udph->check) {
- int datalen = (*pskb)->len - iph->ihl * 4;
- udph->check = 0;
- udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
- datalen, IPPROTO_UDP,
- csum_partial((char *)udph,
- datalen, 0));
- }
-
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_mangle_udp_packet);
-
-/* Adjust one found SACK option including checksum correction */
-static void
-sack_adjust(struct sk_buff *skb,
- struct tcphdr *tcph,
- unsigned int sackoff,
- unsigned int sackend,
- struct ip_nat_seq *natseq)
-{
- while (sackoff < sackend) {
- struct tcp_sack_block *sack;
- u_int32_t new_start_seq, new_end_seq;
-
- sack = (void *)skb->data + sackoff;
- if (after(ntohl(sack->start_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_start_seq = ntohl(sack->start_seq)
- - natseq->offset_after;
- else
- new_start_seq = ntohl(sack->start_seq)
- - natseq->offset_before;
- new_start_seq = htonl(new_start_seq);
-
- if (after(ntohl(sack->end_seq) - natseq->offset_before,
- natseq->correction_pos))
- new_end_seq = ntohl(sack->end_seq)
- - natseq->offset_after;
- else
- new_end_seq = ntohl(sack->end_seq)
- - natseq->offset_before;
- new_end_seq = htonl(new_end_seq);
-
- DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
- ntohl(sack->start_seq), new_start_seq,
- ntohl(sack->end_seq), new_end_seq);
-
- tcph->check =
- ip_nat_cheat_check(~sack->start_seq, new_start_seq,
- ip_nat_cheat_check(~sack->end_seq,
- new_end_seq,
- tcph->check));
- sack->start_seq = new_start_seq;
- sack->end_seq = new_end_seq;
- sackoff += sizeof(*sack);
- }
-}
-
-/* TCP SACK sequence number adjustment */
-static inline unsigned int
-ip_nat_sack_adjust(struct sk_buff **pskb,
- struct tcphdr *tcph,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- unsigned int dir, optoff, optend;
-
- optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr);
- optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4;
-
- if (!skb_make_writable(pskb, optend))
- return 0;
-
- dir = CTINFO2DIR(ctinfo);
-
- while (optoff < optend) {
- /* Usually: option, length. */
- unsigned char *op = (*pskb)->data + optoff;
-
- switch (op[0]) {
- case TCPOPT_EOL:
- return 1;
- case TCPOPT_NOP:
- optoff++;
- continue;
- default:
- /* no partial options */
- if (optoff + 1 == optend
- || optoff + op[1] > optend
- || op[1] < 2)
- return 0;
- if (op[0] == TCPOPT_SACK
- && op[1] >= 2+TCPOLEN_SACK_PERBLOCK
- && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0)
- sack_adjust(*pskb, tcph, optoff+2,
- optoff+op[1],
- &ct->nat.info.seq[!dir]);
- optoff += op[1];
- }
- }
- return 1;
-}
-
-/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */
-int
-ip_nat_seq_adjust(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo)
-{
- struct tcphdr *tcph;
- int dir, newseq, newack;
- struct ip_nat_seq *this_way, *other_way;
-
- dir = CTINFO2DIR(ctinfo);
-
- this_way = &ct->nat.info.seq[dir];
- other_way = &ct->nat.info.seq[!dir];
-
- if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
- return 0;
-
- tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4;
- if (after(ntohl(tcph->seq), this_way->correction_pos))
- newseq = ntohl(tcph->seq) + this_way->offset_after;
- else
- newseq = ntohl(tcph->seq) + this_way->offset_before;
- newseq = htonl(newseq);
-
- if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
- other_way->correction_pos))
- newack = ntohl(tcph->ack_seq) - other_way->offset_after;
- else
- newack = ntohl(tcph->ack_seq) - other_way->offset_before;
- newack = htonl(newack);
-
- tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
- ip_nat_cheat_check(~tcph->ack_seq,
- newack,
- tcph->check));
-
- DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
- ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
- ntohl(newack));
-
- tcph->seq = newseq;
- tcph->ack_seq = newack;
-
- if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo))
- return 0;
-
- ip_conntrack_tcp_update(*pskb, ct, dir);
-
- return 1;
-}
-EXPORT_SYMBOL(ip_nat_seq_adjust);
-
-/* Setup NAT on this expected conntrack so it follows master. */
-/* If we fail to get a free NAT slot, we'll get dropped on confirm */
-void ip_nat_follow_master(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- struct ip_nat_range range;
-
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED);
- range.min = range.max = exp->saved_proto;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.ip;
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-EXPORT_SYMBOL(ip_nat_follow_master);
diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c
deleted file mode 100644
index ac004895781..00000000000
--- a/net/ipv4/netfilter/ip_nat_helper_pptp.c
+++ /dev/null
@@ -1,414 +0,0 @@
-/*
- * ip_nat_pptp.c - Version 3.0
- *
- * NAT support for PPTP (Point to Point Tunneling Protocol).
- * PPTP is a a protocol for creating virtual private networks.
- * It is a specification defined by Microsoft and some vendors
- * working with Microsoft. PPTP is built on top of a modified
- * version of the Internet Generic Routing Encapsulation Protocol.
- * GRE is defined in RFC 1701 and RFC 1702. Documentation of
- * PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- * TODO: - NAT to a unique tuple, not to TCP source port
- * (needs netfilter tuple reservation)
- *
- * Changes:
- * 2002-02-10 - Version 1.3
- * - Use ip_nat_mangle_tcp_packet() because of cloned skb's
- * in local connections (Philip Craig <philipc@snapgear.com>)
- * - add checks for magicCookie and pptp version
- * - make argument list of pptp_{out,in}bound_packet() shorter
- * - move to C99 style initializers
- * - print version number at module loadtime
- * 2003-09-22 - Version 1.5
- * - use SNATed tcp sourceport as callid, since we get called before
- * TCP header is mangled (Philip Craig <philipc@snapgear.com>)
- * 2004-10-22 - Version 2.0
- * - kernel 2.6.x version
- * 2005-06-10 - Version 3.0
- * - kernel >= 2.6.11 version,
- * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/)
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_pptp.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-#include <linux/netfilter_ipv4/ip_conntrack_pptp.h>
-
-#define IP_NAT_PPTP_VERSION "3.0"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
-
-
-#if 0
-extern const char *pptp_msg_name[];
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
- __FUNCTION__, ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static void pptp_nat_expected(struct ip_conntrack *ct,
- struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *master = ct->master;
- struct ip_conntrack_expect *other_exp;
- struct ip_conntrack_tuple t;
- struct ip_ct_pptp_master *ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info;
- struct ip_nat_range range;
-
- ct_pptp_info = &master->help.ct_pptp_info;
- nat_pptp_info = &master->nat.help.nat_pptp_info;
-
- /* And here goes the grand finale of corrosion... */
-
- if (exp->dir == IP_CT_DIR_ORIGINAL) {
- DEBUGP("we are PNS->PAC\n");
- /* therefore, build tuple for PAC->PNS */
- t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip;
- t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id);
- t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip;
- t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id);
- t.dst.protonum = IPPROTO_GRE;
- } else {
- DEBUGP("we are PAC->PNS\n");
- /* build tuple for PNS->PAC */
- t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip;
- t.src.u.gre.key =
- htons(master->nat.help.nat_pptp_info.pns_call_id);
- t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip;
- t.dst.u.gre.key =
- htons(master->nat.help.nat_pptp_info.pac_call_id);
- t.dst.protonum = IPPROTO_GRE;
- }
-
- DEBUGP("trying to unexpect other dir: ");
- DUMP_TUPLE(&t);
- other_exp = ip_conntrack_expect_find(&t);
- if (other_exp) {
- ip_conntrack_unexpect_related(other_exp);
- ip_conntrack_expect_put(other_exp);
- DEBUGP("success\n");
- } else {
- DEBUGP("not found!\n");
- }
-
- /* This must be a fresh one. */
- BUG_ON(ct->status & IPS_NAT_DONE_MASK);
-
- /* Change src to where master sends to */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.dst.ip;
- if (exp->dir == IP_CT_DIR_ORIGINAL) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
- }
- /* hook doesn't matter, but it has to do source manip */
- ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING);
-
- /* For DST manip, map port here to where it's expected. */
- range.flags = IP_NAT_RANGE_MAP_IPS;
- range.min_ip = range.max_ip
- = ct->master->tuplehash[!exp->dir].tuple.src.ip;
- if (exp->dir == IP_CT_DIR_REPLY) {
- range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED;
- range.min = range.max = exp->saved_proto;
- }
- /* hook doesn't matter, but it has to do destination manip */
- ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING);
-}
-
-/* outbound packets == from PNS to PAC */
-static int
-pptp_outbound_pkt(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq)
-
-{
- struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg, new_callid;
- unsigned int cid_off;
-
- new_callid = htons(ct_pptp_info->pns_call_id);
-
- switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
- /* FIXME: ideally we would want to reserve a call ID
- * here. current netfilter NAT core is not able to do
- * this :( For now we use TCP source port. This breaks
- * multiple calls within one control session */
-
- /* save original call ID in nat_info */
- nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
-
- /* don't use tcph->source since we are at a DSTmanip
- * hook (e.g. PREROUTING) and pkt is not mangled yet */
- new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
-
- /* save new call ID in ct info */
- ct_pptp_info->pns_call_id = ntohs(new_callid);
- break;
- case PPTP_IN_CALL_REPLY:
- cid_off = offsetof(union pptp_ctrl_union, icreq.callID);
- break;
- case PPTP_CALL_CLEAR_REQUEST:
- cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
- break;
- default:
- DEBUGP("unknown outbound packet 0x%04x:%s\n", msg,
- (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0]);
- /* fall through */
-
- case PPTP_SET_LINK_INFO:
- /* only need to NAT in case PAC is behind NAT box */
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
- }
-
- /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
- * down to here */
- DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_callid));
-
- /* mangle packet */
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- cid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_callid), (char *)&new_callid,
- sizeof(new_callid)) == 0)
- return NF_DROP;
-
- return NF_ACCEPT;
-}
-
-static int
-pptp_exp_gre(struct ip_conntrack_expect *expect_orig,
- struct ip_conntrack_expect *expect_reply)
-{
- struct ip_ct_pptp_master *ct_pptp_info =
- &expect_orig->master->help.ct_pptp_info;
- struct ip_nat_pptp *nat_pptp_info =
- &expect_orig->master->nat.help.nat_pptp_info;
-
- struct ip_conntrack *ct = expect_orig->master;
-
- struct ip_conntrack_tuple inv_t;
- struct ip_conntrack_tuple *orig_t, *reply_t;
-
- /* save original PAC call ID in nat_info */
- nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
-
- /* alter expectation */
- orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
- reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
-
- /* alter expectation for PNS->PAC direction */
- invert_tuplepr(&inv_t, &expect_orig->tuple);
- expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id);
- expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
- expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
- expect_orig->dir = IP_CT_DIR_ORIGINAL;
- inv_t.src.ip = reply_t->src.ip;
- inv_t.dst.ip = reply_t->dst.ip;
- inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
- inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
-
- if (!ip_conntrack_expect_related(expect_orig)) {
- DEBUGP("successfully registered expect\n");
- } else {
- DEBUGP("can't expect_related(expect_orig)\n");
- return 1;
- }
-
- /* alter expectation for PAC->PNS direction */
- invert_tuplepr(&inv_t, &expect_reply->tuple);
- expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id);
- expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id);
- expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id);
- expect_reply->dir = IP_CT_DIR_REPLY;
- inv_t.src.ip = orig_t->src.ip;
- inv_t.dst.ip = orig_t->dst.ip;
- inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id);
- inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id);
-
- if (!ip_conntrack_expect_related(expect_reply)) {
- DEBUGP("successfully registered expect\n");
- } else {
- DEBUGP("can't expect_related(expect_reply)\n");
- ip_conntrack_unexpect_related(expect_orig);
- return 1;
- }
-
- if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) {
- DEBUGP("can't register original keymap\n");
- ip_conntrack_unexpect_related(expect_orig);
- ip_conntrack_unexpect_related(expect_reply);
- return 1;
- }
-
- if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) {
- DEBUGP("can't register reply keymap\n");
- ip_conntrack_unexpect_related(expect_orig);
- ip_conntrack_unexpect_related(expect_reply);
- ip_ct_gre_keymap_destroy(ct);
- return 1;
- }
-
- return 0;
-}
-
-/* inbound packets == from PAC to PNS */
-static int
-pptp_inbound_pkt(struct sk_buff **pskb,
- struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct PptpControlHeader *ctlh,
- union pptp_ctrl_union *pptpReq)
-{
- struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info;
- u_int16_t msg, new_cid = 0, new_pcid;
- unsigned int pcid_off, cid_off = 0;
-
- new_pcid = htons(nat_pptp_info->pns_call_id);
-
- switch (msg = ntohs(ctlh->messageType)) {
- case PPTP_OUT_CALL_REPLY:
- pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
- cid_off = offsetof(union pptp_ctrl_union, ocack.callID);
- break;
- case PPTP_IN_CALL_CONNECT:
- pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
- break;
- case PPTP_IN_CALL_REQUEST:
- /* only need to nat in case PAC is behind NAT box */
- return NF_ACCEPT;
- case PPTP_WAN_ERROR_NOTIFY:
- pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
- break;
- case PPTP_CALL_DISCONNECT_NOTIFY:
- pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
- break;
- case PPTP_SET_LINK_INFO:
- pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
- break;
-
- default:
- DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)?
- pptp_msg_name[msg]:pptp_msg_name[0]);
- /* fall through */
-
- case PPTP_START_SESSION_REQUEST:
- case PPTP_START_SESSION_REPLY:
- case PPTP_STOP_SESSION_REQUEST:
- case PPTP_STOP_SESSION_REPLY:
- case PPTP_ECHO_REQUEST:
- case PPTP_ECHO_REPLY:
- /* no need to alter packet */
- return NF_ACCEPT;
- }
-
- /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
- * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
-
- /* mangle packet */
- DEBUGP("altering peer call id from 0x%04x to 0x%04x\n",
- ntohs(*(u_int16_t *)pptpReq + pcid_off), ntohs(new_pcid));
-
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- pcid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_pcid), (char *)&new_pcid,
- sizeof(new_pcid)) == 0)
- return NF_DROP;
-
- if (new_cid) {
- DEBUGP("altering call id from 0x%04x to 0x%04x\n",
- ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_cid));
- if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo,
- cid_off + sizeof(struct pptp_pkt_hdr) +
- sizeof(struct PptpControlHeader),
- sizeof(new_cid), (char *)&new_cid,
- sizeof(new_cid)) == 0)
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-
-extern int __init ip_nat_proto_gre_init(void);
-extern void __exit ip_nat_proto_gre_fini(void);
-
-static int __init init(void)
-{
- int ret;
-
- DEBUGP("%s: registering NAT helper\n", __FILE__);
-
- ret = ip_nat_proto_gre_init();
- if (ret < 0)
- return ret;
-
- BUG_ON(ip_nat_pptp_hook_outbound);
- ip_nat_pptp_hook_outbound = &pptp_outbound_pkt;
-
- BUG_ON(ip_nat_pptp_hook_inbound);
- ip_nat_pptp_hook_inbound = &pptp_inbound_pkt;
-
- BUG_ON(ip_nat_pptp_hook_exp_gre);
- ip_nat_pptp_hook_exp_gre = &pptp_exp_gre;
-
- BUG_ON(ip_nat_pptp_hook_expectfn);
- ip_nat_pptp_hook_expectfn = &pptp_nat_expected;
-
- printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION);
- return 0;
-}
-
-static void __exit fini(void)
-{
- DEBUGP("cleanup_module\n" );
-
- ip_nat_pptp_hook_expectfn = NULL;
- ip_nat_pptp_hook_exp_gre = NULL;
- ip_nat_pptp_hook_inbound = NULL;
- ip_nat_pptp_hook_outbound = NULL;
-
- ip_nat_proto_gre_fini();
- /* Make sure noone calls it, meanwhile */
- synchronize_net();
-
- printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c
deleted file mode 100644
index 461c833eaca..00000000000
--- a/net/ipv4/netfilter/ip_nat_irc.c
+++ /dev/null
@@ -1,123 +0,0 @@
-/* IRC extension for TCP NAT alteration.
- * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org>
- * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation
- * based on a copy of RR's ip_nat_ftp.c
- *
- * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/kernel.h>
-#include <net/tcp.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/moduleparam.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("IRC (DCC) NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- unsigned int matchoff,
- unsigned int matchlen,
- struct ip_conntrack_expect *exp)
-{
- u_int16_t port;
- unsigned int ret;
-
- /* "4294967296 65635 " */
- char buffer[18];
-
- DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n",
- expect->seq, exp_irc_info->len,
- ntohl(tcph->seq));
-
- /* Reply comes from server. */
- exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
- exp->dir = IP_CT_DIR_REPLY;
-
- /* When you see the packet, we need to NAT it the same as the
- * this one. */
- exp->expectfn = ip_nat_follow_master;
-
- /* Try to get same port: if not, try to change it. */
- for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) {
- exp->tuple.dst.u.tcp.port = htons(port);
- if (ip_conntrack_expect_related(exp) == 0)
- break;
- }
-
- if (port == 0)
- return NF_DROP;
-
- /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27
- * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28
- * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26
- * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26
- * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27
- * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits,
- * 255.255.255.255==4294967296, 10 digits)
- * P: bound port (min 1 d, max 5d (65635))
- * F: filename (min 1 d )
- * S: size (min 1 d )
- * 0x01, \n: terminators
- */
-
- /* AAA = "us", ie. where server normally talks to. */
- sprintf(buffer, "%u %u",
- ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip),
- port);
- DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n",
- buffer, NIPQUAD(exp->tuple.src.ip), port);
-
- ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo,
- matchoff, matchlen, buffer,
- strlen(buffer));
- if (ret != NF_ACCEPT)
- ip_conntrack_unexpect_related(exp);
- return ret;
-}
-
-static void __exit fini(void)
-{
- ip_nat_irc_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
-}
-
-static int __init init(void)
-{
- BUG_ON(ip_nat_irc_hook);
- ip_nat_irc_hook = help;
- return 0;
-}
-
-/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */
-static int warn_set(const char *val, struct kernel_param *kp)
-{
- printk(KERN_INFO KBUILD_MODNAME
- ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n");
- return 0;
-}
-module_param_call(ports, warn_set, NULL, NULL, 0);
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c
deleted file mode 100644
index 6c4899d8046..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_gre.c
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- * ip_nat_proto_gre.c - Version 2.0
- *
- * NAT protocol helper module for GRE.
- *
- * GRE is a generic encapsulation protocol, which is generally not very
- * suited for NAT, as it has no protocol-specific part as port numbers.
- *
- * It has an optional key field, which may help us distinguishing two
- * connections between the same two hosts.
- *
- * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
- *
- * PPTP is built on top of a modified version of GRE, and has a mandatory
- * field called "CallID", which serves us for the same purpose as the key
- * field in plain GRE.
- *
- * Documentation about PPTP can be found in RFC 2637
- *
- * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
- *
- * Development of this code funded by Astaro AG (http://www.astaro.com/)
- *
- */
-
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
-
-#if 0
-#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \
- __FUNCTION__, ## args)
-#else
-#define DEBUGP(x, args...)
-#endif
-
-/* is key in given range between min and max */
-static int
-gre_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- u_int32_t key;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- key = tuple->src.u.gre.key;
- else
- key = tuple->dst.u.gre.key;
-
- return ntohl(key) >= ntohl(min->gre.key)
- && ntohl(key) <= ntohl(max->gre.key);
-}
-
-/* generate unique tuple ... */
-static int
-gre_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t key;
- u_int16_t *keyptr;
- unsigned int min, i, range_size;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- keyptr = &tuple->src.u.gre.key;
- else
- keyptr = &tuple->dst.u.gre.key;
-
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- DEBUGP("%p: NATing GRE PPTP\n", conntrack);
- min = 1;
- range_size = 0xffff;
- } else {
- min = ntohl(range->min.gre.key);
- range_size = ntohl(range->max.gre.key) - min + 1;
- }
-
- DEBUGP("min = %u, range_size = %u\n", min, range_size);
-
- for (i = 0; i < range_size; i++, key++) {
- *keyptr = htonl(min + key % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
-
- DEBUGP("%p: no NAT mapping\n", conntrack);
-
- return 0;
-}
-
-/* manipulate a GRE packet according to maniptype */
-static int
-gre_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct gre_hdr *greh;
- struct gre_hdr_pptp *pgreh;
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- unsigned int hdroff = iphdroff + iph->ihl*4;
-
- /* pgreh includes two optional 32bit fields which are not required
- * to be there. That's where the magic '8' comes from */
- if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8))
- return 0;
-
- greh = (void *)(*pskb)->data + hdroff;
- pgreh = (struct gre_hdr_pptp *) greh;
-
- /* we only have destination manip of a packet, since 'source key'
- * is not present in the packet itself */
- if (maniptype == IP_NAT_MANIP_DST) {
- /* key manipulation is always dest */
- switch (greh->version) {
- case 0:
- if (!greh->key) {
- DEBUGP("can't nat GRE w/o key\n");
- break;
- }
- if (greh->csum) {
- /* FIXME: Never tested this code... */
- *(gre_csum(greh)) =
- ip_nat_cheat_check(~*(gre_key(greh)),
- tuple->dst.u.gre.key,
- *(gre_csum(greh)));
- }
- *(gre_key(greh)) = tuple->dst.u.gre.key;
- break;
- case GRE_VERSION_PPTP:
- DEBUGP("call_id -> 0x%04x\n",
- ntohs(tuple->dst.u.gre.key));
- pgreh->call_id = tuple->dst.u.gre.key;
- break;
- default:
- DEBUGP("can't nat unknown GRE version\n");
- return 0;
- break;
- }
- }
- return 1;
-}
-
-/* nat helper struct */
-static struct ip_nat_protocol gre = {
- .name = "GRE",
- .protonum = IPPROTO_GRE,
- .manip_pkt = gre_manip_pkt,
- .in_range = gre_in_range,
- .unique_tuple = gre_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
-
-int __init ip_nat_proto_gre_init(void)
-{
- return ip_nat_protocol_register(&gre);
-}
-
-void __exit ip_nat_proto_gre_fini(void)
-{
- ip_nat_protocol_unregister(&gre);
-}
diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c
deleted file mode 100644
index 31a3f4ccb99..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_icmp.c
+++ /dev/null
@@ -1,89 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/icmp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-icmp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- return (tuple->src.u.icmp.id >= min->icmp.id
- && tuple->src.u.icmp.id <= max->icmp.id);
-}
-
-static int
-icmp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t id;
- unsigned int range_size;
- unsigned int i;
-
- range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1;
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED))
- range_size = 0xFFFF;
-
- for (i = 0; i < range_size; i++, id++) {
- tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) +
- (id % range_size));
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
- return 0;
-}
-
-static int
-icmp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct icmphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
-
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
- return 0;
-
- hdr = (struct icmphdr *)((*pskb)->data + hdroff);
-
- hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF,
- tuple->src.u.icmp.id,
- hdr->checksum);
- hdr->un.echo.id = tuple->src.u.icmp.id;
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_icmp = {
- .name = "ICMP",
- .protonum = IPPROTO_ICMP,
- .me = THIS_MODULE,
- .manip_pkt = icmp_manip_pkt,
- .in_range = icmp_in_range,
- .unique_tuple = icmp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c
deleted file mode 100644
index a3d14079eba..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_tcp.c
+++ /dev/null
@@ -1,151 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/if.h>
-#include <linux/netfilter/nfnetlink_conntrack.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-
-static int
-tcp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- u_int16_t port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.tcp.port;
- else
- port = tuple->dst.u.tcp.port;
-
- return ntohs(port) >= ntohs(min->tcp.port)
- && ntohs(port) <= ntohs(max->tcp.port);
-}
-
-static int
-tcp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t port;
- u_int16_t *portptr;
- unsigned int range_size, min, i;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.tcp.port;
- else
- portptr = &tuple->dst.u.tcp.port;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return 0;
-
- /* Map privileged onto privileged. */
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr)<512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.tcp.port);
- range_size = ntohs(range->max.tcp.port) - min + 1;
- }
-
- for (i = 0; i < range_size; i++, port++) {
- *portptr = htons(min + port % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack)) {
- return 1;
- }
- }
- return 0;
-}
-
-static int
-tcp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct tcphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- u32 oldip, newip;
- u16 *portptr, newport, oldport;
- int hdrsize = 8; /* TCP connection tracking guarantees this much */
-
- /* this could be a inner header returned in icmp packet; in such
- cases we cannot update the checksum field since it is outside of
- the 8 bytes of transport layer headers we are guaranteed */
- if ((*pskb)->len >= hdroff + sizeof(struct tcphdr))
- hdrsize = sizeof(struct tcphdr);
-
- if (!skb_make_writable(pskb, hdroff + hdrsize))
- return 0;
-
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct tcphdr *)((*pskb)->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.ip;
- newport = tuple->src.u.tcp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.ip;
- newport = tuple->dst.u.tcp.port;
- portptr = &hdr->dest;
- }
-
- oldport = *portptr;
- *portptr = newport;
-
- if (hdrsize < sizeof(*hdr))
- return 1;
-
- hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(oldport ^ 0xFFFF,
- newport,
- hdr->check));
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_tcp = {
- .name = "TCP",
- .protonum = IPPROTO_TCP,
- .me = THIS_MODULE,
- .manip_pkt = tcp_manip_pkt,
- .in_range = tcp_in_range,
- .unique_tuple = tcp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c
deleted file mode 100644
index ec6053fdc86..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_udp.c
+++ /dev/null
@@ -1,137 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int
-udp_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- u_int16_t port;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- port = tuple->src.u.udp.port;
- else
- port = tuple->dst.u.udp.port;
-
- return ntohs(port) >= ntohs(min->udp.port)
- && ntohs(port) <= ntohs(max->udp.port);
-}
-
-static int
-udp_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- static u_int16_t port;
- u_int16_t *portptr;
- unsigned int range_size, min, i;
-
- if (maniptype == IP_NAT_MANIP_SRC)
- portptr = &tuple->src.u.udp.port;
- else
- portptr = &tuple->dst.u.udp.port;
-
- /* If no range specified... */
- if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) {
- /* If it's dst rewrite, can't change port */
- if (maniptype == IP_NAT_MANIP_DST)
- return 0;
-
- if (ntohs(*portptr) < 1024) {
- /* Loose convention: >> 512 is credential passing */
- if (ntohs(*portptr)<512) {
- min = 1;
- range_size = 511 - min + 1;
- } else {
- min = 600;
- range_size = 1023 - min + 1;
- }
- } else {
- min = 1024;
- range_size = 65535 - 1024 + 1;
- }
- } else {
- min = ntohs(range->min.udp.port);
- range_size = ntohs(range->max.udp.port) - min + 1;
- }
-
- for (i = 0; i < range_size; i++, port++) {
- *portptr = htons(min + port % range_size);
- if (!ip_nat_used_tuple(tuple, conntrack))
- return 1;
- }
- return 0;
-}
-
-static int
-udp_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff);
- struct udphdr *hdr;
- unsigned int hdroff = iphdroff + iph->ihl*4;
- u32 oldip, newip;
- u16 *portptr, newport;
-
- if (!skb_make_writable(pskb, hdroff + sizeof(*hdr)))
- return 0;
-
- iph = (struct iphdr *)((*pskb)->data + iphdroff);
- hdr = (struct udphdr *)((*pskb)->data + hdroff);
-
- if (maniptype == IP_NAT_MANIP_SRC) {
- /* Get rid of src ip and src pt */
- oldip = iph->saddr;
- newip = tuple->src.ip;
- newport = tuple->src.u.udp.port;
- portptr = &hdr->source;
- } else {
- /* Get rid of dst ip and dst pt */
- oldip = iph->daddr;
- newip = tuple->dst.ip;
- newport = tuple->dst.u.udp.port;
- portptr = &hdr->dest;
- }
- if (hdr->check) /* 0 is a special case meaning no checksum */
- hdr->check = ip_nat_cheat_check(~oldip, newip,
- ip_nat_cheat_check(*portptr ^ 0xFFFF,
- newport,
- hdr->check));
- *portptr = newport;
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_protocol_udp = {
- .name = "UDP",
- .protonum = IPPROTO_UDP,
- .me = THIS_MODULE,
- .manip_pkt = udp_manip_pkt,
- .in_range = udp_in_range,
- .unique_tuple = udp_unique_tuple,
-#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
- defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
- .range_to_nfattr = ip_nat_port_range_to_nfattr,
- .nfattr_to_range = ip_nat_port_nfattr_to_range,
-#endif
-};
diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c
deleted file mode 100644
index 3bf04951724..00000000000
--- a/net/ipv4/netfilter/ip_nat_proto_unknown.c
+++ /dev/null
@@ -1,55 +0,0 @@
-/* The "unknown" protocol. This is what is used for protocols we
- * don't understand. It's returned by ip_ct_find_proto().
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/init.h>
-#include <linux/netfilter.h>
-#include <linux/if.h>
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-
-static int unknown_in_range(const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type manip_type,
- const union ip_conntrack_manip_proto *min,
- const union ip_conntrack_manip_proto *max)
-{
- return 1;
-}
-
-static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple,
- const struct ip_nat_range *range,
- enum ip_nat_manip_type maniptype,
- const struct ip_conntrack *conntrack)
-{
- /* Sorry: we can't help you; if it's not unique, we can't frob
- anything. */
- return 0;
-}
-
-static int
-unknown_manip_pkt(struct sk_buff **pskb,
- unsigned int iphdroff,
- const struct ip_conntrack_tuple *tuple,
- enum ip_nat_manip_type maniptype)
-{
- return 1;
-}
-
-struct ip_nat_protocol ip_nat_unknown_protocol = {
- .name = "unknown",
- /* .me isn't set: getting a ref to this cannot fail. */
- .manip_pkt = unknown_manip_pkt,
- .in_range = unknown_in_range,
- .unique_tuple = unknown_unique_tuple,
-};
diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c
deleted file mode 100644
index 1de86282d23..00000000000
--- a/net/ipv4/netfilter/ip_nat_rule.c
+++ /dev/null
@@ -1,341 +0,0 @@
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/* Everything about the rules for NAT. */
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/kmod.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/checksum.h>
-#include <net/route.h>
-#include <linux/bitops.h>
-
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/listhelp.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT))
-
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
-} nat_initial_table __initdata
-= { { "nat", NAT_VALID_HOOKS, 4,
- sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- { [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
- { [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard),
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
- 0, NULL, { } },
- {
- /* PRE_ROUTING */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* POST_ROUTING */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* LOCAL_OUT */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } }
- },
- /* ERROR */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_error),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
- { } },
- "ERROR"
- }
- }
-};
-
-static struct ipt_table nat_table = {
- .name = "nat",
- .valid_hooks = NAT_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
- .me = THIS_MODULE,
- .af = AF_INET,
-};
-
-/* Source NAT */
-static unsigned int ipt_snat_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- const struct ip_nat_multi_range_compat *mr = targinfo;
-
- IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* Connection must be valid and new. */
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
- || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
- IP_NF_ASSERT(out);
-
- return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */
-static void warn_if_extra_mangle(u32 dstip, u32 srcip)
-{
- static int warned = 0;
- struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } };
- struct rtable *rt;
-
- if (ip_route_output_key(&rt, &fl) != 0)
- return;
-
- if (rt->rt_src != srcip && !warned) {
- printk("NAT: no longer support implicit source local NAT\n");
- printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n",
- NIPQUAD(srcip), NIPQUAD(dstip));
- warned = 1;
- }
- ip_rt_put(rt);
-}
-
-static unsigned int ipt_dnat_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- const struct ip_nat_multi_range_compat *mr = targinfo;
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
- || hooknum == NF_IP_LOCAL_OUT);
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- /* Connection must be valid and new. */
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
- if (hooknum == NF_IP_LOCAL_OUT
- && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)
- warn_if_extra_mangle((*pskb)->nh.iph->daddr,
- mr->range[0].min_ip);
-
- return ip_nat_setup_info(ct, &mr->range[0], hooknum);
-}
-
-static int ipt_snat_checkentry(const char *tablename,
- const void *entry,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- struct ip_nat_multi_range_compat *mr = targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- printk("SNAT: multiple ranges no longer supported\n");
- return 0;
- }
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat))) {
- DEBUGP("SNAT: Target size %u wrong for %u ranges\n",
- targinfosize, mr->rangesize);
- return 0;
- }
-
- /* Only allow these for NAT. */
- if (strcmp(tablename, "nat") != 0) {
- DEBUGP("SNAT: wrong table %s\n", tablename);
- return 0;
- }
-
- if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) {
- DEBUGP("SNAT: hook mask 0x%x bad\n", hook_mask);
- return 0;
- }
- return 1;
-}
-
-static int ipt_dnat_checkentry(const char *tablename,
- const void *entry,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- struct ip_nat_multi_range_compat *mr = targinfo;
-
- /* Must be a valid range */
- if (mr->rangesize != 1) {
- printk("DNAT: multiple ranges no longer supported\n");
- return 0;
- }
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat))) {
- DEBUGP("DNAT: Target size %u wrong for %u ranges\n",
- targinfosize, mr->rangesize);
- return 0;
- }
-
- /* Only allow these for NAT. */
- if (strcmp(tablename, "nat") != 0) {
- DEBUGP("DNAT: wrong table %s\n", tablename);
- return 0;
- }
-
- if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) {
- DEBUGP("DNAT: hook mask 0x%x bad\n", hook_mask);
- return 0;
- }
-
- return 1;
-}
-
-inline unsigned int
-alloc_null_binding(struct ip_conntrack *conntrack,
- struct ip_nat_info *info,
- unsigned int hooknum)
-{
- /* Force range to this IP; let proto decide mapping for
- per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
- Use reply in case it's already been mangled (eg local packet).
- */
- u_int32_t ip
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- struct ip_nat_range range
- = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } };
-
- DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack,
- NIPQUAD(ip));
- return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-unsigned int
-alloc_null_binding_confirmed(struct ip_conntrack *conntrack,
- struct ip_nat_info *info,
- unsigned int hooknum)
-{
- u_int32_t ip
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- u_int16_t all
- = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC
- ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all
- : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all);
- struct ip_nat_range range
- = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } };
-
- DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n",
- conntrack, NIPQUAD(ip));
- return ip_nat_setup_info(conntrack, &range, hooknum);
-}
-
-int ip_nat_rule_find(struct sk_buff **pskb,
- unsigned int hooknum,
- const struct net_device *in,
- const struct net_device *out,
- struct ip_conntrack *ct,
- struct ip_nat_info *info)
-{
- int ret;
-
- ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL);
-
- if (ret == NF_ACCEPT) {
- if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum)))
- /* NUL mapping */
- ret = alloc_null_binding(ct, info, hooknum);
- }
- return ret;
-}
-
-static struct ipt_target ipt_snat_reg = {
- .name = "SNAT",
- .target = ipt_snat_target,
- .checkentry = ipt_snat_checkentry,
-};
-
-static struct ipt_target ipt_dnat_reg = {
- .name = "DNAT",
- .target = ipt_dnat_target,
- .checkentry = ipt_dnat_checkentry,
-};
-
-int __init ip_nat_rule_init(void)
-{
- int ret;
-
- ret = ipt_register_table(&nat_table, &nat_initial_table.repl);
- if (ret != 0)
- return ret;
- ret = ipt_register_target(&ipt_snat_reg);
- if (ret != 0)
- goto unregister_table;
-
- ret = ipt_register_target(&ipt_dnat_reg);
- if (ret != 0)
- goto unregister_snat;
-
- return ret;
-
- unregister_snat:
- ipt_unregister_target(&ipt_snat_reg);
- unregister_table:
- ipt_unregister_table(&nat_table);
-
- return ret;
-}
-
-void ip_nat_rule_cleanup(void)
-{
- ipt_unregister_target(&ipt_dnat_reg);
- ipt_unregister_target(&ipt_snat_reg);
- ipt_unregister_table(&nat_table);
-}
diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c
deleted file mode 100644
index ad438fb185b..00000000000
--- a/net/ipv4/netfilter/ip_nat_standalone.c
+++ /dev/null
@@ -1,447 +0,0 @@
-/* This file contains all the functions required for the standalone
- ip_nat module.
-
- These are not required by the compatibility layer.
-*/
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-/*
- * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
- * - new API and handling of conntrack/nat helpers
- * - now capable of multiple expectations for one master
- * */
-
-#include <linux/config.h>
-#include <linux/types.h>
-#include <linux/icmp.h>
-#include <linux/ip.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <net/ip.h>
-#include <net/checksum.h>
-#include <linux/spinlock.h>
-
-#define ASSERT_READ_LOCK(x)
-#define ASSERT_WRITE_LOCK(x)
-
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_nat_protocol.h>
-#include <linux/netfilter_ipv4/ip_nat_core.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_core.h>
-#include <linux/netfilter_ipv4/listhelp.h>
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \
- : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \
- : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \
- : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \
- : "*ERROR*")))
-
-#ifdef CONFIG_XFRM
-static void nat_decode_session(struct sk_buff *skb, struct flowi *fl)
-{
- struct ip_conntrack *ct;
- struct ip_conntrack_tuple *t;
- enum ip_conntrack_info ctinfo;
- enum ip_conntrack_dir dir;
- unsigned long statusbit;
-
- ct = ip_conntrack_get(skb, &ctinfo);
- if (ct == NULL)
- return;
- dir = CTINFO2DIR(ctinfo);
- t = &ct->tuplehash[dir].tuple;
-
- if (dir == IP_CT_DIR_ORIGINAL)
- statusbit = IPS_DST_NAT;
- else
- statusbit = IPS_SRC_NAT;
-
- if (ct->status & statusbit) {
- fl->fl4_dst = t->dst.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP)
- fl->fl_ip_dport = t->dst.u.tcp.port;
- }
-
- statusbit ^= IPS_NAT_MASK;
-
- if (ct->status & statusbit) {
- fl->fl4_src = t->src.ip;
- if (t->dst.protonum == IPPROTO_TCP ||
- t->dst.protonum == IPPROTO_UDP)
- fl->fl_ip_sport = t->src.u.tcp.port;
- }
-}
-#endif
-
-static unsigned int
-ip_nat_fn(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- struct ip_nat_info *info;
- /* maniptype == SRC for postrouting. */
- enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum);
-
- /* We never see fragments: conntrack defrags on pre-routing
- and local-out, and ip_nat_out protects post-routing. */
- IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off
- & htons(IP_MF|IP_OFFSET)));
-
- /* If we had a hardware checksum before, it's now invalid */
- if ((*pskb)->ip_summed == CHECKSUM_HW)
- if (skb_checksum_help(*pskb, (out == NULL)))
- return NF_DROP;
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
- /* Can't track? It's not due to stress, or conntrack would
- have dropped it. Hence it's the user's responsibilty to
- packet filter it out, or implement conntrack/NAT for that
- protocol. 8) --RR */
- if (!ct) {
- /* Exception: ICMP redirect to new connection (not in
- hash table yet). We must not let this through, in
- case we're doing NAT to the same network. */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- struct icmphdr _hdr, *hp;
-
- hp = skb_header_pointer(*pskb,
- (*pskb)->nh.iph->ihl*4,
- sizeof(_hdr), &_hdr);
- if (hp != NULL &&
- hp->type == ICMP_REDIRECT)
- return NF_DROP;
- }
- return NF_ACCEPT;
- }
-
- /* Don't try to NAT if this packet is not conntracked */
- if (ct == &ip_conntrack_untracked)
- return NF_ACCEPT;
-
- switch (ctinfo) {
- case IP_CT_RELATED:
- case IP_CT_RELATED+IP_CT_IS_REPLY:
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) {
- if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype,
- CTINFO2DIR(ctinfo)))
- return NF_DROP;
- else
- return NF_ACCEPT;
- }
- /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
- case IP_CT_NEW:
- info = &ct->nat.info;
-
- /* Seen it before? This can happen for loopback, retrans,
- or local packets.. */
- if (!ip_nat_initialized(ct, maniptype)) {
- unsigned int ret;
-
- if (unlikely(is_confirmed(ct)))
- /* NAT module was loaded late */
- ret = alloc_null_binding_confirmed(ct, info,
- hooknum);
- else if (hooknum == NF_IP_LOCAL_IN)
- /* LOCAL_IN hook doesn't have a chain! */
- ret = alloc_null_binding(ct, info, hooknum);
- else
- ret = ip_nat_rule_find(pskb, hooknum,
- in, out, ct,
- info);
-
- if (ret != NF_ACCEPT) {
- return ret;
- }
- } else
- DEBUGP("Already setup manip %s for ct %p\n",
- maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST",
- ct);
- break;
-
- default:
- /* ESTABLISHED */
- IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED
- || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY));
- info = &ct->nat.info;
- }
-
- IP_NF_ASSERT(info);
- return ip_nat_packet(ct, ctinfo, hooknum, pskb);
-}
-
-static unsigned int
-ip_nat_in(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN
- && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip) {
- dst_release((*pskb)->dst);
- (*pskb)->dst = NULL;
- }
- }
- return ret;
-}
-
-static unsigned int
-ip_nat_out(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN
- && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.src.ip !=
- ct->tuplehash[!dir].tuple.dst.ip
-#ifdef CONFIG_XFRM
- || ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all
-#endif
- )
- return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
- }
- return ret;
-}
-
-static unsigned int
-ip_nat_local_fn(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- unsigned int ret;
-
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr))
- return NF_ACCEPT;
-
- ret = ip_nat_fn(hooknum, pskb, in, out, okfn);
- if (ret != NF_DROP && ret != NF_STOLEN
- && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) {
- enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
-
- if (ct->tuplehash[dir].tuple.dst.ip !=
- ct->tuplehash[!dir].tuple.src.ip
-#ifdef CONFIG_XFRM
- || ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[dir].tuple.src.u.all
-#endif
- )
- return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
- }
- return ret;
-}
-
-static unsigned int
-ip_nat_adjust(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
- if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) {
- DEBUGP("ip_nat_standalone: adjusting sequence number\n");
- if (!ip_nat_seq_adjust(pskb, ct, ctinfo))
- return NF_DROP;
- }
- return NF_ACCEPT;
-}
-
-/* We must be after connection tracking and before packet filtering. */
-
-/* Before packet filtering, change destination */
-static struct nf_hook_ops ip_nat_in_ops = {
- .hook = ip_nat_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_NAT_DST,
-};
-
-/* After packet filtering, change source */
-static struct nf_hook_ops ip_nat_out_ops = {
- .hook = ip_nat_out,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SRC,
-};
-
-/* After conntrack, adjust sequence number */
-static struct nf_hook_ops ip_nat_adjust_out_ops = {
- .hook = ip_nat_adjust,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
-};
-
-/* Before packet filtering, change destination */
-static struct nf_hook_ops ip_nat_local_out_ops = {
- .hook = ip_nat_local_fn,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_NAT_DST,
-};
-
-/* After packet filtering, change source for reply packets of LOCAL_OUT DNAT */
-static struct nf_hook_ops ip_nat_local_in_ops = {
- .hook = ip_nat_fn,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_NAT_SRC,
-};
-
-/* After conntrack, adjust sequence number */
-static struct nf_hook_ops ip_nat_adjust_in_ops = {
- .hook = ip_nat_adjust,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_NAT_SEQ_ADJUST,
-};
-
-
-static int init_or_cleanup(int init)
-{
- int ret = 0;
-
- need_conntrack();
-
- if (!init) goto cleanup;
-
-#ifdef CONFIG_XFRM
- BUG_ON(ip_nat_decode_session != NULL);
- ip_nat_decode_session = nat_decode_session;
-#endif
- ret = ip_nat_rule_init();
- if (ret < 0) {
- printk("ip_nat_init: can't setup rules.\n");
- goto cleanup_decode_session;
- }
- ret = nf_register_hook(&ip_nat_in_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register in hook.\n");
- goto cleanup_rule_init;
- }
- ret = nf_register_hook(&ip_nat_out_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register out hook.\n");
- goto cleanup_inops;
- }
- ret = nf_register_hook(&ip_nat_adjust_in_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register adjust in hook.\n");
- goto cleanup_outops;
- }
- ret = nf_register_hook(&ip_nat_adjust_out_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register adjust out hook.\n");
- goto cleanup_adjustin_ops;
- }
- ret = nf_register_hook(&ip_nat_local_out_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register local out hook.\n");
- goto cleanup_adjustout_ops;;
- }
- ret = nf_register_hook(&ip_nat_local_in_ops);
- if (ret < 0) {
- printk("ip_nat_init: can't register local in hook.\n");
- goto cleanup_localoutops;
- }
- return ret;
-
- cleanup:
- nf_unregister_hook(&ip_nat_local_in_ops);
- cleanup_localoutops:
- nf_unregister_hook(&ip_nat_local_out_ops);
- cleanup_adjustout_ops:
- nf_unregister_hook(&ip_nat_adjust_out_ops);
- cleanup_adjustin_ops:
- nf_unregister_hook(&ip_nat_adjust_in_ops);
- cleanup_outops:
- nf_unregister_hook(&ip_nat_out_ops);
- cleanup_inops:
- nf_unregister_hook(&ip_nat_in_ops);
- cleanup_rule_init:
- ip_nat_rule_cleanup();
- cleanup_decode_session:
-#ifdef CONFIG_XFRM
- ip_nat_decode_session = NULL;
- synchronize_net();
-#endif
- return ret;
-}
-
-static int __init init(void)
-{
- return init_or_cleanup(1);
-}
-
-static void __exit fini(void)
-{
- init_or_cleanup(0);
-}
-
-module_init(init);
-module_exit(fini);
-
-MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c
deleted file mode 100644
index 43c3bd7c118..00000000000
--- a/net/ipv4/netfilter/ip_nat_tftp.c
+++ /dev/null
@@ -1,71 +0,0 @@
-/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * Version: 0.0.7
- *
- * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org>
- * - Port to newnat API
- *
- * This module currently supports DNAT:
- * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y
- *
- * and SNAT:
- * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x }
- *
- * It has not been tested with
- * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip
- * If you do test this please let me know if it works or not.
- *
- */
-
-#include <linux/module.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/ip.h>
-#include <linux/udp.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_conntrack_tftp.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/moduleparam.h>
-
-MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>");
-MODULE_DESCRIPTION("tftp NAT helper");
-MODULE_LICENSE("GPL");
-
-static unsigned int help(struct sk_buff **pskb,
- enum ip_conntrack_info ctinfo,
- struct ip_conntrack_expect *exp)
-{
- struct ip_conntrack *ct = exp->master;
-
- exp->saved_proto.udp.port
- = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
- exp->dir = IP_CT_DIR_REPLY;
- exp->expectfn = ip_nat_follow_master;
- if (ip_conntrack_expect_related(exp) != 0)
- return NF_DROP;
- return NF_ACCEPT;
-}
-
-static void __exit fini(void)
-{
- ip_nat_tftp_hook = NULL;
- /* Make sure noone calls it, meanwhile. */
- synchronize_net();
-}
-
-static int __init init(void)
-{
- BUG_ON(ip_nat_tftp_hook);
- ip_nat_tftp_hook = help;
- return 0;
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
deleted file mode 100644
index 36339eb39e1..00000000000
--- a/net/ipv4/netfilter/ip_queue.c
+++ /dev/null
@@ -1,735 +0,0 @@
-/*
- * This is a module which is used for queueing IPv4 packets and
- * communicating with userspace via netlink.
- *
- * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
- * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 2000-03-27: Simplified code (thanks to Andi Kleen for clues).
- * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report).
- * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian
- * Zander).
- * 2000-08-01: Added Nick Williams' MAC support.
- * 2002-06-25: Code cleanup.
- * 2005-01-10: Added /proc counter for dropped packets; fixed so
- * packets aren't delivered to user space if they're going
- * to be dropped.
- * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte)
- *
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <linux/ip.h>
-#include <linux/notifier.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_queue.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netlink.h>
-#include <linux/spinlock.h>
-#include <linux/sysctl.h>
-#include <linux/proc_fs.h>
-#include <linux/security.h>
-#include <net/sock.h>
-#include <net/route.h>
-
-#define IPQ_QMAX_DEFAULT 1024
-#define IPQ_PROC_FS_NAME "ip_queue"
-#define NET_IPQ_QMAX 2088
-#define NET_IPQ_QMAX_NAME "ip_queue_maxlen"
-
-struct ipq_queue_entry {
- struct list_head list;
- struct nf_info *info;
- struct sk_buff *skb;
-};
-
-typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long);
-
-static unsigned char copy_mode = IPQ_COPY_NONE;
-static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT;
-static DEFINE_RWLOCK(queue_lock);
-static int peer_pid;
-static unsigned int copy_range;
-static unsigned int queue_total;
-static unsigned int queue_dropped = 0;
-static unsigned int queue_user_dropped = 0;
-static struct sock *ipqnl;
-static LIST_HEAD(queue_list);
-static DECLARE_MUTEX(ipqnl_sem);
-
-static void
-ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict)
-{
- /* TCP input path (and probably other bits) assume to be called
- * from softirq context, not from syscall, like ipq_issue_verdict is
- * called. TCP input path deadlocks with locks taken from timer
- * softirq, e.g. We therefore emulate this by local_bh_disable() */
-
- local_bh_disable();
- nf_reinject(entry->skb, entry->info, verdict);
- local_bh_enable();
-
- kfree(entry);
-}
-
-static inline void
-__ipq_enqueue_entry(struct ipq_queue_entry *entry)
-{
- list_add(&entry->list, &queue_list);
- queue_total++;
-}
-
-/*
- * Find and return a queued entry matched by cmpfn, or return the last
- * entry if cmpfn is NULL.
- */
-static inline struct ipq_queue_entry *
-__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
- struct list_head *p;
-
- list_for_each_prev(p, &queue_list) {
- struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p;
-
- if (!cmpfn || cmpfn(entry, data))
- return entry;
- }
- return NULL;
-}
-
-static inline void
-__ipq_dequeue_entry(struct ipq_queue_entry *entry)
-{
- list_del(&entry->list);
- queue_total--;
-}
-
-static inline struct ipq_queue_entry *
-__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
- struct ipq_queue_entry *entry;
-
- entry = __ipq_find_entry(cmpfn, data);
- if (entry == NULL)
- return NULL;
-
- __ipq_dequeue_entry(entry);
- return entry;
-}
-
-
-static inline void
-__ipq_flush(int verdict)
-{
- struct ipq_queue_entry *entry;
-
- while ((entry = __ipq_find_dequeue_entry(NULL, 0)))
- ipq_issue_verdict(entry, verdict);
-}
-
-static inline int
-__ipq_set_mode(unsigned char mode, unsigned int range)
-{
- int status = 0;
-
- switch(mode) {
- case IPQ_COPY_NONE:
- case IPQ_COPY_META:
- copy_mode = mode;
- copy_range = 0;
- break;
-
- case IPQ_COPY_PACKET:
- copy_mode = mode;
- copy_range = range;
- if (copy_range > 0xFFFF)
- copy_range = 0xFFFF;
- break;
-
- default:
- status = -EINVAL;
-
- }
- return status;
-}
-
-static inline void
-__ipq_reset(void)
-{
- peer_pid = 0;
- net_disable_timestamp();
- __ipq_set_mode(IPQ_COPY_NONE, 0);
- __ipq_flush(NF_DROP);
-}
-
-static struct ipq_queue_entry *
-ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data)
-{
- struct ipq_queue_entry *entry;
-
- write_lock_bh(&queue_lock);
- entry = __ipq_find_dequeue_entry(cmpfn, data);
- write_unlock_bh(&queue_lock);
- return entry;
-}
-
-static void
-ipq_flush(int verdict)
-{
- write_lock_bh(&queue_lock);
- __ipq_flush(verdict);
- write_unlock_bh(&queue_lock);
-}
-
-static struct sk_buff *
-ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
-{
- unsigned char *old_tail;
- size_t size = 0;
- size_t data_len = 0;
- struct sk_buff *skb;
- struct ipq_packet_msg *pmsg;
- struct nlmsghdr *nlh;
-
- read_lock_bh(&queue_lock);
-
- switch (copy_mode) {
- case IPQ_COPY_META:
- case IPQ_COPY_NONE:
- size = NLMSG_SPACE(sizeof(*pmsg));
- data_len = 0;
- break;
-
- case IPQ_COPY_PACKET:
- if (entry->skb->ip_summed == CHECKSUM_HW &&
- (*errp = skb_checksum_help(entry->skb,
- entry->info->outdev == NULL))) {
- read_unlock_bh(&queue_lock);
- return NULL;
- }
- if (copy_range == 0 || copy_range > entry->skb->len)
- data_len = entry->skb->len;
- else
- data_len = copy_range;
-
- size = NLMSG_SPACE(sizeof(*pmsg) + data_len);
- break;
-
- default:
- *errp = -EINVAL;
- read_unlock_bh(&queue_lock);
- return NULL;
- }
-
- read_unlock_bh(&queue_lock);
-
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb)
- goto nlmsg_failure;
-
- old_tail= skb->tail;
- nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh));
- pmsg = NLMSG_DATA(nlh);
- memset(pmsg, 0, sizeof(*pmsg));
-
- pmsg->packet_id = (unsigned long )entry;
- pmsg->data_len = data_len;
- pmsg->timestamp_sec = entry->skb->tstamp.off_sec;
- pmsg->timestamp_usec = entry->skb->tstamp.off_usec;
- pmsg->mark = entry->skb->nfmark;
- pmsg->hook = entry->info->hook;
- pmsg->hw_protocol = entry->skb->protocol;
-
- if (entry->info->indev)
- strcpy(pmsg->indev_name, entry->info->indev->name);
- else
- pmsg->indev_name[0] = '\0';
-
- if (entry->info->outdev)
- strcpy(pmsg->outdev_name, entry->info->outdev->name);
- else
- pmsg->outdev_name[0] = '\0';
-
- if (entry->info->indev && entry->skb->dev) {
- pmsg->hw_type = entry->skb->dev->type;
- if (entry->skb->dev->hard_header_parse)
- pmsg->hw_addrlen =
- entry->skb->dev->hard_header_parse(entry->skb,
- pmsg->hw_addr);
- }
-
- if (data_len)
- if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len))
- BUG();
-
- nlh->nlmsg_len = skb->tail - old_tail;
- return skb;
-
-nlmsg_failure:
- if (skb)
- kfree_skb(skb);
- *errp = -EINVAL;
- printk(KERN_ERR "ip_queue: error creating packet message\n");
- return NULL;
-}
-
-static int
-ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info,
- unsigned int queuenum, void *data)
-{
- int status = -EINVAL;
- struct sk_buff *nskb;
- struct ipq_queue_entry *entry;
-
- if (copy_mode == IPQ_COPY_NONE)
- return -EAGAIN;
-
- entry = kmalloc(sizeof(*entry), GFP_ATOMIC);
- if (entry == NULL) {
- printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n");
- return -ENOMEM;
- }
-
- entry->info = info;
- entry->skb = skb;
-
- nskb = ipq_build_packet_message(entry, &status);
- if (nskb == NULL)
- goto err_out_free;
-
- write_lock_bh(&queue_lock);
-
- if (!peer_pid)
- goto err_out_free_nskb;
-
- if (queue_total >= queue_maxlen) {
- queue_dropped++;
- status = -ENOSPC;
- if (net_ratelimit())
- printk (KERN_WARNING "ip_queue: full at %d entries, "
- "dropping packets(s). Dropped: %d\n", queue_total,
- queue_dropped);
- goto err_out_free_nskb;
- }
-
- /* netlink_unicast will either free the nskb or attach it to a socket */
- status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT);
- if (status < 0) {
- queue_user_dropped++;
- goto err_out_unlock;
- }
-
- __ipq_enqueue_entry(entry);
-
- write_unlock_bh(&queue_lock);
- return status;
-
-err_out_free_nskb:
- kfree_skb(nskb);
-
-err_out_unlock:
- write_unlock_bh(&queue_lock);
-
-err_out_free:
- kfree(entry);
- return status;
-}
-
-static int
-ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e)
-{
- int diff;
- struct iphdr *user_iph = (struct iphdr *)v->payload;
-
- if (v->data_len < sizeof(*user_iph))
- return 0;
- diff = v->data_len - e->skb->len;
- if (diff < 0)
- skb_trim(e->skb, v->data_len);
- else if (diff > 0) {
- if (v->data_len > 0xFFFF)
- return -EINVAL;
- if (diff > skb_tailroom(e->skb)) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(e->skb,
- skb_headroom(e->skb),
- diff,
- GFP_ATOMIC);
- if (newskb == NULL) {
- printk(KERN_WARNING "ip_queue: OOM "
- "in mangle, dropping packet\n");
- return -ENOMEM;
- }
- if (e->skb->sk)
- skb_set_owner_w(newskb, e->skb->sk);
- kfree_skb(e->skb);
- e->skb = newskb;
- }
- skb_put(e->skb, diff);
- }
- if (!skb_make_writable(&e->skb, v->data_len))
- return -ENOMEM;
- memcpy(e->skb->data, v->payload, v->data_len);
- e->skb->ip_summed = CHECKSUM_NONE;
-
- return 0;
-}
-
-static inline int
-id_cmp(struct ipq_queue_entry *e, unsigned long id)
-{
- return (id == (unsigned long )e);
-}
-
-static int
-ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len)
-{
- struct ipq_queue_entry *entry;
-
- if (vmsg->value > NF_MAX_VERDICT)
- return -EINVAL;
-
- entry = ipq_find_dequeue_entry(id_cmp, vmsg->id);
- if (entry == NULL)
- return -ENOENT;
- else {
- int verdict = vmsg->value;
-
- if (vmsg->data_len && vmsg->data_len == len)
- if (ipq_mangle_ipv4(vmsg, entry) < 0)
- verdict = NF_DROP;
-
- ipq_issue_verdict(entry, verdict);
- return 0;
- }
-}
-
-static int
-ipq_set_mode(unsigned char mode, unsigned int range)
-{
- int status;
-
- write_lock_bh(&queue_lock);
- status = __ipq_set_mode(mode, range);
- write_unlock_bh(&queue_lock);
- return status;
-}
-
-static int
-ipq_receive_peer(struct ipq_peer_msg *pmsg,
- unsigned char type, unsigned int len)
-{
- int status = 0;
-
- if (len < sizeof(*pmsg))
- return -EINVAL;
-
- switch (type) {
- case IPQM_MODE:
- status = ipq_set_mode(pmsg->msg.mode.value,
- pmsg->msg.mode.range);
- break;
-
- case IPQM_VERDICT:
- if (pmsg->msg.verdict.value > NF_MAX_VERDICT)
- status = -EINVAL;
- else
- status = ipq_set_verdict(&pmsg->msg.verdict,
- len - sizeof(*pmsg));
- break;
- default:
- status = -EINVAL;
- }
- return status;
-}
-
-static int
-dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex)
-{
- if (entry->info->indev)
- if (entry->info->indev->ifindex == ifindex)
- return 1;
-
- if (entry->info->outdev)
- if (entry->info->outdev->ifindex == ifindex)
- return 1;
-
- return 0;
-}
-
-static void
-ipq_dev_drop(int ifindex)
-{
- struct ipq_queue_entry *entry;
-
- while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL)
- ipq_issue_verdict(entry, NF_DROP);
-}
-
-#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
-
-static inline void
-ipq_rcv_skb(struct sk_buff *skb)
-{
- int status, type, pid, flags, nlmsglen, skblen;
- struct nlmsghdr *nlh;
-
- skblen = skb->len;
- if (skblen < sizeof(*nlh))
- return;
-
- nlh = (struct nlmsghdr *)skb->data;
- nlmsglen = nlh->nlmsg_len;
- if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen)
- return;
-
- pid = nlh->nlmsg_pid;
- flags = nlh->nlmsg_flags;
-
- if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI)
- RCV_SKB_FAIL(-EINVAL);
-
- if (flags & MSG_TRUNC)
- RCV_SKB_FAIL(-ECOMM);
-
- type = nlh->nlmsg_type;
- if (type < NLMSG_NOOP || type >= IPQM_MAX)
- RCV_SKB_FAIL(-EINVAL);
-
- if (type <= IPQM_BASE)
- return;
-
- if (security_netlink_recv(skb))
- RCV_SKB_FAIL(-EPERM);
-
- write_lock_bh(&queue_lock);
-
- if (peer_pid) {
- if (peer_pid != pid) {
- write_unlock_bh(&queue_lock);
- RCV_SKB_FAIL(-EBUSY);
- }
- } else {
- net_enable_timestamp();
- peer_pid = pid;
- }
-
- write_unlock_bh(&queue_lock);
-
- status = ipq_receive_peer(NLMSG_DATA(nlh), type,
- skblen - NLMSG_LENGTH(0));
- if (status < 0)
- RCV_SKB_FAIL(status);
-
- if (flags & NLM_F_ACK)
- netlink_ack(skb, nlh, 0);
- return;
-}
-
-static void
-ipq_rcv_sk(struct sock *sk, int len)
-{
- struct sk_buff *skb;
- unsigned int qlen;
-
- down(&ipqnl_sem);
-
- for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
- skb = skb_dequeue(&sk->sk_receive_queue);
- ipq_rcv_skb(skb);
- kfree_skb(skb);
- }
-
- up(&ipqnl_sem);
-}
-
-static int
-ipq_rcv_dev_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct net_device *dev = ptr;
-
- /* Drop any packets associated with the downed device */
- if (event == NETDEV_DOWN)
- ipq_dev_drop(dev->ifindex);
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_dev_notifier = {
- .notifier_call = ipq_rcv_dev_event,
-};
-
-static int
-ipq_rcv_nl_event(struct notifier_block *this,
- unsigned long event, void *ptr)
-{
- struct netlink_notify *n = ptr;
-
- if (event == NETLINK_URELEASE &&
- n->protocol == NETLINK_FIREWALL && n->pid) {
- write_lock_bh(&queue_lock);
- if (n->pid == peer_pid)
- __ipq_reset();
- write_unlock_bh(&queue_lock);
- }
- return NOTIFY_DONE;
-}
-
-static struct notifier_block ipq_nl_notifier = {
- .notifier_call = ipq_rcv_nl_event,
-};
-
-static struct ctl_table_header *ipq_sysctl_header;
-
-static ctl_table ipq_table[] = {
- {
- .ctl_name = NET_IPQ_QMAX,
- .procname = NET_IPQ_QMAX_NAME,
- .data = &queue_maxlen,
- .maxlen = sizeof(queue_maxlen),
- .mode = 0644,
- .proc_handler = proc_dointvec
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ipq_dir_table[] = {
- {
- .ctl_name = NET_IPV4,
- .procname = "ipv4",
- .mode = 0555,
- .child = ipq_table
- },
- { .ctl_name = 0 }
-};
-
-static ctl_table ipq_root_table[] = {
- {
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = ipq_dir_table
- },
- { .ctl_name = 0 }
-};
-
-#ifdef CONFIG_PROC_FS
-static int
-ipq_get_info(char *buffer, char **start, off_t offset, int length)
-{
- int len;
-
- read_lock_bh(&queue_lock);
-
- len = sprintf(buffer,
- "Peer PID : %d\n"
- "Copy mode : %hu\n"
- "Copy range : %u\n"
- "Queue length : %u\n"
- "Queue max. length : %u\n"
- "Queue dropped : %u\n"
- "Netlink dropped : %u\n",
- peer_pid,
- copy_mode,
- copy_range,
- queue_total,
- queue_maxlen,
- queue_dropped,
- queue_user_dropped);
-
- read_unlock_bh(&queue_lock);
-
- *start = buffer + offset;
- len -= offset;
- if (len > length)
- len = length;
- else if (len < 0)
- len = 0;
- return len;
-}
-#endif /* CONFIG_PROC_FS */
-
-static struct nf_queue_handler nfqh = {
- .name = "ip_queue",
- .outfn = &ipq_enqueue_packet,
-};
-
-static int
-init_or_cleanup(int init)
-{
- int status = -ENOMEM;
- struct proc_dir_entry *proc;
-
- if (!init)
- goto cleanup;
-
- netlink_register_notifier(&ipq_nl_notifier);
- ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk,
- THIS_MODULE);
- if (ipqnl == NULL) {
- printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
- goto cleanup_netlink_notifier;
- }
-
- proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info);
- if (proc)
- proc->owner = THIS_MODULE;
- else {
- printk(KERN_ERR "ip_queue: failed to create proc entry\n");
- goto cleanup_ipqnl;
- }
-
- register_netdevice_notifier(&ipq_dev_notifier);
- ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0);
-
- status = nf_register_queue_handler(PF_INET, &nfqh);
- if (status < 0) {
- printk(KERN_ERR "ip_queue: failed to register queue handler\n");
- goto cleanup_sysctl;
- }
- return status;
-
-cleanup:
- nf_unregister_queue_handlers(&nfqh);
- synchronize_net();
- ipq_flush(NF_DROP);
-
-cleanup_sysctl:
- unregister_sysctl_table(ipq_sysctl_header);
- unregister_netdevice_notifier(&ipq_dev_notifier);
- proc_net_remove(IPQ_PROC_FS_NAME);
-
-cleanup_ipqnl:
- sock_release(ipqnl->sk_socket);
- down(&ipqnl_sem);
- up(&ipqnl_sem);
-
-cleanup_netlink_notifier:
- netlink_unregister_notifier(&ipq_nl_notifier);
- return status;
-}
-
-static int __init init(void)
-{
-
- return init_or_cleanup(1);
-}
-
-static void __exit fini(void)
-{
- init_or_cleanup(0);
-}
-
-MODULE_DESCRIPTION("IPv4 packet queue handler");
-MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
-MODULE_LICENSE("GPL");
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2371b2062c2..99e810f8467 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -3,18 +3,13 @@
*
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
* Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
- * - increase module usage count as soon as we have rules inside
- * a table
- * 08 Oct 2005 Harald Welte <lafore@netfilter.org>
- * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
*/
-#include <linux/config.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/cache.h>
#include <linux/capability.h>
#include <linux/skbuff.h>
@@ -24,14 +19,17 @@
#include <linux/module.h>
#include <linux/icmp.h>
#include <net/ip.h>
+#include <net/compat.h>
#include <asm/uaccess.h>
-#include <asm/semaphore.h>
+#include <linux/mutex.h>
#include <linux/proc_fs.h>
#include <linux/err.h>
#include <linux/cpumask.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <net/netfilter/nf_log.h>
+#include "../../netfilter/xt_repldata.h"
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
@@ -42,24 +40,19 @@ MODULE_DESCRIPTION("IPv4 packet filter");
/*#define DEBUG_IP_FIREWALL_USER*/
#ifdef DEBUG_IP_FIREWALL
-#define dprintf(format, args...) printk(format , ## args)
+#define dprintf(format, args...) pr_info(format , ## args)
#else
#define dprintf(format, args...)
#endif
#ifdef DEBUG_IP_FIREWALL_USER
-#define duprintf(format, args...) printk(format , ## args)
+#define duprintf(format, args...) pr_info(format , ## args)
#else
#define duprintf(format, args...)
#endif
#ifdef CONFIG_NETFILTER_DEBUG
-#define IP_NF_ASSERT(x) \
-do { \
- if (!(x)) \
- printk("IP_NF_ASSERT: %s:%s:%u\n", \
- __FUNCTION__, __FILE__, __LINE__); \
-} while(0)
+#define IP_NF_ASSERT(x) WARN_ON(!(x))
#else
#define IP_NF_ASSERT(x)
#endif
@@ -70,81 +63,65 @@ do { \
#define inline
#endif
-/*
- We keep a set of rules for each CPU, so we can avoid write-locking
- them in the softirq when updating the counters and therefore
- only need to read-lock in the softirq; doing a write_lock_bh() in user
- context stops packets coming through and allows user context to read
- the counters or update the rules.
-
- Hence the start of any table is given by get_table() below. */
+void *ipt_alloc_initial_table(const struct xt_table *info)
+{
+ return xt_alloc_initial_table(ipt, IPT);
+}
+EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
/* Returns whether matches rule or not. */
-static inline int
+/* Performance critical - called for every packet */
+static inline bool
ip_packet_match(const struct iphdr *ip,
const char *indev,
const char *outdev,
const struct ipt_ip *ipinfo,
int isfrag)
{
- size_t i;
unsigned long ret;
-#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
+#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
- IPT_INV_SRCIP)
- || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
- IPT_INV_DSTIP)) {
+ IPT_INV_SRCIP) ||
+ FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
+ IPT_INV_DSTIP)) {
dprintf("Source or dest mismatch.\n");
- dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
- NIPQUAD(ip->saddr),
- NIPQUAD(ipinfo->smsk.s_addr),
- NIPQUAD(ipinfo->src.s_addr),
+ dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
+ &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
- dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
- NIPQUAD(ip->daddr),
- NIPQUAD(ipinfo->dmsk.s_addr),
- NIPQUAD(ipinfo->dst.s_addr),
+ dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
+ &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
- return 0;
+ return false;
}
- /* Look for ifname matches; this should unroll nicely. */
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
- ret |= (((const unsigned long *)indev)[i]
- ^ ((const unsigned long *)ipinfo->iniface)[i])
- & ((const unsigned long *)ipinfo->iniface_mask)[i];
- }
+ ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
dprintf("VIA in mismatch (%s vs %s).%s\n",
indev, ipinfo->iniface,
ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
- return 0;
+ return false;
}
- for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
- ret |= (((const unsigned long *)outdev)[i]
- ^ ((const unsigned long *)ipinfo->outiface)[i])
- & ((const unsigned long *)ipinfo->outiface_mask)[i];
- }
+ ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
dprintf("VIA out mismatch (%s vs %s).%s\n",
outdev, ipinfo->outiface,
ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
- return 0;
+ return false;
}
/* Check specific protocol */
- if (ipinfo->proto
- && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
+ if (ipinfo->proto &&
+ FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
dprintf("Packet protocol %hi does not match %hi.%s\n",
ip->protocol, ipinfo->proto,
ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
- return 0;
+ return false;
}
/* If we have a fragment rule but the packet is not a fragment
@@ -152,88 +129,183 @@ ip_packet_match(const struct iphdr *ip,
if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
dprintf("Fragment rule but not fragment.%s\n",
ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
- return 0;
+ return false;
}
- return 1;
+ return true;
}
-static inline int
+static bool
ip_checkentry(const struct ipt_ip *ip)
{
if (ip->flags & ~IPT_F_MASK) {
duprintf("Unknown flag bits set: %08X\n",
ip->flags & ~IPT_F_MASK);
- return 0;
+ return false;
}
if (ip->invflags & ~IPT_INV_MASK) {
duprintf("Unknown invflag bits set: %08X\n",
ip->invflags & ~IPT_INV_MASK);
- return 0;
+ return false;
}
- return 1;
+ return true;
}
static unsigned int
-ipt_error(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
+ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
{
- if (net_ratelimit())
- printk("ip_tables: error: `%s'\n", (char *)targinfo);
+ net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
return NF_DROP;
}
-static inline
-int do_match(struct ipt_entry_match *m,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- int offset,
- int *hotdrop)
+/* Performance critical */
+static inline struct ipt_entry *
+get_entry(const void *base, unsigned int offset)
+{
+ return (struct ipt_entry *)(base + offset);
+}
+
+/* All zeroes == unconditional rule. */
+/* Mildly perf critical (only if packet tracing is on) */
+static inline bool unconditional(const struct ipt_ip *ip)
{
- /* Stop iteration if it doesn't match */
- if (!m->u.kernel.match->match(skb, in, out, m->data, offset,
- skb->nh.iph->ihl*4, hotdrop))
+ static const struct ipt_ip uncond;
+
+ return memcmp(ip, &uncond, sizeof(uncond)) == 0;
+#undef FWINV
+}
+
+/* for const-correctness */
+static inline const struct xt_entry_target *
+ipt_get_target_c(const struct ipt_entry *e)
+{
+ return ipt_get_target((struct ipt_entry *)e);
+}
+
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
+static const char *const hooknames[] = {
+ [NF_INET_PRE_ROUTING] = "PREROUTING",
+ [NF_INET_LOCAL_IN] = "INPUT",
+ [NF_INET_FORWARD] = "FORWARD",
+ [NF_INET_LOCAL_OUT] = "OUTPUT",
+ [NF_INET_POST_ROUTING] = "POSTROUTING",
+};
+
+enum nf_ip_trace_comments {
+ NF_IP_TRACE_COMMENT_RULE,
+ NF_IP_TRACE_COMMENT_RETURN,
+ NF_IP_TRACE_COMMENT_POLICY,
+};
+
+static const char *const comments[] = {
+ [NF_IP_TRACE_COMMENT_RULE] = "rule",
+ [NF_IP_TRACE_COMMENT_RETURN] = "return",
+ [NF_IP_TRACE_COMMENT_POLICY] = "policy",
+};
+
+static struct nf_loginfo trace_loginfo = {
+ .type = NF_LOG_TYPE_LOG,
+ .u = {
+ .log = {
+ .level = 4,
+ .logflags = NF_LOG_MASK,
+ },
+ },
+};
+
+/* Mildly perf critical (only if packet tracing is on) */
+static inline int
+get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
+ const char *hookname, const char **chainname,
+ const char **comment, unsigned int *rulenum)
+{
+ const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
+
+ if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
+ /* Head of user chain: ERROR target with chainname */
+ *chainname = t->target.data;
+ (*rulenum) = 0;
+ } else if (s == e) {
+ (*rulenum)++;
+
+ if (s->target_offset == sizeof(struct ipt_entry) &&
+ strcmp(t->target.u.kernel.target->name,
+ XT_STANDARD_TARGET) == 0 &&
+ t->verdict < 0 &&
+ unconditional(&s->ip)) {
+ /* Tail of chains: STANDARD target (return/policy) */
+ *comment = *chainname == hookname
+ ? comments[NF_IP_TRACE_COMMENT_POLICY]
+ : comments[NF_IP_TRACE_COMMENT_RETURN];
+ }
return 1;
- else
- return 0;
+ } else
+ (*rulenum)++;
+
+ return 0;
}
-static inline struct ipt_entry *
-get_entry(void *base, unsigned int offset)
+static void trace_packet(const struct sk_buff *skb,
+ unsigned int hook,
+ const struct net_device *in,
+ const struct net_device *out,
+ const char *tablename,
+ const struct xt_table_info *private,
+ const struct ipt_entry *e)
{
- return (struct ipt_entry *)(base + offset);
+ const void *table_base;
+ const struct ipt_entry *root;
+ const char *hookname, *chainname, *comment;
+ const struct ipt_entry *iter;
+ unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
+
+ table_base = private->entries[smp_processor_id()];
+ root = get_entry(table_base, private->hook_entry[hook]);
+
+ hookname = chainname = hooknames[hook];
+ comment = comments[NF_IP_TRACE_COMMENT_RULE];
+
+ xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
+ if (get_chainname_rulenum(iter, e, hookname,
+ &chainname, &comment, &rulenum) != 0)
+ break;
+
+ nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
+ "TRACE: %s:%s:%s:%u ",
+ tablename, chainname, comment, rulenum);
+}
+#endif
+
+static inline __pure
+struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
+{
+ return (void *)entry + entry->next_offset;
}
/* Returns one of the generic firewall policies, like NF_ACCEPT. */
unsigned int
-ipt_do_table(struct sk_buff **pskb,
+ipt_do_table(struct sk_buff *skb,
unsigned int hook,
const struct net_device *in,
const struct net_device *out,
- struct ipt_table *table,
- void *userdata)
+ struct xt_table *table)
{
static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
- u_int16_t offset;
- struct iphdr *ip;
- u_int16_t datalen;
- int hotdrop = 0;
+ const struct iphdr *ip;
/* Initializing verdict to NF_DROP keeps gcc happy. */
unsigned int verdict = NF_DROP;
const char *indev, *outdev;
- void *table_base;
- struct ipt_entry *e, *back;
- struct xt_table_info *private = table->private;
+ const void *table_base;
+ struct ipt_entry *e, **jumpstack;
+ unsigned int *stackptr, origptr, cpu;
+ const struct xt_table_info *private;
+ struct xt_action_param acpar;
+ unsigned int addend;
/* Initialization */
- ip = (*pskb)->nh.iph;
- datalen = (*pskb)->len - ip->ihl * 4;
+ ip = ip_hdr(skb);
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
/* We handle fragments by dealing with the first fragment as
@@ -242,138 +314,144 @@ ipt_do_table(struct sk_buff **pskb,
* things we don't know, ie. tcp syn flag or ports). If the
* rule is also a fragment-specific rule, non-fragments won't
* match it. */
- offset = ntohs(ip->frag_off) & IP_OFFSET;
+ acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
+ acpar.thoff = ip_hdrlen(skb);
+ acpar.hotdrop = false;
+ acpar.in = in;
+ acpar.out = out;
+ acpar.family = NFPROTO_IPV4;
+ acpar.hooknum = hook;
- read_lock_bh(&table->lock);
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- table_base = (void *)private->entries[smp_processor_id()];
+ local_bh_disable();
+ addend = xt_write_recseq_begin();
+ private = table->private;
+ cpu = smp_processor_id();
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
+ table_base = private->entries[cpu];
+ jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
+ stackptr = per_cpu_ptr(private->stackptr, cpu);
+ origptr = *stackptr;
+
e = get_entry(table_base, private->hook_entry[hook]);
- /* For return from builtin chain */
- back = get_entry(table_base, private->underflow[hook]);
+ pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n",
+ table->name, hook, origptr,
+ get_entry(table_base, private->underflow[hook]));
do {
+ const struct xt_entry_target *t;
+ const struct xt_entry_match *ematch;
+
IP_NF_ASSERT(e);
- IP_NF_ASSERT(back);
- if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
- struct ipt_entry_target *t;
+ if (!ip_packet_match(ip, indev, outdev,
+ &e->ip, acpar.fragoff)) {
+ no_match:
+ e = ipt_next_entry(e);
+ continue;
+ }
- if (IPT_MATCH_ITERATE(e, do_match,
- *pskb, in, out,
- offset, &hotdrop) != 0)
+ xt_ematch_foreach(ematch, e) {
+ acpar.match = ematch->u.kernel.match;
+ acpar.matchinfo = ematch->data;
+ if (!acpar.match->match(skb, &acpar))
goto no_match;
+ }
- ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
+ ADD_COUNTER(e->counters, skb->len, 1);
- t = ipt_get_target(e);
- IP_NF_ASSERT(t->u.kernel.target);
- /* Standard target? */
- if (!t->u.kernel.target->target) {
- int v;
+ t = ipt_get_target(e);
+ IP_NF_ASSERT(t->u.kernel.target);
- v = ((struct ipt_standard_target *)t)->verdict;
- if (v < 0) {
- /* Pop from stack? */
- if (v != IPT_RETURN) {
- verdict = (unsigned)(-v) - 1;
- break;
- }
- e = back;
- back = get_entry(table_base,
- back->comefrom);
- continue;
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
+ /* The packet is traced: log it */
+ if (unlikely(skb->nf_trace))
+ trace_packet(skb, hook, in, out,
+ table->name, private, e);
+#endif
+ /* Standard target? */
+ if (!t->u.kernel.target->target) {
+ int v;
+
+ v = ((struct xt_standard_target *)t)->verdict;
+ if (v < 0) {
+ /* Pop from stack? */
+ if (v != XT_RETURN) {
+ verdict = (unsigned int)(-v) - 1;
+ break;
}
- if (table_base + v != (void *)e + e->next_offset
- && !(e->ip.flags & IPT_F_GOTO)) {
- /* Save old back ptr in next entry */
- struct ipt_entry *next
- = (void *)e + e->next_offset;
- next->comefrom
- = (void *)back - table_base;
- /* set back pointer to next entry */
- back = next;
+ if (*stackptr <= origptr) {
+ e = get_entry(table_base,
+ private->underflow[hook]);
+ pr_debug("Underflow (this is normal) "
+ "to %p\n", e);
+ } else {
+ e = jumpstack[--*stackptr];
+ pr_debug("Pulled %p out from pos %u\n",
+ e, *stackptr);
+ e = ipt_next_entry(e);
}
-
- e = get_entry(table_base, v);
- } else {
- /* Targets which reenter must return
- abs. verdicts */
-#ifdef CONFIG_NETFILTER_DEBUG
- ((struct ipt_entry *)table_base)->comefrom
- = 0xeeeeeeec;
-#endif
- verdict = t->u.kernel.target->target(pskb,
- in, out,
- hook,
- t->data,
- userdata);
-
-#ifdef CONFIG_NETFILTER_DEBUG
- if (((struct ipt_entry *)table_base)->comefrom
- != 0xeeeeeeec
- && verdict == IPT_CONTINUE) {
- printk("Target %s reentered!\n",
- t->u.kernel.target->name);
+ continue;
+ }
+ if (table_base + v != ipt_next_entry(e) &&
+ !(e->ip.flags & IPT_F_GOTO)) {
+ if (*stackptr >= private->stacksize) {
verdict = NF_DROP;
- }
- ((struct ipt_entry *)table_base)->comefrom
- = 0x57acc001;
-#endif
- /* Target might have changed stuff. */
- ip = (*pskb)->nh.iph;
- datalen = (*pskb)->len - ip->ihl * 4;
-
- if (verdict == IPT_CONTINUE)
- e = (void *)e + e->next_offset;
- else
- /* Verdict */
break;
+ }
+ jumpstack[(*stackptr)++] = e;
+ pr_debug("Pushed %p into pos %u\n",
+ e, *stackptr - 1);
}
- } else {
- no_match:
- e = (void *)e + e->next_offset;
+ e = get_entry(table_base, v);
+ continue;
}
- } while (!hotdrop);
- read_unlock_bh(&table->lock);
+ acpar.target = t->u.kernel.target;
+ acpar.targinfo = t->data;
+
+ verdict = t->u.kernel.target->target(skb, &acpar);
+ /* Target might have changed stuff. */
+ ip = ip_hdr(skb);
+ if (verdict == XT_CONTINUE)
+ e = ipt_next_entry(e);
+ else
+ /* Verdict */
+ break;
+ } while (!acpar.hotdrop);
+ pr_debug("Exiting %s; resetting sp from %u to %u\n",
+ __func__, *stackptr, origptr);
+ *stackptr = origptr;
+ xt_write_recseq_end(addend);
+ local_bh_enable();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
#else
- if (hotdrop)
+ if (acpar.hotdrop)
return NF_DROP;
else return verdict;
#endif
}
-/* All zeroes == unconditional rule. */
-static inline int
-unconditional(const struct ipt_ip *ip)
-{
- unsigned int i;
-
- for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
- if (((__u32 *)ip)[i])
- return 0;
-
- return 1;
-}
-
/* Figures out from what hook each rule can be called: returns 0 if
there are loops. Puts hook bitmask in comefrom. */
static int
-mark_source_chains(struct xt_table_info *newinfo,
+mark_source_chains(const struct xt_table_info *newinfo,
unsigned int valid_hooks, void *entry0)
{
unsigned int hook;
/* No recursion; use packet counter to save back ptrs (reset
to 0 as we leave), and comefrom to save source hook bitmask */
- for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
+ for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
unsigned int pos = newinfo->hook_entry[hook];
- struct ipt_entry *e
- = (struct ipt_entry *)(entry0 + pos);
+ struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
if (!(valid_hooks & (1 << hook)))
continue;
@@ -382,32 +460,41 @@ mark_source_chains(struct xt_table_info *newinfo,
e->counters.pcnt = pos;
for (;;) {
- struct ipt_standard_target *t
- = (void *)ipt_get_target(e);
+ const struct xt_standard_target *t
+ = (void *)ipt_get_target_c(e);
+ int visited = e->comefrom & (1 << hook);
- if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
- printk("iptables: loop hook %u pos %u %08X.\n",
+ if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
+ pr_err("iptables: loop hook %u pos %u %08X.\n",
hook, pos, e->comefrom);
return 0;
}
- e->comefrom
- |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
+ e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
/* Unconditional return/END. */
- if (e->target_offset == sizeof(struct ipt_entry)
- && (strcmp(t->target.u.user.name,
- IPT_STANDARD_TARGET) == 0)
- && t->verdict < 0
- && unconditional(&e->ip)) {
+ if ((e->target_offset == sizeof(struct ipt_entry) &&
+ (strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < 0 && unconditional(&e->ip)) ||
+ visited) {
unsigned int oldpos, size;
+ if ((strcmp(t->target.u.user.name,
+ XT_STANDARD_TARGET) == 0) &&
+ t->verdict < -NF_MAX_VERDICT - 1) {
+ duprintf("mark_source_chains: bad "
+ "negative verdict (%i)\n",
+ t->verdict);
+ return 0;
+ }
+
/* Return: backtrack through the last
big jump. */
do {
- e->comefrom ^= (1<<NF_IP_NUMHOOKS);
+ e->comefrom ^= (1<<NF_INET_NUMHOOKS);
#ifdef DEBUG_IP_FIREWALL_USER
if (e->comefrom
- & (1 << NF_IP_NUMHOOKS)) {
+ & (1 << NF_INET_NUMHOOKS)) {
duprintf("Back unset "
"on hook %u "
"rule %u\n",
@@ -436,8 +523,15 @@ mark_source_chains(struct xt_table_info *newinfo,
int newpos = t->verdict;
if (strcmp(t->target.u.user.name,
- IPT_STANDARD_TARGET) == 0
- && newpos >= 0) {
+ XT_STANDARD_TARGET) == 0 &&
+ newpos >= 0) {
+ if (newpos > newinfo->size -
+ sizeof(struct ipt_entry)) {
+ duprintf("mark_source_chains: "
+ "bad verdict (%i)\n",
+ newpos);
+ return 0;
+ }
/* This a jump; chase it. */
duprintf("Jump rule %u -> %u\n",
pos, newpos);
@@ -457,220 +551,257 @@ mark_source_chains(struct xt_table_info *newinfo,
return 1;
}
-static inline int
-cleanup_match(struct ipt_entry_match *m, unsigned int *i)
+static void cleanup_match(struct xt_entry_match *m, struct net *net)
{
- if (i && (*i)-- == 0)
- return 1;
-
- if (m->u.kernel.match->destroy)
- m->u.kernel.match->destroy(m->data,
- m->u.match_size - sizeof(*m));
- module_put(m->u.kernel.match->me);
- return 0;
+ struct xt_mtdtor_param par;
+
+ par.net = net;
+ par.match = m->u.kernel.match;
+ par.matchinfo = m->data;
+ par.family = NFPROTO_IPV4;
+ if (par.match->destroy != NULL)
+ par.match->destroy(&par);
+ module_put(par.match->me);
}
-static inline int
-standard_check(const struct ipt_entry_target *t,
- unsigned int max_offset)
+static int
+check_entry(const struct ipt_entry *e, const char *name)
{
- struct ipt_standard_target *targ = (void *)t;
+ const struct xt_entry_target *t;
- /* Check standard info. */
- if (t->u.target_size
- != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
- duprintf("standard_check: target size %u != %u\n",
- t->u.target_size,
- IPT_ALIGN(sizeof(struct ipt_standard_target)));
- return 0;
+ if (!ip_checkentry(&e->ip)) {
+ duprintf("ip check failed %p %s.\n", e, name);
+ return -EINVAL;
}
- if (targ->verdict >= 0
- && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
- duprintf("ipt_standard_check: bad verdict (%i)\n",
- targ->verdict);
- return 0;
- }
+ if (e->target_offset + sizeof(struct xt_entry_target) >
+ e->next_offset)
+ return -EINVAL;
+
+ t = ipt_get_target_c(e);
+ if (e->target_offset + t->u.target_size > e->next_offset)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int
+check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
+{
+ const struct ipt_ip *ip = par->entryinfo;
+ int ret;
- if (targ->verdict < -NF_MAX_VERDICT - 1) {
- duprintf("ipt_standard_check: bad negative verdict (%i)\n",
- targ->verdict);
- return 0;
+ par->match = m->u.kernel.match;
+ par->matchinfo = m->data;
+
+ ret = xt_check_match(par, m->u.match_size - sizeof(*m),
+ ip->proto, ip->invflags & IPT_INV_PROTO);
+ if (ret < 0) {
+ duprintf("check failed for `%s'.\n", par->match->name);
+ return ret;
}
- return 1;
+ return 0;
}
-static inline int
-check_match(struct ipt_entry_match *m,
- const char *name,
- const struct ipt_ip *ip,
- unsigned int hookmask,
- unsigned int *i)
+static int
+find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
{
- struct ipt_match *match;
+ struct xt_match *match;
+ int ret;
- match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
- m->u.user.revision),
- "ipt_%s", m->u.user.name);
- if (IS_ERR(match) || !match) {
- duprintf("check_match: `%s' not found\n", m->u.user.name);
- return match ? PTR_ERR(match) : -ENOENT;
+ match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+ m->u.user.revision);
+ if (IS_ERR(match)) {
+ duprintf("find_check_match: `%s' not found\n", m->u.user.name);
+ return PTR_ERR(match);
}
m->u.kernel.match = match;
- if (m->u.kernel.match->checkentry
- && !m->u.kernel.match->checkentry(name, ip, m->data,
- m->u.match_size - sizeof(*m),
- hookmask)) {
- module_put(m->u.kernel.match->me);
- duprintf("ip_tables: check failed for `%s'.\n",
- m->u.kernel.match->name);
- return -EINVAL;
- }
+ ret = check_match(m, par);
+ if (ret)
+ goto err;
- (*i)++;
return 0;
+err:
+ module_put(m->u.kernel.match->me);
+ return ret;
}
-static struct ipt_target ipt_standard_target;
+static int check_target(struct ipt_entry *e, struct net *net, const char *name)
+{
+ struct xt_entry_target *t = ipt_get_target(e);
+ struct xt_tgchk_param par = {
+ .net = net,
+ .table = name,
+ .entryinfo = e,
+ .target = t->u.kernel.target,
+ .targinfo = t->data,
+ .hook_mask = e->comefrom,
+ .family = NFPROTO_IPV4,
+ };
+ int ret;
-static inline int
-check_entry(struct ipt_entry *e, const char *name, unsigned int size,
- unsigned int *i)
+ ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
+ e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
+ if (ret < 0) {
+ duprintf("check failed for `%s'.\n",
+ t->u.kernel.target->name);
+ return ret;
+ }
+ return 0;
+}
+
+static int
+find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
+ unsigned int size)
{
- struct ipt_entry_target *t;
- struct ipt_target *target;
+ struct xt_entry_target *t;
+ struct xt_target *target;
int ret;
unsigned int j;
+ struct xt_mtchk_param mtpar;
+ struct xt_entry_match *ematch;
- if (!ip_checkentry(&e->ip)) {
- duprintf("ip_tables: ip check failed %p %s.\n", e, name);
- return -EINVAL;
- }
+ ret = check_entry(e, name);
+ if (ret)
+ return ret;
j = 0;
- ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
- if (ret != 0)
- goto cleanup_matches;
+ mtpar.net = net;
+ mtpar.table = name;
+ mtpar.entryinfo = &e->ip;
+ mtpar.hook_mask = e->comefrom;
+ mtpar.family = NFPROTO_IPV4;
+ xt_ematch_foreach(ematch, e) {
+ ret = find_check_match(ematch, &mtpar);
+ if (ret != 0)
+ goto cleanup_matches;
+ ++j;
+ }
t = ipt_get_target(e);
- target = try_then_request_module(xt_find_target(AF_INET,
- t->u.user.name,
- t->u.user.revision),
- "ipt_%s", t->u.user.name);
- if (IS_ERR(target) || !target) {
- duprintf("check_entry: `%s' not found\n", t->u.user.name);
- ret = target ? PTR_ERR(target) : -ENOENT;
+ target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
+ ret = PTR_ERR(target);
goto cleanup_matches;
}
t->u.kernel.target = target;
- if (t->u.kernel.target == &ipt_standard_target) {
- if (!standard_check(t, size)) {
- ret = -EINVAL;
- goto cleanup_matches;
- }
- } else if (t->u.kernel.target->checkentry
- && !t->u.kernel.target->checkentry(name, e, t->data,
- t->u.target_size
- - sizeof(*t),
- e->comefrom)) {
- module_put(t->u.kernel.target->me);
- duprintf("ip_tables: check failed for `%s'.\n",
- t->u.kernel.target->name);
- ret = -EINVAL;
- goto cleanup_matches;
- }
-
- (*i)++;
+ ret = check_target(e, net, name);
+ if (ret)
+ goto err;
return 0;
-
+ err:
+ module_put(t->u.kernel.target->me);
cleanup_matches:
- IPT_MATCH_ITERATE(e, cleanup_match, &j);
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ cleanup_match(ematch, net);
+ }
return ret;
}
-static inline int
+static bool check_underflow(const struct ipt_entry *e)
+{
+ const struct xt_entry_target *t;
+ unsigned int verdict;
+
+ if (!unconditional(&e->ip))
+ return false;
+ t = ipt_get_target_c(e);
+ if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
+ return false;
+ verdict = ((struct xt_standard_target *)t)->verdict;
+ verdict = -verdict - 1;
+ return verdict == NF_DROP || verdict == NF_ACCEPT;
+}
+
+static int
check_entry_size_and_hooks(struct ipt_entry *e,
struct xt_table_info *newinfo,
- unsigned char *base,
- unsigned char *limit,
+ const unsigned char *base,
+ const unsigned char *limit,
const unsigned int *hook_entries,
const unsigned int *underflows,
- unsigned int *i)
+ unsigned int valid_hooks)
{
unsigned int h;
- if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
- || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
+ if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
duprintf("Bad offset %p\n", e);
return -EINVAL;
}
if (e->next_offset
- < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
+ < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) {
duprintf("checking: element %p size %u\n",
e, e->next_offset);
return -EINVAL;
}
/* Check hooks & underflows */
- for (h = 0; h < NF_IP_NUMHOOKS; h++) {
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if (!(valid_hooks & (1 << h)))
+ continue;
if ((unsigned char *)e - base == hook_entries[h])
newinfo->hook_entry[h] = hook_entries[h];
- if ((unsigned char *)e - base == underflows[h])
+ if ((unsigned char *)e - base == underflows[h]) {
+ if (!check_underflow(e)) {
+ pr_err("Underflows must be unconditional and "
+ "use the STANDARD target with "
+ "ACCEPT/DROP\n");
+ return -EINVAL;
+ }
newinfo->underflow[h] = underflows[h];
+ }
}
- /* FIXME: underflows must be unconditional, standard verdicts
- < 0 (not IPT_RETURN). --RR */
-
/* Clear counters and comefrom */
e->counters = ((struct xt_counters) { 0, 0 });
e->comefrom = 0;
-
- (*i)++;
return 0;
}
-static inline int
-cleanup_entry(struct ipt_entry *e, unsigned int *i)
+static void
+cleanup_entry(struct ipt_entry *e, struct net *net)
{
- struct ipt_entry_target *t;
-
- if (i && (*i)-- == 0)
- return 1;
+ struct xt_tgdtor_param par;
+ struct xt_entry_target *t;
+ struct xt_entry_match *ematch;
/* Cleanup all matches */
- IPT_MATCH_ITERATE(e, cleanup_match, NULL);
+ xt_ematch_foreach(ematch, e)
+ cleanup_match(ematch, net);
t = ipt_get_target(e);
- if (t->u.kernel.target->destroy)
- t->u.kernel.target->destroy(t->data,
- t->u.target_size - sizeof(*t));
- module_put(t->u.kernel.target->me);
- return 0;
+
+ par.net = net;
+ par.target = t->u.kernel.target;
+ par.targinfo = t->data;
+ par.family = NFPROTO_IPV4;
+ if (par.target->destroy != NULL)
+ par.target->destroy(&par);
+ module_put(par.target->me);
}
/* Checks and translates the user-supplied table segment (held in
newinfo) */
static int
-translate_table(const char *name,
- unsigned int valid_hooks,
- struct xt_table_info *newinfo,
- void *entry0,
- unsigned int size,
- unsigned int number,
- const unsigned int *hook_entries,
- const unsigned int *underflows)
+translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
+ const struct ipt_replace *repl)
{
+ struct ipt_entry *iter;
unsigned int i;
- int ret;
+ int ret = 0;
- newinfo->size = size;
- newinfo->number = number;
+ newinfo->size = repl->size;
+ newinfo->number = repl->num_entries;
/* Init all hooks to impossible value. */
- for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
newinfo->hook_entry[i] = 0xFFFFFFFF;
newinfo->underflow[i] = 0xFFFFFFFF;
}
@@ -678,54 +809,66 @@ translate_table(const char *name,
duprintf("translate_table: size %u\n", newinfo->size);
i = 0;
/* Walk through entries, checking offsets. */
- ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
- check_entry_size_and_hooks,
- newinfo,
- entry0,
- entry0 + size,
- hook_entries, underflows, &i);
- if (ret != 0)
- return ret;
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = check_entry_size_and_hooks(iter, newinfo, entry0,
+ entry0 + repl->size,
+ repl->hook_entry,
+ repl->underflow,
+ repl->valid_hooks);
+ if (ret != 0)
+ return ret;
+ ++i;
+ if (strcmp(ipt_get_target(iter)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
- if (i != number) {
+ if (i != repl->num_entries) {
duprintf("translate_table: %u not %u entries\n",
- i, number);
+ i, repl->num_entries);
return -EINVAL;
}
/* Check hooks all assigned */
- for (i = 0; i < NF_IP_NUMHOOKS; i++) {
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
/* Only hooks which are valid */
- if (!(valid_hooks & (1 << i)))
+ if (!(repl->valid_hooks & (1 << i)))
continue;
if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
duprintf("Invalid hook entry %u %u\n",
- i, hook_entries[i]);
+ i, repl->hook_entry[i]);
return -EINVAL;
}
if (newinfo->underflow[i] == 0xFFFFFFFF) {
duprintf("Invalid underflow %u %u\n",
- i, underflows[i]);
+ i, repl->underflow[i]);
return -EINVAL;
}
}
- if (!mark_source_chains(newinfo, valid_hooks, entry0))
+ if (!mark_source_chains(newinfo, repl->valid_hooks, entry0))
return -ELOOP;
/* Finally, each sanity check must pass */
i = 0;
- ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
- check_entry, name, size, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ ret = find_check_entry(iter, net, repl->name, repl->size);
+ if (ret != 0)
+ break;
+ ++i;
+ }
if (ret != 0) {
- IPT_ENTRY_ITERATE(entry0, newinfo->size,
- cleanup_entry, &i);
+ xt_entry_foreach(iter, entry0, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter, net);
+ }
return ret;
}
/* And one copy for every other CPU */
- for_each_cpu(i) {
+ for_each_possible_cpu(i) {
if (newinfo->entries[i] && newinfo->entries[i] != entry0)
memcpy(newinfo->entries[i], entry0, newinfo->size);
}
@@ -733,95 +876,75 @@ translate_table(const char *name,
return ret;
}
-/* Gets counters. */
-static inline int
-add_entry_to_counter(const struct ipt_entry *e,
- struct xt_counters total[],
- unsigned int *i)
-{
- ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
- (*i)++;
- return 0;
-}
-
-static inline int
-set_entry_to_counter(const struct ipt_entry *e,
- struct ipt_counters total[],
- unsigned int *i)
-{
- SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
-
- (*i)++;
- return 0;
-}
-
static void
get_counters(const struct xt_table_info *t,
struct xt_counters counters[])
{
+ struct ipt_entry *iter;
unsigned int cpu;
unsigned int i;
- unsigned int curcpu;
- /* Instead of clearing (by a previous call to memset())
- * the counters and using adds, we set the counters
- * with data used by 'current' CPU
- * We dont care about preemption here.
- */
- curcpu = raw_smp_processor_id();
+ for_each_possible_cpu(cpu) {
+ seqcount_t *s = &per_cpu(xt_recseq, cpu);
- i = 0;
- IPT_ENTRY_ITERATE(t->entries[curcpu],
- t->size,
- set_entry_to_counter,
- counters,
- &i);
-
- for_each_cpu(cpu) {
- if (cpu == curcpu)
- continue;
i = 0;
- IPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_entry_to_counter,
- counters,
- &i);
+ xt_entry_foreach(iter, t->entries[cpu], t->size) {
+ u64 bcnt, pcnt;
+ unsigned int start;
+
+ do {
+ start = read_seqcount_begin(s);
+ bcnt = iter->counters.bcnt;
+ pcnt = iter->counters.pcnt;
+ } while (read_seqcount_retry(s, start));
+
+ ADD_COUNTER(counters[i], bcnt, pcnt);
+ ++i; /* macro does multi eval of i */
+ }
}
}
-static int
-copy_entries_to_user(unsigned int total_size,
- struct ipt_table *table,
- void __user *userptr)
+static struct xt_counters *alloc_counters(const struct xt_table *table)
{
- unsigned int off, num, countersize;
- struct ipt_entry *e;
+ unsigned int countersize;
struct xt_counters *counters;
- struct xt_table_info *private = table->private;
- int ret = 0;
- void *loc_cpu_entry;
+ const struct xt_table_info *private = table->private;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
about). */
countersize = sizeof(struct xt_counters) * private->number;
- counters = vmalloc_node(countersize, numa_node_id());
+ counters = vzalloc(countersize);
if (counters == NULL)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
- /* First, sum counters... */
- write_lock_bh(&table->lock);
get_counters(private, counters);
- write_unlock_bh(&table->lock);
+
+ return counters;
+}
+
+static int
+copy_entries_to_user(unsigned int total_size,
+ const struct xt_table *table,
+ void __user *userptr)
+{
+ unsigned int off, num;
+ const struct ipt_entry *e;
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ int ret = 0;
+ const void *loc_cpu_entry;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
/* choose the copy that is on our node/cpu, ...
* This choice is lazy (because current thread is
* allowed to migrate to another cpu)
*/
loc_cpu_entry = private->entries[raw_smp_processor_id()];
- /* ... then copy entire thing ... */
if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
ret = -EFAULT;
goto free_counters;
@@ -831,8 +954,8 @@ copy_entries_to_user(unsigned int total_size,
/* ... then go back and fix counters and names */
for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
unsigned int i;
- struct ipt_entry_match *m;
- struct ipt_entry_target *t;
+ const struct xt_entry_match *m;
+ const struct xt_entry_target *t;
e = (struct ipt_entry *)(loc_cpu_entry + off);
if (copy_to_user(userptr + off
@@ -849,7 +972,7 @@ copy_entries_to_user(unsigned int total_size,
m = (void *)e + i;
if (copy_to_user(userptr + off + i
- + offsetof(struct ipt_entry_match,
+ + offsetof(struct xt_entry_match,
u.user.name),
m->u.kernel.match->name,
strlen(m->u.kernel.match->name)+1)
@@ -859,9 +982,9 @@ copy_entries_to_user(unsigned int total_size,
}
}
- t = ipt_get_target(e);
+ t = ipt_get_target_c(e);
if (copy_to_user(userptr + off + e->target_offset
- + offsetof(struct ipt_entry_target,
+ + offsetof(struct xt_entry_target,
u.user.name),
t->u.kernel.target->name,
strlen(t->u.kernel.target->name)+1) != 0) {
@@ -875,26 +998,172 @@ copy_entries_to_user(unsigned int total_size,
return ret;
}
+#ifdef CONFIG_COMPAT
+static void compat_standard_from_user(void *dst, const void *src)
+{
+ int v = *(compat_int_t *)src;
+
+ if (v > 0)
+ v += xt_compat_calc_jump(AF_INET, v);
+ memcpy(dst, &v, sizeof(v));
+}
+
+static int compat_standard_to_user(void __user *dst, const void *src)
+{
+ compat_int_t cv = *(int *)src;
+
+ if (cv > 0)
+ cv -= xt_compat_calc_jump(AF_INET, cv);
+ return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
+}
+
+static int compat_calc_entry(const struct ipt_entry *e,
+ const struct xt_table_info *info,
+ const void *base, struct xt_table_info *newinfo)
+{
+ const struct xt_entry_match *ematch;
+ const struct xt_entry_target *t;
+ unsigned int entry_offset;
+ int off, i, ret;
+
+ off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+ entry_offset = (void *)e - base;
+ xt_ematch_foreach(ematch, e)
+ off += xt_compat_match_offset(ematch->u.kernel.match);
+ t = ipt_get_target_c(e);
+ off += xt_compat_target_offset(t->u.kernel.target);
+ newinfo->size -= off;
+ ret = xt_compat_add_offset(AF_INET, entry_offset, off);
+ if (ret)
+ return ret;
+
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ if (info->hook_entry[i] &&
+ (e < (struct ipt_entry *)(base + info->hook_entry[i])))
+ newinfo->hook_entry[i] -= off;
+ if (info->underflow[i] &&
+ (e < (struct ipt_entry *)(base + info->underflow[i])))
+ newinfo->underflow[i] -= off;
+ }
+ return 0;
+}
+
+static int compat_table_info(const struct xt_table_info *info,
+ struct xt_table_info *newinfo)
+{
+ struct ipt_entry *iter;
+ void *loc_cpu_entry;
+ int ret;
+
+ if (!newinfo || !info)
+ return -EINVAL;
+
+ /* we dont care about newinfo->entries[] */
+ memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+ newinfo->initial_entries = 0;
+ loc_cpu_entry = info->entries[raw_smp_processor_id()];
+ xt_compat_init_offsets(AF_INET, info->number);
+ xt_entry_foreach(iter, loc_cpu_entry, info->size) {
+ ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
+ if (ret != 0)
+ return ret;
+ }
+ return 0;
+}
+#endif
+
+static int get_info(struct net *net, void __user *user,
+ const int *len, int compat)
+{
+ char name[XT_TABLE_MAXNAMELEN];
+ struct xt_table *t;
+ int ret;
+
+ if (*len != sizeof(struct ipt_getinfo)) {
+ duprintf("length %u != %zu\n", *len,
+ sizeof(struct ipt_getinfo));
+ return -EINVAL;
+ }
+
+ if (copy_from_user(name, user, sizeof(name)) != 0)
+ return -EFAULT;
+
+ name[XT_TABLE_MAXNAMELEN-1] = '\0';
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_lock(AF_INET);
+#endif
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ "iptable_%s", name);
+ if (!IS_ERR_OR_NULL(t)) {
+ struct ipt_getinfo info;
+ const struct xt_table_info *private = t->private;
+#ifdef CONFIG_COMPAT
+ struct xt_table_info tmp;
+
+ if (compat) {
+ ret = compat_table_info(private, &tmp);
+ xt_compat_flush_offsets(AF_INET);
+ private = &tmp;
+ }
+#endif
+ memset(&info, 0, sizeof(info));
+ info.valid_hooks = t->valid_hooks;
+ memcpy(info.hook_entry, private->hook_entry,
+ sizeof(info.hook_entry));
+ memcpy(info.underflow, private->underflow,
+ sizeof(info.underflow));
+ info.num_entries = private->number;
+ info.size = private->size;
+ strcpy(info.name, name);
+
+ if (copy_to_user(user, &info, *len) != 0)
+ ret = -EFAULT;
+ else
+ ret = 0;
+
+ xt_table_unlock(t);
+ module_put(t->me);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+#ifdef CONFIG_COMPAT
+ if (compat)
+ xt_compat_unlock(AF_INET);
+#endif
+ return ret;
+}
+
static int
-get_entries(const struct ipt_get_entries *entries,
- struct ipt_get_entries __user *uptr)
+get_entries(struct net *net, struct ipt_get_entries __user *uptr,
+ const int *len)
{
int ret;
- struct ipt_table *t;
-
- t = xt_find_table_lock(AF_INET, entries->name);
- if (t && !IS_ERR(t)) {
- struct xt_table_info *private = t->private;
- duprintf("t->private->number = %u\n",
- private->number);
- if (entries->size == private->size)
+ struct ipt_get_entries get;
+ struct xt_table *t;
+
+ if (*len < sizeof(get)) {
+ duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
+ if (*len != sizeof(struct ipt_get_entries) + get.size) {
+ duprintf("get_entries: %u != %zu\n",
+ *len, sizeof(get) + get.size);
+ return -EINVAL;
+ }
+
+ t = xt_find_table_lock(net, AF_INET, get.name);
+ if (!IS_ERR_OR_NULL(t)) {
+ const struct xt_table_info *private = t->private;
+ duprintf("t->private->number = %u\n", private->number);
+ if (get.size == private->size)
ret = copy_entries_to_user(private->size,
t, uptr->entrytable);
else {
duprintf("get_entries: I've got %u not %u!\n",
- private->size,
- entries->size);
- ret = -EINVAL;
+ private->size, get.size);
+ ret = -EAGAIN;
}
module_put(t->me);
xt_table_unlock(t);
@@ -905,86 +1174,67 @@ get_entries(const struct ipt_get_entries *entries,
}
static int
-do_replace(void __user *user, unsigned int len)
+__do_replace(struct net *net, const char *name, unsigned int valid_hooks,
+ struct xt_table_info *newinfo, unsigned int num_counters,
+ void __user *counters_ptr)
{
int ret;
- struct ipt_replace tmp;
- struct ipt_table *t;
- struct xt_table_info *newinfo, *oldinfo;
+ struct xt_table *t;
+ struct xt_table_info *oldinfo;
struct xt_counters *counters;
- void *loc_cpu_entry, *loc_cpu_old_entry;
-
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
- return -EFAULT;
-
- /* Hack: Causes ipchains to give correct error msg --RR */
- if (len != sizeof(tmp) + tmp.size)
- return -ENOPROTOOPT;
-
- newinfo = xt_alloc_table_info(tmp.size);
- if (!newinfo)
- return -ENOMEM;
-
- /* choose the copy that is our node/cpu */
- loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
- if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
- tmp.size) != 0) {
- ret = -EFAULT;
- goto free_newinfo;
- }
+ void *loc_cpu_old_entry;
+ struct ipt_entry *iter;
- counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
+ ret = 0;
+ counters = vzalloc(num_counters * sizeof(struct xt_counters));
if (!counters) {
ret = -ENOMEM;
- goto free_newinfo;
+ goto out;
}
- ret = translate_table(tmp.name, tmp.valid_hooks,
- newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
- tmp.hook_entry, tmp.underflow);
- if (ret != 0)
- goto free_newinfo_counters;
-
- duprintf("ip_tables: Translated table\n");
-
- t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
- "iptable_%s", tmp.name);
- if (!t || IS_ERR(t)) {
+ t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
+ "iptable_%s", name);
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
}
/* You lied! */
- if (tmp.valid_hooks != t->valid_hooks) {
+ if (valid_hooks != t->valid_hooks) {
duprintf("Valid hook crap: %08X vs %08X\n",
- tmp.valid_hooks, t->valid_hooks);
+ valid_hooks, t->valid_hooks);
ret = -EINVAL;
goto put_module;
}
- oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
+ oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
if (!oldinfo)
goto put_module;
/* Update module usage count based on number of rules */
duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
oldinfo->number, oldinfo->initial_entries, newinfo->number);
- if ((oldinfo->number > oldinfo->initial_entries) ||
- (newinfo->number <= oldinfo->initial_entries))
+ if ((oldinfo->number > oldinfo->initial_entries) ||
+ (newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
if ((oldinfo->number > oldinfo->initial_entries) &&
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
- IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
+ xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size)
+ cleanup_entry(iter, net);
+
xt_free_table_info(oldinfo);
- if (copy_to_user(tmp.counters, counters,
- sizeof(struct xt_counters) * tmp.num_counters) != 0)
- ret = -EFAULT;
+ if (copy_to_user(counters_ptr, counters,
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error, can't fail, new table is already in place */
+ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
@@ -993,84 +1243,141 @@ do_replace(void __user *user, unsigned int len)
module_put(t->me);
xt_table_unlock(t);
free_newinfo_counters_untrans:
- IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
- free_newinfo_counters:
vfree(counters);
- free_newinfo:
- xt_free_table_info(newinfo);
+ out:
return ret;
}
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static inline int
-add_counter_to_entry(struct ipt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
+static int
+do_replace(struct net *net, const void __user *user, unsigned int len)
{
-#if 0
- duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
- *i,
- (long unsigned int)e->counters.pcnt,
- (long unsigned int)e->counters.bcnt,
- (long unsigned int)addme[*i].pcnt,
- (long unsigned int)addme[*i].bcnt);
-#endif
+ int ret;
+ struct ipt_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct ipt_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* overflow check */
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+ ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("Translated table\n");
- (*i)++;
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, tmp.counters);
+ if (ret)
+ goto free_newinfo_untrans;
return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
}
static int
-do_add_counters(void __user *user, unsigned int len)
+do_add_counters(struct net *net, const void __user *user,
+ unsigned int len, int compat)
{
- unsigned int i;
- struct xt_counters_info tmp, *paddc;
- struct ipt_table *t;
- struct xt_table_info *private;
+ unsigned int i, curcpu;
+ struct xt_counters_info tmp;
+ struct xt_counters *paddc;
+ unsigned int num_counters;
+ const char *name;
+ int size;
+ void *ptmp;
+ struct xt_table *t;
+ const struct xt_table_info *private;
int ret = 0;
void *loc_cpu_entry;
+ struct ipt_entry *iter;
+ unsigned int addend;
+#ifdef CONFIG_COMPAT
+ struct compat_xt_counters_info compat_tmp;
+
+ if (compat) {
+ ptmp = &compat_tmp;
+ size = sizeof(struct compat_xt_counters_info);
+ } else
+#endif
+ {
+ ptmp = &tmp;
+ size = sizeof(struct xt_counters_info);
+ }
- if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ if (copy_from_user(ptmp, user, size) != 0)
return -EFAULT;
- if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
+#ifdef CONFIG_COMPAT
+ if (compat) {
+ num_counters = compat_tmp.num_counters;
+ name = compat_tmp.name;
+ } else
+#endif
+ {
+ num_counters = tmp.num_counters;
+ name = tmp.name;
+ }
+
+ if (len != size + num_counters * sizeof(struct xt_counters))
return -EINVAL;
- paddc = vmalloc_node(len, numa_node_id());
+ paddc = vmalloc(len - size);
if (!paddc)
return -ENOMEM;
- if (copy_from_user(paddc, user, len) != 0) {
+ if (copy_from_user(paddc, user + size, len - size) != 0) {
ret = -EFAULT;
goto free;
}
- t = xt_find_table_lock(AF_INET, tmp.name);
- if (!t || IS_ERR(t)) {
+ t = xt_find_table_lock(net, AF_INET, name);
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
- write_lock_bh(&t->lock);
+ local_bh_disable();
private = t->private;
- if (private->number != paddc->num_counters) {
+ if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
- IPT_ENTRY_ITERATE(loc_cpu_entry,
- private->size,
- add_counter_to_entry,
- paddc->counters,
- &i);
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ addend = xt_write_recseq_begin();
+ xt_entry_foreach(iter, loc_cpu_entry, private->size) {
+ ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt);
+ ++i;
+ }
+ xt_write_recseq_end(addend);
unlock_up_free:
- write_unlock_bh(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
@@ -1079,21 +1386,483 @@ do_add_counters(void __user *user, unsigned int len)
return ret;
}
+#ifdef CONFIG_COMPAT
+struct compat_ipt_replace {
+ char name[XT_TABLE_MAXNAMELEN];
+ u32 valid_hooks;
+ u32 num_entries;
+ u32 size;
+ u32 hook_entry[NF_INET_NUMHOOKS];
+ u32 underflow[NF_INET_NUMHOOKS];
+ u32 num_counters;
+ compat_uptr_t counters; /* struct xt_counters * */
+ struct compat_ipt_entry entries[0];
+};
+
+static int
+compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
+ unsigned int *size, struct xt_counters *counters,
+ unsigned int i)
+{
+ struct xt_entry_target *t;
+ struct compat_ipt_entry __user *ce;
+ u_int16_t target_offset, next_offset;
+ compat_uint_t origsize;
+ const struct xt_entry_match *ematch;
+ int ret = 0;
+
+ origsize = *size;
+ ce = (struct compat_ipt_entry __user *)*dstptr;
+ if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
+ copy_to_user(&ce->counters, &counters[i],
+ sizeof(counters[i])) != 0)
+ return -EFAULT;
+
+ *dstptr += sizeof(struct compat_ipt_entry);
+ *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
+ xt_ematch_foreach(ematch, e) {
+ ret = xt_compat_match_to_user(ematch, dstptr, size);
+ if (ret != 0)
+ return ret;
+ }
+ target_offset = e->target_offset - (origsize - *size);
+ t = ipt_get_target(e);
+ ret = xt_compat_target_to_user(t, dstptr, size);
+ if (ret)
+ return ret;
+ next_offset = e->next_offset - (origsize - *size);
+ if (put_user(target_offset, &ce->target_offset) != 0 ||
+ put_user(next_offset, &ce->next_offset) != 0)
+ return -EFAULT;
+ return 0;
+}
+
+static int
+compat_find_calc_match(struct xt_entry_match *m,
+ const char *name,
+ const struct ipt_ip *ip,
+ unsigned int hookmask,
+ int *size)
+{
+ struct xt_match *match;
+
+ match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
+ m->u.user.revision);
+ if (IS_ERR(match)) {
+ duprintf("compat_check_calc_match: `%s' not found\n",
+ m->u.user.name);
+ return PTR_ERR(match);
+ }
+ m->u.kernel.match = match;
+ *size += xt_compat_match_offset(match);
+ return 0;
+}
+
+static void compat_release_entry(struct compat_ipt_entry *e)
+{
+ struct xt_entry_target *t;
+ struct xt_entry_match *ematch;
+
+ /* Cleanup all matches */
+ xt_ematch_foreach(ematch, e)
+ module_put(ematch->u.kernel.match->me);
+ t = compat_ipt_get_target(e);
+ module_put(t->u.kernel.target->me);
+}
+
+static int
+check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
+ struct xt_table_info *newinfo,
+ unsigned int *size,
+ const unsigned char *base,
+ const unsigned char *limit,
+ const unsigned int *hook_entries,
+ const unsigned int *underflows,
+ const char *name)
+{
+ struct xt_entry_match *ematch;
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ unsigned int entry_offset;
+ unsigned int j;
+ int ret, off, h;
+
+ duprintf("check_compat_entry_size_and_hooks %p\n", e);
+ if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
+ (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
+ duprintf("Bad offset %p, limit = %p\n", e, limit);
+ return -EINVAL;
+ }
+
+ if (e->next_offset < sizeof(struct compat_ipt_entry) +
+ sizeof(struct compat_xt_entry_target)) {
+ duprintf("checking: element %p size %u\n",
+ e, e->next_offset);
+ return -EINVAL;
+ }
+
+ /* For purposes of check_entry casting the compat entry is fine */
+ ret = check_entry((struct ipt_entry *)e, name);
+ if (ret)
+ return ret;
+
+ off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+ entry_offset = (void *)e - (void *)base;
+ j = 0;
+ xt_ematch_foreach(ematch, e) {
+ ret = compat_find_calc_match(ematch, name,
+ &e->ip, e->comefrom, &off);
+ if (ret != 0)
+ goto release_matches;
+ ++j;
+ }
+
+ t = compat_ipt_get_target(e);
+ target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
+ t->u.user.revision);
+ if (IS_ERR(target)) {
+ duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
+ t->u.user.name);
+ ret = PTR_ERR(target);
+ goto release_matches;
+ }
+ t->u.kernel.target = target;
+
+ off += xt_compat_target_offset(target);
+ *size += off;
+ ret = xt_compat_add_offset(AF_INET, entry_offset, off);
+ if (ret)
+ goto out;
+
+ /* Check hooks & underflows */
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if ((unsigned char *)e - base == hook_entries[h])
+ newinfo->hook_entry[h] = hook_entries[h];
+ if ((unsigned char *)e - base == underflows[h])
+ newinfo->underflow[h] = underflows[h];
+ }
+
+ /* Clear counters and comefrom */
+ memset(&e->counters, 0, sizeof(e->counters));
+ e->comefrom = 0;
+ return 0;
+
+out:
+ module_put(t->u.kernel.target->me);
+release_matches:
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ module_put(ematch->u.kernel.match->me);
+ }
+ return ret;
+}
+
+static int
+compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
+ unsigned int *size, const char *name,
+ struct xt_table_info *newinfo, unsigned char *base)
+{
+ struct xt_entry_target *t;
+ struct xt_target *target;
+ struct ipt_entry *de;
+ unsigned int origsize;
+ int ret, h;
+ struct xt_entry_match *ematch;
+
+ ret = 0;
+ origsize = *size;
+ de = (struct ipt_entry *)*dstptr;
+ memcpy(de, e, sizeof(struct ipt_entry));
+ memcpy(&de->counters, &e->counters, sizeof(e->counters));
+
+ *dstptr += sizeof(struct ipt_entry);
+ *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
+
+ xt_ematch_foreach(ematch, e) {
+ ret = xt_compat_match_from_user(ematch, dstptr, size);
+ if (ret != 0)
+ return ret;
+ }
+ de->target_offset = e->target_offset - (origsize - *size);
+ t = compat_ipt_get_target(e);
+ target = t->u.kernel.target;
+ xt_compat_target_from_user(t, dstptr, size);
+
+ de->next_offset = e->next_offset - (origsize - *size);
+ for (h = 0; h < NF_INET_NUMHOOKS; h++) {
+ if ((unsigned char *)de - base < newinfo->hook_entry[h])
+ newinfo->hook_entry[h] -= origsize - *size;
+ if ((unsigned char *)de - base < newinfo->underflow[h])
+ newinfo->underflow[h] -= origsize - *size;
+ }
+ return ret;
+}
+
+static int
+compat_check_entry(struct ipt_entry *e, struct net *net, const char *name)
+{
+ struct xt_entry_match *ematch;
+ struct xt_mtchk_param mtpar;
+ unsigned int j;
+ int ret = 0;
+
+ j = 0;
+ mtpar.net = net;
+ mtpar.table = name;
+ mtpar.entryinfo = &e->ip;
+ mtpar.hook_mask = e->comefrom;
+ mtpar.family = NFPROTO_IPV4;
+ xt_ematch_foreach(ematch, e) {
+ ret = check_match(ematch, &mtpar);
+ if (ret != 0)
+ goto cleanup_matches;
+ ++j;
+ }
+
+ ret = check_target(e, net, name);
+ if (ret)
+ goto cleanup_matches;
+ return 0;
+
+ cleanup_matches:
+ xt_ematch_foreach(ematch, e) {
+ if (j-- == 0)
+ break;
+ cleanup_match(ematch, net);
+ }
+ return ret;
+}
+
+static int
+translate_compat_table(struct net *net,
+ const char *name,
+ unsigned int valid_hooks,
+ struct xt_table_info **pinfo,
+ void **pentry0,
+ unsigned int total_size,
+ unsigned int number,
+ unsigned int *hook_entries,
+ unsigned int *underflows)
+{
+ unsigned int i, j;
+ struct xt_table_info *newinfo, *info;
+ void *pos, *entry0, *entry1;
+ struct compat_ipt_entry *iter0;
+ struct ipt_entry *iter1;
+ unsigned int size;
+ int ret;
+
+ info = *pinfo;
+ entry0 = *pentry0;
+ size = total_size;
+ info->number = number;
+
+ /* Init all hooks to impossible value. */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ info->hook_entry[i] = 0xFFFFFFFF;
+ info->underflow[i] = 0xFFFFFFFF;
+ }
+
+ duprintf("translate_compat_table: size %u\n", info->size);
+ j = 0;
+ xt_compat_lock(AF_INET);
+ xt_compat_init_offsets(AF_INET, number);
+ /* Walk through entries, checking offsets. */
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = check_compat_entry_size_and_hooks(iter0, info, &size,
+ entry0,
+ entry0 + total_size,
+ hook_entries,
+ underflows,
+ name);
+ if (ret != 0)
+ goto out_unlock;
+ ++j;
+ }
+
+ ret = -EINVAL;
+ if (j != number) {
+ duprintf("translate_compat_table: %u not %u entries\n",
+ j, number);
+ goto out_unlock;
+ }
+
+ /* Check hooks all assigned */
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ /* Only hooks which are valid */
+ if (!(valid_hooks & (1 << i)))
+ continue;
+ if (info->hook_entry[i] == 0xFFFFFFFF) {
+ duprintf("Invalid hook entry %u %u\n",
+ i, hook_entries[i]);
+ goto out_unlock;
+ }
+ if (info->underflow[i] == 0xFFFFFFFF) {
+ duprintf("Invalid underflow %u %u\n",
+ i, underflows[i]);
+ goto out_unlock;
+ }
+ }
+
+ ret = -ENOMEM;
+ newinfo = xt_alloc_table_info(size);
+ if (!newinfo)
+ goto out_unlock;
+
+ newinfo->number = number;
+ for (i = 0; i < NF_INET_NUMHOOKS; i++) {
+ newinfo->hook_entry[i] = info->hook_entry[i];
+ newinfo->underflow[i] = info->underflow[i];
+ }
+ entry1 = newinfo->entries[raw_smp_processor_id()];
+ pos = entry1;
+ size = total_size;
+ xt_entry_foreach(iter0, entry0, total_size) {
+ ret = compat_copy_entry_from_user(iter0, &pos, &size,
+ name, newinfo, entry1);
+ if (ret != 0)
+ break;
+ }
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ if (ret)
+ goto free_newinfo;
+
+ ret = -ELOOP;
+ if (!mark_source_chains(newinfo, valid_hooks, entry1))
+ goto free_newinfo;
+
+ i = 0;
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ ret = compat_check_entry(iter1, net, name);
+ if (ret != 0)
+ break;
+ ++i;
+ if (strcmp(ipt_get_target(iter1)->u.user.name,
+ XT_ERROR_TARGET) == 0)
+ ++newinfo->stacksize;
+ }
+ if (ret) {
+ /*
+ * The first i matches need cleanup_entry (calls ->destroy)
+ * because they had called ->check already. The other j-i
+ * entries need only release.
+ */
+ int skip = i;
+ j -= i;
+ xt_entry_foreach(iter0, entry0, newinfo->size) {
+ if (skip-- > 0)
+ continue;
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ xt_entry_foreach(iter1, entry1, newinfo->size) {
+ if (i-- == 0)
+ break;
+ cleanup_entry(iter1, net);
+ }
+ xt_free_table_info(newinfo);
+ return ret;
+ }
+
+ /* And one copy for every other CPU */
+ for_each_possible_cpu(i)
+ if (newinfo->entries[i] && newinfo->entries[i] != entry1)
+ memcpy(newinfo->entries[i], entry1, newinfo->size);
+
+ *pinfo = newinfo;
+ *pentry0 = entry1;
+ xt_free_table_info(info);
+ return 0;
+
+free_newinfo:
+ xt_free_table_info(newinfo);
+out:
+ xt_entry_foreach(iter0, entry0, total_size) {
+ if (j-- == 0)
+ break;
+ compat_release_entry(iter0);
+ }
+ return ret;
+out_unlock:
+ xt_compat_flush_offsets(AF_INET);
+ xt_compat_unlock(AF_INET);
+ goto out;
+}
+
+static int
+compat_do_replace(struct net *net, void __user *user, unsigned int len)
+{
+ int ret;
+ struct compat_ipt_replace tmp;
+ struct xt_table_info *newinfo;
+ void *loc_cpu_entry;
+ struct ipt_entry *iter;
+
+ if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
+ return -EFAULT;
+
+ /* overflow check */
+ if (tmp.size >= INT_MAX / num_possible_cpus())
+ return -ENOMEM;
+ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
+ return -ENOMEM;
+ tmp.name[sizeof(tmp.name)-1] = 0;
+
+ newinfo = xt_alloc_table_info(tmp.size);
+ if (!newinfo)
+ return -ENOMEM;
+
+ /* choose the copy that is on our node/cpu */
+ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
+ if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
+ tmp.size) != 0) {
+ ret = -EFAULT;
+ goto free_newinfo;
+ }
+
+ ret = translate_compat_table(net, tmp.name, tmp.valid_hooks,
+ &newinfo, &loc_cpu_entry, tmp.size,
+ tmp.num_entries, tmp.hook_entry,
+ tmp.underflow);
+ if (ret != 0)
+ goto free_newinfo;
+
+ duprintf("compat_do_replace: Translated table\n");
+
+ ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
+ tmp.num_counters, compat_ptr(tmp.counters));
+ if (ret)
+ goto free_newinfo_untrans;
+ return 0;
+
+ free_newinfo_untrans:
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
+ free_newinfo:
+ xt_free_table_info(newinfo);
+ return ret;
+}
+
static int
-do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user,
+ unsigned int len)
{
int ret;
- if (!capable(CAP_NET_ADMIN))
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
return -EPERM;
switch (cmd) {
case IPT_SO_SET_REPLACE:
- ret = do_replace(user, len);
+ ret = compat_do_replace(sock_net(sk), user, len);
break;
case IPT_SO_SET_ADD_COUNTERS:
- ret = do_add_counters(user, len);
+ ret = do_add_counters(sock_net(sk), user, len, 1);
break;
default:
@@ -1104,78 +1873,163 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
return ret;
}
+struct compat_ipt_get_entries {
+ char name[XT_TABLE_MAXNAMELEN];
+ compat_uint_t size;
+ struct compat_ipt_entry entrytable[0];
+};
+
static int
-do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
+ void __user *userptr)
+{
+ struct xt_counters *counters;
+ const struct xt_table_info *private = table->private;
+ void __user *pos;
+ unsigned int size;
+ int ret = 0;
+ const void *loc_cpu_entry;
+ unsigned int i = 0;
+ struct ipt_entry *iter;
+
+ counters = alloc_counters(table);
+ if (IS_ERR(counters))
+ return PTR_ERR(counters);
+
+ /* choose the copy that is on our node/cpu, ...
+ * This choice is lazy (because current thread is
+ * allowed to migrate to another cpu)
+ */
+ loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ pos = userptr;
+ size = total_size;
+ xt_entry_foreach(iter, loc_cpu_entry, total_size) {
+ ret = compat_copy_entry_to_user(iter, &pos,
+ &size, counters, i++);
+ if (ret != 0)
+ break;
+ }
+
+ vfree(counters);
+ return ret;
+}
+
+static int
+compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
+ int *len)
{
int ret;
+ struct compat_ipt_get_entries get;
+ struct xt_table *t;
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
+ if (*len < sizeof(get)) {
+ duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
+ return -EINVAL;
+ }
- switch (cmd) {
- case IPT_SO_GET_INFO: {
- char name[IPT_TABLE_MAXNAMELEN];
- struct ipt_table *t;
+ if (copy_from_user(&get, uptr, sizeof(get)) != 0)
+ return -EFAULT;
- if (*len != sizeof(struct ipt_getinfo)) {
- duprintf("length %u != %u\n", *len,
- sizeof(struct ipt_getinfo));
- ret = -EINVAL;
- break;
- }
+ if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
+ duprintf("compat_get_entries: %u != %zu\n",
+ *len, sizeof(get) + get.size);
+ return -EINVAL;
+ }
- if (copy_from_user(name, user, sizeof(name)) != 0) {
- ret = -EFAULT;
- break;
+ xt_compat_lock(AF_INET);
+ t = xt_find_table_lock(net, AF_INET, get.name);
+ if (!IS_ERR_OR_NULL(t)) {
+ const struct xt_table_info *private = t->private;
+ struct xt_table_info info;
+ duprintf("t->private->number = %u\n", private->number);
+ ret = compat_table_info(private, &info);
+ if (!ret && get.size == info.size) {
+ ret = compat_copy_entries_to_user(private->size,
+ t, uptr->entrytable);
+ } else if (!ret) {
+ duprintf("compat_get_entries: I've got %u not %u!\n",
+ private->size, get.size);
+ ret = -EAGAIN;
}
- name[IPT_TABLE_MAXNAMELEN-1] = '\0';
-
- t = try_then_request_module(xt_find_table_lock(AF_INET, name),
- "iptable_%s", name);
- if (t && !IS_ERR(t)) {
- struct ipt_getinfo info;
- struct xt_table_info *private = t->private;
-
- info.valid_hooks = t->valid_hooks;
- memcpy(info.hook_entry, private->hook_entry,
- sizeof(info.hook_entry));
- memcpy(info.underflow, private->underflow,
- sizeof(info.underflow));
- info.num_entries = private->number;
- info.size = private->size;
- memcpy(info.name, name, sizeof(info.name));
-
- if (copy_to_user(user, &info, *len) != 0)
- ret = -EFAULT;
- else
- ret = 0;
- xt_table_unlock(t);
- module_put(t->me);
- } else
- ret = t ? PTR_ERR(t) : -ENOENT;
+ xt_compat_flush_offsets(AF_INET);
+ module_put(t->me);
+ xt_table_unlock(t);
+ } else
+ ret = t ? PTR_ERR(t) : -ENOENT;
+
+ xt_compat_unlock(AF_INET);
+ return ret;
+}
+
+static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
+
+static int
+compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 1);
+ break;
+ case IPT_SO_GET_ENTRIES:
+ ret = compat_get_entries(sock_net(sk), user, len);
+ break;
+ default:
+ ret = do_ipt_get_ctl(sk, cmd, user, len);
}
- break;
+ return ret;
+}
+#endif
- case IPT_SO_GET_ENTRIES: {
- struct ipt_get_entries get;
+static int
+do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
+{
+ int ret;
- if (*len < sizeof(get)) {
- duprintf("get_entries: %u < %u\n", *len, sizeof(get));
- ret = -EINVAL;
- } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
- ret = -EFAULT;
- } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
- duprintf("get_entries: %u != %u\n", *len,
- sizeof(struct ipt_get_entries) + get.size);
- ret = -EINVAL;
- } else
- ret = get_entries(&get, user);
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_SET_REPLACE:
+ ret = do_replace(sock_net(sk), user, len);
break;
+
+ case IPT_SO_SET_ADD_COUNTERS:
+ ret = do_add_counters(sock_net(sk), user, len, 0);
+ break;
+
+ default:
+ duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
+ ret = -EINVAL;
}
+ return ret;
+}
+
+static int
+do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
+{
+ int ret;
+
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
+ return -EPERM;
+
+ switch (cmd) {
+ case IPT_SO_GET_INFO:
+ ret = get_info(sock_net(sk), user, len, 0);
+ break;
+
+ case IPT_SO_GET_ENTRIES:
+ ret = get_entries(sock_net(sk), user, len);
+ break;
+
case IPT_SO_GET_REVISION_MATCH:
case IPT_SO_GET_REVISION_TARGET: {
- struct ipt_get_revision rev;
+ struct xt_get_revision rev;
int target;
if (*len != sizeof(rev)) {
@@ -1186,6 +2040,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EFAULT;
break;
}
+ rev.name[sizeof(rev.name)-1] = 0;
if (cmd == IPT_SO_GET_REVISION_TARGET)
target = 1;
@@ -1207,89 +2062,92 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
return ret;
}
-int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
+struct xt_table *ipt_register_table(struct net *net,
+ const struct xt_table *table,
+ const struct ipt_replace *repl)
{
int ret;
struct xt_table_info *newinfo;
- static struct xt_table_info bootstrap
- = { 0, 0, 0, { 0 }, { 0 }, { } };
+ struct xt_table_info bootstrap = {0};
void *loc_cpu_entry;
+ struct xt_table *new_table;
newinfo = xt_alloc_table_info(repl->size);
- if (!newinfo)
- return -ENOMEM;
+ if (!newinfo) {
+ ret = -ENOMEM;
+ goto out;
+ }
- /* choose the copy on our node/cpu
- * but dont care of preemption
- */
+ /* choose the copy on our node/cpu, but dont care about preemption */
loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
memcpy(loc_cpu_entry, repl->entries, repl->size);
- ret = translate_table(table->name, table->valid_hooks,
- newinfo, loc_cpu_entry, repl->size,
- repl->num_entries,
- repl->hook_entry,
- repl->underflow);
- if (ret != 0) {
- xt_free_table_info(newinfo);
- return ret;
- }
+ ret = translate_table(net, newinfo, loc_cpu_entry, repl);
+ if (ret != 0)
+ goto out_free;
- if (xt_register_table(table, &bootstrap, newinfo) != 0) {
- xt_free_table_info(newinfo);
- return ret;
+ new_table = xt_register_table(net, table, &bootstrap, newinfo);
+ if (IS_ERR(new_table)) {
+ ret = PTR_ERR(new_table);
+ goto out_free;
}
- return 0;
+ return new_table;
+
+out_free:
+ xt_free_table_info(newinfo);
+out:
+ return ERR_PTR(ret);
}
-void ipt_unregister_table(struct ipt_table *table)
+void ipt_unregister_table(struct net *net, struct xt_table *table)
{
struct xt_table_info *private;
void *loc_cpu_entry;
+ struct module *table_owner = table->me;
+ struct ipt_entry *iter;
- private = xt_unregister_table(table);
+ private = xt_unregister_table(table);
/* Decrease module usage counts and free resources */
loc_cpu_entry = private->entries[raw_smp_processor_id()];
- IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
+ xt_entry_foreach(iter, loc_cpu_entry, private->size)
+ cleanup_entry(iter, net);
+ if (private->number > private->initial_entries)
+ module_put(table_owner);
xt_free_table_info(private);
}
/* Returns 1 if the type and code is matched by the range, 0 otherwise */
-static inline int
+static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
u_int8_t type, u_int8_t code,
- int invert)
+ bool invert)
{
- return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
+ return ((test_type == 0xFF) ||
+ (type == test_type && code >= min_code && code <= max_code))
^ invert;
}
-static int
-icmp_match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
+static bool
+icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct icmphdr _icmph, *ic;
- const struct ipt_icmp *icmpinfo = matchinfo;
+ const struct icmphdr *ic;
+ struct icmphdr _icmph;
+ const struct ipt_icmp *icmpinfo = par->matchinfo;
/* Must not be a fragment. */
- if (offset)
- return 0;
+ if (par->fragoff != 0)
+ return false;
- ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
+ ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
if (ic == NULL) {
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
duprintf("Dropping evil ICMP tinygram.\n");
- *hotdrop = 1;
- return 0;
+ par->hotdrop = true;
+ return false;
}
return icmp_type_code_match(icmpinfo->type,
@@ -1299,32 +2157,31 @@ icmp_match(const struct sk_buff *skb,
!!(icmpinfo->invflags&IPT_ICMP_INV));
}
-/* Called when user tries to insert an entry of this type. */
-static int
-icmp_checkentry(const char *tablename,
- const void *info,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
+static int icmp_checkentry(const struct xt_mtchk_param *par)
{
- const struct ipt_ip *ip = info;
- const struct ipt_icmp *icmpinfo = matchinfo;
-
- /* Must specify proto == ICMP, and no unknown invflags */
- return ip->proto == IPPROTO_ICMP
- && !(ip->invflags & IPT_INV_PROTO)
- && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
- && !(icmpinfo->invflags & ~IPT_ICMP_INV);
-}
+ const struct ipt_icmp *icmpinfo = par->matchinfo;
-/* The built-in targets: standard (NULL) and error. */
-static struct ipt_target ipt_standard_target = {
- .name = IPT_STANDARD_TARGET,
-};
+ /* Must specify no unknown invflags */
+ return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
+}
-static struct ipt_target ipt_error_target = {
- .name = IPT_ERROR_TARGET,
- .target = ipt_error,
+static struct xt_target ipt_builtin_tg[] __read_mostly = {
+ {
+ .name = XT_STANDARD_TARGET,
+ .targetsize = sizeof(int),
+ .family = NFPROTO_IPV4,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(compat_int_t),
+ .compat_from_user = compat_standard_from_user,
+ .compat_to_user = compat_standard_to_user,
+#endif
+ },
+ {
+ .name = XT_ERROR_TARGET,
+ .target = ipt_error,
+ .targetsize = XT_FUNCTION_MAXNAMELEN,
+ .family = NFPROTO_IPV4,
+ },
};
static struct nf_sockopt_ops ipt_sockopts = {
@@ -1332,52 +2189,89 @@ static struct nf_sockopt_ops ipt_sockopts = {
.set_optmin = IPT_BASE_CTL,
.set_optmax = IPT_SO_SET_MAX+1,
.set = do_ipt_set_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_set = compat_do_ipt_set_ctl,
+#endif
.get_optmin = IPT_BASE_CTL,
.get_optmax = IPT_SO_GET_MAX+1,
.get = do_ipt_get_ctl,
+#ifdef CONFIG_COMPAT
+ .compat_get = compat_do_ipt_get_ctl,
+#endif
+ .owner = THIS_MODULE,
+};
+
+static struct xt_match ipt_builtin_mt[] __read_mostly = {
+ {
+ .name = "icmp",
+ .match = icmp_match,
+ .matchsize = sizeof(struct ipt_icmp),
+ .checkentry = icmp_checkentry,
+ .proto = IPPROTO_ICMP,
+ .family = NFPROTO_IPV4,
+ },
};
-static struct ipt_match icmp_matchstruct = {
- .name = "icmp",
- .match = &icmp_match,
- .checkentry = &icmp_checkentry,
+static int __net_init ip_tables_net_init(struct net *net)
+{
+ return xt_proto_init(net, NFPROTO_IPV4);
+}
+
+static void __net_exit ip_tables_net_exit(struct net *net)
+{
+ xt_proto_fini(net, NFPROTO_IPV4);
+}
+
+static struct pernet_operations ip_tables_net_ops = {
+ .init = ip_tables_net_init,
+ .exit = ip_tables_net_exit,
};
-static int __init init(void)
+static int __init ip_tables_init(void)
{
int ret;
- xt_proto_init(AF_INET);
+ ret = register_pernet_subsys(&ip_tables_net_ops);
+ if (ret < 0)
+ goto err1;
- /* Noone else will be downing sem now, so we won't sleep */
- xt_register_target(AF_INET, &ipt_standard_target);
- xt_register_target(AF_INET, &ipt_error_target);
- xt_register_match(AF_INET, &icmp_matchstruct);
+ /* No one else will be downing sem now, so we won't sleep */
+ ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
+ if (ret < 0)
+ goto err2;
+ ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
+ if (ret < 0)
+ goto err4;
/* Register setsockopt */
ret = nf_register_sockopt(&ipt_sockopts);
- if (ret < 0) {
- duprintf("Unable to register sockopts.\n");
- return ret;
- }
+ if (ret < 0)
+ goto err5;
- printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
+ pr_info("(C) 2000-2006 Netfilter Core Team\n");
return 0;
+
+err5:
+ xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
+err4:
+ xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
+err2:
+ unregister_pernet_subsys(&ip_tables_net_ops);
+err1:
+ return ret;
}
-static void __exit fini(void)
+static void __exit ip_tables_fini(void)
{
nf_unregister_sockopt(&ipt_sockopts);
- xt_unregister_match(AF_INET, &icmp_matchstruct);
- xt_unregister_target(AF_INET, &ipt_error_target);
- xt_unregister_target(AF_INET, &ipt_standard_target);
-
- xt_proto_fini(AF_INET);
+ xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
+ xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
+ unregister_pernet_subsys(&ip_tables_net_ops);
}
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);
-module_init(init);
-module_exit(fini);
+module_init(ip_tables_init);
+module_exit(ip_tables_fini);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index d9bc971f03a..2510c02c2d2 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -1,4 +1,4 @@
-/* Cluster IP hashmark target
+/* Cluster IP hashmark target
* (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
* based on ideas of Fabio Olive Leite <olive@unixforge.org>
*
@@ -9,43 +9,34 @@
* published by the Free Software Foundation.
*
*/
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/proc_fs.h>
#include <linux/jhash.h>
#include <linux/bitops.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <linux/if_arp.h>
-#include <linux/proc_fs.h>
#include <linux/seq_file.h>
-
-#include <net/checksum.h>
-
#include <linux/netfilter_arp.h>
-
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <net/netfilter/nf_conntrack_compat.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/net_namespace.h>
+#include <net/netns/generic.h>
+#include <net/checksum.h>
+#include <net/ip.h>
#define CLUSTERIP_VERSION "0.8"
-#define DEBUG_CLUSTERIP
-
-#ifdef DEBUG_CLUSTERIP
-#define DEBUGP printk
-#else
-#define DEBUGP
-#endif
-
-#define ASSERT_READ_LOCK(x)
-
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables target for CLUSTERIP");
+MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
struct clusterip_config {
struct list_head list; /* list of all configs */
@@ -53,7 +44,7 @@ struct clusterip_config {
atomic_t entries; /* number of entries/rules
* referencing us */
- u_int32_t clusterip; /* the IP address */
+ __be32 clusterip; /* the IP address */
u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
struct net_device *dev; /* device */
u_int16_t num_total_nodes; /* total number of nodes */
@@ -64,17 +55,24 @@ struct clusterip_config {
#endif
enum clusterip_hashmode hash_mode; /* which hashing mode */
u_int32_t hash_initval; /* hash initialization */
+ struct rcu_head rcu;
};
-static LIST_HEAD(clusterip_configs);
+#ifdef CONFIG_PROC_FS
+static const struct file_operations clusterip_proc_fops;
+#endif
+
+static int clusterip_net_id __read_mostly;
-/* clusterip_lock protects the clusterip_configs list */
-static DEFINE_RWLOCK(clusterip_lock);
+struct clusterip_net {
+ struct list_head configs;
+ /* lock protects the configs list */
+ spinlock_t lock;
#ifdef CONFIG_PROC_FS
-static struct file_operations clusterip_proc_fops;
-static struct proc_dir_entry *clusterip_procdir;
+ struct proc_dir_entry *procdir;
#endif
+};
static inline void
clusterip_config_get(struct clusterip_config *c)
@@ -82,18 +80,17 @@ clusterip_config_get(struct clusterip_config *c)
atomic_inc(&c->refcount);
}
-static inline void
-clusterip_config_put(struct clusterip_config *c)
+
+static void clusterip_config_rcu_free(struct rcu_head *head)
{
- if (atomic_dec_and_test(&c->refcount))
- kfree(c);
+ kfree(container_of(head, struct clusterip_config, rcu));
}
-/* increase the count of entries(rules) using/referencing this config */
static inline void
-clusterip_config_entry_get(struct clusterip_config *c)
+clusterip_config_put(struct clusterip_config *c)
{
- atomic_inc(&c->entries);
+ if (atomic_dec_and_test(&c->refcount))
+ call_rcu_bh(&c->rcu, clusterip_config_rcu_free);
}
/* decrease the count of entries using/referencing this config. If last
@@ -102,55 +99,57 @@ clusterip_config_entry_get(struct clusterip_config *c)
static inline void
clusterip_config_entry_put(struct clusterip_config *c)
{
- if (atomic_dec_and_test(&c->entries)) {
- write_lock_bh(&clusterip_lock);
- list_del(&c->list);
- write_unlock_bh(&clusterip_lock);
+ struct net *net = dev_net(c->dev);
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
- dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
+ local_bh_disable();
+ if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
+ list_del_rcu(&c->list);
+ spin_unlock(&cn->lock);
+ local_bh_enable();
+
+ dev_mc_del(c->dev, c->clustermac);
dev_put(c->dev);
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
- remove_proc_entry(c->pde->name, c->pde->parent);
+ proc_remove(c->pde);
#endif
+ return;
}
+ local_bh_enable();
}
static struct clusterip_config *
-__clusterip_config_find(u_int32_t clusterip)
+__clusterip_config_find(struct net *net, __be32 clusterip)
{
- struct list_head *pos;
+ struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
- ASSERT_READ_LOCK(&clusterip_lock);
- list_for_each(pos, &clusterip_configs) {
- struct clusterip_config *c = list_entry(pos,
- struct clusterip_config, list);
- if (c->clusterip == clusterip) {
+ list_for_each_entry_rcu(c, &cn->configs, list) {
+ if (c->clusterip == clusterip)
return c;
- }
}
return NULL;
}
static inline struct clusterip_config *
-clusterip_config_find_get(u_int32_t clusterip, int entry)
+clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
{
struct clusterip_config *c;
- read_lock_bh(&clusterip_lock);
- c = __clusterip_config_find(clusterip);
- if (!c) {
- read_unlock_bh(&clusterip_lock);
- return NULL;
+ rcu_read_lock_bh();
+ c = __clusterip_config_find(net, clusterip);
+ if (c) {
+ if (unlikely(!atomic_inc_not_zero(&c->refcount)))
+ c = NULL;
+ else if (entry)
+ atomic_inc(&c->entries);
}
- atomic_inc(&c->refcount);
- if (entry)
- atomic_inc(&c->entries);
- read_unlock_bh(&clusterip_lock);
+ rcu_read_unlock_bh();
return c;
}
@@ -161,23 +160,21 @@ clusterip_config_init_nodelist(struct clusterip_config *c,
{
int n;
- for (n = 0; n < i->num_local_nodes; n++) {
+ for (n = 0; n < i->num_local_nodes; n++)
set_bit(i->local_nodes[n] - 1, &c->local_nodes);
- }
}
static struct clusterip_config *
-clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
+clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
struct net_device *dev)
{
struct clusterip_config *c;
- char buffer[16];
+ struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
- c = kmalloc(sizeof(*c), GFP_ATOMIC);
+ c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
return NULL;
- memset(c, 0, sizeof(*c));
c->dev = dev;
c->clusterip = ip;
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
@@ -189,24 +186,29 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
atomic_set(&c->entries, 1);
#ifdef CONFIG_PROC_FS
- /* create proc dir entry */
- sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip));
- c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir);
- if (!c->pde) {
- kfree(c);
- return NULL;
+ {
+ char buffer[16];
+
+ /* create proc dir entry */
+ sprintf(buffer, "%pI4", &ip);
+ c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
+ cn->procdir,
+ &clusterip_proc_fops, c);
+ if (!c->pde) {
+ kfree(c);
+ return NULL;
+ }
}
- c->pde->proc_fops = &clusterip_proc_fops;
- c->pde->data = c;
#endif
- write_lock_bh(&clusterip_lock);
- list_add(&c->list, &clusterip_configs);
- write_unlock_bh(&clusterip_lock);
+ spin_lock_bh(&cn->lock);
+ list_add_rcu(&c->list, &cn->configs);
+ spin_unlock_bh(&cn->lock);
return c;
}
+#ifdef CONFIG_PROC_FS
static int
clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
{
@@ -222,51 +224,41 @@ clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
return 0;
}
-static int
+static bool
clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
{
if (nodenum == 0 ||
nodenum > c->num_total_nodes)
- return 1;
-
+ return true;
+
if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
- return 0;
+ return false;
- return 1;
+ return true;
}
+#endif
static inline u_int32_t
-clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
+clusterip_hashfn(const struct sk_buff *skb,
+ const struct clusterip_config *config)
{
- struct iphdr *iph = skb->nh.iph;
+ const struct iphdr *iph = ip_hdr(skb);
unsigned long hashval;
- u_int16_t sport, dport;
- struct tcphdr *th;
- struct udphdr *uh;
- struct icmphdr *ih;
-
- switch (iph->protocol) {
- case IPPROTO_TCP:
- th = (void *)iph+iph->ihl*4;
- sport = ntohs(th->source);
- dport = ntohs(th->dest);
- break;
- case IPPROTO_UDP:
- uh = (void *)iph+iph->ihl*4;
- sport = ntohs(uh->source);
- dport = ntohs(uh->dest);
- break;
- case IPPROTO_ICMP:
- ih = (void *)iph+iph->ihl*4;
- sport = ntohs(ih->un.echo.id);
- dport = (ih->type<<8)|ih->code;
- break;
- default:
- if (net_ratelimit()) {
- printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n",
- iph->protocol);
+ u_int16_t sport = 0, dport = 0;
+ int poff;
+
+ poff = proto_ports_offset(iph->protocol);
+ if (poff >= 0) {
+ const u_int16_t *ports;
+ u16 _ports[2];
+
+ ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
+ if (ports) {
+ sport = ports[0];
+ dport = ports[1];
}
- sport = dport = 0;
+ } else {
+ net_info_ratelimited("unknown protocol %u\n", iph->protocol);
}
switch (config->hash_mode) {
@@ -275,7 +267,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
config->hash_initval);
break;
case CLUSTERIP_HASHMODE_SIP_SPT:
- hashval = jhash_2words(ntohl(iph->saddr), sport,
+ hashval = jhash_2words(ntohl(iph->saddr), sport,
config->hash_initval);
break;
case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
@@ -287,274 +279,260 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
hashval = 0;
/* This cannot happen, unless the check function wasn't called
* at rule load time */
- printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode);
+ pr_info("unknown mode %u\n", config->hash_mode);
BUG();
break;
}
/* node numbers are 1..n, not 0..n */
- return ((hashval % config->num_total_nodes)+1);
+ return (((u64)hashval * config->num_total_nodes) >> 32) + 1;
}
static inline int
-clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
+clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
{
return test_bit(hash - 1, &config->local_nodes);
}
-/***********************************************************************
- * IPTABLES TARGET
+/***********************************************************************
+ * IPTABLES TARGET
***********************************************************************/
static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
+clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- const struct ipt_clusterip_tgt_info *cipinfo = targinfo;
+ const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+ struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- u_int32_t *mark, hash;
+ u_int32_t hash;
/* don't need to clusterip_config_get() here, since refcount
* is only decremented by destroy() - and ip_tables guarantees
* that the ->target() function isn't called after ->destroy() */
- mark = nf_ct_get_mark((*pskb), &ctinfo);
- if (mark == NULL) {
- printk(KERN_ERR "CLUSTERIP: no conntrack!\n");
- /* FIXME: need to drop invalid ones, since replies
- * to outgoing connections of other nodes will be
- * marked as INVALID */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
return NF_DROP;
- }
/* special case: ICMP error handling. conntrack distinguishes between
* error messages (RELATED) and information requests (see below) */
- if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP
- && (ctinfo == IP_CT_RELATED
- || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY))
- return IPT_CONTINUE;
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
+ (ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY))
+ return XT_CONTINUE;
- /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
+ /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
* TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
* on, which all have an ID field [relevant for hashing]. */
- hash = clusterip_hashfn(*pskb, cipinfo->config);
+ hash = clusterip_hashfn(skb, cipinfo->config);
switch (ctinfo) {
- case IP_CT_NEW:
- *mark = hash;
- break;
- case IP_CT_RELATED:
- case IP_CT_RELATED+IP_CT_IS_REPLY:
- /* FIXME: we don't handle expectations at the
- * moment. they can arrive on a different node than
- * the master connection (e.g. FTP passive mode) */
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED+IP_CT_IS_REPLY:
- break;
- default:
- break;
+ case IP_CT_NEW:
+ ct->mark = hash;
+ break;
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ /* FIXME: we don't handle expectations at the moment.
+ * They can arrive on a different node than
+ * the master connection (e.g. FTP passive mode) */
+ case IP_CT_ESTABLISHED:
+ case IP_CT_ESTABLISHED_REPLY:
+ break;
+ default: /* Prevent gcc warnings */
+ break;
}
-#ifdef DEBUG_CLUSTERP
- DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
+#ifdef DEBUG
+ nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
#endif
- DEBUGP("hash=%u ct_hash=%u ", hash, *mark);
+ pr_debug("hash=%u ct_hash=%u ", hash, ct->mark);
if (!clusterip_responsible(cipinfo->config, hash)) {
- DEBUGP("not responsible\n");
+ pr_debug("not responsible\n");
return NF_DROP;
}
- DEBUGP("responsible\n");
+ pr_debug("responsible\n");
/* despite being received via linklayer multicast, this is
* actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
- (*pskb)->pkt_type = PACKET_HOST;
+ skb->pkt_type = PACKET_HOST;
- return IPT_CONTINUE;
+ return XT_CONTINUE;
}
-static int
-checkentry(const char *tablename,
- const void *e_void,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
+static int clusterip_tg_check(const struct xt_tgchk_param *par)
{
- struct ipt_clusterip_tgt_info *cipinfo = targinfo;
- const struct ipt_entry *e = e_void;
-
+ struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
+ const struct ipt_entry *e = par->entryinfo;
struct clusterip_config *config;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))) {
- printk(KERN_WARNING "CLUSTERIP: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info)));
- return 0;
- }
+ int ret;
if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
- printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n",
- cipinfo->hash_mode);
- return 0;
+ pr_info("unknown mode %u\n", cipinfo->hash_mode);
+ return -EINVAL;
}
- if (e->ip.dmsk.s_addr != 0xffffffff
- || e->ip.dst.s_addr == 0) {
- printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n");
- return 0;
+ if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
+ e->ip.dst.s_addr == 0) {
+ pr_info("Please specify destination IP\n");
+ return -EINVAL;
}
/* FIXME: further sanity checks */
- config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
- if (config) {
- if (cipinfo->config != NULL) {
- /* Case A: This is an entry that gets reloaded, since
- * it still has a cipinfo->config pointer. Simply
- * increase the entry refcount and return */
- if (cipinfo->config != config) {
- printk(KERN_ERR "CLUSTERIP: Reloaded entry "
- "has invalid config pointer!\n");
- return 0;
- }
- clusterip_config_entry_get(cipinfo->config);
- } else {
- /* Case B: This is a new rule referring to an existing
- * clusterip config. */
- cipinfo->config = config;
- clusterip_config_entry_get(cipinfo->config);
- }
- } else {
- /* Case C: This is a completely new clusterip config */
+ config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
+ if (!config) {
if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
- printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
- return 0;
+ pr_info("no config found for %pI4, need 'new'\n",
+ &e->ip.dst.s_addr);
+ return -EINVAL;
} else {
struct net_device *dev;
if (e->ip.iniface[0] == '\0') {
- printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n");
- return 0;
+ pr_info("Please specify an interface name\n");
+ return -EINVAL;
}
- dev = dev_get_by_name(e->ip.iniface);
+ dev = dev_get_by_name(par->net, e->ip.iniface);
if (!dev) {
- printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface);
- return 0;
+ pr_info("no such interface %s\n",
+ e->ip.iniface);
+ return -ENOENT;
}
- config = clusterip_config_init(cipinfo,
+ config = clusterip_config_init(cipinfo,
e->ip.dst.s_addr, dev);
if (!config) {
- printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n");
dev_put(dev);
- return 0;
+ return -ENOMEM;
}
- dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
+ dev_mc_add(config->dev, config->clustermac);
}
- cipinfo->config = config;
}
+ cipinfo->config = config;
- return 1;
+ ret = nf_ct_l3proto_try_module_get(par->family);
+ if (ret < 0)
+ pr_info("cannot load conntrack support for proto=%u\n",
+ par->family);
+ return ret;
}
/* drop reference count of cluster config when rule is deleted */
-static void destroy(void *matchinfo, unsigned int matchinfosize)
+static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
{
- struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
+ const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
clusterip_config_entry_put(cipinfo->config);
clusterip_config_put(cipinfo->config);
+
+ nf_ct_l3proto_module_put(par->family);
}
-static struct ipt_target clusterip_tgt = {
- .name = "CLUSTERIP",
- .target = &target,
- .checkentry = &checkentry,
- .destroy = &destroy,
- .me = THIS_MODULE
+#ifdef CONFIG_COMPAT
+struct compat_ipt_clusterip_tgt_info
+{
+ u_int32_t flags;
+ u_int8_t clustermac[6];
+ u_int16_t num_total_nodes;
+ u_int16_t num_local_nodes;
+ u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
+ u_int32_t hash_mode;
+ u_int32_t hash_initval;
+ compat_uptr_t config;
+};
+#endif /* CONFIG_COMPAT */
+
+static struct xt_target clusterip_tg_reg __read_mostly = {
+ .name = "CLUSTERIP",
+ .family = NFPROTO_IPV4,
+ .target = clusterip_tg,
+ .checkentry = clusterip_tg_check,
+ .destroy = clusterip_tg_destroy,
+ .targetsize = sizeof(struct ipt_clusterip_tgt_info),
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
+#endif /* CONFIG_COMPAT */
+ .me = THIS_MODULE
};
-/***********************************************************************
- * ARP MANGLING CODE
+/***********************************************************************
+ * ARP MANGLING CODE
***********************************************************************/
/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
struct arp_payload {
u_int8_t src_hw[ETH_ALEN];
- u_int32_t src_ip;
+ __be32 src_ip;
u_int8_t dst_hw[ETH_ALEN];
- u_int32_t dst_ip;
-} __attribute__ ((packed));
+ __be32 dst_ip;
+} __packed;
-#ifdef CLUSTERIP_DEBUG
-static void arp_print(struct arp_payload *payload)
+#ifdef DEBUG
+static void arp_print(struct arp_payload *payload)
{
#define HBUFFERLEN 30
char hbuffer[HBUFFERLEN];
int j,k;
- const char hexbuf[]= "0123456789abcdef";
for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) {
- hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15];
- hbuffer[k++]=hexbuf[payload->src_hw[j]&15];
+ hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
+ hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
hbuffer[k++]=':';
}
hbuffer[--k]='\0';
- printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n",
- NIPQUAD(payload->src_ip), hbuffer,
- NIPQUAD(payload->dst_ip));
+ pr_debug("src %pI4@%s, dst %pI4\n",
+ &payload->src_ip, hbuffer, &payload->dst_ip);
}
#endif
static unsigned int
-arp_mangle(unsigned int hook,
- struct sk_buff **pskb,
+arp_mangle(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- struct arphdr *arp = (*pskb)->nh.arph;
+ struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
struct clusterip_config *c;
+ struct net *net = dev_net(in ? in : out);
/* we don't care about non-ethernet and non-ipv4 ARP */
- if (arp->ar_hrd != htons(ARPHRD_ETHER)
- || arp->ar_pro != htons(ETH_P_IP)
- || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
+ if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
+ arp->ar_pro != htons(ETH_P_IP) ||
+ arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
return NF_ACCEPT;
/* we only want to mangle arp requests and replies */
- if (arp->ar_op != htons(ARPOP_REPLY)
- && arp->ar_op != htons(ARPOP_REQUEST))
+ if (arp->ar_op != htons(ARPOP_REPLY) &&
+ arp->ar_op != htons(ARPOP_REQUEST))
return NF_ACCEPT;
payload = (void *)(arp+1);
- /* if there is no clusterip configuration for the arp reply's
+ /* if there is no clusterip configuration for the arp reply's
* source ip, we don't want to mangle it */
- c = clusterip_config_find_get(payload->src_ip, 0);
+ c = clusterip_config_find_get(net, payload->src_ip, 0);
if (!c)
return NF_ACCEPT;
- /* normally the linux kernel always replies to arp queries of
+ /* normally the linux kernel always replies to arp queries of
* addresses on different interfacs. However, in the CLUSTERIP case
* this wouldn't work, since we didn't subscribe the mcast group on
* other interfaces */
if (c->dev != out) {
- DEBUGP("CLUSTERIP: not mangling arp reply on different "
- "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name);
+ pr_debug("not mangling arp reply on different "
+ "interface: cip'%s'-skb'%s'\n",
+ c->dev->name, out->name);
clusterip_config_put(c);
return NF_ACCEPT;
}
@@ -562,8 +540,8 @@ arp_mangle(unsigned int hook,
/* mangle reply hardware address */
memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
-#ifdef CLUSTERIP_DEBUG
- DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: ");
+#ifdef DEBUG
+ pr_debug("mangled arp reply: ");
arp_print(payload);
#endif
@@ -572,15 +550,15 @@ arp_mangle(unsigned int hook,
return NF_ACCEPT;
}
-static struct nf_hook_ops cip_arp_ops = {
+static struct nf_hook_ops cip_arp_ops __read_mostly = {
.hook = arp_mangle,
- .pf = NF_ARP,
+ .pf = NFPROTO_ARP,
.hooknum = NF_ARP_OUT,
.priority = -1
};
-/***********************************************************************
- * PROC DIR HANDLING
+/***********************************************************************
+ * PROC DIR HANDLING
***********************************************************************/
#ifdef CONFIG_PROC_FS
@@ -594,8 +572,7 @@ struct clusterip_seq_position {
static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
{
- struct proc_dir_entry *pde = s->private;
- struct clusterip_config *c = pde->data;
+ struct clusterip_config *c = s->private;
unsigned int weight;
u_int32_t local_nodes;
struct clusterip_seq_position *idx;
@@ -621,7 +598,7 @@ static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
- struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
+ struct clusterip_seq_position *idx = v;
*pos = ++idx->pos;
if (*pos >= idx->weight) {
@@ -635,14 +612,15 @@ static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
- kfree(v);
+ if (!IS_ERR(v))
+ kfree(v);
}
static int clusterip_seq_show(struct seq_file *s, void *v)
{
- struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
+ struct clusterip_seq_position *idx = v;
- if (idx->pos != 0)
+ if (idx->pos != 0)
seq_putc(s, ',');
seq_printf(s, "%u", idx->bit);
@@ -653,7 +631,7 @@ static int clusterip_seq_show(struct seq_file *s, void *v)
return 0;
}
-static struct seq_operations clusterip_seq_ops = {
+static const struct seq_operations clusterip_seq_ops = {
.start = clusterip_seq_start,
.next = clusterip_seq_next,
.stop = clusterip_seq_stop,
@@ -666,10 +644,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- struct proc_dir_entry *pde = PDE(inode);
- struct clusterip_config *c = pde->data;
+ struct clusterip_config *c = PDE_DATA(inode);
- sf->private = pde;
+ sf->private = c;
clusterip_config_get(c);
}
@@ -679,8 +656,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
- struct proc_dir_entry *pde = PDE(inode);
- struct clusterip_config *c = pde->data;
+ struct clusterip_config *c = PDE_DATA(inode);
int ret;
ret = seq_release(inode, file);
@@ -694,21 +670,28 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *ofs)
{
+ struct clusterip_config *c = PDE_DATA(file_inode(file));
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
- struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode);
- struct clusterip_config *c = pde->data;
unsigned long nodenum;
+ int rc;
- if (copy_from_user(buffer, input, PROC_WRITELEN))
+ if (size > PROC_WRITELEN)
+ return -EIO;
+ if (copy_from_user(buffer, input, size))
return -EFAULT;
+ buffer[size] = 0;
if (*buffer == '+') {
- nodenum = simple_strtoul(buffer+1, NULL, 10);
+ rc = kstrtoul(buffer+1, 10, &nodenum);
+ if (rc)
+ return rc;
if (clusterip_add_node(c, nodenum))
return -ENOMEM;
} else if (*buffer == '-') {
- nodenum = simple_strtoul(buffer+1, NULL,10);
+ rc = kstrtoul(buffer+1, 10, &nodenum);
+ if (rc)
+ return rc;
if (clusterip_del_node(c, nodenum))
return -ENOENT;
} else
@@ -717,7 +700,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
return size;
}
-static struct file_operations clusterip_proc_fops = {
+static const struct file_operations clusterip_proc_fops = {
.owner = THIS_MODULE,
.open = clusterip_proc_open,
.read = seq_read,
@@ -728,60 +711,79 @@ static struct file_operations clusterip_proc_fops = {
#endif /* CONFIG_PROC_FS */
-static int init_or_cleanup(int fini)
+static int clusterip_net_init(struct net *net)
{
- int ret;
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
- if (fini)
- goto cleanup;
+ INIT_LIST_HEAD(&cn->configs);
- if (ipt_register_target(&clusterip_tgt)) {
- ret = -EINVAL;
- goto cleanup_none;
- }
-
- if (nf_register_hook(&cip_arp_ops) < 0) {
- ret = -EINVAL;
- goto cleanup_target;
- }
+ spin_lock_init(&cn->lock);
#ifdef CONFIG_PROC_FS
- clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net);
- if (!clusterip_procdir) {
- printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n");
- ret = -ENOMEM;
- goto cleanup_hook;
+ cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
+ if (!cn->procdir) {
+ pr_err("Unable to proc dir entry\n");
+ return -ENOMEM;
}
#endif /* CONFIG_PROC_FS */
- printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n",
- CLUSTERIP_VERSION);
-
return 0;
+}
-cleanup:
- printk(KERN_NOTICE "ClusterIP Version %s unloading\n",
- CLUSTERIP_VERSION);
+static void clusterip_net_exit(struct net *net)
+{
#ifdef CONFIG_PROC_FS
- remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ proc_remove(cn->procdir);
#endif
-cleanup_hook:
- nf_unregister_hook(&cip_arp_ops);
-cleanup_target:
- ipt_unregister_target(&clusterip_tgt);
-cleanup_none:
- return -EINVAL;
}
-static int __init init(void)
+static struct pernet_operations clusterip_net_ops = {
+ .init = clusterip_net_init,
+ .exit = clusterip_net_exit,
+ .id = &clusterip_net_id,
+ .size = sizeof(struct clusterip_net),
+};
+
+static int __init clusterip_tg_init(void)
{
- return init_or_cleanup(0);
+ int ret;
+
+ ret = register_pernet_subsys(&clusterip_net_ops);
+ if (ret < 0)
+ return ret;
+
+ ret = xt_register_target(&clusterip_tg_reg);
+ if (ret < 0)
+ goto cleanup_subsys;
+
+ ret = nf_register_hook(&cip_arp_ops);
+ if (ret < 0)
+ goto cleanup_target;
+
+ pr_info("ClusterIP Version %s loaded successfully\n",
+ CLUSTERIP_VERSION);
+
+ return 0;
+
+cleanup_target:
+ xt_unregister_target(&clusterip_tg_reg);
+cleanup_subsys:
+ unregister_pernet_subsys(&clusterip_net_ops);
+ return ret;
}
-static void __exit fini(void)
+static void __exit clusterip_tg_exit(void)
{
- init_or_cleanup(1);
+ pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
+
+ nf_unregister_hook(&cip_arp_ops);
+ xt_unregister_target(&clusterip_tg_reg);
+ unregister_pernet_subsys(&clusterip_net_ops);
+
+ /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
+ rcu_barrier_bh();
}
-module_init(init);
-module_exit(fini);
+module_init(clusterip_tg_init);
+module_exit(clusterip_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c
deleted file mode 100644
index 898cdf79ce1..00000000000
--- a/net/ipv4/netfilter/ipt_DSCP.c
+++ /dev/null
@@ -1,105 +0,0 @@
-/* iptables module for setting the IPv4 DSCP field, Version 1.8
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * See RFC2474 for a description of the DSCP field within the IP Header.
- *
- * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp
-*/
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_DSCP.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP modification module");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_DSCP_info *dinfo = targinfo;
- u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
-
- if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) {
- u_int16_t diffs[2];
-
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
- return NF_DROP;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK)
- | sh_dscp;
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^ 0xFFFF));
- }
- return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
- const void *e_void,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_DSCP_info))) {
- printk(KERN_WARNING "DSCP: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_DSCP_info)));
- return 0;
- }
-
- if (strcmp(tablename, "mangle") != 0) {
- printk(KERN_WARNING "DSCP: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
- return 0;
- }
-
- if ((dscp > IPT_DSCP_MAX)) {
- printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp);
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_target ipt_dscp_reg = {
- .name = "DSCP",
- .target = target,
- .checkentry = checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&ipt_dscp_reg);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_dscp_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c
index 706445426a6..4bf3dc49ad1 100644
--- a/net/ipv4/netfilter/ipt_ECN.c
+++ b/net/ipv4/netfilter/ipt_ECN.c
@@ -1,175 +1,138 @@
/* iptables module for the IPv4 and TCP ECN bits, Version 1.5
*
* (C) 2002 by Harald Welte <laforge@netfilter.org>
- *
+ *
* This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
+ * it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
+#include <net/ip.h>
#include <linux/tcp.h>
#include <net/checksum.h>
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_ECN.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables ECN modification module");
+MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification");
/* set ECT codepoint from IP header.
- * return 0 if there was an error. */
-static inline int
-set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo)
+ * return false if there was an error. */
+static inline bool
+set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
{
- if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK)
- != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
- u_int16_t diffs[2];
-
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
- return 0;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK;
- (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^0xFFFF));
- }
- return 1;
+ struct iphdr *iph = ip_hdr(skb);
+
+ if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) {
+ __u8 oldtos;
+ if (!skb_make_writable(skb, sizeof(struct iphdr)))
+ return false;
+ iph = ip_hdr(skb);
+ oldtos = iph->tos;
+ iph->tos &= ~IPT_ECN_IP_MASK;
+ iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK);
+ csum_replace2(&iph->check, htons(oldtos), htons(iph->tos));
+ }
+ return true;
}
-/* Return 0 if there was an error. */
-static inline int
-set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward)
+/* Return false if there was an error. */
+static inline bool
+set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo)
{
struct tcphdr _tcph, *tcph;
- u_int16_t diffs[2];
+ __be16 oldval;
- /* Not enought header? */
- tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4,
- sizeof(_tcph), &_tcph);
+ /* Not enough header? */
+ tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph);
if (!tcph)
- return 0;
+ return false;
if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) ||
tcph->ece == einfo->proto.tcp.ece) &&
- ((!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
- tcph->cwr == einfo->proto.tcp.cwr)))
- return 1;
-
- if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph)))
- return 0;
- tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4;
+ (!(einfo->operation & IPT_ECN_OP_SET_CWR) ||
+ tcph->cwr == einfo->proto.tcp.cwr))
+ return true;
- if ((*pskb)->ip_summed == CHECKSUM_HW &&
- skb_checksum_help(*pskb, inward))
- return 0;
+ if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph)))
+ return false;
+ tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb);
- diffs[0] = ((u_int16_t *)tcph)[6];
+ oldval = ((__be16 *)tcph)[6];
if (einfo->operation & IPT_ECN_OP_SET_ECE)
tcph->ece = einfo->proto.tcp.ece;
if (einfo->operation & IPT_ECN_OP_SET_CWR)
tcph->cwr = einfo->proto.tcp.cwr;
- diffs[1] = ((u_int16_t *)tcph)[6];
- diffs[0] = diffs[0] ^ 0xFFFF;
-
- if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY)
- tcph->check = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- tcph->check^0xFFFF));
- return 1;
+
+ inet_proto_csum_replace2(&tcph->check, skb,
+ oldval, ((__be16 *)tcph)[6], 0);
+ return true;
}
static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
+ecn_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- const struct ipt_ECN_info *einfo = targinfo;
+ const struct ipt_ECN_info *einfo = par->targinfo;
if (einfo->operation & IPT_ECN_OP_SET_IP)
- if (!set_ect_ip(pskb, einfo))
+ if (!set_ect_ip(skb, einfo))
return NF_DROP;
- if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR)
- && (*pskb)->nh.iph->protocol == IPPROTO_TCP)
- if (!set_ect_tcp(pskb, einfo, (out == NULL)))
+ if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) &&
+ ip_hdr(skb)->protocol == IPPROTO_TCP)
+ if (!set_ect_tcp(skb, einfo))
return NF_DROP;
- return IPT_CONTINUE;
+ return XT_CONTINUE;
}
-static int
-checkentry(const char *tablename,
- const void *e_void,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
+static int ecn_tg_check(const struct xt_tgchk_param *par)
{
- const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo;
- const struct ipt_entry *e = e_void;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) {
- printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_ECN_info)));
- return 0;
- }
-
- if (strcmp(tablename, "mangle") != 0) {
- printk(KERN_WARNING "ECN: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
- return 0;
- }
+ const struct ipt_ECN_info *einfo = par->targinfo;
+ const struct ipt_entry *e = par->entryinfo;
if (einfo->operation & IPT_ECN_OP_MASK) {
- printk(KERN_WARNING "ECN: unsupported ECN operation %x\n",
- einfo->operation);
- return 0;
+ pr_info("unsupported ECN operation %x\n", einfo->operation);
+ return -EINVAL;
}
if (einfo->ip_ect & ~IPT_ECN_IP_MASK) {
- printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n",
- einfo->ip_ect);
- return 0;
+ pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect);
+ return -EINVAL;
}
-
- if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR))
- && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & IPT_INV_PROTO))) {
- printk(KERN_WARNING "ECN: cannot use TCP operations on a "
- "non-tcp rule\n");
- return 0;
+ if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) &&
+ (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) {
+ pr_info("cannot use TCP operations on a non-tcp rule\n");
+ return -EINVAL;
}
-
- return 1;
+ return 0;
}
-static struct ipt_target ipt_ecn_reg = {
+static struct xt_target ecn_tg_reg __read_mostly = {
.name = "ECN",
- .target = target,
- .checkentry = checkentry,
+ .family = NFPROTO_IPV4,
+ .target = ecn_tg,
+ .targetsize = sizeof(struct ipt_ECN_info),
+ .table = "mangle",
+ .checkentry = ecn_tg_check,
.me = THIS_MODULE,
};
-static int __init init(void)
+static int __init ecn_tg_init(void)
{
- return ipt_register_target(&ipt_ecn_reg);
+ return xt_register_target(&ecn_tg_reg);
}
-static void __exit fini(void)
+static void __exit ecn_tg_exit(void)
{
- ipt_unregister_target(&ipt_ecn_reg);
+ xt_unregister_target(&ecn_tg_reg);
}
-module_init(init);
-module_exit(fini);
+module_init(ecn_tg_init);
+module_exit(ecn_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c
deleted file mode 100644
index 6606ddb66a2..00000000000
--- a/net/ipv4/netfilter/ipt_LOG.c
+++ /dev/null
@@ -1,495 +0,0 @@
-/*
- * This is a module which is used for logging packets.
- */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/spinlock.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/icmp.h>
-#include <net/udp.h>
-#include <net/tcp.h>
-#include <net/route.h>
-
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_LOG.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables syslog logging module");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Use lock to serialize, so printks don't overlap */
-static DEFINE_SPINLOCK(log_lock);
-
-/* One level of recursion won't kill us */
-static void dump_packet(const struct nf_loginfo *info,
- const struct sk_buff *skb,
- unsigned int iphoff)
-{
- struct iphdr _iph, *ih;
- unsigned int logflags;
-
- if (info->type == NF_LOG_TYPE_LOG)
- logflags = info->u.log.logflags;
- else
- logflags = NF_LOG_MASK;
-
- ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph);
- if (ih == NULL) {
- printk("TRUNCATED");
- return;
- }
-
- /* Important fields:
- * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */
- /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */
- printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ",
- NIPQUAD(ih->saddr), NIPQUAD(ih->daddr));
-
- /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
- printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
- ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
- ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
-
- /* Max length: 6 "CE DF MF " */
- if (ntohs(ih->frag_off) & IP_CE)
- printk("CE ");
- if (ntohs(ih->frag_off) & IP_DF)
- printk("DF ");
- if (ntohs(ih->frag_off) & IP_MF)
- printk("MF ");
-
- /* Max length: 11 "FRAG:65535 " */
- if (ntohs(ih->frag_off) & IP_OFFSET)
- printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET);
-
- if ((logflags & IPT_LOG_IPOPT)
- && ih->ihl * 4 > sizeof(struct iphdr)) {
- unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op;
- unsigned int i, optsize;
-
- optsize = ih->ihl * 4 - sizeof(struct iphdr);
- op = skb_header_pointer(skb, iphoff+sizeof(_iph),
- optsize, _opt);
- if (op == NULL) {
- printk("TRUNCATED");
- return;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- printk("OPT (");
- for (i = 0; i < optsize; i++)
- printk("%02X", op[i]);
- printk(") ");
- }
-
- switch (ih->protocol) {
- case IPPROTO_TCP: {
- struct tcphdr _tcph, *th;
-
- /* Max length: 10 "PROTO=TCP " */
- printk("PROTO=TCP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- th = skb_header_pointer(skb, iphoff + ih->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- printk("SPT=%u DPT=%u ",
- ntohs(th->source), ntohs(th->dest));
- /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */
- if (logflags & IPT_LOG_TCPSEQ)
- printk("SEQ=%u ACK=%u ",
- ntohl(th->seq), ntohl(th->ack_seq));
- /* Max length: 13 "WINDOW=65535 " */
- printk("WINDOW=%u ", ntohs(th->window));
- /* Max length: 9 "RES=0x3F " */
- printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22));
- /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */
- if (th->cwr)
- printk("CWR ");
- if (th->ece)
- printk("ECE ");
- if (th->urg)
- printk("URG ");
- if (th->ack)
- printk("ACK ");
- if (th->psh)
- printk("PSH ");
- if (th->rst)
- printk("RST ");
- if (th->syn)
- printk("SYN ");
- if (th->fin)
- printk("FIN ");
- /* Max length: 11 "URGP=65535 " */
- printk("URGP=%u ", ntohs(th->urg_ptr));
-
- if ((logflags & IPT_LOG_TCPOPT)
- && th->doff * 4 > sizeof(struct tcphdr)) {
- unsigned char _opt[4 * 15 - sizeof(struct tcphdr)];
- unsigned char *op;
- unsigned int i, optsize;
-
- optsize = th->doff * 4 - sizeof(struct tcphdr);
- op = skb_header_pointer(skb,
- iphoff+ih->ihl*4+sizeof(_tcph),
- optsize, _opt);
- if (op == NULL) {
- printk("TRUNCATED");
- return;
- }
-
- /* Max length: 127 "OPT (" 15*4*2chars ") " */
- printk("OPT (");
- for (i = 0; i < optsize; i++)
- printk("%02X", op[i]);
- printk(") ");
- }
- break;
- }
- case IPPROTO_UDP: {
- struct udphdr _udph, *uh;
-
- /* Max length: 10 "PROTO=UDP " */
- printk("PROTO=UDP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- uh = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_udph), &_udph);
- if (uh == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Max length: 20 "SPT=65535 DPT=65535 " */
- printk("SPT=%u DPT=%u LEN=%u ",
- ntohs(uh->source), ntohs(uh->dest),
- ntohs(uh->len));
- break;
- }
- case IPPROTO_ICMP: {
- struct icmphdr _icmph, *ich;
- static const size_t required_len[NR_ICMP_TYPES+1]
- = { [ICMP_ECHOREPLY] = 4,
- [ICMP_DEST_UNREACH]
- = 8 + sizeof(struct iphdr),
- [ICMP_SOURCE_QUENCH]
- = 8 + sizeof(struct iphdr),
- [ICMP_REDIRECT]
- = 8 + sizeof(struct iphdr),
- [ICMP_ECHO] = 4,
- [ICMP_TIME_EXCEEDED]
- = 8 + sizeof(struct iphdr),
- [ICMP_PARAMETERPROB]
- = 8 + sizeof(struct iphdr),
- [ICMP_TIMESTAMP] = 20,
- [ICMP_TIMESTAMPREPLY] = 20,
- [ICMP_ADDRESS] = 12,
- [ICMP_ADDRESSREPLY] = 12 };
-
- /* Max length: 11 "PROTO=ICMP " */
- printk("PROTO=ICMP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ich = skb_header_pointer(skb, iphoff + ih->ihl * 4,
- sizeof(_icmph), &_icmph);
- if (ich == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Max length: 18 "TYPE=255 CODE=255 " */
- printk("TYPE=%u CODE=%u ", ich->type, ich->code);
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- if (ich->type <= NR_ICMP_TYPES
- && required_len[ich->type]
- && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- switch (ich->type) {
- case ICMP_ECHOREPLY:
- case ICMP_ECHO:
- /* Max length: 19 "ID=65535 SEQ=65535 " */
- printk("ID=%u SEQ=%u ",
- ntohs(ich->un.echo.id),
- ntohs(ich->un.echo.sequence));
- break;
-
- case ICMP_PARAMETERPROB:
- /* Max length: 14 "PARAMETER=255 " */
- printk("PARAMETER=%u ",
- ntohl(ich->un.gateway) >> 24);
- break;
- case ICMP_REDIRECT:
- /* Max length: 24 "GATEWAY=255.255.255.255 " */
- printk("GATEWAY=%u.%u.%u.%u ",
- NIPQUAD(ich->un.gateway));
- /* Fall through */
- case ICMP_DEST_UNREACH:
- case ICMP_SOURCE_QUENCH:
- case ICMP_TIME_EXCEEDED:
- /* Max length: 3+maxlen */
- if (!iphoff) { /* Only recurse once. */
- printk("[");
- dump_packet(info, skb,
- iphoff + ih->ihl*4+sizeof(_icmph));
- printk("] ");
- }
-
- /* Max length: 10 "MTU=65535 " */
- if (ich->type == ICMP_DEST_UNREACH
- && ich->code == ICMP_FRAG_NEEDED)
- printk("MTU=%u ", ntohs(ich->un.frag.mtu));
- }
- break;
- }
- /* Max Length */
- case IPPROTO_AH: {
- struct ip_auth_hdr _ahdr, *ah;
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 9 "PROTO=AH " */
- printk("PROTO=AH ");
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- ah = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_ahdr), &_ahdr);
- if (ah == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Length: 15 "SPI=0xF1234567 " */
- printk("SPI=0x%x ", ntohl(ah->spi));
- break;
- }
- case IPPROTO_ESP: {
- struct ip_esp_hdr _esph, *eh;
-
- /* Max length: 10 "PROTO=ESP " */
- printk("PROTO=ESP ");
-
- if (ntohs(ih->frag_off) & IP_OFFSET)
- break;
-
- /* Max length: 25 "INCOMPLETE [65535 bytes] " */
- eh = skb_header_pointer(skb, iphoff+ih->ihl*4,
- sizeof(_esph), &_esph);
- if (eh == NULL) {
- printk("INCOMPLETE [%u bytes] ",
- skb->len - iphoff - ih->ihl*4);
- break;
- }
-
- /* Length: 15 "SPI=0xF1234567 " */
- printk("SPI=0x%x ", ntohl(eh->spi));
- break;
- }
- /* Max length: 10 "PROTO 255 " */
- default:
- printk("PROTO=%u ", ih->protocol);
- }
-
- /* Max length: 15 "UID=4294967295 " */
- if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) {
- read_lock_bh(&skb->sk->sk_callback_lock);
- if (skb->sk->sk_socket && skb->sk->sk_socket->file)
- printk("UID=%u ", skb->sk->sk_socket->file->f_uid);
- read_unlock_bh(&skb->sk->sk_callback_lock);
- }
-
- /* Proto Max log string length */
- /* IP: 40+46+6+11+127 = 230 */
- /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */
- /* UDP: 10+max(25,20) = 35 */
- /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */
- /* ESP: 10+max(25)+15 = 50 */
- /* AH: 9+max(25)+15 = 49 */
- /* unknown: 10 */
-
- /* (ICMP allows recursion one level deep) */
- /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */
- /* maxlen = 230+ 91 + 230 + 252 = 803 */
-}
-
-static struct nf_loginfo default_loginfo = {
- .type = NF_LOG_TYPE_LOG,
- .u = {
- .log = {
- .level = 0,
- .logflags = NF_LOG_MASK,
- },
- },
-};
-
-static void
-ipt_log_packet(unsigned int pf,
- unsigned int hooknum,
- const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const struct nf_loginfo *loginfo,
- const char *prefix)
-{
- if (!loginfo)
- loginfo = &default_loginfo;
-
- spin_lock_bh(&log_lock);
- printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level,
- prefix,
- in ? in->name : "",
- out ? out->name : "");
-#ifdef CONFIG_BRIDGE_NETFILTER
- if (skb->nf_bridge) {
- struct net_device *physindev = skb->nf_bridge->physindev;
- struct net_device *physoutdev = skb->nf_bridge->physoutdev;
-
- if (physindev && in != physindev)
- printk("PHYSIN=%s ", physindev->name);
- if (physoutdev && out != physoutdev)
- printk("PHYSOUT=%s ", physoutdev->name);
- }
-#endif
-
- if (in && !out) {
- /* MAC logging for input chain only. */
- printk("MAC=");
- if (skb->dev && skb->dev->hard_header_len
- && skb->mac.raw != (void*)skb->nh.iph) {
- int i;
- unsigned char *p = skb->mac.raw;
- for (i = 0; i < skb->dev->hard_header_len; i++,p++)
- printk("%02x%c", *p,
- i==skb->dev->hard_header_len - 1
- ? ' ':':');
- } else
- printk(" ");
- }
-
- dump_packet(loginfo, skb, 0);
- printk("\n");
- spin_unlock_bh(&log_lock);
-}
-
-static unsigned int
-ipt_log_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_log_info *loginfo = targinfo;
- struct nf_loginfo li;
-
- li.type = NF_LOG_TYPE_LOG;
- li.u.log.level = loginfo->level;
- li.u.log.logflags = loginfo->logflags;
-
- nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix);
-
- return IPT_CONTINUE;
-}
-
-static int ipt_log_checkentry(const char *tablename,
- const void *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- const struct ipt_log_info *loginfo = targinfo;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_log_info))) {
- DEBUGP("LOG: targinfosize %u != %u\n",
- targinfosize, IPT_ALIGN(sizeof(struct ipt_log_info)));
- return 0;
- }
-
- if (loginfo->level >= 8) {
- DEBUGP("LOG: level %u >= 8\n", loginfo->level);
- return 0;
- }
-
- if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') {
- DEBUGP("LOG: prefix term %i\n",
- loginfo->prefix[sizeof(loginfo->prefix)-1]);
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_target ipt_log_reg = {
- .name = "LOG",
- .target = ipt_log_target,
- .checkentry = ipt_log_checkentry,
- .me = THIS_MODULE,
-};
-
-static struct nf_logger ipt_log_logger ={
- .name = "ipt_LOG",
- .logfn = &ipt_log_packet,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- if (ipt_register_target(&ipt_log_reg))
- return -EINVAL;
- if (nf_log_register(PF_INET, &ipt_log_logger) < 0) {
- printk(KERN_WARNING "ipt_LOG: not logging via system console "
- "since somebody else already registered for PF_INET\n");
- /* we cannot make module load fail here, since otherwise
- * iptables userspace would abort */
- }
-
- return 0;
-}
-
-static void __exit fini(void)
-{
- nf_log_unregister_logger(&ipt_log_logger);
- ipt_unregister_target(&ipt_log_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 12c56d3343c..00352ce0f0d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -2,14 +2,13 @@
(depending on route). */
/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-
-#include <linux/config.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/types.h>
#include <linux/inetdevice.h>
#include <linux/ip.h>
@@ -21,130 +20,105 @@
#include <net/checksum.h>
#include <net/route.h>
#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <net/netfilter/nf_nat.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables MASQUERADE target module");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* Lock protects masq region inside conntrack */
-static DEFINE_RWLOCK(masq_lock);
+MODULE_DESCRIPTION("Xtables: automatic-address SNAT");
/* FIXME: Multiple targets. --RR */
-static int
-masquerade_check(const char *tablename,
- const void *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
+static int masquerade_tg_check(const struct xt_tgchk_param *par)
{
- const struct ip_nat_multi_range_compat *mr = targinfo;
+ const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo;
- if (strcmp(tablename, "nat") != 0) {
- DEBUGP("masquerade_check: bad table `%s'.\n", tablename);
- return 0;
- }
- if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
- DEBUGP("masquerade_check: size %u != %u.\n",
- targinfosize, sizeof(*mr));
- return 0;
- }
- if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) {
- DEBUGP("masquerade_check: bad hooks %x.\n", hook_mask);
- return 0;
- }
- if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
- DEBUGP("masquerade_check: bad MAP_IPS.\n");
- return 0;
+ if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) {
+ pr_debug("bad MAP_IPS.\n");
+ return -EINVAL;
}
if (mr->rangesize != 1) {
- DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize);
- return 0;
+ pr_debug("bad rangesize %u\n", mr->rangesize);
+ return -EINVAL;
}
- return 1;
+ return 0;
}
static unsigned int
-masquerade_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
+masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- struct ip_conntrack *ct;
+ struct nf_conn *ct;
+ struct nf_conn_nat *nat;
enum ip_conntrack_info ctinfo;
- const struct ip_nat_multi_range_compat *mr;
- struct ip_nat_range newrange;
- struct rtable *rt;
- u_int32_t newsrc;
+ struct nf_nat_range newrange;
+ const struct nf_nat_ipv4_multi_range_compat *mr;
+ const struct rtable *rt;
+ __be32 newsrc, nh;
+
+ NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING);
- IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+ ct = nf_ct_get(skb, &ctinfo);
+ nat = nfct_nat(ct);
- ct = ip_conntrack_get(*pskb, &ctinfo);
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED
- || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY));
+ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
+ ctinfo == IP_CT_RELATED_REPLY));
/* Source address is 0.0.0.0 - locally generated packet that is
* probably not supposed to be masqueraded.
*/
- if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0)
+ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
return NF_ACCEPT;
- mr = targinfo;
- rt = (struct rtable *)(*pskb)->dst;
- newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE);
+ mr = par->targinfo;
+ rt = skb_rtable(skb);
+ nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
+ newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE);
if (!newsrc) {
- printk("MASQUERADE: %s ate my IP address\n", out->name);
+ pr_info("%s ate my IP address\n", par->out->name);
return NF_DROP;
}
- write_lock_bh(&masq_lock);
- ct->nat.masq_index = out->ifindex;
- write_unlock_bh(&masq_lock);
+ nat->masq_index = par->out->ifindex;
/* Transfer from original range. */
- newrange = ((struct ip_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
- newsrc, newsrc,
- mr->range[0].min, mr->range[0].max });
+ memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
+ memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
+ newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS;
+ newrange.min_addr.ip = newsrc;
+ newrange.max_addr.ip = newsrc;
+ newrange.min_proto = mr->range[0].min;
+ newrange.max_proto = mr->range[0].max;
/* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
+ return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
-static inline int
-device_cmp(struct ip_conntrack *i, void *ifindex)
+static int
+device_cmp(struct nf_conn *i, void *ifindex)
{
- int ret;
+ const struct nf_conn_nat *nat = nfct_nat(i);
- read_lock_bh(&masq_lock);
- ret = (i->nat.masq_index == (int)(long)ifindex);
- read_unlock_bh(&masq_lock);
-
- return ret;
+ if (!nat)
+ return 0;
+ if (nf_ct_l3num(i) != NFPROTO_IPV4)
+ return 0;
+ return nat->masq_index == (int)(long)ifindex;
}
static int masq_device_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- struct net_device *dev = ptr;
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct net *net = dev_net(dev);
if (event == NETDEV_DOWN) {
/* Device was downed. Search entire table for
conntracks which were associated with that device,
and forget them. */
- IP_NF_ASSERT(dev->ifindex != 0);
+ NF_CT_ASSERT(dev->ifindex != 0);
- ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
+ nf_ct_iterate_cleanup(net, device_cmp,
+ (void *)(long)dev->ifindex, 0, 0);
}
return NOTIFY_DONE;
@@ -155,17 +129,10 @@ static int masq_inet_event(struct notifier_block *this,
void *ptr)
{
struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
+ struct netdev_notifier_info info;
- if (event == NETDEV_DOWN) {
- /* IP address was deleted. Search entire table for
- conntracks which were associated with that device,
- and forget them. */
- IP_NF_ASSERT(dev->ifindex != 0);
-
- ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex);
- }
-
- return NOTIFY_DONE;
+ netdev_notifier_info_init(&info, dev);
+ return masq_device_event(this, event, &info);
}
static struct notifier_block masq_dev_notifier = {
@@ -176,18 +143,22 @@ static struct notifier_block masq_inet_notifier = {
.notifier_call = masq_inet_event,
};
-static struct ipt_target masquerade = {
+static struct xt_target masquerade_tg_reg __read_mostly = {
.name = "MASQUERADE",
- .target = masquerade_target,
- .checkentry = masquerade_check,
+ .family = NFPROTO_IPV4,
+ .target = masquerade_tg,
+ .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat),
+ .table = "nat",
+ .hooks = 1 << NF_INET_POST_ROUTING,
+ .checkentry = masquerade_tg_check,
.me = THIS_MODULE,
};
-static int __init init(void)
+static int __init masquerade_tg_init(void)
{
int ret;
- ret = ipt_register_target(&masquerade);
+ ret = xt_register_target(&masquerade_tg_reg);
if (ret == 0) {
/* Register for device down reports */
@@ -199,12 +170,12 @@ static int __init init(void)
return ret;
}
-static void __exit fini(void)
+static void __exit masquerade_tg_exit(void)
{
- ipt_unregister_target(&masquerade);
+ xt_unregister_target(&masquerade_tg_reg);
unregister_netdevice_notifier(&masq_dev_notifier);
- unregister_inetaddr_notifier(&masq_inet_notifier);
+ unregister_inetaddr_notifier(&masq_inet_notifier);
}
-module_init(init);
-module_exit(fini);
+module_init(masquerade_tg_init);
+module_exit(masquerade_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c
deleted file mode 100644
index b074467fe67..00000000000
--- a/net/ipv4/netfilter/ipt_NETMAP.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/* NETMAP - static NAT mapping of IP network addresses (1:1).
- * The mapping can be applied to source (POSTROUTING),
- * destination (PREROUTING), or both (with separate rules).
- */
-
-/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/config.h>
-#include <linux/ip.h>
-#include <linux/module.h>
-#include <linux/netdevice.h>
-#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-
-#define MODULENAME "NETMAP"
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>");
-MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int
-check(const char *tablename,
- const void *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- const struct ip_nat_multi_range_compat *mr = targinfo;
-
- if (strcmp(tablename, "nat") != 0) {
- DEBUGP(MODULENAME":check: bad table `%s'.\n", tablename);
- return 0;
- }
- if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
- DEBUGP(MODULENAME":check: size %u.\n", targinfosize);
- return 0;
- }
- if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) |
- (1 << NF_IP_LOCAL_OUT))) {
- DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask);
- return 0;
- }
- if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) {
- DEBUGP(MODULENAME":check: bad MAP_IPS.\n");
- return 0;
- }
- if (mr->rangesize != 1) {
- DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize);
- return 0;
- }
- return 1;
-}
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- u_int32_t new_ip, netmask;
- const struct ip_nat_multi_range_compat *mr = targinfo;
- struct ip_nat_range newrange;
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
- || hooknum == NF_IP_POST_ROUTING
- || hooknum == NF_IP_LOCAL_OUT);
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip);
-
- if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT)
- new_ip = (*pskb)->nh.iph->daddr & ~netmask;
- else
- new_ip = (*pskb)->nh.iph->saddr & ~netmask;
- new_ip |= mr->range[0].min_ip & netmask;
-
- newrange = ((struct ip_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
- new_ip, new_ip,
- mr->range[0].min, mr->range[0].max });
-
- /* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
-}
-
-static struct ipt_target target_module = {
- .name = MODULENAME,
- .target = target,
- .checkentry = check,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&target_module);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&target_module);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c
deleted file mode 100644
index 140be51f2f0..00000000000
--- a/net/ipv4/netfilter/ipt_REDIRECT.c
+++ /dev/null
@@ -1,133 +0,0 @@
-/* Redirect. Simple mapping which alters dst to a local IP address. */
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-#include <linux/if.h>
-#include <linux/inetdevice.h>
-#include <net/protocol.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables REDIRECT target module");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-/* FIXME: Take multiple ranges --RR */
-static int
-redirect_check(const char *tablename,
- const void *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- const struct ip_nat_multi_range_compat *mr = targinfo;
-
- if (strcmp(tablename, "nat") != 0) {
- DEBUGP("redirect_check: bad table `%s'.\n", table);
- return 0;
- }
- if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
- DEBUGP("redirect_check: size %u.\n", targinfosize);
- return 0;
- }
- if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) {
- DEBUGP("redirect_check: bad hooks %x.\n", hook_mask);
- return 0;
- }
- if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) {
- DEBUGP("redirect_check: bad MAP_IPS.\n");
- return 0;
- }
- if (mr->rangesize != 1) {
- DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize);
- return 0;
- }
- return 1;
-}
-
-static unsigned int
-redirect_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- u_int32_t newdst;
- const struct ip_nat_multi_range_compat *mr = targinfo;
- struct ip_nat_range newrange;
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING
- || hooknum == NF_IP_LOCAL_OUT);
-
- ct = ip_conntrack_get(*pskb, &ctinfo);
- IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED));
-
- /* Local packets: make them go to loopback */
- if (hooknum == NF_IP_LOCAL_OUT)
- newdst = htonl(0x7F000001);
- else {
- struct in_device *indev;
- struct in_ifaddr *ifa;
-
- newdst = 0;
-
- rcu_read_lock();
- indev = __in_dev_get_rcu((*pskb)->dev);
- if (indev && (ifa = indev->ifa_list))
- newdst = ifa->ifa_local;
- rcu_read_unlock();
-
- if (!newdst)
- return NF_DROP;
- }
-
- /* Transfer from original range. */
- newrange = ((struct ip_nat_range)
- { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS,
- newdst, newdst,
- mr->range[0].min, mr->range[0].max });
-
- /* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
-}
-
-static struct ipt_target redirect_reg = {
- .name = "REDIRECT",
- .target = redirect_target,
- .checkentry = redirect_check,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&redirect_reg);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&redirect_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 3eb47aae78c..5b6e0df4ccf 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -1,7 +1,5 @@
/*
* This is a module which is used for rejecting packets.
- * Added support for customized reject packets (Jozsef Kadlecsik).
- * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812]
*/
/* (C) 1999-2001 Paul `Rusty' Russell
@@ -11,268 +9,56 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-
-#include <linux/config.h>
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/route.h>
-#include <net/dst.h>
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_REJECT.h>
#ifdef CONFIG_BRIDGE_NETFILTER
#include <linux/netfilter_bridge.h>
#endif
+#include <net/netfilter/ipv4/nf_reject.h>
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables REJECT target module");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static inline struct rtable *route_reverse(struct sk_buff *skb,
- struct tcphdr *tcph, int hook)
-{
- struct iphdr *iph = skb->nh.iph;
- struct dst_entry *odst;
- struct flowi fl = {};
- struct rtable *rt;
-
- /* We don't require ip forwarding to be enabled to be able to
- * send a RST reply for bridged traffic. */
- if (hook != NF_IP_FORWARD
-#ifdef CONFIG_BRIDGE_NETFILTER
- || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
-#endif
- ) {
- fl.nl_u.ip4_u.daddr = iph->saddr;
- if (hook == NF_IP_LOCAL_IN)
- fl.nl_u.ip4_u.saddr = iph->daddr;
- fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
-
- if (ip_route_output_key(&rt, &fl) != 0)
- return NULL;
- } else {
- /* non-local src, find valid iif to satisfy
- * rp-filter when calling ip_route_input. */
- fl.nl_u.ip4_u.daddr = iph->daddr;
- if (ip_route_output_key(&rt, &fl) != 0)
- return NULL;
-
- odst = skb->dst;
- if (ip_route_input(skb, iph->saddr, iph->daddr,
- RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
- dst_release(&rt->u.dst);
- return NULL;
- }
- dst_release(&rt->u.dst);
- rt = (struct rtable *)skb->dst;
- skb->dst = odst;
-
- fl.nl_u.ip4_u.daddr = iph->saddr;
- fl.nl_u.ip4_u.saddr = iph->daddr;
- fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
- }
-
- if (rt->u.dst.error) {
- dst_release(&rt->u.dst);
- return NULL;
- }
+MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
- fl.proto = IPPROTO_TCP;
- fl.fl_ip_sport = tcph->dest;
- fl.fl_ip_dport = tcph->source;
-
- xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
-
- return rt;
-}
-
-/* Send RST reply */
-static void send_reset(struct sk_buff *oldskb, int hook)
+static unsigned int
+reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- struct sk_buff *nskb;
- struct iphdr *iph = oldskb->nh.iph;
- struct tcphdr _otcph, *oth, *tcph;
- struct rtable *rt;
- u_int16_t tmp_port;
- u_int32_t tmp_addr;
- unsigned int tcplen;
- int needs_ack;
- int hh_len;
-
- /* IP header checks: fragment. */
- if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
- return;
-
- oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4,
- sizeof(_otcph), &_otcph);
- if (oth == NULL)
- return;
-
- /* No RST for RST. */
- if (oth->rst)
- return;
-
- /* Check checksum */
- tcplen = oldskb->len - iph->ihl * 4;
- if (((hook != NF_IP_LOCAL_IN && oldskb->ip_summed != CHECKSUM_HW) ||
- (hook == NF_IP_LOCAL_IN &&
- oldskb->ip_summed != CHECKSUM_UNNECESSARY)) &&
- csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
- oldskb->ip_summed == CHECKSUM_HW ? oldskb->csum :
- skb_checksum(oldskb, iph->ihl * 4, tcplen, 0)))
- return;
-
- if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
- return;
-
- hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
-
- /* We need a linear, writeable skb. We also need to expand
- headroom in case hh_len of incoming interface < hh_len of
- outgoing interface */
- nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
- GFP_ATOMIC);
- if (!nskb) {
- dst_release(&rt->u.dst);
- return;
- }
-
- dst_release(nskb->dst);
- nskb->dst = &rt->u.dst;
-
- /* This packet will not be the same as the other: clear nf fields */
- nf_reset(nskb);
- nskb->nfmark = 0;
-#ifdef CONFIG_BRIDGE_NETFILTER
- nf_bridge_put(nskb->nf_bridge);
- nskb->nf_bridge = NULL;
-#endif
+ const struct ipt_reject_info *reject = par->targinfo;
- tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl);
-
- /* Swap source and dest */
- tmp_addr = nskb->nh.iph->saddr;
- nskb->nh.iph->saddr = nskb->nh.iph->daddr;
- nskb->nh.iph->daddr = tmp_addr;
- tmp_port = tcph->source;
- tcph->source = tcph->dest;
- tcph->dest = tmp_port;
-
- /* Truncate to length (no data) */
- tcph->doff = sizeof(struct tcphdr)/4;
- skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr));
- nskb->nh.iph->tot_len = htons(nskb->len);
-
- if (tcph->ack) {
- needs_ack = 0;
- tcph->seq = oth->ack_seq;
- tcph->ack_seq = 0;
- } else {
- needs_ack = 1;
- tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin
- + oldskb->len - oldskb->nh.iph->ihl*4
- - (oth->doff<<2));
- tcph->seq = 0;
- }
-
- /* Reset flags */
- ((u_int8_t *)tcph)[13] = 0;
- tcph->rst = 1;
- tcph->ack = needs_ack;
-
- tcph->window = 0;
- tcph->urg_ptr = 0;
-
- /* Adjust TCP checksum */
- tcph->check = 0;
- tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr),
- nskb->nh.iph->saddr,
- nskb->nh.iph->daddr,
- csum_partial((char *)tcph,
- sizeof(struct tcphdr), 0));
-
- /* Adjust IP TTL, DF */
- nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
- /* Set DF, id = 0 */
- nskb->nh.iph->frag_off = htons(IP_DF);
- nskb->nh.iph->id = 0;
-
- /* Adjust IP checksum */
- nskb->nh.iph->check = 0;
- nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph,
- nskb->nh.iph->ihl);
-
- /* "Never happens" */
- if (nskb->len > dst_mtu(nskb->dst))
- goto free_nskb;
-
- nf_ct_attach(nskb, oldskb);
-
- NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev,
- dst_output);
- return;
-
- free_nskb:
- kfree_skb(nskb);
-}
-
-static inline void send_unreach(struct sk_buff *skb_in, int code)
-{
- icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
-}
-
-static unsigned int reject(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_reject_info *reject = targinfo;
-
- /* Our naive response construction doesn't deal with IP
- options, and probably shouldn't try. */
- if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr))
- return NF_DROP;
-
- /* WARNING: This code causes reentry within iptables.
- This means that the iptables jump stack is now crap. We
- must return an absolute verdict. --RR */
- switch (reject->with) {
- case IPT_ICMP_NET_UNREACHABLE:
- send_unreach(*pskb, ICMP_NET_UNREACH);
- break;
- case IPT_ICMP_HOST_UNREACHABLE:
- send_unreach(*pskb, ICMP_HOST_UNREACH);
- break;
- case IPT_ICMP_PROT_UNREACHABLE:
- send_unreach(*pskb, ICMP_PROT_UNREACH);
- break;
- case IPT_ICMP_PORT_UNREACHABLE:
- send_unreach(*pskb, ICMP_PORT_UNREACH);
- break;
- case IPT_ICMP_NET_PROHIBITED:
- send_unreach(*pskb, ICMP_NET_ANO);
- break;
+ switch (reject->with) {
+ case IPT_ICMP_NET_UNREACHABLE:
+ nf_send_unreach(skb, ICMP_NET_UNREACH);
+ break;
+ case IPT_ICMP_HOST_UNREACHABLE:
+ nf_send_unreach(skb, ICMP_HOST_UNREACH);
+ break;
+ case IPT_ICMP_PROT_UNREACHABLE:
+ nf_send_unreach(skb, ICMP_PROT_UNREACH);
+ break;
+ case IPT_ICMP_PORT_UNREACHABLE:
+ nf_send_unreach(skb, ICMP_PORT_UNREACH);
+ break;
+ case IPT_ICMP_NET_PROHIBITED:
+ nf_send_unreach(skb, ICMP_NET_ANO);
+ break;
case IPT_ICMP_HOST_PROHIBITED:
- send_unreach(*pskb, ICMP_HOST_ANO);
- break;
- case IPT_ICMP_ADMIN_PROHIBITED:
- send_unreach(*pskb, ICMP_PKT_FILTERED);
+ nf_send_unreach(skb, ICMP_HOST_ANO);
+ break;
+ case IPT_ICMP_ADMIN_PROHIBITED:
+ nf_send_unreach(skb, ICMP_PKT_FILTERED);
break;
case IPT_TCP_RESET:
- send_reset(*pskb, hooknum);
+ nf_send_reset(skb, par->hooknum);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
@@ -281,63 +67,46 @@ static unsigned int reject(struct sk_buff **pskb,
return NF_DROP;
}
-static int check(const char *tablename,
- const void *e_void,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
+static int reject_tg_check(const struct xt_tgchk_param *par)
{
- const struct ipt_reject_info *rejinfo = targinfo;
- const struct ipt_entry *e = e_void;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) {
- DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize);
- return 0;
- }
-
- /* Only allow these for packet filtering. */
- if (strcmp(tablename, "filter") != 0) {
- DEBUGP("REJECT: bad table `%s'.\n", tablename);
- return 0;
- }
- if ((hook_mask & ~((1 << NF_IP_LOCAL_IN)
- | (1 << NF_IP_FORWARD)
- | (1 << NF_IP_LOCAL_OUT))) != 0) {
- DEBUGP("REJECT: bad hook mask %X\n", hook_mask);
- return 0;
- }
+ const struct ipt_reject_info *rejinfo = par->targinfo;
+ const struct ipt_entry *e = par->entryinfo;
if (rejinfo->with == IPT_ICMP_ECHOREPLY) {
- printk("REJECT: ECHOREPLY no longer supported.\n");
- return 0;
+ pr_info("ECHOREPLY no longer supported.\n");
+ return -EINVAL;
} else if (rejinfo->with == IPT_TCP_RESET) {
/* Must specify that it's a TCP packet */
- if (e->ip.proto != IPPROTO_TCP
- || (e->ip.invflags & IPT_INV_PROTO)) {
- DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n");
- return 0;
+ if (e->ip.proto != IPPROTO_TCP ||
+ (e->ip.invflags & XT_INV_PROTO)) {
+ pr_info("TCP_RESET invalid for non-tcp\n");
+ return -EINVAL;
}
}
-
- return 1;
+ return 0;
}
-static struct ipt_target ipt_reject_reg = {
+static struct xt_target reject_tg_reg __read_mostly = {
.name = "REJECT",
- .target = reject,
- .checkentry = check,
+ .family = NFPROTO_IPV4,
+ .target = reject_tg,
+ .targetsize = sizeof(struct ipt_reject_info),
+ .table = "filter",
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT),
+ .checkentry = reject_tg_check,
.me = THIS_MODULE,
};
-static int __init init(void)
+static int __init reject_tg_init(void)
{
- return ipt_register_target(&ipt_reject_reg);
+ return xt_register_target(&reject_tg_reg);
}
-static void __exit fini(void)
+static void __exit reject_tg_exit(void)
{
- ipt_unregister_target(&ipt_reject_reg);
+ xt_unregister_target(&reject_tg_reg);
}
-module_init(init);
-module_exit(fini);
+module_init(reject_tg_init);
+module_exit(reject_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c
deleted file mode 100644
index a22de59bba0..00000000000
--- a/net/ipv4/netfilter/ipt_SAME.c
+++ /dev/null
@@ -1,211 +0,0 @@
-/* Same. Just like SNAT, only try to make the connections
- * between client A and server B always have the same source ip.
- *
- * (C) 2000 Paul `Rusty' Russell
- * (C) 2001 Martin Josefsson
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * 010320 Martin Josefsson <gandalf@wlug.westbo.se>
- * * copied ipt_BALANCE.c to ipt_SAME.c and changed a few things.
- * 010728 Martin Josefsson <gandalf@wlug.westbo.se>
- * * added --nodst to not include destination-ip in new source
- * calculations.
- * * added some more sanity-checks.
- * 010729 Martin Josefsson <gandalf@wlug.westbo.se>
- * * fixed a buggy if-statement in same_check(), should have
- * used ntohl() but didn't.
- * * added support for multiple ranges. IPT_SAME_MAX_RANGE is
- * defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h
- * and is currently set to 10.
- * * added support for 1-address range, nice to have now that
- * we have multiple ranges.
- */
-#include <linux/types.h>
-#include <linux/ip.h>
-#include <linux/timer.h>
-#include <linux/module.h>
-#include <linux/netfilter.h>
-#include <linux/netdevice.h>
-#include <linux/if.h>
-#include <linux/inetdevice.h>
-#include <net/protocol.h>
-#include <net/checksum.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat_rule.h>
-#include <linux/netfilter_ipv4/ipt_SAME.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>");
-MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int
-same_check(const char *tablename,
- const void *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- unsigned int count, countess, rangeip, index = 0;
- struct ipt_same_info *mr = targinfo;
-
- mr->ipnum = 0;
-
- if (strcmp(tablename, "nat") != 0) {
- DEBUGP("same_check: bad table `%s'.\n", tablename);
- return 0;
- }
- if (targinfosize != IPT_ALIGN(sizeof(*mr))) {
- DEBUGP("same_check: size %u.\n", targinfosize);
- return 0;
- }
- if (hook_mask & ~(1 << NF_IP_PRE_ROUTING | 1 << NF_IP_POST_ROUTING)) {
- DEBUGP("same_check: bad hooks %x.\n", hook_mask);
- return 0;
- }
- if (mr->rangesize < 1) {
- DEBUGP("same_check: need at least one dest range.\n");
- return 0;
- }
- if (mr->rangesize > IPT_SAME_MAX_RANGE) {
- DEBUGP("same_check: too many ranges specified, maximum "
- "is %u ranges\n",
- IPT_SAME_MAX_RANGE);
- return 0;
- }
- for (count = 0; count < mr->rangesize; count++) {
- if (ntohl(mr->range[count].min_ip) >
- ntohl(mr->range[count].max_ip)) {
- DEBUGP("same_check: min_ip is larger than max_ip in "
- "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n",
- NIPQUAD(mr->range[count].min_ip),
- NIPQUAD(mr->range[count].max_ip));
- return 0;
- }
- if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) {
- DEBUGP("same_check: bad MAP_IPS.\n");
- return 0;
- }
- rangeip = (ntohl(mr->range[count].max_ip) -
- ntohl(mr->range[count].min_ip) + 1);
- mr->ipnum += rangeip;
-
- DEBUGP("same_check: range %u, ipnum = %u\n", count, rangeip);
- }
- DEBUGP("same_check: total ipaddresses = %u\n", mr->ipnum);
-
- mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL);
- if (!mr->iparray) {
- DEBUGP("same_check: Couldn't allocate %u bytes "
- "for %u ipaddresses!\n",
- (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
- return 0;
- }
- DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n",
- (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
-
- for (count = 0; count < mr->rangesize; count++) {
- for (countess = ntohl(mr->range[count].min_ip);
- countess <= ntohl(mr->range[count].max_ip);
- countess++) {
- mr->iparray[index] = countess;
- DEBUGP("same_check: Added ipaddress `%u.%u.%u.%u' "
- "in index %u.\n",
- HIPQUAD(countess), index);
- index++;
- }
- }
- return 1;
-}
-
-static void
-same_destroy(void *targinfo,
- unsigned int targinfosize)
-{
- struct ipt_same_info *mr = targinfo;
-
- kfree(mr->iparray);
-
- DEBUGP("same_destroy: Deallocated %u bytes for %u ipaddresses.\n",
- (sizeof(u_int32_t) * mr->ipnum), mr->ipnum);
-}
-
-static unsigned int
-same_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- struct ip_conntrack *ct;
- enum ip_conntrack_info ctinfo;
- u_int32_t tmpip, aindex, new_ip;
- const struct ipt_same_info *same = targinfo;
- struct ip_nat_range newrange;
- const struct ip_conntrack_tuple *t;
-
- IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING ||
- hooknum == NF_IP_POST_ROUTING);
- ct = ip_conntrack_get(*pskb, &ctinfo);
-
- t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
-
- /* Base new source on real src ip and optionally dst ip,
- giving some hope for consistency across reboots.
- Here we calculate the index in same->iparray which
- holds the ipaddress we should use */
-
- tmpip = ntohl(t->src.ip);
-
- if (!(same->info & IPT_SAME_NODST))
- tmpip += ntohl(t->dst.ip);
-
- aindex = tmpip % same->ipnum;
-
- new_ip = htonl(same->iparray[aindex]);
-
- DEBUGP("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, "
- "new src=%u.%u.%u.%u\n",
- NIPQUAD(t->src.ip), NIPQUAD(t->dst.ip),
- NIPQUAD(new_ip));
-
- /* Transfer from original range. */
- newrange = ((struct ip_nat_range)
- { same->range[0].flags, new_ip, new_ip,
- /* FIXME: Use ports from correct range! */
- same->range[0].min, same->range[0].max });
-
- /* Hand modified range to generic setup. */
- return ip_nat_setup_info(ct, &newrange, hooknum);
-}
-
-static struct ipt_target same_reg = {
- .name = "SAME",
- .target = same_target,
- .checkentry = same_check,
- .destroy = same_destroy,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&same_reg);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&same_reg);
-}
-
-module_init(init);
-module_exit(fini);
-
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
new file mode 100644
index 00000000000..a313c3fbeb4
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+static struct iphdr *
+synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
+{
+ struct iphdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+ iph->version = 4;
+ iph->ihl = sizeof(*iph) / 4;
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+ iph->ttl = sysctl_ip_default_ttl;
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+
+ return iph;
+}
+
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct iphdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ skb_dst_set_noref(nskb, skb_dst(skb));
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ u16 mss = opts->mss;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = ntohs(htons(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int mss;
+
+ mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1);
+ if (mss == 0) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+ opts->mss = mss;
+ opts->options |= XT_SYNPROXY_OPT_MSS;
+
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+ return true;
+}
+
+static unsigned int
+synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
+
+ if (th->syn && !(th->ack || th->fin || th->rst)) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ return NF_DROP;
+
+ } else if (th->ack && !(th->fin || th->rst || th->syn)) {
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+ return NF_DROP;
+ }
+
+ return XT_CONTINUE;
+}
+
+static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ unsigned int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ thoff = ip_hdrlen(skb);
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+
+static int synproxy_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_entry *e = par->entryinfo;
+
+ if (e->ip.proto != IPPROTO_TCP ||
+ e->ip.invflags & XT_INV_PROTO)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+
+static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+static struct xt_target synproxy_tg4_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV4,
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
+ .target = synproxy_tg4,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg4_check,
+ .destroy = synproxy_tg4_destroy,
+ .me = THIS_MODULE,
+};
+
+static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+static int __init synproxy_tg4_init(void)
+{
+ int err;
+
+ err = nf_register_hooks(ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ if (err < 0)
+ goto err1;
+
+ err = xt_register_target(&synproxy_tg4_reg);
+ if (err < 0)
+ goto err2;
+
+ return 0;
+
+err2:
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+err1:
+ return err;
+}
+
+static void __exit synproxy_tg4_exit(void)
+{
+ xt_unregister_target(&synproxy_tg4_reg);
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+}
+
+module_init(synproxy_tg4_init);
+module_exit(synproxy_tg4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
deleted file mode 100644
index c122841e182..00000000000
--- a/net/ipv4/netfilter/ipt_TCPMSS.c
+++ /dev/null
@@ -1,263 +0,0 @@
-/*
- * This is a module which is used for setting the MSS option in TCP packets.
- *
- * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/ip.h>
-#include <net/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_TCPMSS.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables TCP MSS modification module");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static u_int16_t
-cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck)
-{
- u_int32_t diffs[] = { oldvalinv, newval };
- return csum_fold(csum_partial((char *)diffs, sizeof(diffs),
- oldcheck^0xFFFF));
-}
-
-static inline unsigned int
-optlen(const u_int8_t *opt, unsigned int offset)
-{
- /* Beware zero-length options: make finite progress */
- if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1;
- else return opt[offset+1];
-}
-
-static unsigned int
-ipt_tcpmss_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
- struct tcphdr *tcph;
- struct iphdr *iph;
- u_int16_t tcplen, newtotlen, oldval, newmss;
- unsigned int i;
- u_int8_t *opt;
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return NF_DROP;
-
- if ((*pskb)->ip_summed == CHECKSUM_HW &&
- skb_checksum_help(*pskb, out == NULL))
- return NF_DROP;
-
- iph = (*pskb)->nh.iph;
- tcplen = (*pskb)->len - iph->ihl*4;
-
- tcph = (void *)iph + iph->ihl*4;
-
- /* Since it passed flags test in tcp match, we know it is is
- not a fragment, and has data >= tcp header length. SYN
- packets should not contain data: if they did, then we risk
- running over MTU, sending Frag Needed and breaking things
- badly. --RR */
- if (tcplen != tcph->doff*4) {
- if (net_ratelimit())
- printk(KERN_ERR
- "ipt_tcpmss_target: bad length (%d bytes)\n",
- (*pskb)->len);
- return NF_DROP;
- }
-
- if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) {
- if(!(*pskb)->dst) {
- if (net_ratelimit())
- printk(KERN_ERR
- "ipt_tcpmss_target: no dst?! can't determine path-MTU\n");
- return NF_DROP; /* or IPT_CONTINUE ?? */
- }
-
- if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
- if (net_ratelimit())
- printk(KERN_ERR
- "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst));
- return NF_DROP; /* or IPT_CONTINUE ?? */
- }
-
- newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
- } else
- newmss = tcpmssinfo->mss;
-
- opt = (u_int8_t *)tcph;
- for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){
- if ((opt[i] == TCPOPT_MSS) &&
- ((tcph->doff*4 - i) >= TCPOLEN_MSS) &&
- (opt[i+1] == TCPOLEN_MSS)) {
- u_int16_t oldmss;
-
- oldmss = (opt[i+2] << 8) | opt[i+3];
-
- if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
- (oldmss <= newmss))
- return IPT_CONTINUE;
-
- opt[i+2] = (newmss & 0xff00) >> 8;
- opt[i+3] = (newmss & 0x00ff);
-
- tcph->check = cheat_check(htons(oldmss)^0xFFFF,
- htons(newmss),
- tcph->check);
-
- DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
- "->%u.%u.%u.%u:%hu changed TCP MSS option"
- " (from %u to %u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- ntohs(tcph->source),
- NIPQUAD((*pskb)->nh.iph->daddr),
- ntohs(tcph->dest),
- oldmss, newmss);
- goto retmodified;
- }
- }
-
- /*
- * MSS Option not found ?! add it..
- */
- if (skb_tailroom((*pskb)) < TCPOLEN_MSS) {
- struct sk_buff *newskb;
-
- newskb = skb_copy_expand(*pskb, skb_headroom(*pskb),
- TCPOLEN_MSS, GFP_ATOMIC);
- if (!newskb) {
- if (net_ratelimit())
- printk(KERN_ERR "ipt_tcpmss_target:"
- " unable to allocate larger skb\n");
- return NF_DROP;
- }
-
- kfree_skb(*pskb);
- *pskb = newskb;
- iph = (*pskb)->nh.iph;
- tcph = (void *)iph + iph->ihl*4;
- }
-
- skb_put((*pskb), TCPOLEN_MSS);
-
- opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
- memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
-
- tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF,
- htons(tcplen + TCPOLEN_MSS), tcph->check);
- tcplen += TCPOLEN_MSS;
-
- opt[0] = TCPOPT_MSS;
- opt[1] = TCPOLEN_MSS;
- opt[2] = (newmss & 0xff00) >> 8;
- opt[3] = (newmss & 0x00ff);
-
- tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check);
-
- oldval = ((u_int16_t *)tcph)[6];
- tcph->doff += TCPOLEN_MSS/4;
- tcph->check = cheat_check(oldval ^ 0xFFFF,
- ((u_int16_t *)tcph)[6], tcph->check);
-
- newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS);
- iph->check = cheat_check(iph->tot_len ^ 0xFFFF,
- newtotlen, iph->check);
- iph->tot_len = newtotlen;
-
- DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu"
- "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n",
- NIPQUAD((*pskb)->nh.iph->saddr),
- ntohs(tcph->source),
- NIPQUAD((*pskb)->nh.iph->daddr),
- ntohs(tcph->dest),
- newmss);
-
- retmodified:
- return IPT_CONTINUE;
-}
-
-#define TH_SYN 0x02
-
-static inline int find_syn_match(const struct ipt_entry_match *m)
-{
- const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data;
-
- if (strcmp(m->u.kernel.match->name, "tcp") == 0
- && (tcpinfo->flg_cmp & TH_SYN)
- && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS))
- return 1;
-
- return 0;
-}
-
-/* Must specify -p tcp --syn/--tcp-flags SYN */
-static int
-ipt_tcpmss_checkentry(const char *tablename,
- const void *e_void,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- const struct ipt_tcpmss_info *tcpmssinfo = targinfo;
- const struct ipt_entry *e = e_void;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) {
- DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n",
- targinfosize, IPT_ALIGN(sizeof(struct ipt_tcpmss_info)));
- return 0;
- }
-
-
- if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) &&
- ((hook_mask & ~((1 << NF_IP_FORWARD)
- | (1 << NF_IP_LOCAL_OUT)
- | (1 << NF_IP_POST_ROUTING))) != 0)) {
- printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n");
- return 0;
- }
-
- if (e->ip.proto == IPPROTO_TCP
- && !(e->ip.invflags & IPT_INV_PROTO)
- && IPT_MATCH_ITERATE(e, find_syn_match))
- return 1;
-
- printk("TCPMSS: Only works on TCP SYN packets\n");
- return 0;
-}
-
-static struct ipt_target ipt_tcpmss_reg = {
- .name = "TCPMSS",
- .target = ipt_tcpmss_target,
- .checkentry = ipt_tcpmss_checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&ipt_tcpmss_reg);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_tcpmss_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c
deleted file mode 100644
index 3a44a56db23..00000000000
--- a/net/ipv4/netfilter/ipt_TOS.c
+++ /dev/null
@@ -1,104 +0,0 @@
-/* This is a module which is used for setting the TOS field of a packet. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_TOS.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables TOS mangling module");
-
-static unsigned int
-target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo,
- void *userinfo)
-{
- const struct ipt_tos_target_info *tosinfo = targinfo;
-
- if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) {
- u_int16_t diffs[2];
-
- if (!skb_make_writable(pskb, sizeof(struct iphdr)))
- return NF_DROP;
-
- diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF;
- (*pskb)->nh.iph->tos
- = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK)
- | tosinfo->tos;
- diffs[1] = htons((*pskb)->nh.iph->tos);
- (*pskb)->nh.iph->check
- = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- (*pskb)->nh.iph->check
- ^0xFFFF));
- }
- return IPT_CONTINUE;
-}
-
-static int
-checkentry(const char *tablename,
- const void *e_void,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) {
- printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_tos_target_info)));
- return 0;
- }
-
- if (strcmp(tablename, "mangle") != 0) {
- printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename);
- return 0;
- }
-
- if (tos != IPTOS_LOWDELAY
- && tos != IPTOS_THROUGHPUT
- && tos != IPTOS_RELIABILITY
- && tos != IPTOS_MINCOST
- && tos != IPTOS_NORMALSVC) {
- printk(KERN_WARNING "TOS: bad tos value %#x\n", tos);
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_target ipt_tos_reg = {
- .name = "TOS",
- .target = target,
- .checkentry = checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&ipt_tos_reg);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_tos_reg);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c
deleted file mode 100644
index b769eb23197..00000000000
--- a/net/ipv4/netfilter/ipt_TTL.c
+++ /dev/null
@@ -1,119 +0,0 @@
-/* TTL modification target for IP tables
- * (C) 2000,2005 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <net/checksum.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_TTL.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IP tables TTL modification module");
-MODULE_LICENSE("GPL");
-
-static unsigned int
-ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in,
- const struct net_device *out, unsigned int hooknum,
- const void *targinfo, void *userinfo)
-{
- struct iphdr *iph;
- const struct ipt_TTL_info *info = targinfo;
- u_int16_t diffs[2];
- int new_ttl;
-
- if (!skb_make_writable(pskb, (*pskb)->len))
- return NF_DROP;
-
- iph = (*pskb)->nh.iph;
-
- switch (info->mode) {
- case IPT_TTL_SET:
- new_ttl = info->ttl;
- break;
- case IPT_TTL_INC:
- new_ttl = iph->ttl + info->ttl;
- if (new_ttl > 255)
- new_ttl = 255;
- break;
- case IPT_TTL_DEC:
- new_ttl = iph->ttl - info->ttl;
- if (new_ttl < 0)
- new_ttl = 0;
- break;
- default:
- new_ttl = iph->ttl;
- break;
- }
-
- if (new_ttl != iph->ttl) {
- diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF;
- iph->ttl = new_ttl;
- diffs[1] = htons(((unsigned)iph->ttl) << 8);
- iph->check = csum_fold(csum_partial((char *)diffs,
- sizeof(diffs),
- iph->check^0xFFFF));
- }
-
- return IPT_CONTINUE;
-}
-
-static int ipt_ttl_checkentry(const char *tablename,
- const void *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hook_mask)
-{
- struct ipt_TTL_info *info = targinfo;
-
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) {
- printk(KERN_WARNING "ipt_TTL: targinfosize %u != %Zu\n",
- targinfosize,
- IPT_ALIGN(sizeof(struct ipt_TTL_info)));
- return 0;
- }
-
- if (strcmp(tablename, "mangle")) {
- printk(KERN_WARNING "ipt_TTL: can only be called from "
- "\"mangle\" table, not \"%s\"\n", tablename);
- return 0;
- }
-
- if (info->mode > IPT_TTL_MAXMODE) {
- printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n",
- info->mode);
- return 0;
- }
-
- if ((info->mode != IPT_TTL_SET) && (info->ttl == 0))
- return 0;
-
- return 1;
-}
-
-static struct ipt_target ipt_TTL = {
- .name = "TTL",
- .target = ipt_ttl_target,
- .checkentry = ipt_ttl_checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_target(&ipt_TTL);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_target(&ipt_TTL);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index 641dbc47765..9cb993cd224 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -2,29 +2,16 @@
* netfilter module for userspace packet logging daemons
*
* (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
- *
- * 2000/09/22 ulog-cprange feature added
- * 2001/01/04 in-kernel queue as proposed by Sebastian Zander
- * <zander@fokus.gmd.de>
- * 2001/01/30 per-rule nlgroup conflicts with global queue.
- * nlgroup now global (sysctl)
- * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at
- * module loadtime -HW
- * 2002/07/07 remove broken nflog_rcv() function -HW
- * 2002/08/29 fix shifted/unshifted nlgroup bug -HW
- * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen>
- * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT
- * resulting in bogus 'error during NLMSG_PUT' messages.
- *
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
- * This module accepts two parameters:
- *
+ * This module accepts two parameters:
+ *
* nlbufsiz:
* The parameter specifies how big the buffer for each netlink multicast
* group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will
@@ -35,48 +22,44 @@
* each nlgroup you are using, so the total kernel memory usage increases
* by that factor.
*
+ * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since
+ * nlbufsiz is used with alloc_skb, which adds another
+ * sizeof(struct skb_shared_info). Use NLMSG_GOODSIZE instead.
+ *
* flushtimeout:
* Specify, after how many hundredths of a second the queue should be
* flushed even if it is not full yet.
- *
- * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
-#include <linux/config.h>
#include <linux/spinlock.h>
#include <linux/socket.h>
+#include <linux/slab.h>
#include <linux/skbuff.h>
#include <linux/kernel.h>
#include <linux/timer.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
#include <linux/netdevice.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
#include <linux/netfilter.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>
+#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <linux/bitops.h>
+#include <asm/unaligned.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
-MODULE_DESCRIPTION("iptables userspace logging module");
+MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG");
MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG);
#define ULOG_NL_EVENT 111 /* Harald's favorite number */
#define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */
-#if 0
-#define DEBUGP(format, args...) printk("%s:%s:" format, \
- __FILE__, __FUNCTION__ , ## args)
-#else
-#define DEBUGP(format, args...)
-#endif
-
-#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0)
-
-static unsigned int nlbufsiz = 4096;
+static unsigned int nlbufsiz = NLMSG_GOODSIZE;
module_param(nlbufsiz, uint, 0400);
MODULE_PARM_DESC(nlbufsiz, "netlink buffer size");
@@ -84,7 +67,7 @@ static unsigned int flushtimeout = 10;
module_param(flushtimeout, uint, 0600);
MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)");
-static int nflog = 1;
+static bool nflog = true;
module_param(nflog, bool, 0400);
MODULE_PARM_DESC(nflog, "register as internal netfilter logging module");
@@ -97,19 +80,30 @@ typedef struct {
struct timer_list timer; /* the timer function */
} ulog_buff_t;
-static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
+static int ulog_net_id __read_mostly;
+struct ulog_net {
+ unsigned int nlgroup[ULOG_MAXNLGROUPS];
+ ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
+ struct sock *nflognl;
+ spinlock_t lock;
+};
-static struct sock *nflognl; /* our socket */
-static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
+static struct ulog_net *ulog_pernet(struct net *net)
+{
+ return net_generic(net, ulog_net_id);
+}
/* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroupnum)
+static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
{
- ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+ ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
- if (timer_pending(&ub->timer)) {
- DEBUGP("ipt_ULOG: ulog_send: timer was pending, deleting\n");
- del_timer(&ub->timer);
+ pr_debug("ulog_send: timer is deleting\n");
+ del_timer(&ub->timer);
+
+ if (!ub->skb) {
+ pr_debug("ulog_send: nothing to send\n");
+ return;
}
/* last nlmsg needs NLMSG_DONE */
@@ -117,54 +111,60 @@ static void ulog_send(unsigned int nlgroupnum)
ub->lastnlh->nlmsg_type = NLMSG_DONE;
NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
- DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n",
- ub->qlen, nlgroupnum + 1);
- netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
+ pr_debug("throwing %d packets to netlink group %u\n",
+ ub->qlen, nlgroupnum + 1);
+ netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
+ GFP_ATOMIC);
ub->qlen = 0;
ub->skb = NULL;
ub->lastnlh = NULL;
-
}
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
- DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n");
+ unsigned int groupnum = *((unsigned int *)data);
+ struct ulog_net *ulog = container_of((void *)data,
+ struct ulog_net,
+ nlgroup[groupnum]);
+ pr_debug("timer function called, calling ulog_send\n");
/* lock to protect against somebody modifying our structure
* from ipt_ulog_target at the same time */
- spin_lock_bh(&ulog_lock);
- ulog_send(data);
- spin_unlock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
+ ulog_send(ulog, groupnum);
+ spin_unlock_bh(&ulog->lock);
}
static struct sk_buff *ulog_alloc_skb(unsigned int size)
{
struct sk_buff *skb;
+ unsigned int n;
/* alloc skb which should be big enough for a whole
* multipart message. WARNING: has to be <= 131000
* due to slab allocator restrictions */
- skb = alloc_skb(nlbufsiz, GFP_ATOMIC);
+ n = max(size, nlbufsiz);
+ skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN);
if (!skb) {
- PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n",
- nlbufsiz);
-
- /* try to allocate only as much as we need for
- * current packet */
+ if (n > size) {
+ /* try to allocate only as much as we need for
+ * current packet */
- skb = alloc_skb(size, GFP_ATOMIC);
- if (!skb)
- PRINTR("ipt_ULOG: can't even allocate %ub\n", size);
+ skb = alloc_skb(size, GFP_ATOMIC);
+ if (!skb)
+ pr_debug("cannot even allocate %ub\n", size);
+ }
}
return skb;
}
-static void ipt_ulog_packet(unsigned int hooknum,
+static void ipt_ulog_packet(struct net *net,
+ unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -175,6 +175,8 @@ static void ipt_ulog_packet(unsigned int hooknum,
ulog_packet_msg_t *pm;
size_t size, copy_len;
struct nlmsghdr *nlh;
+ struct timeval tv;
+ struct ulog_net *ulog = ulog_pernet(net);
/* ffs == find first bit set, necessary because userspace
* is already shifting groupnumber, but we need unshifted.
@@ -182,86 +184,83 @@ static void ipt_ulog_packet(unsigned int hooknum,
unsigned int groupnum = ffs(loginfo->nl_group) - 1;
/* calculate the size of the skb needed */
- if ((loginfo->copy_range == 0) ||
- (loginfo->copy_range > skb->len)) {
+ if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len)
copy_len = skb->len;
- } else {
+ else
copy_len = loginfo->copy_range;
- }
- size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+ size = nlmsg_total_size(sizeof(*pm) + copy_len);
+
+ ub = &ulog->ulog_buffers[groupnum];
- ub = &ulog_buffers[groupnum];
-
- spin_lock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
if (!ub->skb) {
if (!(ub->skb = ulog_alloc_skb(size)))
goto alloc_failure;
} else if (ub->qlen >= loginfo->qthreshold ||
size > skb_tailroom(ub->skb)) {
- /* either the queue len is too high or we don't have
+ /* either the queue len is too high or we don't have
* enough room in nlskb left. send it to userspace. */
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
if (!(ub->skb = ulog_alloc_skb(size)))
goto alloc_failure;
}
- DEBUGP("ipt_ULOG: qlen %d, qthreshold %d\n", ub->qlen,
- loginfo->qthreshold);
+ pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold);
- /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */
- nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
- sizeof(*pm)+copy_len);
+ nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT,
+ sizeof(*pm)+copy_len, 0);
+ if (!nlh) {
+ pr_debug("error during nlmsg_put\n");
+ goto out_unlock;
+ }
ub->qlen++;
- pm = NLMSG_DATA(nlh);
+ pm = nlmsg_data(nlh);
+ memset(pm, 0, sizeof(*pm));
/* We might not have a timestamp, get one */
- if (skb->tstamp.off_sec == 0)
+ if (skb->tstamp.tv64 == 0)
__net_timestamp((struct sk_buff *)skb);
/* copy hook, prefix, timestamp, payload, etc. */
pm->data_len = copy_len;
- pm->timestamp_sec = skb->tstamp.off_sec;
- pm->timestamp_usec = skb->tstamp.off_usec;
- pm->mark = skb->nfmark;
+ tv = ktime_to_timeval(skb->tstamp);
+ put_unaligned(tv.tv_sec, &pm->timestamp_sec);
+ put_unaligned(tv.tv_usec, &pm->timestamp_usec);
+ put_unaligned(skb->mark, &pm->mark);
pm->hook = hooknum;
- if (prefix != NULL)
- strncpy(pm->prefix, prefix, sizeof(pm->prefix));
+ if (prefix != NULL) {
+ strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1);
+ pm->prefix[sizeof(pm->prefix) - 1] = '\0';
+ }
else if (loginfo->prefix[0] != '\0')
strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
- else
- *(pm->prefix) = '\0';
- if (in && in->hard_header_len > 0
- && skb->mac.raw != (void *) skb->nh.iph
- && in->hard_header_len <= ULOG_MAC_LEN) {
- memcpy(pm->mac, skb->mac.raw, in->hard_header_len);
+ if (in && in->hard_header_len > 0 &&
+ skb->mac_header != skb->network_header &&
+ in->hard_header_len <= ULOG_MAC_LEN) {
+ memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len);
pm->mac_len = in->hard_header_len;
} else
pm->mac_len = 0;
if (in)
strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
- else
- pm->indev_name[0] = '\0';
if (out)
strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
- else
- pm->outdev_name[0] = '\0';
/* copy_len <= skb->len, so can't fail. */
if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
BUG();
-
+
/* check if we are building multi-part messages */
- if (ub->qlen > 1) {
+ if (ub->qlen > 1)
ub->lastnlh->nlmsg_flags |= NLM_F_MULTI;
- }
ub->lastnlh = nlh;
@@ -275,36 +274,30 @@ static void ipt_ulog_packet(unsigned int hooknum,
if (ub->qlen >= loginfo->qthreshold) {
if (loginfo->qthreshold > 1)
nlh->nlmsg_type = NLMSG_DONE;
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
}
-
- spin_unlock_bh(&ulog_lock);
+out_unlock:
+ spin_unlock_bh(&ulog->lock);
return;
-nlmsg_failure:
- PRINTR("ipt_ULOG: error during NLMSG_PUT\n");
-
alloc_failure:
- PRINTR("ipt_ULOG: Error building netlink message\n");
-
- spin_unlock_bh(&ulog_lock);
+ pr_debug("Error building netlink message\n");
+ spin_unlock_bh(&ulog->lock);
}
-static unsigned int ipt_ulog_target(struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- unsigned int hooknum,
- const void *targinfo, void *userinfo)
+static unsigned int
+ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
+ struct net *net = dev_net(par->in ? par->in : par->out);
- ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL);
-
- return IPT_CONTINUE;
+ ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
+ par->targinfo, NULL);
+ return XT_CONTINUE;
}
-
-static void ipt_logfn(unsigned int pf,
+
+static void ipt_logfn(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -326,110 +319,180 @@ static void ipt_logfn(unsigned int pf,
strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
}
- ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
}
-static int ipt_ulog_checkentry(const char *tablename,
- const void *e,
- void *targinfo,
- unsigned int targinfosize,
- unsigned int hookmask)
+static int ulog_tg_check(const struct xt_tgchk_param *par)
{
- struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo;
+ const struct ipt_ulog_info *loginfo = par->targinfo;
- if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ulog_info))) {
- DEBUGP("ipt_ULOG: targinfosize %u != 0\n", targinfosize);
- return 0;
+ if (!par->net->xt.ulog_warn_deprecated) {
+ pr_info("ULOG is deprecated and it will be removed soon, "
+ "use NFLOG instead\n");
+ par->net->xt.ulog_warn_deprecated = true;
}
if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
- DEBUGP("ipt_ULOG: prefix term %i\n",
- loginfo->prefix[sizeof(loginfo->prefix) - 1]);
- return 0;
+ pr_debug("prefix not null-terminated\n");
+ return -EINVAL;
}
-
if (loginfo->qthreshold > ULOG_MAX_QLEN) {
- DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n",
- loginfo->qthreshold);
- return 0;
+ pr_debug("queue threshold %Zu > MAX_QLEN\n",
+ loginfo->qthreshold);
+ return -EINVAL;
}
+ return 0;
+}
- return 1;
+#ifdef CONFIG_COMPAT
+struct compat_ipt_ulog_info {
+ compat_uint_t nl_group;
+ compat_size_t copy_range;
+ compat_size_t qthreshold;
+ char prefix[ULOG_PREFIX_LEN];
+};
+
+static void ulog_tg_compat_from_user(void *dst, const void *src)
+{
+ const struct compat_ipt_ulog_info *cl = src;
+ struct ipt_ulog_info l = {
+ .nl_group = cl->nl_group,
+ .copy_range = cl->copy_range,
+ .qthreshold = cl->qthreshold,
+ };
+
+ memcpy(l.prefix, cl->prefix, sizeof(l.prefix));
+ memcpy(dst, &l, sizeof(l));
+}
+
+static int ulog_tg_compat_to_user(void __user *dst, const void *src)
+{
+ const struct ipt_ulog_info *l = src;
+ struct compat_ipt_ulog_info cl = {
+ .nl_group = l->nl_group,
+ .copy_range = l->copy_range,
+ .qthreshold = l->qthreshold,
+ };
+
+ memcpy(cl.prefix, l->prefix, sizeof(cl.prefix));
+ return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0;
}
+#endif /* CONFIG_COMPAT */
-static struct ipt_target ipt_ulog_reg = {
+static struct xt_target ulog_tg_reg __read_mostly = {
.name = "ULOG",
- .target = ipt_ulog_target,
- .checkentry = ipt_ulog_checkentry,
+ .family = NFPROTO_IPV4,
+ .target = ulog_tg,
+ .targetsize = sizeof(struct ipt_ulog_info),
+ .checkentry = ulog_tg_check,
+#ifdef CONFIG_COMPAT
+ .compatsize = sizeof(struct compat_ipt_ulog_info),
+ .compat_from_user = ulog_tg_compat_from_user,
+ .compat_to_user = ulog_tg_compat_to_user,
+#endif
.me = THIS_MODULE,
};
-static struct nf_logger ipt_ulog_logger = {
+static struct nf_logger ipt_ulog_logger __read_mostly = {
.name = "ipt_ULOG",
- .logfn = &ipt_logfn,
+ .logfn = ipt_logfn,
.me = THIS_MODULE,
};
-static int __init init(void)
+static int __net_init ulog_tg_net_init(struct net *net)
{
int i;
+ struct ulog_net *ulog = ulog_pernet(net);
+ struct netlink_kernel_cfg cfg = {
+ .groups = ULOG_MAXNLGROUPS,
+ };
- DEBUGP("ipt_ULOG: init module\n");
-
- if (nlbufsiz > 128*1024) {
- printk("Netlink buffer has to be <= 128kB\n");
- return -EINVAL;
- }
-
+ spin_lock_init(&ulog->lock);
/* initialize ulog_buffers */
for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- init_timer(&ulog_buffers[i].timer);
- ulog_buffers[i].timer.function = ulog_timer;
- ulog_buffers[i].timer.data = i;
+ ulog->nlgroup[i] = i;
+ setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
+ (unsigned long)&ulog->nlgroup[i]);
}
- nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL,
- THIS_MODULE);
- if (!nflognl)
+ ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+ if (!ulog->nflognl)
return -ENOMEM;
- if (ipt_register_target(&ipt_ulog_reg) != 0) {
- sock_release(nflognl->sk_socket);
- return -EINVAL;
- }
if (nflog)
- nf_log_register(PF_INET, &ipt_ulog_logger);
-
+ nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
+
return 0;
}
-static void __exit fini(void)
+static void __net_exit ulog_tg_net_exit(struct net *net)
{
ulog_buff_t *ub;
int i;
-
- DEBUGP("ipt_ULOG: cleanup_module\n");
+ struct ulog_net *ulog = ulog_pernet(net);
if (nflog)
- nf_log_unregister_logger(&ipt_ulog_logger);
- ipt_unregister_target(&ipt_ulog_reg);
- sock_release(nflognl->sk_socket);
+ nf_log_unset(net, &ipt_ulog_logger);
+
+ netlink_kernel_release(ulog->nflognl);
/* remove pending timers and free allocated skb's */
for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- ub = &ulog_buffers[i];
- if (timer_pending(&ub->timer)) {
- DEBUGP("timer was pending, deleting\n");
- del_timer(&ub->timer);
- }
+ ub = &ulog->ulog_buffers[i];
+ pr_debug("timer is deleting\n");
+ del_timer(&ub->timer);
if (ub->skb) {
kfree_skb(ub->skb);
ub->skb = NULL;
}
}
+}
+
+static struct pernet_operations ulog_tg_net_ops = {
+ .init = ulog_tg_net_init,
+ .exit = ulog_tg_net_exit,
+ .id = &ulog_net_id,
+ .size = sizeof(struct ulog_net),
+};
+
+static int __init ulog_tg_init(void)
+{
+ int ret;
+ pr_debug("init module\n");
+
+ if (nlbufsiz > 128*1024) {
+ pr_warn("Netlink buffer has to be <= 128kB\n");
+ return -EINVAL;
+ }
+
+ ret = register_pernet_subsys(&ulog_tg_net_ops);
+ if (ret)
+ goto out_pernet;
+ ret = xt_register_target(&ulog_tg_reg);
+ if (ret < 0)
+ goto out_target;
+
+ if (nflog)
+ nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+
+ return 0;
+
+out_target:
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+out_pernet:
+ return ret;
+}
+
+static void __exit ulog_tg_exit(void)
+{
+ pr_debug("cleanup_module\n");
+ if (nflog)
+ nf_log_unregister(&ipt_ulog_logger);
+ xt_unregister_target(&ulog_tg_reg);
+ unregister_pernet_subsys(&ulog_tg_net_ops);
}
-module_init(init);
-module_exit(fini);
+module_init(ulog_tg_init);
+module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c
deleted file mode 100644
index d6b83a97651..00000000000
--- a/net/ipv4/netfilter/ipt_addrtype.c
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * iptables module to match inet_addr_type() of an ip.
- *
- * Copyright (c) 2004 Patrick McHardy <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/netdevice.h>
-#include <linux/ip.h>
-#include <net/route.h>
-
-#include <linux/netfilter_ipv4/ipt_addrtype.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("iptables addrtype match");
-
-static inline int match_type(u_int32_t addr, u_int16_t mask)
-{
- return !!(mask & (1 << inet_addr_type(addr)));
-}
-
-static int match(const struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, const void *matchinfo,
- int offset, unsigned int protoff, int *hotdrop)
-{
- const struct ipt_addrtype_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
- int ret = 1;
-
- if (info->source)
- ret &= match_type(iph->saddr, info->source)^info->invert_source;
- if (info->dest)
- ret &= match_type(iph->daddr, info->dest)^info->invert_dest;
-
- return ret;
-}
-
-static int checkentry(const char *tablename, const void *ip,
- void *matchinfo, unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_addrtype_info))) {
- printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n",
- matchsize, IPT_ALIGN(sizeof(struct ipt_addrtype_info)));
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_match addrtype_match = {
- .name = "addrtype",
- .match = match,
- .checkentry = checkentry,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&addrtype_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&addrtype_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c
index 144adfec13c..14a2aa8b8a1 100644
--- a/net/ipv4/netfilter/ipt_ah.c
+++ b/net/ipv4/netfilter/ipt_ah.c
@@ -5,60 +5,48 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
-
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/in.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/netfilter_ipv4/ipt_ah.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
-MODULE_DESCRIPTION("iptables AH SPI match module");
-
-#ifdef DEBUG_CONNTRACK
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
+MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match");
/* Returns 1 if the spi is matched by the range, 0 otherwise */
-static inline int
-spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
+static inline bool
+spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert)
{
- int r=0;
- duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
- min,spi,max);
+ bool r;
+ pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n",
+ invert ? '!' : ' ', min, spi, max);
r=(spi >= min && spi <= max) ^ invert;
- duprintf(" result %s\n",r? "PASS" : "FAILED");
+ pr_debug(" result %s\n", r ? "PASS" : "FAILED");
return r;
}
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
+static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
- struct ip_auth_hdr _ahdr, *ah;
- const struct ipt_ah *ahinfo = matchinfo;
+ struct ip_auth_hdr _ahdr;
+ const struct ip_auth_hdr *ah;
+ const struct ipt_ah *ahinfo = par->matchinfo;
/* Must not be a fragment. */
- if (offset)
- return 0;
+ if (par->fragoff != 0)
+ return false;
- ah = skb_header_pointer(skb, protoff,
- sizeof(_ahdr), &_ahdr);
+ ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr);
if (ah == NULL) {
/* We've been asked to examine this packet, and we
* can't. Hence, no choice but to drop.
*/
- duprintf("Dropping evil AH tinygram.\n");
- *hotdrop = 1;
+ pr_debug("Dropping evil AH tinygram.\n");
+ par->hotdrop = true;
return 0;
}
@@ -67,53 +55,37 @@ match(const struct sk_buff *skb,
!!(ahinfo->invflags & IPT_AH_INV_SPI));
}
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
- const void *ip_void,
- void *matchinfo,
- unsigned int matchinfosize,
- unsigned int hook_mask)
+static int ah_mt_check(const struct xt_mtchk_param *par)
{
- const struct ipt_ah *ahinfo = matchinfo;
- const struct ipt_ip *ip = ip_void;
+ const struct ipt_ah *ahinfo = par->matchinfo;
- /* Must specify proto == AH, and no unknown invflags */
- if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) {
- duprintf("ipt_ah: Protocol %u != %u\n", ip->proto,
- IPPROTO_AH);
- return 0;
- }
- if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_ah))) {
- duprintf("ipt_ah: matchsize %u != %u\n",
- matchinfosize, IPT_ALIGN(sizeof(struct ipt_ah)));
- return 0;
- }
+ /* Must specify no unknown invflags */
if (ahinfo->invflags & ~IPT_AH_INV_MASK) {
- duprintf("ipt_ah: unknown flags %X\n",
- ahinfo->invflags);
- return 0;
+ pr_debug("unknown flags %X\n", ahinfo->invflags);
+ return -EINVAL;
}
-
- return 1;
+ return 0;
}
-static struct ipt_match ah_match = {
+static struct xt_match ah_mt_reg __read_mostly = {
.name = "ah",
- .match = &match,
- .checkentry = &checkentry,
+ .family = NFPROTO_IPV4,
+ .match = ah_mt,
+ .matchsize = sizeof(struct ipt_ah),
+ .proto = IPPROTO_AH,
+ .checkentry = ah_mt_check,
.me = THIS_MODULE,
};
-static int __init init(void)
+static int __init ah_mt_init(void)
{
- return ipt_register_match(&ah_match);
+ return xt_register_match(&ah_mt_reg);
}
-static void __exit cleanup(void)
+static void __exit ah_mt_exit(void)
{
- ipt_unregister_match(&ah_match);
+ xt_unregister_match(&ah_mt_reg);
}
-module_init(init);
-module_exit(cleanup);
+module_init(ah_mt_init);
+module_exit(ah_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c
deleted file mode 100644
index 92063b4f860..00000000000
--- a/net/ipv4/netfilter/ipt_dscp.c
+++ /dev/null
@@ -1,63 +0,0 @@
-/* IP tables module for matching the value of the IPv4 DSCP field
- *
- * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp
- *
- * (C) 2002 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_dscp.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables DSCP matching module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, const void *matchinfo,
- int offset, unsigned int protoff, int *hotdrop)
-{
- const struct ipt_dscp_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
-
- u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK);
-
- return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert;
-}
-
-static int checkentry(const char *tablename, const void *ip,
- void *matchinfo, unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_dscp_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match dscp_match = {
- .name = "dscp",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&dscp_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&dscp_match);
-
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c
deleted file mode 100644
index e68b0c7981f..00000000000
--- a/net/ipv4/netfilter/ipt_ecn.c
+++ /dev/null
@@ -1,132 +0,0 @@
-/* IP tables module for matching the value of the IPv4 and TCP ECN bits
- *
- * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp
- *
- * (C) 2002 by Harald Welte <laforge@gnumonks.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/tcp.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_ecn.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables ECN matching module");
-MODULE_LICENSE("GPL");
-
-static inline int match_ip(const struct sk_buff *skb,
- const struct ipt_ecn_info *einfo)
-{
- return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect);
-}
-
-static inline int match_tcp(const struct sk_buff *skb,
- const struct ipt_ecn_info *einfo,
- int *hotdrop)
-{
- struct tcphdr _tcph, *th;
-
- /* In practice, TCP match does this, so can't fail. But let's
- * be good citizens.
- */
- th = skb_header_pointer(skb, skb->nh.iph->ihl * 4,
- sizeof(_tcph), &_tcph);
- if (th == NULL) {
- *hotdrop = 0;
- return 0;
- }
-
- if (einfo->operation & IPT_ECN_OP_MATCH_ECE) {
- if (einfo->invert & IPT_ECN_OP_MATCH_ECE) {
- if (th->ece == 1)
- return 0;
- } else {
- if (th->ece == 0)
- return 0;
- }
- }
-
- if (einfo->operation & IPT_ECN_OP_MATCH_CWR) {
- if (einfo->invert & IPT_ECN_OP_MATCH_CWR) {
- if (th->cwr == 1)
- return 0;
- } else {
- if (th->cwr == 0)
- return 0;
- }
- }
-
- return 1;
-}
-
-static int match(const struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, const void *matchinfo,
- int offset, unsigned int protoff, int *hotdrop)
-{
- const struct ipt_ecn_info *info = matchinfo;
-
- if (info->operation & IPT_ECN_OP_MATCH_IP)
- if (!match_ip(skb, info))
- return 0;
-
- if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) {
- if (skb->nh.iph->protocol != IPPROTO_TCP)
- return 0;
- if (!match_tcp(skb, info, hotdrop))
- return 0;
- }
-
- return 1;
-}
-
-static int checkentry(const char *tablename, const void *ip_void,
- void *matchinfo, unsigned int matchsize,
- unsigned int hook_mask)
-{
- const struct ipt_ecn_info *info = matchinfo;
- const struct ipt_ip *ip = ip_void;
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info)))
- return 0;
-
- if (info->operation & IPT_ECN_OP_MATCH_MASK)
- return 0;
-
- if (info->invert & IPT_ECN_OP_MATCH_MASK)
- return 0;
-
- if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)
- && ip->proto != IPPROTO_TCP) {
- printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for"
- " non-tcp packets\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_match ecn_match = {
- .name = "ecn",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&ecn_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&ecn_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c
deleted file mode 100644
index 9de191a8162..00000000000
--- a/net/ipv4/netfilter/ipt_esp.c
+++ /dev/null
@@ -1,120 +0,0 @@
-/* Kernel module to match ESP parameters. */
-
-/* (C) 1999-2000 Yon Uriarte <yon@astaro.de>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-
-#include <linux/netfilter_ipv4/ipt_esp.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>");
-MODULE_DESCRIPTION("iptables ESP SPI match module");
-
-#ifdef DEBUG_CONNTRACK
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-/* Returns 1 if the spi is matched by the range, 0 otherwise */
-static inline int
-spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert)
-{
- int r=0;
- duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ',
- min,spi,max);
- r=(spi >= min && spi <= max) ^ invert;
- duprintf(" result %s\n",r? "PASS" : "FAILED");
- return r;
-}
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- struct ip_esp_hdr _esp, *eh;
- const struct ipt_esp *espinfo = matchinfo;
-
- /* Must not be a fragment. */
- if (offset)
- return 0;
-
- eh = skb_header_pointer(skb, protoff,
- sizeof(_esp), &_esp);
- if (eh == NULL) {
- /* We've been asked to examine this packet, and we
- * can't. Hence, no choice but to drop.
- */
- duprintf("Dropping evil ESP tinygram.\n");
- *hotdrop = 1;
- return 0;
- }
-
- return spi_match(espinfo->spis[0], espinfo->spis[1],
- ntohl(eh->spi),
- !!(espinfo->invflags & IPT_ESP_INV_SPI));
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
- const void *ip_void,
- void *matchinfo,
- unsigned int matchinfosize,
- unsigned int hook_mask)
-{
- const struct ipt_esp *espinfo = matchinfo;
- const struct ipt_ip *ip = ip_void;
-
- /* Must specify proto == ESP, and no unknown invflags */
- if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) {
- duprintf("ipt_esp: Protocol %u != %u\n", ip->proto,
- IPPROTO_ESP);
- return 0;
- }
- if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_esp))) {
- duprintf("ipt_esp: matchsize %u != %u\n",
- matchinfosize, IPT_ALIGN(sizeof(struct ipt_esp)));
- return 0;
- }
- if (espinfo->invflags & ~IPT_ESP_INV_MASK) {
- duprintf("ipt_esp: unknown flags %X\n",
- espinfo->invflags);
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_match esp_match = {
- .name = "esp",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&esp_match);
-}
-
-static void __exit cleanup(void)
-{
- ipt_unregister_match(&esp_match);
-}
-
-module_init(init);
-module_exit(cleanup);
diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c
deleted file mode 100644
index 4fe48c1bd5f..00000000000
--- a/net/ipv4/netfilter/ipt_hashlimit.c
+++ /dev/null
@@ -1,731 +0,0 @@
-/* iptables match extension to limit the number of packets per second
- * seperately for each hashbucket (sourceip/sourceport/dstip/dstport)
- *
- * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
- *
- * $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $
- *
- * Development of this code was funded by Astaro AG, http://www.astaro.com/
- *
- * based on ipt_limit.c by:
- * Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr>
- * Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr>
- * Rusty Russell <rusty@rustcorp.com.au>
- *
- * The general idea is to create a hash table for every dstip and have a
- * seperate limit counter per tuple. This way you can do something like 'limit
- * the number of syn packets for each of my internal addresses.
- *
- * Ideally this would just be implemented as a general 'hash' match, which would
- * allow us to attach any iptables target to it's hash buckets. But this is
- * not possible in the current iptables architecture. As always, pkttables for
- * 2.7.x will help ;)
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/spinlock.h>
-#include <linux/random.h>
-#include <linux/jhash.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/sctp.h>
-#include <linux/proc_fs.h>
-#include <linux/seq_file.h>
-#include <linux/list.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_hashlimit.h>
-
-/* FIXME: this is just for IP_NF_ASSERRT */
-#include <linux/netfilter_ipv4/ip_conntrack.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("iptables match for limiting per hash-bucket");
-
-/* need to declare this at the top */
-static struct proc_dir_entry *hashlimit_procdir;
-static struct file_operations dl_file_ops;
-
-/* hash table crap */
-
-struct dsthash_dst {
- u_int32_t src_ip;
- u_int32_t dst_ip;
- /* ports have to be consecutive !!! */
- u_int16_t src_port;
- u_int16_t dst_port;
-};
-
-struct dsthash_ent {
- /* static / read-only parts in the beginning */
- struct hlist_node node;
- struct dsthash_dst dst;
-
- /* modified structure members in the end */
- unsigned long expires; /* precalculated expiry time */
- struct {
- unsigned long prev; /* last modification */
- u_int32_t credit;
- u_int32_t credit_cap, cost;
- } rateinfo;
-};
-
-struct ipt_hashlimit_htable {
- struct hlist_node node; /* global list of all htables */
- atomic_t use;
-
- struct hashlimit_cfg cfg; /* config */
-
- /* used internally */
- spinlock_t lock; /* lock for list_head */
- u_int32_t rnd; /* random seed for hash */
- struct timer_list timer; /* timer for gc */
- atomic_t count; /* number entries in table */
-
- /* seq_file stuff */
- struct proc_dir_entry *pde;
-
- struct hlist_head hash[0]; /* hashtable itself */
-};
-
-static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
-static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */
-static HLIST_HEAD(hashlimit_htables);
-static kmem_cache_t *hashlimit_cachep __read_mostly;
-
-static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b)
-{
- return (ent->dst.dst_ip == b->dst_ip
- && ent->dst.dst_port == b->dst_port
- && ent->dst.src_port == b->src_port
- && ent->dst.src_ip == b->src_ip);
-}
-
-static inline u_int32_t
-hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst)
-{
- return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port),
- dst->src_ip, ht->rnd) % ht->cfg.size);
-}
-
-static inline struct dsthash_ent *
-__dsthash_find(const struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
-{
- struct dsthash_ent *ent;
- struct hlist_node *pos;
- u_int32_t hash = hash_dst(ht, dst);
-
- if (!hlist_empty(&ht->hash[hash]))
- hlist_for_each_entry(ent, pos, &ht->hash[hash], node) {
- if (dst_cmp(ent, dst)) {
- return ent;
- }
- }
-
- return NULL;
-}
-
-/* allocate dsthash_ent, initialize dst, put in htable and lock it */
-static struct dsthash_ent *
-__dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst)
-{
- struct dsthash_ent *ent;
-
- /* initialize hash with random val at the time we allocate
- * the first hashtable entry */
- if (!ht->rnd)
- get_random_bytes(&ht->rnd, 4);
-
- if (ht->cfg.max &&
- atomic_read(&ht->count) >= ht->cfg.max) {
- /* FIXME: do something. question is what.. */
- if (net_ratelimit())
- printk(KERN_WARNING
- "ipt_hashlimit: max count of %u reached\n",
- ht->cfg.max);
- return NULL;
- }
-
- ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
- if (!ent) {
- if (net_ratelimit())
- printk(KERN_ERR
- "ipt_hashlimit: can't allocate dsthash_ent\n");
- return NULL;
- }
-
- atomic_inc(&ht->count);
-
- ent->dst.dst_ip = dst->dst_ip;
- ent->dst.dst_port = dst->dst_port;
- ent->dst.src_ip = dst->src_ip;
- ent->dst.src_port = dst->src_port;
-
- hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]);
-
- return ent;
-}
-
-static inline void
-__dsthash_free(struct ipt_hashlimit_htable *ht, struct dsthash_ent *ent)
-{
- hlist_del(&ent->node);
- kmem_cache_free(hashlimit_cachep, ent);
- atomic_dec(&ht->count);
-}
-static void htable_gc(unsigned long htlong);
-
-static int htable_create(struct ipt_hashlimit_info *minfo)
-{
- int i;
- unsigned int size;
- struct ipt_hashlimit_htable *hinfo;
-
- if (minfo->cfg.size)
- size = minfo->cfg.size;
- else {
- size = (((num_physpages << PAGE_SHIFT) / 16384)
- / sizeof(struct list_head));
- if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
- size = 8192;
- if (size < 16)
- size = 16;
- }
- /* FIXME: don't use vmalloc() here or anywhere else -HW */
- hinfo = vmalloc(sizeof(struct ipt_hashlimit_htable)
- + (sizeof(struct list_head) * size));
- if (!hinfo) {
- printk(KERN_ERR "ipt_hashlimit: Unable to create hashtable\n");
- return -1;
- }
- minfo->hinfo = hinfo;
-
- /* copy match config into hashtable config */
- memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
- hinfo->cfg.size = size;
- if (!hinfo->cfg.max)
- hinfo->cfg.max = 8 * hinfo->cfg.size;
- else if (hinfo->cfg.max < hinfo->cfg.size)
- hinfo->cfg.max = hinfo->cfg.size;
-
- for (i = 0; i < hinfo->cfg.size; i++)
- INIT_HLIST_HEAD(&hinfo->hash[i]);
-
- atomic_set(&hinfo->count, 0);
- atomic_set(&hinfo->use, 1);
- hinfo->rnd = 0;
- spin_lock_init(&hinfo->lock);
- hinfo->pde = create_proc_entry(minfo->name, 0, hashlimit_procdir);
- if (!hinfo->pde) {
- vfree(hinfo);
- return -1;
- }
- hinfo->pde->proc_fops = &dl_file_ops;
- hinfo->pde->data = hinfo;
-
- init_timer(&hinfo->timer);
- hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
- hinfo->timer.data = (unsigned long )hinfo;
- hinfo->timer.function = htable_gc;
- add_timer(&hinfo->timer);
-
- spin_lock_bh(&hashlimit_lock);
- hlist_add_head(&hinfo->node, &hashlimit_htables);
- spin_unlock_bh(&hashlimit_lock);
-
- return 0;
-}
-
-static int select_all(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he)
-{
- return 1;
-}
-
-static int select_gc(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he)
-{
- return (jiffies >= he->expires);
-}
-
-static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht,
- int (*select)(struct ipt_hashlimit_htable *ht,
- struct dsthash_ent *he))
-{
- int i;
-
- IP_NF_ASSERT(ht->cfg.size && ht->cfg.max);
-
- /* lock hash table and iterate over it */
- spin_lock_bh(&ht->lock);
- for (i = 0; i < ht->cfg.size; i++) {
- struct dsthash_ent *dh;
- struct hlist_node *pos, *n;
- hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) {
- if ((*select)(ht, dh))
- __dsthash_free(ht, dh);
- }
- }
- spin_unlock_bh(&ht->lock);
-}
-
-/* hash table garbage collector, run by timer */
-static void htable_gc(unsigned long htlong)
-{
- struct ipt_hashlimit_htable *ht = (struct ipt_hashlimit_htable *)htlong;
-
- htable_selective_cleanup(ht, select_gc);
-
- /* re-add the timer accordingly */
- ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval);
- add_timer(&ht->timer);
-}
-
-static void htable_destroy(struct ipt_hashlimit_htable *hinfo)
-{
- /* remove timer, if it is pending */
- if (timer_pending(&hinfo->timer))
- del_timer(&hinfo->timer);
-
- /* remove proc entry */
- remove_proc_entry(hinfo->pde->name, hashlimit_procdir);
-
- htable_selective_cleanup(hinfo, select_all);
- vfree(hinfo);
-}
-
-static struct ipt_hashlimit_htable *htable_find_get(char *name)
-{
- struct ipt_hashlimit_htable *hinfo;
- struct hlist_node *pos;
-
- spin_lock_bh(&hashlimit_lock);
- hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
- if (!strcmp(name, hinfo->pde->name)) {
- atomic_inc(&hinfo->use);
- spin_unlock_bh(&hashlimit_lock);
- return hinfo;
- }
- }
- spin_unlock_bh(&hashlimit_lock);
-
- return NULL;
-}
-
-static void htable_put(struct ipt_hashlimit_htable *hinfo)
-{
- if (atomic_dec_and_test(&hinfo->use)) {
- spin_lock_bh(&hashlimit_lock);
- hlist_del(&hinfo->node);
- spin_unlock_bh(&hashlimit_lock);
- htable_destroy(hinfo);
- }
-}
-
-
-/* The algorithm used is the Simple Token Bucket Filter (TBF)
- * see net/sched/sch_tbf.c in the linux source tree
- */
-
-/* Rusty: This is my (non-mathematically-inclined) understanding of
- this algorithm. The `average rate' in jiffies becomes your initial
- amount of credit `credit' and the most credit you can ever have
- `credit_cap'. The `peak rate' becomes the cost of passing the
- test, `cost'.
-
- `prev' tracks the last packet hit: you gain one credit per jiffy.
- If you get credit balance more than this, the extra credit is
- discarded. Every time the match passes, you lose `cost' credits;
- if you don't have that many, the test fails.
-
- See Alexey's formal explanation in net/sched/sch_tbf.c.
-
- To get the maximum range, we multiply by this factor (ie. you get N
- credits per jiffy). We want to allow a rate as low as 1 per day
- (slowest userspace tool allows), which means
- CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
-*/
-#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
-
-/* Repeated shift and or gives us all 1s, final shift and add 1 gives
- * us the power of 2 below the theoretical max, so GCC simply does a
- * shift. */
-#define _POW2_BELOW2(x) ((x)|((x)>>1))
-#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
-#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
-#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
-#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
-#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
-
-#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
-
-/* Precision saver. */
-static inline u_int32_t
-user2credits(u_int32_t user)
-{
- /* If multiplying would overflow... */
- if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
- /* Divide first. */
- return (user / IPT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
-
- return (user * HZ * CREDITS_PER_JIFFY) / IPT_HASHLIMIT_SCALE;
-}
-
-static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
-{
- dh->rateinfo.credit += (now - xchg(&dh->rateinfo.prev, now))
- * CREDITS_PER_JIFFY;
- if (dh->rateinfo.credit > dh->rateinfo.credit_cap)
- dh->rateinfo.credit = dh->rateinfo.credit_cap;
-}
-
-static inline int get_ports(const struct sk_buff *skb, int offset,
- u16 ports[2])
-{
- union {
- struct tcphdr th;
- struct udphdr uh;
- sctp_sctphdr_t sctph;
- } hdr_u, *ptr_u;
-
- /* Must not be a fragment. */
- if (offset)
- return 1;
-
- /* Must be big enough to read ports (both UDP and TCP have
- them at the start). */
- ptr_u = skb_header_pointer(skb, skb->nh.iph->ihl*4, 8, &hdr_u);
- if (!ptr_u)
- return 1;
-
- switch (skb->nh.iph->protocol) {
- case IPPROTO_TCP:
- ports[0] = ptr_u->th.source;
- ports[1] = ptr_u->th.dest;
- break;
- case IPPROTO_UDP:
- ports[0] = ptr_u->uh.source;
- ports[1] = ptr_u->uh.dest;
- break;
- case IPPROTO_SCTP:
- ports[0] = ptr_u->sctph.source;
- ports[1] = ptr_u->sctph.dest;
- break;
- default:
- /* all other protocols don't supprot per-port hash
- * buckets */
- ports[0] = ports[1] = 0;
- break;
- }
-
- return 0;
-}
-
-
-static int
-hashlimit_match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- struct ipt_hashlimit_info *r =
- ((struct ipt_hashlimit_info *)matchinfo)->u.master;
- struct ipt_hashlimit_htable *hinfo = r->hinfo;
- unsigned long now = jiffies;
- struct dsthash_ent *dh;
- struct dsthash_dst dst;
-
- /* build 'dst' according to hinfo->cfg and current packet */
- memset(&dst, 0, sizeof(dst));
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DIP)
- dst.dst_ip = skb->nh.iph->daddr;
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SIP)
- dst.src_ip = skb->nh.iph->saddr;
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT
- ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) {
- u_int16_t ports[2];
- if (get_ports(skb, offset, ports)) {
- /* We've been asked to examine this packet, and we
- can't. Hence, no choice but to drop. */
- *hotdrop = 1;
- return 0;
- }
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT)
- dst.src_port = ports[0];
- if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT)
- dst.dst_port = ports[1];
- }
-
- spin_lock_bh(&hinfo->lock);
- dh = __dsthash_find(hinfo, &dst);
- if (!dh) {
- dh = __dsthash_alloc_init(hinfo, &dst);
-
- if (!dh) {
- /* enomem... don't match == DROP */
- if (net_ratelimit())
- printk(KERN_ERR "%s: ENOMEM\n", __FUNCTION__);
- spin_unlock_bh(&hinfo->lock);
- return 0;
- }
-
- dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
-
- dh->rateinfo.prev = jiffies;
- dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
- hinfo->cfg.burst);
- dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
- hinfo->cfg.burst);
- dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
-
- spin_unlock_bh(&hinfo->lock);
- return 1;
- }
-
- /* update expiration timeout */
- dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
-
- rateinfo_recalc(dh, now);
- if (dh->rateinfo.credit >= dh->rateinfo.cost) {
- /* We're underlimit. */
- dh->rateinfo.credit -= dh->rateinfo.cost;
- spin_unlock_bh(&hinfo->lock);
- return 1;
- }
-
- spin_unlock_bh(&hinfo->lock);
-
- /* default case: we're overlimit, thus don't match */
- return 0;
-}
-
-static int
-hashlimit_checkentry(const char *tablename,
- const void *inf,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- struct ipt_hashlimit_info *r = matchinfo;
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_hashlimit_info)))
- return 0;
-
- /* Check for overflow. */
- if (r->cfg.burst == 0
- || user2credits(r->cfg.avg * r->cfg.burst) <
- user2credits(r->cfg.avg)) {
- printk(KERN_ERR "ipt_hashlimit: Overflow, try lower: %u/%u\n",
- r->cfg.avg, r->cfg.burst);
- return 0;
- }
-
- if (r->cfg.mode == 0
- || r->cfg.mode > (IPT_HASHLIMIT_HASH_DPT
- |IPT_HASHLIMIT_HASH_DIP
- |IPT_HASHLIMIT_HASH_SIP
- |IPT_HASHLIMIT_HASH_SPT))
- return 0;
-
- if (!r->cfg.gc_interval)
- return 0;
-
- if (!r->cfg.expire)
- return 0;
-
- /* This is the best we've got: We cannot release and re-grab lock,
- * since checkentry() is called before ip_tables.c grabs ipt_mutex.
- * We also cannot grab the hashtable spinlock, since htable_create will
- * call vmalloc, and that can sleep. And we cannot just re-search
- * the list of htable's in htable_create(), since then we would
- * create duplicate proc files. -HW */
- down(&hlimit_mutex);
- r->hinfo = htable_find_get(r->name);
- if (!r->hinfo && (htable_create(r) != 0)) {
- up(&hlimit_mutex);
- return 0;
- }
- up(&hlimit_mutex);
-
- /* Ugly hack: For SMP, we only want to use one set */
- r->u.master = r;
-
- return 1;
-}
-
-static void
-hashlimit_destroy(void *matchinfo, unsigned int matchsize)
-{
- struct ipt_hashlimit_info *r = (struct ipt_hashlimit_info *) matchinfo;
-
- htable_put(r->hinfo);
-}
-
-static struct ipt_match ipt_hashlimit = {
- .name = "hashlimit",
- .match = hashlimit_match,
- .checkentry = hashlimit_checkentry,
- .destroy = hashlimit_destroy,
- .me = THIS_MODULE
-};
-
-/* PROC stuff */
-
-static void *dl_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct proc_dir_entry *pde = s->private;
- struct ipt_hashlimit_htable *htable = pde->data;
- unsigned int *bucket;
-
- spin_lock_bh(&htable->lock);
- if (*pos >= htable->cfg.size)
- return NULL;
-
- bucket = kmalloc(sizeof(unsigned int), GFP_ATOMIC);
- if (!bucket)
- return ERR_PTR(-ENOMEM);
-
- *bucket = *pos;
- return bucket;
-}
-
-static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct proc_dir_entry *pde = s->private;
- struct ipt_hashlimit_htable *htable = pde->data;
- unsigned int *bucket = (unsigned int *)v;
-
- *pos = ++(*bucket);
- if (*pos >= htable->cfg.size) {
- kfree(v);
- return NULL;
- }
- return bucket;
-}
-
-static void dl_seq_stop(struct seq_file *s, void *v)
-{
- struct proc_dir_entry *pde = s->private;
- struct ipt_hashlimit_htable *htable = pde->data;
- unsigned int *bucket = (unsigned int *)v;
-
- kfree(bucket);
-
- spin_unlock_bh(&htable->lock);
-}
-
-static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s)
-{
- /* recalculate to show accurate numbers */
- rateinfo_recalc(ent, jiffies);
-
- return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n",
- (long)(ent->expires - jiffies)/HZ,
- NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port),
- NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port),
- ent->rateinfo.credit, ent->rateinfo.credit_cap,
- ent->rateinfo.cost);
-}
-
-static int dl_seq_show(struct seq_file *s, void *v)
-{
- struct proc_dir_entry *pde = s->private;
- struct ipt_hashlimit_htable *htable = pde->data;
- unsigned int *bucket = (unsigned int *)v;
- struct dsthash_ent *ent;
- struct hlist_node *pos;
-
- if (!hlist_empty(&htable->hash[*bucket]))
- hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) {
- if (dl_seq_real_show(ent, s)) {
- /* buffer was filled and unable to print that tuple */
- return 1;
- }
- }
-
- return 0;
-}
-
-static struct seq_operations dl_seq_ops = {
- .start = dl_seq_start,
- .next = dl_seq_next,
- .stop = dl_seq_stop,
- .show = dl_seq_show
-};
-
-static int dl_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &dl_seq_ops);
-
- if (!ret) {
- struct seq_file *sf = file->private_data;
- sf->private = PDE(inode);
- }
- return ret;
-}
-
-static struct file_operations dl_file_ops = {
- .owner = THIS_MODULE,
- .open = dl_proc_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = seq_release
-};
-
-static int init_or_fini(int fini)
-{
- int ret = 0;
-
- if (fini)
- goto cleanup;
-
- if (ipt_register_match(&ipt_hashlimit)) {
- ret = -EINVAL;
- goto cleanup_nothing;
- }
-
- hashlimit_cachep = kmem_cache_create("ipt_hashlimit",
- sizeof(struct dsthash_ent), 0,
- 0, NULL, NULL);
- if (!hashlimit_cachep) {
- printk(KERN_ERR "Unable to create ipt_hashlimit slab cache\n");
- ret = -ENOMEM;
- goto cleanup_unreg_match;
- }
-
- hashlimit_procdir = proc_mkdir("ipt_hashlimit", proc_net);
- if (!hashlimit_procdir) {
- printk(KERN_ERR "Unable to create proc dir entry\n");
- ret = -ENOMEM;
- goto cleanup_free_slab;
- }
-
- return ret;
-
-cleanup:
- remove_proc_entry("ipt_hashlimit", proc_net);
-cleanup_free_slab:
- kmem_cache_destroy(hashlimit_cachep);
-cleanup_unreg_match:
- ipt_unregister_match(&ipt_hashlimit);
-cleanup_nothing:
- return ret;
-
-}
-
-static int __init init(void)
-{
- return init_or_fini(0);
-}
-
-static void __exit fini(void)
-{
- init_or_fini(1);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c
deleted file mode 100644
index 13fb16fb789..00000000000
--- a/net/ipv4/netfilter/ipt_iprange.c
+++ /dev/null
@@ -1,99 +0,0 @@
-/*
- * iptables module to match IP address ranges
- *
- * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/ip.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_iprange.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>");
-MODULE_DESCRIPTION("iptables arbitrary IP range match module");
-
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset, unsigned int protoff, int *hotdrop)
-{
- const struct ipt_iprange_info *info = matchinfo;
- const struct iphdr *iph = skb->nh.iph;
-
- if (info->flags & IPRANGE_SRC) {
- if (((ntohl(iph->saddr) < ntohl(info->src.min_ip))
- || (ntohl(iph->saddr) > ntohl(info->src.max_ip)))
- ^ !!(info->flags & IPRANGE_SRC_INV)) {
- DEBUGP("src IP %u.%u.%u.%u NOT in range %s"
- "%u.%u.%u.%u-%u.%u.%u.%u\n",
- NIPQUAD(iph->saddr),
- info->flags & IPRANGE_SRC_INV ? "(INV) " : "",
- NIPQUAD(info->src.min_ip),
- NIPQUAD(info->src.max_ip));
- return 0;
- }
- }
- if (info->flags & IPRANGE_DST) {
- if (((ntohl(iph->daddr) < ntohl(info->dst.min_ip))
- || (ntohl(iph->daddr) > ntohl(info->dst.max_ip)))
- ^ !!(info->flags & IPRANGE_DST_INV)) {
- DEBUGP("dst IP %u.%u.%u.%u NOT in range %s"
- "%u.%u.%u.%u-%u.%u.%u.%u\n",
- NIPQUAD(iph->daddr),
- info->flags & IPRANGE_DST_INV ? "(INV) " : "",
- NIPQUAD(info->dst.min_ip),
- NIPQUAD(info->dst.max_ip));
- return 0;
- }
- }
- return 1;
-}
-
-static int check(const char *tablename,
- const void *inf,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- /* verify size */
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_iprange_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match iprange_match =
-{
- .list = { NULL, NULL },
- .name = "iprange",
- .match = &match,
- .checkentry = &check,
- .destroy = NULL,
- .me = THIS_MODULE
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&iprange_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&iprange_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
deleted file mode 100644
index 2d52326553f..00000000000
--- a/net/ipv4/netfilter/ipt_multiport.c
+++ /dev/null
@@ -1,214 +0,0 @@
-/* Kernel module to match one of a list of TCP/UDP ports: ports are in
- the same place so we can treat them as equal. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/udp.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_multiport.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
-MODULE_DESCRIPTION("iptables multiple port match module");
-
-#if 0
-#define duprintf(format, args...) printk(format , ## args)
-#else
-#define duprintf(format, args...)
-#endif
-
-/* Returns 1 if the port is matched by the test, 0 otherwise. */
-static inline int
-ports_match(const u_int16_t *portlist, enum ipt_multiport_flags flags,
- u_int8_t count, u_int16_t src, u_int16_t dst)
-{
- unsigned int i;
- for (i=0; i<count; i++) {
- if (flags != IPT_MULTIPORT_DESTINATION
- && portlist[i] == src)
- return 1;
-
- if (flags != IPT_MULTIPORT_SOURCE
- && portlist[i] == dst)
- return 1;
- }
-
- return 0;
-}
-
-/* Returns 1 if the port is matched by the test, 0 otherwise. */
-static inline int
-ports_match_v1(const struct ipt_multiport_v1 *minfo,
- u_int16_t src, u_int16_t dst)
-{
- unsigned int i;
- u_int16_t s, e;
-
- for (i=0; i < minfo->count; i++) {
- s = minfo->ports[i];
-
- if (minfo->pflags[i]) {
- /* range port matching */
- e = minfo->ports[++i];
- duprintf("src or dst matches with %d-%d?\n", s, e);
-
- if (minfo->flags == IPT_MULTIPORT_SOURCE
- && src >= s && src <= e)
- return 1 ^ minfo->invert;
- if (minfo->flags == IPT_MULTIPORT_DESTINATION
- && dst >= s && dst <= e)
- return 1 ^ minfo->invert;
- if (minfo->flags == IPT_MULTIPORT_EITHER
- && ((dst >= s && dst <= e)
- || (src >= s && src <= e)))
- return 1 ^ minfo->invert;
- } else {
- /* exact port matching */
- duprintf("src or dst matches with %d?\n", s);
-
- if (minfo->flags == IPT_MULTIPORT_SOURCE
- && src == s)
- return 1 ^ minfo->invert;
- if (minfo->flags == IPT_MULTIPORT_DESTINATION
- && dst == s)
- return 1 ^ minfo->invert;
- if (minfo->flags == IPT_MULTIPORT_EITHER
- && (src == s || dst == s))
- return 1 ^ minfo->invert;
- }
- }
-
- return minfo->invert;
-}
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- u16 _ports[2], *pptr;
- const struct ipt_multiport *multiinfo = matchinfo;
-
- if (offset)
- return 0;
-
- pptr = skb_header_pointer(skb, protoff,
- sizeof(_ports), _ports);
- if (pptr == NULL) {
- /* We've been asked to examine this packet, and we
- * can't. Hence, no choice but to drop.
- */
- duprintf("ipt_multiport:"
- " Dropping evil offset=0 tinygram.\n");
- *hotdrop = 1;
- return 0;
- }
-
- return ports_match(multiinfo->ports,
- multiinfo->flags, multiinfo->count,
- ntohs(pptr[0]), ntohs(pptr[1]));
-}
-
-static int
-match_v1(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- u16 _ports[2], *pptr;
- const struct ipt_multiport_v1 *multiinfo = matchinfo;
-
- if (offset)
- return 0;
-
- pptr = skb_header_pointer(skb, protoff,
- sizeof(_ports), _ports);
- if (pptr == NULL) {
- /* We've been asked to examine this packet, and we
- * can't. Hence, no choice but to drop.
- */
- duprintf("ipt_multiport:"
- " Dropping evil offset=0 tinygram.\n");
- *hotdrop = 1;
- return 0;
- }
-
- return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1]));
-}
-
-/* Called when user tries to insert an entry of this type. */
-static int
-checkentry(const char *tablename,
- const void *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport)));
-}
-
-static int
-checkentry_v1(const char *tablename,
- const void *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport_v1)));
-}
-
-static struct ipt_match multiport_match = {
- .name = "multiport",
- .revision = 0,
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static struct ipt_match multiport_match_v1 = {
- .name = "multiport",
- .revision = 1,
- .match = &match_v1,
- .checkentry = &checkentry_v1,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- int err;
-
- err = ipt_register_match(&multiport_match);
- if (!err) {
- err = ipt_register_match(&multiport_match_v1);
- if (err)
- ipt_unregister_match(&multiport_match);
- }
-
- return err;
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&multiport_match);
- ipt_unregister_match(&multiport_match_v1);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c
deleted file mode 100644
index 4843d0c9734..00000000000
--- a/net/ipv4/netfilter/ipt_owner.c
+++ /dev/null
@@ -1,101 +0,0 @@
-/* Kernel module to match various things tied to sockets associated with
- locally generated outgoing packets. */
-
-/* (C) 2000 Marc Boucher <marc@mbsi.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/file.h>
-#include <linux/rcupdate.h>
-#include <net/sock.h>
-
-#include <linux/netfilter_ipv4/ipt_owner.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
-MODULE_DESCRIPTION("iptables owner match");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- const struct ipt_owner_info *info = matchinfo;
-
- if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file)
- return 0;
-
- if(info->match & IPT_OWNER_UID) {
- if ((skb->sk->sk_socket->file->f_uid != info->uid) ^
- !!(info->invert & IPT_OWNER_UID))
- return 0;
- }
-
- if(info->match & IPT_OWNER_GID) {
- if ((skb->sk->sk_socket->file->f_gid != info->gid) ^
- !!(info->invert & IPT_OWNER_GID))
- return 0;
- }
-
- return 1;
-}
-
-static int
-checkentry(const char *tablename,
- const void *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- const struct ipt_owner_info *info = matchinfo;
-
- if (hook_mask
- & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) {
- printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n");
- return 0;
- }
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_owner_info))) {
- printk("Matchsize %u != %Zu\n", matchsize,
- IPT_ALIGN(sizeof(struct ipt_owner_info)));
- return 0;
- }
-
- if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) {
- printk("ipt_owner: pid, sid and command matching "
- "not supported anymore\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_match owner_match = {
- .name = "owner",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&owner_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&owner_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c
deleted file mode 100644
index 18ca8258a1c..00000000000
--- a/net/ipv4/netfilter/ipt_policy.c
+++ /dev/null
@@ -1,173 +0,0 @@
-/* IP tables module for matching IPsec policy
- *
- * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/kernel.h>
-#include <linux/config.h>
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/init.h>
-#include <net/xfrm.h>
-
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_policy.h>
-
-MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
-MODULE_DESCRIPTION("IPtables IPsec policy matching module");
-MODULE_LICENSE("GPL");
-
-
-static inline int
-match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e)
-{
-#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x))
-
- return MATCH(saddr, x->props.saddr.a4 & e->smask) &&
- MATCH(daddr, x->id.daddr.a4 & e->dmask) &&
- MATCH(proto, x->id.proto) &&
- MATCH(mode, x->props.mode) &&
- MATCH(spi, x->id.spi) &&
- MATCH(reqid, x->props.reqid);
-}
-
-static int
-match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info)
-{
- const struct ipt_policy_elem *e;
- struct sec_path *sp = skb->sp;
- int strict = info->flags & IPT_POLICY_MATCH_STRICT;
- int i, pos;
-
- if (sp == NULL)
- return -1;
- if (strict && info->len != sp->len)
- return 0;
-
- for (i = sp->len - 1; i >= 0; i--) {
- pos = strict ? i - sp->len + 1 : 0;
- if (pos >= info->len)
- return 0;
- e = &info->pol[pos];
-
- if (match_xfrm_state(sp->x[i].xvec, e)) {
- if (!strict)
- return 1;
- } else if (strict)
- return 0;
- }
-
- return strict ? 1 : 0;
-}
-
-static int
-match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info)
-{
- const struct ipt_policy_elem *e;
- struct dst_entry *dst = skb->dst;
- int strict = info->flags & IPT_POLICY_MATCH_STRICT;
- int i, pos;
-
- if (dst->xfrm == NULL)
- return -1;
-
- for (i = 0; dst && dst->xfrm; dst = dst->child, i++) {
- pos = strict ? i : 0;
- if (pos >= info->len)
- return 0;
- e = &info->pol[pos];
-
- if (match_xfrm_state(dst->xfrm, e)) {
- if (!strict)
- return 1;
- } else if (strict)
- return 0;
- }
-
- return strict ? 1 : 0;
-}
-
-static int match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- const struct ipt_policy_info *info = matchinfo;
- int ret;
-
- if (info->flags & IPT_POLICY_MATCH_IN)
- ret = match_policy_in(skb, info);
- else
- ret = match_policy_out(skb, info);
-
- if (ret < 0)
- ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0;
- else if (info->flags & IPT_POLICY_MATCH_NONE)
- ret = 0;
-
- return ret;
-}
-
-static int checkentry(const char *tablename, const void *ip_void,
- void *matchinfo, unsigned int matchsize,
- unsigned int hook_mask)
-{
- struct ipt_policy_info *info = matchinfo;
-
- if (matchsize != IPT_ALIGN(sizeof(*info))) {
- printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n",
- matchsize, IPT_ALIGN(sizeof(*info)));
- return 0;
- }
- if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) {
- printk(KERN_ERR "ipt_policy: neither incoming nor "
- "outgoing policy selected\n");
- return 0;
- }
- if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN)
- && info->flags & IPT_POLICY_MATCH_OUT) {
- printk(KERN_ERR "ipt_policy: output policy not valid in "
- "PRE_ROUTING and INPUT\n");
- return 0;
- }
- if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT)
- && info->flags & IPT_POLICY_MATCH_IN) {
- printk(KERN_ERR "ipt_policy: input policy not valid in "
- "POST_ROUTING and OUTPUT\n");
- return 0;
- }
- if (info->len > IPT_POLICY_MAX_ELEM) {
- printk(KERN_ERR "ipt_policy: too many policy elements\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct ipt_match policy_match = {
- .name = "policy",
- .match = match,
- .checkentry = checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&policy_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&policy_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c
deleted file mode 100644
index 44611d6d14f..00000000000
--- a/net/ipv4/netfilter/ipt_recent.c
+++ /dev/null
@@ -1,1005 +0,0 @@
-/* Kernel module to check if the source address has been seen recently. */
-/* Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */
-/* Author: Stephen Frost <sfrost@snowman.net> */
-/* Project Page: http://snowman.net/projects/ipt_recent/ */
-/* This software is distributed under the terms of the GPL, Version 2 */
-/* This copyright does not cover user programs that use kernel services
- * by normal system calls. */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-#include <linux/proc_fs.h>
-#include <linux/spinlock.h>
-#include <linux/interrupt.h>
-#include <asm/uaccess.h>
-#include <linux/ctype.h>
-#include <linux/ip.h>
-#include <linux/vmalloc.h>
-#include <linux/moduleparam.h>
-
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_recent.h>
-
-#undef DEBUG
-#define HASH_LOG 9
-
-/* Defaults, these can be overridden on the module command-line. */
-static unsigned int ip_list_tot = 100;
-static unsigned int ip_pkt_list_tot = 20;
-static unsigned int ip_list_hash_size = 0;
-static unsigned int ip_list_perms = 0644;
-#ifdef DEBUG
-static int debug = 1;
-#endif
-
-static char version[] =
-KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. http://snowman.net/projects/ipt_recent/\n";
-
-MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>");
-MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER);
-MODULE_LICENSE("GPL");
-module_param(ip_list_tot, uint, 0400);
-module_param(ip_pkt_list_tot, uint, 0400);
-module_param(ip_list_hash_size, uint, 0400);
-module_param(ip_list_perms, uint, 0400);
-#ifdef DEBUG
-module_param(debug, bool, 0600);
-MODULE_PARM_DESC(debug,"enable debugging output");
-#endif
-MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list");
-MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember");
-MODULE_PARM_DESC(ip_list_hash_size,"size of hash table used to look up IPs");
-MODULE_PARM_DESC(ip_list_perms,"permissions on /proc/net/ipt_recent/* files");
-
-/* Structure of our list of recently seen addresses. */
-struct recent_ip_list {
- u_int32_t addr;
- u_int8_t ttl;
- unsigned long last_seen;
- unsigned long *last_pkts;
- u_int32_t oldest_pkt;
- u_int32_t hash_entry;
- u_int32_t time_pos;
-};
-
-struct time_info_list {
- u_int32_t position;
- u_int32_t time;
-};
-
-/* Structure of our linked list of tables of recent lists. */
-struct recent_ip_tables {
- char name[IPT_RECENT_NAME_LEN];
- int count;
- int time_pos;
- struct recent_ip_list *table;
- struct recent_ip_tables *next;
- spinlock_t list_lock;
- int *hash_table;
- struct time_info_list *time_info;
-#ifdef CONFIG_PROC_FS
- struct proc_dir_entry *status_proc;
-#endif /* CONFIG_PROC_FS */
-};
-
-/* Our current list of addresses we have recently seen.
- * Only added to on a --set, and only updated on --set || --update
- */
-static struct recent_ip_tables *r_tables = NULL;
-
-/* We protect r_list with this spinlock so two processors are not modifying
- * the list at the same time.
- */
-static DEFINE_SPINLOCK(recent_lock);
-
-#ifdef CONFIG_PROC_FS
-/* Our /proc/net/ipt_recent entry */
-static struct proc_dir_entry *proc_net_ipt_recent = NULL;
-#endif
-
-/* Function declaration for later. */
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop);
-
-/* Function to hash a given address into the hash table of table_size size */
-static int hash_func(unsigned int addr, int table_size)
-{
- int result = 0;
- unsigned int value = addr;
- do { result ^= value; } while((value >>= HASH_LOG));
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": %d = hash_func(%u,%d)\n",
- result & (table_size - 1),
- addr,
- table_size);
-#endif
-
- return(result & (table_size - 1));
-}
-
-#ifdef CONFIG_PROC_FS
-/* This is the function which produces the output for our /proc output
- * interface which lists each IP address, the last seen time and the
- * other recent times the address was seen.
- */
-
-static int ip_recent_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data)
-{
- int len = 0, count, last_len = 0, pkt_count;
- off_t pos = 0;
- off_t begin = 0;
- struct recent_ip_tables *curr_table;
-
- curr_table = (struct recent_ip_tables*) data;
-
- spin_lock_bh(&curr_table->list_lock);
- for(count = 0; count < ip_list_tot; count++) {
- if(!curr_table->table[count].addr) continue;
- last_len = len;
- len += sprintf(buffer+len,"src=%u.%u.%u.%u ",NIPQUAD(curr_table->table[count].addr));
- len += sprintf(buffer+len,"ttl: %u ",curr_table->table[count].ttl);
- len += sprintf(buffer+len,"last_seen: %lu ",curr_table->table[count].last_seen);
- len += sprintf(buffer+len,"oldest_pkt: %u ",curr_table->table[count].oldest_pkt);
- len += sprintf(buffer+len,"last_pkts: %lu",curr_table->table[count].last_pkts[0]);
- for(pkt_count = 1; pkt_count < ip_pkt_list_tot; pkt_count++) {
- if(!curr_table->table[count].last_pkts[pkt_count]) break;
- len += sprintf(buffer+len,", %lu",curr_table->table[count].last_pkts[pkt_count]);
- }
- len += sprintf(buffer+len,"\n");
- pos = begin + len;
- if(pos < offset) { len = 0; begin = pos; }
- if(pos > offset + length) { len = last_len; break; }
- }
-
- *start = buffer + (offset - begin);
- len -= (offset - begin);
- if(len > length) len = length;
-
- spin_unlock_bh(&curr_table->list_lock);
- return len;
-}
-
-/* ip_recent_ctrl provides an interface for users to modify the table
- * directly. This allows adding entries, removing entries, and
- * flushing the entire table.
- * This is done by opening up the appropriate table for writing and
- * sending one of:
- * xx.xx.xx.xx -- Add entry to table with current time
- * +xx.xx.xx.xx -- Add entry to table with current time
- * -xx.xx.xx.xx -- Remove entry from table
- * clear -- Flush table, remove all entries
- */
-
-static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned long size, void *data)
-{
- static const u_int32_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff };
- u_int32_t val;
- int base, used = 0;
- char c, *cp;
- union iaddr {
- uint8_t bytes[4];
- uint32_t word;
- } res;
- uint8_t *pp = res.bytes;
- int digit;
-
- char buffer[20];
- int len, check_set = 0, count;
- u_int32_t addr = 0;
- struct sk_buff *skb;
- struct ipt_recent_info *info;
- struct recent_ip_tables *curr_table;
-
- curr_table = (struct recent_ip_tables*) data;
-
- if(size > 20) len = 20; else len = size;
-
- if(copy_from_user(buffer,input,len)) return -EFAULT;
-
- if(len < 20) buffer[len] = '\0';
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl len: %d, input: `%.20s'\n",len,buffer);
-#endif
-
- cp = buffer;
- while(isspace(*cp)) { cp++; used++; if(used >= len-5) return used; }
-
- /* Check if we are asked to flush the entire table */
- if(!memcmp(cp,"clear",5)) {
- used += 5;
- spin_lock_bh(&curr_table->list_lock);
- curr_table->time_pos = 0;
- for(count = 0; count < ip_list_hash_size; count++) {
- curr_table->hash_table[count] = -1;
- }
- for(count = 0; count < ip_list_tot; count++) {
- curr_table->table[count].last_seen = 0;
- curr_table->table[count].addr = 0;
- curr_table->table[count].ttl = 0;
- memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
- curr_table->table[count].oldest_pkt = 0;
- curr_table->table[count].time_pos = 0;
- curr_table->time_info[count].position = count;
- curr_table->time_info[count].time = 0;
- }
- spin_unlock_bh(&curr_table->list_lock);
- return used;
- }
-
- check_set = IPT_RECENT_SET;
- switch(*cp) {
- case '+': check_set = IPT_RECENT_SET; cp++; used++; break;
- case '-': check_set = IPT_RECENT_REMOVE; cp++; used++; break;
- default: if(!isdigit(*cp)) return (used+1); break;
- }
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl cp: `%c', check_set: %d\n",*cp,check_set);
-#endif
- /* Get addr (effectively inet_aton()) */
- /* Shamelessly stolen from libc, a function in the kernel for doing
- * this would, of course, be greatly preferred, but our options appear
- * to be rather limited, so we will just do it ourselves here.
- */
- res.word = 0;
-
- c = *cp;
- for(;;) {
- if(!isdigit(c)) return used;
- val = 0; base = 10; digit = 0;
- if(c == '0') {
- c = *++cp;
- if(c == 'x' || c == 'X') base = 16, c = *++cp;
- else { base = 8; digit = 1; }
- }
- for(;;) {
- if(isascii(c) && isdigit(c)) {
- if(base == 8 && (c == '8' || c == '0')) return used;
- val = (val * base) + (c - '0');
- c = *++cp;
- digit = 1;
- } else if(base == 16 && isascii(c) && isxdigit(c)) {
- val = (val << 4) | (c + 10 - (islower(c) ? 'a' : 'A'));
- c = *++cp;
- digit = 1;
- } else break;
- }
- if(c == '.') {
- if(pp > res.bytes + 2 || val > 0xff) return used;
- *pp++ = val;
- c = *++cp;
- } else break;
- }
- used = cp - buffer;
- if(c != '\0' && (!isascii(c) || !isspace(c))) return used;
- if(c == '\n') used++;
- if(!digit) return used;
-
- if(val > max[pp - res.bytes]) return used;
- addr = res.word | htonl(val);
-
- if(!addr && check_set == IPT_RECENT_SET) return used;
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl c: %c, addr: %u used: %d\n",c,addr,used);
-#endif
-
- /* Set up and just call match */
- info = kmalloc(sizeof(struct ipt_recent_info),GFP_KERNEL);
- if(!info) { return -ENOMEM; }
- info->seconds = 0;
- info->hit_count = 0;
- info->check_set = check_set;
- info->invert = 0;
- info->side = IPT_RECENT_SOURCE;
- strncpy(info->name,curr_table->name,IPT_RECENT_NAME_LEN);
- info->name[IPT_RECENT_NAME_LEN-1] = '\0';
-
- skb = kmalloc(sizeof(struct sk_buff),GFP_KERNEL);
- if (!skb) {
- used = -ENOMEM;
- goto out_free_info;
- }
- skb->nh.iph = kmalloc(sizeof(struct iphdr),GFP_KERNEL);
- if (!skb->nh.iph) {
- used = -ENOMEM;
- goto out_free_skb;
- }
-
- skb->nh.iph->saddr = addr;
- skb->nh.iph->daddr = 0;
- /* Clear ttl since we have no way of knowing it */
- skb->nh.iph->ttl = 0;
- match(skb,NULL,NULL,info,0,0,NULL);
-
- kfree(skb->nh.iph);
-out_free_skb:
- kfree(skb);
-out_free_info:
- kfree(info);
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": Leaving ip_recent_ctrl addr: %u used: %d\n",addr,used);
-#endif
- return used;
-}
-
-#endif /* CONFIG_PROC_FS */
-
-/* 'match' is our primary function, called by the kernel whenever a rule is
- * hit with our module as an option to it.
- * What this function does depends on what was specifically asked of it by
- * the user:
- * --set -- Add or update last seen time of the source address of the packet
- * -- matchinfo->check_set == IPT_RECENT_SET
- * --rcheck -- Just check if the source address is in the list
- * -- matchinfo->check_set == IPT_RECENT_CHECK
- * --update -- If the source address is in the list, update last_seen
- * -- matchinfo->check_set == IPT_RECENT_UPDATE
- * --remove -- If the source address is in the list, remove it
- * -- matchinfo->check_set == IPT_RECENT_REMOVE
- * --seconds -- Option to --rcheck/--update, only match if last_seen within seconds
- * -- matchinfo->seconds
- * --hitcount -- Option to --rcheck/--update, only match if seen hitcount times
- * -- matchinfo->hit_count
- * --seconds and --hitcount can be combined
- */
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- int pkt_count, hits_found, ans;
- unsigned long now;
- const struct ipt_recent_info *info = matchinfo;
- u_int32_t addr = 0, time_temp;
- u_int8_t ttl = skb->nh.iph->ttl;
- int *hash_table;
- int orig_hash_result, hash_result, temp, location = 0, time_loc, end_collision_chain = -1;
- struct time_info_list *time_info;
- struct recent_ip_tables *curr_table;
- struct recent_ip_tables *last_table;
- struct recent_ip_list *r_list;
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match() called\n");
-#endif
-
- /* Default is false ^ info->invert */
- ans = info->invert;
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): name = '%s'\n",info->name);
-#endif
-
- /* if out != NULL then routing has been done and TTL changed.
- * We change it back here internally for match what came in before routing. */
- if(out) ttl++;
-
- /* Find the right table */
- spin_lock_bh(&recent_lock);
- curr_table = r_tables;
- while( (last_table = curr_table) && strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (curr_table = curr_table->next) );
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): table found('%s')\n",info->name);
-#endif
-
- spin_unlock_bh(&recent_lock);
-
- /* Table with this name not found, match impossible */
- if(!curr_table) { return ans; }
-
- /* Make sure no one is changing the list while we work with it */
- spin_lock_bh(&curr_table->list_lock);
-
- r_list = curr_table->table;
- if(info->side == IPT_RECENT_DEST) addr = skb->nh.iph->daddr; else addr = skb->nh.iph->saddr;
-
- if(!addr) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match() address (%u) invalid, leaving.\n",addr);
-#endif
- spin_unlock_bh(&curr_table->list_lock);
- return ans;
- }
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): checking table, addr: %u, ttl: %u, orig_ttl: %u\n",addr,ttl,skb->nh.iph->ttl);
-#endif
-
- /* Get jiffies now in case they changed while we were waiting for a lock */
- now = jiffies;
- hash_table = curr_table->hash_table;
- time_info = curr_table->time_info;
-
- orig_hash_result = hash_result = hash_func(addr,ip_list_hash_size);
- /* Hash entry at this result used */
- /* Check for TTL match if requested. If TTL is zero then a match would never
- * happen, so match regardless of existing TTL in that case. Zero means the
- * entry was added via the /proc interface anyway, so we will just use the
- * first TTL we get for that IP address. */
- if(info->check_set & IPT_RECENT_TTL) {
- while(hash_table[hash_result] != -1 && !(r_list[hash_table[hash_result]].addr == addr &&
- (!r_list[hash_table[hash_result]].ttl || r_list[hash_table[hash_result]].ttl == ttl))) {
- /* Collision in hash table */
- hash_result = (hash_result + 1) % ip_list_hash_size;
- }
- } else {
- while(hash_table[hash_result] != -1 && r_list[hash_table[hash_result]].addr != addr) {
- /* Collision in hash table */
- hash_result = (hash_result + 1) % ip_list_hash_size;
- }
- }
-
- if(hash_table[hash_result] == -1 && !(info->check_set & IPT_RECENT_SET)) {
- /* IP not in list and not asked to SET */
- spin_unlock_bh(&curr_table->list_lock);
- return ans;
- }
-
- /* Check if we need to handle the collision, do not need to on REMOVE */
- if(orig_hash_result != hash_result && !(info->check_set & IPT_RECENT_REMOVE)) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision in hash table. (or: %d,hr: %d,oa: %u,ha: %u)\n",
- orig_hash_result,
- hash_result,
- r_list[hash_table[orig_hash_result]].addr,
- addr);
-#endif
-
- /* We had a collision.
- * orig_hash_result is where we started, hash_result is where we ended up.
- * So, swap them because we are likely to see the same guy again sooner */
-#ifdef DEBUG
- if(debug) {
- printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[orig_hash_result] = %d\n",hash_table[orig_hash_result]);
- printk(KERN_INFO RECENT_NAME ": match(): Collision; r_list[hash_table[orig_hash_result]].hash_entry = %d\n",
- r_list[hash_table[orig_hash_result]].hash_entry);
- }
-#endif
-
- r_list[hash_table[orig_hash_result]].hash_entry = hash_result;
-
-
- temp = hash_table[orig_hash_result];
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[hash_result] = %d\n",hash_table[hash_result]);
-#endif
- hash_table[orig_hash_result] = hash_table[hash_result];
- hash_table[hash_result] = temp;
- temp = hash_result;
- hash_result = orig_hash_result;
- orig_hash_result = temp;
- time_info[r_list[hash_table[orig_hash_result]].time_pos].position = hash_table[orig_hash_result];
- if(hash_table[hash_result] != -1) {
- r_list[hash_table[hash_result]].hash_entry = hash_result;
- time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
- }
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision handled.\n");
-#endif
- }
-
- if(hash_table[hash_result] == -1) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): New table entry. (hr: %d,ha: %u)\n",
- hash_result, addr);
-#endif
-
- /* New item found and IPT_RECENT_SET, so we need to add it */
- location = time_info[curr_table->time_pos].position;
- hash_table[r_list[location].hash_entry] = -1;
- hash_table[hash_result] = location;
- memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
- r_list[location].time_pos = curr_table->time_pos;
- r_list[location].addr = addr;
- r_list[location].ttl = ttl;
- r_list[location].last_seen = now;
- r_list[location].oldest_pkt = 1;
- r_list[location].last_pkts[0] = now;
- r_list[location].hash_entry = hash_result;
- time_info[curr_table->time_pos].time = r_list[location].last_seen;
- curr_table->time_pos = (curr_table->time_pos + 1) % ip_list_tot;
-
- ans = !info->invert;
- } else {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): Existing table entry. (hr: %d,ha: %u)\n",
- hash_result,
- addr);
-#endif
-
- /* Existing item found */
- location = hash_table[hash_result];
- /* We have a match on address, now to make sure it meets all requirements for a
- * full match. */
- if(info->check_set & IPT_RECENT_CHECK || info->check_set & IPT_RECENT_UPDATE) {
- if(!info->seconds && !info->hit_count) ans = !info->invert; else ans = info->invert;
- if(info->seconds && !info->hit_count) {
- if(time_before_eq(now,r_list[location].last_seen+info->seconds*HZ)) ans = !info->invert; else ans = info->invert;
- }
- if(info->seconds && info->hit_count) {
- for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
- if(r_list[location].last_pkts[pkt_count] == 0) break;
- if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++;
- }
- if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
- }
- if(info->hit_count && !info->seconds) {
- for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) {
- if(r_list[location].last_pkts[pkt_count] == 0) break;
- hits_found++;
- }
- if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert;
- }
- }
-#ifdef DEBUG
- if(debug) {
- if(ans)
- printk(KERN_INFO RECENT_NAME ": match(): match addr: %u\n",addr);
- else
- printk(KERN_INFO RECENT_NAME ": match(): no match addr: %u\n",addr);
- }
-#endif
-
- /* If and only if we have been asked to SET, or to UPDATE (on match) do we add the
- * current timestamp to the last_seen. */
- if((info->check_set & IPT_RECENT_SET && (ans = !info->invert)) || (info->check_set & IPT_RECENT_UPDATE && ans)) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): SET or UPDATE; updating time info.\n");
-#endif
- /* Have to update our time info */
- time_loc = r_list[location].time_pos;
- time_info[time_loc].time = now;
- time_info[time_loc].position = location;
- while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) {
- time_temp = time_info[time_loc].time;
- time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time;
- time_info[(time_loc+1)%ip_list_tot].time = time_temp;
- time_temp = time_info[time_loc].position;
- time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position;
- time_info[(time_loc+1)%ip_list_tot].position = time_temp;
- r_list[time_info[time_loc].position].time_pos = time_loc;
- r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot;
- time_loc = (time_loc+1) % ip_list_tot;
- }
- r_list[location].time_pos = time_loc;
- r_list[location].ttl = ttl;
- r_list[location].last_pkts[r_list[location].oldest_pkt] = now;
- r_list[location].oldest_pkt = ++r_list[location].oldest_pkt % ip_pkt_list_tot;
- r_list[location].last_seen = now;
- }
- /* If we have been asked to remove the entry from the list, just set it to 0 */
- if(info->check_set & IPT_RECENT_REMOVE) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; clearing entry (or: %d, hr: %d).\n",orig_hash_result,hash_result);
-#endif
- /* Check if this is part of a collision chain */
- while(hash_table[(orig_hash_result+1) % ip_list_hash_size] != -1) {
- orig_hash_result++;
- if(hash_func(r_list[hash_table[orig_hash_result]].addr,ip_list_hash_size) == hash_result) {
- /* Found collision chain, how deep does this rabbit hole go? */
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; found collision chain.\n");
-#endif
- end_collision_chain = orig_hash_result;
- }
- }
- if(end_collision_chain != -1) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; part of collision chain, moving to end.\n");
-#endif
- /* Part of a collision chain, swap it with the end of the chain
- * before removing. */
- r_list[hash_table[end_collision_chain]].hash_entry = hash_result;
- temp = hash_table[end_collision_chain];
- hash_table[end_collision_chain] = hash_table[hash_result];
- hash_table[hash_result] = temp;
- time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
- hash_result = end_collision_chain;
- r_list[hash_table[hash_result]].hash_entry = hash_result;
- time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result];
- }
- location = hash_table[hash_result];
- hash_table[r_list[location].hash_entry] = -1;
- time_loc = r_list[location].time_pos;
- time_info[time_loc].time = 0;
- time_info[time_loc].position = location;
- while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) {
- time_temp = time_info[time_loc].time;
- time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time;
- time_info[(time_loc+1)%ip_list_tot].time = time_temp;
- time_temp = time_info[time_loc].position;
- time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position;
- time_info[(time_loc+1)%ip_list_tot].position = time_temp;
- r_list[time_info[time_loc].position].time_pos = time_loc;
- r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot;
- time_loc = (time_loc+1) % ip_list_tot;
- }
- r_list[location].time_pos = time_loc;
- r_list[location].last_seen = 0;
- r_list[location].addr = 0;
- r_list[location].ttl = 0;
- memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long));
- r_list[location].oldest_pkt = 0;
- ans = !info->invert;
- }
- spin_unlock_bh(&curr_table->list_lock);
- return ans;
- }
-
- spin_unlock_bh(&curr_table->list_lock);
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": match() left.\n");
-#endif
- return ans;
-}
-
-/* This function is to verify that the rule given during the userspace iptables
- * command is correct.
- * If the command is valid then we check if the table name referred to by the
- * rule exists, if not it is created.
- */
-static int
-checkentry(const char *tablename,
- const void *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- int flag = 0, c;
- unsigned long *hold;
- const struct ipt_recent_info *info = matchinfo;
- struct recent_ip_tables *curr_table, *find_table, *last_table;
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() entered.\n");
-#endif
-
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return 0;
-
- /* seconds and hit_count only valid for CHECK/UPDATE */
- if(info->check_set & IPT_RECENT_SET) { flag++; if(info->seconds || info->hit_count) return 0; }
- if(info->check_set & IPT_RECENT_REMOVE) { flag++; if(info->seconds || info->hit_count) return 0; }
- if(info->check_set & IPT_RECENT_CHECK) flag++;
- if(info->check_set & IPT_RECENT_UPDATE) flag++;
-
- /* One and only one of these should ever be set */
- if(flag != 1) return 0;
-
- /* Name must be set to something */
- if(!info->name || !info->name[0]) return 0;
-
- /* Things look good, create a list for this if it does not exist */
- /* Lock the linked list while we play with it */
- spin_lock_bh(&recent_lock);
-
- /* Look for an entry with this name already created */
- /* Finds the end of the list and the entry before the end if current name does not exist */
- find_table = r_tables;
- while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) );
-
- /* If a table already exists just increment the count on that table and return */
- if(find_table) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), incrementing count.\n",info->name);
-#endif
- find_table->count++;
- spin_unlock_bh(&recent_lock);
- return 1;
- }
-
- spin_unlock_bh(&recent_lock);
-
- /* Table with this name not found */
- /* Allocate memory for new linked list item */
-
-#ifdef DEBUG
- if(debug) {
- printk(KERN_INFO RECENT_NAME ": checkentry: no table found (%s)\n",info->name);
- printk(KERN_INFO RECENT_NAME ": checkentry: Allocationg %d for link-list entry.\n",sizeof(struct recent_ip_tables));
- }
-#endif
-
- curr_table = vmalloc(sizeof(struct recent_ip_tables));
- if(curr_table == NULL) return 0;
-
- spin_lock_init(&curr_table->list_lock);
- curr_table->next = NULL;
- curr_table->count = 1;
- curr_table->time_pos = 0;
- strncpy(curr_table->name,info->name,IPT_RECENT_NAME_LEN);
- curr_table->name[IPT_RECENT_NAME_LEN-1] = '\0';
-
- /* Allocate memory for this table and the list of packets in each entry. */
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for table (%s).\n",
- sizeof(struct recent_ip_list)*ip_list_tot,
- info->name);
-#endif
-
- curr_table->table = vmalloc(sizeof(struct recent_ip_list)*ip_list_tot);
- if(curr_table->table == NULL) { vfree(curr_table); return 0; }
- memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot);
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n",
- sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
-#endif
-
- hold = vmalloc(sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot);
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n");
-#endif
- if(hold == NULL) {
- printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for pkt_list.\n");
- vfree(curr_table->table);
- vfree(curr_table);
- return 0;
- }
- for(c = 0; c < ip_list_tot; c++) {
- curr_table->table[c].last_pkts = hold + c*ip_pkt_list_tot;
- }
-
- /* Allocate memory for the hash table */
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for hash_table.\n",
- sizeof(int)*ip_list_hash_size);
-#endif
-
- curr_table->hash_table = vmalloc(sizeof(int)*ip_list_hash_size);
- if(!curr_table->hash_table) {
- printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for hash_table.\n");
- vfree(hold);
- vfree(curr_table->table);
- vfree(curr_table);
- return 0;
- }
-
- for(c = 0; c < ip_list_hash_size; c++) {
- curr_table->hash_table[c] = -1;
- }
-
- /* Allocate memory for the time info */
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for time_info.\n",
- sizeof(struct time_info_list)*ip_list_tot);
-#endif
-
- curr_table->time_info = vmalloc(sizeof(struct time_info_list)*ip_list_tot);
- if(!curr_table->time_info) {
- printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for time_info.\n");
- vfree(curr_table->hash_table);
- vfree(hold);
- vfree(curr_table->table);
- vfree(curr_table);
- return 0;
- }
- for(c = 0; c < ip_list_tot; c++) {
- curr_table->time_info[c].position = c;
- curr_table->time_info[c].time = 0;
- }
-
- /* Put the new table in place */
- spin_lock_bh(&recent_lock);
- find_table = r_tables;
- while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) );
-
- /* If a table already exists just increment the count on that table and return */
- if(find_table) {
- find_table->count++;
- spin_unlock_bh(&recent_lock);
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), created by other process.\n",info->name);
-#endif
- vfree(curr_table->time_info);
- vfree(curr_table->hash_table);
- vfree(hold);
- vfree(curr_table->table);
- vfree(curr_table);
- return 1;
- }
- if(!last_table) r_tables = curr_table; else last_table->next = curr_table;
-
- spin_unlock_bh(&recent_lock);
-
-#ifdef CONFIG_PROC_FS
- /* Create our proc 'status' entry. */
- curr_table->status_proc = create_proc_entry(curr_table->name, ip_list_perms, proc_net_ipt_recent);
- if (!curr_table->status_proc) {
- printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for /proc entry.\n");
- /* Destroy the created table */
- spin_lock_bh(&recent_lock);
- last_table = NULL;
- curr_table = r_tables;
- if(!curr_table) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, no tables.\n");
-#endif
- spin_unlock_bh(&recent_lock);
- return 0;
- }
- while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) );
- if(!curr_table) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, table already destroyed.\n");
-#endif
- spin_unlock_bh(&recent_lock);
- return 0;
- }
- if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next;
- spin_unlock_bh(&recent_lock);
- vfree(curr_table->time_info);
- vfree(curr_table->hash_table);
- vfree(hold);
- vfree(curr_table->table);
- vfree(curr_table);
- return 0;
- }
-
- curr_table->status_proc->owner = THIS_MODULE;
- curr_table->status_proc->data = curr_table;
- wmb();
- curr_table->status_proc->read_proc = ip_recent_get_info;
- curr_table->status_proc->write_proc = ip_recent_ctrl;
-#endif /* CONFIG_PROC_FS */
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() left.\n");
-#endif
-
- return 1;
-}
-
-/* This function is called in the event that a rule matching this module is
- * removed.
- * When this happens we need to check if there are no other rules matching
- * the table given. If that is the case then we remove the table and clean
- * up its memory.
- */
-static void
-destroy(void *matchinfo, unsigned int matchsize)
-{
- const struct ipt_recent_info *info = matchinfo;
- struct recent_ip_tables *curr_table, *last_table;
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": destroy() entered.\n");
-#endif
-
- if(matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return;
-
- /* Lock the linked list while we play with it */
- spin_lock_bh(&recent_lock);
-
- /* Look for an entry with this name already created */
- /* Finds the end of the list and the entry before the end if current name does not exist */
- last_table = NULL;
- curr_table = r_tables;
- if(!curr_table) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": destroy() No tables found, leaving.\n");
-#endif
- spin_unlock_bh(&recent_lock);
- return;
- }
- while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) );
-
- /* If a table does not exist then do nothing and return */
- if(!curr_table) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table not found, leaving.\n");
-#endif
- spin_unlock_bh(&recent_lock);
- return;
- }
-
- curr_table->count--;
-
- /* If count is still non-zero then there are still rules referenceing it so we do nothing */
- if(curr_table->count) {
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, non-zero count, leaving.\n");
-#endif
- spin_unlock_bh(&recent_lock);
- return;
- }
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, zero count, removing.\n");
-#endif
-
- /* Count must be zero so we remove this table from the list */
- if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next;
-
- spin_unlock_bh(&recent_lock);
-
- /* lock to make sure any late-runners still using this after we removed it from
- * the list finish up then remove everything */
- spin_lock_bh(&curr_table->list_lock);
- spin_unlock_bh(&curr_table->list_lock);
-
-#ifdef CONFIG_PROC_FS
- if(curr_table->status_proc) remove_proc_entry(curr_table->name,proc_net_ipt_recent);
-#endif /* CONFIG_PROC_FS */
- vfree(curr_table->table[0].last_pkts);
- vfree(curr_table->table);
- vfree(curr_table->hash_table);
- vfree(curr_table->time_info);
- vfree(curr_table);
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": destroy() left.\n");
-#endif
-
- return;
-}
-
-/* This is the structure we pass to ipt_register to register our
- * module with iptables.
- */
-static struct ipt_match recent_match = {
- .name = "recent",
- .match = &match,
- .checkentry = &checkentry,
- .destroy = &destroy,
- .me = THIS_MODULE
-};
-
-/* Kernel module initialization. */
-static int __init init(void)
-{
- int err, count;
-
- printk(version);
-#ifdef CONFIG_PROC_FS
- proc_net_ipt_recent = proc_mkdir("ipt_recent",proc_net);
- if(!proc_net_ipt_recent) return -ENOMEM;
-#endif
-
- if(ip_list_hash_size && ip_list_hash_size <= ip_list_tot) {
- printk(KERN_WARNING RECENT_NAME ": ip_list_hash_size too small, resetting to default.\n");
- ip_list_hash_size = 0;
- }
-
- if(!ip_list_hash_size) {
- ip_list_hash_size = ip_list_tot*3;
- count = 2*2;
- while(ip_list_hash_size > count) count = count*2;
- ip_list_hash_size = count;
- }
-
-#ifdef DEBUG
- if(debug) printk(KERN_INFO RECENT_NAME ": ip_list_hash_size: %d\n",ip_list_hash_size);
-#endif
-
- err = ipt_register_match(&recent_match);
- if (err)
- remove_proc_entry("ipt_recent", proc_net);
- return err;
-}
-
-/* Kernel module destruction. */
-static void __exit fini(void)
-{
- ipt_unregister_match(&recent_match);
-
- remove_proc_entry("ipt_recent",proc_net);
-}
-
-/* Register our module with the kernel. */
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
new file mode 100644
index 00000000000..4bfaedf9b34
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -0,0 +1,144 @@
+/*
+ * Copyright (c) 2011 Florian Westphal <fw@strlen.de>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <net/ip_fib.h>
+#include <net/route.h>
+
+#include <linux/netfilter/xt_rpfilter.h>
+#include <linux/netfilter/x_tables.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Florian Westphal <fw@strlen.de>");
+MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match");
+
+/* don't try to find route from mcast/bcast/zeronet */
+static __be32 rpfilter_get_saddr(__be32 addr)
+{
+ if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
+ ipv4_is_zeronet(addr))
+ return 0;
+ return addr;
+}
+
+static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
+ const struct net_device *dev, u8 flags)
+{
+ struct fib_result res;
+ bool dev_match;
+ struct net *net = dev_net(dev);
+ int ret __maybe_unused;
+
+ if (fib_lookup(net, fl4, &res))
+ return false;
+
+ if (res.type != RTN_UNICAST) {
+ if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL))
+ return false;
+ }
+ dev_match = false;
+#ifdef CONFIG_IP_ROUTE_MULTIPATH
+ for (ret = 0; ret < res.fi->fib_nhs; ret++) {
+ struct fib_nh *nh = &res.fi->fib_nh[ret];
+
+ if (nh->nh_dev == dev) {
+ dev_match = true;
+ break;
+ }
+ }
+#else
+ if (FIB_RES_DEV(res) == dev)
+ dev_match = true;
+#endif
+ if (dev_match || flags & XT_RPFILTER_LOOSE)
+ return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST;
+ return dev_match;
+}
+
+static bool rpfilter_is_local(const struct sk_buff *skb)
+{
+ const struct rtable *rt = skb_rtable(skb);
+ return rt && (rt->rt_flags & RTCF_LOCAL);
+}
+
+static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
+{
+ const struct xt_rpfilter_info *info;
+ const struct iphdr *iph;
+ struct flowi4 flow;
+ bool invert;
+
+ info = par->matchinfo;
+ invert = info->flags & XT_RPFILTER_INVERT;
+
+ if (rpfilter_is_local(skb))
+ return true ^ invert;
+
+ iph = ip_hdr(skb);
+ if (ipv4_is_multicast(iph->daddr)) {
+ if (ipv4_is_zeronet(iph->saddr))
+ return ipv4_is_local_multicast(iph->daddr) ^ invert;
+ }
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
+ flow.daddr = iph->saddr;
+ flow.saddr = rpfilter_get_saddr(iph->daddr);
+ flow.flowi4_oif = 0;
+ flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
+ flow.flowi4_tos = RT_TOS(iph->tos);
+ flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+
+ return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert;
+}
+
+static int rpfilter_check(const struct xt_mtchk_param *par)
+{
+ const struct xt_rpfilter_info *info = par->matchinfo;
+ unsigned int options = ~XT_RPFILTER_OPTION_MASK;
+ if (info->flags & options) {
+ pr_info("unknown options encountered");
+ return -EINVAL;
+ }
+
+ if (strcmp(par->table, "mangle") != 0 &&
+ strcmp(par->table, "raw") != 0) {
+ pr_info("match only valid in the \'raw\' "
+ "or \'mangle\' tables, not \'%s\'.\n", par->table);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static struct xt_match rpfilter_mt_reg __read_mostly = {
+ .name = "rpfilter",
+ .family = NFPROTO_IPV4,
+ .checkentry = rpfilter_check,
+ .match = rpfilter_mt,
+ .matchsize = sizeof(struct xt_rpfilter_info),
+ .hooks = (1 << NF_INET_PRE_ROUTING),
+ .me = THIS_MODULE
+};
+
+static int __init rpfilter_mt_init(void)
+{
+ return xt_register_match(&rpfilter_mt_reg);
+}
+
+static void __exit rpfilter_mt_exit(void)
+{
+ xt_unregister_match(&rpfilter_mt_reg);
+}
+
+module_init(rpfilter_mt_init);
+module_exit(rpfilter_mt_exit);
diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c
deleted file mode 100644
index 9ab765e126f..00000000000
--- a/net/ipv4/netfilter/ipt_tos.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/* Kernel module to match TOS values. */
-
-/* (C) 1999-2001 Paul `Rusty' Russell
- * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_tos.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("iptables TOS match module");
-
-static int
-match(const struct sk_buff *skb,
- const struct net_device *in,
- const struct net_device *out,
- const void *matchinfo,
- int offset,
- unsigned int protoff,
- int *hotdrop)
-{
- const struct ipt_tos_info *info = matchinfo;
-
- return (skb->nh.iph->tos == info->tos) ^ info->invert;
-}
-
-static int
-checkentry(const char *tablename,
- const void *ip,
- void *matchinfo,
- unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_tos_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match tos_match = {
- .name = "tos",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&tos_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&tos_match);
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c
deleted file mode 100644
index 82da53f430a..00000000000
--- a/net/ipv4/netfilter/ipt_ttl.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/* IP tables module for matching the value of the TTL
- *
- * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp
- *
- * (C) 2000,2001 by Harald Welte <laforge@netfilter.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- */
-
-#include <linux/module.h>
-#include <linux/skbuff.h>
-
-#include <linux/netfilter_ipv4/ipt_ttl.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("IP tables TTL matching module");
-MODULE_LICENSE("GPL");
-
-static int match(const struct sk_buff *skb, const struct net_device *in,
- const struct net_device *out, const void *matchinfo,
- int offset, unsigned int protoff, int *hotdrop)
-{
- const struct ipt_ttl_info *info = matchinfo;
-
- switch (info->mode) {
- case IPT_TTL_EQ:
- return (skb->nh.iph->ttl == info->ttl);
- break;
- case IPT_TTL_NE:
- return (!(skb->nh.iph->ttl == info->ttl));
- break;
- case IPT_TTL_LT:
- return (skb->nh.iph->ttl < info->ttl);
- break;
- case IPT_TTL_GT:
- return (skb->nh.iph->ttl > info->ttl);
- break;
- default:
- printk(KERN_WARNING "ipt_ttl: unknown mode %d\n",
- info->mode);
- return 0;
- }
-
- return 0;
-}
-
-static int checkentry(const char *tablename, const void *ip,
- void *matchinfo, unsigned int matchsize,
- unsigned int hook_mask)
-{
- if (matchsize != IPT_ALIGN(sizeof(struct ipt_ttl_info)))
- return 0;
-
- return 1;
-}
-
-static struct ipt_match ttl_match = {
- .name = "ttl",
- .match = &match,
- .checkentry = &checkentry,
- .me = THIS_MODULE,
-};
-
-static int __init init(void)
-{
- return ipt_register_match(&ttl_match);
-}
-
-static void __exit fini(void)
-{
- ipt_unregister_match(&ttl_match);
-
-}
-
-module_init(init);
-module_exit(fini);
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 212a3079085..e08a74a243a 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -13,183 +13,99 @@
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
+#include <net/ip.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("iptables filter table");
-#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT))
+#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT))
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[3];
- struct ipt_error term;
-} initial_table __initdata
-= { { "filter", FILTER_VALID_HOOKS, 4,
- sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error),
- { [NF_IP_LOCAL_IN] = 0,
- [NF_IP_FORWARD] = sizeof(struct ipt_standard),
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
- { [NF_IP_LOCAL_IN] = 0,
- [NF_IP_FORWARD] = sizeof(struct ipt_standard),
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 },
- 0, NULL, { } },
- {
- /* LOCAL_IN */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* FORWARD */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* LOCAL_OUT */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } }
- },
- /* ERROR */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_error),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
- { } },
- "ERROR"
- }
- }
-};
-
-static struct ipt_table packet_filter = {
+static const struct xt_table packet_filter = {
.name = "filter",
.valid_hooks = FILTER_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_FILTER,
};
-/* The work comes in here from netfilter.c. */
static unsigned int
-ipt_hook(unsigned int hook,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
-}
+ const struct net *net;
-static unsigned int
-ipt_local_out_hook(unsigned int hook,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- /* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
- if (net_ratelimit())
- printk("ipt_hook: happy cracking.\n");
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* root is playing with raw sockets. */
return NF_ACCEPT;
- }
- return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL);
+ net = dev_net((in != NULL) ? in : out);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_filter);
}
-static struct nf_hook_ops ipt_ops[] = {
- {
- .hook = ipt_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_FILTER,
- },
- {
- .hook = ipt_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_FORWARD,
- .priority = NF_IP_PRI_FILTER,
- },
- {
- .hook = ipt_local_out_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_FILTER,
- },
-};
+static struct nf_hook_ops *filter_ops __read_mostly;
/* Default to forward because I got too much mail already. */
-static int forward = NF_ACCEPT;
+static bool forward = true;
module_param(forward, bool, 0000);
-static int __init init(void)
+static int __net_init iptable_filter_net_init(struct net *net)
{
- int ret;
-
- if (forward < 0 || forward > NF_MAX_VERDICT) {
- printk("iptables forward must be 0 or 1\n");
- return -EINVAL;
- }
+ struct ipt_replace *repl;
+ repl = ipt_alloc_initial_table(&packet_filter);
+ if (repl == NULL)
+ return -ENOMEM;
/* Entry 1 is the FORWARD hook */
- initial_table.entries[1].target.verdict = -forward - 1;
+ ((struct ipt_standard *)repl->entries)[1].target.verdict =
+ forward ? -NF_ACCEPT - 1 : -NF_DROP - 1;
- /* Register table */
- ret = ipt_register_table(&packet_filter, &initial_table.repl);
- if (ret < 0)
- return ret;
+ net->ipv4.iptable_filter =
+ ipt_register_table(net, &packet_filter, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
+}
- /* Register hooks */
- ret = nf_register_hook(&ipt_ops[0]);
- if (ret < 0)
- goto cleanup_table;
+static void __net_exit iptable_filter_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.iptable_filter);
+}
- ret = nf_register_hook(&ipt_ops[1]);
- if (ret < 0)
- goto cleanup_hook0;
+static struct pernet_operations iptable_filter_net_ops = {
+ .init = iptable_filter_net_init,
+ .exit = iptable_filter_net_exit,
+};
- ret = nf_register_hook(&ipt_ops[2]);
- if (ret < 0)
- goto cleanup_hook1;
+static int __init iptable_filter_init(void)
+{
+ int ret;
- return ret;
+ ret = register_pernet_subsys(&iptable_filter_net_ops);
+ if (ret < 0)
+ return ret;
- cleanup_hook1:
- nf_unregister_hook(&ipt_ops[1]);
- cleanup_hook0:
- nf_unregister_hook(&ipt_ops[0]);
- cleanup_table:
- ipt_unregister_table(&packet_filter);
+ /* Register hooks */
+ filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook);
+ if (IS_ERR(filter_ops)) {
+ ret = PTR_ERR(filter_ops);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
+ }
return ret;
}
-static void __exit fini(void)
+static void __exit iptable_filter_fini(void)
{
- unsigned int i;
-
- for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
- nf_unregister_hook(&ipt_ops[i]);
-
- ipt_unregister_table(&packet_filter);
+ xt_hook_unlink(&packet_filter, filter_ops);
+ unregister_pernet_subsys(&iptable_filter_net_ops);
}
-module_init(init);
-module_exit(fini);
+module_init(iptable_filter_init);
+module_exit(iptable_filter_fini);
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 3212a5cc4b6..6a5079c34bb 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -7,255 +7,142 @@
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * Extended to all five netfilter hooks by Brad Chapman & Harald Welte
*/
-#include <linux/config.h>
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
+#include <linux/slab.h>
#include <net/sock.h>
#include <net/route.h>
#include <linux/ip.h>
+#include <net/ip.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("iptables mangle table");
-#define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | \
- (1 << NF_IP_LOCAL_IN) | \
- (1 << NF_IP_FORWARD) | \
- (1 << NF_IP_LOCAL_OUT) | \
- (1 << NF_IP_POST_ROUTING))
+#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \
+ (1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT) | \
+ (1 << NF_INET_POST_ROUTING))
-/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[5];
- struct ipt_error term;
-} initial_table __initdata
-= { { "mangle", MANGLE_VALID_HOOKS, 6,
- sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error),
- { [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard),
- [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2,
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
- [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4 },
- { [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard),
- [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2,
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3,
- [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4 },
- 0, NULL, { } },
- {
- /* PRE_ROUTING */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* LOCAL_IN */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* FORWARD */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* LOCAL_OUT */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- /* POST_ROUTING */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_standard),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } },
- -NF_ACCEPT - 1 } },
- },
- /* ERROR */
- { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 },
- 0,
- sizeof(struct ipt_entry),
- sizeof(struct ipt_error),
- 0, { 0, 0 }, { } },
- { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } },
- { } },
- "ERROR"
- }
- }
-};
-
-static struct ipt_table packet_mangler = {
+static const struct xt_table packet_mangler = {
.name = "mangle",
.valid_hooks = MANGLE_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_MANGLE,
};
-/* The work comes in here from netfilter.c. */
-static unsigned int
-ipt_route_hook(unsigned int hook,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
- return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
-}
-
static unsigned int
-ipt_local_hook(unsigned int hook,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
{
unsigned int ret;
+ const struct iphdr *iph;
u_int8_t tos;
- u_int32_t saddr, daddr;
- unsigned long nfmark;
+ __be32 saddr, daddr;
+ u_int32_t mark;
+ int err;
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
- if (net_ratelimit())
- printk("ipt_hook: happy cracking.\n");
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- }
/* Save things which could affect route */
- nfmark = (*pskb)->nfmark;
- saddr = (*pskb)->nh.iph->saddr;
- daddr = (*pskb)->nh.iph->daddr;
- tos = (*pskb)->nh.iph->tos;
-
- ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL);
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
+
+ ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out,
+ dev_net(out)->ipv4.iptable_mangle);
/* Reroute for ANY change. */
- if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE
- && ((*pskb)->nh.iph->saddr != saddr
- || (*pskb)->nh.iph->daddr != daddr
-#ifdef CONFIG_IP_ROUTE_FWMARK
- || (*pskb)->nfmark != nfmark
-#endif
- || (*pskb)->nh.iph->tos != tos))
- return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
+ if (ret != NF_DROP && ret != NF_STOLEN) {
+ iph = ip_hdr(skb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ skb->mark != mark ||
+ iph->tos != tos) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
return ret;
}
-static struct nf_hook_ops ipt_ops[] = {
- {
- .hook = ipt_route_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_MANGLE,
- },
- {
- .hook = ipt_route_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_MANGLE,
- },
- {
- .hook = ipt_route_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_FORWARD,
- .priority = NF_IP_PRI_MANGLE,
- },
- {
- .hook = ipt_local_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_MANGLE,
- },
- {
- .hook = ipt_route_hook,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_MANGLE,
- },
-};
-
-static int __init init(void)
+/* The work comes in here from netfilter.c. */
+static unsigned int
+iptable_mangle_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- int ret;
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
+ return ipt_mangle_out(skb, out);
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ dev_net(out)->ipv4.iptable_mangle);
+ /* PREROUTING/INPUT/FORWARD: */
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ dev_net(in)->ipv4.iptable_mangle);
+}
- /* Register table */
- ret = ipt_register_table(&packet_mangler, &initial_table.repl);
- if (ret < 0)
- return ret;
+static struct nf_hook_ops *mangle_ops __read_mostly;
- /* Register hooks */
- ret = nf_register_hook(&ipt_ops[0]);
- if (ret < 0)
- goto cleanup_table;
+static int __net_init iptable_mangle_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&packet_mangler);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.iptable_mangle =
+ ipt_register_table(net, &packet_mangler, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle);
+}
- ret = nf_register_hook(&ipt_ops[1]);
- if (ret < 0)
- goto cleanup_hook0;
+static void __net_exit iptable_mangle_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.iptable_mangle);
+}
- ret = nf_register_hook(&ipt_ops[2]);
- if (ret < 0)
- goto cleanup_hook1;
+static struct pernet_operations iptable_mangle_net_ops = {
+ .init = iptable_mangle_net_init,
+ .exit = iptable_mangle_net_exit,
+};
- ret = nf_register_hook(&ipt_ops[3]);
- if (ret < 0)
- goto cleanup_hook2;
+static int __init iptable_mangle_init(void)
+{
+ int ret;
- ret = nf_register_hook(&ipt_ops[4]);
+ ret = register_pernet_subsys(&iptable_mangle_net_ops);
if (ret < 0)
- goto cleanup_hook3;
-
- return ret;
+ return ret;
- cleanup_hook3:
- nf_unregister_hook(&ipt_ops[3]);
- cleanup_hook2:
- nf_unregister_hook(&ipt_ops[2]);
- cleanup_hook1:
- nf_unregister_hook(&ipt_ops[1]);
- cleanup_hook0:
- nf_unregister_hook(&ipt_ops[0]);
- cleanup_table:
- ipt_unregister_table(&packet_mangler);
+ /* Register hooks */
+ mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook);
+ if (IS_ERR(mangle_ops)) {
+ ret = PTR_ERR(mangle_ops);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
+ }
return ret;
}
-static void __exit fini(void)
+static void __exit iptable_mangle_fini(void)
{
- unsigned int i;
-
- for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
- nf_unregister_hook(&ipt_ops[i]);
-
- ipt_unregister_table(&packet_mangler);
+ xt_hook_unlink(&packet_mangler, mangle_ops);
+ unregister_pernet_subsys(&iptable_mangle_net_ops);
}
-module_init(init);
-module_exit(fini);
+module_init(iptable_mangle_init);
+module_exit(iptable_mangle_fini);
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
new file mode 100644
index 00000000000..f1787c04a4d
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -0,0 +1,328 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+
+static const struct xt_table nf_nat_ipv4_table = {
+ .name = "nat",
+ .valid_hooks = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+};
+
+static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum)
+{
+ /* Force range to this IP; let proto decide mapping for
+ * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED).
+ */
+ struct nf_nat_range range;
+
+ range.flags = 0;
+ pr_debug("Allocating NULL binding for %p (%pI4)\n", ct,
+ HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ?
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip :
+ &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip);
+
+ return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum));
+}
+
+static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
+ const struct net_device *in,
+ const struct net_device *out,
+ struct nf_conn *ct)
+{
+ struct net *net = nf_ct_net(ct);
+ unsigned int ret;
+
+ ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table);
+ if (ret == NF_ACCEPT) {
+ if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum)))
+ ret = alloc_null_binding(ct, hooknum);
+ }
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn_nat *nat;
+ /* maniptype == SRC for postrouting. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+
+ /* We never see fragments: conntrack defrags on pre-routing
+ * and local-out, and nf_nat_out protects post-routing.
+ */
+ NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb)));
+
+ ct = nf_ct_get(skb, &ctinfo);
+ /* Can't track? It's not due to stress, or conntrack would
+ * have dropped it. Hence it's the user's responsibilty to
+ * packet filter it out, or implement conntrack/NAT for that
+ * protocol. 8) --RR
+ */
+ if (!ct)
+ return NF_ACCEPT;
+
+ /* Don't try to NAT if this packet is not conntracked */
+ if (nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED_REPLY:
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */
+ case IP_CT_NEW:
+ /* Seen it before? This can happen for loopback, retrans,
+ * or local packets.
+ */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ unsigned int ret;
+
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
+ if (ret != NF_ACCEPT)
+ return ret;
+ } else {
+ pr_debug("Already setup manip %s for ct %p\n",
+ maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
+ ct);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+ break;
+
+ default:
+ /* ESTABLISHED */
+ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
+ ctinfo == IP_CT_ESTABLISHED_REPLY);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
+}
+
+static unsigned int
+nf_nat_ipv4_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ __be32 daddr = ip_hdr(skb)->daddr;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ daddr != ip_hdr(skb)->daddr)
+ skb_dst_drop(skb);
+
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_out(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+#ifdef CONFIG_XFRM
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ int err;
+#endif
+ unsigned int ret;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if ((ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip) ||
+ (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+ }
+#endif
+ return ret;
+}
+
+static unsigned int
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ unsigned int ret;
+ int err;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#ifdef CONFIG_XFRM
+ else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
+ ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
+ ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
+#endif
+ }
+ return ret;
+}
+
+static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = {
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv4_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv4_out,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+ /* Before packet filtering, change destination */
+ {
+ .hook = nf_nat_ipv4_local_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_NAT_DST,
+ },
+ /* After packet filtering, change source */
+ {
+ .hook = nf_nat_ipv4_fn,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_NAT_SRC,
+ },
+};
+
+static int __net_init iptable_nat_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&nf_nat_ipv4_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
+}
+
+static void __net_exit iptable_nat_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.nat_table);
+}
+
+static struct pernet_operations iptable_nat_net_ops = {
+ .init = iptable_nat_net_init,
+ .exit = iptable_nat_net_exit,
+};
+
+static int __init iptable_nat_init(void)
+{
+ int err;
+
+ err = register_pernet_subsys(&iptable_nat_net_ops);
+ if (err < 0)
+ goto err1;
+
+ err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ if (err < 0)
+ goto err2;
+ return 0;
+
+err2:
+ unregister_pernet_subsys(&iptable_nat_net_ops);
+err1:
+ return err;
+}
+
+static void __exit iptable_nat_exit(void)
+{
+ nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops));
+ unregister_pernet_subsys(&iptable_nat_net_ops);
+}
+
+module_init(iptable_nat_init);
+module_exit(iptable_nat_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index fdb9e9c81e8..b2f7e8f9831 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -1,159 +1,90 @@
-/*
+/*
* 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT .
*
* Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
*/
#include <linux/module.h>
#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
+#include <net/ip.h>
-#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))
+#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT))
-static struct
-{
- struct ipt_replace repl;
- struct ipt_standard entries[2];
- struct ipt_error term;
-} initial_table __initdata = {
- .repl = {
- .name = "raw",
- .valid_hooks = RAW_VALID_HOOKS,
- .num_entries = 3,
- .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error),
- .hook_entry = {
- [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) },
- .underflow = {
- [NF_IP_PRE_ROUTING] = 0,
- [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) },
- },
- .entries = {
- /* PRE_ROUTING */
- {
- .entry = {
- .target_offset = sizeof(struct ipt_entry),
- .next_offset = sizeof(struct ipt_standard),
- },
- .target = {
- .target = {
- .u = {
- .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
- },
- },
- .verdict = -NF_ACCEPT - 1,
- },
- },
-
- /* LOCAL_OUT */
- {
- .entry = {
- .target_offset = sizeof(struct ipt_entry),
- .next_offset = sizeof(struct ipt_standard),
- },
- .target = {
- .target = {
- .u = {
- .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)),
- },
- },
- .verdict = -NF_ACCEPT - 1,
- },
- },
- },
- /* ERROR */
- .term = {
- .entry = {
- .target_offset = sizeof(struct ipt_entry),
- .next_offset = sizeof(struct ipt_error),
- },
- .target = {
- .target = {
- .u = {
- .user = {
- .target_size = IPT_ALIGN(sizeof(struct ipt_error_target)),
- .name = IPT_ERROR_TARGET,
- },
- },
- },
- .errorname = "ERROR",
- },
- }
-};
-
-static struct ipt_table packet_raw = {
- .name = "raw",
- .valid_hooks = RAW_VALID_HOOKS,
- .lock = RW_LOCK_UNLOCKED,
+static const struct xt_table packet_raw = {
+ .name = "raw",
+ .valid_hooks = RAW_VALID_HOOKS,
.me = THIS_MODULE,
- .af = AF_INET,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_RAW,
};
/* The work comes in here from netfilter.c. */
static unsigned int
-ipt_hook(unsigned int hook,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in, const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct net *net;
+
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* root is playing with raw sockets. */
+ return NF_ACCEPT;
+
+ net = dev_net((in != NULL) ? in : out);
+ return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
+}
+
+static struct nf_hook_ops *rawtable_ops __read_mostly;
+
+static int __net_init iptable_raw_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&packet_raw);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.iptable_raw =
+ ipt_register_table(net, &packet_raw, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw);
+}
+
+static void __net_exit iptable_raw_net_exit(struct net *net)
{
- return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL);
+ ipt_unregister_table(net, net->ipv4.iptable_raw);
}
-/* 'raw' is the very first table. */
-static struct nf_hook_ops ipt_ops[] = {
- {
- .hook = ipt_hook,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_RAW,
- .owner = THIS_MODULE,
- },
- {
- .hook = ipt_hook,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_RAW,
- .owner = THIS_MODULE,
- },
+static struct pernet_operations iptable_raw_net_ops = {
+ .init = iptable_raw_net_init,
+ .exit = iptable_raw_net_exit,
};
-static int __init init(void)
+static int __init iptable_raw_init(void)
{
int ret;
- /* Register table */
- ret = ipt_register_table(&packet_raw, &initial_table.repl);
+ ret = register_pernet_subsys(&iptable_raw_net_ops);
if (ret < 0)
return ret;
/* Register hooks */
- ret = nf_register_hook(&ipt_ops[0]);
- if (ret < 0)
- goto cleanup_table;
-
- ret = nf_register_hook(&ipt_ops[1]);
- if (ret < 0)
- goto cleanup_hook0;
-
- return ret;
-
- cleanup_hook0:
- nf_unregister_hook(&ipt_ops[0]);
- cleanup_table:
- ipt_unregister_table(&packet_raw);
+ rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook);
+ if (IS_ERR(rawtable_ops)) {
+ ret = PTR_ERR(rawtable_ops);
+ unregister_pernet_subsys(&iptable_raw_net_ops);
+ }
return ret;
}
-static void __exit fini(void)
+static void __exit iptable_raw_fini(void)
{
- unsigned int i;
-
- for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++)
- nf_unregister_hook(&ipt_ops[i]);
-
- ipt_unregister_table(&packet_raw);
+ xt_hook_unlink(&packet_raw, rawtable_ops);
+ unregister_pernet_subsys(&iptable_raw_net_ops);
}
-module_init(init);
-module_exit(fini);
+module_init(iptable_raw_init);
+module_exit(iptable_raw_fini);
MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
new file mode 100644
index 00000000000..c86647ed207
--- /dev/null
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -0,0 +1,111 @@
+/*
+ * "security" table
+ *
+ * This is for use by Mandatory Access Control (MAC) security models,
+ * which need to be able to manage security policy in separate context
+ * to DAC.
+ *
+ * Based on iptable_mangle.c
+ *
+ * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
+ * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org>
+ * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/module.h>
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/slab.h>
+#include <net/ip.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>");
+MODULE_DESCRIPTION("iptables security table, for MAC rules");
+
+#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \
+ (1 << NF_INET_FORWARD) | \
+ (1 << NF_INET_LOCAL_OUT)
+
+static const struct xt_table security_table = {
+ .name = "security",
+ .valid_hooks = SECURITY_VALID_HOOKS,
+ .me = THIS_MODULE,
+ .af = NFPROTO_IPV4,
+ .priority = NF_IP_PRI_SECURITY,
+};
+
+static unsigned int
+iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ const struct net *net;
+
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
+ (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr)))
+ /* Somebody is playing with raw sockets. */
+ return NF_ACCEPT;
+
+ net = dev_net((in != NULL) ? in : out);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_security);
+}
+
+static struct nf_hook_ops *sectbl_ops __read_mostly;
+
+static int __net_init iptable_security_net_init(struct net *net)
+{
+ struct ipt_replace *repl;
+
+ repl = ipt_alloc_initial_table(&security_table);
+ if (repl == NULL)
+ return -ENOMEM;
+ net->ipv4.iptable_security =
+ ipt_register_table(net, &security_table, repl);
+ kfree(repl);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_security);
+}
+
+static void __net_exit iptable_security_net_exit(struct net *net)
+{
+ ipt_unregister_table(net, net->ipv4.iptable_security);
+}
+
+static struct pernet_operations iptable_security_net_ops = {
+ .init = iptable_security_net_init,
+ .exit = iptable_security_net_exit,
+};
+
+static int __init iptable_security_init(void)
+{
+ int ret;
+
+ ret = register_pernet_subsys(&iptable_security_net_ops);
+ if (ret < 0)
+ return ret;
+
+ sectbl_ops = xt_hook_link(&security_table, iptable_security_hook);
+ if (IS_ERR(sectbl_ops)) {
+ ret = PTR_ERR(sectbl_ops);
+ goto cleanup_table;
+ }
+
+ return ret;
+
+cleanup_table:
+ unregister_pernet_subsys(&iptable_security_net_ops);
+ return ret;
+}
+
+static void __exit iptable_security_fini(void)
+{
+ xt_hook_unlink(&security_table, sectbl_ops);
+ unregister_pernet_subsys(&iptable_security_net_ops);
+}
+
+module_init(iptable_security_init);
+module_exit(iptable_security_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index 167619f638c..8127dc80286 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -1,20 +1,13 @@
+
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - move L3 protocol dependent part to this file.
- * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - add get_features() to support various size of conntrack
- * structures.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c
*/
-#include <linux/config.h>
#include <linux/types.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
@@ -28,290 +21,241 @@
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_helper.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#include <net/netfilter/nf_log.h>
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
-
-DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat);
-
-static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
- struct nf_conntrack_tuple *tuple)
+static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff,
+ struct nf_conntrack_tuple *tuple)
{
- u_int32_t _addrs[2], *ap;
+ const __be32 *ap;
+ __be32 _addrs[2];
ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr),
sizeof(u_int32_t) * 2, _addrs);
if (ap == NULL)
- return 0;
+ return false;
tuple->src.u3.ip = ap[0];
tuple->dst.u3.ip = ap[1];
- return 1;
+ return true;
}
-static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
+static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
{
tuple->src.u3.ip = orig->dst.u3.ip;
tuple->dst.u3.ip = orig->src.u3.ip;
- return 1;
+ return true;
}
static int ipv4_print_tuple(struct seq_file *s,
const struct nf_conntrack_tuple *tuple)
{
- return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ",
- NIPQUAD(tuple->src.u3.ip),
- NIPQUAD(tuple->dst.u3.ip));
-}
-
-static int ipv4_print_conntrack(struct seq_file *s,
- const struct nf_conn *conntrack)
-{
- return 0;
+ return seq_printf(s, "src=%pI4 dst=%pI4 ",
+ &tuple->src.u3.ip, &tuple->dst.u3.ip);
}
-/* Returns new sk_buff, or NULL */
-static struct sk_buff *
-nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
+ unsigned int *dataoff, u_int8_t *protonum)
{
- skb_orphan(skb);
-
- local_bh_disable();
- skb = ip_defrag(skb, user);
- local_bh_enable();
-
- if (skb)
- ip_send_check(skb->nh.iph);
+ const struct iphdr *iph;
+ struct iphdr _iph;
+
+ iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph);
+ if (iph == NULL)
+ return -NF_ACCEPT;
+
+ /* Conntrack defragments packets, we might still see fragments
+ * inside ICMP packets though. */
+ if (iph->frag_off & htons(IP_OFFSET))
+ return -NF_ACCEPT;
+
+ *dataoff = nhoff + (iph->ihl << 2);
+ *protonum = iph->protocol;
+
+ /* Check bogus IP headers */
+ if (*dataoff > skb->len) {
+ pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: "
+ "nhoff %u, ihl %u, skblen %u\n",
+ nhoff, iph->ihl << 2, skb->len);
+ return -NF_ACCEPT;
+ }
- return skb;
+ return NF_ACCEPT;
}
-static int
-ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff,
- u_int8_t *protonum)
+static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
- /* Never happen */
- if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
- if (net_ratelimit()) {
- printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n",
- (*pskb)->nh.iph->protocol, hooknum);
- }
- return -NF_DROP;
- }
+ struct nf_conn *ct;
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn_help *help;
+ const struct nf_conntrack_helper *helper;
- *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4;
- *protonum = (*pskb)->nh.iph->protocol;
+ /* This is where we call the helper: as the packet goes out. */
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
- return NF_ACCEPT;
-}
+ help = nfct_help(ct);
+ if (!help)
+ return NF_ACCEPT;
-int nat_module_is_loaded = 0;
-static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple)
-{
- if (nat_module_is_loaded)
- return NF_CT_F_NAT;
+ /* rcu_read_lock()ed by nf_hook_slow */
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return NF_ACCEPT;
- return NF_CT_F_BASIC;
+ return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+ ct, ctinfo);
}
-static unsigned int ipv4_confirm(unsigned int hooknum,
- struct sk_buff **pskb,
+static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- /* We've seen it coming out the other side: confirm it */
- return nf_conntrack_confirm(pskb);
-}
-
-static unsigned int ipv4_conntrack_help(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
- /* This is where we call the helper: as the packet goes out. */
- ct = nf_ct_get(*pskb, &ctinfo);
- if (ct && ct->helper) {
- unsigned int ret;
- ret = ct->helper->help(pskb,
- (*pskb)->nh.raw - (*pskb)->data
- + (*pskb)->nh.iph->ihl*4,
- ct, ctinfo);
- if (ret != NF_ACCEPT)
- return ret;
- }
- return NF_ACCEPT;
-}
+ ct = nf_ct_get(skb, &ctinfo);
+ if (!ct || ctinfo == IP_CT_RELATED_REPLY)
+ goto out;
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
-{
-#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE)
- /* Previously seen (loopback)? Ignore. Do this before
- fragment check. */
- if ((*pskb)->nfct)
- return NF_ACCEPT;
-#endif
-
- /* Gather fragments. */
- if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) {
- *pskb = nf_ct_ipv4_gather_frags(*pskb,
- hooknum == NF_IP_PRE_ROUTING ?
- IP_DEFRAG_CONNTRACK_IN :
- IP_DEFRAG_CONNTRACK_OUT);
- if (!*pskb)
- return NF_STOLEN;
+ /* adjust seqs for loopback traffic only in outgoing direction */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_is_loopback_packet(skb)) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+ NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
+ return NF_DROP;
+ }
}
- return NF_ACCEPT;
+out:
+ /* We've seen it coming out the other side: confirm it */
+ return nf_conntrack_confirm(skb);
}
-static unsigned int ipv4_conntrack_in(unsigned int hooknum,
- struct sk_buff **pskb,
+static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(PF_INET, hooknum, pskb);
+ return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
}
-static unsigned int ipv4_conntrack_local(unsigned int hooknum,
- struct sk_buff **pskb,
- const struct net_device *in,
- const struct net_device *out,
- int (*okfn)(struct sk_buff *))
+static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
{
/* root is playing with raw sockets. */
- if ((*pskb)->len < sizeof(struct iphdr)
- || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) {
- if (net_ratelimit())
- printk("ipt_hook: happy cracking.\n");
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- }
- return nf_conntrack_in(PF_INET, hooknum, pskb);
+ return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
make it the first hook. */
-static struct nf_hook_ops ipv4_conntrack_defrag_ops = {
- .hook = ipv4_conntrack_defrag,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
-};
-
-static struct nf_hook_ops ipv4_conntrack_in_ops = {
- .hook = ipv4_conntrack_in,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_PRE_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK,
-};
-
-static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = {
- .hook = ipv4_conntrack_defrag,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
-};
-
-static struct nf_hook_ops ipv4_conntrack_local_out_ops = {
- .hook = ipv4_conntrack_local,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_OUT,
- .priority = NF_IP_PRI_CONNTRACK,
-};
-
-/* helpers */
-static struct nf_hook_ops ipv4_conntrack_helper_out_ops = {
- .hook = ipv4_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
-};
-
-static struct nf_hook_ops ipv4_conntrack_helper_in_ops = {
- .hook = ipv4_conntrack_help,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_HELPER,
-};
-
-
-/* Refragmenter; last chance. */
-static struct nf_hook_ops ipv4_conntrack_out_ops = {
- .hook = ipv4_confirm,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_POST_ROUTING,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
-};
-
-static struct nf_hook_ops ipv4_conntrack_local_in_ops = {
- .hook = ipv4_confirm,
- .owner = THIS_MODULE,
- .pf = PF_INET,
- .hooknum = NF_IP_LOCAL_IN,
- .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
+ {
+ .hook = ipv4_conntrack_in,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_conntrack_local,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK,
+ },
+ {
+ .hook = ipv4_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+ },
+ {
+ .hook = ipv4_confirm,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
+ {
+ .hook = ipv4_helper,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_HELPER,
+ },
+ {
+ .hook = ipv4_confirm,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM,
+ },
};
-#ifdef CONFIG_SYSCTL
-/* From nf_conntrack_proto_icmp.c */
-extern unsigned int nf_ct_icmp_timeout;
-static struct ctl_table_header *nf_ct_ipv4_sysctl_header;
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+static int log_invalid_proto_min = 0;
+static int log_invalid_proto_max = 255;
-static ctl_table nf_ct_sysctl_table[] = {
+static struct ctl_table ip_ct_sysctl_table[] = {
{
- .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT,
- .procname = "nf_conntrack_icmp_timeout",
- .data = &nf_ct_icmp_timeout,
- .maxlen = sizeof(unsigned int),
+ .procname = "ip_conntrack_max",
+ .maxlen = sizeof(int),
.mode = 0644,
- .proc_handler = &proc_dointvec_jiffies,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
-};
-
-static ctl_table nf_ct_netfilter_table[] = {
{
- .ctl_name = NET_NETFILTER,
- .procname = "netfilter",
- .mode = 0555,
- .child = nf_ct_sysctl_table,
+ .procname = "ip_conntrack_count",
+ .maxlen = sizeof(int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ip_conntrack_buckets",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0444,
+ .proc_handler = proc_dointvec,
},
- { .ctl_name = 0 }
-};
-
-static ctl_table nf_ct_net_table[] = {
{
- .ctl_name = CTL_NET,
- .procname = "net",
- .mode = 0555,
- .child = nf_ct_netfilter_table,
+ .procname = "ip_conntrack_checksum",
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "ip_conntrack_log_invalid",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &log_invalid_proto_min,
+ .extra2 = &log_invalid_proto_max,
},
- { .ctl_name = 0 }
+ { }
};
-#endif
+#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */
/* Fast function for those who don't want to parse /proc (and I don't
blame them). */
@@ -320,31 +264,31 @@ static ctl_table nf_ct_net_table[] = {
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
- struct inet_sock *inet = inet_sk(sk);
- struct nf_conntrack_tuple_hash *h;
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct nf_conntrack_tuple_hash *h;
struct nf_conntrack_tuple tuple;
-
- NF_CT_TUPLE_U_BLANK(&tuple);
- tuple.src.u3.ip = inet->rcv_saddr;
- tuple.src.u.tcp.port = inet->sport;
- tuple.dst.u3.ip = inet->daddr;
- tuple.dst.u.tcp.port = inet->dport;
+
+ memset(&tuple, 0, sizeof(tuple));
+ tuple.src.u3.ip = inet->inet_rcv_saddr;
+ tuple.src.u.tcp.port = inet->inet_sport;
+ tuple.dst.u3.ip = inet->inet_daddr;
+ tuple.dst.u.tcp.port = inet->inet_dport;
tuple.src.l3num = PF_INET;
- tuple.dst.protonum = IPPROTO_TCP;
+ tuple.dst.protonum = sk->sk_protocol;
- /* We only do TCP at the moment: is there a better way? */
- if (strcmp(sk->sk_prot->name, "TCP")) {
- DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
+ /* We only do TCP and SCTP at the moment: is there a better way? */
+ if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) {
+ pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
return -ENOPROTOOPT;
}
if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
- DEBUGP("SO_ORIGINAL_DST: len %u not %u\n",
- *len, sizeof(struct sockaddr_in));
+ pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n",
+ *len, sizeof(struct sockaddr_in));
return -EINVAL;
}
- h = nf_conntrack_find_get(&tuple, NULL);
+ h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple);
if (h) {
struct sockaddr_in sin;
struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
@@ -354,61 +298,60 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
.tuple.dst.u.tcp.port;
sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple.dst.u3.ip;
+ memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
- DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
- NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
+ pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
+ &sin.sin_addr.s_addr, ntohs(sin.sin_port));
nf_ct_put(ct);
if (copy_to_user(user, &sin, sizeof(sin)) != 0)
return -EFAULT;
else
return 0;
}
- DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
- NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port),
- NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port));
+ pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
+ &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
+ &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
return -ENOENT;
}
-#if defined(CONFIG_NF_CT_NETLINK) || \
- defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
-static int ipv4_tuple_to_nfattr(struct sk_buff *skb,
+static int ipv4_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
- NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t),
- &tuple->src.u3.ip);
- NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t),
- &tuple->dst.u3.ip);
+ if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) ||
+ nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip))
+ goto nla_put_failure;
return 0;
-nfattr_failure:
+nla_put_failure:
return -1;
}
-static const size_t cta_min_ip[CTA_IP_MAX] = {
- [CTA_IP_V4_SRC-1] = sizeof(u_int32_t),
- [CTA_IP_V4_DST-1] = sizeof(u_int32_t),
+static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = {
+ [CTA_IP_V4_SRC] = { .type = NLA_U32 },
+ [CTA_IP_V4_DST] = { .type = NLA_U32 },
};
-static int ipv4_nfattr_to_tuple(struct nfattr *tb[],
+static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *t)
{
- if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1])
+ if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
return -EINVAL;
- if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip))
- return -EINVAL;
-
- t->src.u3.ip =
- *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]);
- t->dst.u3.ip =
- *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]);
+ t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]);
+ t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]);
return 0;
}
+
+static int ipv4_nlattr_tuple_size(void)
+{
+ return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1);
+}
#endif
static struct nf_sockopt_ops so_getorigdst = {
@@ -416,175 +359,192 @@ static struct nf_sockopt_ops so_getorigdst = {
.get_optmin = SO_ORIGINAL_DST,
.get_optmax = SO_ORIGINAL_DST+1,
.get = &getorigdst,
+ .owner = THIS_MODULE,
};
-struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = {
+static int ipv4_init_net(struct net *net)
+{
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ struct nf_ip_net *in = &net->ct.nf_ct_proto;
+ in->ctl_table = kmemdup(ip_ct_sysctl_table,
+ sizeof(ip_ct_sysctl_table),
+ GFP_KERNEL);
+ if (!in->ctl_table)
+ return -ENOMEM;
+
+ in->ctl_table[0].data = &nf_conntrack_max;
+ in->ctl_table[1].data = &net->ct.count;
+ in->ctl_table[2].data = &net->ct.htable_size;
+ in->ctl_table[3].data = &net->ct.sysctl_checksum;
+ in->ctl_table[4].data = &net->ct.sysctl_log_invalid;
+#endif
+ return 0;
+}
+
+struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = {
.l3proto = PF_INET,
.name = "ipv4",
.pkt_to_tuple = ipv4_pkt_to_tuple,
.invert_tuple = ipv4_invert_tuple,
.print_tuple = ipv4_print_tuple,
- .print_conntrack = ipv4_print_conntrack,
- .prepare = ipv4_prepare,
- .get_features = ipv4_get_features,
-#if defined(CONFIG_NF_CT_NETLINK) || \
- defined(CONFIG_NF_CT_NETLINK_MODULE)
- .tuple_to_nfattr = ipv4_tuple_to_nfattr,
- .nfattr_to_tuple = ipv4_nfattr_to_tuple,
+ .get_l4proto = ipv4_get_l4proto,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = ipv4_tuple_to_nlattr,
+ .nlattr_tuple_size = ipv4_nlattr_tuple_size,
+ .nlattr_to_tuple = ipv4_nlattr_to_tuple,
+ .nla_policy = ipv4_nla_policy,
#endif
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ .ctl_table_path = "net/ipv4/netfilter",
+#endif
+ .init_net = ipv4_init_net,
.me = THIS_MODULE,
};
-extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4;
-extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4;
-extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp;
-static int init_or_cleanup(int init)
+module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint,
+ &nf_conntrack_htable_size, 0600);
+
+MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET));
+MODULE_ALIAS("ip_conntrack");
+MODULE_LICENSE("GPL");
+
+static int ipv4_net_init(struct net *net)
{
int ret = 0;
- if (!init) goto cleanup;
-
- ret = nf_register_sockopt(&so_getorigdst);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
if (ret < 0) {
- printk(KERN_ERR "Unable to register netfilter socket option\n");
- goto cleanup_nothing;
+ pr_err("nf_conntrack_tcp4: pernet registration failed\n");
+ goto out_tcp;
}
-
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register tcp.\n");
- goto cleanup_sockopt;
+ pr_err("nf_conntrack_udp4: pernet registration failed\n");
+ goto out_udp;
}
-
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register udp.\n");
- goto cleanup_tcp;
+ pr_err("nf_conntrack_icmp4: pernet registration failed\n");
+ goto out_icmp;
}
-
- ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp);
+ ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register icmp.\n");
- goto cleanup_udp;
+ pr_err("nf_conntrack_ipv4: pernet registration failed\n");
+ goto out_ipv4;
}
+ return 0;
+out_ipv4:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
+out_icmp:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
+out_udp:
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+out_tcp:
+ return ret;
+}
- ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4);
- if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register ipv4\n");
- goto cleanup_icmp;
- }
+static void ipv4_net_exit(struct net *net)
+{
+ nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
+}
- ret = nf_register_hook(&ipv4_conntrack_defrag_ops);
- if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n");
- goto cleanup_ipv4;
- }
- ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops);
+static struct pernet_operations ipv4_net_ops = {
+ .init = ipv4_net_init,
+ .exit = ipv4_net_exit,
+};
+
+static int __init nf_conntrack_l3proto_ipv4_init(void)
+{
+ int ret = 0;
+
+ need_conntrack();
+ nf_defrag_ipv4_enable();
+
+ ret = nf_register_sockopt(&so_getorigdst);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n");
- goto cleanup_defragops;
+ printk(KERN_ERR "Unable to register netfilter socket option\n");
+ return ret;
}
- ret = nf_register_hook(&ipv4_conntrack_in_ops);
+ ret = register_pernet_subsys(&ipv4_net_ops);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register pre-routing hook.\n");
- goto cleanup_defraglocalops;
+ pr_err("nf_conntrack_ipv4: can't register pernet ops\n");
+ goto cleanup_sockopt;
}
- ret = nf_register_hook(&ipv4_conntrack_local_out_ops);
+ ret = nf_register_hooks(ipv4_conntrack_ops,
+ ARRAY_SIZE(ipv4_conntrack_ops));
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register local out hook.\n");
- goto cleanup_inops;
+ pr_err("nf_conntrack_ipv4: can't register hooks.\n");
+ goto cleanup_pernet;
}
- ret = nf_register_hook(&ipv4_conntrack_helper_in_ops);
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register local helper hook.\n");
- goto cleanup_inandlocalops;
+ pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
+ goto cleanup_hooks;
}
- ret = nf_register_hook(&ipv4_conntrack_helper_out_ops);
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n");
- goto cleanup_helperinops;
+ pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
+ goto cleanup_tcp4;
}
- ret = nf_register_hook(&ipv4_conntrack_out_ops);
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register post-routing hook.\n");
- goto cleanup_helperoutops;
+ pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
+ goto cleanup_udp4;
}
- ret = nf_register_hook(&ipv4_conntrack_local_in_ops);
+ ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
if (ret < 0) {
- printk("nf_conntrack_ipv4: can't register local in hook.\n");
- goto cleanup_inoutandlocalops;
+ pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
+ goto cleanup_icmpv4;
}
-#ifdef CONFIG_SYSCTL
- nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0);
- if (nf_ct_ipv4_sysctl_header == NULL) {
- printk("nf_conntrack: can't register to sysctl.\n");
- ret = -ENOMEM;
- goto cleanup_localinops;
- }
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ ret = nf_conntrack_ipv4_compat_init();
+ if (ret < 0)
+ goto cleanup_proto;
#endif
-
- /* For use by REJECT target */
- ip_ct_attach = __nf_conntrack_attach;
-
return ret;
-
- cleanup:
- synchronize_net();
- ip_ct_attach = NULL;
-#ifdef CONFIG_SYSCTL
- unregister_sysctl_table(nf_ct_ipv4_sysctl_header);
- cleanup_localinops:
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ cleanup_proto:
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
#endif
- nf_unregister_hook(&ipv4_conntrack_local_in_ops);
- cleanup_inoutandlocalops:
- nf_unregister_hook(&ipv4_conntrack_out_ops);
- cleanup_helperoutops:
- nf_unregister_hook(&ipv4_conntrack_helper_out_ops);
- cleanup_helperinops:
- nf_unregister_hook(&ipv4_conntrack_helper_in_ops);
- cleanup_inandlocalops:
- nf_unregister_hook(&ipv4_conntrack_local_out_ops);
- cleanup_inops:
- nf_unregister_hook(&ipv4_conntrack_in_ops);
- cleanup_defraglocalops:
- nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops);
- cleanup_defragops:
- nf_unregister_hook(&ipv4_conntrack_defrag_ops);
- cleanup_ipv4:
- nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
- cleanup_icmp:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp);
- cleanup_udp:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4);
- cleanup_tcp:
- nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4);
+ cleanup_icmpv4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ cleanup_udp4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ cleanup_tcp4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ cleanup_hooks:
+ nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+ cleanup_pernet:
+ unregister_pernet_subsys(&ipv4_net_ops);
cleanup_sockopt:
nf_unregister_sockopt(&so_getorigdst);
- cleanup_nothing:
return ret;
}
-MODULE_LICENSE("GPL");
-
-static int __init init(void)
-{
- need_conntrack();
- return init_or_cleanup(1);
-}
-
-static void __exit fini(void)
+static void __exit nf_conntrack_l3proto_ipv4_fini(void)
{
- init_or_cleanup(0);
+ synchronize_net();
+#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ nf_conntrack_ipv4_compat_fini();
+#endif
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
+ nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
+ unregister_pernet_subsys(&ipv4_net_ops);
+ nf_unregister_sockopt(&so_getorigdst);
}
-module_init(init);
-module_exit(fini);
-
-EXPORT_SYMBOL(nf_ct_ipv4_gather_frags);
+module_init(nf_conntrack_l3proto_ipv4_init);
+module_exit(nf_conntrack_l3proto_ipv4_fini);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
new file mode 100644
index 00000000000..4c48e434bb1
--- /dev/null
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -0,0 +1,464 @@
+/* ip_conntrack proc compat - based on ip_conntrack_standalone.c
+ *
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/types.h>
+#include <linux/proc_fs.h>
+#include <linux/seq_file.h>
+#include <linux/percpu.h>
+#include <linux/security.h>
+#include <net/net_namespace.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_l3proto.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_acct.h>
+#include <linux/rculist_nulls.h>
+#include <linux/export.h>
+
+struct ct_iter_state {
+ struct seq_net_private p;
+ unsigned int bucket;
+};
+
+static struct hlist_nulls_node *ct_get_first(struct seq_file *seq)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_iter_state *st = seq->private;
+ struct hlist_nulls_node *n;
+
+ for (st->bucket = 0;
+ st->bucket < net->ct.htable_size;
+ st->bucket++) {
+ n = rcu_dereference(
+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ if (!is_a_nulls(n))
+ return n;
+ }
+ return NULL;
+}
+
+static struct hlist_nulls_node *ct_get_next(struct seq_file *seq,
+ struct hlist_nulls_node *head)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_iter_state *st = seq->private;
+
+ head = rcu_dereference(hlist_nulls_next_rcu(head));
+ while (is_a_nulls(head)) {
+ if (likely(get_nulls_value(head) == st->bucket)) {
+ if (++st->bucket >= net->ct.htable_size)
+ return NULL;
+ }
+ head = rcu_dereference(
+ hlist_nulls_first_rcu(&net->ct.hash[st->bucket]));
+ }
+ return head;
+}
+
+static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct hlist_nulls_node *head = ct_get_first(seq);
+
+ if (head)
+ while (pos && (head = ct_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ct_get_idx(seq, *pos);
+}
+
+static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_get_next(s, v);
+}
+
+static void ct_seq_stop(struct seq_file *s, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+{
+ int ret;
+ u32 len;
+ char *secctx;
+
+ ret = security_secid_to_secctx(ct->secmark, &secctx, &len);
+ if (ret)
+ return 0;
+
+ ret = seq_printf(s, "secctx=%s ", secctx);
+
+ security_release_secctx(secctx, len);
+ return ret;
+}
+#else
+static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct)
+{
+ return 0;
+}
+#endif
+
+static int ct_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_tuple_hash *hash = v;
+ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash);
+ const struct nf_conntrack_l3proto *l3proto;
+ const struct nf_conntrack_l4proto *l4proto;
+ int ret = 0;
+
+ NF_CT_ASSERT(ct);
+ if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use)))
+ return 0;
+
+
+ /* we only want to print DIR_ORIGINAL */
+ if (NF_CT_DIRECTION(hash))
+ goto release;
+ if (nf_ct_l3num(ct) != AF_INET)
+ goto release;
+
+ l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct));
+ NF_CT_ASSERT(l3proto);
+ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct));
+ NF_CT_ASSERT(l4proto);
+
+ ret = -ENOSPC;
+ if (seq_printf(s, "%-8s %u %ld ",
+ l4proto->name, nf_ct_protonum(ct),
+ timer_pending(&ct->timeout)
+ ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0)
+ goto release;
+
+ if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct))
+ goto release;
+
+ if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
+ l3proto, l4proto))
+ goto release;
+
+ if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
+ goto release;
+
+ if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
+ if (seq_printf(s, "[UNREPLIED] "))
+ goto release;
+
+ if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple,
+ l3proto, l4proto))
+ goto release;
+
+ if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
+ goto release;
+
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ if (seq_printf(s, "[ASSURED] "))
+ goto release;
+
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ if (seq_printf(s, "mark=%u ", ct->mark))
+ goto release;
+#endif
+
+ if (ct_show_secctx(s, ct))
+ goto release;
+
+ if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use)))
+ goto release;
+ ret = 0;
+release:
+ nf_ct_put(ct);
+ return ret;
+}
+
+static const struct seq_operations ct_seq_ops = {
+ .start = ct_seq_start,
+ .next = ct_seq_next,
+ .stop = ct_seq_stop,
+ .show = ct_seq_show
+};
+
+static int ct_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ct_seq_ops,
+ sizeof(struct ct_iter_state));
+}
+
+static const struct file_operations ct_file_ops = {
+ .owner = THIS_MODULE,
+ .open = ct_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+/* expects */
+struct ct_expect_iter_state {
+ struct seq_net_private p;
+ unsigned int bucket;
+};
+
+static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_expect_iter_state *st = seq->private;
+ struct hlist_node *n;
+
+ for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
+ n = rcu_dereference(
+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ if (n)
+ return n;
+ }
+ return NULL;
+}
+
+static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
+ struct hlist_node *head)
+{
+ struct net *net = seq_file_net(seq);
+ struct ct_expect_iter_state *st = seq->private;
+
+ head = rcu_dereference(hlist_next_rcu(head));
+ while (head == NULL) {
+ if (++st->bucket >= nf_ct_expect_hsize)
+ return NULL;
+ head = rcu_dereference(
+ hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
+ }
+ return head;
+}
+
+static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
+{
+ struct hlist_node *head = ct_expect_get_first(seq);
+
+ if (head)
+ while (pos && (head = ct_expect_get_next(seq, head)))
+ pos--;
+ return pos ? NULL : head;
+}
+
+static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
+ __acquires(RCU)
+{
+ rcu_read_lock();
+ return ct_expect_get_idx(seq, *pos);
+}
+
+static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ (*pos)++;
+ return ct_expect_get_next(seq, v);
+}
+
+static void exp_seq_stop(struct seq_file *seq, void *v)
+ __releases(RCU)
+{
+ rcu_read_unlock();
+}
+
+static int exp_seq_show(struct seq_file *s, void *v)
+{
+ struct nf_conntrack_expect *exp;
+ const struct hlist_node *n = v;
+
+ exp = hlist_entry(n, struct nf_conntrack_expect, hnode);
+
+ if (exp->tuple.src.l3num != AF_INET)
+ return 0;
+
+ if (exp->timeout.function)
+ seq_printf(s, "%ld ", timer_pending(&exp->timeout)
+ ? (long)(exp->timeout.expires - jiffies)/HZ : 0);
+ else
+ seq_printf(s, "- ");
+
+ seq_printf(s, "proto=%u ", exp->tuple.dst.protonum);
+
+ print_tuple(s, &exp->tuple,
+ __nf_ct_l3proto_find(exp->tuple.src.l3num),
+ __nf_ct_l4proto_find(exp->tuple.src.l3num,
+ exp->tuple.dst.protonum));
+ return seq_putc(s, '\n');
+}
+
+static const struct seq_operations exp_seq_ops = {
+ .start = exp_seq_start,
+ .next = exp_seq_next,
+ .stop = exp_seq_stop,
+ .show = exp_seq_show
+};
+
+static int exp_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &exp_seq_ops,
+ sizeof(struct ct_expect_iter_state));
+}
+
+static const struct file_operations ip_exp_file_ops = {
+ .owner = THIS_MODULE,
+ .open = exp_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ if (*pos == 0)
+ return SEQ_START_TOKEN;
+
+ for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu+1;
+ return per_cpu_ptr(net->ct.stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct net *net = seq_file_net(seq);
+ int cpu;
+
+ for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
+ if (!cpu_possible(cpu))
+ continue;
+ *pos = cpu+1;
+ return per_cpu_ptr(net->ct.stat, cpu);
+ }
+
+ return NULL;
+}
+
+static void ct_cpu_seq_stop(struct seq_file *seq, void *v)
+{
+}
+
+static int ct_cpu_seq_show(struct seq_file *seq, void *v)
+{
+ struct net *net = seq_file_net(seq);
+ unsigned int nr_conntracks = atomic_read(&net->ct.count);
+ const struct ip_conntrack_stat *st = v;
+
+ if (v == SEQ_START_TOKEN) {
+ seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n");
+ return 0;
+ }
+
+ seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x "
+ "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
+ nr_conntracks,
+ st->searched,
+ st->found,
+ st->new,
+ st->invalid,
+ st->ignore,
+ st->delete,
+ st->delete_list,
+ st->insert,
+ st->insert_failed,
+ st->drop,
+ st->early_drop,
+ st->error,
+
+ st->expect_new,
+ st->expect_create,
+ st->expect_delete,
+ st->search_restart
+ );
+ return 0;
+}
+
+static const struct seq_operations ct_cpu_seq_ops = {
+ .start = ct_cpu_seq_start,
+ .next = ct_cpu_seq_next,
+ .stop = ct_cpu_seq_stop,
+ .show = ct_cpu_seq_show,
+};
+
+static int ct_cpu_seq_open(struct inode *inode, struct file *file)
+{
+ return seq_open_net(inode, file, &ct_cpu_seq_ops,
+ sizeof(struct seq_net_private));
+}
+
+static const struct file_operations ct_cpu_seq_fops = {
+ .owner = THIS_MODULE,
+ .open = ct_cpu_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = seq_release_net,
+};
+
+static int __net_init ip_conntrack_net_init(struct net *net)
+{
+ struct proc_dir_entry *proc, *proc_exp, *proc_stat;
+
+ proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops);
+ if (!proc)
+ goto err1;
+
+ proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net,
+ &ip_exp_file_ops);
+ if (!proc_exp)
+ goto err2;
+
+ proc_stat = proc_create("ip_conntrack", S_IRUGO,
+ net->proc_net_stat, &ct_cpu_seq_fops);
+ if (!proc_stat)
+ goto err3;
+ return 0;
+
+err3:
+ remove_proc_entry("ip_conntrack_expect", net->proc_net);
+err2:
+ remove_proc_entry("ip_conntrack", net->proc_net);
+err1:
+ return -ENOMEM;
+}
+
+static void __net_exit ip_conntrack_net_exit(struct net *net)
+{
+ remove_proc_entry("ip_conntrack", net->proc_net_stat);
+ remove_proc_entry("ip_conntrack_expect", net->proc_net);
+ remove_proc_entry("ip_conntrack", net->proc_net);
+}
+
+static struct pernet_operations ip_conntrack_net_ops = {
+ .init = ip_conntrack_net_init,
+ .exit = ip_conntrack_net_exit,
+};
+
+int __init nf_conntrack_ipv4_compat_init(void)
+{
+ return register_pernet_subsys(&ip_conntrack_net_ops);
+}
+
+void __exit nf_conntrack_ipv4_compat_fini(void)
+{
+ unregister_pernet_subsys(&ip_conntrack_net_ops);
+}
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 52dc175be39..a338dad41b7 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -1,18 +1,13 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
- *
- * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
- * - enable working with Layer 3 protocol independent connection tracking.
- *
- * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c
*/
#include <linux/types.h>
-#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/netfilter.h>
#include <linux/in.h>
@@ -22,32 +17,33 @@
#include <net/checksum.h>
#include <linux/netfilter_ipv4.h>
#include <net/netfilter/nf_conntrack_tuple.h>
-#include <net/netfilter/nf_conntrack_protocol.h>
+#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <net/netfilter/nf_log.h>
-unsigned long nf_ct_icmp_timeout = 30*HZ;
+static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ;
-#if 0
-#define DEBUGP printk
-#else
-#define DEBUGP(format, args...)
-#endif
+static inline struct nf_icmp_net *icmp_pernet(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp;
+}
-static int icmp_pkt_to_tuple(const struct sk_buff *skb,
- unsigned int dataoff,
- struct nf_conntrack_tuple *tuple)
+static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
+ struct nf_conntrack_tuple *tuple)
{
- struct icmphdr _hdr, *hp;
+ const struct icmphdr *hp;
+ struct icmphdr _hdr;
hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr);
if (hp == NULL)
- return 0;
+ return false;
tuple->dst.u.icmp.type = hp->type;
tuple->src.u.icmp.id = hp->un.echo.id;
tuple->dst.u.icmp.code = hp->code;
- return 1;
+ return true;
}
/* Add 1; spaces filled with 0. */
@@ -62,17 +58,17 @@ static const u_int8_t invmap[] = {
[ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1
};
-static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
- const struct nf_conntrack_tuple *orig)
+static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple,
+ const struct nf_conntrack_tuple *orig)
{
- if (orig->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[orig->dst.u.icmp.type])
- return 0;
+ if (orig->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[orig->dst.u.icmp.type])
+ return false;
tuple->src.u.icmp.id = orig->src.u.icmp.id;
tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1;
tuple->dst.u.icmp.code = orig->dst.u.icmp.code;
- return 1;
+ return true;
}
/* Print out the per-protocol part of the tuple. */
@@ -85,11 +81,9 @@ static int icmp_print_tuple(struct seq_file *s,
ntohs(tuple->src.u.icmp.id));
}
-/* Print out the private part of the conntrack. */
-static int icmp_print_conntrack(struct seq_file *s,
- const struct nf_conn *conntrack)
+static unsigned int *icmp_get_timeouts(struct net *net)
{
- return 0;
+ return &icmp_pernet(net)->timeout;
}
/* Returns verdict for packet, or -1 for invalid. */
@@ -97,29 +91,21 @@ static int icmp_packet(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
- int pf,
- unsigned int hooknum)
+ u_int8_t pf,
+ unsigned int hooknum,
+ unsigned int *timeout)
{
- /* Try to delete connection immediately after all replies:
- won't actually vanish as we still have skb, and del_timer
- means this will only run once even if count hits zero twice
- (theoretically possible with SMP) */
- if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) {
- if (atomic_dec_and_test(&ct->proto.icmp.count)
- && del_timer(&ct->timeout))
- ct->timeout.function((unsigned long)ct);
- } else {
- atomic_inc(&ct->proto.icmp.count);
- nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb);
- nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout);
- }
+ /* Do not immediately delete the connection after the first
+ successful reply to avoid excessive conntrackd traffic
+ and also to handle correctly ICMP echo reply duplicates. */
+ nf_ct_refresh_acct(ct, ctinfo, skb, *timeout);
return NF_ACCEPT;
}
/* Called when a new connection for this protocol found. */
-static int icmp_new(struct nf_conn *conntrack,
- const struct sk_buff *skb, unsigned int dataoff)
+static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb,
+ unsigned int dataoff, unsigned int *timeouts)
{
static const u_int8_t valid_new[] = {
[ICMP_ECHO] = 1,
@@ -128,137 +114,94 @@ static int icmp_new(struct nf_conn *conntrack,
[ICMP_ADDRESS] = 1
};
- if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new)
- || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) {
+ if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) ||
+ !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) {
/* Can't create a new ICMP `conn' with this. */
- DEBUGP("icmp: can't create new conn with type %u\n",
- conntrack->tuplehash[0].tuple.dst.u.icmp.type);
- NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple);
- return 0;
+ pr_debug("icmp: can't create new conn with type %u\n",
+ ct->tuplehash[0].tuple.dst.u.icmp.type);
+ nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple);
+ return false;
}
- atomic_set(&conntrack->proto.icmp.count, 0);
- return 1;
+ return true;
}
-extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4;
/* Returns conntrack if it dealt with ICMP, and filled in skb fields */
static int
-icmp_error_message(struct sk_buff *skb,
- enum ip_conntrack_info *ctinfo,
- unsigned int hooknum)
+icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb,
+ enum ip_conntrack_info *ctinfo,
+ unsigned int hooknum)
{
struct nf_conntrack_tuple innertuple, origtuple;
- struct {
- struct icmphdr icmp;
- struct iphdr ip;
- } _in, *inside;
- struct nf_conntrack_protocol *innerproto;
- struct nf_conntrack_tuple_hash *h;
- int dataoff;
+ const struct nf_conntrack_l4proto *innerproto;
+ const struct nf_conntrack_tuple_hash *h;
+ u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE;
NF_CT_ASSERT(skb->nfct == NULL);
- /* Not enough header? */
- inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in);
- if (inside == NULL)
- return -NF_ACCEPT;
-
- /* Ignore ICMP's containing fragments (shouldn't happen) */
- if (inside->ip.frag_off & htons(IP_OFFSET)) {
- DEBUGP("icmp_error_message: fragment of proto %u\n",
- inside->ip.protocol);
- return -NF_ACCEPT;
- }
-
- innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol);
- dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp);
/* Are they talking about one of our connections? */
- if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET,
- inside->ip.protocol, &origtuple,
- &nf_conntrack_l3proto_ipv4, innerproto)) {
- DEBUGP("icmp_error_message: ! get_tuple p=%u",
- inside->ip.protocol);
+ if (!nf_ct_get_tuplepr(skb,
+ skb_network_offset(skb) + ip_hdrlen(skb)
+ + sizeof(struct icmphdr),
+ PF_INET, &origtuple)) {
+ pr_debug("icmp_error_message: failed to get tuple\n");
return -NF_ACCEPT;
}
- /* Ordinarily, we'd expect the inverted tupleproto, but it's
- been preserved inside the ICMP. */
- if (!nf_ct_invert_tuple(&innertuple, &origtuple,
+ /* rcu_read_lock()ed by nf_hook_slow */
+ innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum);
+
+ /* Ordinarily, we'd expect the inverted tupleproto, but it's
+ been preserved inside the ICMP. */
+ if (!nf_ct_invert_tuple(&innertuple, &origtuple,
&nf_conntrack_l3proto_ipv4, innerproto)) {
- DEBUGP("icmp_error_message: no match\n");
+ pr_debug("icmp_error_message: no match\n");
return -NF_ACCEPT;
}
*ctinfo = IP_CT_RELATED;
- h = nf_conntrack_find_get(&innertuple, NULL);
+ h = nf_conntrack_find_get(net, zone, &innertuple);
if (!h) {
- /* Locally generated ICMPs will match inverted if they
- haven't been SNAT'ed yet */
- /* FIXME: NAT code has to handle half-done double NAT --RR */
- if (hooknum == NF_IP_LOCAL_OUT)
- h = nf_conntrack_find_get(&origtuple, NULL);
-
- if (!h) {
- DEBUGP("icmp_error_message: no match\n");
- return -NF_ACCEPT;
- }
-
- /* Reverse direction from that found */
- if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
- } else {
- if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
- *ctinfo += IP_CT_IS_REPLY;
+ pr_debug("icmp_error_message: no match\n");
+ return -NF_ACCEPT;
}
- /* Update skb to refer to this connection */
- skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
- skb->nfctinfo = *ctinfo;
- return -NF_ACCEPT;
+ if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY)
+ *ctinfo += IP_CT_IS_REPLY;
+
+ /* Update skb to refer to this connection */
+ skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general;
+ skb->nfctinfo = *ctinfo;
+ return NF_ACCEPT;
}
/* Small and modified version of icmp_rcv */
static int
-icmp_error(struct sk_buff *skb, unsigned int dataoff,
- enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum)
+icmp_error(struct net *net, struct nf_conn *tmpl,
+ struct sk_buff *skb, unsigned int dataoff,
+ enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum)
{
- struct icmphdr _ih, *icmph;
+ const struct icmphdr *icmph;
+ struct icmphdr _ih;
/* Not enough header? */
- icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih);
+ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: short packet ");
+ if (LOG_INVALID(net, IPPROTO_ICMP))
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
+ NULL, "nf_ct_icmp: short packet ");
return -NF_ACCEPT;
}
/* See ip_conntrack_proto_tcp.c */
- if (hooknum != NF_IP_PRE_ROUTING)
- goto checksum_skipped;
-
- switch (skb->ip_summed) {
- case CHECKSUM_HW:
- if (!(u16)csum_fold(skb->csum))
- break;
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
+ nf_ip_checksum(skb, hooknum, dataoff, 0)) {
+ if (LOG_INVALID(net, IPPROTO_ICMP))
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
- case CHECKSUM_NONE:
- if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL,
- NULL,
- "nf_ct_icmp: bad ICMP checksum ");
- return -NF_ACCEPT;
- }
- default:
- break;
}
-checksum_skipped:
/*
* 18 is the highest 'known' ICMP type. Anything else is a mystery
*
@@ -266,97 +209,220 @@ checksum_skipped:
* discarded.
*/
if (icmph->type > NR_ICMP_TYPES) {
- if (LOG_INVALID(IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ if (LOG_INVALID(net, IPPROTO_ICMP))
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
}
/* Need to track icmp error message? */
- if (icmph->type != ICMP_DEST_UNREACH
- && icmph->type != ICMP_SOURCE_QUENCH
- && icmph->type != ICMP_TIME_EXCEEDED
- && icmph->type != ICMP_PARAMETERPROB
- && icmph->type != ICMP_REDIRECT)
+ if (icmph->type != ICMP_DEST_UNREACH &&
+ icmph->type != ICMP_SOURCE_QUENCH &&
+ icmph->type != ICMP_TIME_EXCEEDED &&
+ icmph->type != ICMP_PARAMETERPROB &&
+ icmph->type != ICMP_REDIRECT)
return NF_ACCEPT;
- return icmp_error_message(skb, ctinfo, hooknum);
+ return icmp_error_message(net, tmpl, skb, ctinfo, hooknum);
}
-#if defined(CONFIG_NF_CT_NETLINK) || \
- defined(CONFIG_NF_CT_NETLINK_MODULE)
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
#include <linux/netfilter/nfnetlink.h>
#include <linux/netfilter/nfnetlink_conntrack.h>
-static int icmp_tuple_to_nfattr(struct sk_buff *skb,
+static int icmp_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *t)
{
- NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t),
- &t->src.u.icmp.id);
- NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t),
- &t->dst.u.icmp.type);
- NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t),
- &t->dst.u.icmp.code);
-
+ if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) ||
+ nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code))
+ goto nla_put_failure;
return 0;
-nfattr_failure:
+nla_put_failure:
return -1;
}
-static const size_t cta_min_proto[CTA_PROTO_MAX] = {
- [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t),
- [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t),
- [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t)
+static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
+ [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 },
+ [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 },
};
-static int icmp_nfattr_to_tuple(struct nfattr *tb[],
+static int icmp_nlattr_to_tuple(struct nlattr *tb[],
struct nf_conntrack_tuple *tuple)
{
- if (!tb[CTA_PROTO_ICMP_TYPE-1]
- || !tb[CTA_PROTO_ICMP_CODE-1]
- || !tb[CTA_PROTO_ICMP_ID-1])
+ if (!tb[CTA_PROTO_ICMP_TYPE] ||
+ !tb[CTA_PROTO_ICMP_CODE] ||
+ !tb[CTA_PROTO_ICMP_ID])
return -EINVAL;
- if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto))
+ tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+ tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+ tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+
+ if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+ !invmap[tuple->dst.u.icmp.type])
return -EINVAL;
- tuple->dst.u.icmp.type =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]);
- tuple->dst.u.icmp.code =
- *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]);
- tuple->src.u.icmp.id =
- *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]);
+ return 0;
+}
- if (tuple->dst.u.icmp.type >= sizeof(invmap)
- || !invmap[tuple->dst.u.icmp.type])
- return -EINVAL;
+static int icmp_nlattr_tuple_size(void)
+{
+ return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1);
+}
+#endif
+
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_cttimeout.h>
+
+static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[],
+ struct net *net, void *data)
+{
+ unsigned int *timeout = data;
+ struct nf_icmp_net *in = icmp_pernet(net);
+
+ if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) {
+ *timeout =
+ ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ;
+ } else {
+ /* Set default ICMP timeout. */
+ *timeout = in->timeout;
+ }
+ return 0;
+}
+static int
+icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data)
+{
+ const unsigned int *timeout = data;
+
+ if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ)))
+ goto nla_put_failure;
return 0;
+
+nla_put_failure:
+ return -ENOSPC;
}
+
+static const struct nla_policy
+icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = {
+ [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 },
+};
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+
+#ifdef CONFIG_SYSCTL
+static struct ctl_table icmp_sysctl_table[] = {
+ {
+ .procname = "nf_conntrack_icmp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+static struct ctl_table icmp_compat_sysctl_table[] = {
+ {
+ .procname = "ip_conntrack_icmp_timeout",
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_jiffies,
+ },
+ { }
+};
+#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */
+#endif /* CONFIG_SYSCTL */
+
+static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+ pn->ctl_table = kmemdup(icmp_sysctl_table,
+ sizeof(icmp_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_table)
+ return -ENOMEM;
+
+ pn->ctl_table[0].data = &in->timeout;
#endif
+ return 0;
+}
-struct nf_conntrack_protocol nf_conntrack_protocol_icmp =
+static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn,
+ struct nf_icmp_net *in)
+{
+#ifdef CONFIG_SYSCTL
+#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT
+ pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table,
+ sizeof(icmp_compat_sysctl_table),
+ GFP_KERNEL);
+ if (!pn->ctl_compat_table)
+ return -ENOMEM;
+
+ pn->ctl_compat_table[0].data = &in->timeout;
+#endif
+#endif
+ return 0;
+}
+
+static int icmp_init_net(struct net *net, u_int16_t proto)
+{
+ int ret;
+ struct nf_icmp_net *in = icmp_pernet(net);
+ struct nf_proto_net *pn = &in->pn;
+
+ in->timeout = nf_ct_icmp_timeout;
+
+ ret = icmp_kmemdup_compat_sysctl_table(pn, in);
+ if (ret < 0)
+ return ret;
+
+ ret = icmp_kmemdup_sysctl_table(pn, in);
+ if (ret < 0)
+ nf_ct_kfree_compat_sysctl_table(pn);
+
+ return ret;
+}
+
+static struct nf_proto_net *icmp_get_net_proto(struct net *net)
+{
+ return &net->ct.nf_ct_proto.icmp.pn;
+}
+
+struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly =
{
- .list = { NULL, NULL },
.l3proto = PF_INET,
- .proto = IPPROTO_ICMP,
+ .l4proto = IPPROTO_ICMP,
.name = "icmp",
.pkt_to_tuple = icmp_pkt_to_tuple,
.invert_tuple = icmp_invert_tuple,
.print_tuple = icmp_print_tuple,
- .print_conntrack = icmp_print_conntrack,
.packet = icmp_packet,
+ .get_timeouts = icmp_get_timeouts,
.new = icmp_new,
.error = icmp_error,
.destroy = NULL,
.me = NULL,
-#if defined(CONFIG_NF_CT_NETLINK) || \
- defined(CONFIG_NF_CT_NETLINK_MODULE)
- .tuple_to_nfattr = icmp_tuple_to_nfattr,
- .nfattr_to_tuple = icmp_nfattr_to_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .tuple_to_nlattr = icmp_tuple_to_nlattr,
+ .nlattr_tuple_size = icmp_nlattr_tuple_size,
+ .nlattr_to_tuple = icmp_nlattr_to_tuple,
+ .nla_policy = icmp_nla_policy,
#endif
+#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT)
+ .ctnl_timeout = {
+ .nlattr_to_obj = icmp_timeout_nlattr_to_obj,
+ .obj_to_nlattr = icmp_timeout_obj_to_nlattr,
+ .nlattr_max = CTA_TIMEOUT_ICMP_MAX,
+ .obj_size = sizeof(unsigned int),
+ .nla_policy = icmp_timeout_nla_policy,
+ },
+#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */
+ .init_net = icmp_init_net,
+ .get_net_proto = icmp_get_net_proto,
};
-
-EXPORT_SYMBOL(nf_conntrack_protocol_icmp);
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
new file mode 100644
index 00000000000..b8f6381c7d0
--- /dev/null
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -0,0 +1,131 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <linux/netfilter_bridge.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#include <net/netfilter/nf_conntrack.h>
+#endif
+#include <net/netfilter/nf_conntrack_zones.h>
+
+static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
+{
+ int err;
+
+ skb_orphan(skb);
+
+ local_bh_disable();
+ err = ip_defrag(skb, user);
+ local_bh_enable();
+
+ if (!err) {
+ ip_send_check(ip_hdr(skb));
+ skb->ignore_df = 1;
+ }
+
+ return err;
+}
+
+static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
+ struct sk_buff *skb)
+{
+ u16 zone = NF_CT_DEFAULT_ZONE;
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+ if (skb->nfct)
+ zone = nf_ct_zone((struct nf_conn *)skb->nfct);
+#endif
+
+#ifdef CONFIG_BRIDGE_NETFILTER
+ if (skb->nf_bridge &&
+ skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING)
+ return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone;
+#endif
+ if (hooknum == NF_INET_PRE_ROUTING)
+ return IP_DEFRAG_CONNTRACK_IN + zone;
+ else
+ return IP_DEFRAG_CONNTRACK_OUT + zone;
+}
+
+static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct sock *sk = skb->sk;
+ struct inet_sock *inet = inet_sk(skb->sk);
+
+ if (sk && (sk->sk_family == PF_INET) &&
+ inet->nodefrag)
+ return NF_ACCEPT;
+
+#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
+#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE)
+ /* Previously seen (loopback)? Ignore. Do this before
+ fragment check. */
+ if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct))
+ return NF_ACCEPT;
+#endif
+#endif
+ /* Gather fragments. */
+ if (ip_is_fragment(ip_hdr(skb))) {
+ enum ip_defrag_users user =
+ nf_ct_defrag_user(ops->hooknum, skb);
+
+ if (nf_ct_ipv4_gather_frags(skb, user))
+ return NF_STOLEN;
+ }
+ return NF_ACCEPT;
+}
+
+static struct nf_hook_ops ipv4_defrag_ops[] = {
+ {
+ .hook = ipv4_conntrack_defrag,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_PRE_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
+ },
+ {
+ .hook = ipv4_conntrack_defrag,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_OUT,
+ .priority = NF_IP_PRI_CONNTRACK_DEFRAG,
+ },
+};
+
+static int __init nf_defrag_init(void)
+{
+ return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops));
+}
+
+static void __exit nf_defrag_fini(void)
+{
+ nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops));
+}
+
+void nf_defrag_ipv4_enable(void)
+{
+}
+EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable);
+
+module_init(nf_defrag_init);
+module_exit(nf_defrag_fini);
+
+MODULE_LICENSE("GPL");
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
new file mode 100644
index 00000000000..574f7ebba0b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -0,0 +1,631 @@
+/*
+ * H.323 extension for NAT alteration.
+ *
+ * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * This source code is licensed under General Public License version 2.
+ *
+ * Based on the 'brute force' H.323 NAT module by
+ * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
+ */
+
+#include <linux/module.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <linux/netfilter/nf_conntrack_h323.h>
+
+/****************************************************************************/
+static int set_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned char **data, int dataoff,
+ unsigned int addroff, __be32 ip, __be16 port)
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct {
+ __be32 ip;
+ __be16 port;
+ } __attribute__ ((__packed__)) buf;
+ const struct tcphdr *th;
+ struct tcphdr _tcph;
+
+ buf.ip = ip;
+ buf.port = port;
+ addroff += dataoff;
+
+ if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
+ if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo,
+ protoff, addroff, sizeof(buf),
+ (char *) &buf, sizeof(buf))) {
+ net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n");
+ return -1;
+ }
+
+ /* Relocate data pointer */
+ th = skb_header_pointer(skb, ip_hdrlen(skb),
+ sizeof(_tcph), &_tcph);
+ if (th == NULL)
+ return -1;
+ *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff;
+ } else {
+ if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo,
+ protoff, addroff, sizeof(buf),
+ (char *) &buf, sizeof(buf))) {
+ net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n");
+ return -1;
+ }
+ /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy
+ * or pull everything in a linear buffer, so we can safely
+ * use the skb pointers now */
+ *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr);
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+static int set_h225_addr(struct sk_buff *skb, unsigned int protoff,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr,
+ union nf_inet_addr *addr, __be16 port)
+{
+ return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip,
+ addr->ip, port);
+}
+
+/****************************************************************************/
+static int set_h245_addr(struct sk_buff *skb, unsigned protoff,
+ unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ union nf_inet_addr *addr, __be16 port)
+{
+ return set_addr(skb, protoff, data, dataoff,
+ taddr->unicastAddress.iPAddress.network,
+ addr->ip, port);
+}
+
+/****************************************************************************/
+static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int count)
+{
+ const struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ int i;
+ __be16 port;
+ union nf_inet_addr addr;
+
+ for (i = 0; i < count; i++) {
+ if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) {
+ if (addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+ port == info->sig_port[dir]) {
+ /* GW->GK */
+
+ /* Fix for Gnomemeeting */
+ if (i > 0 &&
+ get_h225_addr(ct, *data, &taddr[0],
+ &addr, &port) &&
+ (ntohl(addr.ip) & 0xff000000) == 0x7f000000)
+ i = 0;
+
+ pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n",
+ &addr.ip, port,
+ &ct->tuplehash[!dir].tuple.dst.u3.ip,
+ info->sig_port[!dir]);
+ return set_h225_addr(skb, protoff, data, 0,
+ &taddr[i],
+ &ct->tuplehash[!dir].
+ tuple.dst.u3,
+ info->sig_port[!dir]);
+ } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip &&
+ port == info->sig_port[dir]) {
+ /* GK->GW */
+ pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n",
+ &addr.ip, port,
+ &ct->tuplehash[!dir].tuple.src.u3.ip,
+ info->sig_port[!dir]);
+ return set_h225_addr(skb, protoff, data, 0,
+ &taddr[i],
+ &ct->tuplehash[!dir].
+ tuple.src.u3,
+ info->sig_port[!dir]);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int count)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ int i;
+ __be16 port;
+ union nf_inet_addr addr;
+
+ for (i = 0; i < count; i++) {
+ if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) &&
+ addr.ip == ct->tuplehash[dir].tuple.src.u3.ip &&
+ port == ct->tuplehash[dir].tuple.src.u.udp.port) {
+ pr_debug("nf_nat_ras: set rasAddress %pI4:%hu->%pI4:%hu\n",
+ &addr.ip, ntohs(port),
+ &ct->tuplehash[!dir].tuple.dst.u3.ip,
+ ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port));
+ return set_h225_addr(skb, protoff, data, 0, &taddr[i],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ ct->tuplehash[!dir].tuple.
+ dst.u.udp.port);
+ }
+ }
+
+ return 0;
+}
+
+/****************************************************************************/
+static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr,
+ __be16 port, __be16 rtp_port,
+ struct nf_conntrack_expect *rtp_exp,
+ struct nf_conntrack_expect *rtcp_exp)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ int i;
+ u_int16_t nated_port;
+
+ /* Set expectations for NAT */
+ rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port;
+ rtp_exp->expectfn = nf_nat_follow_master;
+ rtp_exp->dir = !dir;
+ rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port;
+ rtcp_exp->expectfn = nf_nat_follow_master;
+ rtcp_exp->dir = !dir;
+
+ /* Lookup existing expects */
+ for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) {
+ if (info->rtp_port[i][dir] == rtp_port) {
+ /* Expected */
+
+ /* Use allocated ports first. This will refresh
+ * the expects */
+ rtp_exp->tuple.dst.u.udp.port = info->rtp_port[i][dir];
+ rtcp_exp->tuple.dst.u.udp.port =
+ htons(ntohs(info->rtp_port[i][dir]) + 1);
+ break;
+ } else if (info->rtp_port[i][dir] == 0) {
+ /* Not expected */
+ break;
+ }
+ }
+
+ /* Run out of expectations */
+ if (i >= H323_RTP_CHANNEL_MAX) {
+ net_notice_ratelimited("nf_nat_h323: out of expectations\n");
+ return 0;
+ }
+
+ /* Try to get a pair of ports. */
+ for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port);
+ nated_port != 0; nated_port += 2) {
+ int ret;
+
+ rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
+ ret = nf_ct_expect_related(rtp_exp);
+ if (ret == 0) {
+ rtcp_exp->tuple.dst.u.udp.port =
+ htons(nated_port + 1);
+ ret = nf_ct_expect_related(rtcp_exp);
+ if (ret == 0)
+ break;
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
+ nf_ct_unexpect_related(rtp_exp);
+ nated_port = 0;
+ break;
+ }
+ } else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_h323: out of RTP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (set_h245_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons((port & htons(1)) ? nated_port + 1 :
+ nated_port)) == 0) {
+ /* Save ports */
+ info->rtp_port[i][dir] = rtp_port;
+ info->rtp_port[i][!dir] = htons(nated_port);
+ } else {
+ nf_ct_unexpect_related(rtp_exp);
+ nf_ct_unexpect_related(rtcp_exp);
+ return -1;
+ }
+
+ /* Success */
+ pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n",
+ &rtp_exp->tuple.src.u3.ip,
+ ntohs(rtp_exp->tuple.src.u.udp.port),
+ &rtp_exp->tuple.dst.u3.ip,
+ ntohs(rtp_exp->tuple.dst.u.udp.port));
+ pr_debug("nf_nat_h323: expect RTCP %pI4:%hu->%pI4:%hu\n",
+ &rtcp_exp->tuple.src.u3.ip,
+ ntohs(rtcp_exp->tuple.src.u.udp.port),
+ &rtcp_exp->tuple.dst.u3.ip,
+ ntohs(rtcp_exp->tuple.dst.u.udp.port));
+
+ return 0;
+}
+
+/****************************************************************************/
+static int nat_t120(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ H245_TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ u_int16_t nated_port = ntohs(port);
+
+ /* Set expectations for NAT */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->expectfn = nf_nat_follow_master;
+ exp->dir = !dir;
+
+ /* Try to get same port: if not, try to change it. */
+ for (; nated_port != 0; nated_port++) {
+ int ret;
+
+ exp->tuple.dst.u.tcp.port = htons(nated_port);
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_h323: out of TCP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (set_h245_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port)) < 0) {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+
+ pr_debug("nf_nat_h323: expect T.120 %pI4:%hu->%pI4:%hu\n",
+ &exp->tuple.src.u3.ip,
+ ntohs(exp->tuple.src.u.tcp.port),
+ &exp->tuple.dst.u3.ip,
+ ntohs(exp->tuple.dst.u.tcp.port));
+
+ return 0;
+}
+
+/****************************************************************************/
+static int nat_h245(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data, int dataoff,
+ TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ u_int16_t nated_port = ntohs(port);
+
+ /* Set expectations for NAT */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->expectfn = nf_nat_follow_master;
+ exp->dir = !dir;
+
+ /* Check existing expects */
+ if (info->sig_port[dir] == port)
+ nated_port = ntohs(info->sig_port[!dir]);
+
+ /* Try to get same port: if not, try to change it. */
+ for (; nated_port != 0; nated_port++) {
+ int ret;
+
+ exp->tuple.dst.u.tcp.port = htons(nated_port);
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (set_h225_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port)) == 0) {
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+ } else {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+
+ pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n",
+ &exp->tuple.src.u3.ip,
+ ntohs(exp->tuple.src.u.tcp.port),
+ &exp->tuple.dst.u3.ip,
+ ntohs(exp->tuple.dst.u.tcp.port));
+
+ return 0;
+}
+
+/****************************************************************************
+ * This conntrack expect function replaces nf_conntrack_q931_expect()
+ * which was set by nf_conntrack_h323.c.
+ ****************************************************************************/
+static void ip_nat_q931_expect(struct nf_conn *new,
+ struct nf_conntrack_expect *this)
+{
+ struct nf_nat_range range;
+
+ if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */
+ nf_nat_follow_master(new, this);
+ return;
+ }
+
+ /* This must be a fresh one. */
+ BUG_ON(new->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr =
+ new->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto = range.max_proto = this->saved_proto;
+ range.min_addr = range.max_addr =
+ new->master->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
+}
+
+/****************************************************************************/
+static int nat_q931(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff, unsigned char **data,
+ TransportAddress *taddr, int idx,
+ __be16 port, struct nf_conntrack_expect *exp)
+{
+ struct nf_ct_h323_master *info = nfct_help_data(ct);
+ int dir = CTINFO2DIR(ctinfo);
+ u_int16_t nated_port = ntohs(port);
+ union nf_inet_addr addr;
+
+ /* Set expectations for NAT */
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->expectfn = ip_nat_q931_expect;
+ exp->dir = !dir;
+
+ /* Check existing expects */
+ if (info->sig_port[dir] == port)
+ nated_port = ntohs(info->sig_port[!dir]);
+
+ /* Try to get same port: if not, try to change it. */
+ for (; nated_port != 0; nated_port++) {
+ int ret;
+
+ exp->tuple.dst.u.tcp.port = htons(nated_port);
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_ras: out of TCP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (set_h225_addr(skb, protoff, data, 0, &taddr[idx],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port)) == 0) {
+ /* Save ports */
+ info->sig_port[dir] = port;
+ info->sig_port[!dir] = htons(nated_port);
+
+ /* Fix for Gnomemeeting */
+ if (idx > 0 &&
+ get_h225_addr(ct, *data, &taddr[0], &addr, &port) &&
+ (ntohl(addr.ip) & 0xff000000) == 0x7f000000) {
+ set_h225_addr(skb, protoff, data, 0, &taddr[0],
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ info->sig_port[!dir]);
+ }
+ } else {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+
+ /* Success */
+ pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n",
+ &exp->tuple.src.u3.ip,
+ ntohs(exp->tuple.src.u.tcp.port),
+ &exp->tuple.dst.u3.ip,
+ ntohs(exp->tuple.dst.u.tcp.port));
+
+ return 0;
+}
+
+/****************************************************************************/
+static void ip_nat_callforwarding_expect(struct nf_conn *new,
+ struct nf_conntrack_expect *this)
+{
+ struct nf_nat_range range;
+
+ /* This must be a fresh one. */
+ BUG_ON(new->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr =
+ new->tuplehash[!this->dir].tuple.src.u3;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED);
+ range.min_proto = range.max_proto = this->saved_proto;
+ range.min_addr = range.max_addr = this->saved_addr;
+ nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST);
+}
+
+/****************************************************************************/
+static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ unsigned char **data, int dataoff,
+ TransportAddress *taddr, __be16 port,
+ struct nf_conntrack_expect *exp)
+{
+ int dir = CTINFO2DIR(ctinfo);
+ u_int16_t nated_port;
+
+ /* Set expectations for NAT */
+ exp->saved_addr = exp->tuple.dst.u3;
+ exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip;
+ exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port;
+ exp->expectfn = ip_nat_callforwarding_expect;
+ exp->dir = !dir;
+
+ /* Try to get same port: if not, try to change it. */
+ for (nated_port = ntohs(port); nated_port != 0; nated_port++) {
+ int ret;
+
+ exp->tuple.dst.u.tcp.port = htons(nated_port);
+ ret = nf_ct_expect_related(exp);
+ if (ret == 0)
+ break;
+ else if (ret != -EBUSY) {
+ nated_port = 0;
+ break;
+ }
+ }
+
+ if (nated_port == 0) { /* No port available */
+ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n");
+ return 0;
+ }
+
+ /* Modify signal */
+ if (!set_h225_addr(skb, protoff, data, dataoff, taddr,
+ &ct->tuplehash[!dir].tuple.dst.u3,
+ htons(nated_port)) == 0) {
+ nf_ct_unexpect_related(exp);
+ return -1;
+ }
+
+ /* Success */
+ pr_debug("nf_nat_q931: expect Call Forwarding %pI4:%hu->%pI4:%hu\n",
+ &exp->tuple.src.u3.ip,
+ ntohs(exp->tuple.src.u.tcp.port),
+ &exp->tuple.dst.u3.ip,
+ ntohs(exp->tuple.dst.u.tcp.port));
+
+ return 0;
+}
+
+static struct nf_ct_helper_expectfn q931_nat = {
+ .name = "Q.931",
+ .expectfn = ip_nat_q931_expect,
+};
+
+static struct nf_ct_helper_expectfn callforwarding_nat = {
+ .name = "callforwarding",
+ .expectfn = ip_nat_callforwarding_expect,
+};
+
+/****************************************************************************/
+static int __init init(void)
+{
+ BUG_ON(set_h245_addr_hook != NULL);
+ BUG_ON(set_h225_addr_hook != NULL);
+ BUG_ON(set_sig_addr_hook != NULL);
+ BUG_ON(set_ras_addr_hook != NULL);
+ BUG_ON(nat_rtp_rtcp_hook != NULL);
+ BUG_ON(nat_t120_hook != NULL);
+ BUG_ON(nat_h245_hook != NULL);
+ BUG_ON(nat_callforwarding_hook != NULL);
+ BUG_ON(nat_q931_hook != NULL);
+
+ RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr);
+ RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr);
+ RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr);
+ RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp);
+ RCU_INIT_POINTER(nat_t120_hook, nat_t120);
+ RCU_INIT_POINTER(nat_h245_hook, nat_h245);
+ RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding);
+ RCU_INIT_POINTER(nat_q931_hook, nat_q931);
+ nf_ct_helper_expectfn_register(&q931_nat);
+ nf_ct_helper_expectfn_register(&callforwarding_nat);
+ return 0;
+}
+
+/****************************************************************************/
+static void __exit fini(void)
+{
+ RCU_INIT_POINTER(set_h245_addr_hook, NULL);
+ RCU_INIT_POINTER(set_h225_addr_hook, NULL);
+ RCU_INIT_POINTER(set_sig_addr_hook, NULL);
+ RCU_INIT_POINTER(set_ras_addr_hook, NULL);
+ RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL);
+ RCU_INIT_POINTER(nat_t120_hook, NULL);
+ RCU_INIT_POINTER(nat_h245_hook, NULL);
+ RCU_INIT_POINTER(nat_callforwarding_hook, NULL);
+ RCU_INIT_POINTER(nat_q931_hook, NULL);
+ nf_ct_helper_expectfn_unregister(&q931_nat);
+ nf_ct_helper_expectfn_unregister(&callforwarding_nat);
+ synchronize_rcu();
+}
+
+/****************************************************************************/
+module_init(init);
+module_exit(fini);
+
+MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>");
+MODULE_DESCRIPTION("H.323 NAT helper");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("ip_nat_h323");
diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
new file mode 100644
index 00000000000..d8b2e14efdd
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c
@@ -0,0 +1,281 @@
+/*
+ * (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2011 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/secure_seq.h>
+#include <net/checksum.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4;
+
+#ifdef CONFIG_XFRM
+static void nf_nat_ipv4_decode_session(struct sk_buff *skb,
+ const struct nf_conn *ct,
+ enum ip_conntrack_dir dir,
+ unsigned long statusbit,
+ struct flowi *fl)
+{
+ const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple;
+ struct flowi4 *fl4 = &fl->u.ip4;
+
+ if (ct->status & statusbit) {
+ fl4->daddr = t->dst.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_dport = t->dst.u.all;
+ }
+
+ statusbit ^= IPS_NAT_MASK;
+
+ if (ct->status & statusbit) {
+ fl4->saddr = t->src.u3.ip;
+ if (t->dst.protonum == IPPROTO_TCP ||
+ t->dst.protonum == IPPROTO_UDP ||
+ t->dst.protonum == IPPROTO_UDPLITE ||
+ t->dst.protonum == IPPROTO_DCCP ||
+ t->dst.protonum == IPPROTO_SCTP)
+ fl4->fl4_sport = t->src.u.all;
+ }
+}
+#endif /* CONFIG_XFRM */
+
+static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t,
+ const struct nf_nat_range *range)
+{
+ return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) &&
+ ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip);
+}
+
+static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t,
+ __be16 dport)
+{
+ return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport);
+}
+
+static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb,
+ unsigned int iphdroff,
+ const struct nf_nat_l4proto *l4proto,
+ const struct nf_conntrack_tuple *target,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph;
+ unsigned int hdroff;
+
+ if (!skb_make_writable(skb, iphdroff + sizeof(*iph)))
+ return false;
+
+ iph = (void *)skb->data + iphdroff;
+ hdroff = iphdroff + iph->ihl * 4;
+
+ if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff,
+ target, maniptype))
+ return false;
+ iph = (void *)skb->data + iphdroff;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ csum_replace4(&iph->check, iph->saddr, target->src.u3.ip);
+ iph->saddr = target->src.u3.ip;
+ } else {
+ csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip);
+ iph->daddr = target->dst.u3.ip;
+ }
+ return true;
+}
+
+static void nf_nat_ipv4_csum_update(struct sk_buff *skb,
+ unsigned int iphdroff, __sum16 *check,
+ const struct nf_conntrack_tuple *t,
+ enum nf_nat_manip_type maniptype)
+{
+ struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff);
+ __be32 oldip, newip;
+
+ if (maniptype == NF_NAT_MANIP_SRC) {
+ oldip = iph->saddr;
+ newip = t->src.u3.ip;
+ } else {
+ oldip = iph->daddr;
+ newip = t->dst.u3.ip;
+ }
+ inet_proto_csum_replace4(check, skb, oldip, newip, 1);
+}
+
+static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb,
+ u8 proto, void *data, __sum16 *check,
+ int datalen, int oldlen)
+{
+ const struct iphdr *iph = ip_hdr(skb);
+ struct rtable *rt = skb_rtable(skb);
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ if (!(rt->rt_flags & RTCF_LOCAL) &&
+ (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) {
+ skb->ip_summed = CHECKSUM_PARTIAL;
+ skb->csum_start = skb_headroom(skb) +
+ skb_network_offset(skb) +
+ ip_hdrlen(skb);
+ skb->csum_offset = (void *)check - data;
+ *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, proto, 0);
+ } else {
+ *check = 0;
+ *check = csum_tcpudp_magic(iph->saddr, iph->daddr,
+ datalen, proto,
+ csum_partial(data, datalen,
+ 0));
+ if (proto == IPPROTO_UDP && !*check)
+ *check = CSUM_MANGLED_0;
+ }
+ } else
+ inet_proto_csum_replace2(check, skb,
+ htons(oldlen), htons(datalen), 1);
+}
+
+static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[],
+ struct nf_nat_range *range)
+{
+ if (tb[CTA_NAT_V4_MINIP]) {
+ range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]);
+ range->flags |= NF_NAT_RANGE_MAP_IPS;
+ }
+
+ if (tb[CTA_NAT_V4_MAXIP])
+ range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]);
+ else
+ range->max_addr.ip = range->min_addr.ip;
+
+ return 0;
+}
+
+static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = {
+ .l3proto = NFPROTO_IPV4,
+ .in_range = nf_nat_ipv4_in_range,
+ .secure_port = nf_nat_ipv4_secure_port,
+ .manip_pkt = nf_nat_ipv4_manip_pkt,
+ .csum_update = nf_nat_ipv4_csum_update,
+ .csum_recalc = nf_nat_ipv4_csum_recalc,
+ .nlattr_to_range = nf_nat_ipv4_nlattr_to_range,
+#ifdef CONFIG_XFRM
+ .decode_session = nf_nat_ipv4_decode_session,
+#endif
+};
+
+int nf_nat_icmp_reply_translation(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int hooknum)
+{
+ struct {
+ struct icmphdr icmp;
+ struct iphdr ip;
+ } *inside;
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+ enum nf_nat_manip_type manip = HOOK2MANIP(hooknum);
+ unsigned int hdrlen = ip_hdrlen(skb);
+ const struct nf_nat_l4proto *l4proto;
+ struct nf_conntrack_tuple target;
+ unsigned long statusbit;
+
+ NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY);
+
+ if (!skb_make_writable(skb, hdrlen + sizeof(*inside)))
+ return 0;
+ if (nf_ip_checksum(skb, hooknum, hdrlen, 0))
+ return 0;
+
+ inside = (void *)skb->data + hdrlen;
+ if (inside->icmp.type == ICMP_REDIRECT) {
+ if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK)
+ return 0;
+ if (ct->status & IPS_NAT_MASK)
+ return 0;
+ }
+
+ if (manip == NF_NAT_MANIP_SRC)
+ statusbit = IPS_SRC_NAT;
+ else
+ statusbit = IPS_DST_NAT;
+
+ /* Invert if this is reply direction */
+ if (dir == IP_CT_DIR_REPLY)
+ statusbit ^= IPS_NAT_MASK;
+
+ if (!(ct->status & statusbit))
+ return 1;
+
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol);
+ if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp),
+ l4proto, &ct->tuplehash[!dir].tuple, !manip))
+ return 0;
+
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ /* Reloading "inside" here since manip_pkt may reallocate */
+ inside = (void *)skb->data + hdrlen;
+ inside->icmp.checksum = 0;
+ inside->icmp.checksum =
+ csum_fold(skb_checksum(skb, hdrlen,
+ skb->len - hdrlen, 0));
+ }
+
+ /* Change outer to look like the reply to an incoming packet */
+ nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple);
+ l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0);
+ if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip))
+ return 0;
+
+ return 1;
+}
+EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation);
+
+static int __init nf_nat_l3proto_ipv4_init(void)
+{
+ int err;
+
+ err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+ if (err < 0)
+ goto err1;
+ err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4);
+ if (err < 0)
+ goto err2;
+ return err;
+
+err2:
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+err1:
+ return err;
+}
+
+static void __exit nf_nat_l3proto_ipv4_exit(void)
+{
+ nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4);
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp);
+}
+
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("nf-nat-" __stringify(AF_INET));
+
+module_init(nf_nat_l3proto_ipv4_init);
+module_exit(nf_nat_l3proto_ipv4_exit);
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
new file mode 100644
index 00000000000..657d2307f03
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -0,0 +1,311 @@
+/*
+ * nf_nat_pptp.c
+ *
+ * NAT support for PPTP (Point to Point Tunneling Protocol).
+ * PPTP is a a protocol for creating virtual private networks.
+ * It is a specification defined by Microsoft and some vendors
+ * working with Microsoft. PPTP is built on top of a modified
+ * version of the Internet Generic Routing Encapsulation Protocol.
+ * GRE is defined in RFC 1701 and RFC 1702. Documentation of
+ * PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ * TODO: - NAT to a unique tuple, not to TCP source port
+ * (needs netfilter tuple reservation)
+ */
+
+#include <linux/module.h>
+#include <linux/tcp.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_zones.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+#include <linux/netfilter/nf_conntrack_pptp.h>
+
+#define NF_NAT_PPTP_VERSION "3.0"
+
+#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off)))
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP");
+MODULE_ALIAS("ip_nat_pptp");
+
+static void pptp_nat_expected(struct nf_conn *ct,
+ struct nf_conntrack_expect *exp)
+{
+ struct net *net = nf_ct_net(ct);
+ const struct nf_conn *master = ct->master;
+ struct nf_conntrack_expect *other_exp;
+ struct nf_conntrack_tuple t;
+ const struct nf_ct_pptp_master *ct_pptp_info;
+ const struct nf_nat_pptp *nat_pptp_info;
+ struct nf_nat_range range;
+
+ ct_pptp_info = nfct_help_data(master);
+ nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info;
+
+ /* And here goes the grand finale of corrosion... */
+ if (exp->dir == IP_CT_DIR_ORIGINAL) {
+ pr_debug("we are PNS->PAC\n");
+ /* therefore, build tuple for PAC->PNS */
+ t.src.l3num = AF_INET;
+ t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
+ t.src.u.gre.key = ct_pptp_info->pac_call_id;
+ t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ t.dst.u.gre.key = ct_pptp_info->pns_call_id;
+ t.dst.protonum = IPPROTO_GRE;
+ } else {
+ pr_debug("we are PAC->PNS\n");
+ /* build tuple for PNS->PAC */
+ t.src.l3num = AF_INET;
+ t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip;
+ t.src.u.gre.key = nat_pptp_info->pns_call_id;
+ t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip;
+ t.dst.u.gre.key = nat_pptp_info->pac_call_id;
+ t.dst.protonum = IPPROTO_GRE;
+ }
+
+ pr_debug("trying to unexpect other dir: ");
+ nf_ct_dump_tuple_ip(&t);
+ other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t);
+ if (other_exp) {
+ nf_ct_unexpect_related(other_exp);
+ nf_ct_expect_put(other_exp);
+ pr_debug("success\n");
+ } else {
+ pr_debug("not found!\n");
+ }
+
+ /* This must be a fresh one. */
+ BUG_ON(ct->status & IPS_NAT_DONE_MASK);
+
+ /* Change src to where master sends to */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.dst.u3;
+ if (exp->dir == IP_CT_DIR_ORIGINAL) {
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto = range.max_proto = exp->saved_proto;
+ }
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC);
+
+ /* For DST manip, map port here to where it's expected. */
+ range.flags = NF_NAT_RANGE_MAP_IPS;
+ range.min_addr = range.max_addr
+ = ct->master->tuplehash[!exp->dir].tuple.src.u3;
+ if (exp->dir == IP_CT_DIR_REPLY) {
+ range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
+ range.min_proto = range.max_proto = exp->saved_proto;
+ }
+ nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST);
+}
+
+/* outbound packets == from PNS to PAC */
+static int
+pptp_outbound_pkt(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+
+{
+ struct nf_ct_pptp_master *ct_pptp_info;
+ struct nf_nat_pptp *nat_pptp_info;
+ u_int16_t msg;
+ __be16 new_callid;
+ unsigned int cid_off;
+
+ ct_pptp_info = nfct_help_data(ct);
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+
+ new_callid = ct_pptp_info->pns_call_id;
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, ocreq.callID);
+ /* FIXME: ideally we would want to reserve a call ID
+ * here. current netfilter NAT core is not able to do
+ * this :( For now we use TCP source port. This breaks
+ * multiple calls within one control session */
+
+ /* save original call ID in nat_info */
+ nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id;
+
+ /* don't use tcph->source since we are at a DSTmanip
+ * hook (e.g. PREROUTING) and pkt is not mangled yet */
+ new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port;
+
+ /* save new call ID in ct info */
+ ct_pptp_info->pns_call_id = new_callid;
+ break;
+ case PPTP_IN_CALL_REPLY:
+ cid_off = offsetof(union pptp_ctrl_union, icack.callID);
+ break;
+ case PPTP_CALL_CLEAR_REQUEST:
+ cid_off = offsetof(union pptp_ctrl_union, clrreq.callID);
+ break;
+ default:
+ pr_debug("unknown outbound packet 0x%04x:%s\n", msg,
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
+ pptp_msg_name[0]);
+ /* fall through */
+ case PPTP_SET_LINK_INFO:
+ /* only need to NAT in case PAC is behind NAT box */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass
+ * down to here */
+ pr_debug("altering call id from 0x%04x to 0x%04x\n",
+ ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid));
+
+ /* mangle packet */
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
+ cid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_callid), (char *)&new_callid,
+ sizeof(new_callid)) == 0)
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+
+static void
+pptp_exp_gre(struct nf_conntrack_expect *expect_orig,
+ struct nf_conntrack_expect *expect_reply)
+{
+ const struct nf_conn *ct = expect_orig->master;
+ struct nf_ct_pptp_master *ct_pptp_info;
+ struct nf_nat_pptp *nat_pptp_info;
+
+ ct_pptp_info = nfct_help_data(ct);
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+
+ /* save original PAC call ID in nat_info */
+ nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id;
+
+ /* alter expectation for PNS->PAC direction */
+ expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id;
+ expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id;
+ expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id;
+ expect_orig->dir = IP_CT_DIR_ORIGINAL;
+
+ /* alter expectation for PAC->PNS direction */
+ expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id;
+ expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id;
+ expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id;
+ expect_reply->dir = IP_CT_DIR_REPLY;
+}
+
+/* inbound packets == from PAC to PNS */
+static int
+pptp_inbound_pkt(struct sk_buff *skb,
+ struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ unsigned int protoff,
+ struct PptpControlHeader *ctlh,
+ union pptp_ctrl_union *pptpReq)
+{
+ const struct nf_nat_pptp *nat_pptp_info;
+ u_int16_t msg;
+ __be16 new_pcid;
+ unsigned int pcid_off;
+
+ nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info;
+ new_pcid = nat_pptp_info->pns_call_id;
+
+ switch (msg = ntohs(ctlh->messageType)) {
+ case PPTP_OUT_CALL_REPLY:
+ pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID);
+ break;
+ case PPTP_IN_CALL_CONNECT:
+ pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID);
+ break;
+ case PPTP_IN_CALL_REQUEST:
+ /* only need to nat in case PAC is behind NAT box */
+ return NF_ACCEPT;
+ case PPTP_WAN_ERROR_NOTIFY:
+ pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID);
+ break;
+ case PPTP_CALL_DISCONNECT_NOTIFY:
+ pcid_off = offsetof(union pptp_ctrl_union, disc.callID);
+ break;
+ case PPTP_SET_LINK_INFO:
+ pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID);
+ break;
+ default:
+ pr_debug("unknown inbound packet %s\n",
+ msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] :
+ pptp_msg_name[0]);
+ /* fall through */
+ case PPTP_START_SESSION_REQUEST:
+ case PPTP_START_SESSION_REPLY:
+ case PPTP_STOP_SESSION_REQUEST:
+ case PPTP_STOP_SESSION_REPLY:
+ case PPTP_ECHO_REQUEST:
+ case PPTP_ECHO_REPLY:
+ /* no need to alter packet */
+ return NF_ACCEPT;
+ }
+
+ /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST,
+ * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */
+
+ /* mangle packet */
+ pr_debug("altering peer call id from 0x%04x to 0x%04x\n",
+ ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid));
+
+ if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff,
+ pcid_off + sizeof(struct pptp_pkt_hdr) +
+ sizeof(struct PptpControlHeader),
+ sizeof(new_pcid), (char *)&new_pcid,
+ sizeof(new_pcid)) == 0)
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+
+static int __init nf_nat_helper_pptp_init(void)
+{
+ nf_nat_need_gre();
+
+ BUG_ON(nf_nat_pptp_hook_outbound != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt);
+
+ BUG_ON(nf_nat_pptp_hook_inbound != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt);
+
+ BUG_ON(nf_nat_pptp_hook_exp_gre != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre);
+
+ BUG_ON(nf_nat_pptp_hook_expectfn != NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected);
+ return 0;
+}
+
+static void __exit nf_nat_helper_pptp_fini(void)
+{
+ RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL);
+ RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL);
+ synchronize_rcu();
+}
+
+module_init(nf_nat_helper_pptp_init);
+module_exit(nf_nat_helper_pptp_fini);
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
new file mode 100644
index 00000000000..690d890111b
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -0,0 +1,149 @@
+/*
+ * nf_nat_proto_gre.c
+ *
+ * NAT protocol helper module for GRE.
+ *
+ * GRE is a generic encapsulation protocol, which is generally not very
+ * suited for NAT, as it has no protocol-specific part as port numbers.
+ *
+ * It has an optional key field, which may help us distinguishing two
+ * connections between the same two hosts.
+ *
+ * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784
+ *
+ * PPTP is built on top of a modified version of GRE, and has a mandatory
+ * field called "CallID", which serves us for the same purpose as the key
+ * field in plain GRE.
+ *
+ * Documentation about PPTP can be found in RFC 2637
+ *
+ * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org>
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ *
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+#include <linux/netfilter/nf_conntrack_proto_gre.h>
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>");
+MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE");
+
+/* generate unique tuple ... */
+static void
+gre_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ static u_int16_t key;
+ __be16 *keyptr;
+ unsigned int min, i, range_size;
+
+ /* If there is no master conntrack we are not PPTP,
+ do not change tuples */
+ if (!ct->master)
+ return;
+
+ if (maniptype == NF_NAT_MANIP_SRC)
+ keyptr = &tuple->src.u.gre.key;
+ else
+ keyptr = &tuple->dst.u.gre.key;
+
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) {
+ pr_debug("%p: NATing GRE PPTP\n", ct);
+ min = 1;
+ range_size = 0xffff;
+ } else {
+ min = ntohs(range->min_proto.gre.key);
+ range_size = ntohs(range->max_proto.gre.key) - min + 1;
+ }
+
+ pr_debug("min = %u, range_size = %u\n", min, range_size);
+
+ for (i = 0; ; ++key) {
+ *keyptr = htons(min + key % range_size);
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+
+ pr_debug("%p: no NAT mapping\n", ct);
+ return;
+}
+
+/* manipulate a GRE packet according to maniptype */
+static bool
+gre_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ const struct gre_hdr *greh;
+ struct gre_hdr_pptp *pgreh;
+
+ /* pgreh includes two optional 32bit fields which are not required
+ * to be there. That's where the magic '8' comes from */
+ if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8))
+ return false;
+
+ greh = (void *)skb->data + hdroff;
+ pgreh = (struct gre_hdr_pptp *)greh;
+
+ /* we only have destination manip of a packet, since 'source key'
+ * is not present in the packet itself */
+ if (maniptype != NF_NAT_MANIP_DST)
+ return true;
+ switch (greh->version) {
+ case GRE_VERSION_1701:
+ /* We do not currently NAT any GREv0 packets.
+ * Try to behave like "nf_nat_proto_unknown" */
+ break;
+ case GRE_VERSION_PPTP:
+ pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key));
+ pgreh->call_id = tuple->dst.u.gre.key;
+ break;
+ default:
+ pr_debug("can't nat unknown GRE version\n");
+ return false;
+ }
+ return true;
+}
+
+static const struct nf_nat_l4proto gre = {
+ .l4proto = IPPROTO_GRE,
+ .manip_pkt = gre_manip_pkt,
+ .in_range = nf_nat_l4proto_in_range,
+ .unique_tuple = gre_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
+#endif
+};
+
+static int __init nf_nat_proto_gre_init(void)
+{
+ return nf_nat_l4proto_register(NFPROTO_IPV4, &gre);
+}
+
+static void __exit nf_nat_proto_gre_fini(void)
+{
+ nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre);
+}
+
+module_init(nf_nat_proto_gre_init);
+module_exit(nf_nat_proto_gre_fini);
+
+void nf_nat_need_gre(void)
+{
+ return;
+}
+EXPORT_SYMBOL_GPL(nf_nat_need_gre);
diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c
new file mode 100644
index 00000000000..eb303471bcf
--- /dev/null
+++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c
@@ -0,0 +1,83 @@
+/* (C) 1999-2001 Paul `Rusty' Russell
+ * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/types.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/ip.h>
+#include <linux/icmp.h>
+
+#include <linux/netfilter.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_nat_l4proto.h>
+
+static bool
+icmp_in_range(const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype,
+ const union nf_conntrack_man_proto *min,
+ const union nf_conntrack_man_proto *max)
+{
+ return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) &&
+ ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id);
+}
+
+static void
+icmp_unique_tuple(const struct nf_nat_l3proto *l3proto,
+ struct nf_conntrack_tuple *tuple,
+ const struct nf_nat_range *range,
+ enum nf_nat_manip_type maniptype,
+ const struct nf_conn *ct)
+{
+ static u_int16_t id;
+ unsigned int range_size;
+ unsigned int i;
+
+ range_size = ntohs(range->max_proto.icmp.id) -
+ ntohs(range->min_proto.icmp.id) + 1;
+ /* If no range specified... */
+ if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED))
+ range_size = 0xFFFF;
+
+ for (i = 0; ; ++id) {
+ tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) +
+ (id % range_size));
+ if (++i == range_size || !nf_nat_used_tuple(tuple, ct))
+ return;
+ }
+ return;
+}
+
+static bool
+icmp_manip_pkt(struct sk_buff *skb,
+ const struct nf_nat_l3proto *l3proto,
+ unsigned int iphdroff, unsigned int hdroff,
+ const struct nf_conntrack_tuple *tuple,
+ enum nf_nat_manip_type maniptype)
+{
+ struct icmphdr *hdr;
+
+ if (!skb_make_writable(skb, hdroff + sizeof(*hdr)))
+ return false;
+
+ hdr = (struct icmphdr *)(skb->data + hdroff);
+ inet_proto_csum_replace2(&hdr->checksum, skb,
+ hdr->un.echo.id, tuple->src.u.icmp.id, 0);
+ hdr->un.echo.id = tuple->src.u.icmp.id;
+ return true;
+}
+
+const struct nf_nat_l4proto nf_nat_l4proto_icmp = {
+ .l4proto = IPPROTO_ICMP,
+ .manip_pkt = icmp_manip_pkt,
+ .in_range = icmp_in_range,
+ .unique_tuple = icmp_unique_tuple,
+#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE)
+ .nlattr_to_range = nf_nat_l4proto_nlattr_to_range,
+#endif
+};
diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index 4f95d477805..7c676671329 100644
--- a/net/ipv4/netfilter/ip_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -1,13 +1,13 @@
/*
- * ip_nat_snmp_basic.c
+ * nf_nat_snmp_basic.c
*
* Basic SNMP Application Layer Gateway
*
- * This IP NAT module is intended for use with SNMP network
- * discovery and monitoring applications where target networks use
+ * This IP NAT module is intended for use with SNMP network
+ * discovery and monitoring applications where target networks use
* conflicting private address realms.
*
- * Static NAT is used to remap the networks from the view of the network
+ * Static NAT is used to remap the networks from the view of the network
* management system at the IP layer, and this module remaps some application
* layer addresses to match.
*
@@ -20,7 +20,7 @@
* More information on ALG and associated issues can be found in
* RFC 2962
*
- * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
+ * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory
* McLean & Jochen Friedrich, stripped down for use in the kernel.
*
* Copyright (c) 2000 RP Internet (www.rpi.net.au).
@@ -34,44 +34,43 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Author: James Morris <jmorris@intercode.com.au>
*
- * Updates:
- * 2000-08-06: Convert to new helper API (Harald Welte).
- *
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
*/
-#include <linux/config.h>
-#include <linux/in.h>
#include <linux/module.h>
+#include <linux/moduleparam.h>
#include <linux/types.h>
#include <linux/kernel.h>
-#include <linux/moduleparam.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/netfilter_ipv4/ip_nat.h>
-#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
-#include <linux/netfilter_ipv4/ip_nat_helper.h>
+#include <linux/slab.h>
+#include <linux/in.h>
#include <linux/ip.h>
#include <linux/udp.h>
#include <net/checksum.h>
#include <net/udp.h>
-#include <asm/uaccess.h>
+
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_conntrack_expect.h>
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_nat_helper.h>
+#include <linux/netfilter/nf_conntrack_snmp.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway");
+MODULE_ALIAS("ip_nat_snmp_basic");
#define SNMP_PORT 161
#define SNMP_TRAP_PORT 162
-#define NOCT1(n) (u_int8_t )((n) & 0xff)
+#define NOCT1(n) (*(u8 *)(n))
static int debug;
static DEFINE_SPINLOCK(snmp_lock);
-/*
- * Application layer address mapping mimics the NAT mapping, but
+/*
+ * Application layer address mapping mimics the NAT mapping, but
* only for the first octet in this case (a more flexible system
* can be implemented if needed).
*/
@@ -81,7 +80,7 @@ struct oct1_map
u_int8_t to;
};
-
+
/*****************************************************************************
*
* Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse)
@@ -130,7 +129,7 @@ struct oct1_map
#define ASN1_ERR_DEC_LENGTH_MISMATCH 4
#define ASN1_ERR_DEC_BADVALUE 5
-/*
+/*
* ASN.1 context.
*/
struct asn1_ctx
@@ -149,10 +148,10 @@ struct asn1_octstr
unsigned char *data;
unsigned int len;
};
-
+
static void asn1_open(struct asn1_ctx *ctx,
- unsigned char *buf,
- unsigned int len)
+ unsigned char *buf,
+ unsigned int len)
{
ctx->begin = buf;
ctx->end = buf + len;
@@ -173,9 +172,9 @@ static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch)
static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
{
unsigned char ch;
-
+
*tag = 0;
-
+
do
{
if (!asn1_octet_decode(ctx, &ch))
@@ -186,20 +185,20 @@ static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag)
return 1;
}
-static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
+static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
+ unsigned int *cls,
+ unsigned int *con,
+ unsigned int *tag)
{
unsigned char ch;
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
*cls = (ch & 0xC0) >> 6;
*con = (ch & 0x20) >> 5;
*tag = (ch & 0x1F);
-
+
if (*tag == 0x1F) {
if (!asn1_tag_decode(ctx, tag))
return 0;
@@ -208,25 +207,25 @@ static unsigned char asn1_id_decode(struct asn1_ctx *ctx,
}
static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
- unsigned int *def,
- unsigned int *len)
+ unsigned int *def,
+ unsigned int *len)
{
unsigned char ch, cnt;
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
if (ch == 0x80)
*def = 0;
else {
*def = 1;
-
+
if (ch < 0x80)
*len = ch;
else {
- cnt = (unsigned char) (ch & 0x7F);
+ cnt = ch & 0x7F;
*len = 0;
-
+
while (cnt > 0) {
if (!asn1_octet_decode(ctx, &ch))
return 0;
@@ -236,23 +235,33 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx,
}
}
}
+
+ /* don't trust len bigger than ctx buffer */
+ if (*len > ctx->end - ctx->pointer)
+ return 0;
+
return 1;
}
static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
- unsigned char **eoc,
- unsigned int *cls,
- unsigned int *con,
- unsigned int *tag)
+ unsigned char **eoc,
+ unsigned int *cls,
+ unsigned int *con,
+ unsigned int *tag)
{
unsigned int def, len;
-
+
if (!asn1_id_decode(ctx, cls, con, tag))
return 0;
-
+
+ def = len = 0;
if (!asn1_length_decode(ctx, &def, &len))
return 0;
-
+
+ /* primitive shall be definite, indefinite shall be constructed */
+ if (*con == ASN1_PRI && !def)
+ return 0;
+
if (def)
*eoc = ctx->pointer + len;
else
@@ -263,19 +272,19 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx,
static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc)
{
unsigned char ch;
-
- if (eoc == 0) {
+
+ if (eoc == NULL) {
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
if (ch != 0x00) {
ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
return 0;
}
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
if (ch != 0x00) {
ctx->error = ASN1_ERR_DEC_EOC_MISMATCH;
return 0;
@@ -297,27 +306,27 @@ static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc)
}
static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- long *integer)
+ unsigned char *eoc,
+ long *integer)
{
unsigned char ch;
unsigned int len;
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
*integer = (signed char) ch;
len = 1;
-
+
while (ctx->pointer < eoc) {
if (++len > sizeof (long)) {
ctx->error = ASN1_ERR_DEC_BADVALUE;
return 0;
}
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
*integer <<= 8;
*integer |= ch;
}
@@ -325,28 +334,28 @@ static unsigned char asn1_long_decode(struct asn1_ctx *ctx,
}
static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned int *integer)
+ unsigned char *eoc,
+ unsigned int *integer)
{
unsigned char ch;
unsigned int len;
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
*integer = ch;
if (ch == 0) len = 0;
else len = 1;
-
+
while (ctx->pointer < eoc) {
if (++len > sizeof (unsigned int)) {
ctx->error = ASN1_ERR_DEC_BADVALUE;
return 0;
}
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
*integer <<= 8;
*integer |= ch;
}
@@ -354,28 +363,28 @@ static unsigned char asn1_uint_decode(struct asn1_ctx *ctx,
}
static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long *integer)
+ unsigned char *eoc,
+ unsigned long *integer)
{
unsigned char ch;
unsigned int len;
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
*integer = ch;
if (ch == 0) len = 0;
else len = 1;
-
+
while (ctx->pointer < eoc) {
if (++len > sizeof (unsigned long)) {
ctx->error = ASN1_ERR_DEC_BADVALUE;
return 0;
}
-
+
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
*integer <<= 8;
*integer |= ch;
}
@@ -383,24 +392,21 @@ static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx,
}
static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned char **octets,
- unsigned int *len)
+ unsigned char *eoc,
+ unsigned char **octets,
+ unsigned int *len)
{
unsigned char *ptr;
-
+
*len = 0;
-
+
*octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC);
- if (*octets == NULL) {
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
+ if (*octets == NULL)
return 0;
- }
-
+
ptr = *octets;
while (ctx->pointer < eoc) {
- if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) {
+ if (!asn1_octet_decode(ctx, ptr++)) {
kfree(*octets);
*octets = NULL;
return 0;
@@ -411,16 +417,16 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx,
}
static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
- unsigned long *subid)
+ unsigned long *subid)
{
unsigned char ch;
-
+
*subid = 0;
-
+
do {
if (!asn1_octet_decode(ctx, &ch))
return 0;
-
+
*subid <<= 7;
*subid |= ch & 0x7F;
} while ((ch & 0x80) == 0x80);
@@ -428,44 +434,46 @@ static unsigned char asn1_subid_decode(struct asn1_ctx *ctx,
}
static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
- unsigned char *eoc,
- unsigned long **oid,
- unsigned int *len)
+ unsigned char *eoc,
+ unsigned long **oid,
+ unsigned int *len)
{
unsigned long subid;
- unsigned int size;
unsigned long *optr;
-
+ size_t size;
+
size = eoc - ctx->pointer + 1;
+
+ /* first subid actually encodes first two subids */
+ if (size < 2 || size > ULONG_MAX/sizeof(unsigned long))
+ return 0;
+
*oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC);
- if (*oid == NULL) {
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
+ if (*oid == NULL)
return 0;
- }
-
+
optr = *oid;
-
+
if (!asn1_subid_decode(ctx, &subid)) {
kfree(*oid);
*oid = NULL;
return 0;
}
-
+
if (subid < 40) {
- optr [0] = 0;
- optr [1] = subid;
+ optr[0] = 0;
+ optr[1] = subid;
} else if (subid < 80) {
- optr [0] = 1;
- optr [1] = subid - 40;
+ optr[0] = 1;
+ optr[1] = subid - 40;
} else {
- optr [0] = 2;
- optr [1] = subid - 80;
+ optr[0] = 2;
+ optr[1] = subid - 80;
}
-
+
*len = 2;
optr += 2;
-
+
while (ctx->pointer < eoc) {
if (++(*len) > size) {
ctx->error = ASN1_ERR_DEC_BADVALUE;
@@ -473,7 +481,7 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
*oid = NULL;
return 0;
}
-
+
if (!asn1_subid_decode(ctx, optr++)) {
kfree(*oid);
*oid = NULL;
@@ -611,9 +619,9 @@ struct snmp_v1_trap
#define SERR_EOM 2
static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- u_int16_t *check);
+ unsigned char *addr,
+ const struct oct1_map *map,
+ __sum16 *check);
struct snmp_cnv
{
unsigned int class;
@@ -621,8 +629,7 @@ struct snmp_cnv
int syntax;
};
-static struct snmp_cnv snmp_conv [] =
-{
+static const struct snmp_cnv snmp_conv[] = {
{ASN1_UNI, ASN1_NUL, SNMP_NULL},
{ASN1_UNI, ASN1_INT, SNMP_INTEGER},
{ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR},
@@ -633,7 +640,7 @@ static struct snmp_cnv snmp_conv [] =
{ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */
{ASN1_APL, SNMP_TIT, SNMP_TIMETICKS},
{ASN1_APL, SNMP_OPQ, SNMP_OPAQUE},
-
+
/* SNMPv2 data types and errors */
{ASN1_UNI, ASN1_BTS, SNMP_BITSTR},
{ASN1_APL, SNMP_C64, SNMP_COUNTER64},
@@ -644,13 +651,13 @@ static struct snmp_cnv snmp_conv [] =
};
static unsigned char snmp_tag_cls2syntax(unsigned int tag,
- unsigned int cls,
- unsigned short *syntax)
+ unsigned int cls,
+ unsigned short *syntax)
{
- struct snmp_cnv *cnv;
-
+ const struct snmp_cnv *cnv;
+
cnv = snmp_conv;
-
+
while (cnv->syntax != -1) {
if (cnv->tag == tag && cnv->class == cls) {
*syntax = cnv->syntax;
@@ -662,165 +669,155 @@ static unsigned char snmp_tag_cls2syntax(unsigned int tag,
}
static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
- struct snmp_object **obj)
+ struct snmp_object **obj)
{
unsigned int cls, con, tag, len, idlen;
unsigned short type;
unsigned char *eoc, *end, *p;
unsigned long *lp, *id;
unsigned long ul;
- long l;
-
+ long l;
+
*obj = NULL;
id = NULL;
-
+
if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag))
return 0;
-
+
if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
return 0;
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
return 0;
-
+
if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
return 0;
-
+
if (!asn1_oid_decode(ctx, end, &id, &idlen))
return 0;
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) {
kfree(id);
return 0;
}
-
+
if (con != ASN1_PRI) {
kfree(id);
return 0;
}
-
+
+ type = 0;
if (!snmp_tag_cls2syntax(tag, cls, &type)) {
kfree(id);
return 0;
}
-
+
+ l = 0;
switch (type) {
- case SNMP_INTEGER:
- len = sizeof(long);
- if (!asn1_long_decode(ctx, end, &l)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len,
- GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- (*obj)->syntax.l[0] = l;
- break;
- case SNMP_OCTETSTR:
- case SNMP_OPAQUE:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len,
- GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.c, p, len);
+ case SNMP_INTEGER:
+ len = sizeof(long);
+ if (!asn1_long_decode(ctx, end, &l)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ (*obj)->syntax.l[0] = l;
+ break;
+ case SNMP_OCTETSTR:
+ case SNMP_OPAQUE:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
kfree(p);
- break;
- case SNMP_NULL:
- case SNMP_NOSUCHOBJECT:
- case SNMP_NOSUCHINSTANCE:
- case SNMP_ENDOFMIBVIEW:
- len = 0;
- *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- if (!asn1_null_decode(ctx, end)) {
- kfree(id);
- kfree(*obj);
- *obj = NULL;
- return 0;
- }
- break;
- case SNMP_OBJECTID:
- if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) {
- kfree(id);
- return 0;
- }
- len *= sizeof(unsigned long);
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.ul, lp, len);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.c, p, len);
+ kfree(p);
+ break;
+ case SNMP_NULL:
+ case SNMP_NOSUCHOBJECT:
+ case SNMP_NOSUCHINSTANCE:
+ case SNMP_ENDOFMIBVIEW:
+ len = 0;
+ *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ if (!asn1_null_decode(ctx, end)) {
+ kfree(id);
+ kfree(*obj);
+ *obj = NULL;
+ return 0;
+ }
+ break;
+ case SNMP_OBJECTID:
+ if (!asn1_oid_decode(ctx, end, &lp, &len)) {
+ kfree(id);
+ return 0;
+ }
+ len *= sizeof(unsigned long);
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
kfree(lp);
- break;
- case SNMP_IPADDR:
- if (!asn1_octets_decode(ctx, end, &p, &len)) {
- kfree(id);
- return 0;
- }
- if (len != 4) {
- kfree(p);
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(p);
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- memcpy((*obj)->syntax.uc, p, len);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.ul, lp, len);
+ kfree(lp);
+ break;
+ case SNMP_IPADDR:
+ if (!asn1_octets_decode(ctx, end, &p, &len)) {
+ kfree(id);
+ return 0;
+ }
+ if (len != 4) {
kfree(p);
- break;
- case SNMP_COUNTER:
- case SNMP_GAUGE:
- case SNMP_TIMETICKS:
- len = sizeof(unsigned long);
- if (!asn1_ulong_decode(ctx, end, &ul)) {
- kfree(id);
- return 0;
- }
- *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
- if (*obj == NULL) {
- kfree(id);
- if (net_ratelimit())
- printk("OOM in bsalg (%d)\n", __LINE__);
- return 0;
- }
- (*obj)->syntax.ul[0] = ul;
- break;
- default:
kfree(id);
return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(p);
+ kfree(id);
+ return 0;
+ }
+ memcpy((*obj)->syntax.uc, p, len);
+ kfree(p);
+ break;
+ case SNMP_COUNTER:
+ case SNMP_GAUGE:
+ case SNMP_TIMETICKS:
+ len = sizeof(unsigned long);
+ if (!asn1_ulong_decode(ctx, end, &ul)) {
+ kfree(id);
+ return 0;
+ }
+ *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC);
+ if (*obj == NULL) {
+ kfree(id);
+ return 0;
+ }
+ (*obj)->syntax.ul[0] = ul;
+ break;
+ default:
+ kfree(id);
+ return 0;
}
-
+
(*obj)->syntax_len = len;
(*obj)->type = type;
(*obj)->id = id;
(*obj)->id_len = idlen;
-
+
if (!asn1_eoc_decode(ctx, eoc)) {
kfree(id);
kfree(*obj);
@@ -831,181 +828,167 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx,
}
static unsigned char snmp_request_decode(struct asn1_ctx *ctx,
- struct snmp_request *request)
+ struct snmp_request *request)
{
unsigned int cls, con, tag;
unsigned char *end;
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
return 0;
-
+
if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
return 0;
-
+
if (!asn1_ulong_decode(ctx, end, &request->id))
return 0;
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
return 0;
-
+
if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
return 0;
-
+
if (!asn1_uint_decode(ctx, end, &request->error_status))
return 0;
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
return 0;
-
+
if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
return 0;
-
+
if (!asn1_uint_decode(ctx, end, &request->error_index))
return 0;
-
+
return 1;
}
-/*
+/*
* Fast checksum update for possibly oddly-aligned UDP byte, from the
* code example in the draft.
*/
-static void fast_csum(unsigned char *csum,
- const unsigned char *optr,
- const unsigned char *nptr,
- int odd)
+static void fast_csum(__sum16 *csum,
+ const unsigned char *optr,
+ const unsigned char *nptr,
+ int offset)
{
- long x, old, new;
-
- x = csum[0] * 256 + csum[1];
-
- x =~ x & 0xFFFF;
-
- if (odd) old = optr[0] * 256;
- else old = optr[0];
-
- x -= old & 0xFFFF;
- if (x <= 0) {
- x--;
- x &= 0xFFFF;
- }
-
- if (odd) new = nptr[0] * 256;
- else new = nptr[0];
-
- x += new & 0xFFFF;
- if (x & 0x10000) {
- x++;
- x &= 0xFFFF;
+ unsigned char s[4];
+
+ if (offset & 1) {
+ s[0] = ~0;
+ s[1] = ~*optr;
+ s[2] = 0;
+ s[3] = *nptr;
+ } else {
+ s[0] = ~*optr;
+ s[1] = ~0;
+ s[2] = *nptr;
+ s[3] = 0;
}
-
- x =~ x & 0xFFFF;
- csum[0] = x / 256;
- csum[1] = x & 0xFF;
+
+ *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum)));
}
-/*
+/*
* Mangle IP address.
* - begin points to the start of the snmp messgae
* - addr points to the start of the address
*/
static inline void mangle_address(unsigned char *begin,
- unsigned char *addr,
- const struct oct1_map *map,
- u_int16_t *check)
+ unsigned char *addr,
+ const struct oct1_map *map,
+ __sum16 *check)
{
- if (map->from == NOCT1(*addr)) {
+ if (map->from == NOCT1(addr)) {
u_int32_t old;
-
+
if (debug)
- memcpy(&old, (unsigned char *)addr, sizeof(old));
-
+ memcpy(&old, addr, sizeof(old));
+
*addr = map->to;
-
+
/* Update UDP checksum if being used */
if (*check) {
- unsigned char odd = !((addr - begin) % 2);
-
- fast_csum((unsigned char *)check,
- &map->from, &map->to, odd);
-
+ fast_csum(check,
+ &map->from, &map->to, addr - begin);
+
}
-
+
if (debug)
- printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to "
- "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr));
+ printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n",
+ &old, addr);
}
}
static unsigned char snmp_trap_decode(struct asn1_ctx *ctx,
- struct snmp_v1_trap *trap,
- const struct oct1_map *map,
- u_int16_t *check)
+ struct snmp_v1_trap *trap,
+ const struct oct1_map *map,
+ __sum16 *check)
{
unsigned int cls, con, tag, len;
unsigned char *end;
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
return 0;
-
+
if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI)
return 0;
-
+
if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len))
return 0;
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
goto err_id_free;
if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) ||
(cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS)))
goto err_id_free;
-
+
if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len))
goto err_id_free;
-
+
/* IPv4 only */
if (len != 4)
goto err_addr_free;
-
+
mangle_address(ctx->begin, ctx->pointer - 4, map, check);
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
goto err_addr_free;
-
+
if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
goto err_addr_free;
-
+
if (!asn1_uint_decode(ctx, end, &trap->general))
goto err_addr_free;
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
goto err_addr_free;
-
+
if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT)
goto err_addr_free;
-
+
if (!asn1_uint_decode(ctx, end, &trap->specific))
goto err_addr_free;
-
+
if (!asn1_header_decode(ctx, &end, &cls, &con, &tag))
goto err_addr_free;
-
+
if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) ||
(cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT)))
goto err_addr_free;
-
+
if (!asn1_ulong_decode(ctx, end, &trap->time))
goto err_addr_free;
-
+
return 1;
+err_addr_free:
+ kfree((unsigned long *)trap->ip_address);
+
err_id_free:
kfree(trap->id);
-err_addr_free:
- kfree((unsigned long *)trap->ip_address);
-
return 0;
}
@@ -1015,10 +998,10 @@ err_addr_free:
*
*****************************************************************************/
-static void hex_dump(unsigned char *buf, size_t len)
+static void hex_dump(const unsigned char *buf, size_t len)
{
size_t i;
-
+
for (i = 0; i < len; i++) {
if (i && !(i % 16))
printk("\n");
@@ -1032,30 +1015,30 @@ static void hex_dump(unsigned char *buf, size_t len)
* (And this is the fucking 'basic' method).
*/
static int snmp_parse_mangle(unsigned char *msg,
- u_int16_t len,
- const struct oct1_map *map,
- u_int16_t *check)
+ u_int16_t len,
+ const struct oct1_map *map,
+ __sum16 *check)
{
unsigned char *eoc, *end;
unsigned int cls, con, tag, vers, pdutype;
struct asn1_ctx ctx;
struct asn1_octstr comm;
- struct snmp_object **obj;
-
+ struct snmp_object *obj;
+
if (debug > 1)
hex_dump(msg, len);
asn1_open(&ctx, msg, len);
-
- /*
+
+ /*
* Start of SNMP message.
*/
if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
return 0;
if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
return 0;
-
- /*
+
+ /*
* Version 1 or 2 handled.
*/
if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag))
@@ -1068,7 +1051,7 @@ static int snmp_parse_mangle(unsigned char *msg,
printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1);
if (vers > 1)
return 1;
-
+
/*
* Community.
*/
@@ -1080,14 +1063,14 @@ static int snmp_parse_mangle(unsigned char *msg,
return 0;
if (debug > 1) {
unsigned int i;
-
+
printk(KERN_DEBUG "bsalg: community: ");
for (i = 0; i < comm.len; i++)
printk("%c", comm.data[i]);
printk("\n");
}
kfree(comm.data);
-
+
/*
* PDU type
*/
@@ -1096,7 +1079,7 @@ static int snmp_parse_mangle(unsigned char *msg,
if (cls != ASN1_CTX || con != ASN1_CON)
return 0;
if (debug > 1) {
- unsigned char *pdus[] = {
+ static const unsigned char *const pdus[] = {
[SNMP_PDU_GET] = "get",
[SNMP_PDU_NEXT] = "get-next",
[SNMP_PDU_RESPONSE] = "response",
@@ -1106,7 +1089,7 @@ static int snmp_parse_mangle(unsigned char *msg,
[SNMP_PDU_INFORM] = "inform",
[SNMP_PDU_TRAP2] = "trapv2"
};
-
+
if (pdutype > SNMP_PDU_TRAP2)
printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype);
else
@@ -1115,83 +1098,73 @@ static int snmp_parse_mangle(unsigned char *msg,
if (pdutype != SNMP_PDU_RESPONSE &&
pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2)
return 1;
-
+
/*
* Request header or v1 trap
*/
if (pdutype == SNMP_PDU_TRAP1) {
struct snmp_v1_trap trap;
unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check);
-
- /* Discard trap allocations regardless */
- kfree(trap.id);
- kfree((unsigned long *)trap.ip_address);
-
- if (!ret)
+
+ if (ret) {
+ kfree(trap.id);
+ kfree((unsigned long *)trap.ip_address);
+ } else
return ret;
-
+
} else {
struct snmp_request req;
-
+
if (!snmp_request_decode(&ctx, &req))
return 0;
-
+
if (debug > 1)
printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u "
"error_index=%u\n", req.id, req.error_status,
req.error_index);
}
-
+
/*
* Loop through objects, look for IP addresses to mangle.
*/
if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag))
return 0;
-
+
if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ)
return 0;
-
- obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC);
- if (obj == NULL) {
- if (net_ratelimit())
- printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__);
- return 0;
- }
while (!asn1_eoc_decode(&ctx, eoc)) {
unsigned int i;
-
- if (!snmp_object_decode(&ctx, obj)) {
- if (*obj) {
- kfree((*obj)->id);
- kfree(*obj);
- }
- kfree(obj);
+
+ if (!snmp_object_decode(&ctx, &obj)) {
+ if (obj) {
+ kfree(obj->id);
+ kfree(obj);
+ }
return 0;
}
if (debug > 1) {
printk(KERN_DEBUG "bsalg: object: ");
- for (i = 0; i < (*obj)->id_len; i++) {
+ for (i = 0; i < obj->id_len; i++) {
if (i > 0)
printk(".");
- printk("%lu", (*obj)->id[i]);
+ printk("%lu", obj->id[i]);
}
- printk(": type=%u\n", (*obj)->type);
-
+ printk(": type=%u\n", obj->type);
+
}
- if ((*obj)->type == SNMP_IPADDR)
+ if (obj->type == SNMP_IPADDR)
mangle_address(ctx.begin, ctx.pointer - 4 , map, check);
-
- kfree((*obj)->id);
- kfree(*obj);
+
+ kfree(obj->id);
+ kfree(obj);
}
- kfree(obj);
-
+
if (!asn1_eoc_decode(&ctx, eoc))
return 0;
-
+
return 1;
}
@@ -1201,15 +1174,15 @@ static int snmp_parse_mangle(unsigned char *msg,
*
*****************************************************************************/
-/*
+/*
* SNMP translation routine.
*/
-static int snmp_translate(struct ip_conntrack *ct,
- enum ip_conntrack_info ctinfo,
- struct sk_buff **pskb)
+static int snmp_translate(struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo,
+ struct sk_buff *skb)
{
- struct iphdr *iph = (*pskb)->nh.iph;
- struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+ struct iphdr *iph = ip_hdr(skb);
+ struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
u_int16_t udplen = ntohs(udph->len);
u_int16_t paylen = udplen - sizeof(struct udphdr);
int dir = CTINFO2DIR(ctinfo);
@@ -1221,21 +1194,20 @@ static int snmp_translate(struct ip_conntrack *ct,
*/
if (dir == IP_CT_DIR_ORIGINAL) {
/* SNAT traps */
- map.from = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip);
- map.to = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip);
+ map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
} else {
/* DNAT replies */
- map.from = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip);
- map.to = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip);
+ map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
}
-
+
if (map.from == map.to)
return NF_ACCEPT;
-
+
if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr),
- paylen, &map, &udph->check)) {
- if (net_ratelimit())
- printk(KERN_WARNING "bsalg: parser failed\n");
+ paylen, &map, &udph->check)) {
+ net_warn_ratelimited("bsalg: parser failed\n");
return NF_DROP;
}
return NF_ACCEPT;
@@ -1243,76 +1215,69 @@ static int snmp_translate(struct ip_conntrack *ct,
/* We don't actually set up expectations, just adjust internal IP
* addresses if this is being NATted */
-static int help(struct sk_buff **pskb,
- struct ip_conntrack *ct,
+static int help(struct sk_buff *skb, unsigned int protoff,
+ struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
int dir = CTINFO2DIR(ctinfo);
unsigned int ret;
- struct iphdr *iph = (*pskb)->nh.iph;
- struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl);
+ const struct iphdr *iph = ip_hdr(skb);
+ const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl);
/* SNMP replies and originating SNMP traps get mangled */
- if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
+ if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY)
return NF_ACCEPT;
- if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
+ if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL)
return NF_ACCEPT;
/* No NAT? */
if (!(ct->status & IPS_NAT_MASK))
return NF_ACCEPT;
- /*
+ /*
* Make sure the packet length is ok. So far, we were only guaranteed
* to have a valid length IP header plus 8 bytes, which means we have
* enough room for a UDP header. Just verify the UDP length field so we
* can mess around with the payload.
*/
- if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) {
- if (net_ratelimit())
- printk(KERN_WARNING "SNMP: dropping malformed packet "
- "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n",
- NIPQUAD(iph->saddr), NIPQUAD(iph->daddr));
+ if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) {
+ net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n",
+ &iph->saddr, &iph->daddr);
return NF_DROP;
}
- if (!skb_make_writable(pskb, (*pskb)->len))
+ if (!skb_make_writable(skb, skb->len))
return NF_DROP;
spin_lock_bh(&snmp_lock);
- ret = snmp_translate(ct, ctinfo, pskb);
+ ret = snmp_translate(ct, ctinfo, skb);
spin_unlock_bh(&snmp_lock);
return ret;
}
-static struct ip_conntrack_helper snmp_helper = {
- .max_expected = 0,
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "snmp",
-
- .tuple = { .src = { .u = { __constant_htons(SNMP_PORT) } },
- .dst = { .protonum = IPPROTO_UDP },
- },
- .mask = { .src = { .u = { 0xFFFF } },
- .dst = { .protonum = 0xFF },
- },
+static const struct nf_conntrack_expect_policy snmp_exp_policy = {
+ .max_expected = 0,
+ .timeout = 180,
+};
+
+static struct nf_conntrack_helper snmp_helper __read_mostly = {
+ .me = THIS_MODULE,
+ .help = help,
+ .expect_policy = &snmp_exp_policy,
+ .name = "snmp",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
};
-static struct ip_conntrack_helper snmp_trap_helper = {
- .max_expected = 0,
- .timeout = 180,
- .me = THIS_MODULE,
- .help = help,
- .name = "snmp_trap",
-
- .tuple = { .src = { .u = { __constant_htons(SNMP_TRAP_PORT) } },
- .dst = { .protonum = IPPROTO_UDP },
- },
- .mask = { .src = { .u = { 0xFFFF } },
- .dst = { .protonum = 0xFF },
- },
+static struct nf_conntrack_helper snmp_trap_helper __read_mostly = {
+ .me = THIS_MODULE,
+ .help = help,
+ .expect_policy = &snmp_exp_policy,
+ .name = "snmp_trap",
+ .tuple.src.l3num = AF_INET,
+ .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT),
+ .tuple.dst.protonum = IPPROTO_UDP,
};
/*****************************************************************************
@@ -1320,29 +1285,29 @@ static struct ip_conntrack_helper snmp_trap_helper = {
* Module stuff.
*
*****************************************************************************/
-
-static int __init init(void)
+
+static int __init nf_nat_snmp_basic_init(void)
{
int ret = 0;
- ret = ip_conntrack_helper_register(&snmp_helper);
- if (ret < 0)
- return ret;
- ret = ip_conntrack_helper_register(&snmp_trap_helper);
+ BUG_ON(nf_nat_snmp_hook != NULL);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, help);
+
+ ret = nf_conntrack_helper_register(&snmp_trap_helper);
if (ret < 0) {
- ip_conntrack_helper_unregister(&snmp_helper);
+ nf_conntrack_helper_unregister(&snmp_helper);
return ret;
}
return ret;
}
-static void __exit fini(void)
+static void __exit nf_nat_snmp_basic_fini(void)
{
- ip_conntrack_helper_unregister(&snmp_helper);
- ip_conntrack_helper_unregister(&snmp_trap_helper);
+ RCU_INIT_POINTER(nf_nat_snmp_hook, NULL);
+ nf_conntrack_helper_unregister(&snmp_trap_helper);
}
-module_init(init);
-module_exit(fini);
+module_init(nf_nat_snmp_basic_init);
+module_exit(nf_nat_snmp_basic_fini);
-module_param(debug, bool, 0600);
+module_param(debug, int, 0600);
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
new file mode 100644
index 00000000000..19412a4063f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables.h>
+
+static unsigned int
+nft_do_chain_arp(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, ops, skb, in, out);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static struct nft_af_info nft_af_arp __read_mostly = {
+ .family = NFPROTO_ARP,
+ .nhooks = NF_ARP_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ [NF_ARP_FORWARD] = nft_do_chain_arp,
+ },
+};
+
+static int nf_tables_arp_init_net(struct net *net)
+{
+ net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.arp== NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
+
+ if (nft_register_afinfo(net, net->nft.arp) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.arp);
+ return -ENOMEM;
+}
+
+static void nf_tables_arp_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.arp);
+ kfree(net->nft.arp);
+}
+
+static struct pernet_operations nf_tables_arp_net_ops = {
+ .init = nf_tables_arp_init_net,
+ .exit = nf_tables_arp_exit_net,
+};
+
+static const struct nf_chain_type filter_arp = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_ARP,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT) |
+ (1 << NF_ARP_FORWARD),
+};
+
+static int __init nf_tables_arp_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_arp);
+ ret = register_pernet_subsys(&nf_tables_arp_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_arp);
+
+ return ret;
+}
+
+static void __exit nf_tables_arp_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_arp_net_ops);
+ nft_unregister_chain_type(&filter_arp);
+}
+
+module_init(nf_tables_arp_init);
+module_exit(nf_tables_arp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 00000000000..6820c8c4084
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+
+static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ if (unlikely(skb->len < sizeof(struct iphdr) ||
+ ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) {
+ if (net_ratelimit())
+ pr_info("nf_tables_ipv4: ignoring short SOCK_RAW "
+ "packet\n");
+ return NF_ACCEPT;
+ }
+
+ return nft_do_chain_ipv4(ops, skb, in, out, okfn);
+}
+
+struct nft_af_info nft_af_ipv4 __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+};
+EXPORT_SYMBOL_GPL(nft_af_ipv4);
+
+static int nf_tables_ipv4_init_net(struct net *net)
+{
+ net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL);
+ if (net->nft.ipv4 == NULL)
+ return -ENOMEM;
+
+ memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
+
+ if (nft_register_afinfo(net, net->nft.ipv4) < 0)
+ goto err;
+
+ return 0;
+err:
+ kfree(net->nft.ipv4);
+ return -ENOMEM;
+}
+
+static void nf_tables_ipv4_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv4);
+ kfree(net->nft.ipv4);
+}
+
+static struct pernet_operations nf_tables_ipv4_net_ops = {
+ .init = nf_tables_ipv4_init_net,
+ .exit = nf_tables_ipv4_exit_net,
+};
+
+static const struct nf_chain_type filter_ipv4 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+};
+
+static int __init nf_tables_ipv4_init(void)
+{
+ int ret;
+
+ nft_register_chain_type(&filter_ipv4);
+ ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
+ if (ret < 0)
+ nft_unregister_chain_type(&filter_ipv4);
+
+ return ret;
+}
+
+static void __exit nf_tables_ipv4_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
+ nft_unregister_chain_type(&filter_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
new file mode 100644
index 00000000000..3964157d826
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+/*
+ * NAT chains
+ */
+
+static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ __be32 daddr = ip_hdr(skb)->daddr;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ ip_hdr(skb)->daddr != daddr) {
+ skb_dst_drop(skb);
+ }
+ return ret;
+}
+
+static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo __maybe_unused;
+ const struct nf_conn *ct __maybe_unused;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.src.u3.ip !=
+ ct->tuplehash[!dir].tuple.dst.u3.ip ||
+ ct->tuplehash[dir].tuple.src.u.all !=
+ ct->tuplehash[!dir].tuple.dst.u.all)
+ return nf_xfrm_me_harder(skb, AF_INET) == 0 ?
+ ret : NF_DROP;
+ }
+#endif
+ return ret;
+}
+
+static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int ret;
+
+ ret = nf_nat_fn(ops, skb, in, out, okfn);
+ if (ret != NF_DROP && ret != NF_STOLEN &&
+ (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+ enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+ if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+ ct->tuplehash[!dir].tuple.src.u3.ip) {
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+#ifdef CONFIG_XFRM
+ else if (ct->tuplehash[dir].tuple.dst.u.all !=
+ ct->tuplehash[!dir].tuple.src.u.all)
+ if (nf_xfrm_me_harder(skb, AF_INET))
+ ret = NF_DROP;
+#endif
+ }
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_nat_ipv4 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nf_nat_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_output,
+ [NF_INET_LOCAL_IN] = nf_nat_fn,
+ },
+};
+
+static int __init nft_chain_nat_init(void)
+{
+ int err;
+
+ err = nft_register_chain_type(&nft_chain_nat_ipv4);
+ if (err < 0)
+ return err;
+
+ return 0;
+}
+
+static void __exit nft_chain_nat_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv4);
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
new file mode 100644
index 00000000000..125b66766c0
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ unsigned int ret;
+ struct nft_pktinfo pkt;
+ u32 mark;
+ __be32 saddr, daddr;
+ u_int8_t tos;
+ const struct iphdr *iph;
+
+ /* root is playing with raw sockets. */
+ if (skb->len < sizeof(struct iphdr) ||
+ ip_hdrlen(skb) < sizeof(struct iphdr))
+ return NF_ACCEPT;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ mark = skb->mark;
+ iph = ip_hdr(skb);
+ saddr = iph->saddr;
+ daddr = iph->daddr;
+ tos = iph->tos;
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_DROP && ret != NF_QUEUE) {
+ iph = ip_hdr(skb);
+
+ if (iph->saddr != saddr ||
+ iph->daddr != daddr ||
+ skb->mark != mark ||
+ iph->tos != tos)
+ if (ip_route_me_harder(skb, RTN_UNSPEC))
+ ret = NF_DROP;
+ }
+ return ret;
+}
+
+static const struct nf_chain_type nft_chain_route_ipv4 = {
+ .name = "route",
+ .type = NFT_CHAIN_T_ROUTE,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_OUT),
+ .hooks = {
+ [NF_INET_LOCAL_OUT] = nf_route_table_hook,
+ },
+};
+
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv4);
+}
+
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv4);
+}
+
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 00000000000..e79718a382f
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+ struct nft_data data[NFT_REG_MAX + 1],
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_reject *priv = nft_expr_priv(expr);
+
+ switch (priv->type) {
+ case NFT_REJECT_ICMP_UNREACH:
+ nf_send_unreach(pkt->skb, priv->icmp_code);
+ break;
+ case NFT_REJECT_TCP_RST:
+ nf_send_reset(pkt->skb, pkt->ops->hooknum);
+ break;
+ }
+
+ data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
+
+static struct nft_expr_type nft_reject_ipv4_type;
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+ .type = &nft_reject_ipv4_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+ .eval = nft_reject_ipv4_eval,
+ .init = nft_reject_init,
+ .dump = nft_reject_dump,
+};
+
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .name = "reject",
+ .ops = &nft_reject_ipv4_ops,
+ .policy = nft_reject_policy,
+ .maxattr = NFTA_REJECT_MAX,
+ .owner = THIS_MODULE,
+};
+
+static int __init nft_reject_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");