diff options
Diffstat (limited to 'net/ipv4/netfilter')
84 files changed, 8769 insertions, 21865 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index db783036e4d..a26ce035e3f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -5,9 +5,15 @@ menu "IP: Netfilter Configuration" depends on INET && NETFILTER +config NF_DEFRAG_IPV4 + tristate + default n + config NF_CONNTRACK_IPV4 - tristate "IPv4 support for new connection tracking (EXPERIMENTAL)" - depends on EXPERIMENTAL && NF_CONNTRACK + tristate "IPv4 connection tracking support (required for NAT)" + depends on NF_CONNTRACK + default m if NETFILTER_ADVANCED=n + select NF_DEFRAG_IPV4 ---help--- Connection tracking keeps a record of what packets have passed through your machine, in order to figure out how they are related @@ -19,170 +25,57 @@ config NF_CONNTRACK_IPV4 To compile it as a module, choose M here. If unsure, say N. -# connection tracking, helpers and protocols -config IP_NF_CONNTRACK - tristate "Connection tracking (required for masq/NAT)" - ---help--- - Connection tracking keeps a record of what packets have passed - through your machine, in order to figure out how they are related - into connections. - - This is required to do Masquerading or other kinds of Network - Address Translation (except for Fast NAT). It can also be used to - enhance packet filtering (see `Connection state match support' - below). - - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_CT_ACCT - bool "Connection tracking flow accounting" - depends on IP_NF_CONNTRACK - help - If this option is enabled, the connection tracking code will - keep per-flow packet and byte counters. - - Those counters can be used for flow-based accounting or the - `connbytes' match. - - If unsure, say `N'. - -config IP_NF_CONNTRACK_MARK - bool 'Connection mark tracking support' - depends on IP_NF_CONNTRACK - help - This option enables support for connection marks, used by the - `CONNMARK' target and `connmark' match. Similar to the mark value - of packets, but this mark value is kept in the conntrack session - instead of the individual packets. - -config IP_NF_CONNTRACK_EVENTS - bool "Connection tracking events (EXPERIMENTAL)" - depends on EXPERIMENTAL && IP_NF_CONNTRACK - help - If this option is enabled, the connection tracking code will - provide a notifier chain that can be used by other kernel code - to get notified about changes in the connection tracking state. - - IF unsure, say `N'. - -config IP_NF_CONNTRACK_NETLINK - tristate 'Connection tracking netlink interface (EXPERIMENTAL)' - depends on EXPERIMENTAL && IP_NF_CONNTRACK && NETFILTER_NETLINK - depends on IP_NF_CONNTRACK!=y || NETFILTER_NETLINK!=m - help - This option enables support for a netlink-based userspace interface - - -config IP_NF_CT_PROTO_SCTP - tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)' - depends on IP_NF_CONNTRACK && EXPERIMENTAL +config NF_CONNTRACK_PROC_COMPAT + bool "proc/sysctl compatibility with old connection tracking" + depends on NF_CONNTRACK_PROCFS && NF_CONNTRACK_IPV4 + default y help - With this option enabled, the connection tracking code will - be able to do state tracking on SCTP connections. + This option enables /proc and sysctl compatibility with the old + layer 3 dependent connection tracking. This is needed to keep + old programs that have not been adapted to the new names working. - If you want to compile it as a module, say M here and read - <file:Documentation/modules.txt>. If unsure, say `N'. + If unsure, say Y. -config IP_NF_FTP - tristate "FTP protocol support" - depends on IP_NF_CONNTRACK +config NF_TABLES_IPV4 + depends on NF_TABLES + tristate "IPv4 nf_tables support" help - Tracking FTP connections is problematic: special helpers are - required for tracking them, and doing masquerading and other forms - of Network Address Translation on them. + This option enables the IPv4 support for nf_tables. - To compile it as a module, choose M here. If unsure, say Y. - -config IP_NF_IRC - tristate "IRC protocol support" - depends on IP_NF_CONNTRACK - ---help--- - There is a commonly-used extension to IRC called - Direct Client-to-Client Protocol (DCC). This enables users to send - files to each other, and also chat to each other without the need - of a server. DCC Sending is used anywhere you send files over IRC, - and DCC Chat is most commonly used by Eggdrop bots. If you are - using NAT, this extension will enable you to send files and initiate - chats. Note that you do NOT need this extension to get files or - have others initiate chats, or everything else in IRC. - - To compile it as a module, choose M here. If unsure, say Y. - -config IP_NF_NETBIOS_NS - tristate "NetBIOS name service protocol support (EXPERIMENTAL)" - depends on IP_NF_CONNTRACK && EXPERIMENTAL +config NFT_CHAIN_ROUTE_IPV4 + depends on NF_TABLES_IPV4 + tristate "IPv4 nf_tables route chain support" help - NetBIOS name service requests are sent as broadcast messages from an - unprivileged port and responded to with unicast messages to the - same port. This make them hard to firewall properly because connection - tracking doesn't deal with broadcasts. This helper tracks locally - originating NetBIOS name service requests and the corresponding - responses. It relies on correct IP address configuration, specifically - netmask and broadcast address. When properly configured, the output - of "ip address show" should look similar to this: - - $ ip -4 address show eth0 - 4: eth0: <BROADCAST,MULTICAST,UP> mtu 1500 qdisc pfifo_fast qlen 1000 - inet 172.16.2.252/24 brd 172.16.2.255 scope global eth0 - - To compile it as a module, choose M here. If unsure, say N. + This option enables the "route" chain for IPv4 in nf_tables. This + chain type is used to force packet re-routing after mangling header + fields such as the source, destination, type of service and + the packet mark. -config IP_NF_TFTP - tristate "TFTP protocol support" - depends on IP_NF_CONNTRACK +config NFT_CHAIN_NAT_IPV4 + depends on NF_TABLES_IPV4 + depends on NF_NAT_IPV4 && NFT_NAT + tristate "IPv4 nf_tables nat chain support" help - TFTP connection tracking helper, this is required depending - on how restrictive your ruleset is. - If you are using a tftp client behind -j SNAT or -j MASQUERADING - you will need this. - - To compile it as a module, choose M here. If unsure, say Y. + This option enables the "nat" chain for IPv4 in nf_tables. This + chain type is used to perform Network Address Translation (NAT) + packet transformations such as the source, destination address and + source and destination ports. -config IP_NF_AMANDA - tristate "Amanda backup protocol support" - depends on IP_NF_CONNTRACK - help - If you are running the Amanda backup package <http://www.amanda.org/> - on this machine or machines that will be MASQUERADED through this - machine, then you may want to enable this feature. This allows the - connection tracking and natting code to allow the sub-channels that - Amanda requires for communication of the backup data, messages and - index. - - To compile it as a module, choose M here. If unsure, say Y. - -config IP_NF_PPTP - tristate 'PPTP protocol support' - depends on IP_NF_CONNTRACK - help - This module adds support for PPTP (Point to Point Tunnelling - Protocol, RFC2637) connection tracking and NAT. - - If you are running PPTP sessions over a stateful firewall or NAT - box, you may want to enable this feature. - - Please note that not all PPTP modes of operation are supported yet. - For more info, read top of the file - net/ipv4/netfilter/ip_conntrack_pptp.c - - If you want to compile it as a module, say M here and read - Documentation/modules.txt. If unsure, say `N'. +config NFT_REJECT_IPV4 + depends on NF_TABLES_IPV4 + default NFT_REJECT + tristate -config IP_NF_QUEUE - tristate "IP Userspace queueing via NETLINK (OBSOLETE)" +config NF_TABLES_ARP + depends on NF_TABLES + tristate "ARP nf_tables support" help - Netfilter has the ability to queue packets to user space: the - netlink device can be used to access them using this driver. - - This option enables the old IPv4-only "ip_queue" implementation - which has been obsoleted by the new "nfnetlink_queue" code (see - CONFIG_NETFILTER_NETLINK_QUEUE). - - To compile it as a module, choose M here. If unsure, say N. + This option enables the ARP support for nf_tables. config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" - depends on NETFILTER_XTABLES + default m if NETFILTER_ADVANCED=n + select NETFILTER_XTABLES help iptables is a general, extensible packet identification framework. The packet filtering and full NAT (masquerading, port forwarding, @@ -191,132 +84,50 @@ config IP_NF_IPTABLES To compile it as a module, choose M here. If unsure, say N. -# The matches. -config IP_NF_MATCH_IPRANGE - tristate "IP range match support" - depends on IP_NF_IPTABLES - help - This option makes possible to match IP addresses against IP address - ranges. +if IP_NF_IPTABLES - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_MATCH_MULTIPORT - tristate "Multiple port match support" - depends on IP_NF_IPTABLES - help - Multiport matching allows you to match TCP or UDP packets based on - a series of source or destination ports: normally a rule can only - match a single range of ports. - - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_MATCH_TOS - tristate "TOS match support" - depends on IP_NF_IPTABLES - help - TOS matching allows you to match packets based on the Type Of - Service fields of the IP packet. - - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_MATCH_RECENT - tristate "recent match support" - depends on IP_NF_IPTABLES +# The matches. +config IP_NF_MATCH_AH + tristate '"ah" match support' + depends on NETFILTER_ADVANCED help - This match is used for creating one or many lists of recently - used addresses and then matching against that/those list(s). - - Short options are available by using 'iptables -m recent -h' - Official Website: <http://snowman.net/projects/ipt_recent/> + This match extension allows you to match a range of SPIs + inside AH header of IPSec packets. To compile it as a module, choose M here. If unsure, say N. config IP_NF_MATCH_ECN - tristate "ECN match support" - depends on IP_NF_IPTABLES - help - This option adds a `ECN' match, which allows you to match against - the IPv4 and TCP header ECN fields. - - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_MATCH_DSCP - tristate "DSCP match support" - depends on IP_NF_IPTABLES - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. + tristate '"ecn" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_ECN + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_ECN. -config IP_NF_MATCH_AH_ESP - tristate "AH/ESP match support" - depends on IP_NF_IPTABLES - help - These two match extensions (`ah' and `esp') allow you to match a - range of SPIs inside AH or ESP headers of IPSec packets. +config IP_NF_MATCH_RPFILTER + tristate '"rpfilter" reverse path filter match support' + depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW) + ---help--- + This option allows you to match packets whose replies would + go out via the interface the packet came in. To compile it as a module, choose M here. If unsure, say N. + The module will be called ipt_rpfilter. config IP_NF_MATCH_TTL - tristate "TTL match support" - depends on IP_NF_IPTABLES - help - This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user - to match packets by their TTL value. - - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_MATCH_OWNER - tristate "Owner match support" - depends on IP_NF_IPTABLES - help - Packet owner matching allows you to match locally-generated packets - based on who created them: the user, group, process or session. - - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_MATCH_ADDRTYPE - tristate 'address type match support' - depends on IP_NF_IPTABLES - help - This option allows you to match what routing thinks of an address, - eg. UNICAST, LOCAL, BROADCAST, ... - - If you want to compile it as a module, say M here and read - <file:Documentation/modules.txt>. If unsure, say `N'. - -config IP_NF_MATCH_HASHLIMIT - tristate 'hashlimit match support' - depends on IP_NF_IPTABLES - help - This option adds a new iptables `hashlimit' match. - - As opposed to `limit', this match dynamically crates a hash table - of limit buckets, based on your selection of source/destination - ip addresses and/or ports. - - It enables you to express policies like `10kpps for any given - destination IP' or `500pps from any given source IP' with a single - IPtables rule. - -config IP_NF_MATCH_POLICY - tristate "IPsec policy match support" - depends on IP_NF_IPTABLES && XFRM - help - Policy matching allows you to match packets based on the - IPsec policy that was used during decapsulation/will - be used during encapsulation. - - To compile it as a module, choose M here. If unsure, say N. + tristate '"ttl" match support' + depends on NETFILTER_ADVANCED + select NETFILTER_XT_MATCH_HL + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_MATCH_HL. # `filter', generic and specific targets config IP_NF_FILTER tristate "Packet filtering" - depends on IP_NF_IPTABLES + default m if NETFILTER_ADVANCED=n help Packet filtering defines a table `filter', which has a series of rules for simple packet filtering at local input, forwarding and @@ -327,6 +138,7 @@ config IP_NF_FILTER config IP_NF_TARGET_REJECT tristate "REJECT target support" depends on IP_NF_FILTER + default m if NETFILTER_ADVANCED=n help The REJECT target allows a filtering rule to specify that an ICMP error should be issued in response to an incoming packet, rather @@ -334,18 +146,22 @@ config IP_NF_TARGET_REJECT To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_LOG - tristate "LOG target support" - depends on IP_NF_IPTABLES +config IP_NF_TARGET_SYNPROXY + tristate "SYNPROXY target support" + depends on NF_CONNTRACK && NETFILTER_ADVANCED + select NETFILTER_SYNPROXY + select SYN_COOKIES help - This option adds a `LOG' target, which allows you to create rules in - any iptables table which records the packet header to the syslog. + The SYNPROXY target allows you to intercept TCP connections and + establish them using syncookies before they are passed on to the + server. This allows to avoid conntrack and server resource usage + during SYN-flood attacks. - To compile it as a module, choose M here. If unsure, say N. + To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ULOG - tristate "ULOG target support (OBSOLETE)" - depends on IP_NF_IPTABLES + tristate "ULOG target support (obsolete)" + default m if NETFILTER_ADVANCED=n ---help--- This option enables the old IPv4-only "ipt_ULOG" implementation @@ -357,56 +173,29 @@ config IP_NF_TARGET_ULOG daemon using netlink multicast sockets; unlike the LOG target which can only be viewed through syslog. - The apropriate userspace logging daemon (ulogd) may be obtained from - <http://www.gnumonks.org/projects/ulogd/> - - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_TARGET_TCPMSS - tristate "TCPMSS target support" - depends on IP_NF_IPTABLES - ---help--- - This option adds a `TCPMSS' target, which allows you to alter the - MSS value of TCP SYN packets, to control the maximum size for that - connection (usually limiting it to your outgoing interface's MTU - minus 40). - - This is used to overcome criminally braindead ISPs or servers which - block ICMP Fragmentation Needed packets. The symptoms of this - problem are that everything works fine from your Linux - firewall/router, but machines behind it can never exchange large - packets: - 1) Web browsers connect, then hang with no data received. - 2) Small mail works fine, but large emails hang. - 3) ssh works fine, but scp hangs after initial handshaking. - - Workaround: activate this option and add a rule to your firewall - configuration like: - - iptables -A FORWARD -p tcp --tcp-flags SYN,RST SYN \ - -j TCPMSS --clamp-mss-to-pmtu + The appropriate userspace logging daemon (ulogd) may be obtained from + <http://www.netfilter.org/projects/ulogd/index.html> To compile it as a module, choose M here. If unsure, say N. -# NAT + specific targets -config IP_NF_NAT - tristate "Full NAT" - depends on IP_NF_IPTABLES && IP_NF_CONNTRACK +# NAT + specific targets: nf_conntrack +config NF_NAT_IPV4 + tristate "IPv4 NAT" + depends on NF_CONNTRACK_IPV4 + default m if NETFILTER_ADVANCED=n + select NF_NAT help - The Full NAT option allows masquerading, port forwarding and other + The IPv4 NAT option allows masquerading, port forwarding and other forms of full Network Address Port Translation. It is controlled by the `nat' table in iptables: see the man page for iptables(8). To compile it as a module, choose M here. If unsure, say N. -config IP_NF_NAT_NEEDED - bool - depends on IP_NF_NAT != n - default y +if NF_NAT_IPV4 config IP_NF_TARGET_MASQUERADE tristate "MASQUERADE target support" - depends on IP_NF_NAT + default m if NETFILTER_ADVANCED=n help Masquerading is a special case of NAT: all outgoing connections are changed to seem to come from a particular interface's address, and @@ -416,40 +205,31 @@ config IP_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_REDIRECT - tristate "REDIRECT target support" - depends on IP_NF_NAT - help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_NETMAP tristate "NETMAP target support" - depends on IP_NF_NAT - help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. It is similar to Fast NAT, except that - Netfilter's connection tracking doesn't work well with Fast NAT. - - To compile it as a module, choose M here. If unsure, say N. + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_NETMAP + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_NETMAP. -config IP_NF_TARGET_SAME - tristate "SAME target support" - depends on IP_NF_NAT - help - This option adds a `SAME' target, which works like the standard SNAT - target, but attempts to give clients the same IP for all connections. +config IP_NF_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on NETFILTER_ADVANCED + select NETFILTER_XT_TARGET_REDIRECT + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_REDIRECT. - To compile it as a module, choose M here. If unsure, say N. +endif -config IP_NF_NAT_SNMP_BASIC - tristate "Basic SNMP-ALG support (EXPERIMENTAL)" - depends on EXPERIMENTAL && IP_NF_NAT +config NF_NAT_SNMP_BASIC + tristate "Basic SNMP-ALG support" + depends on NF_CONNTRACK_SNMP && NF_NAT_IPV4 + depends on NETFILTER_ADVANCED + default NF_NAT && NF_CONNTRACK_SNMP ---help--- This module implements an Application Layer Gateway (ALG) for @@ -462,42 +242,33 @@ config IP_NF_NAT_SNMP_BASIC To compile it as a module, choose M here. If unsure, say N. -config IP_NF_NAT_IRC - tristate - depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n - default IP_NF_NAT if IP_NF_IRC=y - default m if IP_NF_IRC=m - -# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y), -# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. Argh. -config IP_NF_NAT_FTP - tristate - depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n - default IP_NF_NAT if IP_NF_FTP=y - default m if IP_NF_FTP=m +# If they want FTP, set to $CONFIG_IP_NF_NAT (m or y), +# or $CONFIG_IP_NF_FTP (m or y), whichever is weaker. +# From kconfig-language.txt: +# +# <expr> '&&' <expr> (6) +# +# (6) Returns the result of min(/expr/, /expr/). -config IP_NF_NAT_TFTP +config NF_NAT_PROTO_GRE tristate - depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n - default IP_NF_NAT if IP_NF_TFTP=y - default m if IP_NF_TFTP=m + depends on NF_NAT_IPV4 && NF_CT_PROTO_GRE -config IP_NF_NAT_AMANDA +config NF_NAT_PPTP tristate - depends on IP_NF_IPTABLES!=n && IP_NF_CONNTRACK!=n && IP_NF_NAT!=n - default IP_NF_NAT if IP_NF_AMANDA=y - default m if IP_NF_AMANDA=m + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_PPTP + select NF_NAT_PROTO_GRE -config IP_NF_NAT_PPTP +config NF_NAT_H323 tristate - depends on IP_NF_NAT!=n && IP_NF_PPTP!=n - default IP_NF_NAT if IP_NF_PPTP=y - default m if IP_NF_PPTP=m + depends on NF_CONNTRACK && NF_NAT_IPV4 + default NF_NAT_IPV4 && NF_CONNTRACK_H323 # mangle + specific targets config IP_NF_MANGLE tristate "Packet mangling" - depends on IP_NF_IPTABLES + default m if NETFILTER_ADVANCED=n help This option adds a `mangle' table to iptables: see the man page for iptables(8). This table is used for various packet alterations @@ -505,19 +276,23 @@ config IP_NF_MANGLE To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_TOS - tristate "TOS target support" +config IP_NF_TARGET_CLUSTERIP + tristate "CLUSTERIP target support" depends on IP_NF_MANGLE + depends on NF_CONNTRACK_IPV4 + depends on NETFILTER_ADVANCED + select NF_CONNTRACK_MARK help - This option adds a `TOS' target, which allows you to create rules in - the `mangle' table which alter the Type Of Service field of an IP - packet prior to routing. - + The CLUSTERIP target allows you to build load-balancing clusters of + network servers without having a dedicated load-balancing + router/server/switch. + To compile it as a module, choose M here. If unsure, say N. config IP_NF_TARGET_ECN tristate "ECN target support" depends on IP_NF_MANGLE + depends on NETFILTER_ADVANCED ---help--- This option adds a `ECN' target, which can be used in the iptables mangle table. @@ -529,58 +304,44 @@ config IP_NF_TARGET_ECN To compile it as a module, choose M here. If unsure, say N. -config IP_NF_TARGET_DSCP - tristate "DSCP target support" - depends on IP_NF_MANGLE - help - This option adds a `DSCP' match, which allows you to match against - the IPv4 header DSCP field (DSCP codepoint). - - The DSCP codepoint can have any value between 0x0 and 0x4f. - - To compile it as a module, choose M here. If unsure, say N. - config IP_NF_TARGET_TTL - tristate 'TTL target support' - depends on IP_NF_MANGLE - help - This option adds a `TTL' target, which enables the user to modify - the TTL value of the IP header. - - While it is safe to decrement/lower the TTL, this target also enables - functionality to increment and set the TTL value of the IP header to - arbitrary values. This is EXTREMELY DANGEROUS since you can easily - create immortal packets that loop forever on the network. - - To compile it as a module, choose M here. If unsure, say N. - -config IP_NF_TARGET_CLUSTERIP - tristate "CLUSTERIP target support (EXPERIMENTAL)" - depends on IP_NF_MANGLE && EXPERIMENTAL - depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) - help - The CLUSTERIP target allows you to build load-balancing clusters of - network servers without having a dedicated load-balancing - router/server/switch. - - To compile it as a module, choose M here. If unsure, say N. + tristate '"TTL" target support' + depends on NETFILTER_ADVANCED && IP_NF_MANGLE + select NETFILTER_XT_TARGET_HL + ---help--- + This is a backwards-compatible option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_HL. # raw + specific targets config IP_NF_RAW tristate 'raw table support (required for NOTRACK/TRACE)' - depends on IP_NF_IPTABLES help This option adds a `raw' table to iptables. This table is the very first in the netfilter framework and hooks in at the PREROUTING and OUTPUT chains. If you want to compile it as a module, say M here and read - <file:Documentation/modules.txt>. If unsure, say `N'. + <file:Documentation/kbuild/modules.txt>. If unsure, say `N'. + +# security table for MAC policy +config IP_NF_SECURITY + tristate "Security table" + depends on SECURITY + depends on NETFILTER_ADVANCED + help + This option adds a `security' table to iptables, for use + with Mandatory Access Control (MAC) policy. + + If unsure, say N. + +endif # IP_NF_IPTABLES # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" - depends on NETFILTER_XTABLES + select NETFILTER_XTABLES + depends on NETFILTER_ADVANCED help arptables is a general, extensible packet identification framework. The ARP packet filtering and mangling (manipulation)subsystems @@ -588,9 +349,10 @@ config IP_NF_ARPTABLES To compile it as a module, choose M here. If unsure, say N. +if IP_NF_ARPTABLES + config IP_NF_ARPFILTER tristate "ARP packet filtering" - depends on IP_NF_ARPTABLES help ARP packet filtering defines a table `filter', which has a series of rules for simple ARP packet filtering at local input and @@ -601,10 +363,11 @@ config IP_NF_ARPFILTER config IP_NF_ARP_MANGLE tristate "ARP payload mangling" - depends on IP_NF_ARPTABLES help Allows altering the ARP packet payload: source and destination hardware and network addresses. +endif # IP_NF_ARPTABLES + endmenu diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index e5c5b3202f0..90b82405331 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -2,39 +2,36 @@ # Makefile for the netfilter modules on top of IPv4. # -# objects for the standalone - connection tracking / NAT -ip_conntrack-objs := ip_conntrack_standalone.o ip_conntrack_core.o ip_conntrack_proto_generic.o ip_conntrack_proto_tcp.o ip_conntrack_proto_udp.o ip_conntrack_proto_icmp.o -ip_nat-objs := ip_nat_core.o ip_nat_helper.o ip_nat_proto_unknown.o ip_nat_proto_tcp.o ip_nat_proto_udp.o ip_nat_proto_icmp.o -iptable_nat-objs := ip_nat_rule.o ip_nat_standalone.o - -ip_conntrack_pptp-objs := ip_conntrack_helper_pptp.o ip_conntrack_proto_gre.o -ip_nat_pptp-objs := ip_nat_helper_pptp.o ip_nat_proto_gre.o +# objects for l3 independent conntrack +nf_conntrack_ipv4-y := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o +ifeq ($(CONFIG_NF_CONNTRACK_PROC_COMPAT),y) +ifeq ($(CONFIG_PROC_FS),y) +nf_conntrack_ipv4-objs += nf_conntrack_l3proto_ipv4_compat.o +endif +endif # connection tracking -obj-$(CONFIG_IP_NF_CONNTRACK) += ip_conntrack.o -obj-$(CONFIG_IP_NF_NAT) += ip_nat.o +obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o -# conntrack netlink interface -obj-$(CONFIG_IP_NF_CONNTRACK_NETLINK) += ip_conntrack_netlink.o +nf_nat_ipv4-y := nf_nat_l3proto_ipv4.o nf_nat_proto_icmp.o +obj-$(CONFIG_NF_NAT_IPV4) += nf_nat_ipv4.o +# defrag +obj-$(CONFIG_NF_DEFRAG_IPV4) += nf_defrag_ipv4.o -# SCTP protocol connection tracking -obj-$(CONFIG_IP_NF_CT_PROTO_SCTP) += ip_conntrack_proto_sctp.o +# NAT helpers (nf_conntrack) +obj-$(CONFIG_NF_NAT_H323) += nf_nat_h323.o +obj-$(CONFIG_NF_NAT_PPTP) += nf_nat_pptp.o +obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o -# connection tracking helpers -obj-$(CONFIG_IP_NF_PPTP) += ip_conntrack_pptp.o -obj-$(CONFIG_IP_NF_AMANDA) += ip_conntrack_amanda.o -obj-$(CONFIG_IP_NF_TFTP) += ip_conntrack_tftp.o -obj-$(CONFIG_IP_NF_FTP) += ip_conntrack_ftp.o -obj-$(CONFIG_IP_NF_IRC) += ip_conntrack_irc.o -obj-$(CONFIG_IP_NF_NETBIOS_NS) += ip_conntrack_netbios_ns.o +# NAT protocols (nf_nat) +obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o -# NAT helpers -obj-$(CONFIG_IP_NF_NAT_PPTP) += ip_nat_pptp.o -obj-$(CONFIG_IP_NF_NAT_AMANDA) += ip_nat_amanda.o -obj-$(CONFIG_IP_NF_NAT_TFTP) += ip_nat_tftp.o -obj-$(CONFIG_IP_NF_NAT_FTP) += ip_nat_ftp.o -obj-$(CONFIG_IP_NF_NAT_IRC) += ip_nat_irc.o +obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o +obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o +obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o +obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o +obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o # generic IP tables obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o @@ -42,38 +39,21 @@ obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o # the three instances of ip_tables obj-$(CONFIG_IP_NF_FILTER) += iptable_filter.o obj-$(CONFIG_IP_NF_MANGLE) += iptable_mangle.o -obj-$(CONFIG_IP_NF_NAT) += iptable_nat.o +obj-$(CONFIG_NF_NAT_IPV4) += iptable_nat.o obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o +obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches -obj-$(CONFIG_IP_NF_MATCH_HASHLIMIT) += ipt_hashlimit.o -obj-$(CONFIG_IP_NF_MATCH_IPRANGE) += ipt_iprange.o -obj-$(CONFIG_IP_NF_MATCH_MULTIPORT) += ipt_multiport.o -obj-$(CONFIG_IP_NF_MATCH_OWNER) += ipt_owner.o -obj-$(CONFIG_IP_NF_MATCH_TOS) += ipt_tos.o -obj-$(CONFIG_IP_NF_MATCH_RECENT) += ipt_recent.o -obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o -obj-$(CONFIG_IP_NF_MATCH_DSCP) += ipt_dscp.o -obj-$(CONFIG_IP_NF_MATCH_AH_ESP) += ipt_ah.o ipt_esp.o -obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o -obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o -obj-$(CONFIG_IP_NF_MATCH_POLICY) += ipt_policy.o +obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o +obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o # targets -obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o -obj-$(CONFIG_IP_NF_TARGET_TOS) += ipt_TOS.o +obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o -obj-$(CONFIG_IP_NF_TARGET_DSCP) += ipt_DSCP.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o -obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o -obj-$(CONFIG_IP_NF_TARGET_SAME) += ipt_SAME.o -obj-$(CONFIG_IP_NF_NAT_SNMP_BASIC) += ip_nat_snmp_basic.o -obj-$(CONFIG_IP_NF_TARGET_LOG) += ipt_LOG.o +obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o +obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o -obj-$(CONFIG_IP_NF_TARGET_TCPMSS) += ipt_TCPMSS.o -obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o -obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o # generic ARP tables obj-$(CONFIG_IP_NF_ARPTABLES) += arp_tables.o @@ -81,11 +61,3 @@ obj-$(CONFIG_IP_NF_ARP_MANGLE) += arpt_mangle.o # just filtering instance of ARP tables for now obj-$(CONFIG_IP_NF_ARPFILTER) += arptable_filter.o - -obj-$(CONFIG_IP_NF_QUEUE) += ip_queue.o - -# objects for l3 independent conntrack -nf_conntrack_ipv4-objs := nf_conntrack_l3proto_ipv4.o nf_conntrack_proto_icmp.o - -# l3 independent conntrack -obj-$(CONFIG_NF_CONNTRACK_IPV4) += nf_conntrack_ipv4.o diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index afe3d8f8177..f95b6f93814 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -6,10 +6,10 @@ * Some ARP specific bits are: * * Copyright (C) 2002 David S. Miller (davem@redhat.com) + * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net> * */ - -#include <linux/config.h> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/kernel.h> #include <linux/skbuff.h> #include <linux/netdevice.h> @@ -20,12 +20,15 @@ #include <linux/proc_fs.h> #include <linux/module.h> #include <linux/init.h> - +#include <linux/mutex.h> +#include <linux/err.h> +#include <net/compat.h> +#include <net/sock.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp/arp_tables.h> +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); @@ -47,20 +50,19 @@ MODULE_DESCRIPTION("arptables core"); #endif #ifdef CONFIG_NETFILTER_DEBUG -#define ARP_NF_ASSERT(x) \ -do { \ - if (!(x)) \ - printk("ARP_NF_ASSERT: %s:%s:%u\n", \ - __FUNCTION__, __FILE__, __LINE__); \ -} while(0) +#define ARP_NF_ASSERT(x) WARN_ON(!(x)) #else #define ARP_NF_ASSERT(x) #endif -#include <linux/netfilter_ipv4/listhelp.h> +void *arpt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(arpt, ARPT); +} +EXPORT_SYMBOL_GPL(arpt_alloc_initial_table); static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, - char *hdr_addr, int len) + const char *hdr_addr, int len) { int i, ret; @@ -71,7 +73,29 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, for (i = 0; i < len; i++) ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; - return (ret != 0); + return ret != 0; +} + +/* + * Unfortunately, _b and _mask are not aligned to an int (or long int) + * Some arches dont care, unrolling the loop is a win on them. + * For other arches, we only have a 16bit alignement. + */ +static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask) +{ +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS + unsigned long ret = ifname_compare_aligned(_a, _b, _mask); +#else + unsigned long ret = 0; + const u16 *a = (const u16 *)_a; + const u16 *b = (const u16 *)_b; + const u16 *mask = (const u16 *)_mask; + int i; + + for (i = 0; i < IFNAMSIZ/sizeof(u16); i++) + ret |= (a[i] ^ b[i]) & mask[i]; +#endif + return ret; } /* Returns whether packet matches rule or not. */ @@ -81,12 +105,12 @@ static inline int arp_packet_match(const struct arphdr *arphdr, const char *outdev, const struct arpt_arp *arpinfo) { - char *arpptr = (char *)(arphdr + 1); - char *src_devaddr, *tgt_devaddr; - u32 src_ipaddr, tgt_ipaddr; - int i, ret; + const char *arpptr = (char *)(arphdr + 1); + const char *src_devaddr, *tgt_devaddr; + __be32 src_ipaddr, tgt_ipaddr; + long ret; -#define FWINV(bool,invflg) ((bool) ^ !!(arpinfo->invflags & invflg)) +#define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg))) if (FWINV((arphdr->ar_op & arpinfo->arpop_mask) != arpinfo->arpop, ARPT_INV_ARPOP)) { @@ -143,24 +167,21 @@ static inline int arp_packet_match(const struct arphdr *arphdr, ARPT_INV_TGTIP)) { dprintf("Source or target IP address mismatch.\n"); - dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", - NIPQUAD(src_ipaddr), - NIPQUAD(arpinfo->smsk.s_addr), - NIPQUAD(arpinfo->src.s_addr), + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &src_ipaddr, + &arpinfo->smsk.s_addr, + &arpinfo->src.s_addr, arpinfo->invflags & ARPT_INV_SRCIP ? " (INV)" : ""); - dprintf("TGT: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", - NIPQUAD(tgt_ipaddr), - NIPQUAD(arpinfo->tmsk.s_addr), - NIPQUAD(arpinfo->tgt.s_addr), + dprintf("TGT: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &tgt_ipaddr, + &arpinfo->tmsk.s_addr, + &arpinfo->tgt.s_addr, arpinfo->invflags & ARPT_INV_TGTIP ? " (INV)" : ""); return 0; } /* Look for ifname matches. */ - for (i = 0, ret = 0; i < IFNAMSIZ; i++) { - ret |= (indev[i] ^ arpinfo->iniface[i]) - & arpinfo->iniface_mask[i]; - } + ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", @@ -169,14 +190,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, return 0; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - unsigned long odev; - memcpy(&odev, outdev + i*sizeof(unsigned long), - sizeof(unsigned long)); - ret |= (odev - ^ ((const unsigned long *)arpinfo->outiface)[i]) - & ((const unsigned long *)arpinfo->outiface_mask)[i]; - } + ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask); if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", @@ -186,6 +200,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr, } return 1; +#undef FWINV } static inline int arp_checkentry(const struct arpt_arp *arp) @@ -204,142 +219,153 @@ static inline int arp_checkentry(const struct arpt_arp *arp) return 1; } -static unsigned int arpt_error(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) +static unsigned int +arpt_error(struct sk_buff *skb, const struct xt_action_param *par) { - if (net_ratelimit()) - printk("arp_tables: error: '%s'\n", (char *)targinfo); + net_err_ratelimited("arp_tables: error: '%s'\n", + (const char *)par->targinfo); return NF_DROP; } -static inline struct arpt_entry *get_entry(void *base, unsigned int offset) +static inline const struct xt_entry_target * +arpt_get_target_c(const struct arpt_entry *e) +{ + return arpt_get_target((struct arpt_entry *)e); +} + +static inline struct arpt_entry * +get_entry(const void *base, unsigned int offset) { return (struct arpt_entry *)(base + offset); } -unsigned int arpt_do_table(struct sk_buff **pskb, +static inline __pure +struct arpt_entry *arpt_next_entry(const struct arpt_entry *entry) +{ + return (void *)entry + entry->next_offset; +} + +unsigned int arpt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct arpt_table *table, - void *userdata) + struct xt_table *table) { - static const char nulldevname[IFNAMSIZ]; + static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); unsigned int verdict = NF_DROP; - struct arphdr *arp; - int hotdrop = 0; + const struct arphdr *arp; struct arpt_entry *e, *back; const char *indev, *outdev; void *table_base; - struct xt_table_info *private = table->private; + const struct xt_table_info *private; + struct xt_action_param acpar; + unsigned int addend; - /* ARP header, plus 2 device addresses, plus 2 IP addresses. */ - if (!pskb_may_pull((*pskb), (sizeof(struct arphdr) + - (2 * (*pskb)->dev->addr_len) + - (2 * sizeof(u32))))) + if (!pskb_may_pull(skb, arp_hdr_len(skb->dev))) return NF_DROP; indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; - read_lock_bh(&table->lock); - table_base = (void *)private->entries[smp_processor_id()]; + local_bh_disable(); + addend = xt_write_recseq_begin(); + private = table->private; + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); + table_base = private->entries[smp_processor_id()]; + e = get_entry(table_base, private->hook_entry[hook]); back = get_entry(table_base, private->underflow[hook]); - arp = (*pskb)->nh.arph; + acpar.in = in; + acpar.out = out; + acpar.hooknum = hook; + acpar.family = NFPROTO_ARP; + acpar.hotdrop = false; + + arp = arp_hdr(skb); do { - if (arp_packet_match(arp, (*pskb)->dev, indev, outdev, &e->arp)) { - struct arpt_entry_target *t; - int hdr_len; - - hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) + - (2 * (*pskb)->dev->addr_len); - ADD_COUNTER(e->counters, hdr_len, 1); - - t = arpt_get_target(e); - - /* Standard target? */ - if (!t->u.kernel.target->target) { - int v; - - v = ((struct arpt_standard_target *)t)->verdict; - if (v < 0) { - /* Pop from stack? */ - if (v != ARPT_RETURN) { - verdict = (unsigned)(-v) - 1; - break; - } - e = back; - back = get_entry(table_base, - back->comefrom); - continue; - } - if (table_base + v - != (void *)e + e->next_offset) { - /* Save old back ptr in next entry */ - struct arpt_entry *next - = (void *)e + e->next_offset; - next->comefrom = - (void *)back - table_base; - - /* set back pointer to next entry */ - back = next; - } + const struct xt_entry_target *t; - e = get_entry(table_base, v); - } else { - /* Targets which reenter must return - * abs. verdicts - */ - verdict = t->u.kernel.target->target(pskb, - in, out, - hook, - t->data, - userdata); - - /* Target might have changed stuff. */ - arp = (*pskb)->nh.arph; - - if (verdict == ARPT_CONTINUE) - e = (void *)e + e->next_offset; - else - /* Verdict */ + if (!arp_packet_match(arp, skb->dev, indev, outdev, &e->arp)) { + e = arpt_next_entry(e); + continue; + } + + ADD_COUNTER(e->counters, arp_hdr_len(skb->dev), 1); + + t = arpt_get_target_c(e); + + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct xt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != XT_RETURN) { + verdict = (unsigned int)(-v) - 1; break; + } + e = back; + back = get_entry(table_base, back->comefrom); + continue; } - } else { - e = (void *)e + e->next_offset; + if (table_base + v + != arpt_next_entry(e)) { + /* Save old back ptr in next entry */ + struct arpt_entry *next = arpt_next_entry(e); + next->comefrom = (void *)back - table_base; + + /* set back pointer to next entry */ + back = next; + } + + e = get_entry(table_base, v); + continue; } - } while (!hotdrop); - read_unlock_bh(&table->lock); - if (hotdrop) + /* Targets which reenter must return + * abs. verdicts + */ + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; + verdict = t->u.kernel.target->target(skb, &acpar); + + /* Target might have changed stuff. */ + arp = arp_hdr(skb); + + if (verdict == XT_CONTINUE) + e = arpt_next_entry(e); + else + /* Verdict */ + break; + } while (!acpar.hotdrop); + xt_write_recseq_end(addend); + local_bh_enable(); + + if (acpar.hotdrop) return NF_DROP; else return verdict; } /* All zeroes == unconditional rule. */ -static inline int unconditional(const struct arpt_arp *arp) +static inline bool unconditional(const struct arpt_arp *arp) { - unsigned int i; + static const struct arpt_arp uncond; - for (i = 0; i < sizeof(*arp)/sizeof(__u32); i++) - if (((__u32 *)arp)[i]) - return 0; - - return 1; + return memcmp(arp, &uncond, sizeof(uncond)) == 0; } /* Figures out from what hook each rule can be called: returns 0 if * there are loops. Puts hook bitmask in comefrom. */ -static int mark_source_chains(struct xt_table_info *newinfo, +static int mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; @@ -359,11 +385,12 @@ static int mark_source_chains(struct xt_table_info *newinfo, e->counters.pcnt = pos; for (;;) { - struct arpt_standard_target *t - = (void *)arpt_get_target(e); + const struct xt_standard_target *t + = (void *)arpt_get_target_c(e); + int visited = e->comefrom & (1 << hook); if (e->comefrom & (1 << NF_ARP_NUMHOOKS)) { - printk("arptables: loop hook %u pos %u %08X.\n", + pr_notice("arptables: loop hook %u pos %u %08X.\n", hook, pos, e->comefrom); return 0; } @@ -371,13 +398,22 @@ static int mark_source_chains(struct xt_table_info *newinfo, |= ((1 << hook) | (1 << NF_ARP_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct arpt_entry) - && (strcmp(t->target.u.user.name, - ARPT_STANDARD_TARGET) == 0) - && t->verdict < 0 - && unconditional(&e->arp)) { + if ((e->target_offset == sizeof(struct arpt_entry) && + (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < 0 && unconditional(&e->arp)) || + visited) { unsigned int oldpos, size; + if ((strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + /* Return: backtrack through the last * big jump. */ @@ -405,8 +441,16 @@ static int mark_source_chains(struct xt_table_info *newinfo, int newpos = t->verdict; if (strcmp(t->target.u.user.name, - ARPT_STANDARD_TARGET) == 0 - && newpos >= 0) { + XT_STANDARD_TARGET) == 0 && + newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct arpt_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } + /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -426,102 +470,111 @@ static int mark_source_chains(struct xt_table_info *newinfo, return 1; } -static inline int standard_check(const struct arpt_entry_target *t, - unsigned int max_offset) +static inline int check_entry(const struct arpt_entry *e, const char *name) { - struct arpt_standard_target *targ = (void *)t; - - /* Check standard info. */ - if (t->u.target_size - != ARPT_ALIGN(sizeof(struct arpt_standard_target))) { - duprintf("arpt_standard_check: target size %u != %Zu\n", - t->u.target_size, - ARPT_ALIGN(sizeof(struct arpt_standard_target))); - return 0; - } + const struct xt_entry_target *t; - if (targ->verdict >= 0 - && targ->verdict > max_offset - sizeof(struct arpt_entry)) { - duprintf("arpt_standard_check: bad verdict (%i)\n", - targ->verdict); - return 0; + if (!arp_checkentry(&e->arp)) { + duprintf("arp_tables: arp check failed %p %s.\n", e, name); + return -EINVAL; } - if (targ->verdict < -NF_MAX_VERDICT - 1) { - duprintf("arpt_standard_check: bad negative verdict (%i)\n", - targ->verdict); - return 0; - } - return 1; + if (e->target_offset + sizeof(struct xt_entry_target) > e->next_offset) + return -EINVAL; + + t = arpt_get_target_c(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; } -static struct arpt_target arpt_standard_target; +static inline int check_target(struct arpt_entry *e, const char *name) +{ + struct xt_entry_target *t = arpt_get_target(e); + int ret; + struct xt_tgchk_param par = { + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_ARP, + }; + + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), 0, false); + if (ret < 0) { + duprintf("arp_tables: check failed for `%s'.\n", + t->u.kernel.target->name); + return ret; + } + return 0; +} -static inline int check_entry(struct arpt_entry *e, const char *name, unsigned int size, - unsigned int *i) +static inline int +find_check_entry(struct arpt_entry *e, const char *name, unsigned int size) { - struct arpt_entry_target *t; - struct arpt_target *target; + struct xt_entry_target *t; + struct xt_target *target; int ret; - if (!arp_checkentry(&e->arp)) { - duprintf("arp_tables: arp check failed %p %s.\n", e, name); - return -EINVAL; - } + ret = check_entry(e, name); + if (ret) + return ret; t = arpt_get_target(e); - target = try_then_request_module(xt_find_target(NF_ARP, t->u.user.name, - t->u.user.revision), - "arpt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); - ret = target ? PTR_ERR(target) : -ENOENT; + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); + ret = PTR_ERR(target); goto out; } t->u.kernel.target = target; - if (t->u.kernel.target == &arpt_standard_target) { - if (!standard_check(t, size)) { - ret = -EINVAL; - goto out; - } - } else if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, t->data, - t->u.target_size - - sizeof(*t), - e->comefrom)) { - module_put(t->u.kernel.target->me); - duprintf("arp_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - goto out; - } - - (*i)++; + ret = check_target(e, name); + if (ret) + goto err; return 0; - +err: + module_put(t->u.kernel.target->me); out: return ret; } +static bool check_underflow(const struct arpt_entry *e) +{ + const struct xt_entry_target *t; + unsigned int verdict; + + if (!unconditional(&e->arp)) + return false; + t = arpt_get_target_c(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct xt_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + static inline int check_entry_size_and_hooks(struct arpt_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; - if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 - || (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct arpt_entry) >= limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } if (e->next_offset - < sizeof(struct arpt_entry) + sizeof(struct arpt_entry_target)) { + < sizeof(struct arpt_entry) + sizeof(struct xt_entry_target)) { duprintf("checking: element %p size %u\n", e, e->next_offset); return -EINVAL; @@ -529,55 +582,53 @@ static inline int check_entry_size_and_hooks(struct arpt_entry *e, /* Check hooks & underflows */ for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) + if ((unsigned char *)e - base == underflows[h]) { + if (!check_underflow(e)) { + pr_err("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); + return -EINVAL; + } newinfo->underflow[h] = underflows[h]; + } } - /* FIXME: underflows must be unconditional, standard verdicts - < 0 (not ARPT_RETURN). --RR */ - /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static inline int cleanup_entry(struct arpt_entry *e, unsigned int *i) +static inline void cleanup_entry(struct arpt_entry *e) { - struct arpt_entry_target *t; - - if (i && (*i)-- == 0) - return 1; + struct xt_tgdtor_param par; + struct xt_entry_target *t; t = arpt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->data, - t->u.target_size - sizeof(*t)); - module_put(t->u.kernel.target->me); - return 0; + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_ARP; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); } /* Checks and translates the user-supplied table segment (held in * newinfo). */ -static int translate_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +static int translate_table(struct xt_table_info *newinfo, void *entry0, + const struct arpt_replace *repl) { + struct arpt_entry *iter; unsigned int i; - int ret; + int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { @@ -589,57 +640,71 @@ static int translate_table(const char *name, i = 0; /* Walk through entries, checking offsets. */ - ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); + if (ret != 0) + break; + ++i; + if (strcmp(arpt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } duprintf("translate_table: ARPT_ENTRY_ITERATE gives %d\n", ret); if (ret != 0) return ret; - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ for (i = 0; i < NF_ARP_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) { + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) { duprintf("Looping hook\n"); return -ELOOP; } /* Finally, each sanity check must pass */ i = 0; - ret = ARPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } if (ret != 0) { - ARPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter); + } return ret; } /* And one copy for every other CPU */ - for_each_cpu(i) { + for_each_possible_cpu(i) { if (newinfo->entries[i] && newinfo->entries[i] != entry0) memcpy(newinfo->entries[i], entry0, newinfo->size); } @@ -647,85 +712,68 @@ static int translate_table(const char *name, return ret; } -/* Gets counters. */ -static inline int add_entry_to_counter(const struct arpt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int set_entry_to_counter(const struct arpt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct arpt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. - */ - curcpu = raw_smp_processor_id(); + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); - i = 0; - ARPT_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); - - for_each_cpu(cpu) { - if (cpu == curcpu) - continue; i = 0; - ARPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) { + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqcount_begin(s); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqcount_retry(s, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); + ++i; + } } } -static int copy_entries_to_user(unsigned int total_size, - struct arpt_table *table, - void __user *userptr) +static struct xt_counters *alloc_counters(const struct xt_table *table) { - unsigned int off, num, countersize; - struct arpt_entry *e; + unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; - int ret = 0; - void *loc_cpu_entry; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care * about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vzalloc(countersize); if (counters == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - /* First, sum counters... */ - write_lock_bh(&table->lock); get_counters(private, counters); - write_unlock_bh(&table->lock); + + return counters; +} + +static int copy_entries_to_user(unsigned int total_size, + const struct xt_table *table, + void __user *userptr) +{ + unsigned int off, num; + const struct arpt_entry *e; + struct xt_counters *counters; + struct xt_table_info *private = table->private; + int ret = 0; + void *loc_cpu_entry; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); loc_cpu_entry = private->entries[raw_smp_processor_id()]; /* ... then copy entire thing ... */ @@ -737,7 +785,7 @@ static int copy_entries_to_user(unsigned int total_size, /* FIXME: use iterator macros --RR */ /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ - struct arpt_entry_target *t; + const struct xt_entry_target *t; e = (struct arpt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off @@ -748,9 +796,9 @@ static int copy_entries_to_user(unsigned int total_size, goto free_counters; } - t = arpt_get_target(e); + t = arpt_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct arpt_entry_target, + + offsetof(struct xt_entry_target, u.user.name), t->u.kernel.target->name, strlen(t->u.kernel.target->name)+1) != 0) { @@ -764,24 +812,170 @@ static int copy_entries_to_user(unsigned int total_size, return ret; } -static int get_entries(const struct arpt_get_entries *entries, - struct arpt_get_entries __user *uptr) +#ifdef CONFIG_COMPAT +static void compat_standard_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v > 0) + v += xt_compat_calc_jump(NFPROTO_ARP, v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv > 0) + cv -= xt_compat_calc_jump(NFPROTO_ARP, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} + +static int compat_calc_entry(const struct arpt_entry *e, + const struct xt_table_info *info, + const void *base, struct xt_table_info *newinfo) { + const struct xt_entry_target *t; + unsigned int entry_offset; + int off, i, ret; + + off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + entry_offset = (void *)e - base; + + t = arpt_get_target_c(e); + off += xt_compat_target_offset(t->u.kernel.target); + newinfo->size -= off; + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + if (info->hook_entry[i] && + (e < (struct arpt_entry *)(base + info->hook_entry[i]))) + newinfo->hook_entry[i] -= off; + if (info->underflow[i] && + (e < (struct arpt_entry *)(base + info->underflow[i]))) + newinfo->underflow[i] -= off; + } + return 0; +} + +static int compat_table_info(const struct xt_table_info *info, + struct xt_table_info *newinfo) +{ + struct arpt_entry *iter; + void *loc_cpu_entry; int ret; - struct arpt_table *t; - t = xt_find_table_lock(NF_ARP, entries->name); - if (t || !IS_ERR(t)) { - struct xt_table_info *private = t->private; + if (!newinfo || !info) + return -EINVAL; + + /* we dont care about newinfo->entries[] */ + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + newinfo->initial_entries = 0; + loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(NFPROTO_ARP, info->number); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; +} +#endif + +static int get_info(struct net *net, void __user *user, + const int *len, int compat) +{ + char name[XT_TABLE_MAXNAMELEN]; + struct xt_table *t; + int ret; + + if (*len != sizeof(struct arpt_getinfo)) { + duprintf("length %u != %Zu\n", *len, + sizeof(struct arpt_getinfo)); + return -EINVAL; + } + + if (copy_from_user(name, user, sizeof(name)) != 0) + return -EFAULT; + + name[XT_TABLE_MAXNAMELEN-1] = '\0'; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_lock(NFPROTO_ARP); +#endif + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), + "arptable_%s", name); + if (!IS_ERR_OR_NULL(t)) { + struct arpt_getinfo info; + const struct xt_table_info *private = t->private; +#ifdef CONFIG_COMPAT + struct xt_table_info tmp; + + if (compat) { + ret = compat_table_info(private, &tmp); + xt_compat_flush_offsets(NFPROTO_ARP); + private = &tmp; + } +#endif + memset(&info, 0, sizeof(info)); + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, private->underflow, + sizeof(info.underflow)); + info.num_entries = private->number; + info.size = private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + xt_table_unlock(t); + module_put(t->me); + } else + ret = t ? PTR_ERR(t) : -ENOENT; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_unlock(NFPROTO_ARP); +#endif + return ret; +} + +static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, + const int *len) +{ + int ret; + struct arpt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) { + duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); + return -EINVAL; + } + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct arpt_get_entries) + get.size) { + duprintf("get_entries: %u != %Zu\n", *len, + sizeof(struct arpt_get_entries) + get.size); + return -EINVAL; + } + + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); + if (!IS_ERR_OR_NULL(t)) { + const struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", private->number); - if (entries->size == private->size) + if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else { duprintf("get_entries: I've got %u not %u!\n", - private->size, entries->size); - ret = -EINVAL; + private->size, get.size); + ret = -EAGAIN; } module_put(t->me); xt_table_unlock(t); @@ -791,87 +985,69 @@ static int get_entries(const struct arpt_get_entries *entries, return ret; } -static int do_replace(void __user *user, unsigned int len) +static int __do_replace(struct net *net, const char *name, + unsigned int valid_hooks, + struct xt_table_info *newinfo, + unsigned int num_counters, + void __user *counters_ptr) { int ret; - struct arpt_replace tmp; - struct arpt_table *t; - struct xt_table_info *newinfo, *oldinfo; + struct xt_table *t; + struct xt_table_info *oldinfo; struct xt_counters *counters; - void *loc_cpu_entry, *loc_cpu_old_entry; - - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) - return -EFAULT; - - /* Hack: Causes ipchains to give correct error msg --RR */ - if (len != sizeof(tmp) + tmp.size) - return -ENOPROTOOPT; + void *loc_cpu_old_entry; + struct arpt_entry *iter; - newinfo = xt_alloc_table_info(tmp.size); - if (!newinfo) - return -ENOMEM; - - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { - ret = -EFAULT; - goto free_newinfo; - } - - counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters)); + ret = 0; + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; - goto free_newinfo; + goto out; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); - if (ret != 0) - goto free_newinfo_counters; - - duprintf("arp_tables: Translated table\n"); - - t = try_then_request_module(xt_find_table_lock(NF_ARP, tmp.name), - "arptable_%s", tmp.name); - if (!t || IS_ERR(t)) { + t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name), + "arptable_%s", name); + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } /* You lied! */ - if (tmp.valid_hooks != t->valid_hooks) { + if (valid_hooks != t->valid_hooks) { duprintf("Valid hook crap: %08X vs %08X\n", - tmp.valid_hooks, t->valid_hooks); + valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } - oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); + oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; /* Update module usage count based on number of rules */ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", oldinfo->number, oldinfo->initial_entries, newinfo->number); - if ((oldinfo->number > oldinfo->initial_entries) || - (newinfo->number <= oldinfo->initial_entries)) + if ((oldinfo->number > oldinfo->initial_entries) || + (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); if ((oldinfo->number > oldinfo->initial_entries) && (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter); xt_free_table_info(oldinfo); - if (copy_to_user(tmp.counters, counters, - sizeof(struct xt_counters) * tmp.num_counters) != 0) - ret = -EFAULT; + if (copy_to_user(counters_ptr, counters, + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); return ret; @@ -880,75 +1056,140 @@ static int do_replace(void __user *user, unsigned int len) module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: - ARPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL); - free_newinfo_counters: vfree(counters); - free_newinfo: - xt_free_table_info(newinfo); + out: return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. - */ -static inline int add_counter_to_entry(struct arpt_entry *e, - const struct xt_counters addme[], - unsigned int *i) +static int do_replace(struct net *net, const void __user *user, + unsigned int len) { + int ret; + struct arpt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct arpt_entry *iter; - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; - (*i)++; + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_table(newinfo, loc_cpu_entry, &tmp); + if (ret != 0) + goto free_newinfo; + + duprintf("arp_tables: Translated table\n"); + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, tmp.counters); + if (ret) + goto free_newinfo_untrans; return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter); + free_newinfo: + xt_free_table_info(newinfo); + return ret; } -static int do_add_counters(void __user *user, unsigned int len) +static int do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) { - unsigned int i; - struct xt_counters_info tmp, *paddc; - struct arpt_table *t; - struct xt_table_info *private; + unsigned int i, curcpu; + struct xt_counters_info tmp; + struct xt_counters *paddc; + unsigned int num_counters; + const char *name; + int size; + void *ptmp; + struct xt_table *t; + const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; + struct arpt_entry *iter; + unsigned int addend; +#ifdef CONFIG_COMPAT + struct compat_xt_counters_info compat_tmp; + + if (compat) { + ptmp = &compat_tmp; + size = sizeof(struct compat_xt_counters_info); + } else +#endif + { + ptmp = &tmp; + size = sizeof(struct xt_counters_info); + } - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_user(ptmp, user, size) != 0) return -EFAULT; - if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) +#ifdef CONFIG_COMPAT + if (compat) { + num_counters = compat_tmp.num_counters; + name = compat_tmp.name; + } else +#endif + { + num_counters = tmp.num_counters; + name = tmp.name; + } + + if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc(len); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; - if (copy_from_user(paddc, user, len) != 0) { + if (copy_from_user(paddc, user + size, len - size) != 0) { ret = -EFAULT; goto free; } - t = xt_find_table_lock(NF_ARP, tmp.name); - if (!t || IS_ERR(t)) { + t = xt_find_table_lock(net, NFPROTO_ARP, name); + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } - write_lock_bh(&t->lock); + local_bh_disable(); private = t->private; - if (private->number != paddc->num_counters) { + if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc->counters, - &i); + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + addend = xt_write_recseq_begin(); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } + xt_write_recseq_end(addend); unlock_up_free: - write_unlock_bh(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: @@ -957,20 +1198,359 @@ static int do_add_counters(void __user *user, unsigned int len) return ret; } -static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +#ifdef CONFIG_COMPAT +static inline void compat_release_entry(struct compat_arpt_entry *e) +{ + struct xt_entry_target *t; + + t = compat_arpt_get_target(e); + module_put(t->u.kernel.target->me); +} + +static inline int +check_compat_entry_size_and_hooks(struct compat_arpt_entry *e, + struct xt_table_info *newinfo, + unsigned int *size, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + const char *name) +{ + struct xt_entry_target *t; + struct xt_target *target; + unsigned int entry_offset; + int ret, off, h; + + duprintf("check_compat_entry_size_and_hooks %p\n", e); + if ((unsigned long)e % __alignof__(struct compat_arpt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_arpt_entry) >= limit) { + duprintf("Bad offset %p, limit = %p\n", e, limit); + return -EINVAL; + } + + if (e->next_offset < sizeof(struct compat_arpt_entry) + + sizeof(struct compat_xt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* For purposes of check_entry casting the compat entry is fine */ + ret = check_entry((struct arpt_entry *)e, name); + if (ret) + return ret; + + off = sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + entry_offset = (void *)e - (void *)base; + + t = compat_arpt_get_target(e); + target = xt_request_find_target(NFPROTO_ARP, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); + ret = PTR_ERR(target); + goto out; + } + t->u.kernel.target = target; + + off += xt_compat_target_offset(target); + *size += off; + ret = xt_compat_add_offset(NFPROTO_ARP, entry_offset, off); + if (ret) + goto release_target; + + /* Check hooks & underflows */ + for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) + newinfo->underflow[h] = underflows[h]; + } + + /* Clear counters and comefrom */ + memset(&e->counters, 0, sizeof(e->counters)); + e->comefrom = 0; + return 0; + +release_target: + module_put(t->u.kernel.target->me); +out: + return ret; +} + +static int +compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, + unsigned int *size, const char *name, + struct xt_table_info *newinfo, unsigned char *base) +{ + struct xt_entry_target *t; + struct xt_target *target; + struct arpt_entry *de; + unsigned int origsize; + int ret, h; + + ret = 0; + origsize = *size; + de = (struct arpt_entry *)*dstptr; + memcpy(de, e, sizeof(struct arpt_entry)); + memcpy(&de->counters, &e->counters, sizeof(e->counters)); + + *dstptr += sizeof(struct arpt_entry); + *size += sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); + + de->target_offset = e->target_offset - (origsize - *size); + t = compat_arpt_get_target(e); + target = t->u.kernel.target; + xt_compat_target_from_user(t, dstptr, size); + + de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_ARP_NUMHOOKS; h++) { + if ((unsigned char *)de - base < newinfo->hook_entry[h]) + newinfo->hook_entry[h] -= origsize - *size; + if ((unsigned char *)de - base < newinfo->underflow[h]) + newinfo->underflow[h] -= origsize - *size; + } + return ret; +} + +static int translate_compat_table(const char *name, + unsigned int valid_hooks, + struct xt_table_info **pinfo, + void **pentry0, + unsigned int total_size, + unsigned int number, + unsigned int *hook_entries, + unsigned int *underflows) +{ + unsigned int i, j; + struct xt_table_info *newinfo, *info; + void *pos, *entry0, *entry1; + struct compat_arpt_entry *iter0; + struct arpt_entry *iter1; + unsigned int size; + int ret = 0; + + info = *pinfo; + entry0 = *pentry0; + size = total_size; + info->number = number; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + info->hook_entry[i] = 0xFFFFFFFF; + info->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_compat_table: size %u\n", info->size); + j = 0; + xt_compat_lock(NFPROTO_ARP); + xt_compat_init_offsets(NFPROTO_ARP, number); + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } + + ret = -EINVAL; + if (j != number) { + duprintf("translate_compat_table: %u not %u entries\n", + j, number); + goto out_unlock; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(valid_hooks & (1 << i))) + continue; + if (info->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, hook_entries[i]); + goto out_unlock; + } + if (info->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, underflows[i]); + goto out_unlock; + } + } + + ret = -ENOMEM; + newinfo = xt_alloc_table_info(size); + if (!newinfo) + goto out_unlock; + + newinfo->number = number; + for (i = 0; i < NF_ARP_NUMHOOKS; i++) { + newinfo->hook_entry[i] = info->hook_entry[i]; + newinfo->underflow[i] = info->underflow[i]; + } + entry1 = newinfo->entries[raw_smp_processor_id()]; + pos = entry1; + size = total_size; + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); + if (ret != 0) + break; + } + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + if (ret) + goto free_newinfo; + + ret = -ELOOP; + if (!mark_source_chains(newinfo, valid_hooks, entry1)) + goto free_newinfo; + + i = 0; + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = check_target(iter1, name); + if (ret != 0) + break; + ++i; + if (strcmp(arpt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } + if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; + j -= i; + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1); + } + xt_free_table_info(newinfo); + return ret; + } + + /* And one copy for every other CPU */ + for_each_possible_cpu(i) + if (newinfo->entries[i] && newinfo->entries[i] != entry1) + memcpy(newinfo->entries[i], entry1, newinfo->size); + + *pinfo = newinfo; + *pentry0 = entry1; + xt_free_table_info(info); + return 0; + +free_newinfo: + xt_free_table_info(newinfo); +out: + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } + return ret; +out_unlock: + xt_compat_flush_offsets(NFPROTO_ARP); + xt_compat_unlock(NFPROTO_ARP); + goto out; +} + +struct compat_arpt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_ARP_NUMHOOKS]; + u32 underflow[NF_ARP_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; + struct compat_arpt_entry entries[0]; +}; + +static int compat_do_replace(struct net *net, void __user *user, + unsigned int len) { int ret; + struct compat_arpt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct arpt_entry *iter; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; - if (!capable(CAP_NET_ADMIN)) + /* overflow check */ + if (tmp.size >= INT_MAX / num_possible_cpus()) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_compat_table(tmp.name, tmp.valid_hooks, + &newinfo, &loc_cpu_entry, tmp.size, + tmp.num_entries, tmp.hook_entry, + tmp.underflow); + if (ret != 0) + goto free_newinfo; + + duprintf("compat_do_replace: Translated table\n"); + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, compat_ptr(tmp.counters)); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + +static int compat_do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, + unsigned int len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case ARPT_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = compat_do_replace(sock_net(sk), user, len); break; case ARPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len); + ret = do_add_counters(sock_net(sk), user, len, 1); break; default: @@ -981,73 +1561,186 @@ static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned return ret; } -static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +static int compat_copy_entry_to_user(struct arpt_entry *e, void __user **dstptr, + compat_uint_t *size, + struct xt_counters *counters, + unsigned int i) { + struct xt_entry_target *t; + struct compat_arpt_entry __user *ce; + u_int16_t target_offset, next_offset; + compat_uint_t origsize; int ret; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; + origsize = *size; + ce = (struct compat_arpt_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(struct arpt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; - switch (cmd) { - case ARPT_SO_GET_INFO: { - char name[ARPT_TABLE_MAXNAMELEN]; - struct arpt_table *t; + *dstptr += sizeof(struct compat_arpt_entry); + *size -= sizeof(struct arpt_entry) - sizeof(struct compat_arpt_entry); - if (*len != sizeof(struct arpt_getinfo)) { - duprintf("length %u != %Zu\n", *len, - sizeof(struct arpt_getinfo)); - ret = -EINVAL; - break; - } + target_offset = e->target_offset - (origsize - *size); - if (copy_from_user(name, user, sizeof(name)) != 0) { - ret = -EFAULT; + t = arpt_get_target(e); + ret = xt_compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; + return 0; +} + +static int compat_copy_entries_to_user(unsigned int total_size, + struct xt_table *table, + void __user *userptr) +{ + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + void __user *pos; + unsigned int size; + int ret = 0; + void *loc_cpu_entry; + unsigned int i = 0; + struct arpt_entry *iter; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + /* choose the copy on our node/cpu */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + pos = userptr; + size = total_size; + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) break; + } + vfree(counters); + return ret; +} + +struct compat_arpt_get_entries { + char name[XT_TABLE_MAXNAMELEN]; + compat_uint_t size; + struct compat_arpt_entry entrytable[0]; +}; + +static int compat_get_entries(struct net *net, + struct compat_arpt_get_entries __user *uptr, + int *len) +{ + int ret; + struct compat_arpt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) { + duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + return -EINVAL; + } + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct compat_arpt_get_entries) + get.size) { + duprintf("compat_get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); + return -EINVAL; + } + + xt_compat_lock(NFPROTO_ARP); + t = xt_find_table_lock(net, NFPROTO_ARP, get.name); + if (!IS_ERR_OR_NULL(t)) { + const struct xt_table_info *private = t->private; + struct xt_table_info info; + + duprintf("t->private->number = %u\n", private->number); + ret = compat_table_info(private, &info); + if (!ret && get.size == info.size) { + ret = compat_copy_entries_to_user(private->size, + t, uptr->entrytable); + } else if (!ret) { + duprintf("compat_get_entries: I've got %u not %u!\n", + private->size, get.size); + ret = -EAGAIN; } - name[ARPT_TABLE_MAXNAMELEN-1] = '\0'; - - t = try_then_request_module(xt_find_table_lock(NF_ARP, name), - "arptable_%s", name); - if (t && !IS_ERR(t)) { - struct arpt_getinfo info; - struct xt_table_info *private = t->private; - - info.valid_hooks = t->valid_hooks; - memcpy(info.hook_entry, private->hook_entry, - sizeof(info.hook_entry)); - memcpy(info.underflow, private->underflow, - sizeof(info.underflow)); - info.num_entries = private->number; - info.size = private->size; - strcpy(info.name, name); - - if (copy_to_user(user, &info, *len) != 0) - ret = -EFAULT; - else - ret = 0; - xt_table_unlock(t); - module_put(t->me); - } else - ret = t ? PTR_ERR(t) : -ENOENT; - } - break; - - case ARPT_SO_GET_ENTRIES: { - struct arpt_get_entries get; - - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %Zu\n", *len, sizeof(get)); - ret = -EINVAL; - } else if (copy_from_user(&get, user, sizeof(get)) != 0) { - ret = -EFAULT; - } else if (*len != sizeof(struct arpt_get_entries) + get.size) { - duprintf("get_entries: %u != %Zu\n", *len, - sizeof(struct arpt_get_entries) + get.size); - ret = -EINVAL; - } else - ret = get_entries(&get, user); + xt_compat_flush_offsets(NFPROTO_ARP); + module_put(t->me); + xt_table_unlock(t); + } else + ret = t ? PTR_ERR(t) : -ENOENT; + + xt_compat_unlock(NFPROTO_ARP); + return ret; +} + +static int do_arpt_get_ctl(struct sock *, int, void __user *, int *); + +static int compat_do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, + int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len, 1); break; + case ARPT_SO_GET_ENTRIES: + ret = compat_get_entries(sock_net(sk), user, len); + break; + default: + ret = do_arpt_get_ctl(sk, cmd, user, len); } + return ret; +} +#endif + +static int do_arpt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_SET_REPLACE: + ret = do_replace(sock_net(sk), user, len); + break; + + case ARPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(sock_net(sk), user, len, 0); + break; + + default: + duprintf("do_arpt_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; + } + + return ret; +} + +static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case ARPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len, 0); + break; + + case ARPT_SO_GET_ENTRIES: + ret = get_entries(sock_net(sk), user, len); + break; case ARPT_SO_GET_REVISION_TARGET: { struct xt_get_revision rev; @@ -1060,8 +1753,9 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; - try_then_request_module(xt_find_revision(NF_ARP, rev.name, + try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name, rev.revision, 1, &ret), "arpt_%s", rev.name); break; @@ -1075,67 +1769,80 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len return ret; } -int arpt_register_table(struct arpt_table *table, - const struct arpt_replace *repl) +struct xt_table *arpt_register_table(struct net *net, + const struct xt_table *table, + const struct arpt_replace *repl) { int ret; struct xt_table_info *newinfo; - static struct xt_table_info bootstrap - = { 0, 0, 0, { 0 }, { 0 }, { } }; + struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; + struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); if (!newinfo) { ret = -ENOMEM; - return ret; + goto out; } /* choose the copy on our node/cpu */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); - + ret = translate_table(newinfo, loc_cpu_entry, repl); duprintf("arpt_register_table: translate table gives %d\n", ret); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; - } + if (ret != 0) + goto out_free; - if (xt_register_table(table, &bootstrap, newinfo) != 0) { - xt_free_table_info(newinfo); - return ret; + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + ret = PTR_ERR(new_table); + goto out_free; } + return new_table; - return 0; +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); } -void arpt_unregister_table(struct arpt_table *table) +void arpt_unregister_table(struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; + struct module *table_owner = table->me; + struct arpt_entry *iter; private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - ARPT_ENTRY_ITERATE(loc_cpu_entry, private->size, - cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter); + if (private->number > private->initial_entries) + module_put(table_owner); xt_free_table_info(private); } /* The built-in targets: standard (NULL) and error. */ -static struct arpt_target arpt_standard_target = { - .name = ARPT_STANDARD_TARGET, -}; - -static struct arpt_target arpt_error_target = { - .name = ARPT_ERROR_TARGET, - .target = arpt_error, +static struct xt_target arpt_builtin_tg[] __read_mostly = { + { + .name = XT_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_ARP, +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, +#endif + }, + { + .name = XT_ERROR_TARGET, + .target = arpt_error, + .targetsize = XT_FUNCTION_MAXNAMELEN, + .family = NFPROTO_ARP, + }, }; static struct nf_sockopt_ops arpt_sockopts = { @@ -1143,41 +1850,72 @@ static struct nf_sockopt_ops arpt_sockopts = { .set_optmin = ARPT_BASE_CTL, .set_optmax = ARPT_SO_SET_MAX+1, .set = do_arpt_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_arpt_set_ctl, +#endif .get_optmin = ARPT_BASE_CTL, .get_optmax = ARPT_SO_GET_MAX+1, .get = do_arpt_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = compat_do_arpt_get_ctl, +#endif + .owner = THIS_MODULE, +}; + +static int __net_init arp_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NFPROTO_ARP); +} + +static void __net_exit arp_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NFPROTO_ARP); +} + +static struct pernet_operations arp_tables_net_ops = { + .init = arp_tables_net_init, + .exit = arp_tables_net_exit, }; -static int __init init(void) +static int __init arp_tables_init(void) { int ret; - xt_proto_init(NF_ARP); + ret = register_pernet_subsys(&arp_tables_net_ops); + if (ret < 0) + goto err1; - /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(NF_ARP, &arpt_standard_target); - xt_register_target(NF_ARP, &arpt_error_target); + /* No one else will be downing sem now, so we won't sleep */ + ret = xt_register_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); + if (ret < 0) + goto err2; /* Register setsockopt */ ret = nf_register_sockopt(&arpt_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - return ret; - } + if (ret < 0) + goto err4; - printk("arp_tables: (C) 2002 David S. Miller\n"); + printk(KERN_INFO "arp_tables: (C) 2002 David S. Miller\n"); return 0; + +err4: + xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); +err2: + unregister_pernet_subsys(&arp_tables_net_ops); +err1: + return ret; } -static void __exit fini(void) +static void __exit arp_tables_fini(void) { nf_unregister_sockopt(&arpt_sockopts); - xt_proto_fini(NF_ARP); + xt_unregister_targets(arpt_builtin_tg, ARRAY_SIZE(arpt_builtin_tg)); + unregister_pernet_subsys(&arp_tables_net_ops); } EXPORT_SYMBOL(arpt_register_table); EXPORT_SYMBOL(arpt_unregister_table); EXPORT_SYMBOL(arpt_do_table); -module_init(init); -module_exit(fini); +module_init(arp_tables_init); +module_exit(arp_tables_fini); diff --git a/net/ipv4/netfilter/arpt_mangle.c b/net/ipv4/netfilter/arpt_mangle.c index c97650a16a5..a5e52a9f0a1 100644 --- a/net/ipv4/netfilter/arpt_mangle.c +++ b/net/ipv4/netfilter/arpt_mangle.c @@ -1,5 +1,6 @@ /* module that allows mangling of the arp payload */ #include <linux/module.h> +#include <linux/netfilter.h> #include <linux/netfilter_arp/arpt_mangle.h> #include <net/sock.h> @@ -8,98 +9,83 @@ MODULE_AUTHOR("Bart De Schuymer <bdschuym@pandora.be>"); MODULE_DESCRIPTION("arptables arp payload mangle target"); static unsigned int -target(struct sk_buff **pskb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, const void *targinfo, - void *userinfo) +target(struct sk_buff *skb, const struct xt_action_param *par) { - const struct arpt_mangle *mangle = targinfo; - struct arphdr *arp; + const struct arpt_mangle *mangle = par->targinfo; + const struct arphdr *arp; unsigned char *arpptr; int pln, hln; - if (skb_shared(*pskb) || skb_cloned(*pskb)) { - struct sk_buff *nskb; + if (!skb_make_writable(skb, skb->len)) + return NF_DROP; - nskb = skb_copy(*pskb, GFP_ATOMIC); - if (!nskb) - return NF_DROP; - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - } - - arp = (*pskb)->nh.arph; - arpptr = (*pskb)->nh.raw + sizeof(*arp); + arp = arp_hdr(skb); + arpptr = skb_network_header(skb) + sizeof(*arp); pln = arp->ar_pln; hln = arp->ar_hln; /* We assume that pln and hln were checked in the match */ if (mangle->flags & ARPT_MANGLE_SDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > (**pskb).tail)) + (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->src_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_SIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > (**pskb).tail)) + (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_s.src_ip, pln); } arpptr += pln; if (mangle->flags & ARPT_MANGLE_TDEV) { if (ARPT_DEV_ADDR_LEN_MAX < hln || - (arpptr + hln > (**pskb).tail)) + (arpptr + hln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, mangle->tgt_devaddr, hln); } arpptr += hln; if (mangle->flags & ARPT_MANGLE_TIP) { if (ARPT_MANGLE_ADDR_LEN_MAX < pln || - (arpptr + pln > (**pskb).tail)) + (arpptr + pln > skb_tail_pointer(skb))) return NF_DROP; memcpy(arpptr, &mangle->u_t.tgt_ip, pln); } return mangle->target; } -static int -checkentry(const char *tablename, const void *e, void *targinfo, - unsigned int targinfosize, unsigned int hook_mask) +static int checkentry(const struct xt_tgchk_param *par) { - const struct arpt_mangle *mangle = targinfo; + const struct arpt_mangle *mangle = par->targinfo; if (mangle->flags & ~ARPT_MANGLE_MASK || !(mangle->flags & ARPT_MANGLE_MASK)) - return 0; + return -EINVAL; if (mangle->target != NF_DROP && mangle->target != NF_ACCEPT && - mangle->target != ARPT_CONTINUE) - return 0; - return 1; + mangle->target != XT_CONTINUE) + return -EINVAL; + return 0; } -static struct arpt_target arpt_mangle_reg -= { - .name = "mangle", - .target = target, - .checkentry = checkentry, - .me = THIS_MODULE, +static struct xt_target arpt_mangle_reg __read_mostly = { + .name = "mangle", + .family = NFPROTO_ARP, + .target = target, + .targetsize = sizeof(struct arpt_mangle), + .checkentry = checkentry, + .me = THIS_MODULE, }; -static int __init init(void) +static int __init arpt_mangle_init(void) { - if (arpt_register_target(&arpt_mangle_reg)) - return -EINVAL; - - return 0; + return xt_register_target(&arpt_mangle_reg); } -static void __exit fini(void) +static void __exit arpt_mangle_fini(void) { - arpt_unregister_target(&arpt_mangle_reg); + xt_unregister_target(&arpt_mangle_reg); } -module_init(init); -module_exit(fini); +module_init(arpt_mangle_init); +module_exit(arpt_mangle_fini); diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c index f6ab45f4868..802ddecb30b 100644 --- a/net/ipv4/netfilter/arptable_filter.c +++ b/net/ipv4/netfilter/arptable_filter.c @@ -6,7 +6,9 @@ */ #include <linux/module.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_arp/arp_tables.h> +#include <linux/slab.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("David S. Miller <davem@redhat.com>"); @@ -15,201 +17,76 @@ MODULE_DESCRIPTION("arptables filter table"); #define FILTER_VALID_HOOKS ((1 << NF_ARP_IN) | (1 << NF_ARP_OUT) | \ (1 << NF_ARP_FORWARD)) -/* Standard entry. */ -struct arpt_standard -{ - struct arpt_entry entry; - struct arpt_standard_target target; +static const struct xt_table packet_filter = { + .name = "filter", + .valid_hooks = FILTER_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_ARP, + .priority = NF_IP_PRI_FILTER, }; -struct arpt_error_target +/* The work comes in here from netfilter.c */ +static unsigned int +arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - struct arpt_entry_target target; - char errorname[ARPT_FUNCTION_MAXNAMELEN]; -}; + const struct net *net = dev_net((in != NULL) ? in : out); -struct arpt_error -{ - struct arpt_entry entry; - struct arpt_error_target target; -}; + return arpt_do_table(skb, ops->hooknum, in, out, + net->ipv4.arptable_filter); +} -static struct -{ - struct arpt_replace repl; - struct arpt_standard entries[3]; - struct arpt_error term; -} initial_table __initdata -= { { "filter", FILTER_VALID_HOOKS, 4, - sizeof(struct arpt_standard) * 3 + sizeof(struct arpt_error), - { [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), }, - { [NF_ARP_IN] = 0, - [NF_ARP_OUT] = sizeof(struct arpt_standard), - [NF_ARP_FORWARD] = 2 * sizeof(struct arpt_standard), }, - 0, NULL, { } }, - { - /* ARP_IN */ - { - { - { - { 0 }, { 0 }, { 0 }, { 0 }, - 0, 0, - { { 0, }, { 0, } }, - { { 0, }, { 0, } }, - 0, 0, - 0, 0, - 0, 0, - "", "", { 0 }, { 0 }, - 0, 0 - }, - sizeof(struct arpt_entry), - sizeof(struct arpt_standard), - 0, - { 0, 0 }, { } }, - { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } - }, - /* ARP_OUT */ - { - { - { - { 0 }, { 0 }, { 0 }, { 0 }, - 0, 0, - { { 0, }, { 0, } }, - { { 0, }, { 0, } }, - 0, 0, - 0, 0, - 0, 0, - "", "", { 0 }, { 0 }, - 0, 0 - }, - sizeof(struct arpt_entry), - sizeof(struct arpt_standard), - 0, - { 0, 0 }, { } }, - { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } - }, - /* ARP_FORWARD */ - { - { - { - { 0 }, { 0 }, { 0 }, { 0 }, - 0, 0, - { { 0, }, { 0, } }, - { { 0, }, { 0, } }, - 0, 0, - 0, 0, - 0, 0, - "", "", { 0 }, { 0 }, - 0, 0 - }, - sizeof(struct arpt_entry), - sizeof(struct arpt_standard), - 0, - { 0, 0 }, { } }, - { { { { ARPT_ALIGN(sizeof(struct arpt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } - } - }, - /* ERROR */ - { - { - { - { 0 }, { 0 }, { 0 }, { 0 }, - 0, 0, - { { 0, }, { 0, } }, - { { 0, }, { 0, } }, - 0, 0, - 0, 0, - 0, 0, - "", "", { 0 }, { 0 }, - 0, 0 - }, - sizeof(struct arpt_entry), - sizeof(struct arpt_error), - 0, - { 0, 0 }, { } }, - { { { { ARPT_ALIGN(sizeof(struct arpt_error_target)), ARPT_ERROR_TARGET } }, - { } }, - "ERROR" - } - } -}; +static struct nf_hook_ops *arpfilter_ops __read_mostly; -static struct arpt_table packet_filter = { - .name = "filter", - .valid_hooks = FILTER_VALID_HOOKS, - .lock = RW_LOCK_UNLOCKED, - .private = NULL, - .me = THIS_MODULE, - .af = NF_ARP, -}; +static int __net_init arptable_filter_net_init(struct net *net) +{ + struct arpt_replace *repl; + + repl = arpt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; + net->ipv4.arptable_filter = + arpt_register_table(net, &packet_filter, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter); +} -/* The work comes in here from netfilter.c */ -static unsigned int arpt_hook(unsigned int hook, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static void __net_exit arptable_filter_net_exit(struct net *net) { - return arpt_do_table(pskb, hook, in, out, &packet_filter, NULL); + arpt_unregister_table(net->ipv4.arptable_filter); } -static struct nf_hook_ops arpt_ops[] = { - { - .hook = arpt_hook, - .owner = THIS_MODULE, - .pf = NF_ARP, - .hooknum = NF_ARP_IN, - }, - { - .hook = arpt_hook, - .owner = THIS_MODULE, - .pf = NF_ARP, - .hooknum = NF_ARP_OUT, - }, - { - .hook = arpt_hook, - .owner = THIS_MODULE, - .pf = NF_ARP, - .hooknum = NF_ARP_FORWARD, - }, +static struct pernet_operations arptable_filter_net_ops = { + .init = arptable_filter_net_init, + .exit = arptable_filter_net_exit, }; -static int __init init(void) +static int __init arptable_filter_init(void) { - int ret, i; + int ret; - /* Register table */ - ret = arpt_register_table(&packet_filter, &initial_table.repl); + ret = register_pernet_subsys(&arptable_filter_net_ops); if (ret < 0) return ret; - for (i = 0; i < ARRAY_SIZE(arpt_ops); i++) - if ((ret = nf_register_hook(&arpt_ops[i])) < 0) - goto cleanup_hooks; + arpfilter_ops = xt_hook_link(&packet_filter, arptable_filter_hook); + if (IS_ERR(arpfilter_ops)) { + ret = PTR_ERR(arpfilter_ops); + goto cleanup_table; + } return ret; -cleanup_hooks: - while (--i >= 0) - nf_unregister_hook(&arpt_ops[i]); - - arpt_unregister_table(&packet_filter); +cleanup_table: + unregister_pernet_subsys(&arptable_filter_net_ops); return ret; } -static void __exit fini(void) +static void __exit arptable_filter_fini(void) { - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(arpt_ops); i++) - nf_unregister_hook(&arpt_ops[i]); - - arpt_unregister_table(&packet_filter); + xt_hook_unlink(&packet_filter, arpfilter_ops); + unregister_pernet_subsys(&arptable_filter_net_ops); } -module_init(init); -module_exit(fini); +module_init(arptable_filter_init); +module_exit(arptable_filter_fini); diff --git a/net/ipv4/netfilter/ip_conntrack_amanda.c b/net/ipv4/netfilter/ip_conntrack_amanda.c deleted file mode 100644 index 84e4f79b7ff..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_amanda.c +++ /dev/null @@ -1,181 +0,0 @@ -/* Amanda extension for IP connection tracking, Version 0.2 - * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> - * based on HW's ip_conntrack_irc.c as well as other modules - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Module load syntax: - * insmod ip_conntrack_amanda.o [master_timeout=n] - * - * Where master_timeout is the timeout (in seconds) of the master - * connection (port 10080). This defaults to 5 minutes but if - * your clients take longer than 5 minutes to do their work - * before getting back to the Amanda server, you can increase - * this value. - * - */ - -#include <linux/in.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/ip.h> -#include <linux/moduleparam.h> -#include <linux/udp.h> -#include <net/checksum.h> -#include <net/udp.h> - -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_amanda.h> - -static unsigned int master_timeout = 300; - -MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); -MODULE_DESCRIPTION("Amanda connection tracking module"); -MODULE_LICENSE("GPL"); -module_param(master_timeout, uint, 0600); -MODULE_PARM_DESC(master_timeout, "timeout for the master connection"); - -static const char *conns[] = { "DATA ", "MESG ", "INDEX " }; - -/* This is slow, but it's simple. --RR */ -static char *amanda_buffer; -static DEFINE_SPINLOCK(amanda_buffer_lock); - -unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack_expect *exp); -EXPORT_SYMBOL_GPL(ip_nat_amanda_hook); - -static int help(struct sk_buff **pskb, - struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) -{ - struct ip_conntrack_expect *exp; - char *data, *data_limit, *tmp; - unsigned int dataoff, i; - u_int16_t port, len; - int ret = NF_ACCEPT; - - /* Only look at packets from the Amanda server */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - return NF_ACCEPT; - - /* increase the UDP timeout of the master connection as replies from - * Amanda clients to the server can be quite delayed */ - ip_ct_refresh(ct, *pskb, master_timeout * HZ); - - /* No data? */ - dataoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct udphdr); - if (dataoff >= (*pskb)->len) { - if (net_ratelimit()) - printk("amanda_help: skblen = %u\n", (*pskb)->len); - return NF_ACCEPT; - } - - spin_lock_bh(&amanda_buffer_lock); - skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff); - data = amanda_buffer; - data_limit = amanda_buffer + (*pskb)->len - dataoff; - *data_limit = '\0'; - - /* Search for the CONNECT string */ - data = strstr(data, "CONNECT "); - if (!data) - goto out; - data += strlen("CONNECT "); - - /* Only search first line. */ - if ((tmp = strchr(data, '\n'))) - *tmp = '\0'; - - for (i = 0; i < ARRAY_SIZE(conns); i++) { - char *match = strstr(data, conns[i]); - if (!match) - continue; - tmp = data = match + strlen(conns[i]); - port = simple_strtoul(data, &data, 10); - len = data - tmp; - if (port == 0 || len > 5) - break; - - exp = ip_conntrack_expect_alloc(ct); - if (exp == NULL) { - ret = NF_DROP; - goto out; - } - - exp->expectfn = NULL; - exp->flags = 0; - - exp->tuple.src.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - exp->tuple.src.u.tcp.port = 0; - exp->tuple.dst.ip = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - exp->tuple.dst.protonum = IPPROTO_TCP; - exp->tuple.dst.u.tcp.port = htons(port); - - exp->mask.src.ip = 0xFFFFFFFF; - exp->mask.src.u.tcp.port = 0; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.protonum = 0xFF; - exp->mask.dst.u.tcp.port = 0xFFFF; - - if (ip_nat_amanda_hook) - ret = ip_nat_amanda_hook(pskb, ctinfo, - tmp - amanda_buffer, - len, exp); - else if (ip_conntrack_expect_related(exp) != 0) - ret = NF_DROP; - ip_conntrack_expect_put(exp); - } - -out: - spin_unlock_bh(&amanda_buffer_lock); - return ret; -} - -static struct ip_conntrack_helper amanda_helper = { - .max_expected = ARRAY_SIZE(conns), - .timeout = 180, - .me = THIS_MODULE, - .help = help, - .name = "amanda", - - .tuple = { .src = { .u = { __constant_htons(10080) } }, - .dst = { .protonum = IPPROTO_UDP }, - }, - .mask = { .src = { .u = { 0xFFFF } }, - .dst = { .protonum = 0xFF }, - }, -}; - -static void __exit fini(void) -{ - ip_conntrack_helper_unregister(&amanda_helper); - kfree(amanda_buffer); -} - -static int __init init(void) -{ - int ret; - - amanda_buffer = kmalloc(65536, GFP_KERNEL); - if (!amanda_buffer) - return -ENOMEM; - - ret = ip_conntrack_helper_register(&amanda_helper); - if (ret < 0) { - kfree(amanda_buffer); - return ret; - } - return 0; - - -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c deleted file mode 100644 index 84c66dbfeda..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ /dev/null @@ -1,1546 +0,0 @@ -/* Connection state tracking for netfilter. This is separated from, - but required by, the NAT layer; it can also be used by an iptables - extension. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> - * - new API and handling of conntrack/nat helpers - * - now capable of multiple expectations for one master - * 16 Jul 2002: Harald Welte <laforge@gnumonks.org> - * - add usage/reference counts to ip_conntrack_expect - * - export ip_conntrack[_expect]_{find_get,put} functions - * */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/icmp.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <linux/vmalloc.h> -#include <net/checksum.h> -#include <net/ip.h> -#include <linux/stddef.h> -#include <linux/sysctl.h> -#include <linux/slab.h> -#include <linux/random.h> -#include <linux/jhash.h> -#include <linux/err.h> -#include <linux/percpu.h> -#include <linux/moduleparam.h> -#include <linux/notifier.h> - -/* ip_conntrack_lock protects the main hash table, protocol/helper/expected - registrations, conntrack timers*/ -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/listhelp.h> - -#define IP_CONNTRACK_VERSION "2.4" - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -DEFINE_RWLOCK(ip_conntrack_lock); - -/* ip_conntrack_standalone needs this */ -atomic_t ip_conntrack_count = ATOMIC_INIT(0); - -void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL; -LIST_HEAD(ip_conntrack_expect_list); -struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO]; -static LIST_HEAD(helpers); -unsigned int ip_conntrack_htable_size = 0; -int ip_conntrack_max; -struct list_head *ip_conntrack_hash; -static kmem_cache_t *ip_conntrack_cachep __read_mostly; -static kmem_cache_t *ip_conntrack_expect_cachep __read_mostly; -struct ip_conntrack ip_conntrack_untracked; -unsigned int ip_ct_log_invalid; -static LIST_HEAD(unconfirmed); -static int ip_conntrack_vmalloc; - -static unsigned int ip_conntrack_next_id = 1; -static unsigned int ip_conntrack_expect_next_id = 1; -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS -struct notifier_block *ip_conntrack_chain; -struct notifier_block *ip_conntrack_expect_chain; - -DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache); - -/* deliver cached events and clear cache entry - must be called with locally - * disabled softirqs */ -static inline void -__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache) -{ - DEBUGP("ecache: delivering events for %p\n", ecache->ct); - if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events) - notifier_call_chain(&ip_conntrack_chain, ecache->events, - ecache->ct); - ecache->events = 0; - ip_conntrack_put(ecache->ct); - ecache->ct = NULL; -} - -/* Deliver all cached events for a particular conntrack. This is called - * by code prior to async packet handling or freeing the skb */ -void ip_ct_deliver_cached_events(const struct ip_conntrack *ct) -{ - struct ip_conntrack_ecache *ecache; - - local_bh_disable(); - ecache = &__get_cpu_var(ip_conntrack_ecache); - if (ecache->ct == ct) - __ip_ct_deliver_cached_events(ecache); - local_bh_enable(); -} - -void __ip_ct_event_cache_init(struct ip_conntrack *ct) -{ - struct ip_conntrack_ecache *ecache; - - /* take care of delivering potentially old events */ - ecache = &__get_cpu_var(ip_conntrack_ecache); - BUG_ON(ecache->ct == ct); - if (ecache->ct) - __ip_ct_deliver_cached_events(ecache); - /* initialize for this conntrack/packet */ - ecache->ct = ct; - nf_conntrack_get(&ct->ct_general); -} - -/* flush the event cache - touches other CPU's data and must not be called while - * packets are still passing through the code */ -static void ip_ct_event_cache_flush(void) -{ - struct ip_conntrack_ecache *ecache; - int cpu; - - for_each_cpu(cpu) { - ecache = &per_cpu(ip_conntrack_ecache, cpu); - if (ecache->ct) - ip_conntrack_put(ecache->ct); - } -} -#else -static inline void ip_ct_event_cache_flush(void) {} -#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ - -DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); - -static int ip_conntrack_hash_rnd_initted; -static unsigned int ip_conntrack_hash_rnd; - -static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple, - unsigned int size, unsigned int rnd) -{ - return (jhash_3words(tuple->src.ip, - (tuple->dst.ip ^ tuple->dst.protonum), - (tuple->src.u.all | (tuple->dst.u.all << 16)), - rnd) % size); -} - -static u_int32_t -hash_conntrack(const struct ip_conntrack_tuple *tuple) -{ - return __hash_conntrack(tuple, ip_conntrack_htable_size, - ip_conntrack_hash_rnd); -} - -int -ip_ct_get_tuple(const struct iphdr *iph, - const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_protocol *protocol) -{ - /* Never happen */ - if (iph->frag_off & htons(IP_OFFSET)) { - printk("ip_conntrack_core: Frag of proto %u.\n", - iph->protocol); - return 0; - } - - tuple->src.ip = iph->saddr; - tuple->dst.ip = iph->daddr; - tuple->dst.protonum = iph->protocol; - tuple->dst.dir = IP_CT_DIR_ORIGINAL; - - return protocol->pkt_to_tuple(skb, dataoff, tuple); -} - -int -ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse, - const struct ip_conntrack_tuple *orig, - const struct ip_conntrack_protocol *protocol) -{ - inverse->src.ip = orig->dst.ip; - inverse->dst.ip = orig->src.ip; - inverse->dst.protonum = orig->dst.protonum; - inverse->dst.dir = !orig->dst.dir; - - return protocol->invert_tuple(inverse, orig); -} - - -/* ip_conntrack_expect helper functions */ -void ip_ct_unlink_expect(struct ip_conntrack_expect *exp) -{ - ASSERT_WRITE_LOCK(&ip_conntrack_lock); - IP_NF_ASSERT(!timer_pending(&exp->timeout)); - list_del(&exp->list); - CONNTRACK_STAT_INC(expect_delete); - exp->master->expecting--; - ip_conntrack_expect_put(exp); -} - -static void expectation_timed_out(unsigned long ul_expect) -{ - struct ip_conntrack_expect *exp = (void *)ul_expect; - - write_lock_bh(&ip_conntrack_lock); - ip_ct_unlink_expect(exp); - write_unlock_bh(&ip_conntrack_lock); - ip_conntrack_expect_put(exp); -} - -struct ip_conntrack_expect * -__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) -{ - struct ip_conntrack_expect *i; - - list_for_each_entry(i, &ip_conntrack_expect_list, list) { - if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) { - atomic_inc(&i->use); - return i; - } - } - return NULL; -} - -/* Just find a expectation corresponding to a tuple. */ -struct ip_conntrack_expect * -ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple) -{ - struct ip_conntrack_expect *i; - - read_lock_bh(&ip_conntrack_lock); - i = __ip_conntrack_expect_find(tuple); - read_unlock_bh(&ip_conntrack_lock); - - return i; -} - -/* If an expectation for this connection is found, it gets delete from - * global list then returned. */ -static struct ip_conntrack_expect * -find_expectation(const struct ip_conntrack_tuple *tuple) -{ - struct ip_conntrack_expect *i; - - list_for_each_entry(i, &ip_conntrack_expect_list, list) { - /* If master is not in hash table yet (ie. packet hasn't left - this machine yet), how can other end know about expected? - Hence these are not the droids you are looking for (if - master ct never got confirmed, we'd hold a reference to it - and weird things would happen to future packets). */ - if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) - && is_confirmed(i->master)) { - if (i->flags & IP_CT_EXPECT_PERMANENT) { - atomic_inc(&i->use); - return i; - } else if (del_timer(&i->timeout)) { - ip_ct_unlink_expect(i); - return i; - } - } - } - return NULL; -} - -/* delete all expectations for this conntrack */ -void ip_ct_remove_expectations(struct ip_conntrack *ct) -{ - struct ip_conntrack_expect *i, *tmp; - - /* Optimization: most connection never expect any others. */ - if (ct->expecting == 0) - return; - - list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) { - if (i->master == ct && del_timer(&i->timeout)) { - ip_ct_unlink_expect(i); - ip_conntrack_expect_put(i); - } - } -} - -static void -clean_from_lists(struct ip_conntrack *ct) -{ - unsigned int ho, hr; - - DEBUGP("clean_from_lists(%p)\n", ct); - ASSERT_WRITE_LOCK(&ip_conntrack_lock); - - ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - LIST_DELETE(&ip_conntrack_hash[ho], &ct->tuplehash[IP_CT_DIR_ORIGINAL]); - LIST_DELETE(&ip_conntrack_hash[hr], &ct->tuplehash[IP_CT_DIR_REPLY]); - - /* Destroy all pending expectations */ - ip_ct_remove_expectations(ct); -} - -static void -destroy_conntrack(struct nf_conntrack *nfct) -{ - struct ip_conntrack *ct = (struct ip_conntrack *)nfct; - struct ip_conntrack_protocol *proto; - - DEBUGP("destroy_conntrack(%p)\n", ct); - IP_NF_ASSERT(atomic_read(&nfct->use) == 0); - IP_NF_ASSERT(!timer_pending(&ct->timeout)); - - ip_conntrack_event(IPCT_DESTROY, ct); - set_bit(IPS_DYING_BIT, &ct->status); - - /* To make sure we don't get any weird locking issues here: - * destroy_conntrack() MUST NOT be called with a write lock - * to ip_conntrack_lock!!! -HW */ - proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); - if (proto && proto->destroy) - proto->destroy(ct); - - if (ip_conntrack_destroyed) - ip_conntrack_destroyed(ct); - - write_lock_bh(&ip_conntrack_lock); - /* Expectations will have been removed in clean_from_lists, - * except TFTP can create an expectation on the first packet, - * before connection is in the list, so we need to clean here, - * too. */ - ip_ct_remove_expectations(ct); - - /* We overload first tuple to link into unconfirmed list. */ - if (!is_confirmed(ct)) { - BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list)); - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - } - - CONNTRACK_STAT_INC(delete); - write_unlock_bh(&ip_conntrack_lock); - - if (ct->master) - ip_conntrack_put(ct->master); - - DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct); - ip_conntrack_free(ct); -} - -static void death_by_timeout(unsigned long ul_conntrack) -{ - struct ip_conntrack *ct = (void *)ul_conntrack; - - write_lock_bh(&ip_conntrack_lock); - /* Inside lock so preempt is disabled on module removal path. - * Otherwise we can get spurious warnings. */ - CONNTRACK_STAT_INC(delete_list); - clean_from_lists(ct); - write_unlock_bh(&ip_conntrack_lock); - ip_conntrack_put(ct); -} - -static inline int -conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - ASSERT_READ_LOCK(&ip_conntrack_lock); - return tuplehash_to_ctrack(i) != ignored_conntrack - && ip_ct_tuple_equal(tuple, &i->tuple); -} - -struct ip_conntrack_tuple_hash * -__ip_conntrack_find(const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - struct ip_conntrack_tuple_hash *h; - unsigned int hash = hash_conntrack(tuple); - - ASSERT_READ_LOCK(&ip_conntrack_lock); - list_for_each_entry(h, &ip_conntrack_hash[hash], list) { - if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) { - CONNTRACK_STAT_INC(found); - return h; - } - CONNTRACK_STAT_INC(searched); - } - - return NULL; -} - -/* Find a connection corresponding to a tuple. */ -struct ip_conntrack_tuple_hash * -ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - struct ip_conntrack_tuple_hash *h; - - read_lock_bh(&ip_conntrack_lock); - h = __ip_conntrack_find(tuple, ignored_conntrack); - if (h) - atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); - read_unlock_bh(&ip_conntrack_lock); - - return h; -} - -static void __ip_conntrack_hash_insert(struct ip_conntrack *ct, - unsigned int hash, - unsigned int repl_hash) -{ - ct->id = ++ip_conntrack_next_id; - list_prepend(&ip_conntrack_hash[hash], - &ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - list_prepend(&ip_conntrack_hash[repl_hash], - &ct->tuplehash[IP_CT_DIR_REPLY].list); -} - -void ip_conntrack_hash_insert(struct ip_conntrack *ct) -{ - unsigned int hash, repl_hash; - - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - - write_lock_bh(&ip_conntrack_lock); - __ip_conntrack_hash_insert(ct, hash, repl_hash); - write_unlock_bh(&ip_conntrack_lock); -} - -/* Confirm a connection given skb; places it in hash table */ -int -__ip_conntrack_confirm(struct sk_buff **pskb) -{ - unsigned int hash, repl_hash; - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - - ct = ip_conntrack_get(*pskb, &ctinfo); - - /* ipt_REJECT uses ip_conntrack_attach to attach related - ICMP/TCP RST packets in other direction. Actual packet - which created connection will be IP_CT_NEW or for an - expected connection, IP_CT_RELATED. */ - if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) - return NF_ACCEPT; - - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - - /* We're not in hash table, and we refuse to set up related - connections for unconfirmed conns. But packet copies and - REJECT will give spurious warnings here. */ - /* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ - - /* No external references means noone else could have - confirmed us. */ - IP_NF_ASSERT(!is_confirmed(ct)); - DEBUGP("Confirming conntrack %p\n", ct); - - write_lock_bh(&ip_conntrack_lock); - - /* See if there's one in the list already, including reverse: - NAT could have grabbed it without realizing, since we're - not in the hash. If there is, we lost race. */ - if (!LIST_FIND(&ip_conntrack_hash[hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, NULL) - && !LIST_FIND(&ip_conntrack_hash[repl_hash], - conntrack_tuple_cmp, - struct ip_conntrack_tuple_hash *, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple, NULL)) { - /* Remove from unconfirmed list */ - list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list); - - __ip_conntrack_hash_insert(ct, hash, repl_hash); - /* Timer relative to confirmation time, not original - setting time, otherwise we'd get timer wrap in - weird delay cases. */ - ct->timeout.expires += jiffies; - add_timer(&ct->timeout); - atomic_inc(&ct->ct_general.use); - set_bit(IPS_CONFIRMED_BIT, &ct->status); - CONNTRACK_STAT_INC(insert); - write_unlock_bh(&ip_conntrack_lock); - if (ct->helper) - ip_conntrack_event_cache(IPCT_HELPER, *pskb); -#ifdef CONFIG_IP_NF_NAT_NEEDED - if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) || - test_bit(IPS_DST_NAT_DONE_BIT, &ct->status)) - ip_conntrack_event_cache(IPCT_NATINFO, *pskb); -#endif - ip_conntrack_event_cache(master_ct(ct) ? - IPCT_RELATED : IPCT_NEW, *pskb); - - return NF_ACCEPT; - } - - CONNTRACK_STAT_INC(insert_failed); - write_unlock_bh(&ip_conntrack_lock); - - return NF_DROP; -} - -/* Returns true if a connection correspondings to the tuple (required - for NAT). */ -int -ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - struct ip_conntrack_tuple_hash *h; - - read_lock_bh(&ip_conntrack_lock); - h = __ip_conntrack_find(tuple, ignored_conntrack); - read_unlock_bh(&ip_conntrack_lock); - - return h != NULL; -} - -/* There's a small race here where we may free a just-assured - connection. Too bad: we're in trouble anyway. */ -static inline int unreplied(const struct ip_conntrack_tuple_hash *i) -{ - return !(test_bit(IPS_ASSURED_BIT, &tuplehash_to_ctrack(i)->status)); -} - -static int early_drop(struct list_head *chain) -{ - /* Traverse backwards: gives us oldest, which is roughly LRU */ - struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct = NULL; - int dropped = 0; - - read_lock_bh(&ip_conntrack_lock); - h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *); - if (h) { - ct = tuplehash_to_ctrack(h); - atomic_inc(&ct->ct_general.use); - } - read_unlock_bh(&ip_conntrack_lock); - - if (!ct) - return dropped; - - if (del_timer(&ct->timeout)) { - death_by_timeout((unsigned long)ct); - dropped = 1; - CONNTRACK_STAT_INC(early_drop); - } - ip_conntrack_put(ct); - return dropped; -} - -static inline int helper_cmp(const struct ip_conntrack_helper *i, - const struct ip_conntrack_tuple *rtuple) -{ - return ip_ct_tuple_mask_cmp(rtuple, &i->tuple, &i->mask); -} - -static struct ip_conntrack_helper * -__ip_conntrack_helper_find( const struct ip_conntrack_tuple *tuple) -{ - return LIST_FIND(&helpers, helper_cmp, - struct ip_conntrack_helper *, - tuple); -} - -struct ip_conntrack_helper * -ip_conntrack_helper_find_get( const struct ip_conntrack_tuple *tuple) -{ - struct ip_conntrack_helper *helper; - - /* need ip_conntrack_lock to assure that helper exists until - * try_module_get() is called */ - read_lock_bh(&ip_conntrack_lock); - - helper = __ip_conntrack_helper_find(tuple); - if (helper) { - /* need to increase module usage count to assure helper will - * not go away while the caller is e.g. busy putting a - * conntrack in the hash that uses the helper */ - if (!try_module_get(helper->me)) - helper = NULL; - } - - read_unlock_bh(&ip_conntrack_lock); - - return helper; -} - -void ip_conntrack_helper_put(struct ip_conntrack_helper *helper) -{ - module_put(helper->me); -} - -struct ip_conntrack_protocol * -__ip_conntrack_proto_find(u_int8_t protocol) -{ - return ip_ct_protos[protocol]; -} - -/* this is guaranteed to always return a valid protocol helper, since - * it falls back to generic_protocol */ -struct ip_conntrack_protocol * -ip_conntrack_proto_find_get(u_int8_t protocol) -{ - struct ip_conntrack_protocol *p; - - preempt_disable(); - p = __ip_conntrack_proto_find(protocol); - if (p) { - if (!try_module_get(p->me)) - p = &ip_conntrack_generic_protocol; - } - preempt_enable(); - - return p; -} - -void ip_conntrack_proto_put(struct ip_conntrack_protocol *p) -{ - module_put(p->me); -} - -struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig, - struct ip_conntrack_tuple *repl) -{ - struct ip_conntrack *conntrack; - - if (!ip_conntrack_hash_rnd_initted) { - get_random_bytes(&ip_conntrack_hash_rnd, 4); - ip_conntrack_hash_rnd_initted = 1; - } - - if (ip_conntrack_max - && atomic_read(&ip_conntrack_count) >= ip_conntrack_max) { - unsigned int hash = hash_conntrack(orig); - /* Try dropping from this hash chain. */ - if (!early_drop(&ip_conntrack_hash[hash])) { - if (net_ratelimit()) - printk(KERN_WARNING - "ip_conntrack: table full, dropping" - " packet.\n"); - return ERR_PTR(-ENOMEM); - } - } - - conntrack = kmem_cache_alloc(ip_conntrack_cachep, GFP_ATOMIC); - if (!conntrack) { - DEBUGP("Can't allocate conntrack.\n"); - return ERR_PTR(-ENOMEM); - } - - memset(conntrack, 0, sizeof(*conntrack)); - atomic_set(&conntrack->ct_general.use, 1); - conntrack->ct_general.destroy = destroy_conntrack; - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig; - conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl; - /* Don't set timer yet: wait for confirmation */ - init_timer(&conntrack->timeout); - conntrack->timeout.data = (unsigned long)conntrack; - conntrack->timeout.function = death_by_timeout; - - atomic_inc(&ip_conntrack_count); - - return conntrack; -} - -void -ip_conntrack_free(struct ip_conntrack *conntrack) -{ - atomic_dec(&ip_conntrack_count); - kmem_cache_free(ip_conntrack_cachep, conntrack); -} - -/* Allocate a new conntrack: we return -ENOMEM if classification - * failed due to stress. Otherwise it really is unclassifiable */ -static struct ip_conntrack_tuple_hash * -init_conntrack(struct ip_conntrack_tuple *tuple, - struct ip_conntrack_protocol *protocol, - struct sk_buff *skb) -{ - struct ip_conntrack *conntrack; - struct ip_conntrack_tuple repl_tuple; - struct ip_conntrack_expect *exp; - - if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) { - DEBUGP("Can't invert tuple.\n"); - return NULL; - } - - conntrack = ip_conntrack_alloc(tuple, &repl_tuple); - if (conntrack == NULL || IS_ERR(conntrack)) - return (struct ip_conntrack_tuple_hash *)conntrack; - - if (!protocol->new(conntrack, skb)) { - ip_conntrack_free(conntrack); - return NULL; - } - - write_lock_bh(&ip_conntrack_lock); - exp = find_expectation(tuple); - - if (exp) { - DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n", - conntrack, exp); - /* Welcome, Mr. Bond. We've been expecting you... */ - __set_bit(IPS_EXPECTED_BIT, &conntrack->status); - conntrack->master = exp->master; -#ifdef CONFIG_IP_NF_CONNTRACK_MARK - conntrack->mark = exp->master->mark; -#endif -#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ - defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) - /* this is ugly, but there is no other place where to put it */ - conntrack->nat.masq_index = exp->master->nat.masq_index; -#endif - nf_conntrack_get(&conntrack->master->ct_general); - CONNTRACK_STAT_INC(expect_new); - } else { - conntrack->helper = __ip_conntrack_helper_find(&repl_tuple); - - CONNTRACK_STAT_INC(new); - } - - /* Overload tuple linked list to put us in unconfirmed list. */ - list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed); - - write_unlock_bh(&ip_conntrack_lock); - - if (exp) { - if (exp->expectfn) - exp->expectfn(conntrack, exp); - ip_conntrack_expect_put(exp); - } - - return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL]; -} - -/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */ -static inline struct ip_conntrack * -resolve_normal_ct(struct sk_buff *skb, - struct ip_conntrack_protocol *proto, - int *set_reply, - unsigned int hooknum, - enum ip_conntrack_info *ctinfo) -{ - struct ip_conntrack_tuple tuple; - struct ip_conntrack_tuple_hash *h; - struct ip_conntrack *ct; - - IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0); - - if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4, - &tuple,proto)) - return NULL; - - /* look for tuple match */ - h = ip_conntrack_find_get(&tuple, NULL); - if (!h) { - h = init_conntrack(&tuple, proto, skb); - if (!h) - return NULL; - if (IS_ERR(h)) - return (void *)h; - } - ct = tuplehash_to_ctrack(h); - - /* It exists; we have (non-exclusive) reference. */ - if (DIRECTION(h) == IP_CT_DIR_REPLY) { - *ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY; - /* Please set reply bit if this packet OK */ - *set_reply = 1; - } else { - /* Once we've had two way comms, always ESTABLISHED. */ - if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { - DEBUGP("ip_conntrack_in: normal packet for %p\n", - ct); - *ctinfo = IP_CT_ESTABLISHED; - } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) { - DEBUGP("ip_conntrack_in: related packet for %p\n", - ct); - *ctinfo = IP_CT_RELATED; - } else { - DEBUGP("ip_conntrack_in: new packet for %p\n", - ct); - *ctinfo = IP_CT_NEW; - } - *set_reply = 0; - } - skb->nfct = &ct->ct_general; - skb->nfctinfo = *ctinfo; - return ct; -} - -/* Netfilter hook itself. */ -unsigned int ip_conntrack_in(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - struct ip_conntrack_protocol *proto; - int set_reply = 0; - int ret; - - /* Previously seen (loopback or untracked)? Ignore. */ - if ((*pskb)->nfct) { - CONNTRACK_STAT_INC(ignore); - return NF_ACCEPT; - } - - /* Never happen */ - if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) { - if (net_ratelimit()) { - printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n", - (*pskb)->nh.iph->protocol, hooknum); - } - return NF_DROP; - } - -/* Doesn't cover locally-generated broadcast, so not worth it. */ -#if 0 - /* Ignore broadcast: no `connection'. */ - if ((*pskb)->pkt_type == PACKET_BROADCAST) { - printk("Broadcast packet!\n"); - return NF_ACCEPT; - } else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF)) - == htonl(0x000000FF)) { - printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - NIPQUAD((*pskb)->nh.iph->daddr), - (*pskb)->sk, (*pskb)->pkt_type); - } -#endif - - proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol); - - /* It may be an special packet, error, unclean... - * inverse of the return code tells to the netfilter - * core what to do with the packet. */ - if (proto->error != NULL - && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) { - CONNTRACK_STAT_INC(error); - CONNTRACK_STAT_INC(invalid); - return -ret; - } - - if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) { - /* Not valid part of a connection */ - CONNTRACK_STAT_INC(invalid); - return NF_ACCEPT; - } - - if (IS_ERR(ct)) { - /* Too stressed to deal. */ - CONNTRACK_STAT_INC(drop); - return NF_DROP; - } - - IP_NF_ASSERT((*pskb)->nfct); - - ret = proto->packet(ct, *pskb, ctinfo); - if (ret < 0) { - /* Invalid: inverse of the return code tells - * the netfilter core what to do*/ - nf_conntrack_put((*pskb)->nfct); - (*pskb)->nfct = NULL; - CONNTRACK_STAT_INC(invalid); - return -ret; - } - - if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status)) - ip_conntrack_event_cache(IPCT_STATUS, *pskb); - - return ret; -} - -int invert_tuplepr(struct ip_conntrack_tuple *inverse, - const struct ip_conntrack_tuple *orig) -{ - return ip_ct_invert_tuple(inverse, orig, - __ip_conntrack_proto_find(orig->dst.protonum)); -} - -/* Would two expected things clash? */ -static inline int expect_clash(const struct ip_conntrack_expect *a, - const struct ip_conntrack_expect *b) -{ - /* Part covered by intersection of masks must be unequal, - otherwise they clash */ - struct ip_conntrack_tuple intersect_mask - = { { a->mask.src.ip & b->mask.src.ip, - { a->mask.src.u.all & b->mask.src.u.all } }, - { a->mask.dst.ip & b->mask.dst.ip, - { a->mask.dst.u.all & b->mask.dst.u.all }, - a->mask.dst.protonum & b->mask.dst.protonum } }; - - return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask); -} - -static inline int expect_matches(const struct ip_conntrack_expect *a, - const struct ip_conntrack_expect *b) -{ - return a->master == b->master - && ip_ct_tuple_equal(&a->tuple, &b->tuple) - && ip_ct_tuple_equal(&a->mask, &b->mask); -} - -/* Generally a bad idea to call this: could have matched already. */ -void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp) -{ - struct ip_conntrack_expect *i; - - write_lock_bh(&ip_conntrack_lock); - /* choose the the oldest expectation to evict */ - list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { - if (expect_matches(i, exp) && del_timer(&i->timeout)) { - ip_ct_unlink_expect(i); - write_unlock_bh(&ip_conntrack_lock); - ip_conntrack_expect_put(i); - return; - } - } - write_unlock_bh(&ip_conntrack_lock); -} - -/* We don't increase the master conntrack refcount for non-fulfilled - * conntracks. During the conntrack destruction, the expectations are - * always killed before the conntrack itself */ -struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me) -{ - struct ip_conntrack_expect *new; - - new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC); - if (!new) { - DEBUGP("expect_related: OOM allocating expect\n"); - return NULL; - } - new->master = me; - atomic_set(&new->use, 1); - return new; -} - -void ip_conntrack_expect_put(struct ip_conntrack_expect *exp) -{ - if (atomic_dec_and_test(&exp->use)) - kmem_cache_free(ip_conntrack_expect_cachep, exp); -} - -static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp) -{ - atomic_inc(&exp->use); - exp->master->expecting++; - list_add(&exp->list, &ip_conntrack_expect_list); - - init_timer(&exp->timeout); - exp->timeout.data = (unsigned long)exp; - exp->timeout.function = expectation_timed_out; - exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ; - add_timer(&exp->timeout); - - exp->id = ++ip_conntrack_expect_next_id; - atomic_inc(&exp->use); - CONNTRACK_STAT_INC(expect_create); -} - -/* Race with expectations being used means we could have none to find; OK. */ -static void evict_oldest_expect(struct ip_conntrack *master) -{ - struct ip_conntrack_expect *i; - - list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) { - if (i->master == master) { - if (del_timer(&i->timeout)) { - ip_ct_unlink_expect(i); - ip_conntrack_expect_put(i); - } - break; - } - } -} - -static inline int refresh_timer(struct ip_conntrack_expect *i) -{ - if (!del_timer(&i->timeout)) - return 0; - - i->timeout.expires = jiffies + i->master->helper->timeout*HZ; - add_timer(&i->timeout); - return 1; -} - -int ip_conntrack_expect_related(struct ip_conntrack_expect *expect) -{ - struct ip_conntrack_expect *i; - int ret; - - DEBUGP("ip_conntrack_expect_related %p\n", related_to); - DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple); - DEBUGP("mask: "); DUMP_TUPLE(&expect->mask); - - write_lock_bh(&ip_conntrack_lock); - list_for_each_entry(i, &ip_conntrack_expect_list, list) { - if (expect_matches(i, expect)) { - /* Refresh timer: if it's dying, ignore.. */ - if (refresh_timer(i)) { - ret = 0; - goto out; - } - } else if (expect_clash(i, expect)) { - ret = -EBUSY; - goto out; - } - } - - /* Will be over limit? */ - if (expect->master->helper->max_expected && - expect->master->expecting >= expect->master->helper->max_expected) - evict_oldest_expect(expect->master); - - ip_conntrack_expect_insert(expect); - ip_conntrack_expect_event(IPEXP_NEW, expect); - ret = 0; -out: - write_unlock_bh(&ip_conntrack_lock); - return ret; -} - -/* Alter reply tuple (maybe alter helper). This is for NAT, and is - implicitly racy: see __ip_conntrack_confirm */ -void ip_conntrack_alter_reply(struct ip_conntrack *conntrack, - const struct ip_conntrack_tuple *newreply) -{ - write_lock_bh(&ip_conntrack_lock); - /* Should be unconfirmed, so not in hash table yet */ - IP_NF_ASSERT(!is_confirmed(conntrack)); - - DEBUGP("Altering reply tuple of %p to ", conntrack); - DUMP_TUPLE(newreply); - - conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; - if (!conntrack->master && conntrack->expecting == 0) - conntrack->helper = __ip_conntrack_helper_find(newreply); - write_unlock_bh(&ip_conntrack_lock); -} - -int ip_conntrack_helper_register(struct ip_conntrack_helper *me) -{ - BUG_ON(me->timeout == 0); - write_lock_bh(&ip_conntrack_lock); - list_prepend(&helpers, me); - write_unlock_bh(&ip_conntrack_lock); - - return 0; -} - -struct ip_conntrack_helper * -__ip_conntrack_helper_find_byname(const char *name) -{ - struct ip_conntrack_helper *h; - - list_for_each_entry(h, &helpers, list) { - if (!strcmp(h->name, name)) - return h; - } - - return NULL; -} - -static inline int unhelp(struct ip_conntrack_tuple_hash *i, - const struct ip_conntrack_helper *me) -{ - if (tuplehash_to_ctrack(i)->helper == me) { - ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i)); - tuplehash_to_ctrack(i)->helper = NULL; - } - return 0; -} - -void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me) -{ - unsigned int i; - struct ip_conntrack_expect *exp, *tmp; - - /* Need write lock here, to delete helper. */ - write_lock_bh(&ip_conntrack_lock); - LIST_DELETE(&helpers, me); - - /* Get rid of expectations */ - list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) { - if (exp->master->helper == me && del_timer(&exp->timeout)) { - ip_ct_unlink_expect(exp); - ip_conntrack_expect_put(exp); - } - } - /* Get rid of expecteds, set helpers to NULL. */ - LIST_FIND_W(&unconfirmed, unhelp, struct ip_conntrack_tuple_hash*, me); - for (i = 0; i < ip_conntrack_htable_size; i++) - LIST_FIND_W(&ip_conntrack_hash[i], unhelp, - struct ip_conntrack_tuple_hash *, me); - write_unlock_bh(&ip_conntrack_lock); - - /* Someone could be still looking at the helper in a bh. */ - synchronize_net(); -} - -/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */ -void __ip_ct_refresh_acct(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - const struct sk_buff *skb, - unsigned long extra_jiffies, - int do_acct) -{ - int event = 0; - - IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct); - IP_NF_ASSERT(skb); - - write_lock_bh(&ip_conntrack_lock); - - /* If not in hash table, timer will not be active yet */ - if (!is_confirmed(ct)) { - ct->timeout.expires = extra_jiffies; - event = IPCT_REFRESH; - } else { - /* Need del_timer for race avoidance (may already be dying). */ - if (del_timer(&ct->timeout)) { - ct->timeout.expires = jiffies + extra_jiffies; - add_timer(&ct->timeout); - event = IPCT_REFRESH; - } - } - -#ifdef CONFIG_IP_NF_CT_ACCT - if (do_acct) { - ct->counters[CTINFO2DIR(ctinfo)].packets++; - ct->counters[CTINFO2DIR(ctinfo)].bytes += - ntohs(skb->nh.iph->tot_len); - if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000) - || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000)) - event |= IPCT_COUNTER_FILLING; - } -#endif - - write_unlock_bh(&ip_conntrack_lock); - - /* must be unlocked when calling event cache */ - if (event) - ip_conntrack_event_cache(event, skb); -} - -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) -/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be - * in ip_conntrack_core, since we don't want the protocols to autoload - * or depend on ctnetlink */ -int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb, - const struct ip_conntrack_tuple *tuple) -{ - NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(u_int16_t), - &tuple->src.u.tcp.port); - NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(u_int16_t), - &tuple->dst.u.tcp.port); - return 0; - -nfattr_failure: - return -1; -} - -int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[], - struct ip_conntrack_tuple *t) -{ - if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1]) - return -EINVAL; - - t->src.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]); - t->dst.u.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]); - - return 0; -} -#endif - -/* Returns new sk_buff, or NULL */ -struct sk_buff * -ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user) -{ - skb_orphan(skb); - - local_bh_disable(); - skb = ip_defrag(skb, user); - local_bh_enable(); - - if (skb) - ip_send_check(skb->nh.iph); - return skb; -} - -/* Used by ipt_REJECT. */ -static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - - /* This ICMP is in reverse direction to the packet which caused it */ - ct = ip_conntrack_get(skb, &ctinfo); - - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) - ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY; - else - ctinfo = IP_CT_RELATED; - - /* Attach to new skbuff, and increment count */ - nskb->nfct = &ct->ct_general; - nskb->nfctinfo = ctinfo; - nf_conntrack_get(nskb->nfct); -} - -static inline int -do_iter(const struct ip_conntrack_tuple_hash *i, - int (*iter)(struct ip_conntrack *i, void *data), - void *data) -{ - return iter(tuplehash_to_ctrack(i), data); -} - -/* Bring out ya dead! */ -static struct ip_conntrack_tuple_hash * -get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data), - void *data, unsigned int *bucket) -{ - struct ip_conntrack_tuple_hash *h = NULL; - - write_lock_bh(&ip_conntrack_lock); - for (; *bucket < ip_conntrack_htable_size; (*bucket)++) { - h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - break; - } - if (!h) - h = LIST_FIND_W(&unconfirmed, do_iter, - struct ip_conntrack_tuple_hash *, iter, data); - if (h) - atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use); - write_unlock_bh(&ip_conntrack_lock); - - return h; -} - -void -ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data) -{ - struct ip_conntrack_tuple_hash *h; - unsigned int bucket = 0; - - while ((h = get_next_corpse(iter, data, &bucket)) != NULL) { - struct ip_conntrack *ct = tuplehash_to_ctrack(h); - /* Time to push up daises... */ - if (del_timer(&ct->timeout)) - death_by_timeout((unsigned long)ct); - /* ... else the timer will get him soon. */ - - ip_conntrack_put(ct); - } -} - -/* Fast function for those who don't want to parse /proc (and I don't - blame them). */ -/* Reversing the socket's dst/src point of view gives us the reply - mapping. */ -static int -getorigdst(struct sock *sk, int optval, void __user *user, int *len) -{ - struct inet_sock *inet = inet_sk(sk); - struct ip_conntrack_tuple_hash *h; - struct ip_conntrack_tuple tuple; - - IP_CT_TUPLE_U_BLANK(&tuple); - tuple.src.ip = inet->rcv_saddr; - tuple.src.u.tcp.port = inet->sport; - tuple.dst.ip = inet->daddr; - tuple.dst.u.tcp.port = inet->dport; - tuple.dst.protonum = IPPROTO_TCP; - - /* We only do TCP at the moment: is there a better way? */ - if (strcmp(sk->sk_prot->name, "TCP")) { - DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); - return -ENOPROTOOPT; - } - - if ((unsigned int) *len < sizeof(struct sockaddr_in)) { - DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", - *len, sizeof(struct sockaddr_in)); - return -EINVAL; - } - - h = ip_conntrack_find_get(&tuple, NULL); - if (h) { - struct sockaddr_in sin; - struct ip_conntrack *ct = tuplehash_to_ctrack(h); - - sin.sin_family = AF_INET; - sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.u.tcp.port; - sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple.dst.ip; - - DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", - NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); - ip_conntrack_put(ct); - if (copy_to_user(user, &sin, sizeof(sin)) != 0) - return -EFAULT; - else - return 0; - } - DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", - NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port), - NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port)); - return -ENOENT; -} - -static struct nf_sockopt_ops so_getorigdst = { - .pf = PF_INET, - .get_optmin = SO_ORIGINAL_DST, - .get_optmax = SO_ORIGINAL_DST+1, - .get = &getorigdst, -}; - -static int kill_all(struct ip_conntrack *i, void *data) -{ - return 1; -} - -void ip_conntrack_flush(void) -{ - ip_ct_iterate_cleanup(kill_all, NULL); -} - -static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) -{ - if (vmalloced) - vfree(hash); - else - free_pages((unsigned long)hash, - get_order(sizeof(struct list_head) * size)); -} - -/* Mishearing the voices in his head, our hero wonders how he's - supposed to kill the mall. */ -void ip_conntrack_cleanup(void) -{ - ip_ct_attach = NULL; - - /* This makes sure all current packets have passed through - netfilter framework. Roll on, two-stage module - delete... */ - synchronize_net(); - - ip_ct_event_cache_flush(); - i_see_dead_people: - ip_conntrack_flush(); - if (atomic_read(&ip_conntrack_count) != 0) { - schedule(); - goto i_see_dead_people; - } - /* wait until all references to ip_conntrack_untracked are dropped */ - while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1) - schedule(); - - kmem_cache_destroy(ip_conntrack_cachep); - kmem_cache_destroy(ip_conntrack_expect_cachep); - free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, - ip_conntrack_htable_size); - nf_unregister_sockopt(&so_getorigdst); -} - -static struct list_head *alloc_hashtable(int size, int *vmalloced) -{ - struct list_head *hash; - unsigned int i; - - *vmalloced = 0; - hash = (void*)__get_free_pages(GFP_KERNEL, - get_order(sizeof(struct list_head) - * size)); - if (!hash) { - *vmalloced = 1; - printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n"); - hash = vmalloc(sizeof(struct list_head) * size); - } - - if (hash) - for (i = 0; i < size; i++) - INIT_LIST_HEAD(&hash[i]); - - return hash; -} - -static int set_hashsize(const char *val, struct kernel_param *kp) -{ - int i, bucket, hashsize, vmalloced; - int old_vmalloced, old_size; - int rnd; - struct list_head *hash, *old_hash; - struct ip_conntrack_tuple_hash *h; - - /* On boot, we can set this without any fancy locking. */ - if (!ip_conntrack_htable_size) - return param_set_int(val, kp); - - hashsize = simple_strtol(val, NULL, 0); - if (!hashsize) - return -EINVAL; - - hash = alloc_hashtable(hashsize, &vmalloced); - if (!hash) - return -ENOMEM; - - /* We have to rehash for the new table anyway, so we also can - * use a new random seed */ - get_random_bytes(&rnd, 4); - - write_lock_bh(&ip_conntrack_lock); - for (i = 0; i < ip_conntrack_htable_size; i++) { - while (!list_empty(&ip_conntrack_hash[i])) { - h = list_entry(ip_conntrack_hash[i].next, - struct ip_conntrack_tuple_hash, list); - list_del(&h->list); - bucket = __hash_conntrack(&h->tuple, hashsize, rnd); - list_add_tail(&h->list, &hash[bucket]); - } - } - old_size = ip_conntrack_htable_size; - old_vmalloced = ip_conntrack_vmalloc; - old_hash = ip_conntrack_hash; - - ip_conntrack_htable_size = hashsize; - ip_conntrack_vmalloc = vmalloced; - ip_conntrack_hash = hash; - ip_conntrack_hash_rnd = rnd; - write_unlock_bh(&ip_conntrack_lock); - - free_conntrack_hash(old_hash, old_vmalloced, old_size); - return 0; -} - -module_param_call(hashsize, set_hashsize, param_get_uint, - &ip_conntrack_htable_size, 0600); - -int __init ip_conntrack_init(void) -{ - unsigned int i; - int ret; - - /* Idea from tcp.c: use 1/16384 of memory. On i386: 32MB - * machine has 256 buckets. >= 1GB machines have 8192 buckets. */ - if (!ip_conntrack_htable_size) { - ip_conntrack_htable_size - = (((num_physpages << PAGE_SHIFT) / 16384) - / sizeof(struct list_head)); - if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) - ip_conntrack_htable_size = 8192; - if (ip_conntrack_htable_size < 16) - ip_conntrack_htable_size = 16; - } - ip_conntrack_max = 8 * ip_conntrack_htable_size; - - printk("ip_conntrack version %s (%u buckets, %d max)" - " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION, - ip_conntrack_htable_size, ip_conntrack_max, - sizeof(struct ip_conntrack)); - - ret = nf_register_sockopt(&so_getorigdst); - if (ret != 0) { - printk(KERN_ERR "Unable to register netfilter socket option\n"); - return ret; - } - - ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size, - &ip_conntrack_vmalloc); - if (!ip_conntrack_hash) { - printk(KERN_ERR "Unable to create ip_conntrack_hash\n"); - goto err_unreg_sockopt; - } - - ip_conntrack_cachep = kmem_cache_create("ip_conntrack", - sizeof(struct ip_conntrack), 0, - 0, NULL, NULL); - if (!ip_conntrack_cachep) { - printk(KERN_ERR "Unable to create ip_conntrack slab cache\n"); - goto err_free_hash; - } - - ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect", - sizeof(struct ip_conntrack_expect), - 0, 0, NULL, NULL); - if (!ip_conntrack_expect_cachep) { - printk(KERN_ERR "Unable to create ip_expect slab cache\n"); - goto err_free_conntrack_slab; - } - - /* Don't NEED lock here, but good form anyway. */ - write_lock_bh(&ip_conntrack_lock); - for (i = 0; i < MAX_IP_CT_PROTO; i++) - ip_ct_protos[i] = &ip_conntrack_generic_protocol; - /* Sew in builtin protocols. */ - ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; - ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; - ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; - write_unlock_bh(&ip_conntrack_lock); - - /* For use by ipt_REJECT */ - ip_ct_attach = ip_conntrack_attach; - - /* Set up fake conntrack: - - to never be deleted, not in any hashes */ - atomic_set(&ip_conntrack_untracked.ct_general.use, 1); - /* - and look it like as a confirmed connection */ - set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status); - - return ret; - -err_free_conntrack_slab: - kmem_cache_destroy(ip_conntrack_cachep); -err_free_hash: - free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc, - ip_conntrack_htable_size); -err_unreg_sockopt: - nf_unregister_sockopt(&so_getorigdst); - - return -ENOMEM; -} diff --git a/net/ipv4/netfilter/ip_conntrack_ftp.c b/net/ipv4/netfilter/ip_conntrack_ftp.c deleted file mode 100644 index e627e585617..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_ftp.c +++ /dev/null @@ -1,510 +0,0 @@ -/* FTP extension for IP connection tracking. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/ip.h> -#include <linux/ctype.h> -#include <net/checksum.h> -#include <net/tcp.h> - -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_ftp.h> -#include <linux/moduleparam.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); -MODULE_DESCRIPTION("ftp connection tracking helper"); - -/* This is slow, but it's simple. --RR */ -static char *ftp_buffer; -static DEFINE_SPINLOCK(ip_ftp_lock); - -#define MAX_PORTS 8 -static unsigned short ports[MAX_PORTS]; -static int ports_c; -module_param_array(ports, ushort, &ports_c, 0400); - -static int loose; -module_param(loose, bool, 0600); - -unsigned int (*ip_nat_ftp_hook)(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - enum ip_ct_ftp_type type, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack_expect *exp, - u32 *seq); -EXPORT_SYMBOL_GPL(ip_nat_ftp_hook); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static int try_rfc959(const char *, size_t, u_int32_t [], char); -static int try_eprt(const char *, size_t, u_int32_t [], char); -static int try_epsv_response(const char *, size_t, u_int32_t [], char); - -static const struct ftp_search { - enum ip_conntrack_dir dir; - const char *pattern; - size_t plen; - char skip; - char term; - enum ip_ct_ftp_type ftptype; - int (*getnum)(const char *, size_t, u_int32_t[], char); -} search[] = { - { - IP_CT_DIR_ORIGINAL, - "PORT", sizeof("PORT") - 1, ' ', '\r', - IP_CT_FTP_PORT, - try_rfc959, - }, - { - IP_CT_DIR_REPLY, - "227 ", sizeof("227 ") - 1, '(', ')', - IP_CT_FTP_PASV, - try_rfc959, - }, - { - IP_CT_DIR_ORIGINAL, - "EPRT", sizeof("EPRT") - 1, ' ', '\r', - IP_CT_FTP_EPRT, - try_eprt, - }, - { - IP_CT_DIR_REPLY, - "229 ", sizeof("229 ") - 1, '(', ')', - IP_CT_FTP_EPSV, - try_epsv_response, - }, -}; - -static int try_number(const char *data, size_t dlen, u_int32_t array[], - int array_size, char sep, char term) -{ - u_int32_t i, len; - - memset(array, 0, sizeof(array[0])*array_size); - - /* Keep data pointing at next char. */ - for (i = 0, len = 0; len < dlen && i < array_size; len++, data++) { - if (*data >= '0' && *data <= '9') { - array[i] = array[i]*10 + *data - '0'; - } - else if (*data == sep) - i++; - else { - /* Unexpected character; true if it's the - terminator and we're finished. */ - if (*data == term && i == array_size - 1) - return len; - - DEBUGP("Char %u (got %u nums) `%u' unexpected\n", - len, i, *data); - return 0; - } - } - DEBUGP("Failed to fill %u numbers separated by %c\n", array_size, sep); - - return 0; -} - -/* Returns 0, or length of numbers: 192,168,1,1,5,6 */ -static int try_rfc959(const char *data, size_t dlen, u_int32_t array[6], - char term) -{ - return try_number(data, dlen, array, 6, ',', term); -} - -/* Grab port: number up to delimiter */ -static int get_port(const char *data, int start, size_t dlen, char delim, - u_int32_t array[2]) -{ - u_int16_t port = 0; - int i; - - for (i = start; i < dlen; i++) { - /* Finished? */ - if (data[i] == delim) { - if (port == 0) - break; - array[0] = port >> 8; - array[1] = port; - return i + 1; - } - else if (data[i] >= '0' && data[i] <= '9') - port = port*10 + data[i] - '0'; - else /* Some other crap */ - break; - } - return 0; -} - -/* Returns 0, or length of numbers: |1|132.235.1.2|6275| */ -static int try_eprt(const char *data, size_t dlen, u_int32_t array[6], - char term) -{ - char delim; - int length; - - /* First character is delimiter, then "1" for IPv4, then - delimiter again. */ - if (dlen <= 3) return 0; - delim = data[0]; - if (isdigit(delim) || delim < 33 || delim > 126 - || data[1] != '1' || data[2] != delim) - return 0; - - DEBUGP("EPRT: Got |1|!\n"); - /* Now we have IP address. */ - length = try_number(data + 3, dlen - 3, array, 4, '.', delim); - if (length == 0) - return 0; - - DEBUGP("EPRT: Got IP address!\n"); - /* Start offset includes initial "|1|", and trailing delimiter */ - return get_port(data, 3 + length + 1, dlen, delim, array+4); -} - -/* Returns 0, or length of numbers: |||6446| */ -static int try_epsv_response(const char *data, size_t dlen, u_int32_t array[6], - char term) -{ - char delim; - - /* Three delimiters. */ - if (dlen <= 3) return 0; - delim = data[0]; - if (isdigit(delim) || delim < 33 || delim > 126 - || data[1] != delim || data[2] != delim) - return 0; - - return get_port(data, 3, dlen, delim, array+4); -} - -/* Return 1 for match, 0 for accept, -1 for partial. */ -static int find_pattern(const char *data, size_t dlen, - const char *pattern, size_t plen, - char skip, char term, - unsigned int *numoff, - unsigned int *numlen, - u_int32_t array[6], - int (*getnum)(const char *, size_t, u_int32_t[], char)) -{ - size_t i; - - DEBUGP("find_pattern `%s': dlen = %u\n", pattern, dlen); - if (dlen == 0) - return 0; - - if (dlen <= plen) { - /* Short packet: try for partial? */ - if (strnicmp(data, pattern, dlen) == 0) - return -1; - else return 0; - } - - if (strnicmp(data, pattern, plen) != 0) { -#if 0 - size_t i; - - DEBUGP("ftp: string mismatch\n"); - for (i = 0; i < plen; i++) { - DEBUGP("ftp:char %u `%c'(%u) vs `%c'(%u)\n", - i, data[i], data[i], - pattern[i], pattern[i]); - } -#endif - return 0; - } - - DEBUGP("Pattern matches!\n"); - /* Now we've found the constant string, try to skip - to the 'skip' character */ - for (i = plen; data[i] != skip; i++) - if (i == dlen - 1) return -1; - - /* Skip over the last character */ - i++; - - DEBUGP("Skipped up to `%c'!\n", skip); - - *numoff = i; - *numlen = getnum(data + i, dlen - i, array, term); - if (!*numlen) - return -1; - - DEBUGP("Match succeeded!\n"); - return 1; -} - -/* Look up to see if we're just after a \n. */ -static int find_nl_seq(u32 seq, const struct ip_ct_ftp_master *info, int dir) -{ - unsigned int i; - - for (i = 0; i < info->seq_aft_nl_num[dir]; i++) - if (info->seq_aft_nl[dir][i] == seq) - return 1; - return 0; -} - -/* We don't update if it's older than what we have. */ -static void update_nl_seq(u32 nl_seq, struct ip_ct_ftp_master *info, int dir, - struct sk_buff *skb) -{ - unsigned int i, oldest = NUM_SEQ_TO_REMEMBER; - - /* Look for oldest: if we find exact match, we're done. */ - for (i = 0; i < info->seq_aft_nl_num[dir]; i++) { - if (info->seq_aft_nl[dir][i] == nl_seq) - return; - - if (oldest == info->seq_aft_nl_num[dir] - || before(info->seq_aft_nl[dir][i], oldest)) - oldest = i; - } - - if (info->seq_aft_nl_num[dir] < NUM_SEQ_TO_REMEMBER) { - info->seq_aft_nl[dir][info->seq_aft_nl_num[dir]++] = nl_seq; - ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); - } else if (oldest != NUM_SEQ_TO_REMEMBER) { - info->seq_aft_nl[dir][oldest] = nl_seq; - ip_conntrack_event_cache(IPCT_HELPINFO_VOLATILE, skb); - } -} - -static int help(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) -{ - unsigned int dataoff, datalen; - struct tcphdr _tcph, *th; - char *fb_ptr; - int ret; - u32 seq, array[6] = { 0 }; - int dir = CTINFO2DIR(ctinfo); - unsigned int matchlen, matchoff; - struct ip_ct_ftp_master *ct_ftp_info = &ct->help.ct_ftp_info; - struct ip_conntrack_expect *exp; - unsigned int i; - int found = 0, ends_in_nl; - - /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED - && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { - DEBUGP("ftp: Conntrackinfo = %u\n", ctinfo); - return NF_ACCEPT; - } - - th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, - sizeof(_tcph), &_tcph); - if (th == NULL) - return NF_ACCEPT; - - dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4; - /* No data? */ - if (dataoff >= (*pskb)->len) { - DEBUGP("ftp: pskblen = %u\n", (*pskb)->len); - return NF_ACCEPT; - } - datalen = (*pskb)->len - dataoff; - - spin_lock_bh(&ip_ftp_lock); - fb_ptr = skb_header_pointer(*pskb, dataoff, - (*pskb)->len - dataoff, ftp_buffer); - BUG_ON(fb_ptr == NULL); - - ends_in_nl = (fb_ptr[datalen - 1] == '\n'); - seq = ntohl(th->seq) + datalen; - - /* Look up to see if we're just after a \n. */ - if (!find_nl_seq(ntohl(th->seq), ct_ftp_info, dir)) { - /* Now if this ends in \n, update ftp info. */ - DEBUGP("ip_conntrack_ftp_help: wrong seq pos %s(%u) or %s(%u)\n", - ct_ftp_info->seq_aft_nl[0][dir] - old_seq_aft_nl_set ? "":"(UNSET) ", old_seq_aft_nl); - ret = NF_ACCEPT; - goto out_update_nl; - } - - /* Initialize IP array to expected address (it's not mentioned - in EPSV responses) */ - array[0] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 24) & 0xFF; - array[1] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 16) & 0xFF; - array[2] = (ntohl(ct->tuplehash[dir].tuple.src.ip) >> 8) & 0xFF; - array[3] = ntohl(ct->tuplehash[dir].tuple.src.ip) & 0xFF; - - for (i = 0; i < ARRAY_SIZE(search); i++) { - if (search[i].dir != dir) continue; - - found = find_pattern(fb_ptr, (*pskb)->len - dataoff, - search[i].pattern, - search[i].plen, - search[i].skip, - search[i].term, - &matchoff, &matchlen, - array, - search[i].getnum); - if (found) break; - } - if (found == -1) { - /* We don't usually drop packets. After all, this is - connection tracking, not packet filtering. - However, it is necessary for accurate tracking in - this case. */ - if (net_ratelimit()) - printk("conntrack_ftp: partial %s %u+%u\n", - search[i].pattern, - ntohl(th->seq), datalen); - ret = NF_DROP; - goto out; - } else if (found == 0) { /* No match */ - ret = NF_ACCEPT; - goto out_update_nl; - } - - DEBUGP("conntrack_ftp: match `%s' (%u bytes at %u)\n", - fb_ptr + matchoff, matchlen, ntohl(th->seq) + matchoff); - - /* Allocate expectation which will be inserted */ - exp = ip_conntrack_expect_alloc(ct); - if (exp == NULL) { - ret = NF_DROP; - goto out; - } - - /* We refer to the reverse direction ("!dir") tuples here, - * because we're expecting something in the other direction. - * Doesn't matter unless NAT is happening. */ - exp->tuple.dst.ip = ct->tuplehash[!dir].tuple.dst.ip; - - if (htonl((array[0] << 24) | (array[1] << 16) | (array[2] << 8) | array[3]) - != ct->tuplehash[dir].tuple.src.ip) { - /* Enrico Scholz's passive FTP to partially RNAT'd ftp - server: it really wants us to connect to a - different IP address. Simply don't record it for - NAT. */ - DEBUGP("conntrack_ftp: NOT RECORDING: %u,%u,%u,%u != %u.%u.%u.%u\n", - array[0], array[1], array[2], array[3], - NIPQUAD(ct->tuplehash[dir].tuple.src.ip)); - - /* Thanks to Cristiano Lincoln Mattos - <lincoln@cesar.org.br> for reporting this potential - problem (DMZ machines opening holes to internal - networks, or the packet filter itself). */ - if (!loose) { - ret = NF_ACCEPT; - goto out_put_expect; - } - exp->tuple.dst.ip = htonl((array[0] << 24) | (array[1] << 16) - | (array[2] << 8) | array[3]); - } - - exp->tuple.src.ip = ct->tuplehash[!dir].tuple.src.ip; - exp->tuple.dst.u.tcp.port = htons(array[4] << 8 | array[5]); - exp->tuple.src.u.tcp.port = 0; /* Don't care. */ - exp->tuple.dst.protonum = IPPROTO_TCP; - exp->mask = ((struct ip_conntrack_tuple) - { { 0xFFFFFFFF, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); - - exp->expectfn = NULL; - exp->flags = 0; - - /* Now, NAT might want to mangle the packet, and register the - * (possibly changed) expectation itself. */ - if (ip_nat_ftp_hook) - ret = ip_nat_ftp_hook(pskb, ctinfo, search[i].ftptype, - matchoff, matchlen, exp, &seq); - else { - /* Can't expect this? Best to drop packet now. */ - if (ip_conntrack_expect_related(exp) != 0) - ret = NF_DROP; - else - ret = NF_ACCEPT; - } - -out_put_expect: - ip_conntrack_expect_put(exp); - -out_update_nl: - /* Now if this ends in \n, update ftp info. Seq may have been - * adjusted by NAT code. */ - if (ends_in_nl) - update_nl_seq(seq, ct_ftp_info,dir, *pskb); - out: - spin_unlock_bh(&ip_ftp_lock); - return ret; -} - -static struct ip_conntrack_helper ftp[MAX_PORTS]; -static char ftp_names[MAX_PORTS][sizeof("ftp-65535")]; - -/* Not __exit: called from init() */ -static void fini(void) -{ - int i; - for (i = 0; i < ports_c; i++) { - DEBUGP("ip_ct_ftp: unregistering helper for port %d\n", - ports[i]); - ip_conntrack_helper_unregister(&ftp[i]); - } - - kfree(ftp_buffer); -} - -static int __init init(void) -{ - int i, ret; - char *tmpname; - - ftp_buffer = kmalloc(65536, GFP_KERNEL); - if (!ftp_buffer) - return -ENOMEM; - - if (ports_c == 0) - ports[ports_c++] = FTP_PORT; - - for (i = 0; i < ports_c; i++) { - ftp[i].tuple.src.u.tcp.port = htons(ports[i]); - ftp[i].tuple.dst.protonum = IPPROTO_TCP; - ftp[i].mask.src.u.tcp.port = 0xFFFF; - ftp[i].mask.dst.protonum = 0xFF; - ftp[i].max_expected = 1; - ftp[i].timeout = 5 * 60; /* 5 minutes */ - ftp[i].me = THIS_MODULE; - ftp[i].help = help; - - tmpname = &ftp_names[i][0]; - if (ports[i] == FTP_PORT) - sprintf(tmpname, "ftp"); - else - sprintf(tmpname, "ftp-%d", ports[i]); - ftp[i].name = tmpname; - - DEBUGP("ip_ct_ftp: registering helper for port %d\n", - ports[i]); - ret = ip_conntrack_helper_register(&ftp[i]); - - if (ret) { - fini(); - return ret; - } - } - return 0; -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c b/net/ipv4/netfilter/ip_conntrack_helper_pptp.c deleted file mode 100644 index d716bba798f..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_helper_pptp.c +++ /dev/null @@ -1,802 +0,0 @@ -/* - * ip_conntrack_pptp.c - Version 3.0 - * - * Connection tracking support for PPTP (Point to Point Tunneling Protocol). - * PPTP is a a protocol for creating virtual private networks. - * It is a specification defined by Microsoft and some vendors - * working with Microsoft. PPTP is built on top of a modified - * version of the Internet Generic Routing Encapsulation Protocol. - * GRE is defined in RFC 1701 and RFC 1702. Documentation of - * PPTP can be found in RFC 2637 - * - * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - * Limitations: - * - We blindly assume that control connections are always - * established in PNS->PAC direction. This is a violation - * of RFFC2673 - * - We can only support one single call within each session - * - * TODO: - * - testing of incoming PPTP calls - * - * Changes: - * 2002-02-05 - Version 1.3 - * - Call ip_conntrack_unexpect_related() from - * pptp_destroy_siblings() to destroy expectations in case - * CALL_DISCONNECT_NOTIFY or tcp fin packet was seen - * (Philip Craig <philipc@snapgear.com>) - * - Add Version information at module loadtime - * 2002-02-10 - Version 1.6 - * - move to C99 style initializers - * - remove second expectation if first arrives - * 2004-10-22 - Version 2.0 - * - merge Mandrake's 2.6.x port with recent 2.6.x API changes - * - fix lots of linear skb assumptions from Mandrake's port - * 2005-06-10 - Version 2.1 - * - use ip_conntrack_expect_free() instead of kfree() on the - * expect's (which are from the slab for quite some time) - * 2005-06-10 - Version 3.0 - * - port helper to post-2.6.11 API changes, - * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) - * 2005-07-30 - Version 3.1 - * - port helper to 2.6.13 API changes - * - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/ip.h> -#include <net/checksum.h> -#include <net/tcp.h> - -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> -#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> - -#define IP_CT_PPTP_VERSION "3.1" - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); -MODULE_DESCRIPTION("Netfilter connection tracking helper module for PPTP"); - -static DEFINE_SPINLOCK(ip_pptp_lock); - -int -(*ip_nat_pptp_hook_outbound)(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq); - -int -(*ip_nat_pptp_hook_inbound)(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq); - -int -(*ip_nat_pptp_hook_exp_gre)(struct ip_conntrack_expect *expect_orig, - struct ip_conntrack_expect *expect_reply); - -void -(*ip_nat_pptp_hook_expectfn)(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp); - -#if 0 -/* PptpControlMessageType names */ -const char *pptp_msg_name[] = { - "UNKNOWN_MESSAGE", - "START_SESSION_REQUEST", - "START_SESSION_REPLY", - "STOP_SESSION_REQUEST", - "STOP_SESSION_REPLY", - "ECHO_REQUEST", - "ECHO_REPLY", - "OUT_CALL_REQUEST", - "OUT_CALL_REPLY", - "IN_CALL_REQUEST", - "IN_CALL_REPLY", - "IN_CALL_CONNECT", - "CALL_CLEAR_REQUEST", - "CALL_DISCONNECT_NOTIFY", - "WAN_ERROR_NOTIFY", - "SET_LINK_INFO" -}; -EXPORT_SYMBOL(pptp_msg_name); -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) -#else -#define DEBUGP(format, args...) -#endif - -#define SECS *HZ -#define MINS * 60 SECS -#define HOURS * 60 MINS - -#define PPTP_GRE_TIMEOUT (10 MINS) -#define PPTP_GRE_STREAM_TIMEOUT (5 HOURS) - -static void pptp_expectfn(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp) -{ - DEBUGP("increasing timeouts\n"); - - /* increase timeout of GRE data channel conntrack entry */ - ct->proto.gre.timeout = PPTP_GRE_TIMEOUT; - ct->proto.gre.stream_timeout = PPTP_GRE_STREAM_TIMEOUT; - - /* Can you see how rusty this code is, compared with the pre-2.6.11 - * one? That's what happened to my shiny newnat of 2002 ;( -HW */ - - if (!ip_nat_pptp_hook_expectfn) { - struct ip_conntrack_tuple inv_t; - struct ip_conntrack_expect *exp_other; - - /* obviously this tuple inversion only works until you do NAT */ - invert_tuplepr(&inv_t, &exp->tuple); - DEBUGP("trying to unexpect other dir: "); - DUMP_TUPLE(&inv_t); - - exp_other = ip_conntrack_expect_find(&inv_t); - if (exp_other) { - /* delete other expectation. */ - DEBUGP("found\n"); - ip_conntrack_unexpect_related(exp_other); - ip_conntrack_expect_put(exp_other); - } else { - DEBUGP("not found\n"); - } - } else { - /* we need more than simple inversion */ - ip_nat_pptp_hook_expectfn(ct, exp); - } -} - -static int destroy_sibling_or_exp(const struct ip_conntrack_tuple *t) -{ - struct ip_conntrack_tuple_hash *h; - struct ip_conntrack_expect *exp; - - DEBUGP("trying to timeout ct or exp for tuple "); - DUMP_TUPLE(t); - - h = ip_conntrack_find_get(t, NULL); - if (h) { - struct ip_conntrack *sibling = tuplehash_to_ctrack(h); - DEBUGP("setting timeout of conntrack %p to 0\n", sibling); - sibling->proto.gre.timeout = 0; - sibling->proto.gre.stream_timeout = 0; - if (del_timer(&sibling->timeout)) - sibling->timeout.function((unsigned long)sibling); - ip_conntrack_put(sibling); - return 1; - } else { - exp = ip_conntrack_expect_find(t); - if (exp) { - DEBUGP("unexpect_related of expect %p\n", exp); - ip_conntrack_unexpect_related(exp); - ip_conntrack_expect_put(exp); - return 1; - } - } - - return 0; -} - - -/* timeout GRE data connections */ -static void pptp_destroy_siblings(struct ip_conntrack *ct) -{ - struct ip_conntrack_tuple t; - - /* Since ct->sibling_list has literally rusted away in 2.6.11, - * we now need another way to find out about our sibling - * contrack and expects... -HW */ - - /* try original (pns->pac) tuple */ - memcpy(&t, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, sizeof(t)); - t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); - - if (!destroy_sibling_or_exp(&t)) - DEBUGP("failed to timeout original pns->pac ct/exp\n"); - - /* try reply (pac->pns) tuple */ - memcpy(&t, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, sizeof(t)); - t.dst.protonum = IPPROTO_GRE; - t.src.u.gre.key = htons(ct->help.ct_pptp_info.pac_call_id); - t.dst.u.gre.key = htons(ct->help.ct_pptp_info.pns_call_id); - - if (!destroy_sibling_or_exp(&t)) - DEBUGP("failed to timeout reply pac->pns ct/exp\n"); -} - -/* expect GRE connections (PNS->PAC and PAC->PNS direction) */ -static inline int -exp_gre(struct ip_conntrack *master, - u_int32_t seq, - __be16 callid, - __be16 peer_callid) -{ - struct ip_conntrack_tuple inv_tuple; - struct ip_conntrack_tuple exp_tuples[] = { - /* tuple in original direction, PNS->PAC */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip, - .u = { .gre = { .key = peer_callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip, - .u = { .gre = { .key = callid } }, - .protonum = IPPROTO_GRE - }, - }, - /* tuple in reply direction, PAC->PNS */ - { .src = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip, - .u = { .gre = { .key = callid } } - }, - .dst = { .ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip, - .u = { .gre = { .key = peer_callid } }, - .protonum = IPPROTO_GRE - }, - } - }; - struct ip_conntrack_expect *exp_orig, *exp_reply; - int ret = 1; - - exp_orig = ip_conntrack_expect_alloc(master); - if (exp_orig == NULL) - goto out; - - exp_reply = ip_conntrack_expect_alloc(master); - if (exp_reply == NULL) - goto out_put_orig; - - memcpy(&exp_orig->tuple, &exp_tuples[0], sizeof(exp_orig->tuple)); - - exp_orig->mask.src.ip = 0xffffffff; - exp_orig->mask.src.u.all = 0; - exp_orig->mask.dst.u.all = 0; - exp_orig->mask.dst.u.gre.key = htons(0xffff); - exp_orig->mask.dst.ip = 0xffffffff; - exp_orig->mask.dst.protonum = 0xff; - - exp_orig->master = master; - exp_orig->expectfn = pptp_expectfn; - exp_orig->flags = 0; - - /* both expectations are identical apart from tuple */ - memcpy(exp_reply, exp_orig, sizeof(*exp_reply)); - memcpy(&exp_reply->tuple, &exp_tuples[1], sizeof(exp_reply->tuple)); - - if (ip_nat_pptp_hook_exp_gre) - ret = ip_nat_pptp_hook_exp_gre(exp_orig, exp_reply); - else { - - DEBUGP("calling expect_related PNS->PAC"); - DUMP_TUPLE(&exp_orig->tuple); - - if (ip_conntrack_expect_related(exp_orig) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_put_both; - } - - DEBUGP("calling expect_related PAC->PNS"); - DUMP_TUPLE(&exp_reply->tuple); - - if (ip_conntrack_expect_related(exp_reply) != 0) { - DEBUGP("cannot expect_related()\n"); - goto out_unexpect_orig; - } - - /* Add GRE keymap entries */ - if (ip_ct_gre_keymap_add(master, &exp_reply->tuple, 0) != 0) { - DEBUGP("cannot keymap_add() exp\n"); - goto out_unexpect_both; - } - - invert_tuplepr(&inv_tuple, &exp_reply->tuple); - if (ip_ct_gre_keymap_add(master, &inv_tuple, 1) != 0) { - ip_ct_gre_keymap_destroy(master); - DEBUGP("cannot keymap_add() exp_inv\n"); - goto out_unexpect_both; - } - ret = 0; - } - -out_put_both: - ip_conntrack_expect_put(exp_reply); -out_put_orig: - ip_conntrack_expect_put(exp_orig); -out: - return ret; - -out_unexpect_both: - ip_conntrack_unexpect_related(exp_reply); -out_unexpect_orig: - ip_conntrack_unexpect_related(exp_orig); - goto out_put_both; -} - -static inline int -pptp_inbound_pkt(struct sk_buff **pskb, - struct tcphdr *tcph, - unsigned int nexthdr_off, - unsigned int datalen, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) -{ - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; - struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg; - __be16 *cid, *pcid; - u_int32_t seq; - - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) { - DEBUGP("error during skb_header_pointer\n"); - return NF_ACCEPT; - } - - msg = ntohs(ctlh->messageType); - DEBUGP("inbound control message %s\n", pptp_msg_name[msg]); - - switch (msg) { - case PPTP_START_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.srep)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - - /* server confirms new control session */ - if (info->sstate < PPTP_SESSION_REQUESTED) { - DEBUGP("%s without START_SESS_REQUEST\n", - pptp_msg_name[msg]); - break; - } - if (pptpReq->srep.resultCode == PPTP_START_OK) - info->sstate = PPTP_SESSION_CONFIRMED; - else - info->sstate = PPTP_SESSION_ERROR; - break; - - case PPTP_STOP_SESSION_REPLY: - if (reqlen < sizeof(_pptpReq.strep)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - - /* server confirms end of control session */ - if (info->sstate > PPTP_SESSION_STOPREQ) { - DEBUGP("%s without STOP_SESS_REQUEST\n", - pptp_msg_name[msg]); - break; - } - if (pptpReq->strep.resultCode == PPTP_STOP_OK) - info->sstate = PPTP_SESSION_NONE; - else - info->sstate = PPTP_SESSION_ERROR; - break; - - case PPTP_OUT_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.ocack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - - /* server accepted call, we now expect GRE frames */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } - if (info->cstate != PPTP_CALL_OUT_REQ && - info->cstate != PPTP_CALL_OUT_CONF) { - DEBUGP("%s without OUTCALL_REQ\n", pptp_msg_name[msg]); - break; - } - if (pptpReq->ocack.resultCode != PPTP_OUTCALL_CONNECT) { - info->cstate = PPTP_CALL_NONE; - break; - } - - cid = &pptpReq->ocack.callID; - pcid = &pptpReq->ocack.peersCallID; - - info->pac_call_id = ntohs(*cid); - - if (htons(info->pns_call_id) != *pcid) { - DEBUGP("%s for unknown callid %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } - - DEBUGP("%s, CID=%X, PCID=%X\n", pptp_msg_name[msg], - ntohs(*cid), ntohs(*pcid)); - - info->cstate = PPTP_CALL_OUT_CONF; - - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); - break; - - case PPTP_IN_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - - /* server tells us about incoming call request */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } - pcid = &pptpReq->icack.peersCallID; - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); - info->cstate = PPTP_CALL_IN_REQ; - info->pac_call_id = ntohs(*pcid); - break; - - case PPTP_IN_CALL_CONNECT: - if (reqlen < sizeof(_pptpReq.iccon)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - - /* server tells us about incoming call established */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", pptp_msg_name[msg]); - break; - } - if (info->sstate != PPTP_CALL_IN_REP - && info->sstate != PPTP_CALL_IN_CONF) { - DEBUGP("%s but never sent IN_CALL_REPLY\n", - pptp_msg_name[msg]); - break; - } - - pcid = &pptpReq->iccon.peersCallID; - cid = &info->pac_call_id; - - if (info->pns_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown CallID %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } - - DEBUGP("%s, PCID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); - info->cstate = PPTP_CALL_IN_CONF; - - /* we expect a GRE connection from PAC to PNS */ - seq = ntohl(tcph->seq) + sizeof(struct pptp_pkt_hdr) - + sizeof(struct PptpControlHeader) - + ((void *)pcid - (void *)pptpReq); - - if (exp_gre(ct, seq, *cid, *pcid) != 0) - printk("ip_conntrack_pptp: error during exp_gre\n"); - - break; - - case PPTP_CALL_DISCONNECT_NOTIFY: - if (reqlen < sizeof(_pptpReq.disc)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - - /* server confirms disconnect */ - cid = &pptpReq->disc.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); - info->cstate = PPTP_CALL_NONE; - - /* untrack this call id, unexpect GRE packets */ - pptp_destroy_siblings(ct); - break; - - case PPTP_WAN_ERROR_NOTIFY: - break; - - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* I don't have to explain these ;) */ - break; - default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX) - ? pptp_msg_name[msg]:pptp_msg_name[0], msg); - break; - } - - - if (ip_nat_pptp_hook_inbound) - return ip_nat_pptp_hook_inbound(pskb, ct, ctinfo, ctlh, - pptpReq); - - return NF_ACCEPT; - -} - -static inline int -pptp_outbound_pkt(struct sk_buff **pskb, - struct tcphdr *tcph, - unsigned int nexthdr_off, - unsigned int datalen, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) -{ - struct PptpControlHeader _ctlh, *ctlh; - unsigned int reqlen; - union pptp_ctrl_union _pptpReq, *pptpReq; - struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - u_int16_t msg; - __be16 *cid, *pcid; - - ctlh = skb_header_pointer(*pskb, nexthdr_off, sizeof(_ctlh), &_ctlh); - if (!ctlh) - return NF_ACCEPT; - nexthdr_off += sizeof(_ctlh); - datalen -= sizeof(_ctlh); - - reqlen = datalen; - if (reqlen > sizeof(*pptpReq)) - reqlen = sizeof(*pptpReq); - pptpReq = skb_header_pointer(*pskb, nexthdr_off, reqlen, &_pptpReq); - if (!pptpReq) - return NF_ACCEPT; - - msg = ntohs(ctlh->messageType); - DEBUGP("outbound control message %s\n", pptp_msg_name[msg]); - - switch (msg) { - case PPTP_START_SESSION_REQUEST: - /* client requests for new control session */ - if (info->sstate != PPTP_SESSION_NONE) { - DEBUGP("%s but we already have one", - pptp_msg_name[msg]); - } - info->sstate = PPTP_SESSION_REQUESTED; - break; - case PPTP_STOP_SESSION_REQUEST: - /* client requests end of control session */ - info->sstate = PPTP_SESSION_STOPREQ; - break; - - case PPTP_OUT_CALL_REQUEST: - if (reqlen < sizeof(_pptpReq.ocreq)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - /* FIXME: break; */ - } - - /* client initiating connection to server */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("%s but no session\n", - pptp_msg_name[msg]); - break; - } - info->cstate = PPTP_CALL_OUT_REQ; - /* track PNS call id */ - cid = &pptpReq->ocreq.callID; - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*cid)); - info->pns_call_id = ntohs(*cid); - break; - case PPTP_IN_CALL_REPLY: - if (reqlen < sizeof(_pptpReq.icack)) { - DEBUGP("%s: short packet\n", pptp_msg_name[msg]); - break; - } - - /* client answers incoming call */ - if (info->cstate != PPTP_CALL_IN_REQ - && info->cstate != PPTP_CALL_IN_REP) { - DEBUGP("%s without incall_req\n", - pptp_msg_name[msg]); - break; - } - if (pptpReq->icack.resultCode != PPTP_INCALL_ACCEPT) { - info->cstate = PPTP_CALL_NONE; - break; - } - pcid = &pptpReq->icack.peersCallID; - if (info->pac_call_id != ntohs(*pcid)) { - DEBUGP("%s for unknown call %u\n", - pptp_msg_name[msg], ntohs(*pcid)); - break; - } - DEBUGP("%s, CID=%X\n", pptp_msg_name[msg], ntohs(*pcid)); - /* part two of the three-way handshake */ - info->cstate = PPTP_CALL_IN_REP; - info->pns_call_id = ntohs(pptpReq->icack.callID); - break; - - case PPTP_CALL_CLEAR_REQUEST: - /* client requests hangup of call */ - if (info->sstate != PPTP_SESSION_CONFIRMED) { - DEBUGP("CLEAR_CALL but no session\n"); - break; - } - /* FUTURE: iterate over all calls and check if - * call ID is valid. We don't do this without newnat, - * because we only know about last call */ - info->cstate = PPTP_CALL_CLEAR_REQ; - break; - case PPTP_SET_LINK_INFO: - break; - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* I don't have to explain these ;) */ - break; - default: - DEBUGP("invalid %s (TY=%d)\n", (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0], msg); - /* unknown: no need to create GRE masq table entry */ - break; - } - - if (ip_nat_pptp_hook_outbound) - return ip_nat_pptp_hook_outbound(pskb, ct, ctinfo, ctlh, - pptpReq); - - return NF_ACCEPT; -} - - -/* track caller id inside control connection, call expect_related */ -static int -conntrack_pptp_help(struct sk_buff **pskb, - struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) - -{ - struct pptp_pkt_hdr _pptph, *pptph; - struct tcphdr _tcph, *tcph; - u_int32_t tcplen = (*pskb)->len - (*pskb)->nh.iph->ihl * 4; - u_int32_t datalen; - int dir = CTINFO2DIR(ctinfo); - struct ip_ct_pptp_master *info = &ct->help.ct_pptp_info; - unsigned int nexthdr_off; - - int oldsstate, oldcstate; - int ret; - - /* don't do any tracking before tcp handshake complete */ - if (ctinfo != IP_CT_ESTABLISHED - && ctinfo != IP_CT_ESTABLISHED+IP_CT_IS_REPLY) { - DEBUGP("ctinfo = %u, skipping\n", ctinfo); - return NF_ACCEPT; - } - - nexthdr_off = (*pskb)->nh.iph->ihl*4; - tcph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_tcph), &_tcph); - BUG_ON(!tcph); - nexthdr_off += tcph->doff * 4; - datalen = tcplen - tcph->doff * 4; - - if (tcph->fin || tcph->rst) { - DEBUGP("RST/FIN received, timeouting GRE\n"); - /* can't do this after real newnat */ - info->cstate = PPTP_CALL_NONE; - - /* untrack this call id, unexpect GRE packets */ - pptp_destroy_siblings(ct); - } - - pptph = skb_header_pointer(*pskb, nexthdr_off, sizeof(_pptph), &_pptph); - if (!pptph) { - DEBUGP("no full PPTP header, can't track\n"); - return NF_ACCEPT; - } - nexthdr_off += sizeof(_pptph); - datalen -= sizeof(_pptph); - - /* if it's not a control message we can't do anything with it */ - if (ntohs(pptph->packetType) != PPTP_PACKET_CONTROL || - ntohl(pptph->magicCookie) != PPTP_MAGIC_COOKIE) { - DEBUGP("not a control packet\n"); - return NF_ACCEPT; - } - - oldsstate = info->sstate; - oldcstate = info->cstate; - - spin_lock_bh(&ip_pptp_lock); - - /* FIXME: We just blindly assume that the control connection is always - * established from PNS->PAC. However, RFC makes no guarantee */ - if (dir == IP_CT_DIR_ORIGINAL) - /* client -> server (PNS -> PAC) */ - ret = pptp_outbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, - ctinfo); - else - /* server -> client (PAC -> PNS) */ - ret = pptp_inbound_pkt(pskb, tcph, nexthdr_off, datalen, ct, - ctinfo); - DEBUGP("sstate: %d->%d, cstate: %d->%d\n", - oldsstate, info->sstate, oldcstate, info->cstate); - spin_unlock_bh(&ip_pptp_lock); - - return ret; -} - -/* control protocol helper */ -static struct ip_conntrack_helper pptp = { - .list = { NULL, NULL }, - .name = "pptp", - .me = THIS_MODULE, - .max_expected = 2, - .timeout = 5 * 60, - .tuple = { .src = { .ip = 0, - .u = { .tcp = { .port = - __constant_htons(PPTP_CONTROL_PORT) } } - }, - .dst = { .ip = 0, - .u = { .all = 0 }, - .protonum = IPPROTO_TCP - } - }, - .mask = { .src = { .ip = 0, - .u = { .tcp = { .port = __constant_htons(0xffff) } } - }, - .dst = { .ip = 0, - .u = { .all = 0 }, - .protonum = 0xff - } - }, - .help = conntrack_pptp_help -}; - -extern void ip_ct_proto_gre_fini(void); -extern int __init ip_ct_proto_gre_init(void); - -/* ip_conntrack_pptp initialization */ -static int __init init(void) -{ - int retcode; - - retcode = ip_ct_proto_gre_init(); - if (retcode < 0) - return retcode; - - DEBUGP(" registering helper\n"); - if ((retcode = ip_conntrack_helper_register(&pptp))) { - printk(KERN_ERR "Unable to register conntrack application " - "helper for pptp: %d\n", retcode); - ip_ct_proto_gre_fini(); - return retcode; - } - - printk("ip_conntrack_pptp version %s loaded\n", IP_CT_PPTP_VERSION); - return 0; -} - -static void __exit fini(void) -{ - ip_conntrack_helper_unregister(&pptp); - ip_ct_proto_gre_fini(); - printk("ip_conntrack_pptp version %s unloaded\n", IP_CT_PPTP_VERSION); -} - -module_init(init); -module_exit(fini); - -EXPORT_SYMBOL(ip_nat_pptp_hook_outbound); -EXPORT_SYMBOL(ip_nat_pptp_hook_inbound); -EXPORT_SYMBOL(ip_nat_pptp_hook_exp_gre); -EXPORT_SYMBOL(ip_nat_pptp_hook_expectfn); diff --git a/net/ipv4/netfilter/ip_conntrack_irc.c b/net/ipv4/netfilter/ip_conntrack_irc.c deleted file mode 100644 index c51a2cf71b4..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_irc.c +++ /dev/null @@ -1,312 +0,0 @@ -/* IRC extension for IP connection tracking, Version 1.21 - * (C) 2000-2002 by Harald Welte <laforge@gnumonks.org> - * based on RR's ip_conntrack_ftp.c - * - * ip_conntrack_irc.c,v 1.21 2002/02/05 14:49:26 laforge Exp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - ** - * Module load syntax: - * insmod ip_conntrack_irc.o ports=port1,port2,...port<MAX_PORTS> - * max_dcc_channels=n dcc_timeout=secs - * - * please give the ports of all IRC servers You wish to connect to. - * If You don't specify ports, the default will be port 6667. - * With max_dcc_channels you can define the maximum number of not - * yet answered DCC channels per IRC session (default 8). - * With dcc_timeout you can specify how long the system waits for - * an expected DCC channel (default 300 seconds). - * - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/ip.h> -#include <net/checksum.h> -#include <net/tcp.h> - -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_irc.h> -#include <linux/moduleparam.h> - -#define MAX_PORTS 8 -static unsigned short ports[MAX_PORTS]; -static int ports_c; -static unsigned int max_dcc_channels = 8; -static unsigned int dcc_timeout = 300; -/* This is slow, but it's simple. --RR */ -static char *irc_buffer; -static DEFINE_SPINLOCK(irc_buffer_lock); - -unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack_expect *exp); -EXPORT_SYMBOL_GPL(ip_nat_irc_hook); - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("IRC (DCC) connection tracking helper"); -MODULE_LICENSE("GPL"); -module_param_array(ports, ushort, &ports_c, 0400); -MODULE_PARM_DESC(ports, "port numbers of IRC servers"); -module_param(max_dcc_channels, uint, 0400); -MODULE_PARM_DESC(max_dcc_channels, "max number of expected DCC channels per IRC session"); -module_param(dcc_timeout, uint, 0400); -MODULE_PARM_DESC(dcc_timeout, "timeout on for unestablished DCC channels"); - -static const char *dccprotos[] = { "SEND ", "CHAT ", "MOVE ", "TSEND ", "SCHAT " }; -#define MINMATCHLEN 5 - -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - -static int parse_dcc(char *data, char *data_end, u_int32_t *ip, - u_int16_t *port, char **ad_beg_p, char **ad_end_p) -/* tries to get the ip_addr and port out of a dcc command - return value: -1 on failure, 0 on success - data pointer to first byte of DCC command data - data_end pointer to last byte of dcc command data - ip returns parsed ip of dcc command - port returns parsed port of dcc command - ad_beg_p returns pointer to first byte of addr data - ad_end_p returns pointer to last byte of addr data */ -{ - - /* at least 12: "AAAAAAAA P\1\n" */ - while (*data++ != ' ') - if (data > data_end - 12) - return -1; - - *ad_beg_p = data; - *ip = simple_strtoul(data, &data, 10); - - /* skip blanks between ip and port */ - while (*data == ' ') { - if (data >= data_end) - return -1; - data++; - } - - *port = simple_strtoul(data, &data, 10); - *ad_end_p = data; - - return 0; -} - -static int help(struct sk_buff **pskb, - struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) -{ - unsigned int dataoff; - struct tcphdr _tcph, *th; - char *data, *data_limit, *ib_ptr; - int dir = CTINFO2DIR(ctinfo); - struct ip_conntrack_expect *exp; - u32 seq; - u_int32_t dcc_ip; - u_int16_t dcc_port; - int i, ret = NF_ACCEPT; - char *addr_beg_p, *addr_end_p; - - DEBUGP("entered\n"); - - /* If packet is coming from IRC server */ - if (dir == IP_CT_DIR_REPLY) - return NF_ACCEPT; - - /* Until there's been traffic both ways, don't look in packets. */ - if (ctinfo != IP_CT_ESTABLISHED - && ctinfo != IP_CT_ESTABLISHED + IP_CT_IS_REPLY) { - DEBUGP("Conntrackinfo = %u\n", ctinfo); - return NF_ACCEPT; - } - - /* Not a full tcp header? */ - th = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, - sizeof(_tcph), &_tcph); - if (th == NULL) - return NF_ACCEPT; - - /* No data? */ - dataoff = (*pskb)->nh.iph->ihl*4 + th->doff*4; - if (dataoff >= (*pskb)->len) - return NF_ACCEPT; - - spin_lock_bh(&irc_buffer_lock); - ib_ptr = skb_header_pointer(*pskb, dataoff, - (*pskb)->len - dataoff, irc_buffer); - BUG_ON(ib_ptr == NULL); - - data = ib_ptr; - data_limit = ib_ptr + (*pskb)->len - dataoff; - - /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 - * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ - while (data < (data_limit - (19 + MINMATCHLEN))) { - if (memcmp(data, "\1DCC ", 5)) { - data++; - continue; - } - - data += 5; - /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ - - DEBUGP("DCC found in master %u.%u.%u.%u:%u %u.%u.%u.%u:%u...\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest)); - - for (i = 0; i < ARRAY_SIZE(dccprotos); i++) { - if (memcmp(data, dccprotos[i], strlen(dccprotos[i]))) { - /* no match */ - continue; - } - - DEBUGP("DCC %s detected\n", dccprotos[i]); - data += strlen(dccprotos[i]); - /* we have at least - * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid - * data left (== 14/13 bytes) */ - if (parse_dcc((char *)data, data_limit, &dcc_ip, - &dcc_port, &addr_beg_p, &addr_end_p)) { - /* unable to parse */ - DEBUGP("unable to parse dcc command\n"); - continue; - } - DEBUGP("DCC bound ip/port: %u.%u.%u.%u:%u\n", - HIPQUAD(dcc_ip), dcc_port); - - /* dcc_ip can be the internal OR external (NAT'ed) IP - * Tiago Sousa <mirage@kaotik.org> */ - if (ct->tuplehash[dir].tuple.src.ip != htonl(dcc_ip) - && ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip != htonl(dcc_ip)) { - if (net_ratelimit()) - printk(KERN_WARNING - "Forged DCC command from " - "%u.%u.%u.%u: %u.%u.%u.%u:%u\n", - NIPQUAD(ct->tuplehash[dir].tuple.src.ip), - HIPQUAD(dcc_ip), dcc_port); - - continue; - } - - exp = ip_conntrack_expect_alloc(ct); - if (exp == NULL) { - ret = NF_DROP; - goto out; - } - - /* save position of address in dcc string, - * necessary for NAT */ - DEBUGP("tcph->seq = %u\n", th->seq); - seq = ntohl(th->seq) + (addr_beg_p - ib_ptr); - - /* We refer to the reverse direction ("!dir") - * tuples here, because we're expecting - * something in the other * direction. - * Doesn't matter unless NAT is happening. */ - exp->tuple = ((struct ip_conntrack_tuple) - { { 0, { 0 } }, - { ct->tuplehash[!dir].tuple.dst.ip, - { .tcp = { htons(dcc_port) } }, - IPPROTO_TCP }}); - exp->mask = ((struct ip_conntrack_tuple) - { { 0, { 0 } }, - { 0xFFFFFFFF, { .tcp = { 0xFFFF } }, 0xFF }}); - exp->expectfn = NULL; - exp->flags = 0; - if (ip_nat_irc_hook) - ret = ip_nat_irc_hook(pskb, ctinfo, - addr_beg_p - ib_ptr, - addr_end_p - addr_beg_p, - exp); - else if (ip_conntrack_expect_related(exp) != 0) - ret = NF_DROP; - ip_conntrack_expect_put(exp); - goto out; - } /* for .. NUM_DCCPROTO */ - } /* while data < ... */ - - out: - spin_unlock_bh(&irc_buffer_lock); - return ret; -} - -static struct ip_conntrack_helper irc_helpers[MAX_PORTS]; -static char irc_names[MAX_PORTS][sizeof("irc-65535")]; - -static void fini(void); - -static int __init init(void) -{ - int i, ret; - struct ip_conntrack_helper *hlpr; - char *tmpname; - - if (max_dcc_channels < 1) { - printk("ip_conntrack_irc: max_dcc_channels must be a positive integer\n"); - return -EBUSY; - } - - irc_buffer = kmalloc(65536, GFP_KERNEL); - if (!irc_buffer) - return -ENOMEM; - - /* If no port given, default to standard irc port */ - if (ports_c == 0) - ports[ports_c++] = IRC_PORT; - - for (i = 0; i < ports_c; i++) { - hlpr = &irc_helpers[i]; - hlpr->tuple.src.u.tcp.port = htons(ports[i]); - hlpr->tuple.dst.protonum = IPPROTO_TCP; - hlpr->mask.src.u.tcp.port = 0xFFFF; - hlpr->mask.dst.protonum = 0xFF; - hlpr->max_expected = max_dcc_channels; - hlpr->timeout = dcc_timeout; - hlpr->me = THIS_MODULE; - hlpr->help = help; - - tmpname = &irc_names[i][0]; - if (ports[i] == IRC_PORT) - sprintf(tmpname, "irc"); - else - sprintf(tmpname, "irc-%d", i); - hlpr->name = tmpname; - - DEBUGP("port #%d: %d\n", i, ports[i]); - - ret = ip_conntrack_helper_register(hlpr); - - if (ret) { - printk("ip_conntrack_irc: ERROR registering port %d\n", - ports[i]); - fini(); - return -EBUSY; - } - } - return 0; -} - -/* This function is intentionally _NOT_ defined as __exit, because - * it is needed by the init function */ -static void fini(void) -{ - int i; - for (i = 0; i < ports_c; i++) { - DEBUGP("unregistering port %d\n", - ports[i]); - ip_conntrack_helper_unregister(&irc_helpers[i]); - } - kfree(irc_buffer); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c b/net/ipv4/netfilter/ip_conntrack_netbios_ns.c deleted file mode 100644 index 4e68e16a261..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_netbios_ns.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * NetBIOS name service broadcast connection tracking helper - * - * (c) 2005 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ -/* - * This helper tracks locally originating NetBIOS name service - * requests by issuing permanent expectations (valid until - * timing out) matching all reply connections from the - * destination network. The only NetBIOS specific thing is - * actually the port number. - */ -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/init.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/inetdevice.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <net/route.h> - -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> - -#define NMBD_PORT 137 - -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("NetBIOS name service broadcast connection tracking helper"); -MODULE_LICENSE("GPL"); - -static unsigned int timeout = 3; -module_param(timeout, uint, 0400); -MODULE_PARM_DESC(timeout, "timeout for master connection/replies in seconds"); - -static int help(struct sk_buff **pskb, - struct ip_conntrack *ct, enum ip_conntrack_info ctinfo) -{ - struct ip_conntrack_expect *exp; - struct iphdr *iph = (*pskb)->nh.iph; - struct rtable *rt = (struct rtable *)(*pskb)->dst; - struct in_device *in_dev; - u_int32_t mask = 0; - - /* we're only interested in locally generated packets */ - if ((*pskb)->sk == NULL) - goto out; - if (rt == NULL || !(rt->rt_flags & RTCF_BROADCAST)) - goto out; - if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) - goto out; - - rcu_read_lock(); - in_dev = __in_dev_get_rcu(rt->u.dst.dev); - if (in_dev != NULL) { - for_primary_ifa(in_dev) { - if (ifa->ifa_broadcast == iph->daddr) { - mask = ifa->ifa_mask; - break; - } - } endfor_ifa(in_dev); - } - rcu_read_unlock(); - - if (mask == 0) - goto out; - - exp = ip_conntrack_expect_alloc(ct); - if (exp == NULL) - goto out; - - exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->tuple.src.u.udp.port = ntohs(NMBD_PORT); - - exp->mask.src.ip = mask; - exp->mask.src.u.udp.port = 0xFFFF; - exp->mask.dst.ip = 0xFFFFFFFF; - exp->mask.dst.u.udp.port = 0xFFFF; - exp->mask.dst.protonum = 0xFF; - - exp->expectfn = NULL; - exp->flags = IP_CT_EXPECT_PERMANENT; - - ip_conntrack_expect_related(exp); - ip_conntrack_expect_put(exp); - - ip_ct_refresh(ct, *pskb, timeout * HZ); -out: - return NF_ACCEPT; -} - -static struct ip_conntrack_helper helper = { - .name = "netbios-ns", - .tuple = { - .src = { - .u = { - .udp = { - .port = __constant_htons(NMBD_PORT), - } - } - }, - .dst = { - .protonum = IPPROTO_UDP, - }, - }, - .mask = { - .src = { - .u = { - .udp = { - .port = 0xFFFF, - } - } - }, - .dst = { - .protonum = 0xFF, - }, - }, - .max_expected = 1, - .me = THIS_MODULE, - .help = help, -}; - -static int __init init(void) -{ - helper.timeout = timeout; - return ip_conntrack_helper_register(&helper); -} - -static void __exit fini(void) -{ - ip_conntrack_helper_unregister(&helper); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_conntrack_netlink.c b/net/ipv4/netfilter/ip_conntrack_netlink.c deleted file mode 100644 index c9ebbe0d2d9..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_netlink.c +++ /dev/null @@ -1,1631 +0,0 @@ -/* Connection tracking via netlink socket. Allows for user space - * protocol helpers and general trouble making from userspace. - * - * (C) 2001 by Jay Schulist <jschlst@samba.org> - * (C) 2002-2005 by Harald Welte <laforge@gnumonks.org> - * (C) 2003 by Patrick Mchardy <kaber@trash.net> - * (C) 2005 by Pablo Neira Ayuso <pablo@eurodev.net> - * - * I've reworked this stuff to use attributes instead of conntrack - * structures. 5.44 am. I need more tea. --pablo 05/07/11. - * - * Initial connection tracking via netlink development funded and - * generally made possible by Network Robots, Inc. (www.networkrobots.com) - * - * Further development of this code funded by Astaro AG (http://www.astaro.com) - * - * This software may be used and distributed according to the terms - * of the GNU General Public License, incorporated herein by reference. - */ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/skbuff.h> -#include <linux/errno.h> -#include <linux/netlink.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> -#include <linux/notifier.h> - -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> - -#include <linux/netfilter/nfnetlink.h> -#include <linux/netfilter/nfnetlink_conntrack.h> - -MODULE_LICENSE("GPL"); - -static char __initdata version[] = "0.90"; - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - - -static inline int -ctnetlink_dump_tuples_proto(struct sk_buff *skb, - const struct ip_conntrack_tuple *tuple) -{ - struct ip_conntrack_protocol *proto; - int ret = 0; - - NFA_PUT(skb, CTA_PROTO_NUM, sizeof(u_int8_t), &tuple->dst.protonum); - - /* If no protocol helper is found, this function will return the - * generic protocol helper, so proto won't *ever* be NULL */ - proto = ip_conntrack_proto_find_get(tuple->dst.protonum); - if (likely(proto->tuple_to_nfattr)) - ret = proto->tuple_to_nfattr(skb, tuple); - - ip_conntrack_proto_put(proto); - - return ret; - -nfattr_failure: - return -1; -} - -static inline int -ctnetlink_dump_tuples(struct sk_buff *skb, - const struct ip_conntrack_tuple *tuple) -{ - struct nfattr *nest_parms; - int ret; - - nest_parms = NFA_NEST(skb, CTA_TUPLE_IP); - NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), &tuple->src.ip); - NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), &tuple->dst.ip); - NFA_NEST_END(skb, nest_parms); - - nest_parms = NFA_NEST(skb, CTA_TUPLE_PROTO); - ret = ctnetlink_dump_tuples_proto(skb, tuple); - NFA_NEST_END(skb, nest_parms); - - return ret; - -nfattr_failure: - return -1; -} - -static inline int -ctnetlink_dump_status(struct sk_buff *skb, const struct ip_conntrack *ct) -{ - u_int32_t status = htonl((u_int32_t) ct->status); - NFA_PUT(skb, CTA_STATUS, sizeof(status), &status); - return 0; - -nfattr_failure: - return -1; -} - -static inline int -ctnetlink_dump_timeout(struct sk_buff *skb, const struct ip_conntrack *ct) -{ - long timeout_l = ct->timeout.expires - jiffies; - u_int32_t timeout; - - if (timeout_l < 0) - timeout = 0; - else - timeout = htonl(timeout_l / HZ); - - NFA_PUT(skb, CTA_TIMEOUT, sizeof(timeout), &timeout); - return 0; - -nfattr_failure: - return -1; -} - -static inline int -ctnetlink_dump_protoinfo(struct sk_buff *skb, const struct ip_conntrack *ct) -{ - struct ip_conntrack_protocol *proto = ip_conntrack_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); - - struct nfattr *nest_proto; - int ret; - - if (!proto->to_nfattr) { - ip_conntrack_proto_put(proto); - return 0; - } - - nest_proto = NFA_NEST(skb, CTA_PROTOINFO); - - ret = proto->to_nfattr(skb, nest_proto, ct); - - ip_conntrack_proto_put(proto); - - NFA_NEST_END(skb, nest_proto); - - return ret; - -nfattr_failure: - return -1; -} - -static inline int -ctnetlink_dump_helpinfo(struct sk_buff *skb, const struct ip_conntrack *ct) -{ - struct nfattr *nest_helper; - - if (!ct->helper) - return 0; - - nest_helper = NFA_NEST(skb, CTA_HELP); - NFA_PUT(skb, CTA_HELP_NAME, strlen(ct->helper->name), ct->helper->name); - - if (ct->helper->to_nfattr) - ct->helper->to_nfattr(skb, ct); - - NFA_NEST_END(skb, nest_helper); - - return 0; - -nfattr_failure: - return -1; -} - -#ifdef CONFIG_IP_NF_CT_ACCT -static inline int -ctnetlink_dump_counters(struct sk_buff *skb, const struct ip_conntrack *ct, - enum ip_conntrack_dir dir) -{ - enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; - struct nfattr *nest_count = NFA_NEST(skb, type); - u_int32_t tmp; - - tmp = htonl(ct->counters[dir].packets); - NFA_PUT(skb, CTA_COUNTERS32_PACKETS, sizeof(u_int32_t), &tmp); - - tmp = htonl(ct->counters[dir].bytes); - NFA_PUT(skb, CTA_COUNTERS32_BYTES, sizeof(u_int32_t), &tmp); - - NFA_NEST_END(skb, nest_count); - - return 0; - -nfattr_failure: - return -1; -} -#else -#define ctnetlink_dump_counters(a, b, c) (0) -#endif - -#ifdef CONFIG_IP_NF_CONNTRACK_MARK -static inline int -ctnetlink_dump_mark(struct sk_buff *skb, const struct ip_conntrack *ct) -{ - u_int32_t mark = htonl(ct->mark); - - NFA_PUT(skb, CTA_MARK, sizeof(u_int32_t), &mark); - return 0; - -nfattr_failure: - return -1; -} -#else -#define ctnetlink_dump_mark(a, b) (0) -#endif - -static inline int -ctnetlink_dump_id(struct sk_buff *skb, const struct ip_conntrack *ct) -{ - u_int32_t id = htonl(ct->id); - NFA_PUT(skb, CTA_ID, sizeof(u_int32_t), &id); - return 0; - -nfattr_failure: - return -1; -} - -static inline int -ctnetlink_dump_use(struct sk_buff *skb, const struct ip_conntrack *ct) -{ - u_int32_t use = htonl(atomic_read(&ct->ct_general.use)); - - NFA_PUT(skb, CTA_USE, sizeof(u_int32_t), &use); - return 0; - -nfattr_failure: - return -1; -} - -#define tuple(ct, dir) (&(ct)->tuplehash[dir].tuple) - -static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, int nowait, - const struct ip_conntrack *ct) -{ - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - struct nfattr *nest_parms; - unsigned char *b; - - b = skb->tail; - - event |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); - - nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; - nfmsg->nfgen_family = AF_INET; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); - - nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); - - if (ctnetlink_dump_status(skb, ct) < 0 || - ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || - ctnetlink_dump_protoinfo(skb, ct) < 0 || - ctnetlink_dump_helpinfo(skb, ct) < 0 || - ctnetlink_dump_mark(skb, ct) < 0 || - ctnetlink_dump_id(skb, ct) < 0 || - ctnetlink_dump_use(skb, ct) < 0) - goto nfattr_failure; - - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -nfattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS -static int ctnetlink_conntrack_event(struct notifier_block *this, - unsigned long events, void *ptr) -{ - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - struct nfattr *nest_parms; - struct ip_conntrack *ct = (struct ip_conntrack *)ptr; - struct sk_buff *skb; - unsigned int type; - unsigned char *b; - unsigned int flags = 0, group; - - /* ignore our fake conntrack entry */ - if (ct == &ip_conntrack_untracked) - return NOTIFY_DONE; - - if (events & IPCT_DESTROY) { - type = IPCTNL_MSG_CT_DELETE; - group = NFNLGRP_CONNTRACK_DESTROY; - } else if (events & (IPCT_NEW | IPCT_RELATED)) { - type = IPCTNL_MSG_CT_NEW; - flags = NLM_F_CREATE|NLM_F_EXCL; - /* dump everything */ - events = ~0UL; - group = NFNLGRP_CONNTRACK_NEW; - } else if (events & (IPCT_STATUS | - IPCT_PROTOINFO | - IPCT_HELPER | - IPCT_HELPINFO | - IPCT_NATINFO)) { - type = IPCTNL_MSG_CT_NEW; - group = NFNLGRP_CONNTRACK_UPDATE; - } else - return NOTIFY_DONE; - - /* FIXME: Check if there are any listeners before, don't hurt performance */ - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); - if (!skb) - return NOTIFY_DONE; - - b = skb->tail; - - type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); - - nlh->nlmsg_flags = flags; - nfmsg->nfgen_family = AF_INET; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - nest_parms = NFA_NEST(skb, CTA_TUPLE_ORIG); - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_ORIGINAL)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); - - nest_parms = NFA_NEST(skb, CTA_TUPLE_REPLY); - if (ctnetlink_dump_tuples(skb, tuple(ct, IP_CT_DIR_REPLY)) < 0) - goto nfattr_failure; - NFA_NEST_END(skb, nest_parms); - - /* NAT stuff is now a status flag */ - if ((events & IPCT_STATUS || events & IPCT_NATINFO) - && ctnetlink_dump_status(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_REFRESH - && ctnetlink_dump_timeout(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_PROTOINFO - && ctnetlink_dump_protoinfo(skb, ct) < 0) - goto nfattr_failure; - if (events & IPCT_HELPINFO - && ctnetlink_dump_helpinfo(skb, ct) < 0) - goto nfattr_failure; - - if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0) - goto nfattr_failure; - - nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, group, 0); - return NOTIFY_DONE; - -nlmsg_failure: -nfattr_failure: - kfree_skb(skb); - return NOTIFY_DONE; -} -#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */ - -static int ctnetlink_done(struct netlink_callback *cb) -{ - DEBUGP("entered %s\n", __FUNCTION__); - return 0; -} - -static int -ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct ip_conntrack *ct = NULL; - struct ip_conntrack_tuple_hash *h; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; - - DEBUGP("entered %s, last bucket=%lu id=%u\n", __FUNCTION__, - cb->args[0], *id); - - read_lock_bh(&ip_conntrack_lock); - for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { - h = (struct ip_conntrack_tuple_hash *) i; - if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; - ct = tuplehash_to_ctrack(h); - if (ct->id <= *id) - continue; - if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) - goto out; - *id = ct->id; - } - } -out: - read_unlock_bh(&ip_conntrack_lock); - - DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - - return skb->len; -} - -#ifdef CONFIG_IP_NF_CT_ACCT -static int -ctnetlink_dump_table_w(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct ip_conntrack *ct = NULL; - struct ip_conntrack_tuple_hash *h; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[1]; - - DEBUGP("entered %s, last bucket=%u id=%u\n", __FUNCTION__, - cb->args[0], *id); - - write_lock_bh(&ip_conntrack_lock); - for (; cb->args[0] < ip_conntrack_htable_size; cb->args[0]++, *id = 0) { - list_for_each_prev(i, &ip_conntrack_hash[cb->args[0]]) { - h = (struct ip_conntrack_tuple_hash *) i; - if (DIRECTION(h) != IP_CT_DIR_ORIGINAL) - continue; - ct = tuplehash_to_ctrack(h); - if (ct->id <= *id) - continue; - if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, - 1, ct) < 0) - goto out; - *id = ct->id; - - memset(&ct->counters, 0, sizeof(ct->counters)); - } - } -out: - write_unlock_bh(&ip_conntrack_lock); - - DEBUGP("leaving, last bucket=%lu id=%u\n", cb->args[0], *id); - - return skb->len; -} -#endif - -static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), - [CTA_IP_V4_DST-1] = sizeof(u_int32_t), -}; - -static inline int -ctnetlink_parse_tuple_ip(struct nfattr *attr, struct ip_conntrack_tuple *tuple) -{ - struct nfattr *tb[CTA_IP_MAX]; - - DEBUGP("entered %s\n", __FUNCTION__); - - nfattr_parse_nested(tb, CTA_IP_MAX, attr); - - if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) - return -EINVAL; - - if (!tb[CTA_IP_V4_SRC-1]) - return -EINVAL; - tuple->src.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); - - if (!tb[CTA_IP_V4_DST-1]) - return -EINVAL; - tuple->dst.ip = *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); - - DEBUGP("leaving\n"); - - return 0; -} - -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_NUM-1] = sizeof(u_int8_t), - [CTA_PROTO_SRC_PORT-1] = sizeof(u_int16_t), - [CTA_PROTO_DST_PORT-1] = sizeof(u_int16_t), - [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t), -}; - -static inline int -ctnetlink_parse_tuple_proto(struct nfattr *attr, - struct ip_conntrack_tuple *tuple) -{ - struct nfattr *tb[CTA_PROTO_MAX]; - struct ip_conntrack_protocol *proto; - int ret = 0; - - DEBUGP("entered %s\n", __FUNCTION__); - - nfattr_parse_nested(tb, CTA_PROTO_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) - return -EINVAL; - - if (!tb[CTA_PROTO_NUM-1]) - return -EINVAL; - tuple->dst.protonum = *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_NUM-1]); - - proto = ip_conntrack_proto_find_get(tuple->dst.protonum); - - if (likely(proto->nfattr_to_tuple)) - ret = proto->nfattr_to_tuple(tb, tuple); - - ip_conntrack_proto_put(proto); - - return ret; -} - -static inline int -ctnetlink_parse_tuple(struct nfattr *cda[], struct ip_conntrack_tuple *tuple, - enum ctattr_tuple type) -{ - struct nfattr *tb[CTA_TUPLE_MAX]; - int err; - - DEBUGP("entered %s\n", __FUNCTION__); - - memset(tuple, 0, sizeof(*tuple)); - - nfattr_parse_nested(tb, CTA_TUPLE_MAX, cda[type-1]); - - if (!tb[CTA_TUPLE_IP-1]) - return -EINVAL; - - err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP-1], tuple); - if (err < 0) - return err; - - if (!tb[CTA_TUPLE_PROTO-1]) - return -EINVAL; - - err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO-1], tuple); - if (err < 0) - return err; - - /* orig and expect tuples get DIR_ORIGINAL */ - if (type == CTA_TUPLE_REPLY) - tuple->dst.dir = IP_CT_DIR_REPLY; - else - tuple->dst.dir = IP_CT_DIR_ORIGINAL; - - DUMP_TUPLE(tuple); - - DEBUGP("leaving\n"); - - return 0; -} - -#ifdef CONFIG_IP_NF_NAT_NEEDED -static const size_t cta_min_protonat[CTA_PROTONAT_MAX] = { - [CTA_PROTONAT_PORT_MIN-1] = sizeof(u_int16_t), - [CTA_PROTONAT_PORT_MAX-1] = sizeof(u_int16_t), -}; - -static int ctnetlink_parse_nat_proto(struct nfattr *attr, - const struct ip_conntrack *ct, - struct ip_nat_range *range) -{ - struct nfattr *tb[CTA_PROTONAT_MAX]; - struct ip_nat_protocol *npt; - - DEBUGP("entered %s\n", __FUNCTION__); - - nfattr_parse_nested(tb, CTA_PROTONAT_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTONAT_MAX, cta_min_protonat)) - return -EINVAL; - - npt = ip_nat_proto_find_get(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); - - if (!npt->nfattr_to_range) { - ip_nat_proto_put(npt); - return 0; - } - - /* nfattr_to_range returns 1 if it parsed, 0 if not, neg. on error */ - if (npt->nfattr_to_range(tb, range) > 0) - range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; - - ip_nat_proto_put(npt); - - DEBUGP("leaving\n"); - return 0; -} - -static const size_t cta_min_nat[CTA_NAT_MAX] = { - [CTA_NAT_MINIP-1] = sizeof(u_int32_t), - [CTA_NAT_MAXIP-1] = sizeof(u_int32_t), -}; - -static inline int -ctnetlink_parse_nat(struct nfattr *cda[], - const struct ip_conntrack *ct, struct ip_nat_range *range) -{ - struct nfattr *tb[CTA_NAT_MAX]; - int err; - - DEBUGP("entered %s\n", __FUNCTION__); - - memset(range, 0, sizeof(*range)); - - nfattr_parse_nested(tb, CTA_NAT_MAX, cda[CTA_NAT-1]); - - if (nfattr_bad_size(tb, CTA_NAT_MAX, cta_min_nat)) - return -EINVAL; - - if (tb[CTA_NAT_MINIP-1]) - range->min_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MINIP-1]); - - if (!tb[CTA_NAT_MAXIP-1]) - range->max_ip = range->min_ip; - else - range->max_ip = *(u_int32_t *)NFA_DATA(tb[CTA_NAT_MAXIP-1]); - - if (range->min_ip) - range->flags |= IP_NAT_RANGE_MAP_IPS; - - if (!tb[CTA_NAT_PROTO-1]) - return 0; - - err = ctnetlink_parse_nat_proto(tb[CTA_NAT_PROTO-1], ct, range); - if (err < 0) - return err; - - DEBUGP("leaving\n"); - return 0; -} -#endif - -static inline int -ctnetlink_parse_help(struct nfattr *attr, char **helper_name) -{ - struct nfattr *tb[CTA_HELP_MAX]; - - DEBUGP("entered %s\n", __FUNCTION__); - - nfattr_parse_nested(tb, CTA_HELP_MAX, attr); - - if (!tb[CTA_HELP_NAME-1]) - return -EINVAL; - - *helper_name = NFA_DATA(tb[CTA_HELP_NAME-1]); - - return 0; -} - -static const size_t cta_min[CTA_MAX] = { - [CTA_STATUS-1] = sizeof(u_int32_t), - [CTA_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_MARK-1] = sizeof(u_int32_t), - [CTA_USE-1] = sizeof(u_int32_t), - [CTA_ID-1] = sizeof(u_int32_t) -}; - -static int -ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) -{ - struct ip_conntrack_tuple_hash *h; - struct ip_conntrack_tuple tuple; - struct ip_conntrack *ct; - int err = 0; - - DEBUGP("entered %s\n", __FUNCTION__); - - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); - else if (cda[CTA_TUPLE_REPLY-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); - else { - /* Flush the whole table */ - ip_conntrack_flush(); - return 0; - } - - if (err < 0) - return err; - - h = ip_conntrack_find_get(&tuple, NULL); - if (!h) { - DEBUGP("tuple not found in conntrack hash\n"); - return -ENOENT; - } - - ct = tuplehash_to_ctrack(h); - - if (cda[CTA_ID-1]) { - u_int32_t id = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_ID-1])); - if (ct->id != id) { - ip_conntrack_put(ct); - return -ENOENT; - } - } - if (del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); - - ip_conntrack_put(ct); - DEBUGP("leaving\n"); - - return 0; -} - -static int -ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) -{ - struct ip_conntrack_tuple_hash *h; - struct ip_conntrack_tuple tuple; - struct ip_conntrack *ct; - struct sk_buff *skb2 = NULL; - int err = 0; - - DEBUGP("entered %s\n", __FUNCTION__); - - if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct nfgenmsg *msg = NLMSG_DATA(nlh); - u32 rlen; - - if (msg->nfgen_family != AF_INET) - return -EAFNOSUPPORT; - - if (NFNL_MSG_TYPE(nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { -#ifdef CONFIG_IP_NF_CT_ACCT - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table_w, - ctnetlink_done)) != 0) - return -EINVAL; -#else - return -ENOTSUPP; -#endif - } else { - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_dump_table, - ctnetlink_done)) != 0) - return -EINVAL; - } - - rlen = NLMSG_ALIGN(nlh->nlmsg_len); - if (rlen > skb->len) - rlen = skb->len; - skb_pull(skb, rlen); - return 0; - } - - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_ORIG); - else if (cda[CTA_TUPLE_REPLY-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_TUPLE_REPLY); - else - return -EINVAL; - - if (err < 0) - return err; - - h = ip_conntrack_find_get(&tuple, NULL); - if (!h) { - DEBUGP("tuple not found in conntrack hash"); - return -ENOENT; - } - DEBUGP("tuple found\n"); - ct = tuplehash_to_ctrack(h); - - err = -ENOMEM; - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb2) { - ip_conntrack_put(ct); - return -ENOMEM; - } - NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; - - err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, 1, ct); - ip_conntrack_put(ct); - if (err <= 0) - goto free; - - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); - if (err < 0) - goto out; - - DEBUGP("leaving\n"); - return 0; - -free: - kfree_skb(skb2); -out: - return err; -} - -static inline int -ctnetlink_change_status(struct ip_conntrack *ct, struct nfattr *cda[]) -{ - unsigned long d; - unsigned status = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_STATUS-1])); - d = ct->status ^ status; - - if (d & (IPS_EXPECTED|IPS_CONFIRMED|IPS_DYING)) - /* unchangeable */ - return -EINVAL; - - if (d & IPS_SEEN_REPLY && !(status & IPS_SEEN_REPLY)) - /* SEEN_REPLY bit can only be set */ - return -EINVAL; - - - if (d & IPS_ASSURED && !(status & IPS_ASSURED)) - /* ASSURED bit can only be set */ - return -EINVAL; - - if (cda[CTA_NAT-1]) { -#ifndef CONFIG_IP_NF_NAT_NEEDED - return -EINVAL; -#else - unsigned int hooknum; - struct ip_nat_range range; - - if (ctnetlink_parse_nat(cda, ct, &range) < 0) - return -EINVAL; - - DEBUGP("NAT: %u.%u.%u.%u-%u.%u.%u.%u:%u-%u\n", - NIPQUAD(range.min_ip), NIPQUAD(range.max_ip), - htons(range.min.all), htons(range.max.all)); - - /* This is tricky but it works. ip_nat_setup_info needs the - * hook number as parameter, so let's do the correct - * conversion and run away */ - if (status & IPS_SRC_NAT_DONE) - hooknum = NF_IP_POST_ROUTING; /* IP_NAT_MANIP_SRC */ - else if (status & IPS_DST_NAT_DONE) - hooknum = NF_IP_PRE_ROUTING; /* IP_NAT_MANIP_DST */ - else - return -EINVAL; /* Missing NAT flags */ - - DEBUGP("NAT status: %lu\n", - status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); - - if (ip_nat_initialized(ct, HOOK2MANIP(hooknum))) - return -EEXIST; - ip_nat_setup_info(ct, &range, hooknum); - - DEBUGP("NAT status after setup_info: %lu\n", - ct->status & (IPS_NAT_MASK | IPS_NAT_DONE_MASK)); -#endif - } - - /* Be careful here, modifying NAT bits can screw up things, - * so don't let users modify them directly if they don't pass - * ip_nat_range. */ - ct->status |= status & ~(IPS_NAT_DONE_MASK | IPS_NAT_MASK); - return 0; -} - - -static inline int -ctnetlink_change_helper(struct ip_conntrack *ct, struct nfattr *cda[]) -{ - struct ip_conntrack_helper *helper; - char *helpname; - int err; - - DEBUGP("entered %s\n", __FUNCTION__); - - /* don't change helper of sibling connections */ - if (ct->master) - return -EINVAL; - - err = ctnetlink_parse_help(cda[CTA_HELP-1], &helpname); - if (err < 0) - return err; - - helper = __ip_conntrack_helper_find_byname(helpname); - if (!helper) { - if (!strcmp(helpname, "")) - helper = NULL; - else - return -EINVAL; - } - - if (ct->helper) { - if (!helper) { - /* we had a helper before ... */ - ip_ct_remove_expectations(ct); - ct->helper = NULL; - } else { - /* need to zero data of old helper */ - memset(&ct->help, 0, sizeof(ct->help)); - } - } - - ct->helper = helper; - - return 0; -} - -static inline int -ctnetlink_change_timeout(struct ip_conntrack *ct, struct nfattr *cda[]) -{ - u_int32_t timeout = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); - - if (!del_timer(&ct->timeout)) - return -ETIME; - - ct->timeout.expires = jiffies + timeout * HZ; - add_timer(&ct->timeout); - - return 0; -} - -static inline int -ctnetlink_change_protoinfo(struct ip_conntrack *ct, struct nfattr *cda[]) -{ - struct nfattr *tb[CTA_PROTOINFO_MAX], *attr = cda[CTA_PROTOINFO-1]; - struct ip_conntrack_protocol *proto; - u_int16_t npt = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum; - int err = 0; - - nfattr_parse_nested(tb, CTA_PROTOINFO_MAX, attr); - - proto = ip_conntrack_proto_find_get(npt); - - if (proto->from_nfattr) - err = proto->from_nfattr(tb, ct); - ip_conntrack_proto_put(proto); - - return err; -} - -static int -ctnetlink_change_conntrack(struct ip_conntrack *ct, struct nfattr *cda[]) -{ - int err; - - DEBUGP("entered %s\n", __FUNCTION__); - - if (cda[CTA_HELP-1]) { - err = ctnetlink_change_helper(ct, cda); - if (err < 0) - return err; - } - - if (cda[CTA_TIMEOUT-1]) { - err = ctnetlink_change_timeout(ct, cda); - if (err < 0) - return err; - } - - if (cda[CTA_STATUS-1]) { - err = ctnetlink_change_status(ct, cda); - if (err < 0) - return err; - } - - if (cda[CTA_PROTOINFO-1]) { - err = ctnetlink_change_protoinfo(ct, cda); - if (err < 0) - return err; - } - -#if defined(CONFIG_IP_NF_CONNTRACK_MARK) - if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); -#endif - - DEBUGP("all done\n"); - return 0; -} - -static int -ctnetlink_create_conntrack(struct nfattr *cda[], - struct ip_conntrack_tuple *otuple, - struct ip_conntrack_tuple *rtuple) -{ - struct ip_conntrack *ct; - int err = -EINVAL; - - DEBUGP("entered %s\n", __FUNCTION__); - - ct = ip_conntrack_alloc(otuple, rtuple); - if (ct == NULL || IS_ERR(ct)) - return -ENOMEM; - - if (!cda[CTA_TIMEOUT-1]) - goto err; - ct->timeout.expires = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_TIMEOUT-1])); - - ct->timeout.expires = jiffies + ct->timeout.expires * HZ; - ct->status |= IPS_CONFIRMED; - - err = ctnetlink_change_status(ct, cda); - if (err < 0) - goto err; - - if (cda[CTA_PROTOINFO-1]) { - err = ctnetlink_change_protoinfo(ct, cda); - if (err < 0) - return err; - } - -#if defined(CONFIG_IP_NF_CONNTRACK_MARK) - if (cda[CTA_MARK-1]) - ct->mark = ntohl(*(u_int32_t *)NFA_DATA(cda[CTA_MARK-1])); -#endif - - ct->helper = ip_conntrack_helper_find_get(rtuple); - - add_timer(&ct->timeout); - ip_conntrack_hash_insert(ct); - - if (ct->helper) - ip_conntrack_helper_put(ct->helper); - - DEBUGP("conntrack with id %u inserted\n", ct->id); - return 0; - -err: - ip_conntrack_free(ct); - return err; -} - -static int -ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) -{ - struct ip_conntrack_tuple otuple, rtuple; - struct ip_conntrack_tuple_hash *h = NULL; - int err = 0; - - DEBUGP("entered %s\n", __FUNCTION__); - - if (nfattr_bad_size(cda, CTA_MAX, cta_min)) - return -EINVAL; - - if (cda[CTA_TUPLE_ORIG-1]) { - err = ctnetlink_parse_tuple(cda, &otuple, CTA_TUPLE_ORIG); - if (err < 0) - return err; - } - - if (cda[CTA_TUPLE_REPLY-1]) { - err = ctnetlink_parse_tuple(cda, &rtuple, CTA_TUPLE_REPLY); - if (err < 0) - return err; - } - - write_lock_bh(&ip_conntrack_lock); - if (cda[CTA_TUPLE_ORIG-1]) - h = __ip_conntrack_find(&otuple, NULL); - else if (cda[CTA_TUPLE_REPLY-1]) - h = __ip_conntrack_find(&rtuple, NULL); - - if (h == NULL) { - write_unlock_bh(&ip_conntrack_lock); - DEBUGP("no such conntrack, create new\n"); - err = -ENOENT; - if (nlh->nlmsg_flags & NLM_F_CREATE) - err = ctnetlink_create_conntrack(cda, &otuple, &rtuple); - return err; - } - /* implicit 'else' */ - - /* we only allow nat config for new conntracks */ - if (cda[CTA_NAT-1]) { - err = -EINVAL; - goto out_unlock; - } - - /* We manipulate the conntrack inside the global conntrack table lock, - * so there's no need to increase the refcount */ - DEBUGP("conntrack found\n"); - err = -EEXIST; - if (!(nlh->nlmsg_flags & NLM_F_EXCL)) - err = ctnetlink_change_conntrack(tuplehash_to_ctrack(h), cda); - -out_unlock: - write_unlock_bh(&ip_conntrack_lock); - return err; -} - -/*********************************************************************** - * EXPECT - ***********************************************************************/ - -static inline int -ctnetlink_exp_dump_tuple(struct sk_buff *skb, - const struct ip_conntrack_tuple *tuple, - enum ctattr_expect type) -{ - struct nfattr *nest_parms = NFA_NEST(skb, type); - - if (ctnetlink_dump_tuples(skb, tuple) < 0) - goto nfattr_failure; - - NFA_NEST_END(skb, nest_parms); - - return 0; - -nfattr_failure: - return -1; -} - -static inline int -ctnetlink_exp_dump_expect(struct sk_buff *skb, - const struct ip_conntrack_expect *exp) -{ - struct ip_conntrack *master = exp->master; - u_int32_t timeout = htonl((exp->timeout.expires - jiffies) / HZ); - u_int32_t id = htonl(exp->id); - - if (ctnetlink_exp_dump_tuple(skb, &exp->tuple, CTA_EXPECT_TUPLE) < 0) - goto nfattr_failure; - if (ctnetlink_exp_dump_tuple(skb, &exp->mask, CTA_EXPECT_MASK) < 0) - goto nfattr_failure; - if (ctnetlink_exp_dump_tuple(skb, - &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - CTA_EXPECT_MASTER) < 0) - goto nfattr_failure; - - NFA_PUT(skb, CTA_EXPECT_TIMEOUT, sizeof(timeout), &timeout); - NFA_PUT(skb, CTA_EXPECT_ID, sizeof(u_int32_t), &id); - - return 0; - -nfattr_failure: - return -1; -} - -static int -ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, - int nowait, - const struct ip_conntrack_expect *exp) -{ - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - unsigned char *b; - - b = skb->tail; - - event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = NLMSG_PUT(skb, pid, seq, event, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); - - nlh->nlmsg_flags = (nowait && pid) ? NLM_F_MULTI : 0; - nfmsg->nfgen_family = AF_INET; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - if (ctnetlink_exp_dump_expect(skb, exp) < 0) - goto nfattr_failure; - - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -nfattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS -static int ctnetlink_expect_event(struct notifier_block *this, - unsigned long events, void *ptr) -{ - struct nlmsghdr *nlh; - struct nfgenmsg *nfmsg; - struct ip_conntrack_expect *exp = (struct ip_conntrack_expect *)ptr; - struct sk_buff *skb; - unsigned int type; - unsigned char *b; - int flags = 0; - - if (events & IPEXP_NEW) { - type = IPCTNL_MSG_EXP_NEW; - flags = NLM_F_CREATE|NLM_F_EXCL; - } else - return NOTIFY_DONE; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC); - if (!skb) - return NOTIFY_DONE; - - b = skb->tail; - - type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = NLMSG_PUT(skb, 0, 0, type, sizeof(struct nfgenmsg)); - nfmsg = NLMSG_DATA(nlh); - - nlh->nlmsg_flags = flags; - nfmsg->nfgen_family = AF_INET; - nfmsg->version = NFNETLINK_V0; - nfmsg->res_id = 0; - - if (ctnetlink_exp_dump_expect(skb, exp) < 0) - goto nfattr_failure; - - nlh->nlmsg_len = skb->tail - b; - nfnetlink_send(skb, 0, NFNLGRP_CONNTRACK_EXP_NEW, 0); - return NOTIFY_DONE; - -nlmsg_failure: -nfattr_failure: - kfree_skb(skb); - return NOTIFY_DONE; -} -#endif - -static int -ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct ip_conntrack_expect *exp = NULL; - struct list_head *i; - u_int32_t *id = (u_int32_t *) &cb->args[0]; - - DEBUGP("entered %s, last id=%llu\n", __FUNCTION__, *id); - - read_lock_bh(&ip_conntrack_lock); - list_for_each_prev(i, &ip_conntrack_expect_list) { - exp = (struct ip_conntrack_expect *) i; - if (exp->id <= *id) - continue; - if (ctnetlink_exp_fill_info(skb, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - IPCTNL_MSG_EXP_NEW, - 1, exp) < 0) - goto out; - *id = exp->id; - } -out: - read_unlock_bh(&ip_conntrack_lock); - - DEBUGP("leaving, last id=%llu\n", *id); - - return skb->len; -} - -static const size_t cta_min_exp[CTA_EXPECT_MAX] = { - [CTA_EXPECT_TIMEOUT-1] = sizeof(u_int32_t), - [CTA_EXPECT_ID-1] = sizeof(u_int32_t) -}; - -static int -ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) -{ - struct ip_conntrack_tuple tuple; - struct ip_conntrack_expect *exp; - struct sk_buff *skb2; - int err = 0; - - DEBUGP("entered %s\n", __FUNCTION__); - - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - - if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct nfgenmsg *msg = NLMSG_DATA(nlh); - u32 rlen; - - if (msg->nfgen_family != AF_INET) - return -EAFNOSUPPORT; - - if ((*errp = netlink_dump_start(ctnl, skb, nlh, - ctnetlink_exp_dump_table, - ctnetlink_done)) != 0) - return -EINVAL; - rlen = NLMSG_ALIGN(nlh->nlmsg_len); - if (rlen > skb->len) - rlen = skb->len; - skb_pull(skb, rlen); - return 0; - } - - if (cda[CTA_EXPECT_MASTER-1]) - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER); - else - return -EINVAL; - - if (err < 0) - return err; - - exp = ip_conntrack_expect_find(&tuple); - if (!exp) - return -ENOENT; - - if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); - if (exp->id != ntohl(id)) { - ip_conntrack_expect_put(exp); - return -ENOENT; - } - } - - err = -ENOMEM; - skb2 = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb2) - goto out; - NETLINK_CB(skb2).dst_pid = NETLINK_CB(skb).pid; - - err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, - nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, - 1, exp); - if (err <= 0) - goto free; - - ip_conntrack_expect_put(exp); - - return netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); - -free: - kfree_skb(skb2); -out: - ip_conntrack_expect_put(exp); - return err; -} - -static int -ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) -{ - struct ip_conntrack_expect *exp, *tmp; - struct ip_conntrack_tuple tuple; - struct ip_conntrack_helper *h; - int err; - - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - - if (cda[CTA_EXPECT_TUPLE-1]) { - /* delete a single expect by tuple */ - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); - if (err < 0) - return err; - - /* bump usage count to 2 */ - exp = ip_conntrack_expect_find(&tuple); - if (!exp) - return -ENOENT; - - if (cda[CTA_EXPECT_ID-1]) { - u_int32_t id = - *(u_int32_t *)NFA_DATA(cda[CTA_EXPECT_ID-1]); - if (exp->id != ntohl(id)) { - ip_conntrack_expect_put(exp); - return -ENOENT; - } - } - - /* after list removal, usage count == 1 */ - ip_conntrack_unexpect_related(exp); - /* have to put what we 'get' above. - * after this line usage count == 0 */ - ip_conntrack_expect_put(exp); - } else if (cda[CTA_EXPECT_HELP_NAME-1]) { - char *name = NFA_DATA(cda[CTA_EXPECT_HELP_NAME-1]); - - /* delete all expectations for this helper */ - write_lock_bh(&ip_conntrack_lock); - h = __ip_conntrack_helper_find_byname(name); - if (!h) { - write_unlock_bh(&ip_conntrack_lock); - return -EINVAL; - } - list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, - list) { - if (exp->master->helper == h - && del_timer(&exp->timeout)) { - ip_ct_unlink_expect(exp); - ip_conntrack_expect_put(exp); - } - } - write_unlock_bh(&ip_conntrack_lock); - } else { - /* This basically means we have to flush everything*/ - write_lock_bh(&ip_conntrack_lock); - list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, - list) { - if (del_timer(&exp->timeout)) { - ip_ct_unlink_expect(exp); - ip_conntrack_expect_put(exp); - } - } - write_unlock_bh(&ip_conntrack_lock); - } - - return 0; -} -static int -ctnetlink_change_expect(struct ip_conntrack_expect *x, struct nfattr *cda[]) -{ - return -EOPNOTSUPP; -} - -static int -ctnetlink_create_expect(struct nfattr *cda[]) -{ - struct ip_conntrack_tuple tuple, mask, master_tuple; - struct ip_conntrack_tuple_hash *h = NULL; - struct ip_conntrack_expect *exp; - struct ip_conntrack *ct; - int err = 0; - - DEBUGP("entered %s\n", __FUNCTION__); - - /* caller guarantees that those three CTA_EXPECT_* exist */ - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); - if (err < 0) - return err; - err = ctnetlink_parse_tuple(cda, &mask, CTA_EXPECT_MASK); - if (err < 0) - return err; - err = ctnetlink_parse_tuple(cda, &master_tuple, CTA_EXPECT_MASTER); - if (err < 0) - return err; - - /* Look for master conntrack of this expectation */ - h = ip_conntrack_find_get(&master_tuple, NULL); - if (!h) - return -ENOENT; - ct = tuplehash_to_ctrack(h); - - if (!ct->helper) { - /* such conntrack hasn't got any helper, abort */ - err = -EINVAL; - goto out; - } - - exp = ip_conntrack_expect_alloc(ct); - if (!exp) { - err = -ENOMEM; - goto out; - } - - exp->expectfn = NULL; - exp->flags = 0; - exp->master = ct; - memcpy(&exp->tuple, &tuple, sizeof(struct ip_conntrack_tuple)); - memcpy(&exp->mask, &mask, sizeof(struct ip_conntrack_tuple)); - - err = ip_conntrack_expect_related(exp); - ip_conntrack_expect_put(exp); - -out: - ip_conntrack_put(tuplehash_to_ctrack(h)); - return err; -} - -static int -ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, - struct nlmsghdr *nlh, struct nfattr *cda[], int *errp) -{ - struct ip_conntrack_tuple tuple; - struct ip_conntrack_expect *exp; - int err = 0; - - DEBUGP("entered %s\n", __FUNCTION__); - - if (nfattr_bad_size(cda, CTA_EXPECT_MAX, cta_min_exp)) - return -EINVAL; - - if (!cda[CTA_EXPECT_TUPLE-1] - || !cda[CTA_EXPECT_MASK-1] - || !cda[CTA_EXPECT_MASTER-1]) - return -EINVAL; - - err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE); - if (err < 0) - return err; - - write_lock_bh(&ip_conntrack_lock); - exp = __ip_conntrack_expect_find(&tuple); - - if (!exp) { - write_unlock_bh(&ip_conntrack_lock); - err = -ENOENT; - if (nlh->nlmsg_flags & NLM_F_CREATE) - err = ctnetlink_create_expect(cda); - return err; - } - - err = -EEXIST; - if (!(nlh->nlmsg_flags & NLM_F_EXCL)) - err = ctnetlink_change_expect(exp, cda); - write_unlock_bh(&ip_conntrack_lock); - - DEBUGP("leaving\n"); - - return err; -} - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS -static struct notifier_block ctnl_notifier = { - .notifier_call = ctnetlink_conntrack_event, -}; - -static struct notifier_block ctnl_notifier_exp = { - .notifier_call = ctnetlink_expect_event, -}; -#endif - -static struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { - [IPCTNL_MSG_CT_NEW] = { .call = ctnetlink_new_conntrack, - .attr_count = CTA_MAX, }, - [IPCTNL_MSG_CT_GET] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, }, - [IPCTNL_MSG_CT_DELETE] = { .call = ctnetlink_del_conntrack, - .attr_count = CTA_MAX, }, - [IPCTNL_MSG_CT_GET_CTRZERO] = { .call = ctnetlink_get_conntrack, - .attr_count = CTA_MAX, }, -}; - -static struct nfnl_callback ctnl_exp_cb[IPCTNL_MSG_EXP_MAX] = { - [IPCTNL_MSG_EXP_GET] = { .call = ctnetlink_get_expect, - .attr_count = CTA_EXPECT_MAX, }, - [IPCTNL_MSG_EXP_NEW] = { .call = ctnetlink_new_expect, - .attr_count = CTA_EXPECT_MAX, }, - [IPCTNL_MSG_EXP_DELETE] = { .call = ctnetlink_del_expect, - .attr_count = CTA_EXPECT_MAX, }, -}; - -static struct nfnetlink_subsystem ctnl_subsys = { - .name = "conntrack", - .subsys_id = NFNL_SUBSYS_CTNETLINK, - .cb_count = IPCTNL_MSG_MAX, - .cb = ctnl_cb, -}; - -static struct nfnetlink_subsystem ctnl_exp_subsys = { - .name = "conntrack_expect", - .subsys_id = NFNL_SUBSYS_CTNETLINK_EXP, - .cb_count = IPCTNL_MSG_EXP_MAX, - .cb = ctnl_exp_cb, -}; - -MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_CTNETLINK); - -static int __init ctnetlink_init(void) -{ - int ret; - - printk("ctnetlink v%s: registering with nfnetlink.\n", version); - ret = nfnetlink_subsys_register(&ctnl_subsys); - if (ret < 0) { - printk("ctnetlink_init: cannot register with nfnetlink.\n"); - goto err_out; - } - - ret = nfnetlink_subsys_register(&ctnl_exp_subsys); - if (ret < 0) { - printk("ctnetlink_init: cannot register exp with nfnetlink.\n"); - goto err_unreg_subsys; - } - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS - ret = ip_conntrack_register_notifier(&ctnl_notifier); - if (ret < 0) { - printk("ctnetlink_init: cannot register notifier.\n"); - goto err_unreg_exp_subsys; - } - - ret = ip_conntrack_expect_register_notifier(&ctnl_notifier_exp); - if (ret < 0) { - printk("ctnetlink_init: cannot expect register notifier.\n"); - goto err_unreg_notifier; - } -#endif - - return 0; - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS -err_unreg_notifier: - ip_conntrack_unregister_notifier(&ctnl_notifier); -err_unreg_exp_subsys: - nfnetlink_subsys_unregister(&ctnl_exp_subsys); -#endif -err_unreg_subsys: - nfnetlink_subsys_unregister(&ctnl_subsys); -err_out: - return ret; -} - -static void __exit ctnetlink_exit(void) -{ - printk("ctnetlink: unregistering from nfnetlink.\n"); - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS - ip_conntrack_unregister_notifier(&ctnl_notifier_exp); - ip_conntrack_unregister_notifier(&ctnl_notifier); -#endif - - nfnetlink_subsys_unregister(&ctnl_exp_subsys); - nfnetlink_subsys_unregister(&ctnl_subsys); - return; -} - -module_init(ctnetlink_init); -module_exit(ctnetlink_exit); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_generic.c b/net/ipv4/netfilter/ip_conntrack_proto_generic.c deleted file mode 100644 index f891308b5e4..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_generic.c +++ /dev/null @@ -1,75 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> - -unsigned int ip_ct_generic_timeout = 600*HZ; - -static int generic_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple) -{ - tuple->src.u.all = 0; - tuple->dst.u.all = 0; - - return 1; -} - -static int generic_invert_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *orig) -{ - tuple->src.u.all = 0; - tuple->dst.u.all = 0; - - return 1; -} - -/* Print out the per-protocol part of the tuple. */ -static int generic_print_tuple(struct seq_file *s, - const struct ip_conntrack_tuple *tuple) -{ - return 0; -} - -/* Print out the private part of the conntrack. */ -static int generic_print_conntrack(struct seq_file *s, - const struct ip_conntrack *state) -{ - return 0; -} - -/* Returns verdict for packet, or -1 for invalid. */ -static int packet(struct ip_conntrack *conntrack, - const struct sk_buff *skb, - enum ip_conntrack_info ctinfo) -{ - ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_generic_timeout); - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static int new(struct ip_conntrack *conntrack, const struct sk_buff *skb) -{ - return 1; -} - -struct ip_conntrack_protocol ip_conntrack_generic_protocol = -{ - .proto = 0, - .name = "unknown", - .pkt_to_tuple = generic_pkt_to_tuple, - .invert_tuple = generic_invert_tuple, - .print_tuple = generic_print_tuple, - .print_conntrack = generic_print_conntrack, - .packet = packet, - .new = new, -}; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_gre.c b/net/ipv4/netfilter/ip_conntrack_proto_gre.c deleted file mode 100644 index 56794797d55..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_gre.c +++ /dev/null @@ -1,333 +0,0 @@ -/* - * ip_conntrack_proto_gre.c - Version 3.0 - * - * Connection tracking protocol helper module for GRE. - * - * GRE is a generic encapsulation protocol, which is generally not very - * suited for NAT, as it has no protocol-specific part as port numbers. - * - * It has an optional key field, which may help us distinguishing two - * connections between the same two hosts. - * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 - * - * PPTP is built on top of a modified version of GRE, and has a mandatory - * field called "CallID", which serves us for the same purpose as the key - * field in plain GRE. - * - * Documentation about PPTP can be found in RFC 2637 - * - * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/netfilter.h> -#include <linux/ip.h> -#include <linux/in.h> -#include <linux/list.h> -#include <linux/seq_file.h> -#include <linux/interrupt.h> - -static DEFINE_RWLOCK(ip_ct_gre_lock); -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - -#include <linux/netfilter_ipv4/listhelp.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> - -#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> -#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); -MODULE_DESCRIPTION("netfilter connection tracking protocol helper for GRE"); - -/* shamelessly stolen from ip_conntrack_proto_udp.c */ -#define GRE_TIMEOUT (30*HZ) -#define GRE_STREAM_TIMEOUT (180*HZ) - -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, __FUNCTION__, ## args) -#define DUMP_TUPLE_GRE(x) printk("%u.%u.%u.%u:0x%x -> %u.%u.%u.%u:0x%x\n", \ - NIPQUAD((x)->src.ip), ntohs((x)->src.u.gre.key), \ - NIPQUAD((x)->dst.ip), ntohs((x)->dst.u.gre.key)) -#else -#define DEBUGP(x, args...) -#define DUMP_TUPLE_GRE(x) -#endif - -/* GRE KEYMAP HANDLING FUNCTIONS */ -static LIST_HEAD(gre_keymap_list); - -static inline int gre_key_cmpfn(const struct ip_ct_gre_keymap *km, - const struct ip_conntrack_tuple *t) -{ - return ((km->tuple.src.ip == t->src.ip) && - (km->tuple.dst.ip == t->dst.ip) && - (km->tuple.dst.protonum == t->dst.protonum) && - (km->tuple.dst.u.all == t->dst.u.all)); -} - -/* look up the source key for a given tuple */ -static u_int32_t gre_keymap_lookup(struct ip_conntrack_tuple *t) -{ - struct ip_ct_gre_keymap *km; - u_int32_t key = 0; - - read_lock_bh(&ip_ct_gre_lock); - km = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (km) - key = km->tuple.src.u.gre.key; - read_unlock_bh(&ip_ct_gre_lock); - - DEBUGP("lookup src key 0x%x up key for ", key); - DUMP_TUPLE_GRE(t); - - return key; -} - -/* add a single keymap entry, associate with specified master ct */ -int -ip_ct_gre_keymap_add(struct ip_conntrack *ct, - struct ip_conntrack_tuple *t, int reply) -{ - struct ip_ct_gre_keymap **exist_km, *km, *old; - - if (!ct->helper || strcmp(ct->helper->name, "pptp")) { - DEBUGP("refusing to add GRE keymap to non-pptp session\n"); - return -1; - } - - if (!reply) - exist_km = &ct->help.ct_pptp_info.keymap_orig; - else - exist_km = &ct->help.ct_pptp_info.keymap_reply; - - if (*exist_km) { - /* check whether it's a retransmission */ - old = LIST_FIND(&gre_keymap_list, gre_key_cmpfn, - struct ip_ct_gre_keymap *, t); - if (old == *exist_km) { - DEBUGP("retransmission\n"); - return 0; - } - - DEBUGP("trying to override keymap_%s for ct %p\n", - reply? "reply":"orig", ct); - return -EEXIST; - } - - km = kmalloc(sizeof(*km), GFP_ATOMIC); - if (!km) - return -ENOMEM; - - memcpy(&km->tuple, t, sizeof(*t)); - *exist_km = km; - - DEBUGP("adding new entry %p: ", km); - DUMP_TUPLE_GRE(&km->tuple); - - write_lock_bh(&ip_ct_gre_lock); - list_append(&gre_keymap_list, km); - write_unlock_bh(&ip_ct_gre_lock); - - return 0; -} - -/* destroy the keymap entries associated with specified master ct */ -void ip_ct_gre_keymap_destroy(struct ip_conntrack *ct) -{ - DEBUGP("entering for ct %p\n", ct); - - if (!ct->helper || strcmp(ct->helper->name, "pptp")) { - DEBUGP("refusing to destroy GRE keymap to non-pptp session\n"); - return; - } - - write_lock_bh(&ip_ct_gre_lock); - if (ct->help.ct_pptp_info.keymap_orig) { - DEBUGP("removing %p from list\n", - ct->help.ct_pptp_info.keymap_orig); - list_del(&ct->help.ct_pptp_info.keymap_orig->list); - kfree(ct->help.ct_pptp_info.keymap_orig); - ct->help.ct_pptp_info.keymap_orig = NULL; - } - if (ct->help.ct_pptp_info.keymap_reply) { - DEBUGP("removing %p from list\n", - ct->help.ct_pptp_info.keymap_reply); - list_del(&ct->help.ct_pptp_info.keymap_reply->list); - kfree(ct->help.ct_pptp_info.keymap_reply); - ct->help.ct_pptp_info.keymap_reply = NULL; - } - write_unlock_bh(&ip_ct_gre_lock); -} - - -/* PUBLIC CONNTRACK PROTO HELPER FUNCTIONS */ - -/* invert gre part of tuple */ -static int gre_invert_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *orig) -{ - tuple->dst.u.gre.key = orig->src.u.gre.key; - tuple->src.u.gre.key = orig->dst.u.gre.key; - - return 1; -} - -/* gre hdr info to tuple */ -static int gre_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple) -{ - struct gre_hdr_pptp _pgrehdr, *pgrehdr; - u_int32_t srckey; - struct gre_hdr _grehdr, *grehdr; - - /* first only delinearize old RFC1701 GRE header */ - grehdr = skb_header_pointer(skb, dataoff, sizeof(_grehdr), &_grehdr); - if (!grehdr || grehdr->version != GRE_VERSION_PPTP) { - /* try to behave like "ip_conntrack_proto_generic" */ - tuple->src.u.all = 0; - tuple->dst.u.all = 0; - return 1; - } - - /* PPTP header is variable length, only need up to the call_id field */ - pgrehdr = skb_header_pointer(skb, dataoff, 8, &_pgrehdr); - if (!pgrehdr) - return 1; - - if (ntohs(grehdr->protocol) != GRE_PROTOCOL_PPTP) { - DEBUGP("GRE_VERSION_PPTP but unknown proto\n"); - return 0; - } - - tuple->dst.u.gre.key = pgrehdr->call_id; - srckey = gre_keymap_lookup(tuple); - tuple->src.u.gre.key = srckey; - - return 1; -} - -/* print gre part of tuple */ -static int gre_print_tuple(struct seq_file *s, - const struct ip_conntrack_tuple *tuple) -{ - return seq_printf(s, "srckey=0x%x dstkey=0x%x ", - ntohs(tuple->src.u.gre.key), - ntohs(tuple->dst.u.gre.key)); -} - -/* print private data for conntrack */ -static int gre_print_conntrack(struct seq_file *s, - const struct ip_conntrack *ct) -{ - return seq_printf(s, "timeout=%u, stream_timeout=%u ", - (ct->proto.gre.timeout / HZ), - (ct->proto.gre.stream_timeout / HZ)); -} - -/* Returns verdict for packet, and may modify conntrack */ -static int gre_packet(struct ip_conntrack *ct, - const struct sk_buff *skb, - enum ip_conntrack_info conntrackinfo) -{ - /* If we've seen traffic both ways, this is a GRE connection. - * Extend timeout. */ - if (ct->status & IPS_SEEN_REPLY) { - ip_ct_refresh_acct(ct, conntrackinfo, skb, - ct->proto.gre.stream_timeout); - /* Also, more likely to be important, and not a probe. */ - set_bit(IPS_ASSURED_BIT, &ct->status); - ip_conntrack_event_cache(IPCT_STATUS, skb); - } else - ip_ct_refresh_acct(ct, conntrackinfo, skb, - ct->proto.gre.timeout); - - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static int gre_new(struct ip_conntrack *ct, - const struct sk_buff *skb) -{ - DEBUGP(": "); - DUMP_TUPLE_GRE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - - /* initialize to sane value. Ideally a conntrack helper - * (e.g. in case of pptp) is increasing them */ - ct->proto.gre.stream_timeout = GRE_STREAM_TIMEOUT; - ct->proto.gre.timeout = GRE_TIMEOUT; - - return 1; -} - -/* Called when a conntrack entry has already been removed from the hashes - * and is about to be deleted from memory */ -static void gre_destroy(struct ip_conntrack *ct) -{ - struct ip_conntrack *master = ct->master; - DEBUGP(" entering\n"); - - if (!master) - DEBUGP("no master !?!\n"); - else - ip_ct_gre_keymap_destroy(master); -} - -/* protocol helper struct */ -static struct ip_conntrack_protocol gre = { - .proto = IPPROTO_GRE, - .name = "gre", - .pkt_to_tuple = gre_pkt_to_tuple, - .invert_tuple = gre_invert_tuple, - .print_tuple = gre_print_tuple, - .print_conntrack = gre_print_conntrack, - .packet = gre_packet, - .new = gre_new, - .destroy = gre_destroy, - .me = THIS_MODULE, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, -#endif -}; - -/* ip_conntrack_proto_gre initialization */ -int __init ip_ct_proto_gre_init(void) -{ - return ip_conntrack_protocol_register(&gre); -} - -/* This cannot be __exit, as it is invoked from ip_conntrack_helper_pptp.c's - * init() code on errors. - */ -void ip_ct_proto_gre_fini(void) -{ - struct list_head *pos, *n; - - /* delete all keymap entries */ - write_lock_bh(&ip_ct_gre_lock); - list_for_each_safe(pos, n, &gre_keymap_list) { - DEBUGP("deleting keymap %p at module unload time\n", pos); - list_del(pos); - kfree(pos); - } - write_unlock_bh(&ip_ct_gre_lock); - - ip_conntrack_protocol_unregister(&gre); -} - -EXPORT_SYMBOL(ip_ct_gre_keymap_add); -EXPORT_SYMBOL(ip_ct_gre_keymap_destroy); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c b/net/ipv4/netfilter/ip_conntrack_proto_icmp.c deleted file mode 100644 index 3021af0910f..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_icmp.c +++ /dev/null @@ -1,327 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/netfilter.h> -#include <linux/in.h> -#include <linux/icmp.h> -#include <linux/seq_file.h> -#include <linux/skbuff.h> -#include <net/ip.h> -#include <net/checksum.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> - -unsigned int ip_ct_icmp_timeout = 30*HZ; - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static int icmp_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple) -{ - struct icmphdr _hdr, *hp; - - hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); - if (hp == NULL) - return 0; - - tuple->dst.u.icmp.type = hp->type; - tuple->src.u.icmp.id = hp->un.echo.id; - tuple->dst.u.icmp.code = hp->code; - - return 1; -} - -/* Add 1; spaces filled with 0. */ -static const u_int8_t invmap[] = { - [ICMP_ECHO] = ICMP_ECHOREPLY + 1, - [ICMP_ECHOREPLY] = ICMP_ECHO + 1, - [ICMP_TIMESTAMP] = ICMP_TIMESTAMPREPLY + 1, - [ICMP_TIMESTAMPREPLY] = ICMP_TIMESTAMP + 1, - [ICMP_INFO_REQUEST] = ICMP_INFO_REPLY + 1, - [ICMP_INFO_REPLY] = ICMP_INFO_REQUEST + 1, - [ICMP_ADDRESS] = ICMP_ADDRESSREPLY + 1, - [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 -}; - -static int icmp_invert_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *orig) -{ - if (orig->dst.u.icmp.type >= sizeof(invmap) - || !invmap[orig->dst.u.icmp.type]) - return 0; - - tuple->src.u.icmp.id = orig->src.u.icmp.id; - tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; - tuple->dst.u.icmp.code = orig->dst.u.icmp.code; - return 1; -} - -/* Print out the per-protocol part of the tuple. */ -static int icmp_print_tuple(struct seq_file *s, - const struct ip_conntrack_tuple *tuple) -{ - return seq_printf(s, "type=%u code=%u id=%u ", - tuple->dst.u.icmp.type, - tuple->dst.u.icmp.code, - ntohs(tuple->src.u.icmp.id)); -} - -/* Print out the private part of the conntrack. */ -static int icmp_print_conntrack(struct seq_file *s, - const struct ip_conntrack *conntrack) -{ - return 0; -} - -/* Returns verdict for packet, or -1 for invalid. */ -static int icmp_packet(struct ip_conntrack *ct, - const struct sk_buff *skb, - enum ip_conntrack_info ctinfo) -{ - /* Try to delete connection immediately after all replies: - won't actually vanish as we still have skb, and del_timer - means this will only run once even if count hits zero twice - (theoretically possible with SMP) */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); - } else { - atomic_inc(&ct->proto.icmp.count); - ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); - ip_ct_refresh_acct(ct, ctinfo, skb, ip_ct_icmp_timeout); - } - - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static int icmp_new(struct ip_conntrack *conntrack, - const struct sk_buff *skb) -{ - static const u_int8_t valid_new[] = { - [ICMP_ECHO] = 1, - [ICMP_TIMESTAMP] = 1, - [ICMP_INFO_REQUEST] = 1, - [ICMP_ADDRESS] = 1 - }; - - if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) - || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { - /* Can't create a new ICMP `conn' with this. */ - DEBUGP("icmp: can't create new conn with type %u\n", - conntrack->tuplehash[0].tuple.dst.u.icmp.type); - DUMP_TUPLE(&conntrack->tuplehash[0].tuple); - return 0; - } - atomic_set(&conntrack->proto.icmp.count, 0); - return 1; -} - -static int -icmp_error_message(struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, - unsigned int hooknum) -{ - struct ip_conntrack_tuple innertuple, origtuple; - struct { - struct icmphdr icmp; - struct iphdr ip; - } _in, *inside; - struct ip_conntrack_protocol *innerproto; - struct ip_conntrack_tuple_hash *h; - int dataoff; - - IP_NF_ASSERT(skb->nfct == NULL); - - /* Not enough header? */ - inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); - if (inside == NULL) - return -NF_ACCEPT; - - /* Ignore ICMP's containing fragments (shouldn't happen) */ - if (inside->ip.frag_off & htons(IP_OFFSET)) { - DEBUGP("icmp_error_track: fragment of proto %u\n", - inside->ip.protocol); - return -NF_ACCEPT; - } - - innerproto = ip_conntrack_proto_find_get(inside->ip.protocol); - dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp) + inside->ip.ihl*4; - /* Are they talking about one of our connections? */ - if (!ip_ct_get_tuple(&inside->ip, skb, dataoff, &origtuple, innerproto)) { - DEBUGP("icmp_error: ! get_tuple p=%u", inside->ip.protocol); - ip_conntrack_proto_put(innerproto); - return -NF_ACCEPT; - } - - /* Ordinarily, we'd expect the inverted tupleproto, but it's - been preserved inside the ICMP. */ - if (!ip_ct_invert_tuple(&innertuple, &origtuple, innerproto)) { - DEBUGP("icmp_error_track: Can't invert tuple\n"); - ip_conntrack_proto_put(innerproto); - return -NF_ACCEPT; - } - ip_conntrack_proto_put(innerproto); - - *ctinfo = IP_CT_RELATED; - - h = ip_conntrack_find_get(&innertuple, NULL); - if (!h) { - /* Locally generated ICMPs will match inverted if they - haven't been SNAT'ed yet */ - /* FIXME: NAT code has to handle half-done double NAT --RR */ - if (hooknum == NF_IP_LOCAL_OUT) - h = ip_conntrack_find_get(&origtuple, NULL); - - if (!h) { - DEBUGP("icmp_error_track: no match\n"); - return -NF_ACCEPT; - } - /* Reverse direction from that found */ - if (DIRECTION(h) != IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } else { - if (DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } - - /* Update skb to refer to this connection */ - skb->nfct = &tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; -} - -/* Small and modified version of icmp_rcv */ -static int -icmp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, - unsigned int hooknum) -{ - struct icmphdr _ih, *icmph; - - /* Not enough header? */ - icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); - if (icmph == NULL) { - if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_icmp: short packet "); - return -NF_ACCEPT; - } - - /* See ip_conntrack_proto_tcp.c */ - if (hooknum != NF_IP_PRE_ROUTING) - goto checksum_skipped; - - switch (skb->ip_summed) { - case CHECKSUM_HW: - if (!(u16)csum_fold(skb->csum)) - break; - /* fall through */ - case CHECKSUM_NONE: - skb->csum = 0; - if (__skb_checksum_complete(skb)) { - if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_icmp: bad ICMP checksum "); - return -NF_ACCEPT; - } - } - -checksum_skipped: - /* - * 18 is the highest 'known' ICMP type. Anything else is a mystery - * - * RFC 1122: 3.2.2 Unknown ICMP messages types MUST be silently - * discarded. - */ - if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_icmp: invalid ICMP type "); - return -NF_ACCEPT; - } - - /* Need to track icmp error message? */ - if (icmph->type != ICMP_DEST_UNREACH - && icmph->type != ICMP_SOURCE_QUENCH - && icmph->type != ICMP_TIME_EXCEEDED - && icmph->type != ICMP_PARAMETERPROB - && icmph->type != ICMP_REDIRECT) - return NF_ACCEPT; - - return icmp_error_message(skb, ctinfo, hooknum); -} - -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) -static int icmp_tuple_to_nfattr(struct sk_buff *skb, - const struct ip_conntrack_tuple *t) -{ - NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), - &t->src.u.icmp.id); - NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), - &t->dst.u.icmp.type); - NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), - &t->dst.u.icmp.code); - - return 0; - -nfattr_failure: - return -1; -} - -static int icmp_nfattr_to_tuple(struct nfattr *tb[], - struct ip_conntrack_tuple *tuple) -{ - if (!tb[CTA_PROTO_ICMP_TYPE-1] - || !tb[CTA_PROTO_ICMP_CODE-1] - || !tb[CTA_PROTO_ICMP_ID-1]) - return -EINVAL; - - tuple->dst.u.icmp.type = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); - tuple->dst.u.icmp.code = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); - tuple->src.u.icmp.id = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); - - if (tuple->dst.u.icmp.type >= sizeof(invmap) - || !invmap[tuple->dst.u.icmp.type]) - return -EINVAL; - - return 0; -} -#endif - -struct ip_conntrack_protocol ip_conntrack_protocol_icmp = -{ - .proto = IPPROTO_ICMP, - .name = "icmp", - .pkt_to_tuple = icmp_pkt_to_tuple, - .invert_tuple = icmp_invert_tuple, - .print_tuple = icmp_print_tuple, - .print_conntrack = icmp_print_conntrack, - .packet = icmp_packet, - .new = icmp_new, - .error = icmp_error, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .tuple_to_nfattr = icmp_tuple_to_nfattr, - .nfattr_to_tuple = icmp_nfattr_to_tuple, -#endif -}; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c b/net/ipv4/netfilter/ip_conntrack_proto_sctp.c deleted file mode 100644 index be602e8aeab..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_sctp.c +++ /dev/null @@ -1,657 +0,0 @@ -/* - * Connection tracking protocol helper module for SCTP. - * - * SCTP is defined in RFC 2960. References to various sections in this code - * are to this RFC. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * Added support for proc manipulation of timeouts. - */ - -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/interrupt.h> -#include <linux/netfilter.h> -#include <linux/module.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/sctp.h> -#include <linux/string.h> -#include <linux/seq_file.h> - -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> - -#if 0 -#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__) -#else -#define DEBUGP(format, args...) -#endif - -/* Protects conntrack->proto.sctp */ -static DEFINE_RWLOCK(sctp_lock); - -/* FIXME: Examine ipfilter's timeouts and conntrack transitions more - closely. They're more complex. --RR - - And so for me for SCTP :D -Kiran */ - -static const char *sctp_conntrack_names[] = { - "NONE", - "CLOSED", - "COOKIE_WAIT", - "COOKIE_ECHOED", - "ESTABLISHED", - "SHUTDOWN_SENT", - "SHUTDOWN_RECD", - "SHUTDOWN_ACK_SENT", -}; - -#define SECS * HZ -#define MINS * 60 SECS -#define HOURS * 60 MINS -#define DAYS * 24 HOURS - -static unsigned int ip_ct_sctp_timeout_closed = 10 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_wait = 3 SECS; -static unsigned int ip_ct_sctp_timeout_cookie_echoed = 3 SECS; -static unsigned int ip_ct_sctp_timeout_established = 5 DAYS; -static unsigned int ip_ct_sctp_timeout_shutdown_sent = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_recd = 300 SECS / 1000; -static unsigned int ip_ct_sctp_timeout_shutdown_ack_sent = 3 SECS; - -static const unsigned int * sctp_timeouts[] -= { NULL, /* SCTP_CONNTRACK_NONE */ - &ip_ct_sctp_timeout_closed, /* SCTP_CONNTRACK_CLOSED */ - &ip_ct_sctp_timeout_cookie_wait, /* SCTP_CONNTRACK_COOKIE_WAIT */ - &ip_ct_sctp_timeout_cookie_echoed, /* SCTP_CONNTRACK_COOKIE_ECHOED */ - &ip_ct_sctp_timeout_established, /* SCTP_CONNTRACK_ESTABLISHED */ - &ip_ct_sctp_timeout_shutdown_sent, /* SCTP_CONNTRACK_SHUTDOWN_SENT */ - &ip_ct_sctp_timeout_shutdown_recd, /* SCTP_CONNTRACK_SHUTDOWN_RECD */ - &ip_ct_sctp_timeout_shutdown_ack_sent /* SCTP_CONNTRACK_SHUTDOWN_ACK_SENT */ - }; - -#define sNO SCTP_CONNTRACK_NONE -#define sCL SCTP_CONNTRACK_CLOSED -#define sCW SCTP_CONNTRACK_COOKIE_WAIT -#define sCE SCTP_CONNTRACK_COOKIE_ECHOED -#define sES SCTP_CONNTRACK_ESTABLISHED -#define sSS SCTP_CONNTRACK_SHUTDOWN_SENT -#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD -#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT -#define sIV SCTP_CONNTRACK_MAX - -/* - These are the descriptions of the states: - -NOTE: These state names are tantalizingly similar to the states of an -SCTP endpoint. But the interpretation of the states is a little different, -considering that these are the states of the connection and not of an end -point. Please note the subtleties. -Kiran - -NONE - Nothing so far. -COOKIE WAIT - We have seen an INIT chunk in the original direction, or also - an INIT_ACK chunk in the reply direction. -COOKIE ECHOED - We have seen a COOKIE_ECHO chunk in the original direction. -ESTABLISHED - We have seen a COOKIE_ACK in the reply direction. -SHUTDOWN_SENT - We have seen a SHUTDOWN chunk in the original direction. -SHUTDOWN_RECD - We have seen a SHUTDOWN chunk in the reply directoin. -SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite - to that of the SHUTDOWN chunk. -CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of - the SHUTDOWN chunk. Connection is closed. -*/ - -/* TODO - - I have assumed that the first INIT is in the original direction. - This messes things when an INIT comes in the reply direction in CLOSED - state. - - Check the error type in the reply dir before transitioning from -cookie echoed to closed. - - Sec 5.2.4 of RFC 2960 - - Multi Homing support. -*/ - -/* SCTP conntrack state transitions */ -static const enum sctp_conntrack sctp_conntracks[2][9][SCTP_CONNTRACK_MAX] = { - { -/* ORIGINAL */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ -/* init */ {sCW, sCW, sCW, sCE, sES, sSS, sSR, sSA}, -/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA}, -/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA}, -/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant have Stale cookie*/ -/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in orig dir */ -/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL} - }, - { -/* REPLY */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA */ -/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* INIT in sCL Big TODO */ -/* init_ack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA}, -/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA}, -/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA}, -/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA},/* Cant come in reply dir */ -/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA}, -/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL} - } -}; - -static int sctp_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple) -{ - sctp_sctphdr_t _hdr, *hp; - - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, 8, &_hdr); - if (hp == NULL) - return 0; - - tuple->src.u.sctp.port = hp->source; - tuple->dst.u.sctp.port = hp->dest; - return 1; -} - -static int sctp_invert_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *orig) -{ - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - tuple->src.u.sctp.port = orig->dst.u.sctp.port; - tuple->dst.u.sctp.port = orig->src.u.sctp.port; - return 1; -} - -/* Print out the per-protocol part of the tuple. */ -static int sctp_print_tuple(struct seq_file *s, - const struct ip_conntrack_tuple *tuple) -{ - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - return seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.sctp.port), - ntohs(tuple->dst.u.sctp.port)); -} - -/* Print out the private part of the conntrack. */ -static int sctp_print_conntrack(struct seq_file *s, - const struct ip_conntrack *conntrack) -{ - enum sctp_conntrack state; - - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - read_lock_bh(&sctp_lock); - state = conntrack->proto.sctp.state; - read_unlock_bh(&sctp_lock); - - return seq_printf(s, "%s ", sctp_conntrack_names[state]); -} - -#define for_each_sctp_chunk(skb, sch, _sch, offset, count) \ -for (offset = skb->nh.iph->ihl * 4 + sizeof(sctp_sctphdr_t), count = 0; \ - offset < skb->len && \ - (sch = skb_header_pointer(skb, offset, sizeof(_sch), &_sch)); \ - offset += (htons(sch->length) + 3) & ~3, count++) - -/* Some validity checks to make sure the chunks are fine */ -static int do_basic_checks(struct ip_conntrack *conntrack, - const struct sk_buff *skb, - char *map) -{ - u_int32_t offset, count; - sctp_chunkhdr_t _sch, *sch; - int flag; - - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - flag = 0; - - for_each_sctp_chunk (skb, sch, _sch, offset, count) { - DEBUGP("Chunk Num: %d Type: %d\n", count, sch->type); - - if (sch->type == SCTP_CID_INIT - || sch->type == SCTP_CID_INIT_ACK - || sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { - flag = 1; - } - - /* Cookie Ack/Echo chunks not the first OR - Init / Init Ack / Shutdown compl chunks not the only chunks */ - if ((sch->type == SCTP_CID_COOKIE_ACK - || sch->type == SCTP_CID_COOKIE_ECHO - || flag) - && count !=0 ) { - DEBUGP("Basic checks failed\n"); - return 1; - } - - if (map) { - set_bit(sch->type, (void *)map); - } - } - - DEBUGP("Basic checks passed\n"); - return 0; -} - -static int new_state(enum ip_conntrack_dir dir, - enum sctp_conntrack cur_state, - int chunk_type) -{ - int i; - - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - DEBUGP("Chunk type: %d\n", chunk_type); - - switch (chunk_type) { - case SCTP_CID_INIT: - DEBUGP("SCTP_CID_INIT\n"); - i = 0; break; - case SCTP_CID_INIT_ACK: - DEBUGP("SCTP_CID_INIT_ACK\n"); - i = 1; break; - case SCTP_CID_ABORT: - DEBUGP("SCTP_CID_ABORT\n"); - i = 2; break; - case SCTP_CID_SHUTDOWN: - DEBUGP("SCTP_CID_SHUTDOWN\n"); - i = 3; break; - case SCTP_CID_SHUTDOWN_ACK: - DEBUGP("SCTP_CID_SHUTDOWN_ACK\n"); - i = 4; break; - case SCTP_CID_ERROR: - DEBUGP("SCTP_CID_ERROR\n"); - i = 5; break; - case SCTP_CID_COOKIE_ECHO: - DEBUGP("SCTP_CID_COOKIE_ECHO\n"); - i = 6; break; - case SCTP_CID_COOKIE_ACK: - DEBUGP("SCTP_CID_COOKIE_ACK\n"); - i = 7; break; - case SCTP_CID_SHUTDOWN_COMPLETE: - DEBUGP("SCTP_CID_SHUTDOWN_COMPLETE\n"); - i = 8; break; - default: - /* Other chunks like DATA, SACK, HEARTBEAT and - its ACK do not cause a change in state */ - DEBUGP("Unknown chunk type, Will stay in %s\n", - sctp_conntrack_names[cur_state]); - return cur_state; - } - - DEBUGP("dir: %d cur_state: %s chunk_type: %d new_state: %s\n", - dir, sctp_conntrack_names[cur_state], chunk_type, - sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]); - - return sctp_conntracks[dir][i][cur_state]; -} - -/* Returns verdict for packet, or -1 for invalid. */ -static int sctp_packet(struct ip_conntrack *conntrack, - const struct sk_buff *skb, - enum ip_conntrack_info ctinfo) -{ - enum sctp_conntrack newconntrack, oldsctpstate; - struct iphdr *iph = skb->nh.iph; - sctp_sctphdr_t _sctph, *sh; - sctp_chunkhdr_t _sch, *sch; - u_int32_t offset, count; - char map[256 / sizeof (char)] = {0}; - - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph); - if (sh == NULL) - return -1; - - if (do_basic_checks(conntrack, skb, map) != 0) - return -1; - - /* Check the verification tag (Sec 8.5) */ - if (!test_bit(SCTP_CID_INIT, (void *)map) - && !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, (void *)map) - && !test_bit(SCTP_CID_COOKIE_ECHO, (void *)map) - && !test_bit(SCTP_CID_ABORT, (void *)map) - && !test_bit(SCTP_CID_SHUTDOWN_ACK, (void *)map) - && (sh->vtag != conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { - DEBUGP("Verification tag check failed\n"); - return -1; - } - - oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX; - for_each_sctp_chunk (skb, sch, _sch, offset, count) { - write_lock_bh(&sctp_lock); - - /* Special cases of Verification tag check (Sec 8.5.1) */ - if (sch->type == SCTP_CID_INIT) { - /* Sec 8.5.1 (A) */ - if (sh->vtag != 0) { - write_unlock_bh(&sctp_lock); - return -1; - } - } else if (sch->type == SCTP_CID_ABORT) { - /* Sec 8.5.1 (B) */ - if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) - && !(sh->vtag == conntrack->proto.sctp.vtag - [1 - CTINFO2DIR(ctinfo)])) { - write_unlock_bh(&sctp_lock); - return -1; - } - } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { - /* Sec 8.5.1 (C) */ - if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)]) - && !(sh->vtag == conntrack->proto.sctp.vtag - [1 - CTINFO2DIR(ctinfo)] - && (sch->flags & 1))) { - write_unlock_bh(&sctp_lock); - return -1; - } - } else if (sch->type == SCTP_CID_COOKIE_ECHO) { - /* Sec 8.5.1 (D) */ - if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) { - write_unlock_bh(&sctp_lock); - return -1; - } - } - - oldsctpstate = conntrack->proto.sctp.state; - newconntrack = new_state(CTINFO2DIR(ctinfo), oldsctpstate, sch->type); - - /* Invalid */ - if (newconntrack == SCTP_CONNTRACK_MAX) { - DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n", - CTINFO2DIR(ctinfo), sch->type, oldsctpstate); - write_unlock_bh(&sctp_lock); - return -1; - } - - /* If it is an INIT or an INIT ACK note down the vtag */ - if (sch->type == SCTP_CID_INIT - || sch->type == SCTP_CID_INIT_ACK) { - sctp_inithdr_t _inithdr, *ih; - - ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) { - write_unlock_bh(&sctp_lock); - return -1; - } - DEBUGP("Setting vtag %x for dir %d\n", - ih->init_tag, !CTINFO2DIR(ctinfo)); - conntrack->proto.sctp.vtag[!CTINFO2DIR(ctinfo)] = ih->init_tag; - } - - conntrack->proto.sctp.state = newconntrack; - if (oldsctpstate != newconntrack) - ip_conntrack_event_cache(IPCT_PROTOINFO, skb); - write_unlock_bh(&sctp_lock); - } - - ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]); - - if (oldsctpstate == SCTP_CONNTRACK_COOKIE_ECHOED - && CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY - && newconntrack == SCTP_CONNTRACK_ESTABLISHED) { - DEBUGP("Setting assured bit\n"); - set_bit(IPS_ASSURED_BIT, &conntrack->status); - ip_conntrack_event_cache(IPCT_STATUS, skb); - } - - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static int sctp_new(struct ip_conntrack *conntrack, - const struct sk_buff *skb) -{ - enum sctp_conntrack newconntrack; - struct iphdr *iph = skb->nh.iph; - sctp_sctphdr_t _sctph, *sh; - sctp_chunkhdr_t _sch, *sch; - u_int32_t offset, count; - char map[256 / sizeof (char)] = {0}; - - DEBUGP(__FUNCTION__); - DEBUGP("\n"); - - sh = skb_header_pointer(skb, iph->ihl * 4, sizeof(_sctph), &_sctph); - if (sh == NULL) - return 0; - - if (do_basic_checks(conntrack, skb, map) != 0) - return 0; - - /* If an OOTB packet has any of these chunks discard (Sec 8.4) */ - if ((test_bit (SCTP_CID_ABORT, (void *)map)) - || (test_bit (SCTP_CID_SHUTDOWN_COMPLETE, (void *)map)) - || (test_bit (SCTP_CID_COOKIE_ACK, (void *)map))) { - return 0; - } - - newconntrack = SCTP_CONNTRACK_MAX; - for_each_sctp_chunk (skb, sch, _sch, offset, count) { - /* Don't need lock here: this conntrack not in circulation yet */ - newconntrack = new_state (IP_CT_DIR_ORIGINAL, - SCTP_CONNTRACK_NONE, sch->type); - - /* Invalid: delete conntrack */ - if (newconntrack == SCTP_CONNTRACK_MAX) { - DEBUGP("ip_conntrack_sctp: invalid new deleting.\n"); - return 0; - } - - /* Copy the vtag into the state info */ - if (sch->type == SCTP_CID_INIT) { - if (sh->vtag == 0) { - sctp_inithdr_t _inithdr, *ih; - - ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t), - sizeof(_inithdr), &_inithdr); - if (ih == NULL) - return 0; - - DEBUGP("Setting vtag %x for new conn\n", - ih->init_tag); - - conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = - ih->init_tag; - } else { - /* Sec 8.5.1 (A) */ - return 0; - } - } - /* If it is a shutdown ack OOTB packet, we expect a return - shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */ - else { - DEBUGP("Setting vtag %x for new conn OOTB\n", - sh->vtag); - conntrack->proto.sctp.vtag[IP_CT_DIR_REPLY] = sh->vtag; - } - - conntrack->proto.sctp.state = newconntrack; - } - - return 1; -} - -static struct ip_conntrack_protocol ip_conntrack_protocol_sctp = { - .proto = IPPROTO_SCTP, - .name = "sctp", - .pkt_to_tuple = sctp_pkt_to_tuple, - .invert_tuple = sctp_invert_tuple, - .print_tuple = sctp_print_tuple, - .print_conntrack = sctp_print_conntrack, - .packet = sctp_packet, - .new = sctp_new, - .destroy = NULL, - .me = THIS_MODULE, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, -#endif -}; - -#ifdef CONFIG_SYSCTL -static ctl_table ip_ct_sysctl_table[] = { - { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_CLOSED, - .procname = "ip_conntrack_sctp_timeout_closed", - .data = &ip_ct_sctp_timeout_closed, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_WAIT, - .procname = "ip_conntrack_sctp_timeout_cookie_wait", - .data = &ip_ct_sctp_timeout_cookie_wait, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_COOKIE_ECHOED, - .procname = "ip_conntrack_sctp_timeout_cookie_echoed", - .data = &ip_ct_sctp_timeout_cookie_echoed, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_ESTABLISHED, - .procname = "ip_conntrack_sctp_timeout_established", - .data = &ip_ct_sctp_timeout_established, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_SENT, - .procname = "ip_conntrack_sctp_timeout_shutdown_sent", - .data = &ip_ct_sctp_timeout_shutdown_sent, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_RECD, - .procname = "ip_conntrack_sctp_timeout_shutdown_recd", - .data = &ip_ct_sctp_timeout_shutdown_recd, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_SCTP_TIMEOUT_SHUTDOWN_ACK_SENT, - .procname = "ip_conntrack_sctp_timeout_shutdown_ack_sent", - .data = &ip_ct_sctp_timeout_shutdown_ack_sent, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { .ctl_name = 0 } -}; - -static ctl_table ip_ct_netfilter_table[] = { - { - .ctl_name = NET_IPV4_NETFILTER, - .procname = "netfilter", - .mode = 0555, - .child = ip_ct_sysctl_table, - }, - { .ctl_name = 0 } -}; - -static ctl_table ip_ct_ipv4_table[] = { - { - .ctl_name = NET_IPV4, - .procname = "ipv4", - .mode = 0555, - .child = ip_ct_netfilter_table, - }, - { .ctl_name = 0 } -}; - -static ctl_table ip_ct_net_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = ip_ct_ipv4_table, - }, - { .ctl_name = 0 } -}; - -static struct ctl_table_header *ip_ct_sysctl_header; -#endif - -static int __init init(void) -{ - int ret; - - ret = ip_conntrack_protocol_register(&ip_conntrack_protocol_sctp); - if (ret) { - printk("ip_conntrack_proto_sctp: protocol register failed\n"); - goto out; - } - -#ifdef CONFIG_SYSCTL - ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0); - if (ip_ct_sysctl_header == NULL) { - ret = -ENOMEM; - printk("ip_conntrack_proto_sctp: can't register to sysctl.\n"); - goto cleanup; - } -#endif - - return ret; - -#ifdef CONFIG_SYSCTL - cleanup: - ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp); -#endif - out: - DEBUGP("SCTP conntrack module loading %s\n", - ret ? "failed": "succeeded"); - return ret; -} - -static void __exit fini(void) -{ - ip_conntrack_protocol_unregister(&ip_conntrack_protocol_sctp); -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(ip_ct_sysctl_header); -#endif - DEBUGP("SCTP conntrack module unloaded\n"); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Kiran Kumar Immidi"); -MODULE_DESCRIPTION("Netfilter connection tracking protocol helper for SCTP"); diff --git a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c b/net/ipv4/netfilter/ip_conntrack_proto_tcp.c deleted file mode 100644 index e0dc3706354..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_tcp.c +++ /dev/null @@ -1,1176 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>: - * - Real stateful connection tracking - * - Modified state transitions table - * - Window scaling support added - * - SACK support added - * - * Willy Tarreau: - * - State table bugfixes - * - More robust state changes - * - Tuning timer parameters - * - * version 2.2 - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/netfilter.h> -#include <linux/module.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/spinlock.h> - -#include <net/tcp.h> - -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> - -#if 0 -#define DEBUGP printk -#define DEBUGP_VARS -#else -#define DEBUGP(format, args...) -#endif - -/* Protects conntrack->proto.tcp */ -static DEFINE_RWLOCK(tcp_lock); - -/* "Be conservative in what you do, - be liberal in what you accept from others." - If it's non-zero, we mark only out of window RST segments as INVALID. */ -int ip_ct_tcp_be_liberal = 0; - -/* When connection is picked up from the middle, how many packets are required - to pass in each direction when we assume we are in sync - if any side uses - window scaling, we lost the game. - If it is set to zero, we disable picking up already established - connections. */ -int ip_ct_tcp_loose = 3; - -/* Max number of the retransmitted packets without receiving an (acceptable) - ACK from the destination. If this number is reached, a shorter timer - will be started. */ -int ip_ct_tcp_max_retrans = 3; - - /* FIXME: Examine ipfilter's timeouts and conntrack transitions more - closely. They're more complex. --RR */ - -static const char *tcp_conntrack_names[] = { - "NONE", - "SYN_SENT", - "SYN_RECV", - "ESTABLISHED", - "FIN_WAIT", - "CLOSE_WAIT", - "LAST_ACK", - "TIME_WAIT", - "CLOSE", - "LISTEN" -}; - -#define SECS * HZ -#define MINS * 60 SECS -#define HOURS * 60 MINS -#define DAYS * 24 HOURS - -unsigned int ip_ct_tcp_timeout_syn_sent = 2 MINS; -unsigned int ip_ct_tcp_timeout_syn_recv = 60 SECS; -unsigned int ip_ct_tcp_timeout_established = 5 DAYS; -unsigned int ip_ct_tcp_timeout_fin_wait = 2 MINS; -unsigned int ip_ct_tcp_timeout_close_wait = 60 SECS; -unsigned int ip_ct_tcp_timeout_last_ack = 30 SECS; -unsigned int ip_ct_tcp_timeout_time_wait = 2 MINS; -unsigned int ip_ct_tcp_timeout_close = 10 SECS; - -/* RFC1122 says the R2 limit should be at least 100 seconds. - Linux uses 15 packets as limit, which corresponds - to ~13-30min depending on RTO. */ -unsigned int ip_ct_tcp_timeout_max_retrans = 5 MINS; - -static const unsigned int * tcp_timeouts[] -= { NULL, /* TCP_CONNTRACK_NONE */ - &ip_ct_tcp_timeout_syn_sent, /* TCP_CONNTRACK_SYN_SENT, */ - &ip_ct_tcp_timeout_syn_recv, /* TCP_CONNTRACK_SYN_RECV, */ - &ip_ct_tcp_timeout_established, /* TCP_CONNTRACK_ESTABLISHED, */ - &ip_ct_tcp_timeout_fin_wait, /* TCP_CONNTRACK_FIN_WAIT, */ - &ip_ct_tcp_timeout_close_wait, /* TCP_CONNTRACK_CLOSE_WAIT, */ - &ip_ct_tcp_timeout_last_ack, /* TCP_CONNTRACK_LAST_ACK, */ - &ip_ct_tcp_timeout_time_wait, /* TCP_CONNTRACK_TIME_WAIT, */ - &ip_ct_tcp_timeout_close, /* TCP_CONNTRACK_CLOSE, */ - NULL, /* TCP_CONNTRACK_LISTEN */ - }; - -#define sNO TCP_CONNTRACK_NONE -#define sSS TCP_CONNTRACK_SYN_SENT -#define sSR TCP_CONNTRACK_SYN_RECV -#define sES TCP_CONNTRACK_ESTABLISHED -#define sFW TCP_CONNTRACK_FIN_WAIT -#define sCW TCP_CONNTRACK_CLOSE_WAIT -#define sLA TCP_CONNTRACK_LAST_ACK -#define sTW TCP_CONNTRACK_TIME_WAIT -#define sCL TCP_CONNTRACK_CLOSE -#define sLI TCP_CONNTRACK_LISTEN -#define sIV TCP_CONNTRACK_MAX -#define sIG TCP_CONNTRACK_IGNORE - -/* What TCP flags are set from RST/SYN/FIN/ACK. */ -enum tcp_bit_set { - TCP_SYN_SET, - TCP_SYNACK_SET, - TCP_FIN_SET, - TCP_ACK_SET, - TCP_RST_SET, - TCP_NONE_SET, -}; - -/* - * The TCP state transition table needs a few words... - * - * We are the man in the middle. All the packets go through us - * but might get lost in transit to the destination. - * It is assumed that the destinations can't receive segments - * we haven't seen. - * - * The checked segment is in window, but our windows are *not* - * equivalent with the ones of the sender/receiver. We always - * try to guess the state of the current sender. - * - * The meaning of the states are: - * - * NONE: initial state - * SYN_SENT: SYN-only packet seen - * SYN_RECV: SYN-ACK packet seen - * ESTABLISHED: ACK packet seen - * FIN_WAIT: FIN packet seen - * CLOSE_WAIT: ACK seen (after FIN) - * LAST_ACK: FIN seen (after FIN) - * TIME_WAIT: last ACK seen - * CLOSE: closed connection - * - * LISTEN state is not used. - * - * Packets marked as IGNORED (sIG): - * if they may be either invalid or valid - * and the receiver may send back a connection - * closing RST or a SYN/ACK. - * - * Packets marked as INVALID (sIV): - * if they are invalid - * or we do not support the request (simultaneous open) - */ -static const enum tcp_conntrack tcp_conntracks[2][6][TCP_CONNTRACK_MAX] = { - { -/* ORIGINAL */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sSS, sSS, sIG, sIG, sIG, sIG, sIG, sSS, sSS, sIV }, -/* - * sNO -> sSS Initialize a new connection - * sSS -> sSS Retransmitted SYN - * sSR -> sIG Late retransmitted SYN? - * sES -> sIG Error: SYNs in window outside the SYN_SENT state - * are errors. Receiver will reply with RST - * and close the connection. - * Or we are not in sync and hold a dead connection. - * sFW -> sIG - * sCW -> sIG - * sLA -> sIG - * sTW -> sSS Reopened connection (RFC 1122). - * sCL -> sSS - */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, -/* - * A SYN/ACK from the client is always invalid: - * - either it tries to set up a simultaneous open, which is - * not supported; - * - or the firewall has just been inserted between the two hosts - * during the session set-up. The SYN will be retransmitted - * by the true client (or it'll time out). - */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, -/* - * sNO -> sIV Too late and no reason to do anything... - * sSS -> sIV Client migth not send FIN in this state: - * we enforce waiting for a SYN/ACK reply first. - * sSR -> sFW Close started. - * sES -> sFW - * sFW -> sLA FIN seen in both directions, waiting for - * the last ACK. - * Migth be a retransmitted FIN as well... - * sCW -> sLA - * sLA -> sLA Retransmitted FIN. Remain in the same state. - * sTW -> sTW - * sCL -> sCL - */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sES, sIV, sES, sES, sCW, sCW, sTW, sTW, sCL, sIV }, -/* - * sNO -> sES Assumed. - * sSS -> sIV ACK is invalid: we haven't seen a SYN/ACK yet. - * sSR -> sES Established state is reached. - * sES -> sES :-) - * sFW -> sCW Normal close request answered by ACK. - * sCW -> sCW - * sLA -> sTW Last ACK detected. - * sTW -> sTW Retransmitted last ACK. Remain in the same state. - * sCL -> sCL - */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, -/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } - }, - { -/* REPLY */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*syn*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV }, -/* - * sNO -> sIV Never reached. - * sSS -> sIV Simultaneous open, not supported - * sSR -> sIV Simultaneous open, not supported. - * sES -> sIV Server may not initiate a connection. - * sFW -> sIV - * sCW -> sIV - * sLA -> sIV - * sTW -> sIV Reopened connection, but server may not do it. - * sCL -> sIV - */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*synack*/ { sIV, sSR, sSR, sIG, sIG, sIG, sIG, sIG, sIG, sIV }, -/* - * sSS -> sSR Standard open. - * sSR -> sSR Retransmitted SYN/ACK. - * sES -> sIG Late retransmitted SYN/ACK? - * sFW -> sIG Might be SYN/ACK answering ignored SYN - * sCW -> sIG - * sLA -> sIG - * sTW -> sIG - * sCL -> sIG - */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*fin*/ { sIV, sIV, sFW, sFW, sLA, sLA, sLA, sTW, sCL, sIV }, -/* - * sSS -> sIV Server might not send FIN in this state. - * sSR -> sFW Close started. - * sES -> sFW - * sFW -> sLA FIN seen in both directions. - * sCW -> sLA - * sLA -> sLA Retransmitted FIN. - * sTW -> sTW - * sCL -> sCL - */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*ack*/ { sIV, sIG, sSR, sES, sCW, sCW, sTW, sTW, sCL, sIV }, -/* - * sSS -> sIG Might be a half-open connection. - * sSR -> sSR Might answer late resent SYN. - * sES -> sES :-) - * sFW -> sCW Normal close request answered by ACK. - * sCW -> sCW - * sLA -> sTW Last ACK detected. - * sTW -> sTW Retransmitted last ACK. - * sCL -> sCL - */ -/* sNO, sSS, sSR, sES, sFW, sCW, sLA, sTW, sCL, sLI */ -/*rst*/ { sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV }, -/*none*/ { sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV, sIV } - } -}; - -static int tcp_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple) -{ - struct tcphdr _hdr, *hp; - - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, 8, &_hdr); - if (hp == NULL) - return 0; - - tuple->src.u.tcp.port = hp->source; - tuple->dst.u.tcp.port = hp->dest; - - return 1; -} - -static int tcp_invert_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *orig) -{ - tuple->src.u.tcp.port = orig->dst.u.tcp.port; - tuple->dst.u.tcp.port = orig->src.u.tcp.port; - return 1; -} - -/* Print out the per-protocol part of the tuple. */ -static int tcp_print_tuple(struct seq_file *s, - const struct ip_conntrack_tuple *tuple) -{ - return seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.tcp.port), - ntohs(tuple->dst.u.tcp.port)); -} - -/* Print out the private part of the conntrack. */ -static int tcp_print_conntrack(struct seq_file *s, - const struct ip_conntrack *conntrack) -{ - enum tcp_conntrack state; - - read_lock_bh(&tcp_lock); - state = conntrack->proto.tcp.state; - read_unlock_bh(&tcp_lock); - - return seq_printf(s, "%s ", tcp_conntrack_names[state]); -} - -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) -static int tcp_to_nfattr(struct sk_buff *skb, struct nfattr *nfa, - const struct ip_conntrack *ct) -{ - struct nfattr *nest_parms; - - read_lock_bh(&tcp_lock); - nest_parms = NFA_NEST(skb, CTA_PROTOINFO_TCP); - NFA_PUT(skb, CTA_PROTOINFO_TCP_STATE, sizeof(u_int8_t), - &ct->proto.tcp.state); - read_unlock_bh(&tcp_lock); - - NFA_NEST_END(skb, nest_parms); - - return 0; - -nfattr_failure: - read_unlock_bh(&tcp_lock); - return -1; -} - -static const size_t cta_min_tcp[CTA_PROTOINFO_TCP_MAX] = { - [CTA_PROTOINFO_TCP_STATE-1] = sizeof(u_int8_t), -}; - -static int nfattr_to_tcp(struct nfattr *cda[], struct ip_conntrack *ct) -{ - struct nfattr *attr = cda[CTA_PROTOINFO_TCP-1]; - struct nfattr *tb[CTA_PROTOINFO_TCP_MAX]; - - /* updates could not contain anything about the private - * protocol info, in that case skip the parsing */ - if (!attr) - return 0; - - nfattr_parse_nested(tb, CTA_PROTOINFO_TCP_MAX, attr); - - if (nfattr_bad_size(tb, CTA_PROTOINFO_TCP_MAX, cta_min_tcp)) - return -EINVAL; - - if (!tb[CTA_PROTOINFO_TCP_STATE-1]) - return -EINVAL; - - write_lock_bh(&tcp_lock); - ct->proto.tcp.state = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTOINFO_TCP_STATE-1]); - write_unlock_bh(&tcp_lock); - - return 0; -} -#endif - -static unsigned int get_conntrack_index(const struct tcphdr *tcph) -{ - if (tcph->rst) return TCP_RST_SET; - else if (tcph->syn) return (tcph->ack ? TCP_SYNACK_SET : TCP_SYN_SET); - else if (tcph->fin) return TCP_FIN_SET; - else if (tcph->ack) return TCP_ACK_SET; - else return TCP_NONE_SET; -} - -/* TCP connection tracking based on 'Real Stateful TCP Packet Filtering - in IP Filter' by Guido van Rooij. - - http://www.nluug.nl/events/sane2000/papers.html - http://www.iae.nl/users/guido/papers/tcp_filtering.ps.gz - - The boundaries and the conditions are changed according to RFC793: - the packet must intersect the window (i.e. segments may be - after the right or before the left edge) and thus receivers may ACK - segments after the right edge of the window. - - td_maxend = max(sack + max(win,1)) seen in reply packets - td_maxwin = max(max(win, 1)) + (sack - ack) seen in sent packets - td_maxwin += seq + len - sender.td_maxend - if seq + len > sender.td_maxend - td_end = max(seq + len) seen in sent packets - - I. Upper bound for valid data: seq <= sender.td_maxend - II. Lower bound for valid data: seq + len >= sender.td_end - receiver.td_maxwin - III. Upper bound for valid ack: sack <= receiver.td_end - IV. Lower bound for valid ack: ack >= receiver.td_end - MAXACKWINDOW - - where sack is the highest right edge of sack block found in the packet. - - The upper bound limit for a valid ack is not ignored - - we doesn't have to deal with fragments. -*/ - -static inline __u32 segment_seq_plus_len(__u32 seq, - size_t len, - struct iphdr *iph, - struct tcphdr *tcph) -{ - return (seq + len - (iph->ihl + tcph->doff)*4 - + (tcph->syn ? 1 : 0) + (tcph->fin ? 1 : 0)); -} - -/* Fixme: what about big packets? */ -#define MAXACKWINCONST 66000 -#define MAXACKWINDOW(sender) \ - ((sender)->td_maxwin > MAXACKWINCONST ? (sender)->td_maxwin \ - : MAXACKWINCONST) - -/* - * Simplified tcp_parse_options routine from tcp_input.c - */ -static void tcp_options(const struct sk_buff *skb, - struct iphdr *iph, - struct tcphdr *tcph, - struct ip_ct_tcp_state *state) -{ - unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; - unsigned char *ptr; - int length = (tcph->doff*4) - sizeof(struct tcphdr); - - if (!length) - return; - - ptr = skb_header_pointer(skb, - (iph->ihl * 4) + sizeof(struct tcphdr), - length, buff); - BUG_ON(ptr == NULL); - - state->td_scale = - state->flags = 0; - - while (length > 0) { - int opcode=*ptr++; - int opsize; - - switch (opcode) { - case TCPOPT_EOL: - return; - case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ - length--; - continue; - default: - opsize=*ptr++; - if (opsize < 2) /* "silly options" */ - return; - if (opsize > length) - break; /* don't parse partial options */ - - if (opcode == TCPOPT_SACK_PERM - && opsize == TCPOLEN_SACK_PERM) - state->flags |= IP_CT_TCP_FLAG_SACK_PERM; - else if (opcode == TCPOPT_WINDOW - && opsize == TCPOLEN_WINDOW) { - state->td_scale = *(u_int8_t *)ptr; - - if (state->td_scale > 14) { - /* See RFC1323 */ - state->td_scale = 14; - } - state->flags |= - IP_CT_TCP_FLAG_WINDOW_SCALE; - } - ptr += opsize - 2; - length -= opsize; - } - } -} - -static void tcp_sack(const struct sk_buff *skb, - struct iphdr *iph, - struct tcphdr *tcph, - __u32 *sack) -{ - unsigned char buff[(15 * 4) - sizeof(struct tcphdr)]; - unsigned char *ptr; - int length = (tcph->doff*4) - sizeof(struct tcphdr); - __u32 tmp; - - if (!length) - return; - - ptr = skb_header_pointer(skb, - (iph->ihl * 4) + sizeof(struct tcphdr), - length, buff); - BUG_ON(ptr == NULL); - - /* Fast path for timestamp-only option */ - if (length == TCPOLEN_TSTAMP_ALIGNED*4 - && *(__u32 *)ptr == - __constant_ntohl((TCPOPT_NOP << 24) - | (TCPOPT_NOP << 16) - | (TCPOPT_TIMESTAMP << 8) - | TCPOLEN_TIMESTAMP)) - return; - - while (length > 0) { - int opcode=*ptr++; - int opsize, i; - - switch (opcode) { - case TCPOPT_EOL: - return; - case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */ - length--; - continue; - default: - opsize=*ptr++; - if (opsize < 2) /* "silly options" */ - return; - if (opsize > length) - break; /* don't parse partial options */ - - if (opcode == TCPOPT_SACK - && opsize >= (TCPOLEN_SACK_BASE - + TCPOLEN_SACK_PERBLOCK) - && !((opsize - TCPOLEN_SACK_BASE) - % TCPOLEN_SACK_PERBLOCK)) { - for (i = 0; - i < (opsize - TCPOLEN_SACK_BASE); - i += TCPOLEN_SACK_PERBLOCK) { - tmp = ntohl(*((u_int32_t *)(ptr+i)+1)); - - if (after(tmp, *sack)) - *sack = tmp; - } - return; - } - ptr += opsize - 2; - length -= opsize; - } - } -} - -static int tcp_in_window(struct ip_ct_tcp *state, - enum ip_conntrack_dir dir, - unsigned int index, - const struct sk_buff *skb, - struct iphdr *iph, - struct tcphdr *tcph) -{ - struct ip_ct_tcp_state *sender = &state->seen[dir]; - struct ip_ct_tcp_state *receiver = &state->seen[!dir]; - __u32 seq, ack, sack, end, win, swin; - int res; - - /* - * Get the required data from the packet. - */ - seq = ntohl(tcph->seq); - ack = sack = ntohl(tcph->ack_seq); - win = ntohs(tcph->window); - end = segment_seq_plus_len(seq, skb->len, iph, tcph); - - if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM) - tcp_sack(skb, iph, tcph, &sack); - - DEBUGP("tcp_in_window: START\n"); - DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "seq=%u ack=%u sack=%u win=%u end=%u\n", - NIPQUAD(iph->saddr), ntohs(tcph->source), - NIPQUAD(iph->daddr), ntohs(tcph->dest), - seq, ack, sack, win, end); - DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - - if (sender->td_end == 0) { - /* - * Initialize sender data. - */ - if (tcph->syn && tcph->ack) { - /* - * Outgoing SYN-ACK in reply to a SYN. - */ - sender->td_end = - sender->td_maxend = end; - sender->td_maxwin = (win == 0 ? 1 : win); - - tcp_options(skb, iph, tcph, sender); - /* - * RFC 1323: - * Both sides must send the Window Scale option - * to enable window scaling in either direction. - */ - if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE - && receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) - sender->td_scale = - receiver->td_scale = 0; - } else { - /* - * We are in the middle of a connection, - * its history is lost for us. - * Let's try to use the data from the packet. - */ - sender->td_end = end; - sender->td_maxwin = (win == 0 ? 1 : win); - sender->td_maxend = end + sender->td_maxwin; - } - } else if (((state->state == TCP_CONNTRACK_SYN_SENT - && dir == IP_CT_DIR_ORIGINAL) - || (state->state == TCP_CONNTRACK_SYN_RECV - && dir == IP_CT_DIR_REPLY)) - && after(end, sender->td_end)) { - /* - * RFC 793: "if a TCP is reinitialized ... then it need - * not wait at all; it must only be sure to use sequence - * numbers larger than those recently used." - */ - sender->td_end = - sender->td_maxend = end; - sender->td_maxwin = (win == 0 ? 1 : win); - - tcp_options(skb, iph, tcph, sender); - } - - if (!(tcph->ack)) { - /* - * If there is no ACK, just pretend it was set and OK. - */ - ack = sack = receiver->td_end; - } else if (((tcp_flag_word(tcph) & (TCP_FLAG_ACK|TCP_FLAG_RST)) == - (TCP_FLAG_ACK|TCP_FLAG_RST)) - && (ack == 0)) { - /* - * Broken TCP stacks, that set ACK in RST packets as well - * with zero ack value. - */ - ack = sack = receiver->td_end; - } - - if (seq == end - && (!tcph->rst - || (seq == 0 && state->state == TCP_CONNTRACK_SYN_SENT))) - /* - * Packets contains no data: we assume it is valid - * and check the ack value only. - * However RST segments are always validated by their - * SEQ number, except when seq == 0 (reset sent answering - * SYN. - */ - seq = end = sender->td_end; - - DEBUGP("tcp_in_window: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "seq=%u ack=%u sack =%u win=%u end=%u\n", - NIPQUAD(iph->saddr), ntohs(tcph->source), - NIPQUAD(iph->daddr), ntohs(tcph->dest), - seq, ack, sack, win, end); - DEBUGP("tcp_in_window: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - - DEBUGP("tcp_in_window: I=%i II=%i III=%i IV=%i\n", - before(seq, sender->td_maxend + 1), - after(end, sender->td_end - receiver->td_maxwin - 1), - before(sack, receiver->td_end + 1), - after(ack, receiver->td_end - MAXACKWINDOW(sender))); - - if (sender->loose || receiver->loose || - (before(seq, sender->td_maxend + 1) && - after(end, sender->td_end - receiver->td_maxwin - 1) && - before(sack, receiver->td_end + 1) && - after(ack, receiver->td_end - MAXACKWINDOW(sender)))) { - /* - * Take into account window scaling (RFC 1323). - */ - if (!tcph->syn) - win <<= sender->td_scale; - - /* - * Update sender data. - */ - swin = win + (sack - ack); - if (sender->td_maxwin < swin) - sender->td_maxwin = swin; - if (after(end, sender->td_end)) - sender->td_end = end; - /* - * Update receiver data. - */ - if (after(end, sender->td_maxend)) - receiver->td_maxwin += end - sender->td_maxend; - if (after(sack + win, receiver->td_maxend - 1)) { - receiver->td_maxend = sack + win; - if (win == 0) - receiver->td_maxend++; - } - - /* - * Check retransmissions. - */ - if (index == TCP_ACK_SET) { - if (state->last_dir == dir - && state->last_seq == seq - && state->last_ack == ack - && state->last_end == end) - state->retrans++; - else { - state->last_dir = dir; - state->last_seq = seq; - state->last_ack = ack; - state->last_end = end; - state->retrans = 0; - } - } - /* - * Close the window of disabled window tracking :-) - */ - if (sender->loose) - sender->loose--; - - res = 1; - } else { - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_tcp: %s ", - before(seq, sender->td_maxend + 1) ? - after(end, sender->td_end - receiver->td_maxwin - 1) ? - before(sack, receiver->td_end + 1) ? - after(ack, receiver->td_end - MAXACKWINDOW(sender)) ? "BUG" - : "ACK is under the lower bound (possible overly delayed ACK)" - : "ACK is over the upper bound (ACKed data not seen yet)" - : "SEQ is under the lower bound (already ACKed data retransmitted)" - : "SEQ is over the upper bound (over the window of the receiver)"); - - res = ip_ct_tcp_be_liberal; - } - - DEBUGP("tcp_in_window: res=%i sender end=%u maxend=%u maxwin=%u " - "receiver end=%u maxend=%u maxwin=%u\n", - res, sender->td_end, sender->td_maxend, sender->td_maxwin, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin); - - return res; -} - -#ifdef CONFIG_IP_NF_NAT_NEEDED -/* Update sender->td_end after NAT successfully mangled the packet */ -void ip_conntrack_tcp_update(struct sk_buff *skb, - struct ip_conntrack *conntrack, - enum ip_conntrack_dir dir) -{ - struct iphdr *iph = skb->nh.iph; - struct tcphdr *tcph = (void *)skb->nh.iph + skb->nh.iph->ihl*4; - __u32 end; -#ifdef DEBUGP_VARS - struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[dir]; - struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[!dir]; -#endif - - end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph); - - write_lock_bh(&tcp_lock); - /* - * We have to worry for the ack in the reply packet only... - */ - if (after(end, conntrack->proto.tcp.seen[dir].td_end)) - conntrack->proto.tcp.seen[dir].td_end = end; - conntrack->proto.tcp.last_end = end; - write_unlock_bh(&tcp_lock); - DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); -} - -#endif - -#define TH_FIN 0x01 -#define TH_SYN 0x02 -#define TH_RST 0x04 -#define TH_PUSH 0x08 -#define TH_ACK 0x10 -#define TH_URG 0x20 -#define TH_ECE 0x40 -#define TH_CWR 0x80 - -/* table of valid flag combinations - ECE and CWR are always valid */ -static const u8 tcp_valid_flags[(TH_FIN|TH_SYN|TH_RST|TH_PUSH|TH_ACK|TH_URG) + 1] = -{ - [TH_SYN] = 1, - [TH_SYN|TH_ACK] = 1, - [TH_SYN|TH_PUSH] = 1, - [TH_SYN|TH_ACK|TH_PUSH] = 1, - [TH_RST] = 1, - [TH_RST|TH_ACK] = 1, - [TH_RST|TH_ACK|TH_PUSH] = 1, - [TH_FIN|TH_ACK] = 1, - [TH_ACK] = 1, - [TH_ACK|TH_PUSH] = 1, - [TH_ACK|TH_URG] = 1, - [TH_ACK|TH_URG|TH_PUSH] = 1, - [TH_FIN|TH_ACK|TH_PUSH] = 1, - [TH_FIN|TH_ACK|TH_URG] = 1, - [TH_FIN|TH_ACK|TH_URG|TH_PUSH] = 1, -}; - -/* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ -static int tcp_error(struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, - unsigned int hooknum) -{ - struct iphdr *iph = skb->nh.iph; - struct tcphdr _tcph, *th; - unsigned int tcplen = skb->len - iph->ihl * 4; - u_int8_t tcpflags; - - /* Smaller that minimal TCP header? */ - th = skb_header_pointer(skb, iph->ihl * 4, - sizeof(_tcph), &_tcph); - if (th == NULL) { - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_tcp: short packet "); - return -NF_ACCEPT; - } - - /* Not whole TCP header or malformed packet */ - if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_tcp: truncated/malformed packet "); - return -NF_ACCEPT; - } - - /* Checksum invalid? Ignore. - * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. - */ - /* FIXME: Source route IP option packets --RR */ - if (hooknum == NF_IP_PRE_ROUTING - && skb->ip_summed != CHECKSUM_UNNECESSARY - && csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP, - skb->ip_summed == CHECKSUM_HW ? skb->csum - : skb_checksum(skb, iph->ihl*4, tcplen, 0))) { - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_tcp: bad TCP checksum "); - return -NF_ACCEPT; - } - - /* Check TCP flags. */ - tcpflags = (((u_int8_t *)th)[13] & ~(TH_ECE|TH_CWR)); - if (!tcp_valid_flags[tcpflags]) { - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_tcp: invalid TCP flag combination "); - return -NF_ACCEPT; - } - - return NF_ACCEPT; -} - -/* Returns verdict for packet, or -1 for invalid. */ -static int tcp_packet(struct ip_conntrack *conntrack, - const struct sk_buff *skb, - enum ip_conntrack_info ctinfo) -{ - enum tcp_conntrack new_state, old_state; - enum ip_conntrack_dir dir; - struct iphdr *iph = skb->nh.iph; - struct tcphdr *th, _tcph; - unsigned long timeout; - unsigned int index; - - th = skb_header_pointer(skb, iph->ihl * 4, - sizeof(_tcph), &_tcph); - BUG_ON(th == NULL); - - write_lock_bh(&tcp_lock); - old_state = conntrack->proto.tcp.state; - dir = CTINFO2DIR(ctinfo); - index = get_conntrack_index(th); - new_state = tcp_conntracks[dir][index][old_state]; - - switch (new_state) { - case TCP_CONNTRACK_IGNORE: - /* Ignored packets: - * - * a) SYN in ORIGINAL - * b) SYN/ACK in REPLY - * c) ACK in reply direction after initial SYN in original. - */ - if (index == TCP_SYNACK_SET - && conntrack->proto.tcp.last_index == TCP_SYN_SET - && conntrack->proto.tcp.last_dir != dir - && ntohl(th->ack_seq) == - conntrack->proto.tcp.last_end) { - /* This SYN/ACK acknowledges a SYN that we earlier - * ignored as invalid. This means that the client and - * the server are both in sync, while the firewall is - * not. We kill this session and block the SYN/ACK so - * that the client cannot but retransmit its SYN and - * thus initiate a clean new session. - */ - write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - NULL, "ip_ct_tcp: " - "killing out of sync session "); - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long) - conntrack); - return -NF_DROP; - } - conntrack->proto.tcp.last_index = index; - conntrack->proto.tcp.last_dir = dir; - conntrack->proto.tcp.last_seq = ntohl(th->seq); - conntrack->proto.tcp.last_end = - segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th); - - write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_tcp: invalid packet ignored "); - return NF_ACCEPT; - case TCP_CONNTRACK_MAX: - /* Invalid packet */ - DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", - dir, get_conntrack_index(th), - old_state); - write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_tcp: invalid state "); - return -NF_ACCEPT; - case TCP_CONNTRACK_SYN_SENT: - if (old_state < TCP_CONNTRACK_TIME_WAIT) - break; - if ((conntrack->proto.tcp.seen[dir].flags & - IP_CT_TCP_FLAG_CLOSE_INIT) - || after(ntohl(th->seq), - conntrack->proto.tcp.seen[dir].td_end)) { - /* Attempt to reopen a closed connection. - * Delete this connection and look up again. */ - write_unlock_bh(&tcp_lock); - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long) - conntrack); - return -NF_REPEAT; - } else { - write_unlock_bh(&tcp_lock); - if (LOG_INVALID(IPPROTO_TCP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - NULL, "ip_ct_tcp: invalid SYN"); - return -NF_ACCEPT; - } - case TCP_CONNTRACK_CLOSE: - if (index == TCP_RST_SET - && ((test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status) - && conntrack->proto.tcp.last_index == TCP_SYN_SET) - || (!test_bit(IPS_ASSURED_BIT, &conntrack->status) - && conntrack->proto.tcp.last_index == TCP_ACK_SET)) - && ntohl(th->ack_seq) == conntrack->proto.tcp.last_end) { - /* RST sent to invalid SYN or ACK we had let through - * at a) and c) above: - * - * a) SYN was in window then - * c) we hold a half-open connection. - * - * Delete our connection entry. - * We skip window checking, because packet might ACK - * segments we ignored. */ - goto in_window; - } - /* Just fall through */ - default: - /* Keep compilers happy. */ - break; - } - - if (!tcp_in_window(&conntrack->proto.tcp, dir, index, - skb, iph, th)) { - write_unlock_bh(&tcp_lock); - return -NF_ACCEPT; - } - in_window: - /* From now on we have got in-window packets */ - conntrack->proto.tcp.last_index = index; - - DEBUGP("tcp_conntracks: src=%u.%u.%u.%u:%hu dst=%u.%u.%u.%u:%hu " - "syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n", - NIPQUAD(iph->saddr), ntohs(th->source), - NIPQUAD(iph->daddr), ntohs(th->dest), - (th->syn ? 1 : 0), (th->ack ? 1 : 0), - (th->fin ? 1 : 0), (th->rst ? 1 : 0), - old_state, new_state); - - conntrack->proto.tcp.state = new_state; - if (old_state != new_state - && (new_state == TCP_CONNTRACK_FIN_WAIT - || new_state == TCP_CONNTRACK_CLOSE)) - conntrack->proto.tcp.seen[dir].flags |= IP_CT_TCP_FLAG_CLOSE_INIT; - timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans - && *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans - ? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state]; - write_unlock_bh(&tcp_lock); - - ip_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); - if (new_state != old_state) - ip_conntrack_event_cache(IPCT_PROTOINFO, skb); - - if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { - /* If only reply is a RST, we can consider ourselves not to - have an established connection: this is a fairly common - problem case, so we can delete the conntrack - immediately. --RR */ - if (th->rst) { - if (del_timer(&conntrack->timeout)) - conntrack->timeout.function((unsigned long) - conntrack); - return NF_ACCEPT; - } - } else if (!test_bit(IPS_ASSURED_BIT, &conntrack->status) - && (old_state == TCP_CONNTRACK_SYN_RECV - || old_state == TCP_CONNTRACK_ESTABLISHED) - && new_state == TCP_CONNTRACK_ESTABLISHED) { - /* Set ASSURED if we see see valid ack in ESTABLISHED - after SYN_RECV or a valid answer for a picked up - connection. */ - set_bit(IPS_ASSURED_BIT, &conntrack->status); - ip_conntrack_event_cache(IPCT_STATUS, skb); - } - ip_ct_refresh_acct(conntrack, ctinfo, skb, timeout); - - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static int tcp_new(struct ip_conntrack *conntrack, - const struct sk_buff *skb) -{ - enum tcp_conntrack new_state; - struct iphdr *iph = skb->nh.iph; - struct tcphdr *th, _tcph; -#ifdef DEBUGP_VARS - struct ip_ct_tcp_state *sender = &conntrack->proto.tcp.seen[0]; - struct ip_ct_tcp_state *receiver = &conntrack->proto.tcp.seen[1]; -#endif - - th = skb_header_pointer(skb, iph->ihl * 4, - sizeof(_tcph), &_tcph); - BUG_ON(th == NULL); - - /* Don't need lock here: this conntrack not in circulation yet */ - new_state - = tcp_conntracks[0][get_conntrack_index(th)] - [TCP_CONNTRACK_NONE]; - - /* Invalid: delete conntrack */ - if (new_state >= TCP_CONNTRACK_MAX) { - DEBUGP("ip_ct_tcp: invalid new deleting.\n"); - return 0; - } - - if (new_state == TCP_CONNTRACK_SYN_SENT) { - /* SYN packet */ - conntrack->proto.tcp.seen[0].td_end = - segment_seq_plus_len(ntohl(th->seq), skb->len, - iph, th); - conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); - if (conntrack->proto.tcp.seen[0].td_maxwin == 0) - conntrack->proto.tcp.seen[0].td_maxwin = 1; - conntrack->proto.tcp.seen[0].td_maxend = - conntrack->proto.tcp.seen[0].td_end; - - tcp_options(skb, iph, th, &conntrack->proto.tcp.seen[0]); - conntrack->proto.tcp.seen[1].flags = 0; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = 0; - } else if (ip_ct_tcp_loose == 0) { - /* Don't try to pick up connections. */ - return 0; - } else { - /* - * We are in the middle of a connection, - * its history is lost for us. - * Let's try to use the data from the packet. - */ - conntrack->proto.tcp.seen[0].td_end = - segment_seq_plus_len(ntohl(th->seq), skb->len, - iph, th); - conntrack->proto.tcp.seen[0].td_maxwin = ntohs(th->window); - if (conntrack->proto.tcp.seen[0].td_maxwin == 0) - conntrack->proto.tcp.seen[0].td_maxwin = 1; - conntrack->proto.tcp.seen[0].td_maxend = - conntrack->proto.tcp.seen[0].td_end + - conntrack->proto.tcp.seen[0].td_maxwin; - conntrack->proto.tcp.seen[0].td_scale = 0; - - /* We assume SACK. Should we assume window scaling too? */ - conntrack->proto.tcp.seen[0].flags = - conntrack->proto.tcp.seen[1].flags = IP_CT_TCP_FLAG_SACK_PERM; - conntrack->proto.tcp.seen[0].loose = - conntrack->proto.tcp.seen[1].loose = ip_ct_tcp_loose; - } - - conntrack->proto.tcp.seen[1].td_end = 0; - conntrack->proto.tcp.seen[1].td_maxend = 0; - conntrack->proto.tcp.seen[1].td_maxwin = 1; - conntrack->proto.tcp.seen[1].td_scale = 0; - - /* tcp_packet will set them */ - conntrack->proto.tcp.state = TCP_CONNTRACK_NONE; - conntrack->proto.tcp.last_index = TCP_NONE_SET; - - DEBUGP("tcp_new: sender end=%u maxend=%u maxwin=%u scale=%i " - "receiver end=%u maxend=%u maxwin=%u scale=%i\n", - sender->td_end, sender->td_maxend, sender->td_maxwin, - sender->td_scale, - receiver->td_end, receiver->td_maxend, receiver->td_maxwin, - receiver->td_scale); - return 1; -} - -struct ip_conntrack_protocol ip_conntrack_protocol_tcp = -{ - .proto = IPPROTO_TCP, - .name = "tcp", - .pkt_to_tuple = tcp_pkt_to_tuple, - .invert_tuple = tcp_invert_tuple, - .print_tuple = tcp_print_tuple, - .print_conntrack = tcp_print_conntrack, - .packet = tcp_packet, - .new = tcp_new, - .error = tcp_error, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .to_nfattr = tcp_to_nfattr, - .from_nfattr = nfattr_to_tcp, - .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, -#endif -}; diff --git a/net/ipv4/netfilter/ip_conntrack_proto_udp.c b/net/ipv4/netfilter/ip_conntrack_proto_udp.c deleted file mode 100644 index 55b7d3210ad..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_proto_udp.c +++ /dev/null @@ -1,153 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/timer.h> -#include <linux/netfilter.h> -#include <linux/in.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/seq_file.h> -#include <net/checksum.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> - -unsigned int ip_ct_udp_timeout = 30*HZ; -unsigned int ip_ct_udp_timeout_stream = 180*HZ; - -static int udp_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct ip_conntrack_tuple *tuple) -{ - struct udphdr _hdr, *hp; - - /* Actually only need first 8 bytes. */ - hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); - if (hp == NULL) - return 0; - - tuple->src.u.udp.port = hp->source; - tuple->dst.u.udp.port = hp->dest; - - return 1; -} - -static int udp_invert_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *orig) -{ - tuple->src.u.udp.port = orig->dst.u.udp.port; - tuple->dst.u.udp.port = orig->src.u.udp.port; - return 1; -} - -/* Print out the per-protocol part of the tuple. */ -static int udp_print_tuple(struct seq_file *s, - const struct ip_conntrack_tuple *tuple) -{ - return seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.udp.port), - ntohs(tuple->dst.u.udp.port)); -} - -/* Print out the private part of the conntrack. */ -static int udp_print_conntrack(struct seq_file *s, - const struct ip_conntrack *conntrack) -{ - return 0; -} - -/* Returns verdict for packet, and may modify conntracktype */ -static int udp_packet(struct ip_conntrack *conntrack, - const struct sk_buff *skb, - enum ip_conntrack_info ctinfo) -{ - /* If we've seen traffic both ways, this is some kind of UDP - stream. Extend timeout. */ - if (test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) { - ip_ct_refresh_acct(conntrack, ctinfo, skb, - ip_ct_udp_timeout_stream); - /* Also, more likely to be important, and not a probe */ - if (!test_and_set_bit(IPS_ASSURED_BIT, &conntrack->status)) - ip_conntrack_event_cache(IPCT_STATUS, skb); - } else - ip_ct_refresh_acct(conntrack, ctinfo, skb, ip_ct_udp_timeout); - - return NF_ACCEPT; -} - -/* Called when a new connection for this protocol found. */ -static int udp_new(struct ip_conntrack *conntrack, const struct sk_buff *skb) -{ - return 1; -} - -static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo, - unsigned int hooknum) -{ - struct iphdr *iph = skb->nh.iph; - unsigned int udplen = skb->len - iph->ihl * 4; - struct udphdr _hdr, *hdr; - - /* Header is too small? */ - hdr = skb_header_pointer(skb, iph->ihl*4, sizeof(_hdr), &_hdr); - if (hdr == NULL) { - if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_udp: short packet "); - return -NF_ACCEPT; - } - - /* Truncated/malformed packets */ - if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { - if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_udp: truncated/malformed packet "); - return -NF_ACCEPT; - } - - /* Packet with no checksum */ - if (!hdr->check) - return NF_ACCEPT; - - /* Checksum invalid? Ignore. - * We skip checking packets on the outgoing path - * because the semantic of CHECKSUM_HW is different there - * and moreover root might send raw packets. - * FIXME: Source route IP option packets --RR */ - if (hooknum == NF_IP_PRE_ROUTING - && skb->ip_summed != CHECKSUM_UNNECESSARY - && csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP, - skb->ip_summed == CHECKSUM_HW ? skb->csum - : skb_checksum(skb, iph->ihl*4, udplen, 0))) { - if (LOG_INVALID(IPPROTO_UDP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "ip_ct_udp: bad UDP checksum "); - return -NF_ACCEPT; - } - - return NF_ACCEPT; -} - -struct ip_conntrack_protocol ip_conntrack_protocol_udp = -{ - .proto = IPPROTO_UDP, - .name = "udp", - .pkt_to_tuple = udp_pkt_to_tuple, - .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, - .print_conntrack = udp_print_conntrack, - .packet = udp_packet, - .new = udp_new, - .error = udp_error, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .tuple_to_nfattr = ip_ct_port_tuple_to_nfattr, - .nfattr_to_tuple = ip_ct_port_nfattr_to_tuple, -#endif -}; diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c deleted file mode 100644 index 833fcb4be5e..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ /dev/null @@ -1,1011 +0,0 @@ -/* This file contains all the functions required for the standalone - ip_conntrack module. - - These are not required by the compatibility layer. -*/ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2005 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/percpu.h> -#ifdef CONFIG_SYSCTL -#include <linux/sysctl.h> -#endif -#include <net/checksum.h> -#include <net/ip.h> -#include <net/route.h> - -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -MODULE_LICENSE("GPL"); - -extern atomic_t ip_conntrack_count; -DECLARE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat); - -static int kill_proto(struct ip_conntrack *i, void *data) -{ - return (i->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum == - *((u_int8_t *) data)); -} - -#ifdef CONFIG_PROC_FS -static int -print_tuple(struct seq_file *s, const struct ip_conntrack_tuple *tuple, - struct ip_conntrack_protocol *proto) -{ - seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", - NIPQUAD(tuple->src.ip), NIPQUAD(tuple->dst.ip)); - return proto->print_tuple(s, tuple); -} - -#ifdef CONFIG_IP_NF_CT_ACCT -static unsigned int -seq_print_counters(struct seq_file *s, - const struct ip_conntrack_counter *counter) -{ - return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)counter->packets, - (unsigned long long)counter->bytes); -} -#else -#define seq_print_counters(x, y) 0 -#endif - -struct ct_iter_state { - unsigned int bucket; -}; - -static struct list_head *ct_get_first(struct seq_file *seq) -{ - struct ct_iter_state *st = seq->private; - - for (st->bucket = 0; - st->bucket < ip_conntrack_htable_size; - st->bucket++) { - if (!list_empty(&ip_conntrack_hash[st->bucket])) - return ip_conntrack_hash[st->bucket].next; - } - return NULL; -} - -static struct list_head *ct_get_next(struct seq_file *seq, struct list_head *head) -{ - struct ct_iter_state *st = seq->private; - - head = head->next; - while (head == &ip_conntrack_hash[st->bucket]) { - if (++st->bucket >= ip_conntrack_htable_size) - return NULL; - head = ip_conntrack_hash[st->bucket].next; - } - return head; -} - -static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos) -{ - struct list_head *head = ct_get_first(seq); - - if (head) - while (pos && (head = ct_get_next(seq, head))) - pos--; - return pos ? NULL : head; -} - -static void *ct_seq_start(struct seq_file *seq, loff_t *pos) -{ - read_lock_bh(&ip_conntrack_lock); - return ct_get_idx(seq, *pos); -} - -static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - (*pos)++; - return ct_get_next(s, v); -} - -static void ct_seq_stop(struct seq_file *s, void *v) -{ - read_unlock_bh(&ip_conntrack_lock); -} - -static int ct_seq_show(struct seq_file *s, void *v) -{ - const struct ip_conntrack_tuple_hash *hash = v; - const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash); - struct ip_conntrack_protocol *proto; - - ASSERT_READ_LOCK(&ip_conntrack_lock); - IP_NF_ASSERT(conntrack); - - /* we only want to print DIR_ORIGINAL */ - if (DIRECTION(hash)) - return 0; - - proto = __ip_conntrack_proto_find(conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum); - IP_NF_ASSERT(proto); - - if (seq_printf(s, "%-8s %u %ld ", - proto->name, - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum, - timer_pending(&conntrack->timeout) - ? (long)(conntrack->timeout.expires - jiffies)/HZ - : 0) != 0) - return -ENOSPC; - - if (proto->print_conntrack(s, conntrack)) - return -ENOSPC; - - if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - proto)) - return -ENOSPC; - - if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_ORIGINAL])) - return -ENOSPC; - - if (!(test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status))) - if (seq_printf(s, "[UNREPLIED] ")) - return -ENOSPC; - - if (print_tuple(s, &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple, - proto)) - return -ENOSPC; - - if (seq_print_counters(s, &conntrack->counters[IP_CT_DIR_REPLY])) - return -ENOSPC; - - if (test_bit(IPS_ASSURED_BIT, &conntrack->status)) - if (seq_printf(s, "[ASSURED] ")) - return -ENOSPC; - -#if defined(CONFIG_IP_NF_CONNTRACK_MARK) - if (seq_printf(s, "mark=%u ", conntrack->mark)) - return -ENOSPC; -#endif - - if (seq_printf(s, "use=%u\n", atomic_read(&conntrack->ct_general.use))) - return -ENOSPC; - - return 0; -} - -static struct seq_operations ct_seq_ops = { - .start = ct_seq_start, - .next = ct_seq_next, - .stop = ct_seq_stop, - .show = ct_seq_show -}; - -static int ct_open(struct inode *inode, struct file *file) -{ - struct seq_file *seq; - struct ct_iter_state *st; - int ret; - - st = kmalloc(sizeof(struct ct_iter_state), GFP_KERNEL); - if (st == NULL) - return -ENOMEM; - ret = seq_open(file, &ct_seq_ops); - if (ret) - goto out_free; - seq = file->private_data; - seq->private = st; - memset(st, 0, sizeof(struct ct_iter_state)); - return ret; -out_free: - kfree(st); - return ret; -} - -static struct file_operations ct_file_ops = { - .owner = THIS_MODULE, - .open = ct_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -/* expects */ -static void *exp_seq_start(struct seq_file *s, loff_t *pos) -{ - struct list_head *e = &ip_conntrack_expect_list; - loff_t i; - - /* strange seq_file api calls stop even if we fail, - * thus we need to grab lock since stop unlocks */ - read_lock_bh(&ip_conntrack_lock); - - if (list_empty(e)) - return NULL; - - for (i = 0; i <= *pos; i++) { - e = e->next; - if (e == &ip_conntrack_expect_list) - return NULL; - } - return e; -} - -static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct list_head *e = v; - - ++*pos; - e = e->next; - - if (e == &ip_conntrack_expect_list) - return NULL; - - return e; -} - -static void exp_seq_stop(struct seq_file *s, void *v) -{ - read_unlock_bh(&ip_conntrack_lock); -} - -static int exp_seq_show(struct seq_file *s, void *v) -{ - struct ip_conntrack_expect *expect = v; - - if (expect->timeout.function) - seq_printf(s, "%ld ", timer_pending(&expect->timeout) - ? (long)(expect->timeout.expires - jiffies)/HZ : 0); - else - seq_printf(s, "- "); - - seq_printf(s, "proto=%u ", expect->tuple.dst.protonum); - - print_tuple(s, &expect->tuple, - __ip_conntrack_proto_find(expect->tuple.dst.protonum)); - return seq_putc(s, '\n'); -} - -static struct seq_operations exp_seq_ops = { - .start = exp_seq_start, - .next = exp_seq_next, - .stop = exp_seq_stop, - .show = exp_seq_show -}; - -static int exp_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &exp_seq_ops); -} - -static struct file_operations exp_file_ops = { - .owner = THIS_MODULE, - .open = exp_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - -static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) -{ - int cpu; - - if (*pos == 0) - return SEQ_START_TOKEN; - - for (cpu = *pos-1; cpu < NR_CPUS; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return &per_cpu(ip_conntrack_stat, cpu); - } - - return NULL; -} - -static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - int cpu; - - for (cpu = *pos; cpu < NR_CPUS; ++cpu) { - if (!cpu_possible(cpu)) - continue; - *pos = cpu+1; - return &per_cpu(ip_conntrack_stat, cpu); - } - - return NULL; -} - -static void ct_cpu_seq_stop(struct seq_file *seq, void *v) -{ -} - -static int ct_cpu_seq_show(struct seq_file *seq, void *v) -{ - unsigned int nr_conntracks = atomic_read(&ip_conntrack_count); - struct ip_conntrack_stat *st = v; - - if (v == SEQ_START_TOKEN) { - seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete\n"); - return 0; - } - - seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " - "%08x %08x %08x %08x %08x %08x %08x %08x \n", - nr_conntracks, - st->searched, - st->found, - st->new, - st->invalid, - st->ignore, - st->delete, - st->delete_list, - st->insert, - st->insert_failed, - st->drop, - st->early_drop, - st->error, - - st->expect_new, - st->expect_create, - st->expect_delete - ); - return 0; -} - -static struct seq_operations ct_cpu_seq_ops = { - .start = ct_cpu_seq_start, - .next = ct_cpu_seq_next, - .stop = ct_cpu_seq_stop, - .show = ct_cpu_seq_show, -}; - -static int ct_cpu_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ct_cpu_seq_ops); -} - -static struct file_operations ct_cpu_seq_fops = { - .owner = THIS_MODULE, - .open = ct_cpu_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; -#endif - -static unsigned int ip_confirm(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* We've seen it coming out the other side: confirm it */ - return ip_conntrack_confirm(pskb); -} - -static unsigned int ip_conntrack_help(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - - /* This is where we call the helper: as the packet goes out. */ - ct = ip_conntrack_get(*pskb, &ctinfo); - if (ct && ct->helper) { - unsigned int ret; - ret = ct->helper->help(pskb, ct, ctinfo); - if (ret != NF_ACCEPT) - return ret; - } - return NF_ACCEPT; -} - -static unsigned int ip_conntrack_defrag(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ -#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE) - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if ((*pskb)->nfct) - return NF_ACCEPT; -#endif - - /* Gather fragments. */ - if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { - *pskb = ip_ct_gather_frags(*pskb, - hooknum == NF_IP_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT); - if (!*pskb) - return NF_STOLEN; - } - return NF_ACCEPT; -} - -static unsigned int ip_conntrack_local(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); - return NF_ACCEPT; - } - return ip_conntrack_in(hooknum, pskb, in, out, okfn); -} - -/* Connection tracking may drop packets, but never alters them, so - make it the first hook. */ -static struct nf_hook_ops ip_conntrack_defrag_ops = { - .hook = ip_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, -}; - -static struct nf_hook_ops ip_conntrack_in_ops = { - .hook = ip_conntrack_in, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK, -}; - -static struct nf_hook_ops ip_conntrack_defrag_local_out_ops = { - .hook = ip_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, -}; - -static struct nf_hook_ops ip_conntrack_local_out_ops = { - .hook = ip_conntrack_local, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK, -}; - -/* helpers */ -static struct nf_hook_ops ip_conntrack_helper_out_ops = { - .hook = ip_conntrack_help, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_HELPER, -}; - -static struct nf_hook_ops ip_conntrack_helper_in_ops = { - .hook = ip_conntrack_help, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_HELPER, -}; - -/* Refragmenter; last chance. */ -static struct nf_hook_ops ip_conntrack_out_ops = { - .hook = ip_confirm, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM, -}; - -static struct nf_hook_ops ip_conntrack_local_in_ops = { - .hook = ip_confirm, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM, -}; - -/* Sysctl support */ - -#ifdef CONFIG_SYSCTL - -/* From ip_conntrack_core.c */ -extern int ip_conntrack_max; -extern unsigned int ip_conntrack_htable_size; - -/* From ip_conntrack_proto_tcp.c */ -extern unsigned int ip_ct_tcp_timeout_syn_sent; -extern unsigned int ip_ct_tcp_timeout_syn_recv; -extern unsigned int ip_ct_tcp_timeout_established; -extern unsigned int ip_ct_tcp_timeout_fin_wait; -extern unsigned int ip_ct_tcp_timeout_close_wait; -extern unsigned int ip_ct_tcp_timeout_last_ack; -extern unsigned int ip_ct_tcp_timeout_time_wait; -extern unsigned int ip_ct_tcp_timeout_close; -extern unsigned int ip_ct_tcp_timeout_max_retrans; -extern int ip_ct_tcp_loose; -extern int ip_ct_tcp_be_liberal; -extern int ip_ct_tcp_max_retrans; - -/* From ip_conntrack_proto_udp.c */ -extern unsigned int ip_ct_udp_timeout; -extern unsigned int ip_ct_udp_timeout_stream; - -/* From ip_conntrack_proto_icmp.c */ -extern unsigned int ip_ct_icmp_timeout; - -/* From ip_conntrack_proto_icmp.c */ -extern unsigned int ip_ct_generic_timeout; - -/* Log invalid packets of a given protocol */ -static int log_invalid_proto_min = 0; -static int log_invalid_proto_max = 255; - -static struct ctl_table_header *ip_ct_sysctl_header; - -static ctl_table ip_ct_sysctl_table[] = { - { - .ctl_name = NET_IPV4_NF_CONNTRACK_MAX, - .procname = "ip_conntrack_max", - .data = &ip_conntrack_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_COUNT, - .procname = "ip_conntrack_count", - .data = &ip_conntrack_count, - .maxlen = sizeof(int), - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_BUCKETS, - .procname = "ip_conntrack_buckets", - .data = &ip_conntrack_htable_size, - .maxlen = sizeof(unsigned int), - .mode = 0444, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_SENT, - .procname = "ip_conntrack_tcp_timeout_syn_sent", - .data = &ip_ct_tcp_timeout_syn_sent, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_SYN_RECV, - .procname = "ip_conntrack_tcp_timeout_syn_recv", - .data = &ip_ct_tcp_timeout_syn_recv, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_ESTABLISHED, - .procname = "ip_conntrack_tcp_timeout_established", - .data = &ip_ct_tcp_timeout_established, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_FIN_WAIT, - .procname = "ip_conntrack_tcp_timeout_fin_wait", - .data = &ip_ct_tcp_timeout_fin_wait, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE_WAIT, - .procname = "ip_conntrack_tcp_timeout_close_wait", - .data = &ip_ct_tcp_timeout_close_wait, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_LAST_ACK, - .procname = "ip_conntrack_tcp_timeout_last_ack", - .data = &ip_ct_tcp_timeout_last_ack, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_TIME_WAIT, - .procname = "ip_conntrack_tcp_timeout_time_wait", - .data = &ip_ct_tcp_timeout_time_wait, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_CLOSE, - .procname = "ip_conntrack_tcp_timeout_close", - .data = &ip_ct_tcp_timeout_close, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT, - .procname = "ip_conntrack_udp_timeout", - .data = &ip_ct_udp_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_UDP_TIMEOUT_STREAM, - .procname = "ip_conntrack_udp_timeout_stream", - .data = &ip_ct_udp_timeout_stream, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_ICMP_TIMEOUT, - .procname = "ip_conntrack_icmp_timeout", - .data = &ip_ct_icmp_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_GENERIC_TIMEOUT, - .procname = "ip_conntrack_generic_timeout", - .data = &ip_ct_generic_timeout, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_LOG_INVALID, - .procname = "ip_conntrack_log_invalid", - .data = &ip_ct_log_invalid, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_minmax, - .strategy = &sysctl_intvec, - .extra1 = &log_invalid_proto_min, - .extra2 = &log_invalid_proto_max, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_TIMEOUT_MAX_RETRANS, - .procname = "ip_conntrack_tcp_timeout_max_retrans", - .data = &ip_ct_tcp_timeout_max_retrans, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_LOOSE, - .procname = "ip_conntrack_tcp_loose", - .data = &ip_ct_tcp_loose, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_BE_LIBERAL, - .procname = "ip_conntrack_tcp_be_liberal", - .data = &ip_ct_tcp_be_liberal, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { - .ctl_name = NET_IPV4_NF_CONNTRACK_TCP_MAX_RETRANS, - .procname = "ip_conntrack_tcp_max_retrans", - .data = &ip_ct_tcp_max_retrans, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = &proc_dointvec, - }, - { .ctl_name = 0 } -}; - -#define NET_IP_CONNTRACK_MAX 2089 - -static ctl_table ip_ct_netfilter_table[] = { - { - .ctl_name = NET_IPV4_NETFILTER, - .procname = "netfilter", - .mode = 0555, - .child = ip_ct_sysctl_table, - }, - { - .ctl_name = NET_IP_CONNTRACK_MAX, - .procname = "ip_conntrack_max", - .data = &ip_conntrack_max, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec - }, - { .ctl_name = 0 } -}; - -static ctl_table ip_ct_ipv4_table[] = { - { - .ctl_name = NET_IPV4, - .procname = "ipv4", - .mode = 0555, - .child = ip_ct_netfilter_table, - }, - { .ctl_name = 0 } -}; - -static ctl_table ip_ct_net_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = ip_ct_ipv4_table, - }, - { .ctl_name = 0 } -}; - -EXPORT_SYMBOL(ip_ct_log_invalid); -#endif /* CONFIG_SYSCTL */ - -static int init_or_cleanup(int init) -{ -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *proc, *proc_exp, *proc_stat; -#endif - int ret = 0; - - if (!init) goto cleanup; - - ret = ip_conntrack_init(); - if (ret < 0) - goto cleanup_nothing; - -#ifdef CONFIG_PROC_FS - ret = -ENOMEM; - proc = proc_net_fops_create("ip_conntrack", 0440, &ct_file_ops); - if (!proc) goto cleanup_init; - - proc_exp = proc_net_fops_create("ip_conntrack_expect", 0440, - &exp_file_ops); - if (!proc_exp) goto cleanup_proc; - - proc_stat = create_proc_entry("ip_conntrack", S_IRUGO, proc_net_stat); - if (!proc_stat) - goto cleanup_proc_exp; - - proc_stat->proc_fops = &ct_cpu_seq_fops; - proc_stat->owner = THIS_MODULE; -#endif - - ret = nf_register_hook(&ip_conntrack_defrag_ops); - if (ret < 0) { - printk("ip_conntrack: can't register pre-routing defrag hook.\n"); - goto cleanup_proc_stat; - } - ret = nf_register_hook(&ip_conntrack_defrag_local_out_ops); - if (ret < 0) { - printk("ip_conntrack: can't register local_out defrag hook.\n"); - goto cleanup_defragops; - } - ret = nf_register_hook(&ip_conntrack_in_ops); - if (ret < 0) { - printk("ip_conntrack: can't register pre-routing hook.\n"); - goto cleanup_defraglocalops; - } - ret = nf_register_hook(&ip_conntrack_local_out_ops); - if (ret < 0) { - printk("ip_conntrack: can't register local out hook.\n"); - goto cleanup_inops; - } - ret = nf_register_hook(&ip_conntrack_helper_in_ops); - if (ret < 0) { - printk("ip_conntrack: can't register local in helper hook.\n"); - goto cleanup_inandlocalops; - } - ret = nf_register_hook(&ip_conntrack_helper_out_ops); - if (ret < 0) { - printk("ip_conntrack: can't register postrouting helper hook.\n"); - goto cleanup_helperinops; - } - ret = nf_register_hook(&ip_conntrack_out_ops); - if (ret < 0) { - printk("ip_conntrack: can't register post-routing hook.\n"); - goto cleanup_helperoutops; - } - ret = nf_register_hook(&ip_conntrack_local_in_ops); - if (ret < 0) { - printk("ip_conntrack: can't register local in hook.\n"); - goto cleanup_inoutandlocalops; - } -#ifdef CONFIG_SYSCTL - ip_ct_sysctl_header = register_sysctl_table(ip_ct_net_table, 0); - if (ip_ct_sysctl_header == NULL) { - printk("ip_conntrack: can't register to sysctl.\n"); - ret = -ENOMEM; - goto cleanup_localinops; - } -#endif - - return ret; - - cleanup: - synchronize_net(); -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(ip_ct_sysctl_header); - cleanup_localinops: -#endif - nf_unregister_hook(&ip_conntrack_local_in_ops); - cleanup_inoutandlocalops: - nf_unregister_hook(&ip_conntrack_out_ops); - cleanup_helperoutops: - nf_unregister_hook(&ip_conntrack_helper_out_ops); - cleanup_helperinops: - nf_unregister_hook(&ip_conntrack_helper_in_ops); - cleanup_inandlocalops: - nf_unregister_hook(&ip_conntrack_local_out_ops); - cleanup_inops: - nf_unregister_hook(&ip_conntrack_in_ops); - cleanup_defraglocalops: - nf_unregister_hook(&ip_conntrack_defrag_local_out_ops); - cleanup_defragops: - nf_unregister_hook(&ip_conntrack_defrag_ops); - cleanup_proc_stat: -#ifdef CONFIG_PROC_FS - remove_proc_entry("ip_conntrack", proc_net_stat); - cleanup_proc_exp: - proc_net_remove("ip_conntrack_expect"); - cleanup_proc: - proc_net_remove("ip_conntrack"); - cleanup_init: -#endif /* CONFIG_PROC_FS */ - ip_conntrack_cleanup(); - cleanup_nothing: - return ret; -} - -/* FIXME: Allow NULL functions and sub in pointers to generic for - them. --RR */ -int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) -{ - int ret = 0; - - write_lock_bh(&ip_conntrack_lock); - if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) { - ret = -EBUSY; - goto out; - } - ip_ct_protos[proto->proto] = proto; - out: - write_unlock_bh(&ip_conntrack_lock); - return ret; -} - -void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) -{ - write_lock_bh(&ip_conntrack_lock); - ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol; - write_unlock_bh(&ip_conntrack_lock); - - /* Somebody could be still looking at the proto in bh. */ - synchronize_net(); - - /* Remove all contrack entries for this protocol */ - ip_ct_iterate_cleanup(kill_proto, &proto->proto); -} - -static int __init init(void) -{ - return init_or_cleanup(1); -} - -static void __exit fini(void) -{ - init_or_cleanup(0); -} - -module_init(init); -module_exit(fini); - -/* Some modules need us, but don't depend directly on any symbol. - They should call this. */ -void need_conntrack(void) -{ -} - -#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS -EXPORT_SYMBOL_GPL(ip_conntrack_chain); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_chain); -EXPORT_SYMBOL_GPL(ip_conntrack_register_notifier); -EXPORT_SYMBOL_GPL(ip_conntrack_unregister_notifier); -EXPORT_SYMBOL_GPL(__ip_ct_event_cache_init); -EXPORT_PER_CPU_SYMBOL_GPL(ip_conntrack_ecache); -#endif -EXPORT_SYMBOL(ip_conntrack_protocol_register); -EXPORT_SYMBOL(ip_conntrack_protocol_unregister); -EXPORT_SYMBOL(ip_ct_get_tuple); -EXPORT_SYMBOL(invert_tuplepr); -EXPORT_SYMBOL(ip_conntrack_alter_reply); -EXPORT_SYMBOL(ip_conntrack_destroyed); -EXPORT_SYMBOL(need_conntrack); -EXPORT_SYMBOL(ip_conntrack_helper_register); -EXPORT_SYMBOL(ip_conntrack_helper_unregister); -EXPORT_SYMBOL(ip_ct_iterate_cleanup); -EXPORT_SYMBOL(__ip_ct_refresh_acct); - -EXPORT_SYMBOL(ip_conntrack_expect_alloc); -EXPORT_SYMBOL(ip_conntrack_expect_put); -EXPORT_SYMBOL_GPL(__ip_conntrack_expect_find); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_find); -EXPORT_SYMBOL(ip_conntrack_expect_related); -EXPORT_SYMBOL(ip_conntrack_unexpect_related); -EXPORT_SYMBOL_GPL(ip_conntrack_expect_list); -EXPORT_SYMBOL_GPL(ip_ct_unlink_expect); - -EXPORT_SYMBOL(ip_conntrack_tuple_taken); -EXPORT_SYMBOL(ip_ct_gather_frags); -EXPORT_SYMBOL(ip_conntrack_htable_size); -EXPORT_SYMBOL(ip_conntrack_lock); -EXPORT_SYMBOL(ip_conntrack_hash); -EXPORT_SYMBOL(ip_conntrack_untracked); -EXPORT_SYMBOL_GPL(ip_conntrack_find_get); -#ifdef CONFIG_IP_NF_NAT_NEEDED -EXPORT_SYMBOL(ip_conntrack_tcp_update); -#endif - -EXPORT_SYMBOL_GPL(ip_conntrack_flush); -EXPORT_SYMBOL_GPL(__ip_conntrack_find); - -EXPORT_SYMBOL_GPL(ip_conntrack_alloc); -EXPORT_SYMBOL_GPL(ip_conntrack_free); -EXPORT_SYMBOL_GPL(ip_conntrack_hash_insert); - -EXPORT_SYMBOL_GPL(ip_ct_remove_expectations); - -EXPORT_SYMBOL_GPL(ip_conntrack_helper_find_get); -EXPORT_SYMBOL_GPL(ip_conntrack_helper_put); -EXPORT_SYMBOL_GPL(__ip_conntrack_helper_find_byname); - -EXPORT_SYMBOL_GPL(ip_conntrack_proto_find_get); -EXPORT_SYMBOL_GPL(ip_conntrack_proto_put); -EXPORT_SYMBOL_GPL(__ip_conntrack_proto_find); -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) -EXPORT_SYMBOL_GPL(ip_ct_port_tuple_to_nfattr); -EXPORT_SYMBOL_GPL(ip_ct_port_nfattr_to_tuple); -#endif diff --git a/net/ipv4/netfilter/ip_conntrack_tftp.c b/net/ipv4/netfilter/ip_conntrack_tftp.c deleted file mode 100644 index d3c5a371f99..00000000000 --- a/net/ipv4/netfilter/ip_conntrack_tftp.c +++ /dev/null @@ -1,158 +0,0 @@ -/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Version: 0.0.7 - * - * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org> - * - port to newnat API - * - */ - -#include <linux/module.h> -#include <linux/ip.h> -#include <linux/udp.h> - -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_tftp.h> -#include <linux/moduleparam.h> - -MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); -MODULE_DESCRIPTION("tftp connection tracking helper"); -MODULE_LICENSE("GPL"); - -#define MAX_PORTS 8 -static unsigned short ports[MAX_PORTS]; -static int ports_c; -module_param_array(ports, ushort, &ports_c, 0400); -MODULE_PARM_DESC(ports, "port numbers of tftp servers"); - -#if 0 -#define DEBUGP(format, args...) printk("%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - -unsigned int (*ip_nat_tftp_hook)(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - struct ip_conntrack_expect *exp); -EXPORT_SYMBOL_GPL(ip_nat_tftp_hook); - -static int tftp_help(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) -{ - struct tftphdr _tftph, *tfh; - struct ip_conntrack_expect *exp; - unsigned int ret = NF_ACCEPT; - - tfh = skb_header_pointer(*pskb, - (*pskb)->nh.iph->ihl*4+sizeof(struct udphdr), - sizeof(_tftph), &_tftph); - if (tfh == NULL) - return NF_ACCEPT; - - switch (ntohs(tfh->opcode)) { - /* RRQ and WRQ works the same way */ - case TFTP_OPCODE_READ: - case TFTP_OPCODE_WRITE: - DEBUGP(""); - DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); - - exp = ip_conntrack_expect_alloc(ct); - if (exp == NULL) - return NF_DROP; - - exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - exp->mask.src.ip = 0xffffffff; - exp->mask.dst.ip = 0xffffffff; - exp->mask.dst.u.udp.port = 0xffff; - exp->mask.dst.protonum = 0xff; - exp->expectfn = NULL; - exp->flags = 0; - - DEBUGP("expect: "); - DUMP_TUPLE(&exp->tuple); - DUMP_TUPLE(&exp->mask); - if (ip_nat_tftp_hook) - ret = ip_nat_tftp_hook(pskb, ctinfo, exp); - else if (ip_conntrack_expect_related(exp) != 0) - ret = NF_DROP; - ip_conntrack_expect_put(exp); - break; - case TFTP_OPCODE_DATA: - case TFTP_OPCODE_ACK: - DEBUGP("Data/ACK opcode\n"); - break; - case TFTP_OPCODE_ERROR: - DEBUGP("Error opcode\n"); - break; - default: - DEBUGP("Unknown opcode\n"); - } - return NF_ACCEPT; -} - -static struct ip_conntrack_helper tftp[MAX_PORTS]; -static char tftp_names[MAX_PORTS][sizeof("tftp-65535")]; - -static void fini(void) -{ - int i; - - for (i = 0 ; i < ports_c; i++) { - DEBUGP("unregistering helper for port %d\n", - ports[i]); - ip_conntrack_helper_unregister(&tftp[i]); - } -} - -static int __init init(void) -{ - int i, ret; - char *tmpname; - - if (ports_c == 0) - ports[ports_c++] = TFTP_PORT; - - for (i = 0; i < ports_c; i++) { - /* Create helper structure */ - memset(&tftp[i], 0, sizeof(struct ip_conntrack_helper)); - - tftp[i].tuple.dst.protonum = IPPROTO_UDP; - tftp[i].tuple.src.u.udp.port = htons(ports[i]); - tftp[i].mask.dst.protonum = 0xFF; - tftp[i].mask.src.u.udp.port = 0xFFFF; - tftp[i].max_expected = 1; - tftp[i].timeout = 5 * 60; /* 5 minutes */ - tftp[i].me = THIS_MODULE; - tftp[i].help = tftp_help; - - tmpname = &tftp_names[i][0]; - if (ports[i] == TFTP_PORT) - sprintf(tmpname, "tftp"); - else - sprintf(tmpname, "tftp-%d", i); - tftp[i].name = tmpname; - - DEBUGP("port #%d: %d\n", i, ports[i]); - - ret=ip_conntrack_helper_register(&tftp[i]); - if (ret) { - printk("ERROR registering helper for port %d\n", - ports[i]); - fini(); - return(ret); - } - } - return(0); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_amanda.c b/net/ipv4/netfilter/ip_nat_amanda.c deleted file mode 100644 index 706c8074f42..00000000000 --- a/net/ipv4/netfilter/ip_nat_amanda.c +++ /dev/null @@ -1,86 +0,0 @@ -/* Amanda extension for TCP NAT alteration. - * (C) 2002 by Brian J. Murrell <netfilter@interlinx.bc.ca> - * based on a copy of HW's ip_nat_irc.c as well as other modules - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Module load syntax: - * insmod ip_nat_amanda.o - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <net/tcp.h> -#include <net/udp.h> - -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_amanda.h> - - -MODULE_AUTHOR("Brian J. Murrell <netfilter@interlinx.bc.ca>"); -MODULE_DESCRIPTION("Amanda NAT helper"); -MODULE_LICENSE("GPL"); - -static unsigned int help(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack_expect *exp) -{ - char buffer[sizeof("65535")]; - u_int16_t port; - unsigned int ret; - - /* Connection comes from client. */ - exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; - exp->dir = IP_CT_DIR_ORIGINAL; - - /* When you see the packet, we need to NAT it the same as the - * this one (ie. same IP: it will be TCP and master is UDP). */ - exp->expectfn = ip_nat_follow_master; - - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - exp->tuple.dst.u.tcp.port = htons(port); - if (ip_conntrack_expect_related(exp) == 0) - break; - } - - if (port == 0) - return NF_DROP; - - sprintf(buffer, "%u", port); - ret = ip_nat_mangle_udp_packet(pskb, exp->master, ctinfo, - matchoff, matchlen, - buffer, strlen(buffer)); - if (ret != NF_ACCEPT) - ip_conntrack_unexpect_related(exp); - return ret; -} - -static void __exit fini(void) -{ - ip_nat_amanda_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); -} - -static int __init init(void) -{ - BUG_ON(ip_nat_amanda_hook); - ip_nat_amanda_hook = help; - return 0; -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c deleted file mode 100644 index c1a61462507..00000000000 --- a/net/ipv4/netfilter/ip_nat_core.c +++ /dev/null @@ -1,644 +0,0 @@ -/* NAT for netfilter; shared with compatibility layer. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/skbuff.h> -#include <linux/netfilter_ipv4.h> -#include <linux/vmalloc.h> -#include <net/checksum.h> -#include <net/icmp.h> -#include <net/ip.h> -#include <net/tcp.h> /* For tcp_prot in getorigdst */ -#include <linux/icmp.h> -#include <linux/udp.h> -#include <linux/jhash.h> - -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/ip_conntrack_protocol.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> -#include <linux/netfilter_ipv4/ip_nat_core.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -DEFINE_RWLOCK(ip_nat_lock); - -/* Calculated at init based on memory size */ -static unsigned int ip_nat_htable_size; - -static struct list_head *bysource; - -#define MAX_IP_NAT_PROTO 256 -static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; - -static inline struct ip_nat_protocol * -__ip_nat_proto_find(u_int8_t protonum) -{ - return ip_nat_protos[protonum]; -} - -struct ip_nat_protocol * -ip_nat_proto_find_get(u_int8_t protonum) -{ - struct ip_nat_protocol *p; - - /* we need to disable preemption to make sure 'p' doesn't get - * removed until we've grabbed the reference */ - preempt_disable(); - p = __ip_nat_proto_find(protonum); - if (!try_module_get(p->me)) - p = &ip_nat_unknown_protocol; - preempt_enable(); - - return p; -} -EXPORT_SYMBOL_GPL(ip_nat_proto_find_get); - -void -ip_nat_proto_put(struct ip_nat_protocol *p) -{ - module_put(p->me); -} -EXPORT_SYMBOL_GPL(ip_nat_proto_put); - -/* We keep an extra hash for each conntrack, for fast searching. */ -static inline unsigned int -hash_by_src(const struct ip_conntrack_tuple *tuple) -{ - /* Original src, to ensure we map it consistently if poss. */ - return jhash_3words(tuple->src.ip, tuple->src.u.all, - tuple->dst.protonum, 0) % ip_nat_htable_size; -} - -/* Noone using conntrack by the time this called. */ -static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn) -{ - if (!(conn->status & IPS_NAT_DONE_MASK)) - return; - - write_lock_bh(&ip_nat_lock); - list_del(&conn->nat.info.bysource); - write_unlock_bh(&ip_nat_lock); -} - -/* We do checksum mangling, so if they were wrong before they're still - * wrong. Also works for incomplete packets (eg. ICMP dest - * unreachables.) */ -u_int16_t -ip_nat_cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} -EXPORT_SYMBOL(ip_nat_cheat_check); - -/* Is this tuple already taken? (not by us) */ -int -ip_nat_used_tuple(const struct ip_conntrack_tuple *tuple, - const struct ip_conntrack *ignored_conntrack) -{ - /* Conntrack tracking doesn't keep track of outgoing tuples; only - incoming ones. NAT means they don't have a fixed mapping, - so we invert the tuple and look for the incoming reply. - - We could keep a separate hash if this proves too slow. */ - struct ip_conntrack_tuple reply; - - invert_tuplepr(&reply, tuple); - return ip_conntrack_tuple_taken(&reply, ignored_conntrack); -} -EXPORT_SYMBOL(ip_nat_used_tuple); - -/* If we source map this tuple so reply looks like reply_tuple, will - * that meet the constraints of range. */ -static int -in_range(const struct ip_conntrack_tuple *tuple, - const struct ip_nat_range *range) -{ - struct ip_nat_protocol *proto = - __ip_nat_proto_find(tuple->dst.protonum); - - /* If we are supposed to map IPs, then we must be in the - range specified, otherwise let this drag us onto a new src IP. */ - if (range->flags & IP_NAT_RANGE_MAP_IPS) { - if (ntohl(tuple->src.ip) < ntohl(range->min_ip) - || ntohl(tuple->src.ip) > ntohl(range->max_ip)) - return 0; - } - - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) - || proto->in_range(tuple, IP_NAT_MANIP_SRC, - &range->min, &range->max)) - return 1; - - return 0; -} - -static inline int -same_src(const struct ip_conntrack *ct, - const struct ip_conntrack_tuple *tuple) -{ - return (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.protonum - == tuple->dst.protonum - && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip - == tuple->src.ip - && ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.all - == tuple->src.u.all); -} - -/* Only called for SRC manip */ -static int -find_appropriate_src(const struct ip_conntrack_tuple *tuple, - struct ip_conntrack_tuple *result, - const struct ip_nat_range *range) -{ - unsigned int h = hash_by_src(tuple); - struct ip_conntrack *ct; - - read_lock_bh(&ip_nat_lock); - list_for_each_entry(ct, &bysource[h], nat.info.bysource) { - if (same_src(ct, tuple)) { - /* Copy source part from reply tuple. */ - invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; - - if (in_range(result, range)) { - read_unlock_bh(&ip_nat_lock); - return 1; - } - } - } - read_unlock_bh(&ip_nat_lock); - return 0; -} - -/* For [FUTURE] fragmentation handling, we want the least-used - src-ip/dst-ip/proto triple. Fairness doesn't come into it. Thus - if the range specifies 1.2.3.4 ports 10000-10005 and 1.2.3.5 ports - 1-65535, we don't do pro-rata allocation based on ports; we choose - the ip with the lowest src-ip/dst-ip/proto usage. -*/ -static void -find_best_ips_proto(struct ip_conntrack_tuple *tuple, - const struct ip_nat_range *range, - const struct ip_conntrack *conntrack, - enum ip_nat_manip_type maniptype) -{ - u_int32_t *var_ipp; - /* Host order */ - u_int32_t minip, maxip, j; - - /* No IP mapping? Do nothing. */ - if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) - return; - - if (maniptype == IP_NAT_MANIP_SRC) - var_ipp = &tuple->src.ip; - else - var_ipp = &tuple->dst.ip; - - /* Fast path: only one choice. */ - if (range->min_ip == range->max_ip) { - *var_ipp = range->min_ip; - return; - } - - /* Hashing source and destination IPs gives a fairly even - * spread in practice (if there are a small number of IPs - * involved, there usually aren't that many connections - * anyway). The consistency means that servers see the same - * client coming from the same IP (some Internet Banking sites - * like this), even across reboots. */ - minip = ntohl(range->min_ip); - maxip = ntohl(range->max_ip); - j = jhash_2words(tuple->src.ip, tuple->dst.ip, 0); - *var_ipp = htonl(minip + j % (maxip - minip + 1)); -} - -/* Manipulate the tuple into the range given. For NF_IP_POST_ROUTING, - * we change the source to map into the range. For NF_IP_PRE_ROUTING - * and NF_IP_LOCAL_OUT, we change the destination to map into the - * range. It might not be possible to get a unique tuple, but we try. - * At worst (or if we race), we will end up with a final duplicate in - * __ip_conntrack_confirm and drop the packet. */ -static void -get_unique_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_conntrack_tuple *orig_tuple, - const struct ip_nat_range *range, - struct ip_conntrack *conntrack, - enum ip_nat_manip_type maniptype) -{ - struct ip_nat_protocol *proto; - - /* 1) If this srcip/proto/src-proto-part is currently mapped, - and that same mapping gives a unique tuple within the given - range, use that. - - This is only required for source (ie. NAT/masq) mappings. - So far, we don't do local source mappings, so multiple - manips not an issue. */ - if (maniptype == IP_NAT_MANIP_SRC) { - if (find_appropriate_src(orig_tuple, tuple, range)) { - DEBUGP("get_unique_tuple: Found current src map\n"); - if (!ip_nat_used_tuple(tuple, conntrack)) - return; - } - } - - /* 2) Select the least-used IP/proto combination in the given - range. */ - *tuple = *orig_tuple; - find_best_ips_proto(tuple, range, conntrack, maniptype); - - /* 3) The per-protocol part of the manip is made to map into - the range to make a unique tuple. */ - - proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); - - /* Only bother mapping if it's not already in range and unique */ - if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) - || proto->in_range(tuple, maniptype, &range->min, &range->max)) - && !ip_nat_used_tuple(tuple, conntrack)) { - ip_nat_proto_put(proto); - return; - } - - /* Last change: get protocol to try to obtain unique tuple. */ - proto->unique_tuple(tuple, range, maniptype, conntrack); - - ip_nat_proto_put(proto); -} - -unsigned int -ip_nat_setup_info(struct ip_conntrack *conntrack, - const struct ip_nat_range *range, - unsigned int hooknum) -{ - struct ip_conntrack_tuple curr_tuple, new_tuple; - struct ip_nat_info *info = &conntrack->nat.info; - int have_to_hash = !(conntrack->status & IPS_NAT_DONE_MASK); - enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); - - IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING - || hooknum == NF_IP_POST_ROUTING - || hooknum == NF_IP_LOCAL_IN - || hooknum == NF_IP_LOCAL_OUT); - BUG_ON(ip_nat_initialized(conntrack, maniptype)); - - /* What we've got will look like inverse of reply. Normally - this is what is in the conntrack, except for prior - manipulations (future optimization: if num_manips == 0, - orig_tp = - conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple) */ - invert_tuplepr(&curr_tuple, - &conntrack->tuplehash[IP_CT_DIR_REPLY].tuple); - - get_unique_tuple(&new_tuple, &curr_tuple, range, conntrack, maniptype); - - if (!ip_ct_tuple_equal(&new_tuple, &curr_tuple)) { - struct ip_conntrack_tuple reply; - - /* Alter conntrack table so will recognize replies. */ - invert_tuplepr(&reply, &new_tuple); - ip_conntrack_alter_reply(conntrack, &reply); - - /* Non-atomic: we own this at the moment. */ - if (maniptype == IP_NAT_MANIP_SRC) - conntrack->status |= IPS_SRC_NAT; - else - conntrack->status |= IPS_DST_NAT; - } - - /* Place in source hash if this is the first time. */ - if (have_to_hash) { - unsigned int srchash - = hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL] - .tuple); - write_lock_bh(&ip_nat_lock); - list_add(&info->bysource, &bysource[srchash]); - write_unlock_bh(&ip_nat_lock); - } - - /* It's done. */ - if (maniptype == IP_NAT_MANIP_DST) - set_bit(IPS_DST_NAT_DONE_BIT, &conntrack->status); - else - set_bit(IPS_SRC_NAT_DONE_BIT, &conntrack->status); - - return NF_ACCEPT; -} -EXPORT_SYMBOL(ip_nat_setup_info); - -/* Returns true if succeeded. */ -static int -manip_pkt(u_int16_t proto, - struct sk_buff **pskb, - unsigned int iphdroff, - const struct ip_conntrack_tuple *target, - enum ip_nat_manip_type maniptype) -{ - struct iphdr *iph; - struct ip_nat_protocol *p; - - if (!skb_make_writable(pskb, iphdroff + sizeof(*iph))) - return 0; - - iph = (void *)(*pskb)->data + iphdroff; - - /* Manipulate protcol part. */ - p = ip_nat_proto_find_get(proto); - if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { - ip_nat_proto_put(p); - return 0; - } - ip_nat_proto_put(p); - - iph = (void *)(*pskb)->data + iphdroff; - - if (maniptype == IP_NAT_MANIP_SRC) { - iph->check = ip_nat_cheat_check(~iph->saddr, target->src.ip, - iph->check); - iph->saddr = target->src.ip; - } else { - iph->check = ip_nat_cheat_check(~iph->daddr, target->dst.ip, - iph->check); - iph->daddr = target->dst.ip; - } - return 1; -} - -/* Do packet manipulations according to ip_nat_setup_info. */ -unsigned int ip_nat_packet(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int hooknum, - struct sk_buff **pskb) -{ - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - unsigned long statusbit; - enum ip_nat_manip_type mtype = HOOK2MANIP(hooknum); - - if (mtype == IP_NAT_MANIP_SRC) - statusbit = IPS_SRC_NAT; - else - statusbit = IPS_DST_NAT; - - /* Invert if this is reply dir. */ - if (dir == IP_CT_DIR_REPLY) - statusbit ^= IPS_NAT_MASK; - - /* Non-atomic: these bits don't change. */ - if (ct->status & statusbit) { - struct ip_conntrack_tuple target; - - /* We are aiming to look like inverse of other direction. */ - invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - - if (!manip_pkt(target.dst.protonum, pskb, 0, &target, mtype)) - return NF_DROP; - } - return NF_ACCEPT; -} -EXPORT_SYMBOL_GPL(ip_nat_packet); - -/* Dir is direction ICMP is coming from (opposite to packet it contains) */ -int ip_nat_icmp_reply_translation(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_nat_manip_type manip, - enum ip_conntrack_dir dir) -{ - struct { - struct icmphdr icmp; - struct iphdr ip; - } *inside; - struct ip_conntrack_tuple inner, target; - int hdrlen = (*pskb)->nh.iph->ihl * 4; - - if (!skb_make_writable(pskb, hdrlen + sizeof(*inside))) - return 0; - - inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - - /* We're actually going to mangle it beyond trivial checksum - adjustment, so make sure the current checksum is correct. */ - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) { - hdrlen = (*pskb)->nh.iph->ihl * 4; - if ((u16)csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, 0))) - return 0; - } - - /* Must be RELATED */ - IP_NF_ASSERT((*pskb)->nfctinfo == IP_CT_RELATED || - (*pskb)->nfctinfo == IP_CT_RELATED+IP_CT_IS_REPLY); - - /* Redirects on non-null nats must be dropped, else they'll - start talking to each other without our translation, and be - confused... --RR */ - if (inside->icmp.type == ICMP_REDIRECT) { - /* If NAT isn't finished, assume it and drop. */ - if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) - return 0; - - if (ct->status & IPS_NAT_MASK) - return 0; - } - - DEBUGP("icmp_reply_translation: translating error %p manp %u dir %s\n", - *pskb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); - - if (!ip_ct_get_tuple(&inside->ip, *pskb, (*pskb)->nh.iph->ihl*4 + - sizeof(struct icmphdr) + inside->ip.ihl*4, - &inner, - __ip_conntrack_proto_find(inside->ip.protocol))) - return 0; - - /* Change inner back to look like incoming packet. We do the - opposite manip on this hook to normal, because it might not - pass all hooks (locally-generated ICMP). Consider incoming - packet: PREROUTING (DST manip), routing produces ICMP, goes - through POSTROUTING (which must correct the DST manip). */ - if (!manip_pkt(inside->ip.protocol, pskb, - (*pskb)->nh.iph->ihl*4 - + sizeof(inside->icmp), - &ct->tuplehash[!dir].tuple, - !manip)) - return 0; - - /* Reloading "inside" here since manip_pkt inner. */ - inside = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - inside->icmp.checksum = 0; - inside->icmp.checksum = csum_fold(skb_checksum(*pskb, hdrlen, - (*pskb)->len - hdrlen, - 0)); - - /* Change outer to look the reply to an incoming packet - * (proto 0 means don't invert per-proto part). */ - - /* Obviously, we need to NAT destination IP, but source IP - should be NAT'ed only if it is from a NAT'd host. - - Explanation: some people use NAT for anonymizing. Also, - CERT recommends dropping all packets from private IP - addresses (although ICMP errors from internal links with - such addresses are not too uncommon, as Alan Cox points - out) */ - if (manip != IP_NAT_MANIP_SRC - || ((*pskb)->nh.iph->saddr == ct->tuplehash[dir].tuple.src.ip)) { - invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); - if (!manip_pkt(0, pskb, 0, &target, manip)) - return 0; - } - - return 1; -} -EXPORT_SYMBOL_GPL(ip_nat_icmp_reply_translation); - -/* Protocol registration. */ -int ip_nat_protocol_register(struct ip_nat_protocol *proto) -{ - int ret = 0; - - write_lock_bh(&ip_nat_lock); - if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) { - ret = -EBUSY; - goto out; - } - ip_nat_protos[proto->protonum] = proto; - out: - write_unlock_bh(&ip_nat_lock); - return ret; -} -EXPORT_SYMBOL(ip_nat_protocol_register); - -/* Noone stores the protocol anywhere; simply delete it. */ -void ip_nat_protocol_unregister(struct ip_nat_protocol *proto) -{ - write_lock_bh(&ip_nat_lock); - ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol; - write_unlock_bh(&ip_nat_lock); - - /* Someone could be still looking at the proto in a bh. */ - synchronize_net(); -} -EXPORT_SYMBOL(ip_nat_protocol_unregister); - -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) -int -ip_nat_port_range_to_nfattr(struct sk_buff *skb, - const struct ip_nat_range *range) -{ - NFA_PUT(skb, CTA_PROTONAT_PORT_MIN, sizeof(u_int16_t), - &range->min.tcp.port); - NFA_PUT(skb, CTA_PROTONAT_PORT_MAX, sizeof(u_int16_t), - &range->max.tcp.port); - - return 0; - -nfattr_failure: - return -1; -} - -int -ip_nat_port_nfattr_to_range(struct nfattr *tb[], struct ip_nat_range *range) -{ - int ret = 0; - - /* we have to return whether we actually parsed something or not */ - - if (tb[CTA_PROTONAT_PORT_MIN-1]) { - ret = 1; - range->min.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MIN-1]); - } - - if (!tb[CTA_PROTONAT_PORT_MAX-1]) { - if (ret) - range->max.tcp.port = range->min.tcp.port; - } else { - ret = 1; - range->max.tcp.port = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTONAT_PORT_MAX-1]); - } - - return ret; -} -EXPORT_SYMBOL_GPL(ip_nat_port_nfattr_to_range); -EXPORT_SYMBOL_GPL(ip_nat_port_range_to_nfattr); -#endif - -static int __init ip_nat_init(void) -{ - size_t i; - - /* Leave them the same for the moment. */ - ip_nat_htable_size = ip_conntrack_htable_size; - - /* One vmalloc for both hash tables */ - bysource = vmalloc(sizeof(struct list_head) * ip_nat_htable_size); - if (!bysource) - return -ENOMEM; - - /* Sew in builtin protocols. */ - write_lock_bh(&ip_nat_lock); - for (i = 0; i < MAX_IP_NAT_PROTO; i++) - ip_nat_protos[i] = &ip_nat_unknown_protocol; - ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp; - ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp; - ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp; - write_unlock_bh(&ip_nat_lock); - - for (i = 0; i < ip_nat_htable_size; i++) { - INIT_LIST_HEAD(&bysource[i]); - } - - /* FIXME: Man, this is a hack. <SIGH> */ - IP_NF_ASSERT(ip_conntrack_destroyed == NULL); - ip_conntrack_destroyed = &ip_nat_cleanup_conntrack; - - /* Initialize fake conntrack so that NAT will skip it */ - ip_conntrack_untracked.status |= IPS_NAT_DONE_MASK; - return 0; -} - -/* Clear NAT section of all conntracks, in case we're loaded again. */ -static int clean_nat(struct ip_conntrack *i, void *data) -{ - memset(&i->nat, 0, sizeof(i->nat)); - i->status &= ~(IPS_NAT_MASK | IPS_NAT_DONE_MASK | IPS_SEQ_ADJUST); - return 0; -} - -static void __exit ip_nat_cleanup(void) -{ - ip_ct_iterate_cleanup(&clean_nat, NULL); - ip_conntrack_destroyed = NULL; - vfree(bysource); -} - -MODULE_LICENSE("GPL"); - -module_init(ip_nat_init); -module_exit(ip_nat_cleanup); diff --git a/net/ipv4/netfilter/ip_nat_ftp.c b/net/ipv4/netfilter/ip_nat_ftp.c deleted file mode 100644 index b8daab3c64a..00000000000 --- a/net/ipv4/netfilter/ip_nat_ftp.c +++ /dev/null @@ -1,181 +0,0 @@ -/* FTP extension for TCP NAT alteration. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/netfilter_ipv4.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/moduleparam.h> -#include <net/tcp.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_conntrack_ftp.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); -MODULE_DESCRIPTION("ftp NAT helper"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -/* FIXME: Time out? --RR */ - -static int -mangle_rfc959_packet(struct sk_buff **pskb, - u_int32_t newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - u32 *seq) -{ - char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")]; - - sprintf(buffer, "%u,%u,%u,%u,%u,%u", - NIPQUAD(newip), port>>8, port&0xFF); - - DEBUGP("calling ip_nat_mangle_tcp_packet\n"); - - *seq += strlen(buffer) - matchlen; - return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); -} - -/* |1|132.235.1.2|6275| */ -static int -mangle_eprt_packet(struct sk_buff **pskb, - u_int32_t newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - u32 *seq) -{ - char buffer[sizeof("|1|255.255.255.255|65535|")]; - - sprintf(buffer, "|1|%u.%u.%u.%u|%u|", NIPQUAD(newip), port); - - DEBUGP("calling ip_nat_mangle_tcp_packet\n"); - - *seq += strlen(buffer) - matchlen; - return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); -} - -/* |1|132.235.1.2|6275| */ -static int -mangle_epsv_packet(struct sk_buff **pskb, - u_int32_t newip, - u_int16_t port, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - u32 *seq) -{ - char buffer[sizeof("|||65535|")]; - - sprintf(buffer, "|||%u|", port); - - DEBUGP("calling ip_nat_mangle_tcp_packet\n"); - - *seq += strlen(buffer) - matchlen; - return ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, matchoff, - matchlen, buffer, strlen(buffer)); -} - -static int (*mangle[])(struct sk_buff **, u_int32_t, u_int16_t, - unsigned int, - unsigned int, - struct ip_conntrack *, - enum ip_conntrack_info, - u32 *seq) -= { [IP_CT_FTP_PORT] = mangle_rfc959_packet, - [IP_CT_FTP_PASV] = mangle_rfc959_packet, - [IP_CT_FTP_EPRT] = mangle_eprt_packet, - [IP_CT_FTP_EPSV] = mangle_epsv_packet -}; - -/* So, this packet has hit the connection tracking matching code. - Mangle it, and change the expectation to match the new version. */ -static unsigned int ip_nat_ftp(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - enum ip_ct_ftp_type type, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack_expect *exp, - u32 *seq) -{ - u_int32_t newip; - u_int16_t port; - int dir = CTINFO2DIR(ctinfo); - struct ip_conntrack *ct = exp->master; - - DEBUGP("FTP_NAT: type %i, off %u len %u\n", type, matchoff, matchlen); - - /* Connection will come from wherever this packet goes, hence !dir */ - newip = ct->tuplehash[!dir].tuple.dst.ip; - exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; - exp->dir = !dir; - - /* When you see the packet, we need to NAT it the same as the - * this one. */ - exp->expectfn = ip_nat_follow_master; - - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - exp->tuple.dst.u.tcp.port = htons(port); - if (ip_conntrack_expect_related(exp) == 0) - break; - } - - if (port == 0) - return NF_DROP; - - if (!mangle[type](pskb, newip, port, matchoff, matchlen, ct, ctinfo, - seq)) { - ip_conntrack_unexpect_related(exp); - return NF_DROP; - } - return NF_ACCEPT; -} - -static void __exit fini(void) -{ - ip_nat_ftp_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); -} - -static int __init init(void) -{ - BUG_ON(ip_nat_ftp_hook); - ip_nat_ftp_hook = ip_nat_ftp; - return 0; -} - -/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ -static int warn_set(const char *val, struct kernel_param *kp) -{ - printk(KERN_INFO KBUILD_MODNAME - ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); - return 0; -} -module_param_call(ports, warn_set, NULL, NULL, 0); - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_helper.c b/net/ipv4/netfilter/ip_nat_helper.c deleted file mode 100644 index 5d506e0564d..00000000000 --- a/net/ipv4/netfilter/ip_nat_helper.c +++ /dev/null @@ -1,431 +0,0 @@ -/* ip_nat_helper.c - generic support functions for NAT helpers - * - * (C) 2000-2002 Harald Welte <laforge@netfilter.org> - * (C) 2003-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * 14 Jan 2002 Harald Welte <laforge@gnumonks.org>: - * - add support for SACK adjustment - * 14 Mar 2002 Harald Welte <laforge@gnumonks.org>: - * - merge SACK support into newnat API - * 16 Aug 2002 Brian J. Murrell <netfilter@interlinx.bc.ca>: - * - make ip_nat_resize_packet more generic (TCP and UDP) - * - add ip_nat_mangle_udp_packet - */ -#include <linux/config.h> -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/types.h> -#include <linux/timer.h> -#include <linux/skbuff.h> -#include <linux/netfilter_ipv4.h> -#include <net/checksum.h> -#include <net/icmp.h> -#include <net/ip.h> -#include <net/tcp.h> -#include <net/udp.h> - -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - -#include <linux/netfilter_ipv4/ip_conntrack.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> -#include <linux/netfilter_ipv4/ip_nat_core.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/listhelp.h> - -#if 0 -#define DEBUGP printk -#define DUMP_OFFSET(x) printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos); -#else -#define DEBUGP(format, args...) -#define DUMP_OFFSET(x) -#endif - -static DEFINE_SPINLOCK(ip_nat_seqofs_lock); - -/* Setup TCP sequence correction given this change at this sequence */ -static inline void -adjust_tcp_sequence(u32 seq, - int sizediff, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) -{ - int dir; - struct ip_nat_seq *this_way, *other_way; - - DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n", - (*skb)->len, new_size); - - dir = CTINFO2DIR(ctinfo); - - this_way = &ct->nat.info.seq[dir]; - other_way = &ct->nat.info.seq[!dir]; - - DEBUGP("ip_nat_resize_packet: Seq_offset before: "); - DUMP_OFFSET(this_way); - - spin_lock_bh(&ip_nat_seqofs_lock); - - /* SYN adjust. If it's uninitialized, or this is after last - * correction, record it: we don't handle more than one - * adjustment in the window, but do deal with common case of a - * retransmit */ - if (this_way->offset_before == this_way->offset_after - || before(this_way->correction_pos, seq)) { - this_way->correction_pos = seq; - this_way->offset_before = this_way->offset_after; - this_way->offset_after += sizediff; - } - spin_unlock_bh(&ip_nat_seqofs_lock); - - DEBUGP("ip_nat_resize_packet: Seq_offset after: "); - DUMP_OFFSET(this_way); -} - -/* Frobs data inside this packet, which is linear. */ -static void mangle_contents(struct sk_buff *skb, - unsigned int dataoff, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) -{ - unsigned char *data; - - BUG_ON(skb_is_nonlinear(skb)); - data = (unsigned char *)skb->nh.iph + dataoff; - - /* move post-replacement */ - memmove(data + match_offset + rep_len, - data + match_offset + match_len, - skb->tail - (data + match_offset + match_len)); - - /* insert data from buffer */ - memcpy(data + match_offset, rep_buffer, rep_len); - - /* update skb info */ - if (rep_len > match_len) { - DEBUGP("ip_nat_mangle_packet: Extending packet by " - "%u from %u bytes\n", rep_len - match_len, - skb->len); - skb_put(skb, rep_len - match_len); - } else { - DEBUGP("ip_nat_mangle_packet: Shrinking packet from " - "%u from %u bytes\n", match_len - rep_len, - skb->len); - __skb_trim(skb, skb->len + rep_len - match_len); - } - - /* fix IP hdr checksum information */ - skb->nh.iph->tot_len = htons(skb->len); - ip_send_check(skb->nh.iph); -} - -/* Unusual, but possible case. */ -static int enlarge_skb(struct sk_buff **pskb, unsigned int extra) -{ - struct sk_buff *nskb; - - if ((*pskb)->len + extra > 65535) - return 0; - - nskb = skb_copy_expand(*pskb, skb_headroom(*pskb), extra, GFP_ATOMIC); - if (!nskb) - return 0; - - /* Transfer socket to new skb. */ - if ((*pskb)->sk) - skb_set_owner_w(nskb, (*pskb)->sk); - kfree_skb(*pskb); - *pskb = nskb; - return 1; -} - -/* Generic function for mangling variable-length address changes inside - * NATed TCP connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX - * command in FTP). - * - * Takes care about all the nasty sequence number changes, checksumming, - * skb enlargement, ... - * - * */ -int -ip_nat_mangle_tcp_packet(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) -{ - struct iphdr *iph; - struct tcphdr *tcph; - int datalen; - - if (!skb_make_writable(pskb, (*pskb)->len)) - return 0; - - if (rep_len > match_len - && rep_len - match_len > skb_tailroom(*pskb) - && !enlarge_skb(pskb, rep_len - match_len)) - return 0; - - SKB_LINEAR_ASSERT(*pskb); - - iph = (*pskb)->nh.iph; - tcph = (void *)iph + iph->ihl*4; - - mangle_contents(*pskb, iph->ihl*4 + tcph->doff*4, - match_offset, match_len, rep_buffer, rep_len); - - datalen = (*pskb)->len - iph->ihl*4; - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, datalen, iph->saddr, iph->daddr, - csum_partial((char *)tcph, datalen, 0)); - - if (rep_len != match_len) { - set_bit(IPS_SEQ_ADJUST_BIT, &ct->status); - adjust_tcp_sequence(ntohl(tcph->seq), - (int)rep_len - (int)match_len, - ct, ctinfo); - /* Tell TCP window tracking about seq change */ - ip_conntrack_tcp_update(*pskb, ct, CTINFO2DIR(ctinfo)); - } - return 1; -} -EXPORT_SYMBOL(ip_nat_mangle_tcp_packet); - -/* Generic function for mangling variable-length address changes inside - * NATed UDP connections (like the CONNECT DATA XXXXX MESG XXXXX INDEX XXXXX - * command in the Amanda protocol) - * - * Takes care about all the nasty sequence number changes, checksumming, - * skb enlargement, ... - * - * XXX - This function could be merged with ip_nat_mangle_tcp_packet which - * should be fairly easy to do. - */ -int -ip_nat_mangle_udp_packet(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - unsigned int match_offset, - unsigned int match_len, - const char *rep_buffer, - unsigned int rep_len) -{ - struct iphdr *iph; - struct udphdr *udph; - - /* UDP helpers might accidentally mangle the wrong packet */ - iph = (*pskb)->nh.iph; - if ((*pskb)->len < iph->ihl*4 + sizeof(*udph) + - match_offset + match_len) - return 0; - - if (!skb_make_writable(pskb, (*pskb)->len)) - return 0; - - if (rep_len > match_len - && rep_len - match_len > skb_tailroom(*pskb) - && !enlarge_skb(pskb, rep_len - match_len)) - return 0; - - iph = (*pskb)->nh.iph; - udph = (void *)iph + iph->ihl*4; - mangle_contents(*pskb, iph->ihl*4 + sizeof(*udph), - match_offset, match_len, rep_buffer, rep_len); - - /* update the length of the UDP packet */ - udph->len = htons((*pskb)->len - iph->ihl*4); - - /* fix udp checksum if udp checksum was previously calculated */ - if (udph->check) { - int datalen = (*pskb)->len - iph->ihl * 4; - udph->check = 0; - udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, - datalen, IPPROTO_UDP, - csum_partial((char *)udph, - datalen, 0)); - } - - return 1; -} -EXPORT_SYMBOL(ip_nat_mangle_udp_packet); - -/* Adjust one found SACK option including checksum correction */ -static void -sack_adjust(struct sk_buff *skb, - struct tcphdr *tcph, - unsigned int sackoff, - unsigned int sackend, - struct ip_nat_seq *natseq) -{ - while (sackoff < sackend) { - struct tcp_sack_block *sack; - u_int32_t new_start_seq, new_end_seq; - - sack = (void *)skb->data + sackoff; - if (after(ntohl(sack->start_seq) - natseq->offset_before, - natseq->correction_pos)) - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_after; - else - new_start_seq = ntohl(sack->start_seq) - - natseq->offset_before; - new_start_seq = htonl(new_start_seq); - - if (after(ntohl(sack->end_seq) - natseq->offset_before, - natseq->correction_pos)) - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_after; - else - new_end_seq = ntohl(sack->end_seq) - - natseq->offset_before; - new_end_seq = htonl(new_end_seq); - - DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n", - ntohl(sack->start_seq), new_start_seq, - ntohl(sack->end_seq), new_end_seq); - - tcph->check = - ip_nat_cheat_check(~sack->start_seq, new_start_seq, - ip_nat_cheat_check(~sack->end_seq, - new_end_seq, - tcph->check)); - sack->start_seq = new_start_seq; - sack->end_seq = new_end_seq; - sackoff += sizeof(*sack); - } -} - -/* TCP SACK sequence number adjustment */ -static inline unsigned int -ip_nat_sack_adjust(struct sk_buff **pskb, - struct tcphdr *tcph, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) -{ - unsigned int dir, optoff, optend; - - optoff = (*pskb)->nh.iph->ihl*4 + sizeof(struct tcphdr); - optend = (*pskb)->nh.iph->ihl*4 + tcph->doff*4; - - if (!skb_make_writable(pskb, optend)) - return 0; - - dir = CTINFO2DIR(ctinfo); - - while (optoff < optend) { - /* Usually: option, length. */ - unsigned char *op = (*pskb)->data + optoff; - - switch (op[0]) { - case TCPOPT_EOL: - return 1; - case TCPOPT_NOP: - optoff++; - continue; - default: - /* no partial options */ - if (optoff + 1 == optend - || optoff + op[1] > optend - || op[1] < 2) - return 0; - if (op[0] == TCPOPT_SACK - && op[1] >= 2+TCPOLEN_SACK_PERBLOCK - && ((op[1] - 2) % TCPOLEN_SACK_PERBLOCK) == 0) - sack_adjust(*pskb, tcph, optoff+2, - optoff+op[1], - &ct->nat.info.seq[!dir]); - optoff += op[1]; - } - } - return 1; -} - -/* TCP sequence number adjustment. Returns 1 on success, 0 on failure */ -int -ip_nat_seq_adjust(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo) -{ - struct tcphdr *tcph; - int dir, newseq, newack; - struct ip_nat_seq *this_way, *other_way; - - dir = CTINFO2DIR(ctinfo); - - this_way = &ct->nat.info.seq[dir]; - other_way = &ct->nat.info.seq[!dir]; - - if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) - return 0; - - tcph = (void *)(*pskb)->data + (*pskb)->nh.iph->ihl*4; - if (after(ntohl(tcph->seq), this_way->correction_pos)) - newseq = ntohl(tcph->seq) + this_way->offset_after; - else - newseq = ntohl(tcph->seq) + this_way->offset_before; - newseq = htonl(newseq); - - if (after(ntohl(tcph->ack_seq) - other_way->offset_before, - other_way->correction_pos)) - newack = ntohl(tcph->ack_seq) - other_way->offset_after; - else - newack = ntohl(tcph->ack_seq) - other_way->offset_before; - newack = htonl(newack); - - tcph->check = ip_nat_cheat_check(~tcph->seq, newseq, - ip_nat_cheat_check(~tcph->ack_seq, - newack, - tcph->check)); - - DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n", - ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq), - ntohl(newack)); - - tcph->seq = newseq; - tcph->ack_seq = newack; - - if (!ip_nat_sack_adjust(pskb, tcph, ct, ctinfo)) - return 0; - - ip_conntrack_tcp_update(*pskb, ct, dir); - - return 1; -} -EXPORT_SYMBOL(ip_nat_seq_adjust); - -/* Setup NAT on this expected conntrack so it follows master. */ -/* If we fail to get a free NAT slot, we'll get dropped on confirm */ -void ip_nat_follow_master(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp) -{ - struct ip_nat_range range; - - /* This must be a fresh one. */ - BUG_ON(ct->status & IPS_NAT_DONE_MASK); - - /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.ip; - /* hook doesn't matter, but it has to do source manip */ - ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); - - /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); - range.min = range.max = exp->saved_proto; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.src.ip; - /* hook doesn't matter, but it has to do destination manip */ - ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); -} -EXPORT_SYMBOL(ip_nat_follow_master); diff --git a/net/ipv4/netfilter/ip_nat_helper_pptp.c b/net/ipv4/netfilter/ip_nat_helper_pptp.c deleted file mode 100644 index ac004895781..00000000000 --- a/net/ipv4/netfilter/ip_nat_helper_pptp.c +++ /dev/null @@ -1,414 +0,0 @@ -/* - * ip_nat_pptp.c - Version 3.0 - * - * NAT support for PPTP (Point to Point Tunneling Protocol). - * PPTP is a a protocol for creating virtual private networks. - * It is a specification defined by Microsoft and some vendors - * working with Microsoft. PPTP is built on top of a modified - * version of the Internet Generic Routing Encapsulation Protocol. - * GRE is defined in RFC 1701 and RFC 1702. Documentation of - * PPTP can be found in RFC 2637 - * - * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - * TODO: - NAT to a unique tuple, not to TCP source port - * (needs netfilter tuple reservation) - * - * Changes: - * 2002-02-10 - Version 1.3 - * - Use ip_nat_mangle_tcp_packet() because of cloned skb's - * in local connections (Philip Craig <philipc@snapgear.com>) - * - add checks for magicCookie and pptp version - * - make argument list of pptp_{out,in}bound_packet() shorter - * - move to C99 style initializers - * - print version number at module loadtime - * 2003-09-22 - Version 1.5 - * - use SNATed tcp sourceport as callid, since we get called before - * TCP header is mangled (Philip Craig <philipc@snapgear.com>) - * 2004-10-22 - Version 2.0 - * - kernel 2.6.x version - * 2005-06-10 - Version 3.0 - * - kernel >= 2.6.11 version, - * funded by Oxcoda NetBox Blue (http://www.netboxblue.com/) - * - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <net/tcp.h> - -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/ip_nat_pptp.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> -#include <linux/netfilter_ipv4/ip_conntrack_pptp.h> - -#define IP_NAT_PPTP_VERSION "3.0" - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); -MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); - - -#if 0 -extern const char *pptp_msg_name[]; -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ - __FUNCTION__, ## args) -#else -#define DEBUGP(format, args...) -#endif - -static void pptp_nat_expected(struct ip_conntrack *ct, - struct ip_conntrack_expect *exp) -{ - struct ip_conntrack *master = ct->master; - struct ip_conntrack_expect *other_exp; - struct ip_conntrack_tuple t; - struct ip_ct_pptp_master *ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info; - struct ip_nat_range range; - - ct_pptp_info = &master->help.ct_pptp_info; - nat_pptp_info = &master->nat.help.nat_pptp_info; - - /* And here goes the grand finale of corrosion... */ - - if (exp->dir == IP_CT_DIR_ORIGINAL) { - DEBUGP("we are PNS->PAC\n"); - /* therefore, build tuple for PAC->PNS */ - t.src.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip; - t.src.u.gre.key = htons(master->help.ct_pptp_info.pac_call_id); - t.dst.ip = master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip; - t.dst.u.gre.key = htons(master->help.ct_pptp_info.pns_call_id); - t.dst.protonum = IPPROTO_GRE; - } else { - DEBUGP("we are PAC->PNS\n"); - /* build tuple for PNS->PAC */ - t.src.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip; - t.src.u.gre.key = - htons(master->nat.help.nat_pptp_info.pns_call_id); - t.dst.ip = master->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip; - t.dst.u.gre.key = - htons(master->nat.help.nat_pptp_info.pac_call_id); - t.dst.protonum = IPPROTO_GRE; - } - - DEBUGP("trying to unexpect other dir: "); - DUMP_TUPLE(&t); - other_exp = ip_conntrack_expect_find(&t); - if (other_exp) { - ip_conntrack_unexpect_related(other_exp); - ip_conntrack_expect_put(other_exp); - DEBUGP("success\n"); - } else { - DEBUGP("not found!\n"); - } - - /* This must be a fresh one. */ - BUG_ON(ct->status & IPS_NAT_DONE_MASK); - - /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.dst.ip; - if (exp->dir == IP_CT_DIR_ORIGINAL) { - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max = exp->saved_proto; - } - /* hook doesn't matter, but it has to do source manip */ - ip_nat_setup_info(ct, &range, NF_IP_POST_ROUTING); - - /* For DST manip, map port here to where it's expected. */ - range.flags = IP_NAT_RANGE_MAP_IPS; - range.min_ip = range.max_ip - = ct->master->tuplehash[!exp->dir].tuple.src.ip; - if (exp->dir == IP_CT_DIR_REPLY) { - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; - range.min = range.max = exp->saved_proto; - } - /* hook doesn't matter, but it has to do destination manip */ - ip_nat_setup_info(ct, &range, NF_IP_PRE_ROUTING); -} - -/* outbound packets == from PNS to PAC */ -static int -pptp_outbound_pkt(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq) - -{ - struct ip_ct_pptp_master *ct_pptp_info = &ct->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_callid; - unsigned int cid_off; - - new_callid = htons(ct_pptp_info->pns_call_id); - - switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); - /* FIXME: ideally we would want to reserve a call ID - * here. current netfilter NAT core is not able to do - * this :( For now we use TCP source port. This breaks - * multiple calls within one control session */ - - /* save original call ID in nat_info */ - nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; - - /* don't use tcph->source since we are at a DSTmanip - * hook (e.g. PREROUTING) and pkt is not mangled yet */ - new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; - - /* save new call ID in ct info */ - ct_pptp_info->pns_call_id = ntohs(new_callid); - break; - case PPTP_IN_CALL_REPLY: - cid_off = offsetof(union pptp_ctrl_union, icreq.callID); - break; - case PPTP_CALL_CLEAR_REQUEST: - cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); - break; - default: - DEBUGP("unknown outbound packet 0x%04x:%s\n", msg, - (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); - /* fall through */ - - case PPTP_SET_LINK_INFO: - /* only need to NAT in case PAC is behind NAT box */ - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; - } - - /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass - * down to here */ - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_callid)); - - /* mangle packet */ - if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, - cid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_callid), (char *)&new_callid, - sizeof(new_callid)) == 0) - return NF_DROP; - - return NF_ACCEPT; -} - -static int -pptp_exp_gre(struct ip_conntrack_expect *expect_orig, - struct ip_conntrack_expect *expect_reply) -{ - struct ip_ct_pptp_master *ct_pptp_info = - &expect_orig->master->help.ct_pptp_info; - struct ip_nat_pptp *nat_pptp_info = - &expect_orig->master->nat.help.nat_pptp_info; - - struct ip_conntrack *ct = expect_orig->master; - - struct ip_conntrack_tuple inv_t; - struct ip_conntrack_tuple *orig_t, *reply_t; - - /* save original PAC call ID in nat_info */ - nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; - - /* alter expectation */ - orig_t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - reply_t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple; - - /* alter expectation for PNS->PAC direction */ - invert_tuplepr(&inv_t, &expect_orig->tuple); - expect_orig->saved_proto.gre.key = htons(ct_pptp_info->pns_call_id); - expect_orig->tuple.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - expect_orig->tuple.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); - expect_orig->dir = IP_CT_DIR_ORIGINAL; - inv_t.src.ip = reply_t->src.ip; - inv_t.dst.ip = reply_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); - - if (!ip_conntrack_expect_related(expect_orig)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_orig)\n"); - return 1; - } - - /* alter expectation for PAC->PNS direction */ - invert_tuplepr(&inv_t, &expect_reply->tuple); - expect_reply->saved_proto.gre.key = htons(nat_pptp_info->pns_call_id); - expect_reply->tuple.src.u.gre.key = htons(nat_pptp_info->pac_call_id); - expect_reply->tuple.dst.u.gre.key = htons(ct_pptp_info->pns_call_id); - expect_reply->dir = IP_CT_DIR_REPLY; - inv_t.src.ip = orig_t->src.ip; - inv_t.dst.ip = orig_t->dst.ip; - inv_t.src.u.gre.key = htons(nat_pptp_info->pns_call_id); - inv_t.dst.u.gre.key = htons(ct_pptp_info->pac_call_id); - - if (!ip_conntrack_expect_related(expect_reply)) { - DEBUGP("successfully registered expect\n"); - } else { - DEBUGP("can't expect_related(expect_reply)\n"); - ip_conntrack_unexpect_related(expect_orig); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &expect_reply->tuple, 0) < 0) { - DEBUGP("can't register original keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - return 1; - } - - if (ip_ct_gre_keymap_add(ct, &inv_t, 1) < 0) { - DEBUGP("can't register reply keymap\n"); - ip_conntrack_unexpect_related(expect_orig); - ip_conntrack_unexpect_related(expect_reply); - ip_ct_gre_keymap_destroy(ct); - return 1; - } - - return 0; -} - -/* inbound packets == from PAC to PNS */ -static int -pptp_inbound_pkt(struct sk_buff **pskb, - struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - struct PptpControlHeader *ctlh, - union pptp_ctrl_union *pptpReq) -{ - struct ip_nat_pptp *nat_pptp_info = &ct->nat.help.nat_pptp_info; - u_int16_t msg, new_cid = 0, new_pcid; - unsigned int pcid_off, cid_off = 0; - - new_pcid = htons(nat_pptp_info->pns_call_id); - - switch (msg = ntohs(ctlh->messageType)) { - case PPTP_OUT_CALL_REPLY: - pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); - cid_off = offsetof(union pptp_ctrl_union, ocack.callID); - break; - case PPTP_IN_CALL_CONNECT: - pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); - break; - case PPTP_IN_CALL_REQUEST: - /* only need to nat in case PAC is behind NAT box */ - return NF_ACCEPT; - case PPTP_WAN_ERROR_NOTIFY: - pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID); - break; - case PPTP_CALL_DISCONNECT_NOTIFY: - pcid_off = offsetof(union pptp_ctrl_union, disc.callID); - break; - case PPTP_SET_LINK_INFO: - pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); - break; - - default: - DEBUGP("unknown inbound packet %s\n", (msg <= PPTP_MSG_MAX)? - pptp_msg_name[msg]:pptp_msg_name[0]); - /* fall through */ - - case PPTP_START_SESSION_REQUEST: - case PPTP_START_SESSION_REPLY: - case PPTP_STOP_SESSION_REQUEST: - case PPTP_STOP_SESSION_REPLY: - case PPTP_ECHO_REQUEST: - case PPTP_ECHO_REPLY: - /* no need to alter packet */ - return NF_ACCEPT; - } - - /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, - * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ - - /* mangle packet */ - DEBUGP("altering peer call id from 0x%04x to 0x%04x\n", - ntohs(*(u_int16_t *)pptpReq + pcid_off), ntohs(new_pcid)); - - if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, - pcid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_pcid), (char *)&new_pcid, - sizeof(new_pcid)) == 0) - return NF_DROP; - - if (new_cid) { - DEBUGP("altering call id from 0x%04x to 0x%04x\n", - ntohs(*(u_int16_t *)pptpReq + cid_off), ntohs(new_cid)); - if (ip_nat_mangle_tcp_packet(pskb, ct, ctinfo, - cid_off + sizeof(struct pptp_pkt_hdr) + - sizeof(struct PptpControlHeader), - sizeof(new_cid), (char *)&new_cid, - sizeof(new_cid)) == 0) - return NF_DROP; - } - return NF_ACCEPT; -} - - -extern int __init ip_nat_proto_gre_init(void); -extern void __exit ip_nat_proto_gre_fini(void); - -static int __init init(void) -{ - int ret; - - DEBUGP("%s: registering NAT helper\n", __FILE__); - - ret = ip_nat_proto_gre_init(); - if (ret < 0) - return ret; - - BUG_ON(ip_nat_pptp_hook_outbound); - ip_nat_pptp_hook_outbound = &pptp_outbound_pkt; - - BUG_ON(ip_nat_pptp_hook_inbound); - ip_nat_pptp_hook_inbound = &pptp_inbound_pkt; - - BUG_ON(ip_nat_pptp_hook_exp_gre); - ip_nat_pptp_hook_exp_gre = &pptp_exp_gre; - - BUG_ON(ip_nat_pptp_hook_expectfn); - ip_nat_pptp_hook_expectfn = &pptp_nat_expected; - - printk("ip_nat_pptp version %s loaded\n", IP_NAT_PPTP_VERSION); - return 0; -} - -static void __exit fini(void) -{ - DEBUGP("cleanup_module\n" ); - - ip_nat_pptp_hook_expectfn = NULL; - ip_nat_pptp_hook_exp_gre = NULL; - ip_nat_pptp_hook_inbound = NULL; - ip_nat_pptp_hook_outbound = NULL; - - ip_nat_proto_gre_fini(); - /* Make sure noone calls it, meanwhile */ - synchronize_net(); - - printk("ip_nat_pptp version %s unloaded\n", IP_NAT_PPTP_VERSION); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_irc.c b/net/ipv4/netfilter/ip_nat_irc.c deleted file mode 100644 index 461c833eaca..00000000000 --- a/net/ipv4/netfilter/ip_nat_irc.c +++ /dev/null @@ -1,123 +0,0 @@ -/* IRC extension for TCP NAT alteration. - * (C) 2000-2001 by Harald Welte <laforge@gnumonks.org> - * (C) 2004 Rusty Russell <rusty@rustcorp.com.au> IBM Corporation - * based on a copy of RR's ip_nat_ftp.c - * - * ip_nat_irc.c,v 1.16 2001/12/06 07:42:10 laforge Exp - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/module.h> -#include <linux/netfilter_ipv4.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/kernel.h> -#include <net/tcp.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_conntrack_irc.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/moduleparam.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); -MODULE_DESCRIPTION("IRC (DCC) NAT helper"); -MODULE_LICENSE("GPL"); - -static unsigned int help(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - unsigned int matchoff, - unsigned int matchlen, - struct ip_conntrack_expect *exp) -{ - u_int16_t port; - unsigned int ret; - - /* "4294967296 65635 " */ - char buffer[18]; - - DEBUGP("IRC_NAT: info (seq %u + %u) in %u\n", - expect->seq, exp_irc_info->len, - ntohl(tcph->seq)); - - /* Reply comes from server. */ - exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; - exp->dir = IP_CT_DIR_REPLY; - - /* When you see the packet, we need to NAT it the same as the - * this one. */ - exp->expectfn = ip_nat_follow_master; - - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - exp->tuple.dst.u.tcp.port = htons(port); - if (ip_conntrack_expect_related(exp) == 0) - break; - } - - if (port == 0) - return NF_DROP; - - /* strlen("\1DCC CHAT chat AAAAAAAA P\1\n")=27 - * strlen("\1DCC SCHAT chat AAAAAAAA P\1\n")=28 - * strlen("\1DCC SEND F AAAAAAAA P S\1\n")=26 - * strlen("\1DCC MOVE F AAAAAAAA P S\1\n")=26 - * strlen("\1DCC TSEND F AAAAAAAA P S\1\n")=27 - * AAAAAAAAA: bound addr (1.0.0.0==16777216, min 8 digits, - * 255.255.255.255==4294967296, 10 digits) - * P: bound port (min 1 d, max 5d (65635)) - * F: filename (min 1 d ) - * S: size (min 1 d ) - * 0x01, \n: terminators - */ - - /* AAA = "us", ie. where server normally talks to. */ - sprintf(buffer, "%u %u", - ntohl(exp->master->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip), - port); - DEBUGP("ip_nat_irc: Inserting '%s' == %u.%u.%u.%u, port %u\n", - buffer, NIPQUAD(exp->tuple.src.ip), port); - - ret = ip_nat_mangle_tcp_packet(pskb, exp->master, ctinfo, - matchoff, matchlen, buffer, - strlen(buffer)); - if (ret != NF_ACCEPT) - ip_conntrack_unexpect_related(exp); - return ret; -} - -static void __exit fini(void) -{ - ip_nat_irc_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); -} - -static int __init init(void) -{ - BUG_ON(ip_nat_irc_hook); - ip_nat_irc_hook = help; - return 0; -} - -/* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ -static int warn_set(const char *val, struct kernel_param *kp) -{ - printk(KERN_INFO KBUILD_MODNAME - ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); - return 0; -} -module_param_call(ports, warn_set, NULL, NULL, 0); - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_nat_proto_gre.c b/net/ipv4/netfilter/ip_nat_proto_gre.c deleted file mode 100644 index 6c4899d8046..00000000000 --- a/net/ipv4/netfilter/ip_nat_proto_gre.c +++ /dev/null @@ -1,176 +0,0 @@ -/* - * ip_nat_proto_gre.c - Version 2.0 - * - * NAT protocol helper module for GRE. - * - * GRE is a generic encapsulation protocol, which is generally not very - * suited for NAT, as it has no protocol-specific part as port numbers. - * - * It has an optional key field, which may help us distinguishing two - * connections between the same two hosts. - * - * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 - * - * PPTP is built on top of a modified version of GRE, and has a mandatory - * field called "CallID", which serves us for the same purpose as the key - * field in plain GRE. - * - * Documentation about PPTP can be found in RFC 2637 - * - * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> - * - * Development of this code funded by Astaro AG (http://www.astaro.com/) - * - */ - -#include <linux/config.h> -#include <linux/module.h> -#include <linux/ip.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> -#include <linux/netfilter_ipv4/ip_conntrack_proto_gre.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); -MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); - -#if 0 -#define DEBUGP(format, args...) printk(KERN_DEBUG "%s:%s: " format, __FILE__, \ - __FUNCTION__, ## args) -#else -#define DEBUGP(x, args...) -#endif - -/* is key in given range between min and max */ -static int -gre_in_range(const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype, - const union ip_conntrack_manip_proto *min, - const union ip_conntrack_manip_proto *max) -{ - u_int32_t key; - - if (maniptype == IP_NAT_MANIP_SRC) - key = tuple->src.u.gre.key; - else - key = tuple->dst.u.gre.key; - - return ntohl(key) >= ntohl(min->gre.key) - && ntohl(key) <= ntohl(max->gre.key); -} - -/* generate unique tuple ... */ -static int -gre_unique_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_nat_range *range, - enum ip_nat_manip_type maniptype, - const struct ip_conntrack *conntrack) -{ - static u_int16_t key; - u_int16_t *keyptr; - unsigned int min, i, range_size; - - if (maniptype == IP_NAT_MANIP_SRC) - keyptr = &tuple->src.u.gre.key; - else - keyptr = &tuple->dst.u.gre.key; - - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { - DEBUGP("%p: NATing GRE PPTP\n", conntrack); - min = 1; - range_size = 0xffff; - } else { - min = ntohl(range->min.gre.key); - range_size = ntohl(range->max.gre.key) - min + 1; - } - - DEBUGP("min = %u, range_size = %u\n", min, range_size); - - for (i = 0; i < range_size; i++, key++) { - *keyptr = htonl(min + key % range_size); - if (!ip_nat_used_tuple(tuple, conntrack)) - return 1; - } - - DEBUGP("%p: no NAT mapping\n", conntrack); - - return 0; -} - -/* manipulate a GRE packet according to maniptype */ -static int -gre_manip_pkt(struct sk_buff **pskb, - unsigned int iphdroff, - const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype) -{ - struct gre_hdr *greh; - struct gre_hdr_pptp *pgreh; - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); - unsigned int hdroff = iphdroff + iph->ihl*4; - - /* pgreh includes two optional 32bit fields which are not required - * to be there. That's where the magic '8' comes from */ - if (!skb_make_writable(pskb, hdroff + sizeof(*pgreh)-8)) - return 0; - - greh = (void *)(*pskb)->data + hdroff; - pgreh = (struct gre_hdr_pptp *) greh; - - /* we only have destination manip of a packet, since 'source key' - * is not present in the packet itself */ - if (maniptype == IP_NAT_MANIP_DST) { - /* key manipulation is always dest */ - switch (greh->version) { - case 0: - if (!greh->key) { - DEBUGP("can't nat GRE w/o key\n"); - break; - } - if (greh->csum) { - /* FIXME: Never tested this code... */ - *(gre_csum(greh)) = - ip_nat_cheat_check(~*(gre_key(greh)), - tuple->dst.u.gre.key, - *(gre_csum(greh))); - } - *(gre_key(greh)) = tuple->dst.u.gre.key; - break; - case GRE_VERSION_PPTP: - DEBUGP("call_id -> 0x%04x\n", - ntohs(tuple->dst.u.gre.key)); - pgreh->call_id = tuple->dst.u.gre.key; - break; - default: - DEBUGP("can't nat unknown GRE version\n"); - return 0; - break; - } - } - return 1; -} - -/* nat helper struct */ -static struct ip_nat_protocol gre = { - .name = "GRE", - .protonum = IPPROTO_GRE, - .manip_pkt = gre_manip_pkt, - .in_range = gre_in_range, - .unique_tuple = gre_unique_tuple, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .range_to_nfattr = ip_nat_port_range_to_nfattr, - .nfattr_to_range = ip_nat_port_nfattr_to_range, -#endif -}; - -int __init ip_nat_proto_gre_init(void) -{ - return ip_nat_protocol_register(&gre); -} - -void __exit ip_nat_proto_gre_fini(void) -{ - ip_nat_protocol_unregister(&gre); -} diff --git a/net/ipv4/netfilter/ip_nat_proto_icmp.c b/net/ipv4/netfilter/ip_nat_proto_icmp.c deleted file mode 100644 index 31a3f4ccb99..00000000000 --- a/net/ipv4/netfilter/ip_nat_proto_icmp.c +++ /dev/null @@ -1,89 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> -#include <linux/netfilter.h> -#include <linux/ip.h> -#include <linux/icmp.h> -#include <linux/if.h> - -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_core.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> - -static int -icmp_in_range(const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype, - const union ip_conntrack_manip_proto *min, - const union ip_conntrack_manip_proto *max) -{ - return (tuple->src.u.icmp.id >= min->icmp.id - && tuple->src.u.icmp.id <= max->icmp.id); -} - -static int -icmp_unique_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_nat_range *range, - enum ip_nat_manip_type maniptype, - const struct ip_conntrack *conntrack) -{ - static u_int16_t id; - unsigned int range_size; - unsigned int i; - - range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; - /* If no range specified... */ - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) - range_size = 0xFFFF; - - for (i = 0; i < range_size; i++, id++) { - tuple->src.u.icmp.id = htons(ntohs(range->min.icmp.id) + - (id % range_size)); - if (!ip_nat_used_tuple(tuple, conntrack)) - return 1; - } - return 0; -} - -static int -icmp_manip_pkt(struct sk_buff **pskb, - unsigned int iphdroff, - const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype) -{ - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); - struct icmphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - - if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) - return 0; - - hdr = (struct icmphdr *)((*pskb)->data + hdroff); - - hdr->checksum = ip_nat_cheat_check(hdr->un.echo.id ^ 0xFFFF, - tuple->src.u.icmp.id, - hdr->checksum); - hdr->un.echo.id = tuple->src.u.icmp.id; - return 1; -} - -struct ip_nat_protocol ip_nat_protocol_icmp = { - .name = "ICMP", - .protonum = IPPROTO_ICMP, - .me = THIS_MODULE, - .manip_pkt = icmp_manip_pkt, - .in_range = icmp_in_range, - .unique_tuple = icmp_unique_tuple, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .range_to_nfattr = ip_nat_port_range_to_nfattr, - .nfattr_to_range = ip_nat_port_nfattr_to_range, -#endif -}; diff --git a/net/ipv4/netfilter/ip_nat_proto_tcp.c b/net/ipv4/netfilter/ip_nat_proto_tcp.c deleted file mode 100644 index a3d14079eba..00000000000 --- a/net/ipv4/netfilter/ip_nat_proto_tcp.c +++ /dev/null @@ -1,151 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> -#include <linux/netfilter.h> -#include <linux/ip.h> -#include <linux/tcp.h> -#include <linux/if.h> -#include <linux/netfilter/nfnetlink_conntrack.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> -#include <linux/netfilter_ipv4/ip_nat_core.h> - -static int -tcp_in_range(const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype, - const union ip_conntrack_manip_proto *min, - const union ip_conntrack_manip_proto *max) -{ - u_int16_t port; - - if (maniptype == IP_NAT_MANIP_SRC) - port = tuple->src.u.tcp.port; - else - port = tuple->dst.u.tcp.port; - - return ntohs(port) >= ntohs(min->tcp.port) - && ntohs(port) <= ntohs(max->tcp.port); -} - -static int -tcp_unique_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_nat_range *range, - enum ip_nat_manip_type maniptype, - const struct ip_conntrack *conntrack) -{ - static u_int16_t port; - u_int16_t *portptr; - unsigned int range_size, min, i; - - if (maniptype == IP_NAT_MANIP_SRC) - portptr = &tuple->src.u.tcp.port; - else - portptr = &tuple->dst.u.tcp.port; - - /* If no range specified... */ - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { - /* If it's dst rewrite, can't change port */ - if (maniptype == IP_NAT_MANIP_DST) - return 0; - - /* Map privileged onto privileged. */ - if (ntohs(*portptr) < 1024) { - /* Loose convention: >> 512 is credential passing */ - if (ntohs(*portptr)<512) { - min = 1; - range_size = 511 - min + 1; - } else { - min = 600; - range_size = 1023 - min + 1; - } - } else { - min = 1024; - range_size = 65535 - 1024 + 1; - } - } else { - min = ntohs(range->min.tcp.port); - range_size = ntohs(range->max.tcp.port) - min + 1; - } - - for (i = 0; i < range_size; i++, port++) { - *portptr = htons(min + port % range_size); - if (!ip_nat_used_tuple(tuple, conntrack)) { - return 1; - } - } - return 0; -} - -static int -tcp_manip_pkt(struct sk_buff **pskb, - unsigned int iphdroff, - const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype) -{ - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); - struct tcphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, newip; - u16 *portptr, newport, oldport; - int hdrsize = 8; /* TCP connection tracking guarantees this much */ - - /* this could be a inner header returned in icmp packet; in such - cases we cannot update the checksum field since it is outside of - the 8 bytes of transport layer headers we are guaranteed */ - if ((*pskb)->len >= hdroff + sizeof(struct tcphdr)) - hdrsize = sizeof(struct tcphdr); - - if (!skb_make_writable(pskb, hdroff + hdrsize)) - return 0; - - iph = (struct iphdr *)((*pskb)->data + iphdroff); - hdr = (struct tcphdr *)((*pskb)->data + hdroff); - - if (maniptype == IP_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.ip; - newport = tuple->src.u.tcp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.ip; - newport = tuple->dst.u.tcp.port; - portptr = &hdr->dest; - } - - oldport = *portptr; - *portptr = newport; - - if (hdrsize < sizeof(*hdr)) - return 1; - - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(oldport ^ 0xFFFF, - newport, - hdr->check)); - return 1; -} - -struct ip_nat_protocol ip_nat_protocol_tcp = { - .name = "TCP", - .protonum = IPPROTO_TCP, - .me = THIS_MODULE, - .manip_pkt = tcp_manip_pkt, - .in_range = tcp_in_range, - .unique_tuple = tcp_unique_tuple, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .range_to_nfattr = ip_nat_port_range_to_nfattr, - .nfattr_to_range = ip_nat_port_nfattr_to_range, -#endif -}; diff --git a/net/ipv4/netfilter/ip_nat_proto_udp.c b/net/ipv4/netfilter/ip_nat_proto_udp.c deleted file mode 100644 index ec6053fdc86..00000000000 --- a/net/ipv4/netfilter/ip_nat_proto_udp.c +++ /dev/null @@ -1,137 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> -#include <linux/netfilter.h> -#include <linux/ip.h> -#include <linux/udp.h> -#include <linux/if.h> - -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_core.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> - -static int -udp_in_range(const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype, - const union ip_conntrack_manip_proto *min, - const union ip_conntrack_manip_proto *max) -{ - u_int16_t port; - - if (maniptype == IP_NAT_MANIP_SRC) - port = tuple->src.u.udp.port; - else - port = tuple->dst.u.udp.port; - - return ntohs(port) >= ntohs(min->udp.port) - && ntohs(port) <= ntohs(max->udp.port); -} - -static int -udp_unique_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_nat_range *range, - enum ip_nat_manip_type maniptype, - const struct ip_conntrack *conntrack) -{ - static u_int16_t port; - u_int16_t *portptr; - unsigned int range_size, min, i; - - if (maniptype == IP_NAT_MANIP_SRC) - portptr = &tuple->src.u.udp.port; - else - portptr = &tuple->dst.u.udp.port; - - /* If no range specified... */ - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { - /* If it's dst rewrite, can't change port */ - if (maniptype == IP_NAT_MANIP_DST) - return 0; - - if (ntohs(*portptr) < 1024) { - /* Loose convention: >> 512 is credential passing */ - if (ntohs(*portptr)<512) { - min = 1; - range_size = 511 - min + 1; - } else { - min = 600; - range_size = 1023 - min + 1; - } - } else { - min = 1024; - range_size = 65535 - 1024 + 1; - } - } else { - min = ntohs(range->min.udp.port); - range_size = ntohs(range->max.udp.port) - min + 1; - } - - for (i = 0; i < range_size; i++, port++) { - *portptr = htons(min + port % range_size); - if (!ip_nat_used_tuple(tuple, conntrack)) - return 1; - } - return 0; -} - -static int -udp_manip_pkt(struct sk_buff **pskb, - unsigned int iphdroff, - const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype) -{ - struct iphdr *iph = (struct iphdr *)((*pskb)->data + iphdroff); - struct udphdr *hdr; - unsigned int hdroff = iphdroff + iph->ihl*4; - u32 oldip, newip; - u16 *portptr, newport; - - if (!skb_make_writable(pskb, hdroff + sizeof(*hdr))) - return 0; - - iph = (struct iphdr *)((*pskb)->data + iphdroff); - hdr = (struct udphdr *)((*pskb)->data + hdroff); - - if (maniptype == IP_NAT_MANIP_SRC) { - /* Get rid of src ip and src pt */ - oldip = iph->saddr; - newip = tuple->src.ip; - newport = tuple->src.u.udp.port; - portptr = &hdr->source; - } else { - /* Get rid of dst ip and dst pt */ - oldip = iph->daddr; - newip = tuple->dst.ip; - newport = tuple->dst.u.udp.port; - portptr = &hdr->dest; - } - if (hdr->check) /* 0 is a special case meaning no checksum */ - hdr->check = ip_nat_cheat_check(~oldip, newip, - ip_nat_cheat_check(*portptr ^ 0xFFFF, - newport, - hdr->check)); - *portptr = newport; - return 1; -} - -struct ip_nat_protocol ip_nat_protocol_udp = { - .name = "UDP", - .protonum = IPPROTO_UDP, - .me = THIS_MODULE, - .manip_pkt = udp_manip_pkt, - .in_range = udp_in_range, - .unique_tuple = udp_unique_tuple, -#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \ - defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE) - .range_to_nfattr = ip_nat_port_range_to_nfattr, - .nfattr_to_range = ip_nat_port_nfattr_to_range, -#endif -}; diff --git a/net/ipv4/netfilter/ip_nat_proto_unknown.c b/net/ipv4/netfilter/ip_nat_proto_unknown.c deleted file mode 100644 index 3bf04951724..00000000000 --- a/net/ipv4/netfilter/ip_nat_proto_unknown.c +++ /dev/null @@ -1,55 +0,0 @@ -/* The "unknown" protocol. This is what is used for protocols we - * don't understand. It's returned by ip_ct_find_proto(). - */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/init.h> -#include <linux/netfilter.h> -#include <linux/if.h> - -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> - -static int unknown_in_range(const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type manip_type, - const union ip_conntrack_manip_proto *min, - const union ip_conntrack_manip_proto *max) -{ - return 1; -} - -static int unknown_unique_tuple(struct ip_conntrack_tuple *tuple, - const struct ip_nat_range *range, - enum ip_nat_manip_type maniptype, - const struct ip_conntrack *conntrack) -{ - /* Sorry: we can't help you; if it's not unique, we can't frob - anything. */ - return 0; -} - -static int -unknown_manip_pkt(struct sk_buff **pskb, - unsigned int iphdroff, - const struct ip_conntrack_tuple *tuple, - enum ip_nat_manip_type maniptype) -{ - return 1; -} - -struct ip_nat_protocol ip_nat_unknown_protocol = { - .name = "unknown", - /* .me isn't set: getting a ref to this cannot fail. */ - .manip_pkt = unknown_manip_pkt, - .in_range = unknown_in_range, - .unique_tuple = unknown_unique_tuple, -}; diff --git a/net/ipv4/netfilter/ip_nat_rule.c b/net/ipv4/netfilter/ip_nat_rule.c deleted file mode 100644 index 1de86282d23..00000000000 --- a/net/ipv4/netfilter/ip_nat_rule.c +++ /dev/null @@ -1,341 +0,0 @@ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* Everything about the rules for NAT. */ -#include <linux/types.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/module.h> -#include <linux/kmod.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <net/checksum.h> -#include <net/route.h> -#include <linux/bitops.h> - -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_core.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/listhelp.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -#define NAT_VALID_HOOKS ((1<<NF_IP_PRE_ROUTING) | (1<<NF_IP_POST_ROUTING) | (1<<NF_IP_LOCAL_OUT)) - -static struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} nat_initial_table __initdata -= { { "nat", NAT_VALID_HOOKS, 4, - sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - { [NF_IP_PRE_ROUTING] = 0, - [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, - { [NF_IP_PRE_ROUTING] = 0, - [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard), - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, - 0, NULL, { } }, - { - /* PRE_ROUTING */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* POST_ROUTING */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_OUT */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } } - }, - /* ERROR */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_error), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, - { } }, - "ERROR" - } - } -}; - -static struct ipt_table nat_table = { - .name = "nat", - .valid_hooks = NAT_VALID_HOOKS, - .lock = RW_LOCK_UNLOCKED, - .me = THIS_MODULE, - .af = AF_INET, -}; - -/* Source NAT */ -static unsigned int ipt_snat_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - const struct ip_nat_multi_range_compat *mr = targinfo; - - IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); - - ct = ip_conntrack_get(*pskb, &ctinfo); - - /* Connection must be valid and new. */ - IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); - IP_NF_ASSERT(out); - - return ip_nat_setup_info(ct, &mr->range[0], hooknum); -} - -/* Before 2.6.11 we did implicit source NAT if required. Warn about change. */ -static void warn_if_extra_mangle(u32 dstip, u32 srcip) -{ - static int warned = 0; - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dstip } } }; - struct rtable *rt; - - if (ip_route_output_key(&rt, &fl) != 0) - return; - - if (rt->rt_src != srcip && !warned) { - printk("NAT: no longer support implicit source local NAT\n"); - printk("NAT: packet src %u.%u.%u.%u -> dst %u.%u.%u.%u\n", - NIPQUAD(srcip), NIPQUAD(dstip)); - warned = 1; - } - ip_rt_put(rt); -} - -static unsigned int ipt_dnat_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - const struct ip_nat_multi_range_compat *mr = targinfo; - - IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING - || hooknum == NF_IP_LOCAL_OUT); - - ct = ip_conntrack_get(*pskb, &ctinfo); - - /* Connection must be valid and new. */ - IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - if (hooknum == NF_IP_LOCAL_OUT - && mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) - warn_if_extra_mangle((*pskb)->nh.iph->daddr, - mr->range[0].min_ip); - - return ip_nat_setup_info(ct, &mr->range[0], hooknum); -} - -static int ipt_snat_checkentry(const char *tablename, - const void *entry, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - struct ip_nat_multi_range_compat *mr = targinfo; - - /* Must be a valid range */ - if (mr->rangesize != 1) { - printk("SNAT: multiple ranges no longer supported\n"); - return 0; - } - - if (targinfosize != IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat))) { - DEBUGP("SNAT: Target size %u wrong for %u ranges\n", - targinfosize, mr->rangesize); - return 0; - } - - /* Only allow these for NAT. */ - if (strcmp(tablename, "nat") != 0) { - DEBUGP("SNAT: wrong table %s\n", tablename); - return 0; - } - - if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) { - DEBUGP("SNAT: hook mask 0x%x bad\n", hook_mask); - return 0; - } - return 1; -} - -static int ipt_dnat_checkentry(const char *tablename, - const void *entry, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - struct ip_nat_multi_range_compat *mr = targinfo; - - /* Must be a valid range */ - if (mr->rangesize != 1) { - printk("DNAT: multiple ranges no longer supported\n"); - return 0; - } - - if (targinfosize != IPT_ALIGN(sizeof(struct ip_nat_multi_range_compat))) { - DEBUGP("DNAT: Target size %u wrong for %u ranges\n", - targinfosize, mr->rangesize); - return 0; - } - - /* Only allow these for NAT. */ - if (strcmp(tablename, "nat") != 0) { - DEBUGP("DNAT: wrong table %s\n", tablename); - return 0; - } - - if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) { - DEBUGP("DNAT: hook mask 0x%x bad\n", hook_mask); - return 0; - } - - return 1; -} - -inline unsigned int -alloc_null_binding(struct ip_conntrack *conntrack, - struct ip_nat_info *info, - unsigned int hooknum) -{ - /* Force range to this IP; let proto decide mapping for - per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). - Use reply in case it's already been mangled (eg local packet). - */ - u_int32_t ip - = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC - ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip - : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); - struct ip_nat_range range - = { IP_NAT_RANGE_MAP_IPS, ip, ip, { 0 }, { 0 } }; - - DEBUGP("Allocating NULL binding for %p (%u.%u.%u.%u)\n", conntrack, - NIPQUAD(ip)); - return ip_nat_setup_info(conntrack, &range, hooknum); -} - -unsigned int -alloc_null_binding_confirmed(struct ip_conntrack *conntrack, - struct ip_nat_info *info, - unsigned int hooknum) -{ - u_int32_t ip - = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC - ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip - : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); - u_int16_t all - = (HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC - ? conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.all - : conntrack->tuplehash[IP_CT_DIR_REPLY].tuple.src.u.all); - struct ip_nat_range range - = { IP_NAT_RANGE_MAP_IPS, ip, ip, { all }, { all } }; - - DEBUGP("Allocating NULL binding for confirmed %p (%u.%u.%u.%u)\n", - conntrack, NIPQUAD(ip)); - return ip_nat_setup_info(conntrack, &range, hooknum); -} - -int ip_nat_rule_find(struct sk_buff **pskb, - unsigned int hooknum, - const struct net_device *in, - const struct net_device *out, - struct ip_conntrack *ct, - struct ip_nat_info *info) -{ - int ret; - - ret = ipt_do_table(pskb, hooknum, in, out, &nat_table, NULL); - - if (ret == NF_ACCEPT) { - if (!ip_nat_initialized(ct, HOOK2MANIP(hooknum))) - /* NUL mapping */ - ret = alloc_null_binding(ct, info, hooknum); - } - return ret; -} - -static struct ipt_target ipt_snat_reg = { - .name = "SNAT", - .target = ipt_snat_target, - .checkentry = ipt_snat_checkentry, -}; - -static struct ipt_target ipt_dnat_reg = { - .name = "DNAT", - .target = ipt_dnat_target, - .checkentry = ipt_dnat_checkentry, -}; - -int __init ip_nat_rule_init(void) -{ - int ret; - - ret = ipt_register_table(&nat_table, &nat_initial_table.repl); - if (ret != 0) - return ret; - ret = ipt_register_target(&ipt_snat_reg); - if (ret != 0) - goto unregister_table; - - ret = ipt_register_target(&ipt_dnat_reg); - if (ret != 0) - goto unregister_snat; - - return ret; - - unregister_snat: - ipt_unregister_target(&ipt_snat_reg); - unregister_table: - ipt_unregister_table(&nat_table); - - return ret; -} - -void ip_nat_rule_cleanup(void) -{ - ipt_unregister_target(&ipt_dnat_reg); - ipt_unregister_target(&ipt_snat_reg); - ipt_unregister_table(&nat_table); -} diff --git a/net/ipv4/netfilter/ip_nat_standalone.c b/net/ipv4/netfilter/ip_nat_standalone.c deleted file mode 100644 index ad438fb185b..00000000000 --- a/net/ipv4/netfilter/ip_nat_standalone.c +++ /dev/null @@ -1,447 +0,0 @@ -/* This file contains all the functions required for the standalone - ip_nat module. - - These are not required by the compatibility layer. -*/ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* - * 23 Apr 2001: Harald Welte <laforge@gnumonks.org> - * - new API and handling of conntrack/nat helpers - * - now capable of multiple expectations for one master - * */ - -#include <linux/config.h> -#include <linux/types.h> -#include <linux/icmp.h> -#include <linux/ip.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <net/ip.h> -#include <net/checksum.h> -#include <linux/spinlock.h> - -#define ASSERT_READ_LOCK(x) -#define ASSERT_WRITE_LOCK(x) - -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_nat_protocol.h> -#include <linux/netfilter_ipv4/ip_nat_core.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ip_conntrack_core.h> -#include <linux/netfilter_ipv4/listhelp.h> - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -#define HOOKNAME(hooknum) ((hooknum) == NF_IP_POST_ROUTING ? "POST_ROUTING" \ - : ((hooknum) == NF_IP_PRE_ROUTING ? "PRE_ROUTING" \ - : ((hooknum) == NF_IP_LOCAL_OUT ? "LOCAL_OUT" \ - : ((hooknum) == NF_IP_LOCAL_IN ? "LOCAL_IN" \ - : "*ERROR*"))) - -#ifdef CONFIG_XFRM -static void nat_decode_session(struct sk_buff *skb, struct flowi *fl) -{ - struct ip_conntrack *ct; - struct ip_conntrack_tuple *t; - enum ip_conntrack_info ctinfo; - enum ip_conntrack_dir dir; - unsigned long statusbit; - - ct = ip_conntrack_get(skb, &ctinfo); - if (ct == NULL) - return; - dir = CTINFO2DIR(ctinfo); - t = &ct->tuplehash[dir].tuple; - - if (dir == IP_CT_DIR_ORIGINAL) - statusbit = IPS_DST_NAT; - else - statusbit = IPS_SRC_NAT; - - if (ct->status & statusbit) { - fl->fl4_dst = t->dst.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP) - fl->fl_ip_dport = t->dst.u.tcp.port; - } - - statusbit ^= IPS_NAT_MASK; - - if (ct->status & statusbit) { - fl->fl4_src = t->src.ip; - if (t->dst.protonum == IPPROTO_TCP || - t->dst.protonum == IPPROTO_UDP) - fl->fl_ip_sport = t->src.u.tcp.port; - } -} -#endif - -static unsigned int -ip_nat_fn(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - struct ip_nat_info *info; - /* maniptype == SRC for postrouting. */ - enum ip_nat_manip_type maniptype = HOOK2MANIP(hooknum); - - /* We never see fragments: conntrack defrags on pre-routing - and local-out, and ip_nat_out protects post-routing. */ - IP_NF_ASSERT(!((*pskb)->nh.iph->frag_off - & htons(IP_MF|IP_OFFSET))); - - /* If we had a hardware checksum before, it's now invalid */ - if ((*pskb)->ip_summed == CHECKSUM_HW) - if (skb_checksum_help(*pskb, (out == NULL))) - return NF_DROP; - - ct = ip_conntrack_get(*pskb, &ctinfo); - /* Can't track? It's not due to stress, or conntrack would - have dropped it. Hence it's the user's responsibilty to - packet filter it out, or implement conntrack/NAT for that - protocol. 8) --RR */ - if (!ct) { - /* Exception: ICMP redirect to new connection (not in - hash table yet). We must not let this through, in - case we're doing NAT to the same network. */ - if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - struct icmphdr _hdr, *hp; - - hp = skb_header_pointer(*pskb, - (*pskb)->nh.iph->ihl*4, - sizeof(_hdr), &_hdr); - if (hp != NULL && - hp->type == ICMP_REDIRECT) - return NF_DROP; - } - return NF_ACCEPT; - } - - /* Don't try to NAT if this packet is not conntracked */ - if (ct == &ip_conntrack_untracked) - return NF_ACCEPT; - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: - if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP) { - if (!ip_nat_icmp_reply_translation(pskb, ct, maniptype, - CTINFO2DIR(ctinfo))) - return NF_DROP; - else - return NF_ACCEPT; - } - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ - case IP_CT_NEW: - info = &ct->nat.info; - - /* Seen it before? This can happen for loopback, retrans, - or local packets.. */ - if (!ip_nat_initialized(ct, maniptype)) { - unsigned int ret; - - if (unlikely(is_confirmed(ct))) - /* NAT module was loaded late */ - ret = alloc_null_binding_confirmed(ct, info, - hooknum); - else if (hooknum == NF_IP_LOCAL_IN) - /* LOCAL_IN hook doesn't have a chain! */ - ret = alloc_null_binding(ct, info, hooknum); - else - ret = ip_nat_rule_find(pskb, hooknum, - in, out, ct, - info); - - if (ret != NF_ACCEPT) { - return ret; - } - } else - DEBUGP("Already setup manip %s for ct %p\n", - maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", - ct); - break; - - default: - /* ESTABLISHED */ - IP_NF_ASSERT(ctinfo == IP_CT_ESTABLISHED - || ctinfo == (IP_CT_ESTABLISHED+IP_CT_IS_REPLY)); - info = &ct->nat.info; - } - - IP_NF_ASSERT(info); - return ip_nat_packet(ct, ctinfo, hooknum, pskb); -} - -static unsigned int -ip_nat_in(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN - && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (ct->tuplehash[dir].tuple.src.ip != - ct->tuplehash[!dir].tuple.dst.ip) { - dst_release((*pskb)->dst); - (*pskb)->dst = NULL; - } - } - return ret; -} - -static unsigned int -ip_nat_out(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - - /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) - return NF_ACCEPT; - - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN - && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (ct->tuplehash[dir].tuple.src.ip != - ct->tuplehash[!dir].tuple.dst.ip -#ifdef CONFIG_XFRM - || ct->tuplehash[dir].tuple.src.u.all != - ct->tuplehash[!dir].tuple.dst.u.all -#endif - ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; - } - return ret; -} - -static unsigned int -ip_nat_local_fn(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - unsigned int ret; - - /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) - return NF_ACCEPT; - - ret = ip_nat_fn(hooknum, pskb, in, out, okfn); - if (ret != NF_DROP && ret != NF_STOLEN - && (ct = ip_conntrack_get(*pskb, &ctinfo)) != NULL) { - enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); - - if (ct->tuplehash[dir].tuple.dst.ip != - ct->tuplehash[!dir].tuple.src.ip -#ifdef CONFIG_XFRM - || ct->tuplehash[dir].tuple.dst.u.all != - ct->tuplehash[dir].tuple.src.u.all -#endif - ) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; - } - return ret; -} - -static unsigned int -ip_nat_adjust(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - - ct = ip_conntrack_get(*pskb, &ctinfo); - if (ct && test_bit(IPS_SEQ_ADJUST_BIT, &ct->status)) { - DEBUGP("ip_nat_standalone: adjusting sequence number\n"); - if (!ip_nat_seq_adjust(pskb, ct, ctinfo)) - return NF_DROP; - } - return NF_ACCEPT; -} - -/* We must be after connection tracking and before packet filtering. */ - -/* Before packet filtering, change destination */ -static struct nf_hook_ops ip_nat_in_ops = { - .hook = ip_nat_in, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_NAT_DST, -}; - -/* After packet filtering, change source */ -static struct nf_hook_ops ip_nat_out_ops = { - .hook = ip_nat_out, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC, -}; - -/* After conntrack, adjust sequence number */ -static struct nf_hook_ops ip_nat_adjust_out_ops = { - .hook = ip_nat_adjust, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SEQ_ADJUST, -}; - -/* Before packet filtering, change destination */ -static struct nf_hook_ops ip_nat_local_out_ops = { - .hook = ip_nat_local_fn, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_NAT_DST, -}; - -/* After packet filtering, change source for reply packets of LOCAL_OUT DNAT */ -static struct nf_hook_ops ip_nat_local_in_ops = { - .hook = ip_nat_fn, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_IN, - .priority = NF_IP_PRI_NAT_SRC, -}; - -/* After conntrack, adjust sequence number */ -static struct nf_hook_ops ip_nat_adjust_in_ops = { - .hook = ip_nat_adjust, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_IN, - .priority = NF_IP_PRI_NAT_SEQ_ADJUST, -}; - - -static int init_or_cleanup(int init) -{ - int ret = 0; - - need_conntrack(); - - if (!init) goto cleanup; - -#ifdef CONFIG_XFRM - BUG_ON(ip_nat_decode_session != NULL); - ip_nat_decode_session = nat_decode_session; -#endif - ret = ip_nat_rule_init(); - if (ret < 0) { - printk("ip_nat_init: can't setup rules.\n"); - goto cleanup_decode_session; - } - ret = nf_register_hook(&ip_nat_in_ops); - if (ret < 0) { - printk("ip_nat_init: can't register in hook.\n"); - goto cleanup_rule_init; - } - ret = nf_register_hook(&ip_nat_out_ops); - if (ret < 0) { - printk("ip_nat_init: can't register out hook.\n"); - goto cleanup_inops; - } - ret = nf_register_hook(&ip_nat_adjust_in_ops); - if (ret < 0) { - printk("ip_nat_init: can't register adjust in hook.\n"); - goto cleanup_outops; - } - ret = nf_register_hook(&ip_nat_adjust_out_ops); - if (ret < 0) { - printk("ip_nat_init: can't register adjust out hook.\n"); - goto cleanup_adjustin_ops; - } - ret = nf_register_hook(&ip_nat_local_out_ops); - if (ret < 0) { - printk("ip_nat_init: can't register local out hook.\n"); - goto cleanup_adjustout_ops;; - } - ret = nf_register_hook(&ip_nat_local_in_ops); - if (ret < 0) { - printk("ip_nat_init: can't register local in hook.\n"); - goto cleanup_localoutops; - } - return ret; - - cleanup: - nf_unregister_hook(&ip_nat_local_in_ops); - cleanup_localoutops: - nf_unregister_hook(&ip_nat_local_out_ops); - cleanup_adjustout_ops: - nf_unregister_hook(&ip_nat_adjust_out_ops); - cleanup_adjustin_ops: - nf_unregister_hook(&ip_nat_adjust_in_ops); - cleanup_outops: - nf_unregister_hook(&ip_nat_out_ops); - cleanup_inops: - nf_unregister_hook(&ip_nat_in_ops); - cleanup_rule_init: - ip_nat_rule_cleanup(); - cleanup_decode_session: -#ifdef CONFIG_XFRM - ip_nat_decode_session = NULL; - synchronize_net(); -#endif - return ret; -} - -static int __init init(void) -{ - return init_or_cleanup(1); -} - -static void __exit fini(void) -{ - init_or_cleanup(0); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/ip_nat_tftp.c b/net/ipv4/netfilter/ip_nat_tftp.c deleted file mode 100644 index 43c3bd7c118..00000000000 --- a/net/ipv4/netfilter/ip_nat_tftp.c +++ /dev/null @@ -1,71 +0,0 @@ -/* (C) 2001-2002 Magnus Boden <mb@ozaba.mine.nu> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Version: 0.0.7 - * - * Thu 21 Mar 2002 Harald Welte <laforge@gnumonks.org> - * - Port to newnat API - * - * This module currently supports DNAT: - * iptables -t nat -A PREROUTING -d x.x.x.x -j DNAT --to-dest x.x.x.y - * - * and SNAT: - * iptables -t nat -A POSTROUTING { -j MASQUERADE , -j SNAT --to-source x.x.x.x } - * - * It has not been tested with - * -j SNAT --to-source x.x.x.x-x.x.x.y since I only have one external ip - * If you do test this please let me know if it works or not. - * - */ - -#include <linux/module.h> -#include <linux/netfilter_ipv4.h> -#include <linux/ip.h> -#include <linux/udp.h> - -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_conntrack_tftp.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/moduleparam.h> - -MODULE_AUTHOR("Magnus Boden <mb@ozaba.mine.nu>"); -MODULE_DESCRIPTION("tftp NAT helper"); -MODULE_LICENSE("GPL"); - -static unsigned int help(struct sk_buff **pskb, - enum ip_conntrack_info ctinfo, - struct ip_conntrack_expect *exp) -{ - struct ip_conntrack *ct = exp->master; - - exp->saved_proto.udp.port - = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port; - exp->dir = IP_CT_DIR_REPLY; - exp->expectfn = ip_nat_follow_master; - if (ip_conntrack_expect_related(exp) != 0) - return NF_DROP; - return NF_ACCEPT; -} - -static void __exit fini(void) -{ - ip_nat_tftp_hook = NULL; - /* Make sure noone calls it, meanwhile. */ - synchronize_net(); -} - -static int __init init(void) -{ - BUG_ON(ip_nat_tftp_hook); - ip_nat_tftp_hook = help; - return 0; -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c deleted file mode 100644 index 36339eb39e1..00000000000 --- a/net/ipv4/netfilter/ip_queue.c +++ /dev/null @@ -1,735 +0,0 @@ -/* - * This is a module which is used for queueing IPv4 packets and - * communicating with userspace via netlink. - * - * (C) 2000-2002 James Morris <jmorris@intercode.com.au> - * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * 2000-03-27: Simplified code (thanks to Andi Kleen for clues). - * 2000-05-20: Fixed notifier problems (following Miguel Freitas' report). - * 2000-06-19: Fixed so nfmark is copied to metadata (reported by Sebastian - * Zander). - * 2000-08-01: Added Nick Williams' MAC support. - * 2002-06-25: Code cleanup. - * 2005-01-10: Added /proc counter for dropped packets; fixed so - * packets aren't delivered to user space if they're going - * to be dropped. - * 2005-05-26: local_bh_{disable,enable} around nf_reinject (Harald Welte) - * - */ -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/init.h> -#include <linux/ip.h> -#include <linux/notifier.h> -#include <linux/netdevice.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_queue.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netlink.h> -#include <linux/spinlock.h> -#include <linux/sysctl.h> -#include <linux/proc_fs.h> -#include <linux/security.h> -#include <net/sock.h> -#include <net/route.h> - -#define IPQ_QMAX_DEFAULT 1024 -#define IPQ_PROC_FS_NAME "ip_queue" -#define NET_IPQ_QMAX 2088 -#define NET_IPQ_QMAX_NAME "ip_queue_maxlen" - -struct ipq_queue_entry { - struct list_head list; - struct nf_info *info; - struct sk_buff *skb; -}; - -typedef int (*ipq_cmpfn)(struct ipq_queue_entry *, unsigned long); - -static unsigned char copy_mode = IPQ_COPY_NONE; -static unsigned int queue_maxlen = IPQ_QMAX_DEFAULT; -static DEFINE_RWLOCK(queue_lock); -static int peer_pid; -static unsigned int copy_range; -static unsigned int queue_total; -static unsigned int queue_dropped = 0; -static unsigned int queue_user_dropped = 0; -static struct sock *ipqnl; -static LIST_HEAD(queue_list); -static DECLARE_MUTEX(ipqnl_sem); - -static void -ipq_issue_verdict(struct ipq_queue_entry *entry, int verdict) -{ - /* TCP input path (and probably other bits) assume to be called - * from softirq context, not from syscall, like ipq_issue_verdict is - * called. TCP input path deadlocks with locks taken from timer - * softirq, e.g. We therefore emulate this by local_bh_disable() */ - - local_bh_disable(); - nf_reinject(entry->skb, entry->info, verdict); - local_bh_enable(); - - kfree(entry); -} - -static inline void -__ipq_enqueue_entry(struct ipq_queue_entry *entry) -{ - list_add(&entry->list, &queue_list); - queue_total++; -} - -/* - * Find and return a queued entry matched by cmpfn, or return the last - * entry if cmpfn is NULL. - */ -static inline struct ipq_queue_entry * -__ipq_find_entry(ipq_cmpfn cmpfn, unsigned long data) -{ - struct list_head *p; - - list_for_each_prev(p, &queue_list) { - struct ipq_queue_entry *entry = (struct ipq_queue_entry *)p; - - if (!cmpfn || cmpfn(entry, data)) - return entry; - } - return NULL; -} - -static inline void -__ipq_dequeue_entry(struct ipq_queue_entry *entry) -{ - list_del(&entry->list); - queue_total--; -} - -static inline struct ipq_queue_entry * -__ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) -{ - struct ipq_queue_entry *entry; - - entry = __ipq_find_entry(cmpfn, data); - if (entry == NULL) - return NULL; - - __ipq_dequeue_entry(entry); - return entry; -} - - -static inline void -__ipq_flush(int verdict) -{ - struct ipq_queue_entry *entry; - - while ((entry = __ipq_find_dequeue_entry(NULL, 0))) - ipq_issue_verdict(entry, verdict); -} - -static inline int -__ipq_set_mode(unsigned char mode, unsigned int range) -{ - int status = 0; - - switch(mode) { - case IPQ_COPY_NONE: - case IPQ_COPY_META: - copy_mode = mode; - copy_range = 0; - break; - - case IPQ_COPY_PACKET: - copy_mode = mode; - copy_range = range; - if (copy_range > 0xFFFF) - copy_range = 0xFFFF; - break; - - default: - status = -EINVAL; - - } - return status; -} - -static inline void -__ipq_reset(void) -{ - peer_pid = 0; - net_disable_timestamp(); - __ipq_set_mode(IPQ_COPY_NONE, 0); - __ipq_flush(NF_DROP); -} - -static struct ipq_queue_entry * -ipq_find_dequeue_entry(ipq_cmpfn cmpfn, unsigned long data) -{ - struct ipq_queue_entry *entry; - - write_lock_bh(&queue_lock); - entry = __ipq_find_dequeue_entry(cmpfn, data); - write_unlock_bh(&queue_lock); - return entry; -} - -static void -ipq_flush(int verdict) -{ - write_lock_bh(&queue_lock); - __ipq_flush(verdict); - write_unlock_bh(&queue_lock); -} - -static struct sk_buff * -ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp) -{ - unsigned char *old_tail; - size_t size = 0; - size_t data_len = 0; - struct sk_buff *skb; - struct ipq_packet_msg *pmsg; - struct nlmsghdr *nlh; - - read_lock_bh(&queue_lock); - - switch (copy_mode) { - case IPQ_COPY_META: - case IPQ_COPY_NONE: - size = NLMSG_SPACE(sizeof(*pmsg)); - data_len = 0; - break; - - case IPQ_COPY_PACKET: - if (entry->skb->ip_summed == CHECKSUM_HW && - (*errp = skb_checksum_help(entry->skb, - entry->info->outdev == NULL))) { - read_unlock_bh(&queue_lock); - return NULL; - } - if (copy_range == 0 || copy_range > entry->skb->len) - data_len = entry->skb->len; - else - data_len = copy_range; - - size = NLMSG_SPACE(sizeof(*pmsg) + data_len); - break; - - default: - *errp = -EINVAL; - read_unlock_bh(&queue_lock); - return NULL; - } - - read_unlock_bh(&queue_lock); - - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - goto nlmsg_failure; - - old_tail= skb->tail; - nlh = NLMSG_PUT(skb, 0, 0, IPQM_PACKET, size - sizeof(*nlh)); - pmsg = NLMSG_DATA(nlh); - memset(pmsg, 0, sizeof(*pmsg)); - - pmsg->packet_id = (unsigned long )entry; - pmsg->data_len = data_len; - pmsg->timestamp_sec = entry->skb->tstamp.off_sec; - pmsg->timestamp_usec = entry->skb->tstamp.off_usec; - pmsg->mark = entry->skb->nfmark; - pmsg->hook = entry->info->hook; - pmsg->hw_protocol = entry->skb->protocol; - - if (entry->info->indev) - strcpy(pmsg->indev_name, entry->info->indev->name); - else - pmsg->indev_name[0] = '\0'; - - if (entry->info->outdev) - strcpy(pmsg->outdev_name, entry->info->outdev->name); - else - pmsg->outdev_name[0] = '\0'; - - if (entry->info->indev && entry->skb->dev) { - pmsg->hw_type = entry->skb->dev->type; - if (entry->skb->dev->hard_header_parse) - pmsg->hw_addrlen = - entry->skb->dev->hard_header_parse(entry->skb, - pmsg->hw_addr); - } - - if (data_len) - if (skb_copy_bits(entry->skb, 0, pmsg->payload, data_len)) - BUG(); - - nlh->nlmsg_len = skb->tail - old_tail; - return skb; - -nlmsg_failure: - if (skb) - kfree_skb(skb); - *errp = -EINVAL; - printk(KERN_ERR "ip_queue: error creating packet message\n"); - return NULL; -} - -static int -ipq_enqueue_packet(struct sk_buff *skb, struct nf_info *info, - unsigned int queuenum, void *data) -{ - int status = -EINVAL; - struct sk_buff *nskb; - struct ipq_queue_entry *entry; - - if (copy_mode == IPQ_COPY_NONE) - return -EAGAIN; - - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); - if (entry == NULL) { - printk(KERN_ERR "ip_queue: OOM in ipq_enqueue_packet()\n"); - return -ENOMEM; - } - - entry->info = info; - entry->skb = skb; - - nskb = ipq_build_packet_message(entry, &status); - if (nskb == NULL) - goto err_out_free; - - write_lock_bh(&queue_lock); - - if (!peer_pid) - goto err_out_free_nskb; - - if (queue_total >= queue_maxlen) { - queue_dropped++; - status = -ENOSPC; - if (net_ratelimit()) - printk (KERN_WARNING "ip_queue: full at %d entries, " - "dropping packets(s). Dropped: %d\n", queue_total, - queue_dropped); - goto err_out_free_nskb; - } - - /* netlink_unicast will either free the nskb or attach it to a socket */ - status = netlink_unicast(ipqnl, nskb, peer_pid, MSG_DONTWAIT); - if (status < 0) { - queue_user_dropped++; - goto err_out_unlock; - } - - __ipq_enqueue_entry(entry); - - write_unlock_bh(&queue_lock); - return status; - -err_out_free_nskb: - kfree_skb(nskb); - -err_out_unlock: - write_unlock_bh(&queue_lock); - -err_out_free: - kfree(entry); - return status; -} - -static int -ipq_mangle_ipv4(ipq_verdict_msg_t *v, struct ipq_queue_entry *e) -{ - int diff; - struct iphdr *user_iph = (struct iphdr *)v->payload; - - if (v->data_len < sizeof(*user_iph)) - return 0; - diff = v->data_len - e->skb->len; - if (diff < 0) - skb_trim(e->skb, v->data_len); - else if (diff > 0) { - if (v->data_len > 0xFFFF) - return -EINVAL; - if (diff > skb_tailroom(e->skb)) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(e->skb, - skb_headroom(e->skb), - diff, - GFP_ATOMIC); - if (newskb == NULL) { - printk(KERN_WARNING "ip_queue: OOM " - "in mangle, dropping packet\n"); - return -ENOMEM; - } - if (e->skb->sk) - skb_set_owner_w(newskb, e->skb->sk); - kfree_skb(e->skb); - e->skb = newskb; - } - skb_put(e->skb, diff); - } - if (!skb_make_writable(&e->skb, v->data_len)) - return -ENOMEM; - memcpy(e->skb->data, v->payload, v->data_len); - e->skb->ip_summed = CHECKSUM_NONE; - - return 0; -} - -static inline int -id_cmp(struct ipq_queue_entry *e, unsigned long id) -{ - return (id == (unsigned long )e); -} - -static int -ipq_set_verdict(struct ipq_verdict_msg *vmsg, unsigned int len) -{ - struct ipq_queue_entry *entry; - - if (vmsg->value > NF_MAX_VERDICT) - return -EINVAL; - - entry = ipq_find_dequeue_entry(id_cmp, vmsg->id); - if (entry == NULL) - return -ENOENT; - else { - int verdict = vmsg->value; - - if (vmsg->data_len && vmsg->data_len == len) - if (ipq_mangle_ipv4(vmsg, entry) < 0) - verdict = NF_DROP; - - ipq_issue_verdict(entry, verdict); - return 0; - } -} - -static int -ipq_set_mode(unsigned char mode, unsigned int range) -{ - int status; - - write_lock_bh(&queue_lock); - status = __ipq_set_mode(mode, range); - write_unlock_bh(&queue_lock); - return status; -} - -static int -ipq_receive_peer(struct ipq_peer_msg *pmsg, - unsigned char type, unsigned int len) -{ - int status = 0; - - if (len < sizeof(*pmsg)) - return -EINVAL; - - switch (type) { - case IPQM_MODE: - status = ipq_set_mode(pmsg->msg.mode.value, - pmsg->msg.mode.range); - break; - - case IPQM_VERDICT: - if (pmsg->msg.verdict.value > NF_MAX_VERDICT) - status = -EINVAL; - else - status = ipq_set_verdict(&pmsg->msg.verdict, - len - sizeof(*pmsg)); - break; - default: - status = -EINVAL; - } - return status; -} - -static int -dev_cmp(struct ipq_queue_entry *entry, unsigned long ifindex) -{ - if (entry->info->indev) - if (entry->info->indev->ifindex == ifindex) - return 1; - - if (entry->info->outdev) - if (entry->info->outdev->ifindex == ifindex) - return 1; - - return 0; -} - -static void -ipq_dev_drop(int ifindex) -{ - struct ipq_queue_entry *entry; - - while ((entry = ipq_find_dequeue_entry(dev_cmp, ifindex)) != NULL) - ipq_issue_verdict(entry, NF_DROP); -} - -#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0) - -static inline void -ipq_rcv_skb(struct sk_buff *skb) -{ - int status, type, pid, flags, nlmsglen, skblen; - struct nlmsghdr *nlh; - - skblen = skb->len; - if (skblen < sizeof(*nlh)) - return; - - nlh = (struct nlmsghdr *)skb->data; - nlmsglen = nlh->nlmsg_len; - if (nlmsglen < sizeof(*nlh) || skblen < nlmsglen) - return; - - pid = nlh->nlmsg_pid; - flags = nlh->nlmsg_flags; - - if(pid <= 0 || !(flags & NLM_F_REQUEST) || flags & NLM_F_MULTI) - RCV_SKB_FAIL(-EINVAL); - - if (flags & MSG_TRUNC) - RCV_SKB_FAIL(-ECOMM); - - type = nlh->nlmsg_type; - if (type < NLMSG_NOOP || type >= IPQM_MAX) - RCV_SKB_FAIL(-EINVAL); - - if (type <= IPQM_BASE) - return; - - if (security_netlink_recv(skb)) - RCV_SKB_FAIL(-EPERM); - - write_lock_bh(&queue_lock); - - if (peer_pid) { - if (peer_pid != pid) { - write_unlock_bh(&queue_lock); - RCV_SKB_FAIL(-EBUSY); - } - } else { - net_enable_timestamp(); - peer_pid = pid; - } - - write_unlock_bh(&queue_lock); - - status = ipq_receive_peer(NLMSG_DATA(nlh), type, - skblen - NLMSG_LENGTH(0)); - if (status < 0) - RCV_SKB_FAIL(status); - - if (flags & NLM_F_ACK) - netlink_ack(skb, nlh, 0); - return; -} - -static void -ipq_rcv_sk(struct sock *sk, int len) -{ - struct sk_buff *skb; - unsigned int qlen; - - down(&ipqnl_sem); - - for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) { - skb = skb_dequeue(&sk->sk_receive_queue); - ipq_rcv_skb(skb); - kfree_skb(skb); - } - - up(&ipqnl_sem); -} - -static int -ipq_rcv_dev_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; - - /* Drop any packets associated with the downed device */ - if (event == NETDEV_DOWN) - ipq_dev_drop(dev->ifindex); - return NOTIFY_DONE; -} - -static struct notifier_block ipq_dev_notifier = { - .notifier_call = ipq_rcv_dev_event, -}; - -static int -ipq_rcv_nl_event(struct notifier_block *this, - unsigned long event, void *ptr) -{ - struct netlink_notify *n = ptr; - - if (event == NETLINK_URELEASE && - n->protocol == NETLINK_FIREWALL && n->pid) { - write_lock_bh(&queue_lock); - if (n->pid == peer_pid) - __ipq_reset(); - write_unlock_bh(&queue_lock); - } - return NOTIFY_DONE; -} - -static struct notifier_block ipq_nl_notifier = { - .notifier_call = ipq_rcv_nl_event, -}; - -static struct ctl_table_header *ipq_sysctl_header; - -static ctl_table ipq_table[] = { - { - .ctl_name = NET_IPQ_QMAX, - .procname = NET_IPQ_QMAX_NAME, - .data = &queue_maxlen, - .maxlen = sizeof(queue_maxlen), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .ctl_name = 0 } -}; - -static ctl_table ipq_dir_table[] = { - { - .ctl_name = NET_IPV4, - .procname = "ipv4", - .mode = 0555, - .child = ipq_table - }, - { .ctl_name = 0 } -}; - -static ctl_table ipq_root_table[] = { - { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = ipq_dir_table - }, - { .ctl_name = 0 } -}; - -#ifdef CONFIG_PROC_FS -static int -ipq_get_info(char *buffer, char **start, off_t offset, int length) -{ - int len; - - read_lock_bh(&queue_lock); - - len = sprintf(buffer, - "Peer PID : %d\n" - "Copy mode : %hu\n" - "Copy range : %u\n" - "Queue length : %u\n" - "Queue max. length : %u\n" - "Queue dropped : %u\n" - "Netlink dropped : %u\n", - peer_pid, - copy_mode, - copy_range, - queue_total, - queue_maxlen, - queue_dropped, - queue_user_dropped); - - read_unlock_bh(&queue_lock); - - *start = buffer + offset; - len -= offset; - if (len > length) - len = length; - else if (len < 0) - len = 0; - return len; -} -#endif /* CONFIG_PROC_FS */ - -static struct nf_queue_handler nfqh = { - .name = "ip_queue", - .outfn = &ipq_enqueue_packet, -}; - -static int -init_or_cleanup(int init) -{ - int status = -ENOMEM; - struct proc_dir_entry *proc; - - if (!init) - goto cleanup; - - netlink_register_notifier(&ipq_nl_notifier); - ipqnl = netlink_kernel_create(NETLINK_FIREWALL, 0, ipq_rcv_sk, - THIS_MODULE); - if (ipqnl == NULL) { - printk(KERN_ERR "ip_queue: failed to create netlink socket\n"); - goto cleanup_netlink_notifier; - } - - proc = proc_net_create(IPQ_PROC_FS_NAME, 0, ipq_get_info); - if (proc) - proc->owner = THIS_MODULE; - else { - printk(KERN_ERR "ip_queue: failed to create proc entry\n"); - goto cleanup_ipqnl; - } - - register_netdevice_notifier(&ipq_dev_notifier); - ipq_sysctl_header = register_sysctl_table(ipq_root_table, 0); - - status = nf_register_queue_handler(PF_INET, &nfqh); - if (status < 0) { - printk(KERN_ERR "ip_queue: failed to register queue handler\n"); - goto cleanup_sysctl; - } - return status; - -cleanup: - nf_unregister_queue_handlers(&nfqh); - synchronize_net(); - ipq_flush(NF_DROP); - -cleanup_sysctl: - unregister_sysctl_table(ipq_sysctl_header); - unregister_netdevice_notifier(&ipq_dev_notifier); - proc_net_remove(IPQ_PROC_FS_NAME); - -cleanup_ipqnl: - sock_release(ipqnl->sk_socket); - down(&ipqnl_sem); - up(&ipqnl_sem); - -cleanup_netlink_notifier: - netlink_unregister_notifier(&ipq_nl_notifier); - return status; -} - -static int __init init(void) -{ - - return init_or_cleanup(1); -} - -static void __exit fini(void) -{ - init_or_cleanup(0); -} - -MODULE_DESCRIPTION("IPv4 packet queue handler"); -MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); -MODULE_LICENSE("GPL"); - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2371b2062c2..99e810f8467 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -3,18 +3,13 @@ * * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org> + * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * 19 Jan 2002 Harald Welte <laforge@gnumonks.org> - * - increase module usage count as soon as we have rules inside - * a table - * 08 Oct 2005 Harald Welte <lafore@netfilter.org> - * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables" */ -#include <linux/config.h> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/cache.h> #include <linux/capability.h> #include <linux/skbuff.h> @@ -24,14 +19,17 @@ #include <linux/module.h> #include <linux/icmp.h> #include <net/ip.h> +#include <net/compat.h> #include <asm/uaccess.h> -#include <asm/semaphore.h> +#include <linux/mutex.h> #include <linux/proc_fs.h> #include <linux/err.h> #include <linux/cpumask.h> #include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> +#include <net/netfilter/nf_log.h> +#include "../../netfilter/xt_repldata.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); @@ -42,24 +40,19 @@ MODULE_DESCRIPTION("IPv4 packet filter"); /*#define DEBUG_IP_FIREWALL_USER*/ #ifdef DEBUG_IP_FIREWALL -#define dprintf(format, args...) printk(format , ## args) +#define dprintf(format, args...) pr_info(format , ## args) #else #define dprintf(format, args...) #endif #ifdef DEBUG_IP_FIREWALL_USER -#define duprintf(format, args...) printk(format , ## args) +#define duprintf(format, args...) pr_info(format , ## args) #else #define duprintf(format, args...) #endif #ifdef CONFIG_NETFILTER_DEBUG -#define IP_NF_ASSERT(x) \ -do { \ - if (!(x)) \ - printk("IP_NF_ASSERT: %s:%s:%u\n", \ - __FUNCTION__, __FILE__, __LINE__); \ -} while(0) +#define IP_NF_ASSERT(x) WARN_ON(!(x)) #else #define IP_NF_ASSERT(x) #endif @@ -70,81 +63,65 @@ do { \ #define inline #endif -/* - We keep a set of rules for each CPU, so we can avoid write-locking - them in the softirq when updating the counters and therefore - only need to read-lock in the softirq; doing a write_lock_bh() in user - context stops packets coming through and allows user context to read - the counters or update the rules. - - Hence the start of any table is given by get_table() below. */ +void *ipt_alloc_initial_table(const struct xt_table *info) +{ + return xt_alloc_initial_table(ipt, IPT); +} +EXPORT_SYMBOL_GPL(ipt_alloc_initial_table); /* Returns whether matches rule or not. */ -static inline int +/* Performance critical - called for every packet */ +static inline bool ip_packet_match(const struct iphdr *ip, const char *indev, const char *outdev, const struct ipt_ip *ipinfo, int isfrag) { - size_t i; unsigned long ret; -#define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg)) +#define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg))) if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr, - IPT_INV_SRCIP) - || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, - IPT_INV_DSTIP)) { + IPT_INV_SRCIP) || + FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr, + IPT_INV_DSTIP)) { dprintf("Source or dest mismatch.\n"); - dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n", - NIPQUAD(ip->saddr), - NIPQUAD(ipinfo->smsk.s_addr), - NIPQUAD(ipinfo->src.s_addr), + dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n", + &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr, ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : ""); - dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n", - NIPQUAD(ip->daddr), - NIPQUAD(ipinfo->dmsk.s_addr), - NIPQUAD(ipinfo->dst.s_addr), + dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n", + &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr, ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : ""); - return 0; + return false; } - /* Look for ifname matches; this should unroll nicely. */ - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)indev)[i] - ^ ((const unsigned long *)ipinfo->iniface)[i]) - & ((const unsigned long *)ipinfo->iniface_mask)[i]; - } + ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask); if (FWINV(ret != 0, IPT_INV_VIA_IN)) { dprintf("VIA in mismatch (%s vs %s).%s\n", indev, ipinfo->iniface, ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":""); - return 0; + return false; } - for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) { - ret |= (((const unsigned long *)outdev)[i] - ^ ((const unsigned long *)ipinfo->outiface)[i]) - & ((const unsigned long *)ipinfo->outiface_mask)[i]; - } + ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask); if (FWINV(ret != 0, IPT_INV_VIA_OUT)) { dprintf("VIA out mismatch (%s vs %s).%s\n", outdev, ipinfo->outiface, ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":""); - return 0; + return false; } /* Check specific protocol */ - if (ipinfo->proto - && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { + if (ipinfo->proto && + FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) { dprintf("Packet protocol %hi does not match %hi.%s\n", ip->protocol, ipinfo->proto, ipinfo->invflags&IPT_INV_PROTO ? " (INV)":""); - return 0; + return false; } /* If we have a fragment rule but the packet is not a fragment @@ -152,88 +129,183 @@ ip_packet_match(const struct iphdr *ip, if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) { dprintf("Fragment rule but not fragment.%s\n", ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : ""); - return 0; + return false; } - return 1; + return true; } -static inline int +static bool ip_checkentry(const struct ipt_ip *ip) { if (ip->flags & ~IPT_F_MASK) { duprintf("Unknown flag bits set: %08X\n", ip->flags & ~IPT_F_MASK); - return 0; + return false; } if (ip->invflags & ~IPT_INV_MASK) { duprintf("Unknown invflag bits set: %08X\n", ip->invflags & ~IPT_INV_MASK); - return 0; + return false; } - return 1; + return true; } static unsigned int -ipt_error(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) +ipt_error(struct sk_buff *skb, const struct xt_action_param *par) { - if (net_ratelimit()) - printk("ip_tables: error: `%s'\n", (char *)targinfo); + net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo); return NF_DROP; } -static inline -int do_match(struct ipt_entry_match *m, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int offset, - int *hotdrop) +/* Performance critical */ +static inline struct ipt_entry * +get_entry(const void *base, unsigned int offset) +{ + return (struct ipt_entry *)(base + offset); +} + +/* All zeroes == unconditional rule. */ +/* Mildly perf critical (only if packet tracing is on) */ +static inline bool unconditional(const struct ipt_ip *ip) { - /* Stop iteration if it doesn't match */ - if (!m->u.kernel.match->match(skb, in, out, m->data, offset, - skb->nh.iph->ihl*4, hotdrop)) + static const struct ipt_ip uncond; + + return memcmp(ip, &uncond, sizeof(uncond)) == 0; +#undef FWINV +} + +/* for const-correctness */ +static inline const struct xt_entry_target * +ipt_get_target_c(const struct ipt_entry *e) +{ + return ipt_get_target((struct ipt_entry *)e); +} + +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) +static const char *const hooknames[] = { + [NF_INET_PRE_ROUTING] = "PREROUTING", + [NF_INET_LOCAL_IN] = "INPUT", + [NF_INET_FORWARD] = "FORWARD", + [NF_INET_LOCAL_OUT] = "OUTPUT", + [NF_INET_POST_ROUTING] = "POSTROUTING", +}; + +enum nf_ip_trace_comments { + NF_IP_TRACE_COMMENT_RULE, + NF_IP_TRACE_COMMENT_RETURN, + NF_IP_TRACE_COMMENT_POLICY, +}; + +static const char *const comments[] = { + [NF_IP_TRACE_COMMENT_RULE] = "rule", + [NF_IP_TRACE_COMMENT_RETURN] = "return", + [NF_IP_TRACE_COMMENT_POLICY] = "policy", +}; + +static struct nf_loginfo trace_loginfo = { + .type = NF_LOG_TYPE_LOG, + .u = { + .log = { + .level = 4, + .logflags = NF_LOG_MASK, + }, + }, +}; + +/* Mildly perf critical (only if packet tracing is on) */ +static inline int +get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e, + const char *hookname, const char **chainname, + const char **comment, unsigned int *rulenum) +{ + const struct xt_standard_target *t = (void *)ipt_get_target_c(s); + + if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) { + /* Head of user chain: ERROR target with chainname */ + *chainname = t->target.data; + (*rulenum) = 0; + } else if (s == e) { + (*rulenum)++; + + if (s->target_offset == sizeof(struct ipt_entry) && + strcmp(t->target.u.kernel.target->name, + XT_STANDARD_TARGET) == 0 && + t->verdict < 0 && + unconditional(&s->ip)) { + /* Tail of chains: STANDARD target (return/policy) */ + *comment = *chainname == hookname + ? comments[NF_IP_TRACE_COMMENT_POLICY] + : comments[NF_IP_TRACE_COMMENT_RETURN]; + } return 1; - else - return 0; + } else + (*rulenum)++; + + return 0; } -static inline struct ipt_entry * -get_entry(void *base, unsigned int offset) +static void trace_packet(const struct sk_buff *skb, + unsigned int hook, + const struct net_device *in, + const struct net_device *out, + const char *tablename, + const struct xt_table_info *private, + const struct ipt_entry *e) { - return (struct ipt_entry *)(base + offset); + const void *table_base; + const struct ipt_entry *root; + const char *hookname, *chainname, *comment; + const struct ipt_entry *iter; + unsigned int rulenum = 0; + struct net *net = dev_net(in ? in : out); + + table_base = private->entries[smp_processor_id()]; + root = get_entry(table_base, private->hook_entry[hook]); + + hookname = chainname = hooknames[hook]; + comment = comments[NF_IP_TRACE_COMMENT_RULE]; + + xt_entry_foreach(iter, root, private->size - private->hook_entry[hook]) + if (get_chainname_rulenum(iter, e, hookname, + &chainname, &comment, &rulenum) != 0) + break; + + nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo, + "TRACE: %s:%s:%s:%u ", + tablename, chainname, comment, rulenum); +} +#endif + +static inline __pure +struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry) +{ + return (void *)entry + entry->next_offset; } /* Returns one of the generic firewall policies, like NF_ACCEPT. */ unsigned int -ipt_do_table(struct sk_buff **pskb, +ipt_do_table(struct sk_buff *skb, unsigned int hook, const struct net_device *in, const struct net_device *out, - struct ipt_table *table, - void *userdata) + struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); - u_int16_t offset; - struct iphdr *ip; - u_int16_t datalen; - int hotdrop = 0; + const struct iphdr *ip; /* Initializing verdict to NF_DROP keeps gcc happy. */ unsigned int verdict = NF_DROP; const char *indev, *outdev; - void *table_base; - struct ipt_entry *e, *back; - struct xt_table_info *private = table->private; + const void *table_base; + struct ipt_entry *e, **jumpstack; + unsigned int *stackptr, origptr, cpu; + const struct xt_table_info *private; + struct xt_action_param acpar; + unsigned int addend; /* Initialization */ - ip = (*pskb)->nh.iph; - datalen = (*pskb)->len - ip->ihl * 4; + ip = ip_hdr(skb); indev = in ? in->name : nulldevname; outdev = out ? out->name : nulldevname; /* We handle fragments by dealing with the first fragment as @@ -242,138 +314,144 @@ ipt_do_table(struct sk_buff **pskb, * things we don't know, ie. tcp syn flag or ports). If the * rule is also a fragment-specific rule, non-fragments won't * match it. */ - offset = ntohs(ip->frag_off) & IP_OFFSET; + acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET; + acpar.thoff = ip_hdrlen(skb); + acpar.hotdrop = false; + acpar.in = in; + acpar.out = out; + acpar.family = NFPROTO_IPV4; + acpar.hooknum = hook; - read_lock_bh(&table->lock); IP_NF_ASSERT(table->valid_hooks & (1 << hook)); - table_base = (void *)private->entries[smp_processor_id()]; + local_bh_disable(); + addend = xt_write_recseq_begin(); + private = table->private; + cpu = smp_processor_id(); + /* + * Ensure we load private-> members after we've fetched the base + * pointer. + */ + smp_read_barrier_depends(); + table_base = private->entries[cpu]; + jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; + stackptr = per_cpu_ptr(private->stackptr, cpu); + origptr = *stackptr; + e = get_entry(table_base, private->hook_entry[hook]); - /* For return from builtin chain */ - back = get_entry(table_base, private->underflow[hook]); + pr_debug("Entering %s(hook %u); sp at %u (UF %p)\n", + table->name, hook, origptr, + get_entry(table_base, private->underflow[hook])); do { + const struct xt_entry_target *t; + const struct xt_entry_match *ematch; + IP_NF_ASSERT(e); - IP_NF_ASSERT(back); - if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) { - struct ipt_entry_target *t; + if (!ip_packet_match(ip, indev, outdev, + &e->ip, acpar.fragoff)) { + no_match: + e = ipt_next_entry(e); + continue; + } - if (IPT_MATCH_ITERATE(e, do_match, - *pskb, in, out, - offset, &hotdrop) != 0) + xt_ematch_foreach(ematch, e) { + acpar.match = ematch->u.kernel.match; + acpar.matchinfo = ematch->data; + if (!acpar.match->match(skb, &acpar)) goto no_match; + } - ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1); + ADD_COUNTER(e->counters, skb->len, 1); - t = ipt_get_target(e); - IP_NF_ASSERT(t->u.kernel.target); - /* Standard target? */ - if (!t->u.kernel.target->target) { - int v; + t = ipt_get_target(e); + IP_NF_ASSERT(t->u.kernel.target); - v = ((struct ipt_standard_target *)t)->verdict; - if (v < 0) { - /* Pop from stack? */ - if (v != IPT_RETURN) { - verdict = (unsigned)(-v) - 1; - break; - } - e = back; - back = get_entry(table_base, - back->comefrom); - continue; +#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) + /* The packet is traced: log it */ + if (unlikely(skb->nf_trace)) + trace_packet(skb, hook, in, out, + table->name, private, e); +#endif + /* Standard target? */ + if (!t->u.kernel.target->target) { + int v; + + v = ((struct xt_standard_target *)t)->verdict; + if (v < 0) { + /* Pop from stack? */ + if (v != XT_RETURN) { + verdict = (unsigned int)(-v) - 1; + break; } - if (table_base + v != (void *)e + e->next_offset - && !(e->ip.flags & IPT_F_GOTO)) { - /* Save old back ptr in next entry */ - struct ipt_entry *next - = (void *)e + e->next_offset; - next->comefrom - = (void *)back - table_base; - /* set back pointer to next entry */ - back = next; + if (*stackptr <= origptr) { + e = get_entry(table_base, + private->underflow[hook]); + pr_debug("Underflow (this is normal) " + "to %p\n", e); + } else { + e = jumpstack[--*stackptr]; + pr_debug("Pulled %p out from pos %u\n", + e, *stackptr); + e = ipt_next_entry(e); } - - e = get_entry(table_base, v); - } else { - /* Targets which reenter must return - abs. verdicts */ -#ifdef CONFIG_NETFILTER_DEBUG - ((struct ipt_entry *)table_base)->comefrom - = 0xeeeeeeec; -#endif - verdict = t->u.kernel.target->target(pskb, - in, out, - hook, - t->data, - userdata); - -#ifdef CONFIG_NETFILTER_DEBUG - if (((struct ipt_entry *)table_base)->comefrom - != 0xeeeeeeec - && verdict == IPT_CONTINUE) { - printk("Target %s reentered!\n", - t->u.kernel.target->name); + continue; + } + if (table_base + v != ipt_next_entry(e) && + !(e->ip.flags & IPT_F_GOTO)) { + if (*stackptr >= private->stacksize) { verdict = NF_DROP; - } - ((struct ipt_entry *)table_base)->comefrom - = 0x57acc001; -#endif - /* Target might have changed stuff. */ - ip = (*pskb)->nh.iph; - datalen = (*pskb)->len - ip->ihl * 4; - - if (verdict == IPT_CONTINUE) - e = (void *)e + e->next_offset; - else - /* Verdict */ break; + } + jumpstack[(*stackptr)++] = e; + pr_debug("Pushed %p into pos %u\n", + e, *stackptr - 1); } - } else { - no_match: - e = (void *)e + e->next_offset; + e = get_entry(table_base, v); + continue; } - } while (!hotdrop); - read_unlock_bh(&table->lock); + acpar.target = t->u.kernel.target; + acpar.targinfo = t->data; + + verdict = t->u.kernel.target->target(skb, &acpar); + /* Target might have changed stuff. */ + ip = ip_hdr(skb); + if (verdict == XT_CONTINUE) + e = ipt_next_entry(e); + else + /* Verdict */ + break; + } while (!acpar.hotdrop); + pr_debug("Exiting %s; resetting sp from %u to %u\n", + __func__, *stackptr, origptr); + *stackptr = origptr; + xt_write_recseq_end(addend); + local_bh_enable(); #ifdef DEBUG_ALLOW_ALL return NF_ACCEPT; #else - if (hotdrop) + if (acpar.hotdrop) return NF_DROP; else return verdict; #endif } -/* All zeroes == unconditional rule. */ -static inline int -unconditional(const struct ipt_ip *ip) -{ - unsigned int i; - - for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++) - if (((__u32 *)ip)[i]) - return 0; - - return 1; -} - /* Figures out from what hook each rule can be called: returns 0 if there are loops. Puts hook bitmask in comefrom. */ static int -mark_source_chains(struct xt_table_info *newinfo, +mark_source_chains(const struct xt_table_info *newinfo, unsigned int valid_hooks, void *entry0) { unsigned int hook; /* No recursion; use packet counter to save back ptrs (reset to 0 as we leave), and comefrom to save source hook bitmask */ - for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) { + for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) { unsigned int pos = newinfo->hook_entry[hook]; - struct ipt_entry *e - = (struct ipt_entry *)(entry0 + pos); + struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos); if (!(valid_hooks & (1 << hook))) continue; @@ -382,32 +460,41 @@ mark_source_chains(struct xt_table_info *newinfo, e->counters.pcnt = pos; for (;;) { - struct ipt_standard_target *t - = (void *)ipt_get_target(e); + const struct xt_standard_target *t + = (void *)ipt_get_target_c(e); + int visited = e->comefrom & (1 << hook); - if (e->comefrom & (1 << NF_IP_NUMHOOKS)) { - printk("iptables: loop hook %u pos %u %08X.\n", + if (e->comefrom & (1 << NF_INET_NUMHOOKS)) { + pr_err("iptables: loop hook %u pos %u %08X.\n", hook, pos, e->comefrom); return 0; } - e->comefrom - |= ((1 << hook) | (1 << NF_IP_NUMHOOKS)); + e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS)); /* Unconditional return/END. */ - if (e->target_offset == sizeof(struct ipt_entry) - && (strcmp(t->target.u.user.name, - IPT_STANDARD_TARGET) == 0) - && t->verdict < 0 - && unconditional(&e->ip)) { + if ((e->target_offset == sizeof(struct ipt_entry) && + (strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < 0 && unconditional(&e->ip)) || + visited) { unsigned int oldpos, size; + if ((strcmp(t->target.u.user.name, + XT_STANDARD_TARGET) == 0) && + t->verdict < -NF_MAX_VERDICT - 1) { + duprintf("mark_source_chains: bad " + "negative verdict (%i)\n", + t->verdict); + return 0; + } + /* Return: backtrack through the last big jump. */ do { - e->comefrom ^= (1<<NF_IP_NUMHOOKS); + e->comefrom ^= (1<<NF_INET_NUMHOOKS); #ifdef DEBUG_IP_FIREWALL_USER if (e->comefrom - & (1 << NF_IP_NUMHOOKS)) { + & (1 << NF_INET_NUMHOOKS)) { duprintf("Back unset " "on hook %u " "rule %u\n", @@ -436,8 +523,15 @@ mark_source_chains(struct xt_table_info *newinfo, int newpos = t->verdict; if (strcmp(t->target.u.user.name, - IPT_STANDARD_TARGET) == 0 - && newpos >= 0) { + XT_STANDARD_TARGET) == 0 && + newpos >= 0) { + if (newpos > newinfo->size - + sizeof(struct ipt_entry)) { + duprintf("mark_source_chains: " + "bad verdict (%i)\n", + newpos); + return 0; + } /* This a jump; chase it. */ duprintf("Jump rule %u -> %u\n", pos, newpos); @@ -457,220 +551,257 @@ mark_source_chains(struct xt_table_info *newinfo, return 1; } -static inline int -cleanup_match(struct ipt_entry_match *m, unsigned int *i) +static void cleanup_match(struct xt_entry_match *m, struct net *net) { - if (i && (*i)-- == 0) - return 1; - - if (m->u.kernel.match->destroy) - m->u.kernel.match->destroy(m->data, - m->u.match_size - sizeof(*m)); - module_put(m->u.kernel.match->me); - return 0; + struct xt_mtdtor_param par; + + par.net = net; + par.match = m->u.kernel.match; + par.matchinfo = m->data; + par.family = NFPROTO_IPV4; + if (par.match->destroy != NULL) + par.match->destroy(&par); + module_put(par.match->me); } -static inline int -standard_check(const struct ipt_entry_target *t, - unsigned int max_offset) +static int +check_entry(const struct ipt_entry *e, const char *name) { - struct ipt_standard_target *targ = (void *)t; + const struct xt_entry_target *t; - /* Check standard info. */ - if (t->u.target_size - != IPT_ALIGN(sizeof(struct ipt_standard_target))) { - duprintf("standard_check: target size %u != %u\n", - t->u.target_size, - IPT_ALIGN(sizeof(struct ipt_standard_target))); - return 0; + if (!ip_checkentry(&e->ip)) { + duprintf("ip check failed %p %s.\n", e, name); + return -EINVAL; } - if (targ->verdict >= 0 - && targ->verdict > max_offset - sizeof(struct ipt_entry)) { - duprintf("ipt_standard_check: bad verdict (%i)\n", - targ->verdict); - return 0; - } + if (e->target_offset + sizeof(struct xt_entry_target) > + e->next_offset) + return -EINVAL; + + t = ipt_get_target_c(e); + if (e->target_offset + t->u.target_size > e->next_offset) + return -EINVAL; + + return 0; +} + +static int +check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) +{ + const struct ipt_ip *ip = par->entryinfo; + int ret; - if (targ->verdict < -NF_MAX_VERDICT - 1) { - duprintf("ipt_standard_check: bad negative verdict (%i)\n", - targ->verdict); - return 0; + par->match = m->u.kernel.match; + par->matchinfo = m->data; + + ret = xt_check_match(par, m->u.match_size - sizeof(*m), + ip->proto, ip->invflags & IPT_INV_PROTO); + if (ret < 0) { + duprintf("check failed for `%s'.\n", par->match->name); + return ret; } - return 1; + return 0; } -static inline int -check_match(struct ipt_entry_match *m, - const char *name, - const struct ipt_ip *ip, - unsigned int hookmask, - unsigned int *i) +static int +find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par) { - struct ipt_match *match; + struct xt_match *match; + int ret; - match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name, - m->u.user.revision), - "ipt_%s", m->u.user.name); - if (IS_ERR(match) || !match) { - duprintf("check_match: `%s' not found\n", m->u.user.name); - return match ? PTR_ERR(match) : -ENOENT; + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { + duprintf("find_check_match: `%s' not found\n", m->u.user.name); + return PTR_ERR(match); } m->u.kernel.match = match; - if (m->u.kernel.match->checkentry - && !m->u.kernel.match->checkentry(name, ip, m->data, - m->u.match_size - sizeof(*m), - hookmask)) { - module_put(m->u.kernel.match->me); - duprintf("ip_tables: check failed for `%s'.\n", - m->u.kernel.match->name); - return -EINVAL; - } + ret = check_match(m, par); + if (ret) + goto err; - (*i)++; return 0; +err: + module_put(m->u.kernel.match->me); + return ret; } -static struct ipt_target ipt_standard_target; +static int check_target(struct ipt_entry *e, struct net *net, const char *name) +{ + struct xt_entry_target *t = ipt_get_target(e); + struct xt_tgchk_param par = { + .net = net, + .table = name, + .entryinfo = e, + .target = t->u.kernel.target, + .targinfo = t->data, + .hook_mask = e->comefrom, + .family = NFPROTO_IPV4, + }; + int ret; -static inline int -check_entry(struct ipt_entry *e, const char *name, unsigned int size, - unsigned int *i) + ret = xt_check_target(&par, t->u.target_size - sizeof(*t), + e->ip.proto, e->ip.invflags & IPT_INV_PROTO); + if (ret < 0) { + duprintf("check failed for `%s'.\n", + t->u.kernel.target->name); + return ret; + } + return 0; +} + +static int +find_check_entry(struct ipt_entry *e, struct net *net, const char *name, + unsigned int size) { - struct ipt_entry_target *t; - struct ipt_target *target; + struct xt_entry_target *t; + struct xt_target *target; int ret; unsigned int j; + struct xt_mtchk_param mtpar; + struct xt_entry_match *ematch; - if (!ip_checkentry(&e->ip)) { - duprintf("ip_tables: ip check failed %p %s.\n", e, name); - return -EINVAL; - } + ret = check_entry(e, name); + if (ret) + return ret; j = 0; - ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j); - if (ret != 0) - goto cleanup_matches; + mtpar.net = net; + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + xt_ematch_foreach(ematch, e) { + ret = find_check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } t = ipt_get_target(e); - target = try_then_request_module(xt_find_target(AF_INET, - t->u.user.name, - t->u.user.revision), - "ipt_%s", t->u.user.name); - if (IS_ERR(target) || !target) { - duprintf("check_entry: `%s' not found\n", t->u.user.name); - ret = target ? PTR_ERR(target) : -ENOENT; + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + duprintf("find_check_entry: `%s' not found\n", t->u.user.name); + ret = PTR_ERR(target); goto cleanup_matches; } t->u.kernel.target = target; - if (t->u.kernel.target == &ipt_standard_target) { - if (!standard_check(t, size)) { - ret = -EINVAL; - goto cleanup_matches; - } - } else if (t->u.kernel.target->checkentry - && !t->u.kernel.target->checkentry(name, e, t->data, - t->u.target_size - - sizeof(*t), - e->comefrom)) { - module_put(t->u.kernel.target->me); - duprintf("ip_tables: check failed for `%s'.\n", - t->u.kernel.target->name); - ret = -EINVAL; - goto cleanup_matches; - } - - (*i)++; + ret = check_target(e, net, name); + if (ret) + goto err; return 0; - + err: + module_put(t->u.kernel.target->me); cleanup_matches: - IPT_MATCH_ITERATE(e, cleanup_match, &j); + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } return ret; } -static inline int +static bool check_underflow(const struct ipt_entry *e) +{ + const struct xt_entry_target *t; + unsigned int verdict; + + if (!unconditional(&e->ip)) + return false; + t = ipt_get_target_c(e); + if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0) + return false; + verdict = ((struct xt_standard_target *)t)->verdict; + verdict = -verdict - 1; + return verdict == NF_DROP || verdict == NF_ACCEPT; +} + +static int check_entry_size_and_hooks(struct ipt_entry *e, struct xt_table_info *newinfo, - unsigned char *base, - unsigned char *limit, + const unsigned char *base, + const unsigned char *limit, const unsigned int *hook_entries, const unsigned int *underflows, - unsigned int *i) + unsigned int valid_hooks) { unsigned int h; - if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 - || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { + if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct ipt_entry) >= limit) { duprintf("Bad offset %p\n", e); return -EINVAL; } if (e->next_offset - < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) { + < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target)) { duprintf("checking: element %p size %u\n", e, e->next_offset); return -EINVAL; } /* Check hooks & underflows */ - for (h = 0; h < NF_IP_NUMHOOKS; h++) { + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if (!(valid_hooks & (1 << h))) + continue; if ((unsigned char *)e - base == hook_entries[h]) newinfo->hook_entry[h] = hook_entries[h]; - if ((unsigned char *)e - base == underflows[h]) + if ((unsigned char *)e - base == underflows[h]) { + if (!check_underflow(e)) { + pr_err("Underflows must be unconditional and " + "use the STANDARD target with " + "ACCEPT/DROP\n"); + return -EINVAL; + } newinfo->underflow[h] = underflows[h]; + } } - /* FIXME: underflows must be unconditional, standard verdicts - < 0 (not IPT_RETURN). --RR */ - /* Clear counters and comefrom */ e->counters = ((struct xt_counters) { 0, 0 }); e->comefrom = 0; - - (*i)++; return 0; } -static inline int -cleanup_entry(struct ipt_entry *e, unsigned int *i) +static void +cleanup_entry(struct ipt_entry *e, struct net *net) { - struct ipt_entry_target *t; - - if (i && (*i)-- == 0) - return 1; + struct xt_tgdtor_param par; + struct xt_entry_target *t; + struct xt_entry_match *ematch; /* Cleanup all matches */ - IPT_MATCH_ITERATE(e, cleanup_match, NULL); + xt_ematch_foreach(ematch, e) + cleanup_match(ematch, net); t = ipt_get_target(e); - if (t->u.kernel.target->destroy) - t->u.kernel.target->destroy(t->data, - t->u.target_size - sizeof(*t)); - module_put(t->u.kernel.target->me); - return 0; + + par.net = net; + par.target = t->u.kernel.target; + par.targinfo = t->data; + par.family = NFPROTO_IPV4; + if (par.target->destroy != NULL) + par.target->destroy(&par); + module_put(par.target->me); } /* Checks and translates the user-supplied table segment (held in newinfo) */ static int -translate_table(const char *name, - unsigned int valid_hooks, - struct xt_table_info *newinfo, - void *entry0, - unsigned int size, - unsigned int number, - const unsigned int *hook_entries, - const unsigned int *underflows) +translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, + const struct ipt_replace *repl) { + struct ipt_entry *iter; unsigned int i; - int ret; + int ret = 0; - newinfo->size = size; - newinfo->number = number; + newinfo->size = repl->size; + newinfo->number = repl->num_entries; /* Init all hooks to impossible value. */ - for (i = 0; i < NF_IP_NUMHOOKS; i++) { + for (i = 0; i < NF_INET_NUMHOOKS; i++) { newinfo->hook_entry[i] = 0xFFFFFFFF; newinfo->underflow[i] = 0xFFFFFFFF; } @@ -678,54 +809,66 @@ translate_table(const char *name, duprintf("translate_table: size %u\n", newinfo->size); i = 0; /* Walk through entries, checking offsets. */ - ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry_size_and_hooks, - newinfo, - entry0, - entry0 + size, - hook_entries, underflows, &i); - if (ret != 0) - return ret; + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = check_entry_size_and_hooks(iter, newinfo, entry0, + entry0 + repl->size, + repl->hook_entry, + repl->underflow, + repl->valid_hooks); + if (ret != 0) + return ret; + ++i; + if (strcmp(ipt_get_target(iter)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } - if (i != number) { + if (i != repl->num_entries) { duprintf("translate_table: %u not %u entries\n", - i, number); + i, repl->num_entries); return -EINVAL; } /* Check hooks all assigned */ - for (i = 0; i < NF_IP_NUMHOOKS; i++) { + for (i = 0; i < NF_INET_NUMHOOKS; i++) { /* Only hooks which are valid */ - if (!(valid_hooks & (1 << i))) + if (!(repl->valid_hooks & (1 << i))) continue; if (newinfo->hook_entry[i] == 0xFFFFFFFF) { duprintf("Invalid hook entry %u %u\n", - i, hook_entries[i]); + i, repl->hook_entry[i]); return -EINVAL; } if (newinfo->underflow[i] == 0xFFFFFFFF) { duprintf("Invalid underflow %u %u\n", - i, underflows[i]); + i, repl->underflow[i]); return -EINVAL; } } - if (!mark_source_chains(newinfo, valid_hooks, entry0)) + if (!mark_source_chains(newinfo, repl->valid_hooks, entry0)) return -ELOOP; /* Finally, each sanity check must pass */ i = 0; - ret = IPT_ENTRY_ITERATE(entry0, newinfo->size, - check_entry, name, size, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + ret = find_check_entry(iter, net, repl->name, repl->size); + if (ret != 0) + break; + ++i; + } if (ret != 0) { - IPT_ENTRY_ITERATE(entry0, newinfo->size, - cleanup_entry, &i); + xt_entry_foreach(iter, entry0, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter, net); + } return ret; } /* And one copy for every other CPU */ - for_each_cpu(i) { + for_each_possible_cpu(i) { if (newinfo->entries[i] && newinfo->entries[i] != entry0) memcpy(newinfo->entries[i], entry0, newinfo->size); } @@ -733,95 +876,75 @@ translate_table(const char *name, return ret; } -/* Gets counters. */ -static inline int -add_entry_to_counter(const struct ipt_entry *e, - struct xt_counters total[], - unsigned int *i) -{ - ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - -static inline int -set_entry_to_counter(const struct ipt_entry *e, - struct ipt_counters total[], - unsigned int *i) -{ - SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt); - - (*i)++; - return 0; -} - static void get_counters(const struct xt_table_info *t, struct xt_counters counters[]) { + struct ipt_entry *iter; unsigned int cpu; unsigned int i; - unsigned int curcpu; - /* Instead of clearing (by a previous call to memset()) - * the counters and using adds, we set the counters - * with data used by 'current' CPU - * We dont care about preemption here. - */ - curcpu = raw_smp_processor_id(); + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); - i = 0; - IPT_ENTRY_ITERATE(t->entries[curcpu], - t->size, - set_entry_to_counter, - counters, - &i); - - for_each_cpu(cpu) { - if (cpu == curcpu) - continue; i = 0; - IPT_ENTRY_ITERATE(t->entries[cpu], - t->size, - add_entry_to_counter, - counters, - &i); + xt_entry_foreach(iter, t->entries[cpu], t->size) { + u64 bcnt, pcnt; + unsigned int start; + + do { + start = read_seqcount_begin(s); + bcnt = iter->counters.bcnt; + pcnt = iter->counters.pcnt; + } while (read_seqcount_retry(s, start)); + + ADD_COUNTER(counters[i], bcnt, pcnt); + ++i; /* macro does multi eval of i */ + } } } -static int -copy_entries_to_user(unsigned int total_size, - struct ipt_table *table, - void __user *userptr) +static struct xt_counters *alloc_counters(const struct xt_table *table) { - unsigned int off, num, countersize; - struct ipt_entry *e; + unsigned int countersize; struct xt_counters *counters; - struct xt_table_info *private = table->private; - int ret = 0; - void *loc_cpu_entry; + const struct xt_table_info *private = table->private; /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care about). */ countersize = sizeof(struct xt_counters) * private->number; - counters = vmalloc_node(countersize, numa_node_id()); + counters = vzalloc(countersize); if (counters == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); - /* First, sum counters... */ - write_lock_bh(&table->lock); get_counters(private, counters); - write_unlock_bh(&table->lock); + + return counters; +} + +static int +copy_entries_to_user(unsigned int total_size, + const struct xt_table *table, + void __user *userptr) +{ + unsigned int off, num; + const struct ipt_entry *e; + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + int ret = 0; + const void *loc_cpu_entry; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); /* choose the copy that is on our node/cpu, ... * This choice is lazy (because current thread is * allowed to migrate to another cpu) */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - /* ... then copy entire thing ... */ if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -831,8 +954,8 @@ copy_entries_to_user(unsigned int total_size, /* ... then go back and fix counters and names */ for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){ unsigned int i; - struct ipt_entry_match *m; - struct ipt_entry_target *t; + const struct xt_entry_match *m; + const struct xt_entry_target *t; e = (struct ipt_entry *)(loc_cpu_entry + off); if (copy_to_user(userptr + off @@ -849,7 +972,7 @@ copy_entries_to_user(unsigned int total_size, m = (void *)e + i; if (copy_to_user(userptr + off + i - + offsetof(struct ipt_entry_match, + + offsetof(struct xt_entry_match, u.user.name), m->u.kernel.match->name, strlen(m->u.kernel.match->name)+1) @@ -859,9 +982,9 @@ copy_entries_to_user(unsigned int total_size, } } - t = ipt_get_target(e); + t = ipt_get_target_c(e); if (copy_to_user(userptr + off + e->target_offset - + offsetof(struct ipt_entry_target, + + offsetof(struct xt_entry_target, u.user.name), t->u.kernel.target->name, strlen(t->u.kernel.target->name)+1) != 0) { @@ -875,26 +998,172 @@ copy_entries_to_user(unsigned int total_size, return ret; } +#ifdef CONFIG_COMPAT +static void compat_standard_from_user(void *dst, const void *src) +{ + int v = *(compat_int_t *)src; + + if (v > 0) + v += xt_compat_calc_jump(AF_INET, v); + memcpy(dst, &v, sizeof(v)); +} + +static int compat_standard_to_user(void __user *dst, const void *src) +{ + compat_int_t cv = *(int *)src; + + if (cv > 0) + cv -= xt_compat_calc_jump(AF_INET, cv); + return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0; +} + +static int compat_calc_entry(const struct ipt_entry *e, + const struct xt_table_info *info, + const void *base, struct xt_table_info *newinfo) +{ + const struct xt_entry_match *ematch; + const struct xt_entry_target *t; + unsigned int entry_offset; + int off, i, ret; + + off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + entry_offset = (void *)e - base; + xt_ematch_foreach(ematch, e) + off += xt_compat_match_offset(ematch->u.kernel.match); + t = ipt_get_target_c(e); + off += xt_compat_target_offset(t->u.kernel.target); + newinfo->size -= off; + ret = xt_compat_add_offset(AF_INET, entry_offset, off); + if (ret) + return ret; + + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + if (info->hook_entry[i] && + (e < (struct ipt_entry *)(base + info->hook_entry[i]))) + newinfo->hook_entry[i] -= off; + if (info->underflow[i] && + (e < (struct ipt_entry *)(base + info->underflow[i]))) + newinfo->underflow[i] -= off; + } + return 0; +} + +static int compat_table_info(const struct xt_table_info *info, + struct xt_table_info *newinfo) +{ + struct ipt_entry *iter; + void *loc_cpu_entry; + int ret; + + if (!newinfo || !info) + return -EINVAL; + + /* we dont care about newinfo->entries[] */ + memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); + newinfo->initial_entries = 0; + loc_cpu_entry = info->entries[raw_smp_processor_id()]; + xt_compat_init_offsets(AF_INET, info->number); + xt_entry_foreach(iter, loc_cpu_entry, info->size) { + ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); + if (ret != 0) + return ret; + } + return 0; +} +#endif + +static int get_info(struct net *net, void __user *user, + const int *len, int compat) +{ + char name[XT_TABLE_MAXNAMELEN]; + struct xt_table *t; + int ret; + + if (*len != sizeof(struct ipt_getinfo)) { + duprintf("length %u != %zu\n", *len, + sizeof(struct ipt_getinfo)); + return -EINVAL; + } + + if (copy_from_user(name, user, sizeof(name)) != 0) + return -EFAULT; + + name[XT_TABLE_MAXNAMELEN-1] = '\0'; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_lock(AF_INET); +#endif + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), + "iptable_%s", name); + if (!IS_ERR_OR_NULL(t)) { + struct ipt_getinfo info; + const struct xt_table_info *private = t->private; +#ifdef CONFIG_COMPAT + struct xt_table_info tmp; + + if (compat) { + ret = compat_table_info(private, &tmp); + xt_compat_flush_offsets(AF_INET); + private = &tmp; + } +#endif + memset(&info, 0, sizeof(info)); + info.valid_hooks = t->valid_hooks; + memcpy(info.hook_entry, private->hook_entry, + sizeof(info.hook_entry)); + memcpy(info.underflow, private->underflow, + sizeof(info.underflow)); + info.num_entries = private->number; + info.size = private->size; + strcpy(info.name, name); + + if (copy_to_user(user, &info, *len) != 0) + ret = -EFAULT; + else + ret = 0; + + xt_table_unlock(t); + module_put(t->me); + } else + ret = t ? PTR_ERR(t) : -ENOENT; +#ifdef CONFIG_COMPAT + if (compat) + xt_compat_unlock(AF_INET); +#endif + return ret; +} + static int -get_entries(const struct ipt_get_entries *entries, - struct ipt_get_entries __user *uptr) +get_entries(struct net *net, struct ipt_get_entries __user *uptr, + const int *len) { int ret; - struct ipt_table *t; - - t = xt_find_table_lock(AF_INET, entries->name); - if (t && !IS_ERR(t)) { - struct xt_table_info *private = t->private; - duprintf("t->private->number = %u\n", - private->number); - if (entries->size == private->size) + struct ipt_get_entries get; + struct xt_table *t; + + if (*len < sizeof(get)) { + duprintf("get_entries: %u < %zu\n", *len, sizeof(get)); + return -EINVAL; + } + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; + if (*len != sizeof(struct ipt_get_entries) + get.size) { + duprintf("get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); + return -EINVAL; + } + + t = xt_find_table_lock(net, AF_INET, get.name); + if (!IS_ERR_OR_NULL(t)) { + const struct xt_table_info *private = t->private; + duprintf("t->private->number = %u\n", private->number); + if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); else { duprintf("get_entries: I've got %u not %u!\n", - private->size, - entries->size); - ret = -EINVAL; + private->size, get.size); + ret = -EAGAIN; } module_put(t->me); xt_table_unlock(t); @@ -905,86 +1174,67 @@ get_entries(const struct ipt_get_entries *entries, } static int -do_replace(void __user *user, unsigned int len) +__do_replace(struct net *net, const char *name, unsigned int valid_hooks, + struct xt_table_info *newinfo, unsigned int num_counters, + void __user *counters_ptr) { int ret; - struct ipt_replace tmp; - struct ipt_table *t; - struct xt_table_info *newinfo, *oldinfo; + struct xt_table *t; + struct xt_table_info *oldinfo; struct xt_counters *counters; - void *loc_cpu_entry, *loc_cpu_old_entry; - - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) - return -EFAULT; - - /* Hack: Causes ipchains to give correct error msg --RR */ - if (len != sizeof(tmp) + tmp.size) - return -ENOPROTOOPT; - - newinfo = xt_alloc_table_info(tmp.size); - if (!newinfo) - return -ENOMEM; - - /* choose the copy that is our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; - if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), - tmp.size) != 0) { - ret = -EFAULT; - goto free_newinfo; - } + void *loc_cpu_old_entry; + struct ipt_entry *iter; - counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters)); + ret = 0; + counters = vzalloc(num_counters * sizeof(struct xt_counters)); if (!counters) { ret = -ENOMEM; - goto free_newinfo; + goto out; } - ret = translate_table(tmp.name, tmp.valid_hooks, - newinfo, loc_cpu_entry, tmp.size, tmp.num_entries, - tmp.hook_entry, tmp.underflow); - if (ret != 0) - goto free_newinfo_counters; - - duprintf("ip_tables: Translated table\n"); - - t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name), - "iptable_%s", tmp.name); - if (!t || IS_ERR(t)) { + t = try_then_request_module(xt_find_table_lock(net, AF_INET, name), + "iptable_%s", name); + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free_newinfo_counters_untrans; } /* You lied! */ - if (tmp.valid_hooks != t->valid_hooks) { + if (valid_hooks != t->valid_hooks) { duprintf("Valid hook crap: %08X vs %08X\n", - tmp.valid_hooks, t->valid_hooks); + valid_hooks, t->valid_hooks); ret = -EINVAL; goto put_module; } - oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret); + oldinfo = xt_replace_table(t, num_counters, newinfo, &ret); if (!oldinfo) goto put_module; /* Update module usage count based on number of rules */ duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n", oldinfo->number, oldinfo->initial_entries, newinfo->number); - if ((oldinfo->number > oldinfo->initial_entries) || - (newinfo->number <= oldinfo->initial_entries)) + if ((oldinfo->number > oldinfo->initial_entries) || + (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); if ((oldinfo->number > oldinfo->initial_entries) && (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters. */ + /* Get the old counters, and synchronize with replace */ get_counters(oldinfo, counters); + /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL); + xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + cleanup_entry(iter, net); + xt_free_table_info(oldinfo); - if (copy_to_user(tmp.counters, counters, - sizeof(struct xt_counters) * tmp.num_counters) != 0) - ret = -EFAULT; + if (copy_to_user(counters_ptr, counters, + sizeof(struct xt_counters) * num_counters) != 0) { + /* Silent error, can't fail, new table is already in place */ + net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n"); + } vfree(counters); xt_table_unlock(t); return ret; @@ -993,84 +1243,141 @@ do_replace(void __user *user, unsigned int len) module_put(t->me); xt_table_unlock(t); free_newinfo_counters_untrans: - IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL); - free_newinfo_counters: vfree(counters); - free_newinfo: - xt_free_table_info(newinfo); + out: return ret; } -/* We're lazy, and add to the first CPU; overflow works its fey magic - * and everything is OK. */ -static inline int -add_counter_to_entry(struct ipt_entry *e, - const struct xt_counters addme[], - unsigned int *i) +static int +do_replace(struct net *net, const void __user *user, unsigned int len) { -#if 0 - duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n", - *i, - (long unsigned int)e->counters.pcnt, - (long unsigned int)e->counters.bcnt, - (long unsigned int)addme[*i].pcnt, - (long unsigned int)addme[*i].bcnt); -#endif + int ret; + struct ipt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct ipt_entry *iter; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } - ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt); + ret = translate_table(net, newinfo, loc_cpu_entry, &tmp); + if (ret != 0) + goto free_newinfo; + + duprintf("Translated table\n"); - (*i)++; + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, tmp.counters); + if (ret) + goto free_newinfo_untrans; return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + free_newinfo: + xt_free_table_info(newinfo); + return ret; } static int -do_add_counters(void __user *user, unsigned int len) +do_add_counters(struct net *net, const void __user *user, + unsigned int len, int compat) { - unsigned int i; - struct xt_counters_info tmp, *paddc; - struct ipt_table *t; - struct xt_table_info *private; + unsigned int i, curcpu; + struct xt_counters_info tmp; + struct xt_counters *paddc; + unsigned int num_counters; + const char *name; + int size; + void *ptmp; + struct xt_table *t; + const struct xt_table_info *private; int ret = 0; void *loc_cpu_entry; + struct ipt_entry *iter; + unsigned int addend; +#ifdef CONFIG_COMPAT + struct compat_xt_counters_info compat_tmp; + + if (compat) { + ptmp = &compat_tmp; + size = sizeof(struct compat_xt_counters_info); + } else +#endif + { + ptmp = &tmp; + size = sizeof(struct xt_counters_info); + } - if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + if (copy_from_user(ptmp, user, size) != 0) return -EFAULT; - if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters)) +#ifdef CONFIG_COMPAT + if (compat) { + num_counters = compat_tmp.num_counters; + name = compat_tmp.name; + } else +#endif + { + num_counters = tmp.num_counters; + name = tmp.name; + } + + if (len != size + num_counters * sizeof(struct xt_counters)) return -EINVAL; - paddc = vmalloc_node(len, numa_node_id()); + paddc = vmalloc(len - size); if (!paddc) return -ENOMEM; - if (copy_from_user(paddc, user, len) != 0) { + if (copy_from_user(paddc, user + size, len - size) != 0) { ret = -EFAULT; goto free; } - t = xt_find_table_lock(AF_INET, tmp.name); - if (!t || IS_ERR(t)) { + t = xt_find_table_lock(net, AF_INET, name); + if (IS_ERR_OR_NULL(t)) { ret = t ? PTR_ERR(t) : -ENOENT; goto free; } - write_lock_bh(&t->lock); + local_bh_disable(); private = t->private; - if (private->number != paddc->num_counters) { + if (private->number != num_counters) { ret = -EINVAL; goto unlock_up_free; } i = 0; /* Choose the copy that is on our node */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_entry, - private->size, - add_counter_to_entry, - paddc->counters, - &i); + curcpu = smp_processor_id(); + loc_cpu_entry = private->entries[curcpu]; + addend = xt_write_recseq_begin(); + xt_entry_foreach(iter, loc_cpu_entry, private->size) { + ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + ++i; + } + xt_write_recseq_end(addend); unlock_up_free: - write_unlock_bh(&t->lock); + local_bh_enable(); xt_table_unlock(t); module_put(t->me); free: @@ -1079,21 +1386,483 @@ do_add_counters(void __user *user, unsigned int len) return ret; } +#ifdef CONFIG_COMPAT +struct compat_ipt_replace { + char name[XT_TABLE_MAXNAMELEN]; + u32 valid_hooks; + u32 num_entries; + u32 size; + u32 hook_entry[NF_INET_NUMHOOKS]; + u32 underflow[NF_INET_NUMHOOKS]; + u32 num_counters; + compat_uptr_t counters; /* struct xt_counters * */ + struct compat_ipt_entry entries[0]; +}; + +static int +compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr, + unsigned int *size, struct xt_counters *counters, + unsigned int i) +{ + struct xt_entry_target *t; + struct compat_ipt_entry __user *ce; + u_int16_t target_offset, next_offset; + compat_uint_t origsize; + const struct xt_entry_match *ematch; + int ret = 0; + + origsize = *size; + ce = (struct compat_ipt_entry __user *)*dstptr; + if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 || + copy_to_user(&ce->counters, &counters[i], + sizeof(counters[i])) != 0) + return -EFAULT; + + *dstptr += sizeof(struct compat_ipt_entry); + *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_to_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } + target_offset = e->target_offset - (origsize - *size); + t = ipt_get_target(e); + ret = xt_compat_target_to_user(t, dstptr, size); + if (ret) + return ret; + next_offset = e->next_offset - (origsize - *size); + if (put_user(target_offset, &ce->target_offset) != 0 || + put_user(next_offset, &ce->next_offset) != 0) + return -EFAULT; + return 0; +} + +static int +compat_find_calc_match(struct xt_entry_match *m, + const char *name, + const struct ipt_ip *ip, + unsigned int hookmask, + int *size) +{ + struct xt_match *match; + + match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name, + m->u.user.revision); + if (IS_ERR(match)) { + duprintf("compat_check_calc_match: `%s' not found\n", + m->u.user.name); + return PTR_ERR(match); + } + m->u.kernel.match = match; + *size += xt_compat_match_offset(match); + return 0; +} + +static void compat_release_entry(struct compat_ipt_entry *e) +{ + struct xt_entry_target *t; + struct xt_entry_match *ematch; + + /* Cleanup all matches */ + xt_ematch_foreach(ematch, e) + module_put(ematch->u.kernel.match->me); + t = compat_ipt_get_target(e); + module_put(t->u.kernel.target->me); +} + +static int +check_compat_entry_size_and_hooks(struct compat_ipt_entry *e, + struct xt_table_info *newinfo, + unsigned int *size, + const unsigned char *base, + const unsigned char *limit, + const unsigned int *hook_entries, + const unsigned int *underflows, + const char *name) +{ + struct xt_entry_match *ematch; + struct xt_entry_target *t; + struct xt_target *target; + unsigned int entry_offset; + unsigned int j; + int ret, off, h; + + duprintf("check_compat_entry_size_and_hooks %p\n", e); + if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 || + (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) { + duprintf("Bad offset %p, limit = %p\n", e, limit); + return -EINVAL; + } + + if (e->next_offset < sizeof(struct compat_ipt_entry) + + sizeof(struct compat_xt_entry_target)) { + duprintf("checking: element %p size %u\n", + e, e->next_offset); + return -EINVAL; + } + + /* For purposes of check_entry casting the compat entry is fine */ + ret = check_entry((struct ipt_entry *)e, name); + if (ret) + return ret; + + off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + entry_offset = (void *)e - (void *)base; + j = 0; + xt_ematch_foreach(ematch, e) { + ret = compat_find_calc_match(ematch, name, + &e->ip, e->comefrom, &off); + if (ret != 0) + goto release_matches; + ++j; + } + + t = compat_ipt_get_target(e); + target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name, + t->u.user.revision); + if (IS_ERR(target)) { + duprintf("check_compat_entry_size_and_hooks: `%s' not found\n", + t->u.user.name); + ret = PTR_ERR(target); + goto release_matches; + } + t->u.kernel.target = target; + + off += xt_compat_target_offset(target); + *size += off; + ret = xt_compat_add_offset(AF_INET, entry_offset, off); + if (ret) + goto out; + + /* Check hooks & underflows */ + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)e - base == hook_entries[h]) + newinfo->hook_entry[h] = hook_entries[h]; + if ((unsigned char *)e - base == underflows[h]) + newinfo->underflow[h] = underflows[h]; + } + + /* Clear counters and comefrom */ + memset(&e->counters, 0, sizeof(e->counters)); + e->comefrom = 0; + return 0; + +out: + module_put(t->u.kernel.target->me); +release_matches: + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + module_put(ematch->u.kernel.match->me); + } + return ret; +} + +static int +compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, + unsigned int *size, const char *name, + struct xt_table_info *newinfo, unsigned char *base) +{ + struct xt_entry_target *t; + struct xt_target *target; + struct ipt_entry *de; + unsigned int origsize; + int ret, h; + struct xt_entry_match *ematch; + + ret = 0; + origsize = *size; + de = (struct ipt_entry *)*dstptr; + memcpy(de, e, sizeof(struct ipt_entry)); + memcpy(&de->counters, &e->counters, sizeof(e->counters)); + + *dstptr += sizeof(struct ipt_entry); + *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry); + + xt_ematch_foreach(ematch, e) { + ret = xt_compat_match_from_user(ematch, dstptr, size); + if (ret != 0) + return ret; + } + de->target_offset = e->target_offset - (origsize - *size); + t = compat_ipt_get_target(e); + target = t->u.kernel.target; + xt_compat_target_from_user(t, dstptr, size); + + de->next_offset = e->next_offset - (origsize - *size); + for (h = 0; h < NF_INET_NUMHOOKS; h++) { + if ((unsigned char *)de - base < newinfo->hook_entry[h]) + newinfo->hook_entry[h] -= origsize - *size; + if ((unsigned char *)de - base < newinfo->underflow[h]) + newinfo->underflow[h] -= origsize - *size; + } + return ret; +} + +static int +compat_check_entry(struct ipt_entry *e, struct net *net, const char *name) +{ + struct xt_entry_match *ematch; + struct xt_mtchk_param mtpar; + unsigned int j; + int ret = 0; + + j = 0; + mtpar.net = net; + mtpar.table = name; + mtpar.entryinfo = &e->ip; + mtpar.hook_mask = e->comefrom; + mtpar.family = NFPROTO_IPV4; + xt_ematch_foreach(ematch, e) { + ret = check_match(ematch, &mtpar); + if (ret != 0) + goto cleanup_matches; + ++j; + } + + ret = check_target(e, net, name); + if (ret) + goto cleanup_matches; + return 0; + + cleanup_matches: + xt_ematch_foreach(ematch, e) { + if (j-- == 0) + break; + cleanup_match(ematch, net); + } + return ret; +} + +static int +translate_compat_table(struct net *net, + const char *name, + unsigned int valid_hooks, + struct xt_table_info **pinfo, + void **pentry0, + unsigned int total_size, + unsigned int number, + unsigned int *hook_entries, + unsigned int *underflows) +{ + unsigned int i, j; + struct xt_table_info *newinfo, *info; + void *pos, *entry0, *entry1; + struct compat_ipt_entry *iter0; + struct ipt_entry *iter1; + unsigned int size; + int ret; + + info = *pinfo; + entry0 = *pentry0; + size = total_size; + info->number = number; + + /* Init all hooks to impossible value. */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + info->hook_entry[i] = 0xFFFFFFFF; + info->underflow[i] = 0xFFFFFFFF; + } + + duprintf("translate_compat_table: size %u\n", info->size); + j = 0; + xt_compat_lock(AF_INET); + xt_compat_init_offsets(AF_INET, number); + /* Walk through entries, checking offsets. */ + xt_entry_foreach(iter0, entry0, total_size) { + ret = check_compat_entry_size_and_hooks(iter0, info, &size, + entry0, + entry0 + total_size, + hook_entries, + underflows, + name); + if (ret != 0) + goto out_unlock; + ++j; + } + + ret = -EINVAL; + if (j != number) { + duprintf("translate_compat_table: %u not %u entries\n", + j, number); + goto out_unlock; + } + + /* Check hooks all assigned */ + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + /* Only hooks which are valid */ + if (!(valid_hooks & (1 << i))) + continue; + if (info->hook_entry[i] == 0xFFFFFFFF) { + duprintf("Invalid hook entry %u %u\n", + i, hook_entries[i]); + goto out_unlock; + } + if (info->underflow[i] == 0xFFFFFFFF) { + duprintf("Invalid underflow %u %u\n", + i, underflows[i]); + goto out_unlock; + } + } + + ret = -ENOMEM; + newinfo = xt_alloc_table_info(size); + if (!newinfo) + goto out_unlock; + + newinfo->number = number; + for (i = 0; i < NF_INET_NUMHOOKS; i++) { + newinfo->hook_entry[i] = info->hook_entry[i]; + newinfo->underflow[i] = info->underflow[i]; + } + entry1 = newinfo->entries[raw_smp_processor_id()]; + pos = entry1; + size = total_size; + xt_entry_foreach(iter0, entry0, total_size) { + ret = compat_copy_entry_from_user(iter0, &pos, &size, + name, newinfo, entry1); + if (ret != 0) + break; + } + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + if (ret) + goto free_newinfo; + + ret = -ELOOP; + if (!mark_source_chains(newinfo, valid_hooks, entry1)) + goto free_newinfo; + + i = 0; + xt_entry_foreach(iter1, entry1, newinfo->size) { + ret = compat_check_entry(iter1, net, name); + if (ret != 0) + break; + ++i; + if (strcmp(ipt_get_target(iter1)->u.user.name, + XT_ERROR_TARGET) == 0) + ++newinfo->stacksize; + } + if (ret) { + /* + * The first i matches need cleanup_entry (calls ->destroy) + * because they had called ->check already. The other j-i + * entries need only release. + */ + int skip = i; + j -= i; + xt_entry_foreach(iter0, entry0, newinfo->size) { + if (skip-- > 0) + continue; + if (j-- == 0) + break; + compat_release_entry(iter0); + } + xt_entry_foreach(iter1, entry1, newinfo->size) { + if (i-- == 0) + break; + cleanup_entry(iter1, net); + } + xt_free_table_info(newinfo); + return ret; + } + + /* And one copy for every other CPU */ + for_each_possible_cpu(i) + if (newinfo->entries[i] && newinfo->entries[i] != entry1) + memcpy(newinfo->entries[i], entry1, newinfo->size); + + *pinfo = newinfo; + *pentry0 = entry1; + xt_free_table_info(info); + return 0; + +free_newinfo: + xt_free_table_info(newinfo); +out: + xt_entry_foreach(iter0, entry0, total_size) { + if (j-- == 0) + break; + compat_release_entry(iter0); + } + return ret; +out_unlock: + xt_compat_flush_offsets(AF_INET); + xt_compat_unlock(AF_INET); + goto out; +} + +static int +compat_do_replace(struct net *net, void __user *user, unsigned int len) +{ + int ret; + struct compat_ipt_replace tmp; + struct xt_table_info *newinfo; + void *loc_cpu_entry; + struct ipt_entry *iter; + + if (copy_from_user(&tmp, user, sizeof(tmp)) != 0) + return -EFAULT; + + /* overflow check */ + if (tmp.size >= INT_MAX / num_possible_cpus()) + return -ENOMEM; + if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) + return -ENOMEM; + tmp.name[sizeof(tmp.name)-1] = 0; + + newinfo = xt_alloc_table_info(tmp.size); + if (!newinfo) + return -ENOMEM; + + /* choose the copy that is on our node/cpu */ + loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), + tmp.size) != 0) { + ret = -EFAULT; + goto free_newinfo; + } + + ret = translate_compat_table(net, tmp.name, tmp.valid_hooks, + &newinfo, &loc_cpu_entry, tmp.size, + tmp.num_entries, tmp.hook_entry, + tmp.underflow); + if (ret != 0) + goto free_newinfo; + + duprintf("compat_do_replace: Translated table\n"); + + ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo, + tmp.num_counters, compat_ptr(tmp.counters)); + if (ret) + goto free_newinfo_untrans; + return 0; + + free_newinfo_untrans: + xt_entry_foreach(iter, loc_cpu_entry, newinfo->size) + cleanup_entry(iter, net); + free_newinfo: + xt_free_table_info(newinfo); + return ret; +} + static int -do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +compat_do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, + unsigned int len) { int ret; - if (!capable(CAP_NET_ADMIN)) + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) return -EPERM; switch (cmd) { case IPT_SO_SET_REPLACE: - ret = do_replace(user, len); + ret = compat_do_replace(sock_net(sk), user, len); break; case IPT_SO_SET_ADD_COUNTERS: - ret = do_add_counters(user, len); + ret = do_add_counters(sock_net(sk), user, len, 1); break; default: @@ -1104,78 +1873,163 @@ do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) return ret; } +struct compat_ipt_get_entries { + char name[XT_TABLE_MAXNAMELEN]; + compat_uint_t size; + struct compat_ipt_entry entrytable[0]; +}; + static int -do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, + void __user *userptr) +{ + struct xt_counters *counters; + const struct xt_table_info *private = table->private; + void __user *pos; + unsigned int size; + int ret = 0; + const void *loc_cpu_entry; + unsigned int i = 0; + struct ipt_entry *iter; + + counters = alloc_counters(table); + if (IS_ERR(counters)) + return PTR_ERR(counters); + + /* choose the copy that is on our node/cpu, ... + * This choice is lazy (because current thread is + * allowed to migrate to another cpu) + */ + loc_cpu_entry = private->entries[raw_smp_processor_id()]; + pos = userptr; + size = total_size; + xt_entry_foreach(iter, loc_cpu_entry, total_size) { + ret = compat_copy_entry_to_user(iter, &pos, + &size, counters, i++); + if (ret != 0) + break; + } + + vfree(counters); + return ret; +} + +static int +compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr, + int *len) { int ret; + struct compat_ipt_get_entries get; + struct xt_table *t; - if (!capable(CAP_NET_ADMIN)) - return -EPERM; + if (*len < sizeof(get)) { + duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get)); + return -EINVAL; + } - switch (cmd) { - case IPT_SO_GET_INFO: { - char name[IPT_TABLE_MAXNAMELEN]; - struct ipt_table *t; + if (copy_from_user(&get, uptr, sizeof(get)) != 0) + return -EFAULT; - if (*len != sizeof(struct ipt_getinfo)) { - duprintf("length %u != %u\n", *len, - sizeof(struct ipt_getinfo)); - ret = -EINVAL; - break; - } + if (*len != sizeof(struct compat_ipt_get_entries) + get.size) { + duprintf("compat_get_entries: %u != %zu\n", + *len, sizeof(get) + get.size); + return -EINVAL; + } - if (copy_from_user(name, user, sizeof(name)) != 0) { - ret = -EFAULT; - break; + xt_compat_lock(AF_INET); + t = xt_find_table_lock(net, AF_INET, get.name); + if (!IS_ERR_OR_NULL(t)) { + const struct xt_table_info *private = t->private; + struct xt_table_info info; + duprintf("t->private->number = %u\n", private->number); + ret = compat_table_info(private, &info); + if (!ret && get.size == info.size) { + ret = compat_copy_entries_to_user(private->size, + t, uptr->entrytable); + } else if (!ret) { + duprintf("compat_get_entries: I've got %u not %u!\n", + private->size, get.size); + ret = -EAGAIN; } - name[IPT_TABLE_MAXNAMELEN-1] = '\0'; - - t = try_then_request_module(xt_find_table_lock(AF_INET, name), - "iptable_%s", name); - if (t && !IS_ERR(t)) { - struct ipt_getinfo info; - struct xt_table_info *private = t->private; - - info.valid_hooks = t->valid_hooks; - memcpy(info.hook_entry, private->hook_entry, - sizeof(info.hook_entry)); - memcpy(info.underflow, private->underflow, - sizeof(info.underflow)); - info.num_entries = private->number; - info.size = private->size; - memcpy(info.name, name, sizeof(info.name)); - - if (copy_to_user(user, &info, *len) != 0) - ret = -EFAULT; - else - ret = 0; - xt_table_unlock(t); - module_put(t->me); - } else - ret = t ? PTR_ERR(t) : -ENOENT; + xt_compat_flush_offsets(AF_INET); + module_put(t->me); + xt_table_unlock(t); + } else + ret = t ? PTR_ERR(t) : -ENOENT; + + xt_compat_unlock(AF_INET); + return ret; +} + +static int do_ipt_get_ctl(struct sock *, int, void __user *, int *); + +static int +compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len, 1); + break; + case IPT_SO_GET_ENTRIES: + ret = compat_get_entries(sock_net(sk), user, len); + break; + default: + ret = do_ipt_get_ctl(sk, cmd, user, len); } - break; + return ret; +} +#endif - case IPT_SO_GET_ENTRIES: { - struct ipt_get_entries get; +static int +do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len) +{ + int ret; - if (*len < sizeof(get)) { - duprintf("get_entries: %u < %u\n", *len, sizeof(get)); - ret = -EINVAL; - } else if (copy_from_user(&get, user, sizeof(get)) != 0) { - ret = -EFAULT; - } else if (*len != sizeof(struct ipt_get_entries) + get.size) { - duprintf("get_entries: %u != %u\n", *len, - sizeof(struct ipt_get_entries) + get.size); - ret = -EINVAL; - } else - ret = get_entries(&get, user); + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_SET_REPLACE: + ret = do_replace(sock_net(sk), user, len); break; + + case IPT_SO_SET_ADD_COUNTERS: + ret = do_add_counters(sock_net(sk), user, len, 0); + break; + + default: + duprintf("do_ipt_set_ctl: unknown request %i\n", cmd); + ret = -EINVAL; } + return ret; +} + +static int +do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) +{ + int ret; + + if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) + return -EPERM; + + switch (cmd) { + case IPT_SO_GET_INFO: + ret = get_info(sock_net(sk), user, len, 0); + break; + + case IPT_SO_GET_ENTRIES: + ret = get_entries(sock_net(sk), user, len); + break; + case IPT_SO_GET_REVISION_MATCH: case IPT_SO_GET_REVISION_TARGET: { - struct ipt_get_revision rev; + struct xt_get_revision rev; int target; if (*len != sizeof(rev)) { @@ -1186,6 +2040,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) ret = -EFAULT; break; } + rev.name[sizeof(rev.name)-1] = 0; if (cmd == IPT_SO_GET_REVISION_TARGET) target = 1; @@ -1207,89 +2062,92 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) return ret; } -int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl) +struct xt_table *ipt_register_table(struct net *net, + const struct xt_table *table, + const struct ipt_replace *repl) { int ret; struct xt_table_info *newinfo; - static struct xt_table_info bootstrap - = { 0, 0, 0, { 0 }, { 0 }, { } }; + struct xt_table_info bootstrap = {0}; void *loc_cpu_entry; + struct xt_table *new_table; newinfo = xt_alloc_table_info(repl->size); - if (!newinfo) - return -ENOMEM; + if (!newinfo) { + ret = -ENOMEM; + goto out; + } - /* choose the copy on our node/cpu - * but dont care of preemption - */ + /* choose the copy on our node/cpu, but dont care about preemption */ loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; memcpy(loc_cpu_entry, repl->entries, repl->size); - ret = translate_table(table->name, table->valid_hooks, - newinfo, loc_cpu_entry, repl->size, - repl->num_entries, - repl->hook_entry, - repl->underflow); - if (ret != 0) { - xt_free_table_info(newinfo); - return ret; - } + ret = translate_table(net, newinfo, loc_cpu_entry, repl); + if (ret != 0) + goto out_free; - if (xt_register_table(table, &bootstrap, newinfo) != 0) { - xt_free_table_info(newinfo); - return ret; + new_table = xt_register_table(net, table, &bootstrap, newinfo); + if (IS_ERR(new_table)) { + ret = PTR_ERR(new_table); + goto out_free; } - return 0; + return new_table; + +out_free: + xt_free_table_info(newinfo); +out: + return ERR_PTR(ret); } -void ipt_unregister_table(struct ipt_table *table) +void ipt_unregister_table(struct net *net, struct xt_table *table) { struct xt_table_info *private; void *loc_cpu_entry; + struct module *table_owner = table->me; + struct ipt_entry *iter; - private = xt_unregister_table(table); + private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ loc_cpu_entry = private->entries[raw_smp_processor_id()]; - IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL); + xt_entry_foreach(iter, loc_cpu_entry, private->size) + cleanup_entry(iter, net); + if (private->number > private->initial_entries) + module_put(table_owner); xt_free_table_info(private); } /* Returns 1 if the type and code is matched by the range, 0 otherwise */ -static inline int +static inline bool icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code, u_int8_t type, u_int8_t code, - int invert) + bool invert) { - return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code)) + return ((test_type == 0xFF) || + (type == test_type && code >= min_code && code <= max_code)) ^ invert; } -static int -icmp_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +static bool +icmp_match(const struct sk_buff *skb, struct xt_action_param *par) { - struct icmphdr _icmph, *ic; - const struct ipt_icmp *icmpinfo = matchinfo; + const struct icmphdr *ic; + struct icmphdr _icmph; + const struct ipt_icmp *icmpinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) - return 0; + if (par->fragoff != 0) + return false; - ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph); + ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph); if (ic == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ duprintf("Dropping evil ICMP tinygram.\n"); - *hotdrop = 1; - return 0; + par->hotdrop = true; + return false; } return icmp_type_code_match(icmpinfo->type, @@ -1299,32 +2157,31 @@ icmp_match(const struct sk_buff *skb, !!(icmpinfo->invflags&IPT_ICMP_INV)); } -/* Called when user tries to insert an entry of this type. */ -static int -icmp_checkentry(const char *tablename, - const void *info, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) +static int icmp_checkentry(const struct xt_mtchk_param *par) { - const struct ipt_ip *ip = info; - const struct ipt_icmp *icmpinfo = matchinfo; - - /* Must specify proto == ICMP, and no unknown invflags */ - return ip->proto == IPPROTO_ICMP - && !(ip->invflags & IPT_INV_PROTO) - && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp)) - && !(icmpinfo->invflags & ~IPT_ICMP_INV); -} + const struct ipt_icmp *icmpinfo = par->matchinfo; -/* The built-in targets: standard (NULL) and error. */ -static struct ipt_target ipt_standard_target = { - .name = IPT_STANDARD_TARGET, -}; + /* Must specify no unknown invflags */ + return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0; +} -static struct ipt_target ipt_error_target = { - .name = IPT_ERROR_TARGET, - .target = ipt_error, +static struct xt_target ipt_builtin_tg[] __read_mostly = { + { + .name = XT_STANDARD_TARGET, + .targetsize = sizeof(int), + .family = NFPROTO_IPV4, +#ifdef CONFIG_COMPAT + .compatsize = sizeof(compat_int_t), + .compat_from_user = compat_standard_from_user, + .compat_to_user = compat_standard_to_user, +#endif + }, + { + .name = XT_ERROR_TARGET, + .target = ipt_error, + .targetsize = XT_FUNCTION_MAXNAMELEN, + .family = NFPROTO_IPV4, + }, }; static struct nf_sockopt_ops ipt_sockopts = { @@ -1332,52 +2189,89 @@ static struct nf_sockopt_ops ipt_sockopts = { .set_optmin = IPT_BASE_CTL, .set_optmax = IPT_SO_SET_MAX+1, .set = do_ipt_set_ctl, +#ifdef CONFIG_COMPAT + .compat_set = compat_do_ipt_set_ctl, +#endif .get_optmin = IPT_BASE_CTL, .get_optmax = IPT_SO_GET_MAX+1, .get = do_ipt_get_ctl, +#ifdef CONFIG_COMPAT + .compat_get = compat_do_ipt_get_ctl, +#endif + .owner = THIS_MODULE, +}; + +static struct xt_match ipt_builtin_mt[] __read_mostly = { + { + .name = "icmp", + .match = icmp_match, + .matchsize = sizeof(struct ipt_icmp), + .checkentry = icmp_checkentry, + .proto = IPPROTO_ICMP, + .family = NFPROTO_IPV4, + }, }; -static struct ipt_match icmp_matchstruct = { - .name = "icmp", - .match = &icmp_match, - .checkentry = &icmp_checkentry, +static int __net_init ip_tables_net_init(struct net *net) +{ + return xt_proto_init(net, NFPROTO_IPV4); +} + +static void __net_exit ip_tables_net_exit(struct net *net) +{ + xt_proto_fini(net, NFPROTO_IPV4); +} + +static struct pernet_operations ip_tables_net_ops = { + .init = ip_tables_net_init, + .exit = ip_tables_net_exit, }; -static int __init init(void) +static int __init ip_tables_init(void) { int ret; - xt_proto_init(AF_INET); + ret = register_pernet_subsys(&ip_tables_net_ops); + if (ret < 0) + goto err1; - /* Noone else will be downing sem now, so we won't sleep */ - xt_register_target(AF_INET, &ipt_standard_target); - xt_register_target(AF_INET, &ipt_error_target); - xt_register_match(AF_INET, &icmp_matchstruct); + /* No one else will be downing sem now, so we won't sleep */ + ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); + if (ret < 0) + goto err2; + ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); + if (ret < 0) + goto err4; /* Register setsockopt */ ret = nf_register_sockopt(&ipt_sockopts); - if (ret < 0) { - duprintf("Unable to register sockopts.\n"); - return ret; - } + if (ret < 0) + goto err5; - printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n"); + pr_info("(C) 2000-2006 Netfilter Core Team\n"); return 0; + +err5: + xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); +err4: + xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); +err2: + unregister_pernet_subsys(&ip_tables_net_ops); +err1: + return ret; } -static void __exit fini(void) +static void __exit ip_tables_fini(void) { nf_unregister_sockopt(&ipt_sockopts); - xt_unregister_match(AF_INET, &icmp_matchstruct); - xt_unregister_target(AF_INET, &ipt_error_target); - xt_unregister_target(AF_INET, &ipt_standard_target); - - xt_proto_fini(AF_INET); + xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt)); + xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg)); + unregister_pernet_subsys(&ip_tables_net_ops); } EXPORT_SYMBOL(ipt_register_table); EXPORT_SYMBOL(ipt_unregister_table); EXPORT_SYMBOL(ipt_do_table); -module_init(init); -module_exit(fini); +module_init(ip_tables_init); +module_exit(ip_tables_fini); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index d9bc971f03a..2510c02c2d2 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -1,4 +1,4 @@ -/* Cluster IP hashmark target +/* Cluster IP hashmark target * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> * based on ideas of Fabio Olive Leite <olive@unixforge.org> * @@ -9,43 +9,34 @@ * published by the Free Software Foundation. * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> -#include <linux/config.h> #include <linux/proc_fs.h> #include <linux/jhash.h> #include <linux/bitops.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <linux/ip.h> #include <linux/tcp.h> #include <linux/udp.h> #include <linux/icmp.h> #include <linux/if_arp.h> -#include <linux/proc_fs.h> #include <linux/seq_file.h> - -#include <net/checksum.h> - #include <linux/netfilter_arp.h> - +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_CLUSTERIP.h> -#include <net/netfilter/nf_conntrack_compat.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/net_namespace.h> +#include <net/netns/generic.h> +#include <net/checksum.h> +#include <net/ip.h> #define CLUSTERIP_VERSION "0.8" -#define DEBUG_CLUSTERIP - -#ifdef DEBUG_CLUSTERIP -#define DEBUGP printk -#else -#define DEBUGP -#endif - -#define ASSERT_READ_LOCK(x) - MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables target for CLUSTERIP"); +MODULE_DESCRIPTION("Xtables: CLUSTERIP target"); struct clusterip_config { struct list_head list; /* list of all configs */ @@ -53,7 +44,7 @@ struct clusterip_config { atomic_t entries; /* number of entries/rules * referencing us */ - u_int32_t clusterip; /* the IP address */ + __be32 clusterip; /* the IP address */ u_int8_t clustermac[ETH_ALEN]; /* the MAC address */ struct net_device *dev; /* device */ u_int16_t num_total_nodes; /* total number of nodes */ @@ -64,17 +55,24 @@ struct clusterip_config { #endif enum clusterip_hashmode hash_mode; /* which hashing mode */ u_int32_t hash_initval; /* hash initialization */ + struct rcu_head rcu; }; -static LIST_HEAD(clusterip_configs); +#ifdef CONFIG_PROC_FS +static const struct file_operations clusterip_proc_fops; +#endif + +static int clusterip_net_id __read_mostly; -/* clusterip_lock protects the clusterip_configs list */ -static DEFINE_RWLOCK(clusterip_lock); +struct clusterip_net { + struct list_head configs; + /* lock protects the configs list */ + spinlock_t lock; #ifdef CONFIG_PROC_FS -static struct file_operations clusterip_proc_fops; -static struct proc_dir_entry *clusterip_procdir; + struct proc_dir_entry *procdir; #endif +}; static inline void clusterip_config_get(struct clusterip_config *c) @@ -82,18 +80,17 @@ clusterip_config_get(struct clusterip_config *c) atomic_inc(&c->refcount); } -static inline void -clusterip_config_put(struct clusterip_config *c) + +static void clusterip_config_rcu_free(struct rcu_head *head) { - if (atomic_dec_and_test(&c->refcount)) - kfree(c); + kfree(container_of(head, struct clusterip_config, rcu)); } -/* increase the count of entries(rules) using/referencing this config */ static inline void -clusterip_config_entry_get(struct clusterip_config *c) +clusterip_config_put(struct clusterip_config *c) { - atomic_inc(&c->entries); + if (atomic_dec_and_test(&c->refcount)) + call_rcu_bh(&c->rcu, clusterip_config_rcu_free); } /* decrease the count of entries using/referencing this config. If last @@ -102,55 +99,57 @@ clusterip_config_entry_get(struct clusterip_config *c) static inline void clusterip_config_entry_put(struct clusterip_config *c) { - if (atomic_dec_and_test(&c->entries)) { - write_lock_bh(&clusterip_lock); - list_del(&c->list); - write_unlock_bh(&clusterip_lock); + struct net *net = dev_net(c->dev); + struct clusterip_net *cn = net_generic(net, clusterip_net_id); - dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0); + local_bh_disable(); + if (atomic_dec_and_lock(&c->entries, &cn->lock)) { + list_del_rcu(&c->list); + spin_unlock(&cn->lock); + local_bh_enable(); + + dev_mc_del(c->dev, c->clustermac); dev_put(c->dev); /* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ #ifdef CONFIG_PROC_FS - remove_proc_entry(c->pde->name, c->pde->parent); + proc_remove(c->pde); #endif + return; } + local_bh_enable(); } static struct clusterip_config * -__clusterip_config_find(u_int32_t clusterip) +__clusterip_config_find(struct net *net, __be32 clusterip) { - struct list_head *pos; + struct clusterip_config *c; + struct clusterip_net *cn = net_generic(net, clusterip_net_id); - ASSERT_READ_LOCK(&clusterip_lock); - list_for_each(pos, &clusterip_configs) { - struct clusterip_config *c = list_entry(pos, - struct clusterip_config, list); - if (c->clusterip == clusterip) { + list_for_each_entry_rcu(c, &cn->configs, list) { + if (c->clusterip == clusterip) return c; - } } return NULL; } static inline struct clusterip_config * -clusterip_config_find_get(u_int32_t clusterip, int entry) +clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) { struct clusterip_config *c; - read_lock_bh(&clusterip_lock); - c = __clusterip_config_find(clusterip); - if (!c) { - read_unlock_bh(&clusterip_lock); - return NULL; + rcu_read_lock_bh(); + c = __clusterip_config_find(net, clusterip); + if (c) { + if (unlikely(!atomic_inc_not_zero(&c->refcount))) + c = NULL; + else if (entry) + atomic_inc(&c->entries); } - atomic_inc(&c->refcount); - if (entry) - atomic_inc(&c->entries); - read_unlock_bh(&clusterip_lock); + rcu_read_unlock_bh(); return c; } @@ -161,23 +160,21 @@ clusterip_config_init_nodelist(struct clusterip_config *c, { int n; - for (n = 0; n < i->num_local_nodes; n++) { + for (n = 0; n < i->num_local_nodes; n++) set_bit(i->local_nodes[n] - 1, &c->local_nodes); - } } static struct clusterip_config * -clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, +clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip, struct net_device *dev) { struct clusterip_config *c; - char buffer[16]; + struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id); - c = kmalloc(sizeof(*c), GFP_ATOMIC); + c = kzalloc(sizeof(*c), GFP_ATOMIC); if (!c) return NULL; - memset(c, 0, sizeof(*c)); c->dev = dev; c->clusterip = ip; memcpy(&c->clustermac, &i->clustermac, ETH_ALEN); @@ -189,24 +186,29 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip, atomic_set(&c->entries, 1); #ifdef CONFIG_PROC_FS - /* create proc dir entry */ - sprintf(buffer, "%u.%u.%u.%u", NIPQUAD(ip)); - c->pde = create_proc_entry(buffer, S_IWUSR|S_IRUSR, clusterip_procdir); - if (!c->pde) { - kfree(c); - return NULL; + { + char buffer[16]; + + /* create proc dir entry */ + sprintf(buffer, "%pI4", &ip); + c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR, + cn->procdir, + &clusterip_proc_fops, c); + if (!c->pde) { + kfree(c); + return NULL; + } } - c->pde->proc_fops = &clusterip_proc_fops; - c->pde->data = c; #endif - write_lock_bh(&clusterip_lock); - list_add(&c->list, &clusterip_configs); - write_unlock_bh(&clusterip_lock); + spin_lock_bh(&cn->lock); + list_add_rcu(&c->list, &cn->configs); + spin_unlock_bh(&cn->lock); return c; } +#ifdef CONFIG_PROC_FS static int clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) { @@ -222,51 +224,41 @@ clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum) return 0; } -static int +static bool clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum) { if (nodenum == 0 || nodenum > c->num_total_nodes) - return 1; - + return true; + if (test_and_clear_bit(nodenum - 1, &c->local_nodes)) - return 0; + return false; - return 1; + return true; } +#endif static inline u_int32_t -clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) +clusterip_hashfn(const struct sk_buff *skb, + const struct clusterip_config *config) { - struct iphdr *iph = skb->nh.iph; + const struct iphdr *iph = ip_hdr(skb); unsigned long hashval; - u_int16_t sport, dport; - struct tcphdr *th; - struct udphdr *uh; - struct icmphdr *ih; - - switch (iph->protocol) { - case IPPROTO_TCP: - th = (void *)iph+iph->ihl*4; - sport = ntohs(th->source); - dport = ntohs(th->dest); - break; - case IPPROTO_UDP: - uh = (void *)iph+iph->ihl*4; - sport = ntohs(uh->source); - dport = ntohs(uh->dest); - break; - case IPPROTO_ICMP: - ih = (void *)iph+iph->ihl*4; - sport = ntohs(ih->un.echo.id); - dport = (ih->type<<8)|ih->code; - break; - default: - if (net_ratelimit()) { - printk(KERN_NOTICE "CLUSTERIP: unknown protocol `%u'\n", - iph->protocol); + u_int16_t sport = 0, dport = 0; + int poff; + + poff = proto_ports_offset(iph->protocol); + if (poff >= 0) { + const u_int16_t *ports; + u16 _ports[2]; + + ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports); + if (ports) { + sport = ports[0]; + dport = ports[1]; } - sport = dport = 0; + } else { + net_info_ratelimited("unknown protocol %u\n", iph->protocol); } switch (config->hash_mode) { @@ -275,7 +267,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) config->hash_initval); break; case CLUSTERIP_HASHMODE_SIP_SPT: - hashval = jhash_2words(ntohl(iph->saddr), sport, + hashval = jhash_2words(ntohl(iph->saddr), sport, config->hash_initval); break; case CLUSTERIP_HASHMODE_SIP_SPT_DPT: @@ -287,274 +279,260 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config) hashval = 0; /* This cannot happen, unless the check function wasn't called * at rule load time */ - printk("CLUSTERIP: unknown mode `%u'\n", config->hash_mode); + pr_info("unknown mode %u\n", config->hash_mode); BUG(); break; } /* node numbers are 1..n, not 0..n */ - return ((hashval % config->num_total_nodes)+1); + return (((u64)hashval * config->num_total_nodes) >> 32) + 1; } static inline int -clusterip_responsible(struct clusterip_config *config, u_int32_t hash) +clusterip_responsible(const struct clusterip_config *config, u_int32_t hash) { return test_bit(hash - 1, &config->local_nodes); } -/*********************************************************************** - * IPTABLES TARGET +/*********************************************************************** + * IPTABLES TARGET ***********************************************************************/ static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) +clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct ipt_clusterip_tgt_info *cipinfo = targinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + struct nf_conn *ct; enum ip_conntrack_info ctinfo; - u_int32_t *mark, hash; + u_int32_t hash; /* don't need to clusterip_config_get() here, since refcount * is only decremented by destroy() - and ip_tables guarantees * that the ->target() function isn't called after ->destroy() */ - mark = nf_ct_get_mark((*pskb), &ctinfo); - if (mark == NULL) { - printk(KERN_ERR "CLUSTERIP: no conntrack!\n"); - /* FIXME: need to drop invalid ones, since replies - * to outgoing connections of other nodes will be - * marked as INVALID */ + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) return NF_DROP; - } /* special case: ICMP error handling. conntrack distinguishes between * error messages (RELATED) and information requests (see below) */ - if ((*pskb)->nh.iph->protocol == IPPROTO_ICMP - && (ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED+IP_CT_IS_REPLY)) - return IPT_CONTINUE; + if (ip_hdr(skb)->protocol == IPPROTO_ICMP && + (ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)) + return XT_CONTINUE; - /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, + /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO, * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here * on, which all have an ID field [relevant for hashing]. */ - hash = clusterip_hashfn(*pskb, cipinfo->config); + hash = clusterip_hashfn(skb, cipinfo->config); switch (ctinfo) { - case IP_CT_NEW: - *mark = hash; - break; - case IP_CT_RELATED: - case IP_CT_RELATED+IP_CT_IS_REPLY: - /* FIXME: we don't handle expectations at the - * moment. they can arrive on a different node than - * the master connection (e.g. FTP passive mode) */ - case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED+IP_CT_IS_REPLY: - break; - default: - break; + case IP_CT_NEW: + ct->mark = hash; + break; + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + /* FIXME: we don't handle expectations at the moment. + * They can arrive on a different node than + * the master connection (e.g. FTP passive mode) */ + case IP_CT_ESTABLISHED: + case IP_CT_ESTABLISHED_REPLY: + break; + default: /* Prevent gcc warnings */ + break; } -#ifdef DEBUG_CLUSTERP - DUMP_TUPLE(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); +#ifdef DEBUG + nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); #endif - DEBUGP("hash=%u ct_hash=%u ", hash, *mark); + pr_debug("hash=%u ct_hash=%u ", hash, ct->mark); if (!clusterip_responsible(cipinfo->config, hash)) { - DEBUGP("not responsible\n"); + pr_debug("not responsible\n"); return NF_DROP; } - DEBUGP("responsible\n"); + pr_debug("responsible\n"); /* despite being received via linklayer multicast, this is * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */ - (*pskb)->pkt_type = PACKET_HOST; + skb->pkt_type = PACKET_HOST; - return IPT_CONTINUE; + return XT_CONTINUE; } -static int -checkentry(const char *tablename, - const void *e_void, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) +static int clusterip_tg_check(const struct xt_tgchk_param *par) { - struct ipt_clusterip_tgt_info *cipinfo = targinfo; - const struct ipt_entry *e = e_void; - + struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; struct clusterip_config *config; - - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))) { - printk(KERN_WARNING "CLUSTERIP: targinfosize %u != %Zu\n", - targinfosize, - IPT_ALIGN(sizeof(struct ipt_clusterip_tgt_info))); - return 0; - } + int ret; if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT && cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) { - printk(KERN_WARNING "CLUSTERIP: unknown mode `%u'\n", - cipinfo->hash_mode); - return 0; + pr_info("unknown mode %u\n", cipinfo->hash_mode); + return -EINVAL; } - if (e->ip.dmsk.s_addr != 0xffffffff - || e->ip.dst.s_addr == 0) { - printk(KERN_ERR "CLUSTERIP: Please specify destination IP\n"); - return 0; + if (e->ip.dmsk.s_addr != htonl(0xffffffff) || + e->ip.dst.s_addr == 0) { + pr_info("Please specify destination IP\n"); + return -EINVAL; } /* FIXME: further sanity checks */ - config = clusterip_config_find_get(e->ip.dst.s_addr, 1); - if (config) { - if (cipinfo->config != NULL) { - /* Case A: This is an entry that gets reloaded, since - * it still has a cipinfo->config pointer. Simply - * increase the entry refcount and return */ - if (cipinfo->config != config) { - printk(KERN_ERR "CLUSTERIP: Reloaded entry " - "has invalid config pointer!\n"); - return 0; - } - clusterip_config_entry_get(cipinfo->config); - } else { - /* Case B: This is a new rule referring to an existing - * clusterip config. */ - cipinfo->config = config; - clusterip_config_entry_get(cipinfo->config); - } - } else { - /* Case C: This is a completely new clusterip config */ + config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1); + if (!config) { if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) { - printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr)); - return 0; + pr_info("no config found for %pI4, need 'new'\n", + &e->ip.dst.s_addr); + return -EINVAL; } else { struct net_device *dev; if (e->ip.iniface[0] == '\0') { - printk(KERN_WARNING "CLUSTERIP: Please specify an interface name\n"); - return 0; + pr_info("Please specify an interface name\n"); + return -EINVAL; } - dev = dev_get_by_name(e->ip.iniface); + dev = dev_get_by_name(par->net, e->ip.iniface); if (!dev) { - printk(KERN_WARNING "CLUSTERIP: no such interface %s\n", e->ip.iniface); - return 0; + pr_info("no such interface %s\n", + e->ip.iniface); + return -ENOENT; } - config = clusterip_config_init(cipinfo, + config = clusterip_config_init(cipinfo, e->ip.dst.s_addr, dev); if (!config) { - printk(KERN_WARNING "CLUSTERIP: cannot allocate config\n"); dev_put(dev); - return 0; + return -ENOMEM; } - dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0); + dev_mc_add(config->dev, config->clustermac); } - cipinfo->config = config; } + cipinfo->config = config; - return 1; + ret = nf_ct_l3proto_try_module_get(par->family); + if (ret < 0) + pr_info("cannot load conntrack support for proto=%u\n", + par->family); + return ret; } /* drop reference count of cluster config when rule is deleted */ -static void destroy(void *matchinfo, unsigned int matchinfosize) +static void clusterip_tg_destroy(const struct xt_tgdtor_param *par) { - struct ipt_clusterip_tgt_info *cipinfo = matchinfo; + const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo; /* if no more entries are referencing the config, remove it * from the list and destroy the proc entry */ clusterip_config_entry_put(cipinfo->config); clusterip_config_put(cipinfo->config); + + nf_ct_l3proto_module_put(par->family); } -static struct ipt_target clusterip_tgt = { - .name = "CLUSTERIP", - .target = &target, - .checkentry = &checkentry, - .destroy = &destroy, - .me = THIS_MODULE +#ifdef CONFIG_COMPAT +struct compat_ipt_clusterip_tgt_info +{ + u_int32_t flags; + u_int8_t clustermac[6]; + u_int16_t num_total_nodes; + u_int16_t num_local_nodes; + u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; + u_int32_t hash_mode; + u_int32_t hash_initval; + compat_uptr_t config; +}; +#endif /* CONFIG_COMPAT */ + +static struct xt_target clusterip_tg_reg __read_mostly = { + .name = "CLUSTERIP", + .family = NFPROTO_IPV4, + .target = clusterip_tg, + .checkentry = clusterip_tg_check, + .destroy = clusterip_tg_destroy, + .targetsize = sizeof(struct ipt_clusterip_tgt_info), +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info), +#endif /* CONFIG_COMPAT */ + .me = THIS_MODULE }; -/*********************************************************************** - * ARP MANGLING CODE +/*********************************************************************** + * ARP MANGLING CODE ***********************************************************************/ /* hardcoded for 48bit ethernet and 32bit ipv4 addresses */ struct arp_payload { u_int8_t src_hw[ETH_ALEN]; - u_int32_t src_ip; + __be32 src_ip; u_int8_t dst_hw[ETH_ALEN]; - u_int32_t dst_ip; -} __attribute__ ((packed)); + __be32 dst_ip; +} __packed; -#ifdef CLUSTERIP_DEBUG -static void arp_print(struct arp_payload *payload) +#ifdef DEBUG +static void arp_print(struct arp_payload *payload) { #define HBUFFERLEN 30 char hbuffer[HBUFFERLEN]; int j,k; - const char hexbuf[]= "0123456789abcdef"; for (k=0, j=0; k < HBUFFERLEN-3 && j < ETH_ALEN; j++) { - hbuffer[k++]=hexbuf[(payload->src_hw[j]>>4)&15]; - hbuffer[k++]=hexbuf[payload->src_hw[j]&15]; + hbuffer[k++] = hex_asc_hi(payload->src_hw[j]); + hbuffer[k++] = hex_asc_lo(payload->src_hw[j]); hbuffer[k++]=':'; } hbuffer[--k]='\0'; - printk("src %u.%u.%u.%u@%s, dst %u.%u.%u.%u\n", - NIPQUAD(payload->src_ip), hbuffer, - NIPQUAD(payload->dst_ip)); + pr_debug("src %pI4@%s, dst %pI4\n", + &payload->src_ip, hbuffer, &payload->dst_ip); } #endif static unsigned int -arp_mangle(unsigned int hook, - struct sk_buff **pskb, +arp_mangle(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - struct arphdr *arp = (*pskb)->nh.arph; + struct arphdr *arp = arp_hdr(skb); struct arp_payload *payload; struct clusterip_config *c; + struct net *net = dev_net(in ? in : out); /* we don't care about non-ethernet and non-ipv4 ARP */ - if (arp->ar_hrd != htons(ARPHRD_ETHER) - || arp->ar_pro != htons(ETH_P_IP) - || arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) + if (arp->ar_hrd != htons(ARPHRD_ETHER) || + arp->ar_pro != htons(ETH_P_IP) || + arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN) return NF_ACCEPT; /* we only want to mangle arp requests and replies */ - if (arp->ar_op != htons(ARPOP_REPLY) - && arp->ar_op != htons(ARPOP_REQUEST)) + if (arp->ar_op != htons(ARPOP_REPLY) && + arp->ar_op != htons(ARPOP_REQUEST)) return NF_ACCEPT; payload = (void *)(arp+1); - /* if there is no clusterip configuration for the arp reply's + /* if there is no clusterip configuration for the arp reply's * source ip, we don't want to mangle it */ - c = clusterip_config_find_get(payload->src_ip, 0); + c = clusterip_config_find_get(net, payload->src_ip, 0); if (!c) return NF_ACCEPT; - /* normally the linux kernel always replies to arp queries of + /* normally the linux kernel always replies to arp queries of * addresses on different interfacs. However, in the CLUSTERIP case * this wouldn't work, since we didn't subscribe the mcast group on * other interfaces */ if (c->dev != out) { - DEBUGP("CLUSTERIP: not mangling arp reply on different " - "interface: cip'%s'-skb'%s'\n", c->dev->name, out->name); + pr_debug("not mangling arp reply on different " + "interface: cip'%s'-skb'%s'\n", + c->dev->name, out->name); clusterip_config_put(c); return NF_ACCEPT; } @@ -562,8 +540,8 @@ arp_mangle(unsigned int hook, /* mangle reply hardware address */ memcpy(payload->src_hw, c->clustermac, arp->ar_hln); -#ifdef CLUSTERIP_DEBUG - DEBUGP(KERN_DEBUG "CLUSTERIP mangled arp reply: "); +#ifdef DEBUG + pr_debug("mangled arp reply: "); arp_print(payload); #endif @@ -572,15 +550,15 @@ arp_mangle(unsigned int hook, return NF_ACCEPT; } -static struct nf_hook_ops cip_arp_ops = { +static struct nf_hook_ops cip_arp_ops __read_mostly = { .hook = arp_mangle, - .pf = NF_ARP, + .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, .priority = -1 }; -/*********************************************************************** - * PROC DIR HANDLING +/*********************************************************************** + * PROC DIR HANDLING ***********************************************************************/ #ifdef CONFIG_PROC_FS @@ -594,8 +572,7 @@ struct clusterip_seq_position { static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) { - struct proc_dir_entry *pde = s->private; - struct clusterip_config *c = pde->data; + struct clusterip_config *c = s->private; unsigned int weight; u_int32_t local_nodes; struct clusterip_seq_position *idx; @@ -621,7 +598,7 @@ static void *clusterip_seq_start(struct seq_file *s, loff_t *pos) static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) { - struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; + struct clusterip_seq_position *idx = v; *pos = ++idx->pos; if (*pos >= idx->weight) { @@ -635,14 +612,15 @@ static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos) static void clusterip_seq_stop(struct seq_file *s, void *v) { - kfree(v); + if (!IS_ERR(v)) + kfree(v); } static int clusterip_seq_show(struct seq_file *s, void *v) { - struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v; + struct clusterip_seq_position *idx = v; - if (idx->pos != 0) + if (idx->pos != 0) seq_putc(s, ','); seq_printf(s, "%u", idx->bit); @@ -653,7 +631,7 @@ static int clusterip_seq_show(struct seq_file *s, void *v) return 0; } -static struct seq_operations clusterip_seq_ops = { +static const struct seq_operations clusterip_seq_ops = { .start = clusterip_seq_start, .next = clusterip_seq_next, .stop = clusterip_seq_stop, @@ -666,10 +644,9 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *sf = file->private_data; - struct proc_dir_entry *pde = PDE(inode); - struct clusterip_config *c = pde->data; + struct clusterip_config *c = PDE_DATA(inode); - sf->private = pde; + sf->private = c; clusterip_config_get(c); } @@ -679,8 +656,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file) static int clusterip_proc_release(struct inode *inode, struct file *file) { - struct proc_dir_entry *pde = PDE(inode); - struct clusterip_config *c = pde->data; + struct clusterip_config *c = PDE_DATA(inode); int ret; ret = seq_release(inode, file); @@ -694,21 +670,28 @@ static int clusterip_proc_release(struct inode *inode, struct file *file) static ssize_t clusterip_proc_write(struct file *file, const char __user *input, size_t size, loff_t *ofs) { + struct clusterip_config *c = PDE_DATA(file_inode(file)); #define PROC_WRITELEN 10 char buffer[PROC_WRITELEN+1]; - struct proc_dir_entry *pde = PDE(file->f_dentry->d_inode); - struct clusterip_config *c = pde->data; unsigned long nodenum; + int rc; - if (copy_from_user(buffer, input, PROC_WRITELEN)) + if (size > PROC_WRITELEN) + return -EIO; + if (copy_from_user(buffer, input, size)) return -EFAULT; + buffer[size] = 0; if (*buffer == '+') { - nodenum = simple_strtoul(buffer+1, NULL, 10); + rc = kstrtoul(buffer+1, 10, &nodenum); + if (rc) + return rc; if (clusterip_add_node(c, nodenum)) return -ENOMEM; } else if (*buffer == '-') { - nodenum = simple_strtoul(buffer+1, NULL,10); + rc = kstrtoul(buffer+1, 10, &nodenum); + if (rc) + return rc; if (clusterip_del_node(c, nodenum)) return -ENOENT; } else @@ -717,7 +700,7 @@ static ssize_t clusterip_proc_write(struct file *file, const char __user *input, return size; } -static struct file_operations clusterip_proc_fops = { +static const struct file_operations clusterip_proc_fops = { .owner = THIS_MODULE, .open = clusterip_proc_open, .read = seq_read, @@ -728,60 +711,79 @@ static struct file_operations clusterip_proc_fops = { #endif /* CONFIG_PROC_FS */ -static int init_or_cleanup(int fini) +static int clusterip_net_init(struct net *net) { - int ret; + struct clusterip_net *cn = net_generic(net, clusterip_net_id); - if (fini) - goto cleanup; + INIT_LIST_HEAD(&cn->configs); - if (ipt_register_target(&clusterip_tgt)) { - ret = -EINVAL; - goto cleanup_none; - } - - if (nf_register_hook(&cip_arp_ops) < 0) { - ret = -EINVAL; - goto cleanup_target; - } + spin_lock_init(&cn->lock); #ifdef CONFIG_PROC_FS - clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", proc_net); - if (!clusterip_procdir) { - printk(KERN_ERR "CLUSTERIP: Unable to proc dir entry\n"); - ret = -ENOMEM; - goto cleanup_hook; + cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net); + if (!cn->procdir) { + pr_err("Unable to proc dir entry\n"); + return -ENOMEM; } #endif /* CONFIG_PROC_FS */ - printk(KERN_NOTICE "ClusterIP Version %s loaded successfully\n", - CLUSTERIP_VERSION); - return 0; +} -cleanup: - printk(KERN_NOTICE "ClusterIP Version %s unloading\n", - CLUSTERIP_VERSION); +static void clusterip_net_exit(struct net *net) +{ #ifdef CONFIG_PROC_FS - remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent); + struct clusterip_net *cn = net_generic(net, clusterip_net_id); + proc_remove(cn->procdir); #endif -cleanup_hook: - nf_unregister_hook(&cip_arp_ops); -cleanup_target: - ipt_unregister_target(&clusterip_tgt); -cleanup_none: - return -EINVAL; } -static int __init init(void) +static struct pernet_operations clusterip_net_ops = { + .init = clusterip_net_init, + .exit = clusterip_net_exit, + .id = &clusterip_net_id, + .size = sizeof(struct clusterip_net), +}; + +static int __init clusterip_tg_init(void) { - return init_or_cleanup(0); + int ret; + + ret = register_pernet_subsys(&clusterip_net_ops); + if (ret < 0) + return ret; + + ret = xt_register_target(&clusterip_tg_reg); + if (ret < 0) + goto cleanup_subsys; + + ret = nf_register_hook(&cip_arp_ops); + if (ret < 0) + goto cleanup_target; + + pr_info("ClusterIP Version %s loaded successfully\n", + CLUSTERIP_VERSION); + + return 0; + +cleanup_target: + xt_unregister_target(&clusterip_tg_reg); +cleanup_subsys: + unregister_pernet_subsys(&clusterip_net_ops); + return ret; } -static void __exit fini(void) +static void __exit clusterip_tg_exit(void) { - init_or_cleanup(1); + pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION); + + nf_unregister_hook(&cip_arp_ops); + xt_unregister_target(&clusterip_tg_reg); + unregister_pernet_subsys(&clusterip_net_ops); + + /* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */ + rcu_barrier_bh(); } -module_init(init); -module_exit(fini); +module_init(clusterip_tg_init); +module_exit(clusterip_tg_exit); diff --git a/net/ipv4/netfilter/ipt_DSCP.c b/net/ipv4/netfilter/ipt_DSCP.c deleted file mode 100644 index 898cdf79ce1..00000000000 --- a/net/ipv4/netfilter/ipt_DSCP.c +++ /dev/null @@ -1,105 +0,0 @@ -/* iptables module for setting the IPv4 DSCP field, Version 1.8 - * - * (C) 2002 by Harald Welte <laforge@netfilter.org> - * based on ipt_FTOS.c (C) 2000 by Matthew G. Marsh <mgm@paktronix.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * See RFC2474 for a description of the DSCP field within the IP Header. - * - * ipt_DSCP.c,v 1.8 2002/08/06 18:41:57 laforge Exp -*/ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_DSCP.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables DSCP modification module"); -MODULE_LICENSE("GPL"); - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - const struct ipt_DSCP_info *dinfo = targinfo; - u_int8_t sh_dscp = ((dinfo->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - - if (((*pskb)->nh.iph->tos & IPT_DSCP_MASK) != sh_dscp) { - u_int16_t diffs[2]; - - if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos = ((*pskb)->nh.iph->tos & ~IPT_DSCP_MASK) - | sh_dscp; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^ 0xFFFF)); - } - return IPT_CONTINUE; -} - -static int -checkentry(const char *tablename, - const void *e_void, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const u_int8_t dscp = ((struct ipt_DSCP_info *)targinfo)->dscp; - - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_DSCP_info))) { - printk(KERN_WARNING "DSCP: targinfosize %u != %Zu\n", - targinfosize, - IPT_ALIGN(sizeof(struct ipt_DSCP_info))); - return 0; - } - - if (strcmp(tablename, "mangle") != 0) { - printk(KERN_WARNING "DSCP: can only be called from \"mangle\" table, not \"%s\"\n", tablename); - return 0; - } - - if ((dscp > IPT_DSCP_MAX)) { - printk(KERN_WARNING "DSCP: dscp %x out of range\n", dscp); - return 0; - } - - return 1; -} - -static struct ipt_target ipt_dscp_reg = { - .name = "DSCP", - .target = target, - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_target(&ipt_dscp_reg); -} - -static void __exit fini(void) -{ - ipt_unregister_target(&ipt_dscp_reg); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 706445426a6..4bf3dc49ad1 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -1,175 +1,138 @@ /* iptables module for the IPv4 and TCP ECN bits, Version 1.5 * * (C) 2002 by Harald Welte <laforge@netfilter.org> - * + * * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as + * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * ipt_ECN.c,v 1.5 2002/08/18 19:36:51 laforge Exp */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> +#include <net/ip.h> #include <linux/tcp.h> #include <net/checksum.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_ECN.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables ECN modification module"); +MODULE_DESCRIPTION("Xtables: Explicit Congestion Notification (ECN) flag modification"); /* set ECT codepoint from IP header. - * return 0 if there was an error. */ -static inline int -set_ect_ip(struct sk_buff **pskb, const struct ipt_ECN_info *einfo) + * return false if there was an error. */ +static inline bool +set_ect_ip(struct sk_buff *skb, const struct ipt_ECN_info *einfo) { - if (((*pskb)->nh.iph->tos & IPT_ECN_IP_MASK) - != (einfo->ip_ect & IPT_ECN_IP_MASK)) { - u_int16_t diffs[2]; - - if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return 0; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos &= ~IPT_ECN_IP_MASK; - (*pskb)->nh.iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^0xFFFF)); - } - return 1; + struct iphdr *iph = ip_hdr(skb); + + if ((iph->tos & IPT_ECN_IP_MASK) != (einfo->ip_ect & IPT_ECN_IP_MASK)) { + __u8 oldtos; + if (!skb_make_writable(skb, sizeof(struct iphdr))) + return false; + iph = ip_hdr(skb); + oldtos = iph->tos; + iph->tos &= ~IPT_ECN_IP_MASK; + iph->tos |= (einfo->ip_ect & IPT_ECN_IP_MASK); + csum_replace2(&iph->check, htons(oldtos), htons(iph->tos)); + } + return true; } -/* Return 0 if there was an error. */ -static inline int -set_ect_tcp(struct sk_buff **pskb, const struct ipt_ECN_info *einfo, int inward) +/* Return false if there was an error. */ +static inline bool +set_ect_tcp(struct sk_buff *skb, const struct ipt_ECN_info *einfo) { struct tcphdr _tcph, *tcph; - u_int16_t diffs[2]; + __be16 oldval; - /* Not enought header? */ - tcph = skb_header_pointer(*pskb, (*pskb)->nh.iph->ihl*4, - sizeof(_tcph), &_tcph); + /* Not enough header? */ + tcph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_tcph), &_tcph); if (!tcph) - return 0; + return false; if ((!(einfo->operation & IPT_ECN_OP_SET_ECE) || tcph->ece == einfo->proto.tcp.ece) && - ((!(einfo->operation & IPT_ECN_OP_SET_CWR) || - tcph->cwr == einfo->proto.tcp.cwr))) - return 1; - - if (!skb_make_writable(pskb, (*pskb)->nh.iph->ihl*4+sizeof(*tcph))) - return 0; - tcph = (void *)(*pskb)->nh.iph + (*pskb)->nh.iph->ihl*4; + (!(einfo->operation & IPT_ECN_OP_SET_CWR) || + tcph->cwr == einfo->proto.tcp.cwr)) + return true; - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, inward)) - return 0; + if (!skb_make_writable(skb, ip_hdrlen(skb) + sizeof(*tcph))) + return false; + tcph = (void *)ip_hdr(skb) + ip_hdrlen(skb); - diffs[0] = ((u_int16_t *)tcph)[6]; + oldval = ((__be16 *)tcph)[6]; if (einfo->operation & IPT_ECN_OP_SET_ECE) tcph->ece = einfo->proto.tcp.ece; if (einfo->operation & IPT_ECN_OP_SET_CWR) tcph->cwr = einfo->proto.tcp.cwr; - diffs[1] = ((u_int16_t *)tcph)[6]; - diffs[0] = diffs[0] ^ 0xFFFF; - - if ((*pskb)->ip_summed != CHECKSUM_UNNECESSARY) - tcph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - tcph->check^0xFFFF)); - return 1; + + inet_proto_csum_replace2(&tcph->check, skb, + oldval, ((__be16 *)tcph)[6], 0); + return true; } static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) +ecn_tg(struct sk_buff *skb, const struct xt_action_param *par) { - const struct ipt_ECN_info *einfo = targinfo; + const struct ipt_ECN_info *einfo = par->targinfo; if (einfo->operation & IPT_ECN_OP_SET_IP) - if (!set_ect_ip(pskb, einfo)) + if (!set_ect_ip(skb, einfo)) return NF_DROP; - if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) - && (*pskb)->nh.iph->protocol == IPPROTO_TCP) - if (!set_ect_tcp(pskb, einfo, (out == NULL))) + if (einfo->operation & (IPT_ECN_OP_SET_ECE | IPT_ECN_OP_SET_CWR) && + ip_hdr(skb)->protocol == IPPROTO_TCP) + if (!set_ect_tcp(skb, einfo)) return NF_DROP; - return IPT_CONTINUE; + return XT_CONTINUE; } -static int -checkentry(const char *tablename, - const void *e_void, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) +static int ecn_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_ECN_info *einfo = (struct ipt_ECN_info *)targinfo; - const struct ipt_entry *e = e_void; - - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ECN_info))) { - printk(KERN_WARNING "ECN: targinfosize %u != %Zu\n", - targinfosize, - IPT_ALIGN(sizeof(struct ipt_ECN_info))); - return 0; - } - - if (strcmp(tablename, "mangle") != 0) { - printk(KERN_WARNING "ECN: can only be called from \"mangle\" table, not \"%s\"\n", tablename); - return 0; - } + const struct ipt_ECN_info *einfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (einfo->operation & IPT_ECN_OP_MASK) { - printk(KERN_WARNING "ECN: unsupported ECN operation %x\n", - einfo->operation); - return 0; + pr_info("unsupported ECN operation %x\n", einfo->operation); + return -EINVAL; } if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { - printk(KERN_WARNING "ECN: new ECT codepoint %x out of mask\n", - einfo->ip_ect); - return 0; + pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); + return -EINVAL; } - - if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) - && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & IPT_INV_PROTO))) { - printk(KERN_WARNING "ECN: cannot use TCP operations on a " - "non-tcp rule\n"); - return 0; + if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && + (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { + pr_info("cannot use TCP operations on a non-tcp rule\n"); + return -EINVAL; } - - return 1; + return 0; } -static struct ipt_target ipt_ecn_reg = { +static struct xt_target ecn_tg_reg __read_mostly = { .name = "ECN", - .target = target, - .checkentry = checkentry, + .family = NFPROTO_IPV4, + .target = ecn_tg, + .targetsize = sizeof(struct ipt_ECN_info), + .table = "mangle", + .checkentry = ecn_tg_check, .me = THIS_MODULE, }; -static int __init init(void) +static int __init ecn_tg_init(void) { - return ipt_register_target(&ipt_ecn_reg); + return xt_register_target(&ecn_tg_reg); } -static void __exit fini(void) +static void __exit ecn_tg_exit(void) { - ipt_unregister_target(&ipt_ecn_reg); + xt_unregister_target(&ecn_tg_reg); } -module_init(init); -module_exit(fini); +module_init(ecn_tg_init); +module_exit(ecn_tg_exit); diff --git a/net/ipv4/netfilter/ipt_LOG.c b/net/ipv4/netfilter/ipt_LOG.c deleted file mode 100644 index 6606ddb66a2..00000000000 --- a/net/ipv4/netfilter/ipt_LOG.c +++ /dev/null @@ -1,495 +0,0 @@ -/* - * This is a module which is used for logging packets. - */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/spinlock.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/icmp.h> -#include <net/udp.h> -#include <net/tcp.h> -#include <net/route.h> - -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_LOG.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("iptables syslog logging module"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -/* Use lock to serialize, so printks don't overlap */ -static DEFINE_SPINLOCK(log_lock); - -/* One level of recursion won't kill us */ -static void dump_packet(const struct nf_loginfo *info, - const struct sk_buff *skb, - unsigned int iphoff) -{ - struct iphdr _iph, *ih; - unsigned int logflags; - - if (info->type == NF_LOG_TYPE_LOG) - logflags = info->u.log.logflags; - else - logflags = NF_LOG_MASK; - - ih = skb_header_pointer(skb, iphoff, sizeof(_iph), &_iph); - if (ih == NULL) { - printk("TRUNCATED"); - return; - } - - /* Important fields: - * TOS, len, DF/MF, fragment offset, TTL, src, dst, options. */ - /* Max length: 40 "SRC=255.255.255.255 DST=255.255.255.255 " */ - printk("SRC=%u.%u.%u.%u DST=%u.%u.%u.%u ", - NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); - - /* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */ - printk("LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ", - ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK, - ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id)); - - /* Max length: 6 "CE DF MF " */ - if (ntohs(ih->frag_off) & IP_CE) - printk("CE "); - if (ntohs(ih->frag_off) & IP_DF) - printk("DF "); - if (ntohs(ih->frag_off) & IP_MF) - printk("MF "); - - /* Max length: 11 "FRAG:65535 " */ - if (ntohs(ih->frag_off) & IP_OFFSET) - printk("FRAG:%u ", ntohs(ih->frag_off) & IP_OFFSET); - - if ((logflags & IPT_LOG_IPOPT) - && ih->ihl * 4 > sizeof(struct iphdr)) { - unsigned char _opt[4 * 15 - sizeof(struct iphdr)], *op; - unsigned int i, optsize; - - optsize = ih->ihl * 4 - sizeof(struct iphdr); - op = skb_header_pointer(skb, iphoff+sizeof(_iph), - optsize, _opt); - if (op == NULL) { - printk("TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - printk("OPT ("); - for (i = 0; i < optsize; i++) - printk("%02X", op[i]); - printk(") "); - } - - switch (ih->protocol) { - case IPPROTO_TCP: { - struct tcphdr _tcph, *th; - - /* Max length: 10 "PROTO=TCP " */ - printk("PROTO=TCP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - th = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_tcph), &_tcph); - if (th == NULL) { - printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - printk("SPT=%u DPT=%u ", - ntohs(th->source), ntohs(th->dest)); - /* Max length: 30 "SEQ=4294967295 ACK=4294967295 " */ - if (logflags & IPT_LOG_TCPSEQ) - printk("SEQ=%u ACK=%u ", - ntohl(th->seq), ntohl(th->ack_seq)); - /* Max length: 13 "WINDOW=65535 " */ - printk("WINDOW=%u ", ntohs(th->window)); - /* Max length: 9 "RES=0x3F " */ - printk("RES=0x%02x ", (u8)(ntohl(tcp_flag_word(th) & TCP_RESERVED_BITS) >> 22)); - /* Max length: 32 "CWR ECE URG ACK PSH RST SYN FIN " */ - if (th->cwr) - printk("CWR "); - if (th->ece) - printk("ECE "); - if (th->urg) - printk("URG "); - if (th->ack) - printk("ACK "); - if (th->psh) - printk("PSH "); - if (th->rst) - printk("RST "); - if (th->syn) - printk("SYN "); - if (th->fin) - printk("FIN "); - /* Max length: 11 "URGP=65535 " */ - printk("URGP=%u ", ntohs(th->urg_ptr)); - - if ((logflags & IPT_LOG_TCPOPT) - && th->doff * 4 > sizeof(struct tcphdr)) { - unsigned char _opt[4 * 15 - sizeof(struct tcphdr)]; - unsigned char *op; - unsigned int i, optsize; - - optsize = th->doff * 4 - sizeof(struct tcphdr); - op = skb_header_pointer(skb, - iphoff+ih->ihl*4+sizeof(_tcph), - optsize, _opt); - if (op == NULL) { - printk("TRUNCATED"); - return; - } - - /* Max length: 127 "OPT (" 15*4*2chars ") " */ - printk("OPT ("); - for (i = 0; i < optsize; i++) - printk("%02X", op[i]); - printk(") "); - } - break; - } - case IPPROTO_UDP: { - struct udphdr _udph, *uh; - - /* Max length: 10 "PROTO=UDP " */ - printk("PROTO=UDP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - uh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_udph), &_udph); - if (uh == NULL) { - printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 20 "SPT=65535 DPT=65535 " */ - printk("SPT=%u DPT=%u LEN=%u ", - ntohs(uh->source), ntohs(uh->dest), - ntohs(uh->len)); - break; - } - case IPPROTO_ICMP: { - struct icmphdr _icmph, *ich; - static const size_t required_len[NR_ICMP_TYPES+1] - = { [ICMP_ECHOREPLY] = 4, - [ICMP_DEST_UNREACH] - = 8 + sizeof(struct iphdr), - [ICMP_SOURCE_QUENCH] - = 8 + sizeof(struct iphdr), - [ICMP_REDIRECT] - = 8 + sizeof(struct iphdr), - [ICMP_ECHO] = 4, - [ICMP_TIME_EXCEEDED] - = 8 + sizeof(struct iphdr), - [ICMP_PARAMETERPROB] - = 8 + sizeof(struct iphdr), - [ICMP_TIMESTAMP] = 20, - [ICMP_TIMESTAMPREPLY] = 20, - [ICMP_ADDRESS] = 12, - [ICMP_ADDRESSREPLY] = 12 }; - - /* Max length: 11 "PROTO=ICMP " */ - printk("PROTO=ICMP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ich = skb_header_pointer(skb, iphoff + ih->ihl * 4, - sizeof(_icmph), &_icmph); - if (ich == NULL) { - printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Max length: 18 "TYPE=255 CODE=255 " */ - printk("TYPE=%u CODE=%u ", ich->type, ich->code); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - if (ich->type <= NR_ICMP_TYPES - && required_len[ich->type] - && skb->len-iphoff-ih->ihl*4 < required_len[ich->type]) { - printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - switch (ich->type) { - case ICMP_ECHOREPLY: - case ICMP_ECHO: - /* Max length: 19 "ID=65535 SEQ=65535 " */ - printk("ID=%u SEQ=%u ", - ntohs(ich->un.echo.id), - ntohs(ich->un.echo.sequence)); - break; - - case ICMP_PARAMETERPROB: - /* Max length: 14 "PARAMETER=255 " */ - printk("PARAMETER=%u ", - ntohl(ich->un.gateway) >> 24); - break; - case ICMP_REDIRECT: - /* Max length: 24 "GATEWAY=255.255.255.255 " */ - printk("GATEWAY=%u.%u.%u.%u ", - NIPQUAD(ich->un.gateway)); - /* Fall through */ - case ICMP_DEST_UNREACH: - case ICMP_SOURCE_QUENCH: - case ICMP_TIME_EXCEEDED: - /* Max length: 3+maxlen */ - if (!iphoff) { /* Only recurse once. */ - printk("["); - dump_packet(info, skb, - iphoff + ih->ihl*4+sizeof(_icmph)); - printk("] "); - } - - /* Max length: 10 "MTU=65535 " */ - if (ich->type == ICMP_DEST_UNREACH - && ich->code == ICMP_FRAG_NEEDED) - printk("MTU=%u ", ntohs(ich->un.frag.mtu)); - } - break; - } - /* Max Length */ - case IPPROTO_AH: { - struct ip_auth_hdr _ahdr, *ah; - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 9 "PROTO=AH " */ - printk("PROTO=AH "); - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - ah = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_ahdr), &_ahdr); - if (ah == NULL) { - printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - printk("SPI=0x%x ", ntohl(ah->spi)); - break; - } - case IPPROTO_ESP: { - struct ip_esp_hdr _esph, *eh; - - /* Max length: 10 "PROTO=ESP " */ - printk("PROTO=ESP "); - - if (ntohs(ih->frag_off) & IP_OFFSET) - break; - - /* Max length: 25 "INCOMPLETE [65535 bytes] " */ - eh = skb_header_pointer(skb, iphoff+ih->ihl*4, - sizeof(_esph), &_esph); - if (eh == NULL) { - printk("INCOMPLETE [%u bytes] ", - skb->len - iphoff - ih->ihl*4); - break; - } - - /* Length: 15 "SPI=0xF1234567 " */ - printk("SPI=0x%x ", ntohl(eh->spi)); - break; - } - /* Max length: 10 "PROTO 255 " */ - default: - printk("PROTO=%u ", ih->protocol); - } - - /* Max length: 15 "UID=4294967295 " */ - if ((logflags & IPT_LOG_UID) && !iphoff && skb->sk) { - read_lock_bh(&skb->sk->sk_callback_lock); - if (skb->sk->sk_socket && skb->sk->sk_socket->file) - printk("UID=%u ", skb->sk->sk_socket->file->f_uid); - read_unlock_bh(&skb->sk->sk_callback_lock); - } - - /* Proto Max log string length */ - /* IP: 40+46+6+11+127 = 230 */ - /* TCP: 10+max(25,20+30+13+9+32+11+127) = 252 */ - /* UDP: 10+max(25,20) = 35 */ - /* ICMP: 11+max(25, 18+25+max(19,14,24+3+n+10,3+n+10)) = 91+n */ - /* ESP: 10+max(25)+15 = 50 */ - /* AH: 9+max(25)+15 = 49 */ - /* unknown: 10 */ - - /* (ICMP allows recursion one level deep) */ - /* maxlen = IP + ICMP + IP + max(TCP,UDP,ICMP,unknown) */ - /* maxlen = 230+ 91 + 230 + 252 = 803 */ -} - -static struct nf_loginfo default_loginfo = { - .type = NF_LOG_TYPE_LOG, - .u = { - .log = { - .level = 0, - .logflags = NF_LOG_MASK, - }, - }, -}; - -static void -ipt_log_packet(unsigned int pf, - unsigned int hooknum, - const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const struct nf_loginfo *loginfo, - const char *prefix) -{ - if (!loginfo) - loginfo = &default_loginfo; - - spin_lock_bh(&log_lock); - printk("<%d>%sIN=%s OUT=%s ", loginfo->u.log.level, - prefix, - in ? in->name : "", - out ? out->name : ""); -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge) { - struct net_device *physindev = skb->nf_bridge->physindev; - struct net_device *physoutdev = skb->nf_bridge->physoutdev; - - if (physindev && in != physindev) - printk("PHYSIN=%s ", physindev->name); - if (physoutdev && out != physoutdev) - printk("PHYSOUT=%s ", physoutdev->name); - } -#endif - - if (in && !out) { - /* MAC logging for input chain only. */ - printk("MAC="); - if (skb->dev && skb->dev->hard_header_len - && skb->mac.raw != (void*)skb->nh.iph) { - int i; - unsigned char *p = skb->mac.raw; - for (i = 0; i < skb->dev->hard_header_len; i++,p++) - printk("%02x%c", *p, - i==skb->dev->hard_header_len - 1 - ? ' ':':'); - } else - printk(" "); - } - - dump_packet(loginfo, skb, 0); - printk("\n"); - spin_unlock_bh(&log_lock); -} - -static unsigned int -ipt_log_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - const struct ipt_log_info *loginfo = targinfo; - struct nf_loginfo li; - - li.type = NF_LOG_TYPE_LOG; - li.u.log.level = loginfo->level; - li.u.log.logflags = loginfo->logflags; - - nf_log_packet(PF_INET, hooknum, *pskb, in, out, &li, loginfo->prefix); - - return IPT_CONTINUE; -} - -static int ipt_log_checkentry(const char *tablename, - const void *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const struct ipt_log_info *loginfo = targinfo; - - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_log_info))) { - DEBUGP("LOG: targinfosize %u != %u\n", - targinfosize, IPT_ALIGN(sizeof(struct ipt_log_info))); - return 0; - } - - if (loginfo->level >= 8) { - DEBUGP("LOG: level %u >= 8\n", loginfo->level); - return 0; - } - - if (loginfo->prefix[sizeof(loginfo->prefix)-1] != '\0') { - DEBUGP("LOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix)-1]); - return 0; - } - - return 1; -} - -static struct ipt_target ipt_log_reg = { - .name = "LOG", - .target = ipt_log_target, - .checkentry = ipt_log_checkentry, - .me = THIS_MODULE, -}; - -static struct nf_logger ipt_log_logger ={ - .name = "ipt_LOG", - .logfn = &ipt_log_packet, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - if (ipt_register_target(&ipt_log_reg)) - return -EINVAL; - if (nf_log_register(PF_INET, &ipt_log_logger) < 0) { - printk(KERN_WARNING "ipt_LOG: not logging via system console " - "since somebody else already registered for PF_INET\n"); - /* we cannot make module load fail here, since otherwise - * iptables userspace would abort */ - } - - return 0; -} - -static void __exit fini(void) -{ - nf_log_unregister_logger(&ipt_log_logger); - ipt_unregister_target(&ipt_log_reg); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 12c56d3343c..00352ce0f0d 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -2,14 +2,13 @@ (depending on route). */ /* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - -#include <linux/config.h> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> #include <linux/inetdevice.h> #include <linux/ip.h> @@ -21,130 +20,105 @@ #include <net/checksum.h> #include <net/route.h> #include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> +#include <net/netfilter/nf_nat.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("iptables MASQUERADE target module"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -/* Lock protects masq region inside conntrack */ -static DEFINE_RWLOCK(masq_lock); +MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); /* FIXME: Multiple targets. --RR */ -static int -masquerade_check(const char *tablename, - const void *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) +static int masquerade_tg_check(const struct xt_tgchk_param *par) { - const struct ip_nat_multi_range_compat *mr = targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - if (strcmp(tablename, "nat") != 0) { - DEBUGP("masquerade_check: bad table `%s'.\n", tablename); - return 0; - } - if (targinfosize != IPT_ALIGN(sizeof(*mr))) { - DEBUGP("masquerade_check: size %u != %u.\n", - targinfosize, sizeof(*mr)); - return 0; - } - if (hook_mask & ~(1 << NF_IP_POST_ROUTING)) { - DEBUGP("masquerade_check: bad hooks %x.\n", hook_mask); - return 0; - } - if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { - DEBUGP("masquerade_check: bad MAP_IPS.\n"); - return 0; + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; } if (mr->rangesize != 1) { - DEBUGP("masquerade_check: bad rangesize %u.\n", mr->rangesize); - return 0; + pr_debug("bad rangesize %u\n", mr->rangesize); + return -EINVAL; } - return 1; + return 0; } static unsigned int -masquerade_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) +masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ip_conntrack *ct; + struct nf_conn *ct; + struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; - const struct ip_nat_multi_range_compat *mr; - struct ip_nat_range newrange; - struct rtable *rt; - u_int32_t newsrc; + struct nf_nat_range newrange; + const struct nf_nat_ipv4_multi_range_compat *mr; + const struct rtable *rt; + __be32 newsrc, nh; + + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING); - IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING); + ct = nf_ct_get(skb, &ctinfo); + nat = nfct_nat(ct); - ct = ip_conntrack_get(*pskb, &ctinfo); - IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED - || ctinfo == IP_CT_RELATED + IP_CT_IS_REPLY)); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. */ - if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip == 0) + if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0) return NF_ACCEPT; - mr = targinfo; - rt = (struct rtable *)(*pskb)->dst; - newsrc = inet_select_addr(out, rt->rt_gateway, RT_SCOPE_UNIVERSE); + mr = par->targinfo; + rt = skb_rtable(skb); + nh = rt_nexthop(rt, ip_hdr(skb)->daddr); + newsrc = inet_select_addr(par->out, nh, RT_SCOPE_UNIVERSE); if (!newsrc) { - printk("MASQUERADE: %s ate my IP address\n", out->name); + pr_info("%s ate my IP address\n", par->out->name); return NF_DROP; } - write_lock_bh(&masq_lock); - ct->nat.masq_index = out->ifindex; - write_unlock_bh(&masq_lock); + nat->masq_index = par->out->ifindex; /* Transfer from original range. */ - newrange = ((struct ip_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, - newsrc, newsrc, - mr->range[0].min, mr->range[0].max }); + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newsrc; + newrange.max_addr.ip = newsrc; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; /* Hand modified range to generic setup. */ - return ip_nat_setup_info(ct, &newrange, hooknum); + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); } -static inline int -device_cmp(struct ip_conntrack *i, void *ifindex) +static int +device_cmp(struct nf_conn *i, void *ifindex) { - int ret; + const struct nf_conn_nat *nat = nfct_nat(i); - read_lock_bh(&masq_lock); - ret = (i->nat.masq_index == (int)(long)ifindex); - read_unlock_bh(&masq_lock); - - return ret; + if (!nat) + return 0; + if (nf_ct_l3num(i) != NFPROTO_IPV4) + return 0; + return nat->masq_index == (int)(long)ifindex; } static int masq_device_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + const struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct net *net = dev_net(dev); if (event == NETDEV_DOWN) { /* Device was downed. Search entire table for conntracks which were associated with that device, and forget them. */ - IP_NF_ASSERT(dev->ifindex != 0); + NF_CT_ASSERT(dev->ifindex != 0); - ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex, 0, 0); } return NOTIFY_DONE; @@ -155,17 +129,10 @@ static int masq_inet_event(struct notifier_block *this, void *ptr) { struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev; + struct netdev_notifier_info info; - if (event == NETDEV_DOWN) { - /* IP address was deleted. Search entire table for - conntracks which were associated with that device, - and forget them. */ - IP_NF_ASSERT(dev->ifindex != 0); - - ip_ct_iterate_cleanup(device_cmp, (void *)(long)dev->ifindex); - } - - return NOTIFY_DONE; + netdev_notifier_info_init(&info, dev); + return masq_device_event(this, event, &info); } static struct notifier_block masq_dev_notifier = { @@ -176,18 +143,22 @@ static struct notifier_block masq_inet_notifier = { .notifier_call = masq_inet_event, }; -static struct ipt_target masquerade = { +static struct xt_target masquerade_tg_reg __read_mostly = { .name = "MASQUERADE", - .target = masquerade_target, - .checkentry = masquerade_check, + .family = NFPROTO_IPV4, + .target = masquerade_tg, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .checkentry = masquerade_tg_check, .me = THIS_MODULE, }; -static int __init init(void) +static int __init masquerade_tg_init(void) { int ret; - ret = ipt_register_target(&masquerade); + ret = xt_register_target(&masquerade_tg_reg); if (ret == 0) { /* Register for device down reports */ @@ -199,12 +170,12 @@ static int __init init(void) return ret; } -static void __exit fini(void) +static void __exit masquerade_tg_exit(void) { - ipt_unregister_target(&masquerade); + xt_unregister_target(&masquerade_tg_reg); unregister_netdevice_notifier(&masq_dev_notifier); - unregister_inetaddr_notifier(&masq_inet_notifier); + unregister_inetaddr_notifier(&masq_inet_notifier); } -module_init(init); -module_exit(fini); +module_init(masquerade_tg_init); +module_exit(masquerade_tg_exit); diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c deleted file mode 100644 index b074467fe67..00000000000 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ /dev/null @@ -1,119 +0,0 @@ -/* NETMAP - static NAT mapping of IP network addresses (1:1). - * The mapping can be applied to source (POSTROUTING), - * destination (PREROUTING), or both (with separate rules). - */ - -/* (C) 2000-2001 Svenning Soerensen <svenning@post5.tele.dk> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/config.h> -#include <linux/ip.h> -#include <linux/module.h> -#include <linux/netdevice.h> -#include <linux/netfilter.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> - -#define MODULENAME "NETMAP" -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Svenning Soerensen <svenning@post5.tele.dk>"); -MODULE_DESCRIPTION("iptables 1:1 NAT mapping of IP networks target"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static int -check(const char *tablename, - const void *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const struct ip_nat_multi_range_compat *mr = targinfo; - - if (strcmp(tablename, "nat") != 0) { - DEBUGP(MODULENAME":check: bad table `%s'.\n", tablename); - return 0; - } - if (targinfosize != IPT_ALIGN(sizeof(*mr))) { - DEBUGP(MODULENAME":check: size %u.\n", targinfosize); - return 0; - } - if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_POST_ROUTING) | - (1 << NF_IP_LOCAL_OUT))) { - DEBUGP(MODULENAME":check: bad hooks %x.\n", hook_mask); - return 0; - } - if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { - DEBUGP(MODULENAME":check: bad MAP_IPS.\n"); - return 0; - } - if (mr->rangesize != 1) { - DEBUGP(MODULENAME":check: bad rangesize %u.\n", mr->rangesize); - return 0; - } - return 1; -} - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - u_int32_t new_ip, netmask; - const struct ip_nat_multi_range_compat *mr = targinfo; - struct ip_nat_range newrange; - - IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING - || hooknum == NF_IP_POST_ROUTING - || hooknum == NF_IP_LOCAL_OUT); - ct = ip_conntrack_get(*pskb, &ctinfo); - - netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - - if (hooknum == NF_IP_PRE_ROUTING || hooknum == NF_IP_LOCAL_OUT) - new_ip = (*pskb)->nh.iph->daddr & ~netmask; - else - new_ip = (*pskb)->nh.iph->saddr & ~netmask; - new_ip |= mr->range[0].min_ip & netmask; - - newrange = ((struct ip_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, - new_ip, new_ip, - mr->range[0].min, mr->range[0].max }); - - /* Hand modified range to generic setup. */ - return ip_nat_setup_info(ct, &newrange, hooknum); -} - -static struct ipt_target target_module = { - .name = MODULENAME, - .target = target, - .checkentry = check, - .me = THIS_MODULE -}; - -static int __init init(void) -{ - return ipt_register_target(&target_module); -} - -static void __exit fini(void) -{ - ipt_unregister_target(&target_module); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c deleted file mode 100644 index 140be51f2f0..00000000000 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ /dev/null @@ -1,133 +0,0 @@ -/* Redirect. Simple mapping which alters dst to a local IP address. */ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/types.h> -#include <linux/ip.h> -#include <linux/timer.h> -#include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/netdevice.h> -#include <linux/if.h> -#include <linux/inetdevice.h> -#include <net/protocol.h> -#include <net/checksum.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("iptables REDIRECT target module"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -/* FIXME: Take multiple ranges --RR */ -static int -redirect_check(const char *tablename, - const void *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const struct ip_nat_multi_range_compat *mr = targinfo; - - if (strcmp(tablename, "nat") != 0) { - DEBUGP("redirect_check: bad table `%s'.\n", table); - return 0; - } - if (targinfosize != IPT_ALIGN(sizeof(*mr))) { - DEBUGP("redirect_check: size %u.\n", targinfosize); - return 0; - } - if (hook_mask & ~((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT))) { - DEBUGP("redirect_check: bad hooks %x.\n", hook_mask); - return 0; - } - if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { - DEBUGP("redirect_check: bad MAP_IPS.\n"); - return 0; - } - if (mr->rangesize != 1) { - DEBUGP("redirect_check: bad rangesize %u.\n", mr->rangesize); - return 0; - } - return 1; -} - -static unsigned int -redirect_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - u_int32_t newdst; - const struct ip_nat_multi_range_compat *mr = targinfo; - struct ip_nat_range newrange; - - IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING - || hooknum == NF_IP_LOCAL_OUT); - - ct = ip_conntrack_get(*pskb, &ctinfo); - IP_NF_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - /* Local packets: make them go to loopback */ - if (hooknum == NF_IP_LOCAL_OUT) - newdst = htonl(0x7F000001); - else { - struct in_device *indev; - struct in_ifaddr *ifa; - - newdst = 0; - - rcu_read_lock(); - indev = __in_dev_get_rcu((*pskb)->dev); - if (indev && (ifa = indev->ifa_list)) - newdst = ifa->ifa_local; - rcu_read_unlock(); - - if (!newdst) - return NF_DROP; - } - - /* Transfer from original range. */ - newrange = ((struct ip_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, - newdst, newdst, - mr->range[0].min, mr->range[0].max }); - - /* Hand modified range to generic setup. */ - return ip_nat_setup_info(ct, &newrange, hooknum); -} - -static struct ipt_target redirect_reg = { - .name = "REDIRECT", - .target = redirect_target, - .checkentry = redirect_check, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_target(&redirect_reg); -} - -static void __exit fini(void) -{ - ipt_unregister_target(&redirect_reg); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 3eb47aae78c..5b6e0df4ccf 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -1,7 +1,5 @@ /* * This is a module which is used for rejecting packets. - * Added support for customized reject packets (Jozsef Kadlecsik). - * Added support for ICMP type-3-code-13 (Maciej Soltysiak). [RFC 1812] */ /* (C) 1999-2001 Paul `Rusty' Russell @@ -11,268 +9,56 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - -#include <linux/config.h> +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <linux/ip.h> #include <linux/udp.h> #include <linux/icmp.h> #include <net/icmp.h> -#include <net/ip.h> -#include <net/tcp.h> -#include <net/route.h> -#include <net/dst.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netfilter_ipv4/ipt_REJECT.h> #ifdef CONFIG_BRIDGE_NETFILTER #include <linux/netfilter_bridge.h> #endif +#include <net/netfilter/ipv4/nf_reject.h> + MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("iptables REJECT target module"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static inline struct rtable *route_reverse(struct sk_buff *skb, - struct tcphdr *tcph, int hook) -{ - struct iphdr *iph = skb->nh.iph; - struct dst_entry *odst; - struct flowi fl = {}; - struct rtable *rt; - - /* We don't require ip forwarding to be enabled to be able to - * send a RST reply for bridged traffic. */ - if (hook != NF_IP_FORWARD -#ifdef CONFIG_BRIDGE_NETFILTER - || (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED) -#endif - ) { - fl.nl_u.ip4_u.daddr = iph->saddr; - if (hook == NF_IP_LOCAL_IN) - fl.nl_u.ip4_u.saddr = iph->daddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - - if (ip_route_output_key(&rt, &fl) != 0) - return NULL; - } else { - /* non-local src, find valid iif to satisfy - * rp-filter when calling ip_route_input. */ - fl.nl_u.ip4_u.daddr = iph->daddr; - if (ip_route_output_key(&rt, &fl) != 0) - return NULL; - - odst = skb->dst; - if (ip_route_input(skb, iph->saddr, iph->daddr, - RT_TOS(iph->tos), rt->u.dst.dev) != 0) { - dst_release(&rt->u.dst); - return NULL; - } - dst_release(&rt->u.dst); - rt = (struct rtable *)skb->dst; - skb->dst = odst; - - fl.nl_u.ip4_u.daddr = iph->saddr; - fl.nl_u.ip4_u.saddr = iph->daddr; - fl.nl_u.ip4_u.tos = RT_TOS(iph->tos); - } - - if (rt->u.dst.error) { - dst_release(&rt->u.dst); - return NULL; - } +MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4"); - fl.proto = IPPROTO_TCP; - fl.fl_ip_sport = tcph->dest; - fl.fl_ip_dport = tcph->source; - - xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); - - return rt; -} - -/* Send RST reply */ -static void send_reset(struct sk_buff *oldskb, int hook) +static unsigned int +reject_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct sk_buff *nskb; - struct iphdr *iph = oldskb->nh.iph; - struct tcphdr _otcph, *oth, *tcph; - struct rtable *rt; - u_int16_t tmp_port; - u_int32_t tmp_addr; - unsigned int tcplen; - int needs_ack; - int hh_len; - - /* IP header checks: fragment. */ - if (oldskb->nh.iph->frag_off & htons(IP_OFFSET)) - return; - - oth = skb_header_pointer(oldskb, oldskb->nh.iph->ihl * 4, - sizeof(_otcph), &_otcph); - if (oth == NULL) - return; - - /* No RST for RST. */ - if (oth->rst) - return; - - /* Check checksum */ - tcplen = oldskb->len - iph->ihl * 4; - if (((hook != NF_IP_LOCAL_IN && oldskb->ip_summed != CHECKSUM_HW) || - (hook == NF_IP_LOCAL_IN && - oldskb->ip_summed != CHECKSUM_UNNECESSARY)) && - csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP, - oldskb->ip_summed == CHECKSUM_HW ? oldskb->csum : - skb_checksum(oldskb, iph->ihl * 4, tcplen, 0))) - return; - - if ((rt = route_reverse(oldskb, oth, hook)) == NULL) - return; - - hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - - /* We need a linear, writeable skb. We also need to expand - headroom in case hh_len of incoming interface < hh_len of - outgoing interface */ - nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb), - GFP_ATOMIC); - if (!nskb) { - dst_release(&rt->u.dst); - return; - } - - dst_release(nskb->dst); - nskb->dst = &rt->u.dst; - - /* This packet will not be the same as the other: clear nf fields */ - nf_reset(nskb); - nskb->nfmark = 0; -#ifdef CONFIG_BRIDGE_NETFILTER - nf_bridge_put(nskb->nf_bridge); - nskb->nf_bridge = NULL; -#endif + const struct ipt_reject_info *reject = par->targinfo; - tcph = (struct tcphdr *)((u_int32_t*)nskb->nh.iph + nskb->nh.iph->ihl); - - /* Swap source and dest */ - tmp_addr = nskb->nh.iph->saddr; - nskb->nh.iph->saddr = nskb->nh.iph->daddr; - nskb->nh.iph->daddr = tmp_addr; - tmp_port = tcph->source; - tcph->source = tcph->dest; - tcph->dest = tmp_port; - - /* Truncate to length (no data) */ - tcph->doff = sizeof(struct tcphdr)/4; - skb_trim(nskb, nskb->nh.iph->ihl*4 + sizeof(struct tcphdr)); - nskb->nh.iph->tot_len = htons(nskb->len); - - if (tcph->ack) { - needs_ack = 0; - tcph->seq = oth->ack_seq; - tcph->ack_seq = 0; - } else { - needs_ack = 1; - tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin - + oldskb->len - oldskb->nh.iph->ihl*4 - - (oth->doff<<2)); - tcph->seq = 0; - } - - /* Reset flags */ - ((u_int8_t *)tcph)[13] = 0; - tcph->rst = 1; - tcph->ack = needs_ack; - - tcph->window = 0; - tcph->urg_ptr = 0; - - /* Adjust TCP checksum */ - tcph->check = 0; - tcph->check = tcp_v4_check(tcph, sizeof(struct tcphdr), - nskb->nh.iph->saddr, - nskb->nh.iph->daddr, - csum_partial((char *)tcph, - sizeof(struct tcphdr), 0)); - - /* Adjust IP TTL, DF */ - nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT); - /* Set DF, id = 0 */ - nskb->nh.iph->frag_off = htons(IP_DF); - nskb->nh.iph->id = 0; - - /* Adjust IP checksum */ - nskb->nh.iph->check = 0; - nskb->nh.iph->check = ip_fast_csum((unsigned char *)nskb->nh.iph, - nskb->nh.iph->ihl); - - /* "Never happens" */ - if (nskb->len > dst_mtu(nskb->dst)) - goto free_nskb; - - nf_ct_attach(nskb, oldskb); - - NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, nskb, NULL, nskb->dst->dev, - dst_output); - return; - - free_nskb: - kfree_skb(nskb); -} - -static inline void send_unreach(struct sk_buff *skb_in, int code) -{ - icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0); -} - -static unsigned int reject(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - const struct ipt_reject_info *reject = targinfo; - - /* Our naive response construction doesn't deal with IP - options, and probably shouldn't try. */ - if ((*pskb)->nh.iph->ihl<<2 != sizeof(struct iphdr)) - return NF_DROP; - - /* WARNING: This code causes reentry within iptables. - This means that the iptables jump stack is now crap. We - must return an absolute verdict. --RR */ - switch (reject->with) { - case IPT_ICMP_NET_UNREACHABLE: - send_unreach(*pskb, ICMP_NET_UNREACH); - break; - case IPT_ICMP_HOST_UNREACHABLE: - send_unreach(*pskb, ICMP_HOST_UNREACH); - break; - case IPT_ICMP_PROT_UNREACHABLE: - send_unreach(*pskb, ICMP_PROT_UNREACH); - break; - case IPT_ICMP_PORT_UNREACHABLE: - send_unreach(*pskb, ICMP_PORT_UNREACH); - break; - case IPT_ICMP_NET_PROHIBITED: - send_unreach(*pskb, ICMP_NET_ANO); - break; + switch (reject->with) { + case IPT_ICMP_NET_UNREACHABLE: + nf_send_unreach(skb, ICMP_NET_UNREACH); + break; + case IPT_ICMP_HOST_UNREACHABLE: + nf_send_unreach(skb, ICMP_HOST_UNREACH); + break; + case IPT_ICMP_PROT_UNREACHABLE: + nf_send_unreach(skb, ICMP_PROT_UNREACH); + break; + case IPT_ICMP_PORT_UNREACHABLE: + nf_send_unreach(skb, ICMP_PORT_UNREACH); + break; + case IPT_ICMP_NET_PROHIBITED: + nf_send_unreach(skb, ICMP_NET_ANO); + break; case IPT_ICMP_HOST_PROHIBITED: - send_unreach(*pskb, ICMP_HOST_ANO); - break; - case IPT_ICMP_ADMIN_PROHIBITED: - send_unreach(*pskb, ICMP_PKT_FILTERED); + nf_send_unreach(skb, ICMP_HOST_ANO); + break; + case IPT_ICMP_ADMIN_PROHIBITED: + nf_send_unreach(skb, ICMP_PKT_FILTERED); break; case IPT_TCP_RESET: - send_reset(*pskb, hooknum); + nf_send_reset(skb, par->hooknum); case IPT_ICMP_ECHOREPLY: /* Doesn't happen. */ break; @@ -281,63 +67,46 @@ static unsigned int reject(struct sk_buff **pskb, return NF_DROP; } -static int check(const char *tablename, - const void *e_void, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) +static int reject_tg_check(const struct xt_tgchk_param *par) { - const struct ipt_reject_info *rejinfo = targinfo; - const struct ipt_entry *e = e_void; - - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_reject_info))) { - DEBUGP("REJECT: targinfosize %u != 0\n", targinfosize); - return 0; - } - - /* Only allow these for packet filtering. */ - if (strcmp(tablename, "filter") != 0) { - DEBUGP("REJECT: bad table `%s'.\n", tablename); - return 0; - } - if ((hook_mask & ~((1 << NF_IP_LOCAL_IN) - | (1 << NF_IP_FORWARD) - | (1 << NF_IP_LOCAL_OUT))) != 0) { - DEBUGP("REJECT: bad hook mask %X\n", hook_mask); - return 0; - } + const struct ipt_reject_info *rejinfo = par->targinfo; + const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { - printk("REJECT: ECHOREPLY no longer supported.\n"); - return 0; + pr_info("ECHOREPLY no longer supported.\n"); + return -EINVAL; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ - if (e->ip.proto != IPPROTO_TCP - || (e->ip.invflags & IPT_INV_PROTO)) { - DEBUGP("REJECT: TCP_RESET invalid for non-tcp\n"); - return 0; + if (e->ip.proto != IPPROTO_TCP || + (e->ip.invflags & XT_INV_PROTO)) { + pr_info("TCP_RESET invalid for non-tcp\n"); + return -EINVAL; } } - - return 1; + return 0; } -static struct ipt_target ipt_reject_reg = { +static struct xt_target reject_tg_reg __read_mostly = { .name = "REJECT", - .target = reject, - .checkentry = check, + .family = NFPROTO_IPV4, + .target = reject_tg, + .targetsize = sizeof(struct ipt_reject_info), + .table = "filter", + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT), + .checkentry = reject_tg_check, .me = THIS_MODULE, }; -static int __init init(void) +static int __init reject_tg_init(void) { - return ipt_register_target(&ipt_reject_reg); + return xt_register_target(&reject_tg_reg); } -static void __exit fini(void) +static void __exit reject_tg_exit(void) { - ipt_unregister_target(&ipt_reject_reg); + xt_unregister_target(&reject_tg_reg); } -module_init(init); -module_exit(fini); +module_init(reject_tg_init); +module_exit(reject_tg_exit); diff --git a/net/ipv4/netfilter/ipt_SAME.c b/net/ipv4/netfilter/ipt_SAME.c deleted file mode 100644 index a22de59bba0..00000000000 --- a/net/ipv4/netfilter/ipt_SAME.c +++ /dev/null @@ -1,211 +0,0 @@ -/* Same. Just like SNAT, only try to make the connections - * between client A and server B always have the same source ip. - * - * (C) 2000 Paul `Rusty' Russell - * (C) 2001 Martin Josefsson - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * 010320 Martin Josefsson <gandalf@wlug.westbo.se> - * * copied ipt_BALANCE.c to ipt_SAME.c and changed a few things. - * 010728 Martin Josefsson <gandalf@wlug.westbo.se> - * * added --nodst to not include destination-ip in new source - * calculations. - * * added some more sanity-checks. - * 010729 Martin Josefsson <gandalf@wlug.westbo.se> - * * fixed a buggy if-statement in same_check(), should have - * used ntohl() but didn't. - * * added support for multiple ranges. IPT_SAME_MAX_RANGE is - * defined in linux/include/linux/netfilter_ipv4/ipt_SAME.h - * and is currently set to 10. - * * added support for 1-address range, nice to have now that - * we have multiple ranges. - */ -#include <linux/types.h> -#include <linux/ip.h> -#include <linux/timer.h> -#include <linux/module.h> -#include <linux/netfilter.h> -#include <linux/netdevice.h> -#include <linux/if.h> -#include <linux/inetdevice.h> -#include <net/protocol.h> -#include <net/checksum.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_nat_rule.h> -#include <linux/netfilter_ipv4/ipt_SAME.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Martin Josefsson <gandalf@wlug.westbo.se>"); -MODULE_DESCRIPTION("iptables special SNAT module for consistent sourceip"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static int -same_check(const char *tablename, - const void *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - unsigned int count, countess, rangeip, index = 0; - struct ipt_same_info *mr = targinfo; - - mr->ipnum = 0; - - if (strcmp(tablename, "nat") != 0) { - DEBUGP("same_check: bad table `%s'.\n", tablename); - return 0; - } - if (targinfosize != IPT_ALIGN(sizeof(*mr))) { - DEBUGP("same_check: size %u.\n", targinfosize); - return 0; - } - if (hook_mask & ~(1 << NF_IP_PRE_ROUTING | 1 << NF_IP_POST_ROUTING)) { - DEBUGP("same_check: bad hooks %x.\n", hook_mask); - return 0; - } - if (mr->rangesize < 1) { - DEBUGP("same_check: need at least one dest range.\n"); - return 0; - } - if (mr->rangesize > IPT_SAME_MAX_RANGE) { - DEBUGP("same_check: too many ranges specified, maximum " - "is %u ranges\n", - IPT_SAME_MAX_RANGE); - return 0; - } - for (count = 0; count < mr->rangesize; count++) { - if (ntohl(mr->range[count].min_ip) > - ntohl(mr->range[count].max_ip)) { - DEBUGP("same_check: min_ip is larger than max_ip in " - "range `%u.%u.%u.%u-%u.%u.%u.%u'.\n", - NIPQUAD(mr->range[count].min_ip), - NIPQUAD(mr->range[count].max_ip)); - return 0; - } - if (!(mr->range[count].flags & IP_NAT_RANGE_MAP_IPS)) { - DEBUGP("same_check: bad MAP_IPS.\n"); - return 0; - } - rangeip = (ntohl(mr->range[count].max_ip) - - ntohl(mr->range[count].min_ip) + 1); - mr->ipnum += rangeip; - - DEBUGP("same_check: range %u, ipnum = %u\n", count, rangeip); - } - DEBUGP("same_check: total ipaddresses = %u\n", mr->ipnum); - - mr->iparray = kmalloc((sizeof(u_int32_t) * mr->ipnum), GFP_KERNEL); - if (!mr->iparray) { - DEBUGP("same_check: Couldn't allocate %u bytes " - "for %u ipaddresses!\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); - return 0; - } - DEBUGP("same_check: Allocated %u bytes for %u ipaddresses.\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); - - for (count = 0; count < mr->rangesize; count++) { - for (countess = ntohl(mr->range[count].min_ip); - countess <= ntohl(mr->range[count].max_ip); - countess++) { - mr->iparray[index] = countess; - DEBUGP("same_check: Added ipaddress `%u.%u.%u.%u' " - "in index %u.\n", - HIPQUAD(countess), index); - index++; - } - } - return 1; -} - -static void -same_destroy(void *targinfo, - unsigned int targinfosize) -{ - struct ipt_same_info *mr = targinfo; - - kfree(mr->iparray); - - DEBUGP("same_destroy: Deallocated %u bytes for %u ipaddresses.\n", - (sizeof(u_int32_t) * mr->ipnum), mr->ipnum); -} - -static unsigned int -same_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - struct ip_conntrack *ct; - enum ip_conntrack_info ctinfo; - u_int32_t tmpip, aindex, new_ip; - const struct ipt_same_info *same = targinfo; - struct ip_nat_range newrange; - const struct ip_conntrack_tuple *t; - - IP_NF_ASSERT(hooknum == NF_IP_PRE_ROUTING || - hooknum == NF_IP_POST_ROUTING); - ct = ip_conntrack_get(*pskb, &ctinfo); - - t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - - /* Base new source on real src ip and optionally dst ip, - giving some hope for consistency across reboots. - Here we calculate the index in same->iparray which - holds the ipaddress we should use */ - - tmpip = ntohl(t->src.ip); - - if (!(same->info & IPT_SAME_NODST)) - tmpip += ntohl(t->dst.ip); - - aindex = tmpip % same->ipnum; - - new_ip = htonl(same->iparray[aindex]); - - DEBUGP("ipt_SAME: src=%u.%u.%u.%u dst=%u.%u.%u.%u, " - "new src=%u.%u.%u.%u\n", - NIPQUAD(t->src.ip), NIPQUAD(t->dst.ip), - NIPQUAD(new_ip)); - - /* Transfer from original range. */ - newrange = ((struct ip_nat_range) - { same->range[0].flags, new_ip, new_ip, - /* FIXME: Use ports from correct range! */ - same->range[0].min, same->range[0].max }); - - /* Hand modified range to generic setup. */ - return ip_nat_setup_info(ct, &newrange, hooknum); -} - -static struct ipt_target same_reg = { - .name = "SAME", - .target = same_target, - .checkentry = same_check, - .destroy = same_destroy, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_target(&same_reg); -} - -static void __exit fini(void) -{ - ipt_unregister_target(&same_reg); -} - -module_init(init); -module_exit(fini); - diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c new file mode 100644 index 00000000000..a313c3fbeb4 --- /dev/null +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -0,0 +1,482 @@ +/* + * Copyright (c) 2013 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/tcp.h> + +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/xt_SYNPROXY.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_conntrack_seqadj.h> +#include <net/netfilter/nf_conntrack_synproxy.h> + +static struct iphdr * +synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr) +{ + struct iphdr *iph; + + skb_reset_network_header(skb); + iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); + iph->version = 4; + iph->ihl = sizeof(*iph) / 4; + iph->tos = 0; + iph->id = 0; + iph->frag_off = htons(IP_DF); + iph->ttl = sysctl_ip_default_ttl; + iph->protocol = IPPROTO_TCP; + iph->check = 0; + iph->saddr = saddr; + iph->daddr = daddr; + + return iph; +} + +static void +synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb, + struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo, + struct iphdr *niph, struct tcphdr *nth, + unsigned int tcp_hdr_size) +{ + nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0); + nskb->ip_summed = CHECKSUM_PARTIAL; + nskb->csum_start = (unsigned char *)nth - nskb->head; + nskb->csum_offset = offsetof(struct tcphdr, check); + + skb_dst_set_noref(nskb, skb_dst(skb)); + nskb->protocol = htons(ETH_P_IP); + if (ip_route_me_harder(nskb, RTN_UNSPEC)) + goto free_nskb; + + if (nfct) { + nskb->nfct = nfct; + nskb->nfctinfo = ctinfo; + nf_conntrack_get(nfct); + } + + ip_local_out(nskb); + return; + +free_nskb: + kfree_skb(nskb); +} + +static void +synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + u16 mss = opts->mss; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->dest; + nth->dest = th->source; + nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss)); + nth->ack_seq = htonl(ntohl(th->seq) + 1); + tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK; + if (opts->options & XT_SYNPROXY_OPT_ECN) + tcp_flag_word(nth) |= TCP_FLAG_ECE; + nth->doff = tcp_hdr_size / 4; + nth->window = 0; + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY, + niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_server_syn(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts, u32 recv_seq) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->source; + nth->dest = th->dest; + nth->seq = htonl(recv_seq - 1); + /* ack_seq is used to relay our ISN to the synproxy hook to initialize + * sequence number translation once a connection tracking entry exists. + */ + nth->ack_seq = htonl(ntohl(th->ack_seq) - 1); + tcp_flag_word(nth) = TCP_FLAG_SYN; + if (opts->options & XT_SYNPROXY_OPT_ECN) + tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR; + nth->doff = tcp_hdr_size / 4; + nth->window = th->window; + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW, + niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_server_ack(const struct synproxy_net *snet, + const struct ip_ct_tcp *state, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->dest; + nth->dest = th->source; + nth->seq = htonl(ntohl(th->ack_seq)); + nth->ack_seq = htonl(ntohl(th->seq) + 1); + tcp_flag_word(nth) = TCP_FLAG_ACK; + nth->doff = tcp_hdr_size / 4; + nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin); + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); +} + +static void +synproxy_send_client_ack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + const struct synproxy_options *opts) +{ + struct sk_buff *nskb; + struct iphdr *iph, *niph; + struct tcphdr *nth; + unsigned int tcp_hdr_size; + + iph = ip_hdr(skb); + + tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts); + nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER, + GFP_ATOMIC); + if (nskb == NULL) + return; + skb_reserve(nskb, MAX_TCP_HEADER); + + niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr); + + skb_reset_transport_header(nskb); + nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size); + nth->source = th->source; + nth->dest = th->dest; + nth->seq = htonl(ntohl(th->seq) + 1); + nth->ack_seq = th->ack_seq; + tcp_flag_word(nth) = TCP_FLAG_ACK; + nth->doff = tcp_hdr_size / 4; + nth->window = ntohs(htons(th->window) >> opts->wscale); + nth->check = 0; + nth->urg_ptr = 0; + + synproxy_build_options(nth, opts); + + synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size); +} + +static bool +synproxy_recv_client_ack(const struct synproxy_net *snet, + const struct sk_buff *skb, const struct tcphdr *th, + struct synproxy_options *opts, u32 recv_seq) +{ + int mss; + + mss = __cookie_v4_check(ip_hdr(skb), th, ntohl(th->ack_seq) - 1); + if (mss == 0) { + this_cpu_inc(snet->stats->cookie_invalid); + return false; + } + + this_cpu_inc(snet->stats->cookie_valid); + opts->mss = mss; + opts->options |= XT_SYNPROXY_OPT_MSS; + + if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_check_timestamp_cookie(opts); + + synproxy_send_server_syn(snet, skb, th, opts, recv_seq); + return true; +} + +static unsigned int +synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct xt_synproxy_info *info = par->targinfo; + struct synproxy_net *snet = synproxy_pernet(dev_net(par->in)); + struct synproxy_options opts = {}; + struct tcphdr *th, _th; + + if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP)) + return NF_DROP; + + th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + if (!synproxy_parse_options(skb, par->thoff, th, &opts)) + return NF_DROP; + + if (th->syn && !(th->ack || th->fin || th->rst)) { + /* Initial SYN from client */ + this_cpu_inc(snet->stats->syn_received); + + if (th->ece && th->cwr) + opts.options |= XT_SYNPROXY_OPT_ECN; + + opts.options &= info->options; + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy_init_timestamp_cookie(info, &opts); + else + opts.options &= ~(XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM | + XT_SYNPROXY_OPT_ECN); + + synproxy_send_client_synack(skb, th, &opts); + return NF_DROP; + + } else if (th->ack && !(th->fin || th->rst || th->syn)) { + /* ACK from client */ + synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq)); + return NF_DROP; + } + + return XT_CONTINUE; +} + +static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out)); + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + struct nf_conn_synproxy *synproxy; + struct synproxy_options opts = {}; + const struct ip_ct_tcp *state; + struct tcphdr *th, _th; + unsigned int thoff; + + ct = nf_ct_get(skb, &ctinfo); + if (ct == NULL) + return NF_ACCEPT; + + synproxy = nfct_synproxy(ct); + if (synproxy == NULL) + return NF_ACCEPT; + + if (nf_is_loopback_packet(skb)) + return NF_ACCEPT; + + thoff = ip_hdrlen(skb); + th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); + if (th == NULL) + return NF_DROP; + + state = &ct->proto.tcp; + switch (state->state) { + case TCP_CONNTRACK_CLOSE: + if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) { + nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - + ntohl(th->seq) + 1); + break; + } + + if (!th->syn || th->ack || + CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + break; + + /* Reopened connection - reset the sequence number and timestamp + * adjustments, they will get initialized once the connection is + * reestablished. + */ + nf_ct_seqadj_init(ct, ctinfo, 0); + synproxy->tsoff = 0; + this_cpu_inc(snet->stats->conn_reopened); + + /* fall through */ + case TCP_CONNTRACK_SYN_SENT: + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + + if (!th->syn && th->ack && + CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1, + * therefore we need to add 1 to make the SYN sequence + * number match the one of first SYN. + */ + if (synproxy_recv_client_ack(snet, skb, th, &opts, + ntohl(th->seq) + 1)) + this_cpu_inc(snet->stats->cookie_retrans); + + return NF_DROP; + } + + synproxy->isn = ntohl(th->ack_seq); + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy->its = opts.tsecr; + break; + case TCP_CONNTRACK_SYN_RECV: + if (!th->syn || !th->ack) + break; + + if (!synproxy_parse_options(skb, thoff, th, &opts)) + return NF_DROP; + + if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP) + synproxy->tsoff = opts.tsval - synproxy->its; + + opts.options &= ~(XT_SYNPROXY_OPT_MSS | + XT_SYNPROXY_OPT_WSCALE | + XT_SYNPROXY_OPT_SACK_PERM); + + swap(opts.tsval, opts.tsecr); + synproxy_send_server_ack(snet, state, skb, th, &opts); + + nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq)); + + swap(opts.tsval, opts.tsecr); + synproxy_send_client_ack(snet, skb, th, &opts); + + consume_skb(skb); + return NF_STOLEN; + default: + break; + } + + synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy); + return NF_ACCEPT; +} + +static int synproxy_tg4_check(const struct xt_tgchk_param *par) +{ + const struct ipt_entry *e = par->entryinfo; + + if (e->ip.proto != IPPROTO_TCP || + e->ip.invflags & XT_INV_PROTO) + return -EINVAL; + + return nf_ct_l3proto_try_module_get(par->family); +} + +static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par) +{ + nf_ct_l3proto_module_put(par->family); +} + +static struct xt_target synproxy_tg4_reg __read_mostly = { + .name = "SYNPROXY", + .family = NFPROTO_IPV4, + .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD), + .target = synproxy_tg4, + .targetsize = sizeof(struct xt_synproxy_info), + .checkentry = synproxy_tg4_check, + .destroy = synproxy_tg4_destroy, + .me = THIS_MODULE, +}; + +static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { + { + .hook = ipv4_synproxy_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, + { + .hook = ipv4_synproxy_hook, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1, + }, +}; + +static int __init synproxy_tg4_init(void) +{ + int err; + + err = nf_register_hooks(ipv4_synproxy_ops, + ARRAY_SIZE(ipv4_synproxy_ops)); + if (err < 0) + goto err1; + + err = xt_register_target(&synproxy_tg4_reg); + if (err < 0) + goto err2; + + return 0; + +err2: + nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); +err1: + return err; +} + +static void __exit synproxy_tg4_exit(void) +{ + xt_unregister_target(&synproxy_tg4_reg); + nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops)); +} + +module_init(synproxy_tg4_init); +module_exit(synproxy_tg4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); diff --git a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c deleted file mode 100644 index c122841e182..00000000000 --- a/net/ipv4/netfilter/ipt_TCPMSS.c +++ /dev/null @@ -1,263 +0,0 @@ -/* - * This is a module which is used for setting the MSS option in TCP packets. - * - * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/ip.h> -#include <net/tcp.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_TCPMSS.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); -MODULE_DESCRIPTION("iptables TCP MSS modification module"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static u_int16_t -cheat_check(u_int32_t oldvalinv, u_int32_t newval, u_int16_t oldcheck) -{ - u_int32_t diffs[] = { oldvalinv, newval }; - return csum_fold(csum_partial((char *)diffs, sizeof(diffs), - oldcheck^0xFFFF)); -} - -static inline unsigned int -optlen(const u_int8_t *opt, unsigned int offset) -{ - /* Beware zero-length options: make finite progress */ - if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0) return 1; - else return opt[offset+1]; -} - -static unsigned int -ipt_tcpmss_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - const struct ipt_tcpmss_info *tcpmssinfo = targinfo; - struct tcphdr *tcph; - struct iphdr *iph; - u_int16_t tcplen, newtotlen, oldval, newmss; - unsigned int i; - u_int8_t *opt; - - if (!skb_make_writable(pskb, (*pskb)->len)) - return NF_DROP; - - if ((*pskb)->ip_summed == CHECKSUM_HW && - skb_checksum_help(*pskb, out == NULL)) - return NF_DROP; - - iph = (*pskb)->nh.iph; - tcplen = (*pskb)->len - iph->ihl*4; - - tcph = (void *)iph + iph->ihl*4; - - /* Since it passed flags test in tcp match, we know it is is - not a fragment, and has data >= tcp header length. SYN - packets should not contain data: if they did, then we risk - running over MTU, sending Frag Needed and breaking things - badly. --RR */ - if (tcplen != tcph->doff*4) { - if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: bad length (%d bytes)\n", - (*pskb)->len); - return NF_DROP; - } - - if(tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) { - if(!(*pskb)->dst) { - if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: no dst?! can't determine path-MTU\n"); - return NF_DROP; /* or IPT_CONTINUE ?? */ - } - - if(dst_mtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) { - if (net_ratelimit()) - printk(KERN_ERR - "ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_mtu((*pskb)->dst)); - return NF_DROP; /* or IPT_CONTINUE ?? */ - } - - newmss = dst_mtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr); - } else - newmss = tcpmssinfo->mss; - - opt = (u_int8_t *)tcph; - for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)){ - if ((opt[i] == TCPOPT_MSS) && - ((tcph->doff*4 - i) >= TCPOLEN_MSS) && - (opt[i+1] == TCPOLEN_MSS)) { - u_int16_t oldmss; - - oldmss = (opt[i+2] << 8) | opt[i+3]; - - if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && - (oldmss <= newmss)) - return IPT_CONTINUE; - - opt[i+2] = (newmss & 0xff00) >> 8; - opt[i+3] = (newmss & 0x00ff); - - tcph->check = cheat_check(htons(oldmss)^0xFFFF, - htons(newmss), - tcph->check); - - DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" - "->%u.%u.%u.%u:%hu changed TCP MSS option" - " (from %u to %u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - ntohs(tcph->source), - NIPQUAD((*pskb)->nh.iph->daddr), - ntohs(tcph->dest), - oldmss, newmss); - goto retmodified; - } - } - - /* - * MSS Option not found ?! add it.. - */ - if (skb_tailroom((*pskb)) < TCPOLEN_MSS) { - struct sk_buff *newskb; - - newskb = skb_copy_expand(*pskb, skb_headroom(*pskb), - TCPOLEN_MSS, GFP_ATOMIC); - if (!newskb) { - if (net_ratelimit()) - printk(KERN_ERR "ipt_tcpmss_target:" - " unable to allocate larger skb\n"); - return NF_DROP; - } - - kfree_skb(*pskb); - *pskb = newskb; - iph = (*pskb)->nh.iph; - tcph = (void *)iph + iph->ihl*4; - } - - skb_put((*pskb), TCPOLEN_MSS); - - opt = (u_int8_t *)tcph + sizeof(struct tcphdr); - memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr)); - - tcph->check = cheat_check(htons(tcplen) ^ 0xFFFF, - htons(tcplen + TCPOLEN_MSS), tcph->check); - tcplen += TCPOLEN_MSS; - - opt[0] = TCPOPT_MSS; - opt[1] = TCPOLEN_MSS; - opt[2] = (newmss & 0xff00) >> 8; - opt[3] = (newmss & 0x00ff); - - tcph->check = cheat_check(~0, *((u_int32_t *)opt), tcph->check); - - oldval = ((u_int16_t *)tcph)[6]; - tcph->doff += TCPOLEN_MSS/4; - tcph->check = cheat_check(oldval ^ 0xFFFF, - ((u_int16_t *)tcph)[6], tcph->check); - - newtotlen = htons(ntohs(iph->tot_len) + TCPOLEN_MSS); - iph->check = cheat_check(iph->tot_len ^ 0xFFFF, - newtotlen, iph->check); - iph->tot_len = newtotlen; - - DEBUGP(KERN_INFO "ipt_tcpmss_target: %u.%u.%u.%u:%hu" - "->%u.%u.%u.%u:%hu added TCP MSS option (%u)\n", - NIPQUAD((*pskb)->nh.iph->saddr), - ntohs(tcph->source), - NIPQUAD((*pskb)->nh.iph->daddr), - ntohs(tcph->dest), - newmss); - - retmodified: - return IPT_CONTINUE; -} - -#define TH_SYN 0x02 - -static inline int find_syn_match(const struct ipt_entry_match *m) -{ - const struct ipt_tcp *tcpinfo = (const struct ipt_tcp *)m->data; - - if (strcmp(m->u.kernel.match->name, "tcp") == 0 - && (tcpinfo->flg_cmp & TH_SYN) - && !(tcpinfo->invflags & IPT_TCP_INV_FLAGS)) - return 1; - - return 0; -} - -/* Must specify -p tcp --syn/--tcp-flags SYN */ -static int -ipt_tcpmss_checkentry(const char *tablename, - const void *e_void, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const struct ipt_tcpmss_info *tcpmssinfo = targinfo; - const struct ipt_entry *e = e_void; - - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tcpmss_info))) { - DEBUGP("ipt_tcpmss_checkentry: targinfosize %u != %u\n", - targinfosize, IPT_ALIGN(sizeof(struct ipt_tcpmss_info))); - return 0; - } - - - if((tcpmssinfo->mss == IPT_TCPMSS_CLAMP_PMTU) && - ((hook_mask & ~((1 << NF_IP_FORWARD) - | (1 << NF_IP_LOCAL_OUT) - | (1 << NF_IP_POST_ROUTING))) != 0)) { - printk("TCPMSS: path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); - return 0; - } - - if (e->ip.proto == IPPROTO_TCP - && !(e->ip.invflags & IPT_INV_PROTO) - && IPT_MATCH_ITERATE(e, find_syn_match)) - return 1; - - printk("TCPMSS: Only works on TCP SYN packets\n"); - return 0; -} - -static struct ipt_target ipt_tcpmss_reg = { - .name = "TCPMSS", - .target = ipt_tcpmss_target, - .checkentry = ipt_tcpmss_checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_target(&ipt_tcpmss_reg); -} - -static void __exit fini(void) -{ - ipt_unregister_target(&ipt_tcpmss_reg); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_TOS.c b/net/ipv4/netfilter/ipt_TOS.c deleted file mode 100644 index 3a44a56db23..00000000000 --- a/net/ipv4/netfilter/ipt_TOS.c +++ /dev/null @@ -1,104 +0,0 @@ -/* This is a module which is used for setting the TOS field of a packet. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_TOS.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("iptables TOS mangling module"); - -static unsigned int -target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, - void *userinfo) -{ - const struct ipt_tos_target_info *tosinfo = targinfo; - - if (((*pskb)->nh.iph->tos & IPTOS_TOS_MASK) != tosinfo->tos) { - u_int16_t diffs[2]; - - if (!skb_make_writable(pskb, sizeof(struct iphdr))) - return NF_DROP; - - diffs[0] = htons((*pskb)->nh.iph->tos) ^ 0xFFFF; - (*pskb)->nh.iph->tos - = ((*pskb)->nh.iph->tos & IPTOS_PREC_MASK) - | tosinfo->tos; - diffs[1] = htons((*pskb)->nh.iph->tos); - (*pskb)->nh.iph->check - = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - (*pskb)->nh.iph->check - ^0xFFFF)); - } - return IPT_CONTINUE; -} - -static int -checkentry(const char *tablename, - const void *e_void, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - const u_int8_t tos = ((struct ipt_tos_target_info *)targinfo)->tos; - - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_tos_target_info))) { - printk(KERN_WARNING "TOS: targinfosize %u != %Zu\n", - targinfosize, - IPT_ALIGN(sizeof(struct ipt_tos_target_info))); - return 0; - } - - if (strcmp(tablename, "mangle") != 0) { - printk(KERN_WARNING "TOS: can only be called from \"mangle\" table, not \"%s\"\n", tablename); - return 0; - } - - if (tos != IPTOS_LOWDELAY - && tos != IPTOS_THROUGHPUT - && tos != IPTOS_RELIABILITY - && tos != IPTOS_MINCOST - && tos != IPTOS_NORMALSVC) { - printk(KERN_WARNING "TOS: bad tos value %#x\n", tos); - return 0; - } - - return 1; -} - -static struct ipt_target ipt_tos_reg = { - .name = "TOS", - .target = target, - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_target(&ipt_tos_reg); -} - -static void __exit fini(void) -{ - ipt_unregister_target(&ipt_tos_reg); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_TTL.c b/net/ipv4/netfilter/ipt_TTL.c deleted file mode 100644 index b769eb23197..00000000000 --- a/net/ipv4/netfilter/ipt_TTL.c +++ /dev/null @@ -1,119 +0,0 @@ -/* TTL modification target for IP tables - * (C) 2000,2005 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <net/checksum.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_TTL.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("IP tables TTL modification module"); -MODULE_LICENSE("GPL"); - -static unsigned int -ipt_ttl_target(struct sk_buff **pskb, const struct net_device *in, - const struct net_device *out, unsigned int hooknum, - const void *targinfo, void *userinfo) -{ - struct iphdr *iph; - const struct ipt_TTL_info *info = targinfo; - u_int16_t diffs[2]; - int new_ttl; - - if (!skb_make_writable(pskb, (*pskb)->len)) - return NF_DROP; - - iph = (*pskb)->nh.iph; - - switch (info->mode) { - case IPT_TTL_SET: - new_ttl = info->ttl; - break; - case IPT_TTL_INC: - new_ttl = iph->ttl + info->ttl; - if (new_ttl > 255) - new_ttl = 255; - break; - case IPT_TTL_DEC: - new_ttl = iph->ttl - info->ttl; - if (new_ttl < 0) - new_ttl = 0; - break; - default: - new_ttl = iph->ttl; - break; - } - - if (new_ttl != iph->ttl) { - diffs[0] = htons(((unsigned)iph->ttl) << 8) ^ 0xFFFF; - iph->ttl = new_ttl; - diffs[1] = htons(((unsigned)iph->ttl) << 8); - iph->check = csum_fold(csum_partial((char *)diffs, - sizeof(diffs), - iph->check^0xFFFF)); - } - - return IPT_CONTINUE; -} - -static int ipt_ttl_checkentry(const char *tablename, - const void *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hook_mask) -{ - struct ipt_TTL_info *info = targinfo; - - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_TTL_info))) { - printk(KERN_WARNING "ipt_TTL: targinfosize %u != %Zu\n", - targinfosize, - IPT_ALIGN(sizeof(struct ipt_TTL_info))); - return 0; - } - - if (strcmp(tablename, "mangle")) { - printk(KERN_WARNING "ipt_TTL: can only be called from " - "\"mangle\" table, not \"%s\"\n", tablename); - return 0; - } - - if (info->mode > IPT_TTL_MAXMODE) { - printk(KERN_WARNING "ipt_TTL: invalid or unknown Mode %u\n", - info->mode); - return 0; - } - - if ((info->mode != IPT_TTL_SET) && (info->ttl == 0)) - return 0; - - return 1; -} - -static struct ipt_target ipt_TTL = { - .name = "TTL", - .target = ipt_ttl_target, - .checkentry = ipt_ttl_checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_target(&ipt_TTL); -} - -static void __exit fini(void) -{ - ipt_unregister_target(&ipt_TTL); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c index 641dbc47765..9cb993cd224 100644 --- a/net/ipv4/netfilter/ipt_ULOG.c +++ b/net/ipv4/netfilter/ipt_ULOG.c @@ -2,29 +2,16 @@ * netfilter module for userspace packet logging daemons * * (C) 2000-2004 by Harald Welte <laforge@netfilter.org> - * - * 2000/09/22 ulog-cprange feature added - * 2001/01/04 in-kernel queue as proposed by Sebastian Zander - * <zander@fokus.gmd.de> - * 2001/01/30 per-rule nlgroup conflicts with global queue. - * nlgroup now global (sysctl) - * 2001/04/19 ulog-queue reworked, now fixed buffer size specified at - * module loadtime -HW - * 2002/07/07 remove broken nflog_rcv() function -HW - * 2002/08/29 fix shifted/unshifted nlgroup bug -HW - * 2002/10/30 fix uninitialized mac_len field - <Anders K. Pedersen> - * 2004/10/25 fix erroneous calculation of 'len' parameter to NLMSG_PUT - * resulting in bogus 'error during NLMSG_PUT' messages. - * * (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2005-2007 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * - * This module accepts two parameters: - * + * This module accepts two parameters: + * * nlbufsiz: * The parameter specifies how big the buffer for each netlink multicast * group is. e.g. If you say nlbufsiz=8192, up to eight kb of packets will @@ -35,48 +22,44 @@ * each nlgroup you are using, so the total kernel memory usage increases * by that factor. * + * Actually you should use nlbufsiz a bit smaller than PAGE_SIZE, since + * nlbufsiz is used with alloc_skb, which adds another + * sizeof(struct skb_shared_info). Use NLMSG_GOODSIZE instead. + * * flushtimeout: * Specify, after how many hundredths of a second the queue should be * flushed even if it is not full yet. - * - * ipt_ULOG.c,v 1.22 2002/10/30 09:07:31 laforge Exp */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/module.h> -#include <linux/config.h> #include <linux/spinlock.h> #include <linux/socket.h> +#include <linux/slab.h> #include <linux/skbuff.h> #include <linux/kernel.h> #include <linux/timer.h> -#include <linux/netlink.h> +#include <net/netlink.h> #include <linux/netdevice.h> #include <linux/mm.h> #include <linux/moduleparam.h> #include <linux/netfilter.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> #include <linux/netfilter_ipv4/ipt_ULOG.h> +#include <net/netfilter/nf_log.h> +#include <net/netns/generic.h> #include <net/sock.h> #include <linux/bitops.h> +#include <asm/unaligned.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); -MODULE_DESCRIPTION("iptables userspace logging module"); +MODULE_DESCRIPTION("Xtables: packet logging to netlink using ULOG"); MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_NFLOG); #define ULOG_NL_EVENT 111 /* Harald's favorite number */ #define ULOG_MAXNLGROUPS 32 /* numer of nlgroups */ -#if 0 -#define DEBUGP(format, args...) printk("%s:%s:" format, \ - __FILE__, __FUNCTION__ , ## args) -#else -#define DEBUGP(format, args...) -#endif - -#define PRINTR(format, args...) do { if (net_ratelimit()) printk(format , ## args); } while (0) - -static unsigned int nlbufsiz = 4096; +static unsigned int nlbufsiz = NLMSG_GOODSIZE; module_param(nlbufsiz, uint, 0400); MODULE_PARM_DESC(nlbufsiz, "netlink buffer size"); @@ -84,7 +67,7 @@ static unsigned int flushtimeout = 10; module_param(flushtimeout, uint, 0600); MODULE_PARM_DESC(flushtimeout, "buffer flush timeout (hundredths of a second)"); -static int nflog = 1; +static bool nflog = true; module_param(nflog, bool, 0400); MODULE_PARM_DESC(nflog, "register as internal netfilter logging module"); @@ -97,19 +80,30 @@ typedef struct { struct timer_list timer; /* the timer function */ } ulog_buff_t; -static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */ +static int ulog_net_id __read_mostly; +struct ulog_net { + unsigned int nlgroup[ULOG_MAXNLGROUPS]; + ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; + struct sock *nflognl; + spinlock_t lock; +}; -static struct sock *nflognl; /* our socket */ -static DEFINE_SPINLOCK(ulog_lock); /* spinlock */ +static struct ulog_net *ulog_pernet(struct net *net) +{ + return net_generic(net, ulog_net_id); +} /* send one ulog_buff_t to userspace */ -static void ulog_send(unsigned int nlgroupnum) +static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum) { - ulog_buff_t *ub = &ulog_buffers[nlgroupnum]; + ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum]; - if (timer_pending(&ub->timer)) { - DEBUGP("ipt_ULOG: ulog_send: timer was pending, deleting\n"); - del_timer(&ub->timer); + pr_debug("ulog_send: timer is deleting\n"); + del_timer(&ub->timer); + + if (!ub->skb) { + pr_debug("ulog_send: nothing to send\n"); + return; } /* last nlmsg needs NLMSG_DONE */ @@ -117,54 +111,60 @@ static void ulog_send(unsigned int nlgroupnum) ub->lastnlh->nlmsg_type = NLMSG_DONE; NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1; - DEBUGP("ipt_ULOG: throwing %d packets to netlink group %u\n", - ub->qlen, nlgroupnum + 1); - netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC); + pr_debug("throwing %d packets to netlink group %u\n", + ub->qlen, nlgroupnum + 1); + netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1, + GFP_ATOMIC); ub->qlen = 0; ub->skb = NULL; ub->lastnlh = NULL; - } /* timer function to flush queue in flushtimeout time */ static void ulog_timer(unsigned long data) { - DEBUGP("ipt_ULOG: timer function called, calling ulog_send\n"); + unsigned int groupnum = *((unsigned int *)data); + struct ulog_net *ulog = container_of((void *)data, + struct ulog_net, + nlgroup[groupnum]); + pr_debug("timer function called, calling ulog_send\n"); /* lock to protect against somebody modifying our structure * from ipt_ulog_target at the same time */ - spin_lock_bh(&ulog_lock); - ulog_send(data); - spin_unlock_bh(&ulog_lock); + spin_lock_bh(&ulog->lock); + ulog_send(ulog, groupnum); + spin_unlock_bh(&ulog->lock); } static struct sk_buff *ulog_alloc_skb(unsigned int size) { struct sk_buff *skb; + unsigned int n; /* alloc skb which should be big enough for a whole * multipart message. WARNING: has to be <= 131000 * due to slab allocator restrictions */ - skb = alloc_skb(nlbufsiz, GFP_ATOMIC); + n = max(size, nlbufsiz); + skb = alloc_skb(n, GFP_ATOMIC | __GFP_NOWARN); if (!skb) { - PRINTR("ipt_ULOG: can't alloc whole buffer %ub!\n", - nlbufsiz); - - /* try to allocate only as much as we need for - * current packet */ + if (n > size) { + /* try to allocate only as much as we need for + * current packet */ - skb = alloc_skb(size, GFP_ATOMIC); - if (!skb) - PRINTR("ipt_ULOG: can't even allocate %ub\n", size); + skb = alloc_skb(size, GFP_ATOMIC); + if (!skb) + pr_debug("cannot even allocate %ub\n", size); + } } return skb; } -static void ipt_ulog_packet(unsigned int hooknum, +static void ipt_ulog_packet(struct net *net, + unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, const struct net_device *out, @@ -175,6 +175,8 @@ static void ipt_ulog_packet(unsigned int hooknum, ulog_packet_msg_t *pm; size_t size, copy_len; struct nlmsghdr *nlh; + struct timeval tv; + struct ulog_net *ulog = ulog_pernet(net); /* ffs == find first bit set, necessary because userspace * is already shifting groupnumber, but we need unshifted. @@ -182,86 +184,83 @@ static void ipt_ulog_packet(unsigned int hooknum, unsigned int groupnum = ffs(loginfo->nl_group) - 1; /* calculate the size of the skb needed */ - if ((loginfo->copy_range == 0) || - (loginfo->copy_range > skb->len)) { + if (loginfo->copy_range == 0 || loginfo->copy_range > skb->len) copy_len = skb->len; - } else { + else copy_len = loginfo->copy_range; - } - size = NLMSG_SPACE(sizeof(*pm) + copy_len); + size = nlmsg_total_size(sizeof(*pm) + copy_len); + + ub = &ulog->ulog_buffers[groupnum]; - ub = &ulog_buffers[groupnum]; - - spin_lock_bh(&ulog_lock); + spin_lock_bh(&ulog->lock); if (!ub->skb) { if (!(ub->skb = ulog_alloc_skb(size))) goto alloc_failure; } else if (ub->qlen >= loginfo->qthreshold || size > skb_tailroom(ub->skb)) { - /* either the queue len is too high or we don't have + /* either the queue len is too high or we don't have * enough room in nlskb left. send it to userspace. */ - ulog_send(groupnum); + ulog_send(ulog, groupnum); if (!(ub->skb = ulog_alloc_skb(size))) goto alloc_failure; } - DEBUGP("ipt_ULOG: qlen %d, qthreshold %d\n", ub->qlen, - loginfo->qthreshold); + pr_debug("qlen %d, qthreshold %Zu\n", ub->qlen, loginfo->qthreshold); - /* NLMSG_PUT contains a hidden goto nlmsg_failure !!! */ - nlh = NLMSG_PUT(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, - sizeof(*pm)+copy_len); + nlh = nlmsg_put(ub->skb, 0, ub->qlen, ULOG_NL_EVENT, + sizeof(*pm)+copy_len, 0); + if (!nlh) { + pr_debug("error during nlmsg_put\n"); + goto out_unlock; + } ub->qlen++; - pm = NLMSG_DATA(nlh); + pm = nlmsg_data(nlh); + memset(pm, 0, sizeof(*pm)); /* We might not have a timestamp, get one */ - if (skb->tstamp.off_sec == 0) + if (skb->tstamp.tv64 == 0) __net_timestamp((struct sk_buff *)skb); /* copy hook, prefix, timestamp, payload, etc. */ pm->data_len = copy_len; - pm->timestamp_sec = skb->tstamp.off_sec; - pm->timestamp_usec = skb->tstamp.off_usec; - pm->mark = skb->nfmark; + tv = ktime_to_timeval(skb->tstamp); + put_unaligned(tv.tv_sec, &pm->timestamp_sec); + put_unaligned(tv.tv_usec, &pm->timestamp_usec); + put_unaligned(skb->mark, &pm->mark); pm->hook = hooknum; - if (prefix != NULL) - strncpy(pm->prefix, prefix, sizeof(pm->prefix)); + if (prefix != NULL) { + strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1); + pm->prefix[sizeof(pm->prefix) - 1] = '\0'; + } else if (loginfo->prefix[0] != '\0') strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix)); - else - *(pm->prefix) = '\0'; - if (in && in->hard_header_len > 0 - && skb->mac.raw != (void *) skb->nh.iph - && in->hard_header_len <= ULOG_MAC_LEN) { - memcpy(pm->mac, skb->mac.raw, in->hard_header_len); + if (in && in->hard_header_len > 0 && + skb->mac_header != skb->network_header && + in->hard_header_len <= ULOG_MAC_LEN) { + memcpy(pm->mac, skb_mac_header(skb), in->hard_header_len); pm->mac_len = in->hard_header_len; } else pm->mac_len = 0; if (in) strncpy(pm->indev_name, in->name, sizeof(pm->indev_name)); - else - pm->indev_name[0] = '\0'; if (out) strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name)); - else - pm->outdev_name[0] = '\0'; /* copy_len <= skb->len, so can't fail. */ if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0) BUG(); - + /* check if we are building multi-part messages */ - if (ub->qlen > 1) { + if (ub->qlen > 1) ub->lastnlh->nlmsg_flags |= NLM_F_MULTI; - } ub->lastnlh = nlh; @@ -275,36 +274,30 @@ static void ipt_ulog_packet(unsigned int hooknum, if (ub->qlen >= loginfo->qthreshold) { if (loginfo->qthreshold > 1) nlh->nlmsg_type = NLMSG_DONE; - ulog_send(groupnum); + ulog_send(ulog, groupnum); } - - spin_unlock_bh(&ulog_lock); +out_unlock: + spin_unlock_bh(&ulog->lock); return; -nlmsg_failure: - PRINTR("ipt_ULOG: error during NLMSG_PUT\n"); - alloc_failure: - PRINTR("ipt_ULOG: Error building netlink message\n"); - - spin_unlock_bh(&ulog_lock); + pr_debug("Error building netlink message\n"); + spin_unlock_bh(&ulog->lock); } -static unsigned int ipt_ulog_target(struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - unsigned int hooknum, - const void *targinfo, void *userinfo) +static unsigned int +ulog_tg(struct sk_buff *skb, const struct xt_action_param *par) { - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; + struct net *net = dev_net(par->in ? par->in : par->out); - ipt_ulog_packet(hooknum, *pskb, in, out, loginfo, NULL); - - return IPT_CONTINUE; + ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out, + par->targinfo, NULL); + return XT_CONTINUE; } - -static void ipt_logfn(unsigned int pf, + +static void ipt_logfn(struct net *net, + u_int8_t pf, unsigned int hooknum, const struct sk_buff *skb, const struct net_device *in, @@ -326,110 +319,180 @@ static void ipt_logfn(unsigned int pf, strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix)); } - ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix); + ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix); } -static int ipt_ulog_checkentry(const char *tablename, - const void *e, - void *targinfo, - unsigned int targinfosize, - unsigned int hookmask) +static int ulog_tg_check(const struct xt_tgchk_param *par) { - struct ipt_ulog_info *loginfo = (struct ipt_ulog_info *) targinfo; + const struct ipt_ulog_info *loginfo = par->targinfo; - if (targinfosize != IPT_ALIGN(sizeof(struct ipt_ulog_info))) { - DEBUGP("ipt_ULOG: targinfosize %u != 0\n", targinfosize); - return 0; + if (!par->net->xt.ulog_warn_deprecated) { + pr_info("ULOG is deprecated and it will be removed soon, " + "use NFLOG instead\n"); + par->net->xt.ulog_warn_deprecated = true; } if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') { - DEBUGP("ipt_ULOG: prefix term %i\n", - loginfo->prefix[sizeof(loginfo->prefix) - 1]); - return 0; + pr_debug("prefix not null-terminated\n"); + return -EINVAL; } - if (loginfo->qthreshold > ULOG_MAX_QLEN) { - DEBUGP("ipt_ULOG: queue threshold %i > MAX_QLEN\n", - loginfo->qthreshold); - return 0; + pr_debug("queue threshold %Zu > MAX_QLEN\n", + loginfo->qthreshold); + return -EINVAL; } + return 0; +} - return 1; +#ifdef CONFIG_COMPAT +struct compat_ipt_ulog_info { + compat_uint_t nl_group; + compat_size_t copy_range; + compat_size_t qthreshold; + char prefix[ULOG_PREFIX_LEN]; +}; + +static void ulog_tg_compat_from_user(void *dst, const void *src) +{ + const struct compat_ipt_ulog_info *cl = src; + struct ipt_ulog_info l = { + .nl_group = cl->nl_group, + .copy_range = cl->copy_range, + .qthreshold = cl->qthreshold, + }; + + memcpy(l.prefix, cl->prefix, sizeof(l.prefix)); + memcpy(dst, &l, sizeof(l)); +} + +static int ulog_tg_compat_to_user(void __user *dst, const void *src) +{ + const struct ipt_ulog_info *l = src; + struct compat_ipt_ulog_info cl = { + .nl_group = l->nl_group, + .copy_range = l->copy_range, + .qthreshold = l->qthreshold, + }; + + memcpy(cl.prefix, l->prefix, sizeof(cl.prefix)); + return copy_to_user(dst, &cl, sizeof(cl)) ? -EFAULT : 0; } +#endif /* CONFIG_COMPAT */ -static struct ipt_target ipt_ulog_reg = { +static struct xt_target ulog_tg_reg __read_mostly = { .name = "ULOG", - .target = ipt_ulog_target, - .checkentry = ipt_ulog_checkentry, + .family = NFPROTO_IPV4, + .target = ulog_tg, + .targetsize = sizeof(struct ipt_ulog_info), + .checkentry = ulog_tg_check, +#ifdef CONFIG_COMPAT + .compatsize = sizeof(struct compat_ipt_ulog_info), + .compat_from_user = ulog_tg_compat_from_user, + .compat_to_user = ulog_tg_compat_to_user, +#endif .me = THIS_MODULE, }; -static struct nf_logger ipt_ulog_logger = { +static struct nf_logger ipt_ulog_logger __read_mostly = { .name = "ipt_ULOG", - .logfn = &ipt_logfn, + .logfn = ipt_logfn, .me = THIS_MODULE, }; -static int __init init(void) +static int __net_init ulog_tg_net_init(struct net *net) { int i; + struct ulog_net *ulog = ulog_pernet(net); + struct netlink_kernel_cfg cfg = { + .groups = ULOG_MAXNLGROUPS, + }; - DEBUGP("ipt_ULOG: init module\n"); - - if (nlbufsiz > 128*1024) { - printk("Netlink buffer has to be <= 128kB\n"); - return -EINVAL; - } - + spin_lock_init(&ulog->lock); /* initialize ulog_buffers */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - init_timer(&ulog_buffers[i].timer); - ulog_buffers[i].timer.function = ulog_timer; - ulog_buffers[i].timer.data = i; + ulog->nlgroup[i] = i; + setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer, + (unsigned long)&ulog->nlgroup[i]); } - nflognl = netlink_kernel_create(NETLINK_NFLOG, ULOG_MAXNLGROUPS, NULL, - THIS_MODULE); - if (!nflognl) + ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg); + if (!ulog->nflognl) return -ENOMEM; - if (ipt_register_target(&ipt_ulog_reg) != 0) { - sock_release(nflognl->sk_socket); - return -EINVAL; - } if (nflog) - nf_log_register(PF_INET, &ipt_ulog_logger); - + nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger); + return 0; } -static void __exit fini(void) +static void __net_exit ulog_tg_net_exit(struct net *net) { ulog_buff_t *ub; int i; - - DEBUGP("ipt_ULOG: cleanup_module\n"); + struct ulog_net *ulog = ulog_pernet(net); if (nflog) - nf_log_unregister_logger(&ipt_ulog_logger); - ipt_unregister_target(&ipt_ulog_reg); - sock_release(nflognl->sk_socket); + nf_log_unset(net, &ipt_ulog_logger); + + netlink_kernel_release(ulog->nflognl); /* remove pending timers and free allocated skb's */ for (i = 0; i < ULOG_MAXNLGROUPS; i++) { - ub = &ulog_buffers[i]; - if (timer_pending(&ub->timer)) { - DEBUGP("timer was pending, deleting\n"); - del_timer(&ub->timer); - } + ub = &ulog->ulog_buffers[i]; + pr_debug("timer is deleting\n"); + del_timer(&ub->timer); if (ub->skb) { kfree_skb(ub->skb); ub->skb = NULL; } } +} + +static struct pernet_operations ulog_tg_net_ops = { + .init = ulog_tg_net_init, + .exit = ulog_tg_net_exit, + .id = &ulog_net_id, + .size = sizeof(struct ulog_net), +}; + +static int __init ulog_tg_init(void) +{ + int ret; + pr_debug("init module\n"); + + if (nlbufsiz > 128*1024) { + pr_warn("Netlink buffer has to be <= 128kB\n"); + return -EINVAL; + } + + ret = register_pernet_subsys(&ulog_tg_net_ops); + if (ret) + goto out_pernet; + ret = xt_register_target(&ulog_tg_reg); + if (ret < 0) + goto out_target; + + if (nflog) + nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger); + + return 0; + +out_target: + unregister_pernet_subsys(&ulog_tg_net_ops); +out_pernet: + return ret; +} + +static void __exit ulog_tg_exit(void) +{ + pr_debug("cleanup_module\n"); + if (nflog) + nf_log_unregister(&ipt_ulog_logger); + xt_unregister_target(&ulog_tg_reg); + unregister_pernet_subsys(&ulog_tg_net_ops); } -module_init(init); -module_exit(fini); +module_init(ulog_tg_init); +module_exit(ulog_tg_exit); diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c deleted file mode 100644 index d6b83a97651..00000000000 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ /dev/null @@ -1,77 +0,0 @@ -/* - * iptables module to match inet_addr_type() of an ip. - * - * Copyright (c) 2004 Patrick McHardy <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/netdevice.h> -#include <linux/ip.h> -#include <net/route.h> - -#include <linux/netfilter_ipv4/ipt_addrtype.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("iptables addrtype match"); - -static inline int match_type(u_int32_t addr, u_int16_t mask) -{ - return !!(mask & (1 << inet_addr_type(addr))); -} - -static int match(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_addrtype_info *info = matchinfo; - const struct iphdr *iph = skb->nh.iph; - int ret = 1; - - if (info->source) - ret &= match_type(iph->saddr, info->source)^info->invert_source; - if (info->dest) - ret &= match_type(iph->daddr, info->dest)^info->invert_dest; - - return ret; -} - -static int checkentry(const char *tablename, const void *ip, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) -{ - if (matchsize != IPT_ALIGN(sizeof(struct ipt_addrtype_info))) { - printk(KERN_ERR "ipt_addrtype: invalid size (%u != %Zu)\n", - matchsize, IPT_ALIGN(sizeof(struct ipt_addrtype_info))); - return 0; - } - - return 1; -} - -static struct ipt_match addrtype_match = { - .name = "addrtype", - .match = match, - .checkentry = checkentry, - .me = THIS_MODULE -}; - -static int __init init(void) -{ - return ipt_register_match(&addrtype_match); -} - -static void __exit fini(void) -{ - ipt_unregister_match(&addrtype_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_ah.c b/net/ipv4/netfilter/ipt_ah.c index 144adfec13c..14a2aa8b8a1 100644 --- a/net/ipv4/netfilter/ipt_ah.c +++ b/net/ipv4/netfilter/ipt_ah.c @@ -5,60 +5,48 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ - +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/in.h> #include <linux/module.h> #include <linux/skbuff.h> #include <linux/ip.h> #include <linux/netfilter_ipv4/ipt_ah.h> -#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/netfilter/x_tables.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); -MODULE_DESCRIPTION("iptables AH SPI match module"); - -#ifdef DEBUG_CONNTRACK -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif +MODULE_DESCRIPTION("Xtables: IPv4 IPsec-AH SPI match"); /* Returns 1 if the spi is matched by the range, 0 otherwise */ -static inline int -spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) +static inline bool +spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, bool invert) { - int r=0; - duprintf("ah spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', - min,spi,max); + bool r; + pr_debug("spi_match:%c 0x%x <= 0x%x <= 0x%x\n", + invert ? '!' : ' ', min, spi, max); r=(spi >= min && spi <= max) ^ invert; - duprintf(" result %s\n",r? "PASS" : "FAILED"); + pr_debug(" result %s\n", r ? "PASS" : "FAILED"); return r; } -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) +static bool ah_mt(const struct sk_buff *skb, struct xt_action_param *par) { - struct ip_auth_hdr _ahdr, *ah; - const struct ipt_ah *ahinfo = matchinfo; + struct ip_auth_hdr _ahdr; + const struct ip_auth_hdr *ah; + const struct ipt_ah *ahinfo = par->matchinfo; /* Must not be a fragment. */ - if (offset) - return 0; + if (par->fragoff != 0) + return false; - ah = skb_header_pointer(skb, protoff, - sizeof(_ahdr), &_ahdr); + ah = skb_header_pointer(skb, par->thoff, sizeof(_ahdr), &_ahdr); if (ah == NULL) { /* We've been asked to examine this packet, and we * can't. Hence, no choice but to drop. */ - duprintf("Dropping evil AH tinygram.\n"); - *hotdrop = 1; + pr_debug("Dropping evil AH tinygram.\n"); + par->hotdrop = true; return 0; } @@ -67,53 +55,37 @@ match(const struct sk_buff *skb, !!(ahinfo->invflags & IPT_AH_INV_SPI)); } -/* Called when user tries to insert an entry of this type. */ -static int -checkentry(const char *tablename, - const void *ip_void, - void *matchinfo, - unsigned int matchinfosize, - unsigned int hook_mask) +static int ah_mt_check(const struct xt_mtchk_param *par) { - const struct ipt_ah *ahinfo = matchinfo; - const struct ipt_ip *ip = ip_void; + const struct ipt_ah *ahinfo = par->matchinfo; - /* Must specify proto == AH, and no unknown invflags */ - if (ip->proto != IPPROTO_AH || (ip->invflags & IPT_INV_PROTO)) { - duprintf("ipt_ah: Protocol %u != %u\n", ip->proto, - IPPROTO_AH); - return 0; - } - if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_ah))) { - duprintf("ipt_ah: matchsize %u != %u\n", - matchinfosize, IPT_ALIGN(sizeof(struct ipt_ah))); - return 0; - } + /* Must specify no unknown invflags */ if (ahinfo->invflags & ~IPT_AH_INV_MASK) { - duprintf("ipt_ah: unknown flags %X\n", - ahinfo->invflags); - return 0; + pr_debug("unknown flags %X\n", ahinfo->invflags); + return -EINVAL; } - - return 1; + return 0; } -static struct ipt_match ah_match = { +static struct xt_match ah_mt_reg __read_mostly = { .name = "ah", - .match = &match, - .checkentry = &checkentry, + .family = NFPROTO_IPV4, + .match = ah_mt, + .matchsize = sizeof(struct ipt_ah), + .proto = IPPROTO_AH, + .checkentry = ah_mt_check, .me = THIS_MODULE, }; -static int __init init(void) +static int __init ah_mt_init(void) { - return ipt_register_match(&ah_match); + return xt_register_match(&ah_mt_reg); } -static void __exit cleanup(void) +static void __exit ah_mt_exit(void) { - ipt_unregister_match(&ah_match); + xt_unregister_match(&ah_mt_reg); } -module_init(init); -module_exit(cleanup); +module_init(ah_mt_init); +module_exit(ah_mt_exit); diff --git a/net/ipv4/netfilter/ipt_dscp.c b/net/ipv4/netfilter/ipt_dscp.c deleted file mode 100644 index 92063b4f860..00000000000 --- a/net/ipv4/netfilter/ipt_dscp.c +++ /dev/null @@ -1,63 +0,0 @@ -/* IP tables module for matching the value of the IPv4 DSCP field - * - * ipt_dscp.c,v 1.3 2002/08/05 19:00:21 laforge Exp - * - * (C) 2002 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv4/ipt_dscp.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables DSCP matching module"); -MODULE_LICENSE("GPL"); - -static int match(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_dscp_info *info = matchinfo; - const struct iphdr *iph = skb->nh.iph; - - u_int8_t sh_dscp = ((info->dscp << IPT_DSCP_SHIFT) & IPT_DSCP_MASK); - - return ((iph->tos&IPT_DSCP_MASK) == sh_dscp) ^ info->invert; -} - -static int checkentry(const char *tablename, const void *ip, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) -{ - if (matchsize != IPT_ALIGN(sizeof(struct ipt_dscp_info))) - return 0; - - return 1; -} - -static struct ipt_match dscp_match = { - .name = "dscp", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_match(&dscp_match); -} - -static void __exit fini(void) -{ - ipt_unregister_match(&dscp_match); - -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_ecn.c b/net/ipv4/netfilter/ipt_ecn.c deleted file mode 100644 index e68b0c7981f..00000000000 --- a/net/ipv4/netfilter/ipt_ecn.c +++ /dev/null @@ -1,132 +0,0 @@ -/* IP tables module for matching the value of the IPv4 and TCP ECN bits - * - * ipt_ecn.c,v 1.3 2002/05/29 15:09:00 laforge Exp - * - * (C) 2002 by Harald Welte <laforge@gnumonks.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/tcp.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_ecn.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables ECN matching module"); -MODULE_LICENSE("GPL"); - -static inline int match_ip(const struct sk_buff *skb, - const struct ipt_ecn_info *einfo) -{ - return ((skb->nh.iph->tos&IPT_ECN_IP_MASK) == einfo->ip_ect); -} - -static inline int match_tcp(const struct sk_buff *skb, - const struct ipt_ecn_info *einfo, - int *hotdrop) -{ - struct tcphdr _tcph, *th; - - /* In practice, TCP match does this, so can't fail. But let's - * be good citizens. - */ - th = skb_header_pointer(skb, skb->nh.iph->ihl * 4, - sizeof(_tcph), &_tcph); - if (th == NULL) { - *hotdrop = 0; - return 0; - } - - if (einfo->operation & IPT_ECN_OP_MATCH_ECE) { - if (einfo->invert & IPT_ECN_OP_MATCH_ECE) { - if (th->ece == 1) - return 0; - } else { - if (th->ece == 0) - return 0; - } - } - - if (einfo->operation & IPT_ECN_OP_MATCH_CWR) { - if (einfo->invert & IPT_ECN_OP_MATCH_CWR) { - if (th->cwr == 1) - return 0; - } else { - if (th->cwr == 0) - return 0; - } - } - - return 1; -} - -static int match(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_ecn_info *info = matchinfo; - - if (info->operation & IPT_ECN_OP_MATCH_IP) - if (!match_ip(skb, info)) - return 0; - - if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR)) { - if (skb->nh.iph->protocol != IPPROTO_TCP) - return 0; - if (!match_tcp(skb, info, hotdrop)) - return 0; - } - - return 1; -} - -static int checkentry(const char *tablename, const void *ip_void, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) -{ - const struct ipt_ecn_info *info = matchinfo; - const struct ipt_ip *ip = ip_void; - - if (matchsize != IPT_ALIGN(sizeof(struct ipt_ecn_info))) - return 0; - - if (info->operation & IPT_ECN_OP_MATCH_MASK) - return 0; - - if (info->invert & IPT_ECN_OP_MATCH_MASK) - return 0; - - if (info->operation & (IPT_ECN_OP_MATCH_ECE|IPT_ECN_OP_MATCH_CWR) - && ip->proto != IPPROTO_TCP) { - printk(KERN_WARNING "ipt_ecn: can't match TCP bits in rule for" - " non-tcp packets\n"); - return 0; - } - - return 1; -} - -static struct ipt_match ecn_match = { - .name = "ecn", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_match(&ecn_match); -} - -static void __exit fini(void) -{ - ipt_unregister_match(&ecn_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_esp.c b/net/ipv4/netfilter/ipt_esp.c deleted file mode 100644 index 9de191a8162..00000000000 --- a/net/ipv4/netfilter/ipt_esp.c +++ /dev/null @@ -1,120 +0,0 @@ -/* Kernel module to match ESP parameters. */ - -/* (C) 1999-2000 Yon Uriarte <yon@astaro.de> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> - -#include <linux/netfilter_ipv4/ipt_esp.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Yon Uriarte <yon@astaro.de>"); -MODULE_DESCRIPTION("iptables ESP SPI match module"); - -#ifdef DEBUG_CONNTRACK -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - -/* Returns 1 if the spi is matched by the range, 0 otherwise */ -static inline int -spi_match(u_int32_t min, u_int32_t max, u_int32_t spi, int invert) -{ - int r=0; - duprintf("esp spi_match:%c 0x%x <= 0x%x <= 0x%x",invert? '!':' ', - min,spi,max); - r=(spi >= min && spi <= max) ^ invert; - duprintf(" result %s\n",r? "PASS" : "FAILED"); - return r; -} - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - struct ip_esp_hdr _esp, *eh; - const struct ipt_esp *espinfo = matchinfo; - - /* Must not be a fragment. */ - if (offset) - return 0; - - eh = skb_header_pointer(skb, protoff, - sizeof(_esp), &_esp); - if (eh == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - duprintf("Dropping evil ESP tinygram.\n"); - *hotdrop = 1; - return 0; - } - - return spi_match(espinfo->spis[0], espinfo->spis[1], - ntohl(eh->spi), - !!(espinfo->invflags & IPT_ESP_INV_SPI)); -} - -/* Called when user tries to insert an entry of this type. */ -static int -checkentry(const char *tablename, - const void *ip_void, - void *matchinfo, - unsigned int matchinfosize, - unsigned int hook_mask) -{ - const struct ipt_esp *espinfo = matchinfo; - const struct ipt_ip *ip = ip_void; - - /* Must specify proto == ESP, and no unknown invflags */ - if (ip->proto != IPPROTO_ESP || (ip->invflags & IPT_INV_PROTO)) { - duprintf("ipt_esp: Protocol %u != %u\n", ip->proto, - IPPROTO_ESP); - return 0; - } - if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_esp))) { - duprintf("ipt_esp: matchsize %u != %u\n", - matchinfosize, IPT_ALIGN(sizeof(struct ipt_esp))); - return 0; - } - if (espinfo->invflags & ~IPT_ESP_INV_MASK) { - duprintf("ipt_esp: unknown flags %X\n", - espinfo->invflags); - return 0; - } - - return 1; -} - -static struct ipt_match esp_match = { - .name = "esp", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_match(&esp_match); -} - -static void __exit cleanup(void) -{ - ipt_unregister_match(&esp_match); -} - -module_init(init); -module_exit(cleanup); diff --git a/net/ipv4/netfilter/ipt_hashlimit.c b/net/ipv4/netfilter/ipt_hashlimit.c deleted file mode 100644 index 4fe48c1bd5f..00000000000 --- a/net/ipv4/netfilter/ipt_hashlimit.c +++ /dev/null @@ -1,731 +0,0 @@ -/* iptables match extension to limit the number of packets per second - * seperately for each hashbucket (sourceip/sourceport/dstip/dstport) - * - * (C) 2003-2004 by Harald Welte <laforge@netfilter.org> - * - * $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $ - * - * Development of this code was funded by Astaro AG, http://www.astaro.com/ - * - * based on ipt_limit.c by: - * Jérôme de Vivie <devivie@info.enserb.u-bordeaux.fr> - * Hervé Eychenne <eychenne@info.enserb.u-bordeaux.fr> - * Rusty Russell <rusty@rustcorp.com.au> - * - * The general idea is to create a hash table for every dstip and have a - * seperate limit counter per tuple. This way you can do something like 'limit - * the number of syn packets for each of my internal addresses. - * - * Ideally this would just be implemented as a general 'hash' match, which would - * allow us to attach any iptables target to it's hash buckets. But this is - * not possible in the current iptables architecture. As always, pkttables for - * 2.7.x will help ;) - */ -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/spinlock.h> -#include <linux/random.h> -#include <linux/jhash.h> -#include <linux/slab.h> -#include <linux/vmalloc.h> -#include <linux/tcp.h> -#include <linux/udp.h> -#include <linux/sctp.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/list.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_hashlimit.h> - -/* FIXME: this is just for IP_NF_ASSERRT */ -#include <linux/netfilter_ipv4/ip_conntrack.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("iptables match for limiting per hash-bucket"); - -/* need to declare this at the top */ -static struct proc_dir_entry *hashlimit_procdir; -static struct file_operations dl_file_ops; - -/* hash table crap */ - -struct dsthash_dst { - u_int32_t src_ip; - u_int32_t dst_ip; - /* ports have to be consecutive !!! */ - u_int16_t src_port; - u_int16_t dst_port; -}; - -struct dsthash_ent { - /* static / read-only parts in the beginning */ - struct hlist_node node; - struct dsthash_dst dst; - - /* modified structure members in the end */ - unsigned long expires; /* precalculated expiry time */ - struct { - unsigned long prev; /* last modification */ - u_int32_t credit; - u_int32_t credit_cap, cost; - } rateinfo; -}; - -struct ipt_hashlimit_htable { - struct hlist_node node; /* global list of all htables */ - atomic_t use; - - struct hashlimit_cfg cfg; /* config */ - - /* used internally */ - spinlock_t lock; /* lock for list_head */ - u_int32_t rnd; /* random seed for hash */ - struct timer_list timer; /* timer for gc */ - atomic_t count; /* number entries in table */ - - /* seq_file stuff */ - struct proc_dir_entry *pde; - - struct hlist_head hash[0]; /* hashtable itself */ -}; - -static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */ -static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */ -static HLIST_HEAD(hashlimit_htables); -static kmem_cache_t *hashlimit_cachep __read_mostly; - -static inline int dst_cmp(const struct dsthash_ent *ent, struct dsthash_dst *b) -{ - return (ent->dst.dst_ip == b->dst_ip - && ent->dst.dst_port == b->dst_port - && ent->dst.src_port == b->src_port - && ent->dst.src_ip == b->src_ip); -} - -static inline u_int32_t -hash_dst(const struct ipt_hashlimit_htable *ht, const struct dsthash_dst *dst) -{ - return (jhash_3words(dst->dst_ip, (dst->dst_port<<16 | dst->src_port), - dst->src_ip, ht->rnd) % ht->cfg.size); -} - -static inline struct dsthash_ent * -__dsthash_find(const struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst) -{ - struct dsthash_ent *ent; - struct hlist_node *pos; - u_int32_t hash = hash_dst(ht, dst); - - if (!hlist_empty(&ht->hash[hash])) - hlist_for_each_entry(ent, pos, &ht->hash[hash], node) { - if (dst_cmp(ent, dst)) { - return ent; - } - } - - return NULL; -} - -/* allocate dsthash_ent, initialize dst, put in htable and lock it */ -static struct dsthash_ent * -__dsthash_alloc_init(struct ipt_hashlimit_htable *ht, struct dsthash_dst *dst) -{ - struct dsthash_ent *ent; - - /* initialize hash with random val at the time we allocate - * the first hashtable entry */ - if (!ht->rnd) - get_random_bytes(&ht->rnd, 4); - - if (ht->cfg.max && - atomic_read(&ht->count) >= ht->cfg.max) { - /* FIXME: do something. question is what.. */ - if (net_ratelimit()) - printk(KERN_WARNING - "ipt_hashlimit: max count of %u reached\n", - ht->cfg.max); - return NULL; - } - - ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC); - if (!ent) { - if (net_ratelimit()) - printk(KERN_ERR - "ipt_hashlimit: can't allocate dsthash_ent\n"); - return NULL; - } - - atomic_inc(&ht->count); - - ent->dst.dst_ip = dst->dst_ip; - ent->dst.dst_port = dst->dst_port; - ent->dst.src_ip = dst->src_ip; - ent->dst.src_port = dst->src_port; - - hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]); - - return ent; -} - -static inline void -__dsthash_free(struct ipt_hashlimit_htable *ht, struct dsthash_ent *ent) -{ - hlist_del(&ent->node); - kmem_cache_free(hashlimit_cachep, ent); - atomic_dec(&ht->count); -} -static void htable_gc(unsigned long htlong); - -static int htable_create(struct ipt_hashlimit_info *minfo) -{ - int i; - unsigned int size; - struct ipt_hashlimit_htable *hinfo; - - if (minfo->cfg.size) - size = minfo->cfg.size; - else { - size = (((num_physpages << PAGE_SHIFT) / 16384) - / sizeof(struct list_head)); - if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE)) - size = 8192; - if (size < 16) - size = 16; - } - /* FIXME: don't use vmalloc() here or anywhere else -HW */ - hinfo = vmalloc(sizeof(struct ipt_hashlimit_htable) - + (sizeof(struct list_head) * size)); - if (!hinfo) { - printk(KERN_ERR "ipt_hashlimit: Unable to create hashtable\n"); - return -1; - } - minfo->hinfo = hinfo; - - /* copy match config into hashtable config */ - memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg)); - hinfo->cfg.size = size; - if (!hinfo->cfg.max) - hinfo->cfg.max = 8 * hinfo->cfg.size; - else if (hinfo->cfg.max < hinfo->cfg.size) - hinfo->cfg.max = hinfo->cfg.size; - - for (i = 0; i < hinfo->cfg.size; i++) - INIT_HLIST_HEAD(&hinfo->hash[i]); - - atomic_set(&hinfo->count, 0); - atomic_set(&hinfo->use, 1); - hinfo->rnd = 0; - spin_lock_init(&hinfo->lock); - hinfo->pde = create_proc_entry(minfo->name, 0, hashlimit_procdir); - if (!hinfo->pde) { - vfree(hinfo); - return -1; - } - hinfo->pde->proc_fops = &dl_file_ops; - hinfo->pde->data = hinfo; - - init_timer(&hinfo->timer); - hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval); - hinfo->timer.data = (unsigned long )hinfo; - hinfo->timer.function = htable_gc; - add_timer(&hinfo->timer); - - spin_lock_bh(&hashlimit_lock); - hlist_add_head(&hinfo->node, &hashlimit_htables); - spin_unlock_bh(&hashlimit_lock); - - return 0; -} - -static int select_all(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he) -{ - return 1; -} - -static int select_gc(struct ipt_hashlimit_htable *ht, struct dsthash_ent *he) -{ - return (jiffies >= he->expires); -} - -static void htable_selective_cleanup(struct ipt_hashlimit_htable *ht, - int (*select)(struct ipt_hashlimit_htable *ht, - struct dsthash_ent *he)) -{ - int i; - - IP_NF_ASSERT(ht->cfg.size && ht->cfg.max); - - /* lock hash table and iterate over it */ - spin_lock_bh(&ht->lock); - for (i = 0; i < ht->cfg.size; i++) { - struct dsthash_ent *dh; - struct hlist_node *pos, *n; - hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) { - if ((*select)(ht, dh)) - __dsthash_free(ht, dh); - } - } - spin_unlock_bh(&ht->lock); -} - -/* hash table garbage collector, run by timer */ -static void htable_gc(unsigned long htlong) -{ - struct ipt_hashlimit_htable *ht = (struct ipt_hashlimit_htable *)htlong; - - htable_selective_cleanup(ht, select_gc); - - /* re-add the timer accordingly */ - ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval); - add_timer(&ht->timer); -} - -static void htable_destroy(struct ipt_hashlimit_htable *hinfo) -{ - /* remove timer, if it is pending */ - if (timer_pending(&hinfo->timer)) - del_timer(&hinfo->timer); - - /* remove proc entry */ - remove_proc_entry(hinfo->pde->name, hashlimit_procdir); - - htable_selective_cleanup(hinfo, select_all); - vfree(hinfo); -} - -static struct ipt_hashlimit_htable *htable_find_get(char *name) -{ - struct ipt_hashlimit_htable *hinfo; - struct hlist_node *pos; - - spin_lock_bh(&hashlimit_lock); - hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) { - if (!strcmp(name, hinfo->pde->name)) { - atomic_inc(&hinfo->use); - spin_unlock_bh(&hashlimit_lock); - return hinfo; - } - } - spin_unlock_bh(&hashlimit_lock); - - return NULL; -} - -static void htable_put(struct ipt_hashlimit_htable *hinfo) -{ - if (atomic_dec_and_test(&hinfo->use)) { - spin_lock_bh(&hashlimit_lock); - hlist_del(&hinfo->node); - spin_unlock_bh(&hashlimit_lock); - htable_destroy(hinfo); - } -} - - -/* The algorithm used is the Simple Token Bucket Filter (TBF) - * see net/sched/sch_tbf.c in the linux source tree - */ - -/* Rusty: This is my (non-mathematically-inclined) understanding of - this algorithm. The `average rate' in jiffies becomes your initial - amount of credit `credit' and the most credit you can ever have - `credit_cap'. The `peak rate' becomes the cost of passing the - test, `cost'. - - `prev' tracks the last packet hit: you gain one credit per jiffy. - If you get credit balance more than this, the extra credit is - discarded. Every time the match passes, you lose `cost' credits; - if you don't have that many, the test fails. - - See Alexey's formal explanation in net/sched/sch_tbf.c. - - To get the maximum range, we multiply by this factor (ie. you get N - credits per jiffy). We want to allow a rate as low as 1 per day - (slowest userspace tool allows), which means - CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie. -*/ -#define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24)) - -/* Repeated shift and or gives us all 1s, final shift and add 1 gives - * us the power of 2 below the theoretical max, so GCC simply does a - * shift. */ -#define _POW2_BELOW2(x) ((x)|((x)>>1)) -#define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2)) -#define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4)) -#define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8)) -#define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16)) -#define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1) - -#define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ) - -/* Precision saver. */ -static inline u_int32_t -user2credits(u_int32_t user) -{ - /* If multiplying would overflow... */ - if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY)) - /* Divide first. */ - return (user / IPT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY; - - return (user * HZ * CREDITS_PER_JIFFY) / IPT_HASHLIMIT_SCALE; -} - -static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now) -{ - dh->rateinfo.credit += (now - xchg(&dh->rateinfo.prev, now)) - * CREDITS_PER_JIFFY; - if (dh->rateinfo.credit > dh->rateinfo.credit_cap) - dh->rateinfo.credit = dh->rateinfo.credit_cap; -} - -static inline int get_ports(const struct sk_buff *skb, int offset, - u16 ports[2]) -{ - union { - struct tcphdr th; - struct udphdr uh; - sctp_sctphdr_t sctph; - } hdr_u, *ptr_u; - - /* Must not be a fragment. */ - if (offset) - return 1; - - /* Must be big enough to read ports (both UDP and TCP have - them at the start). */ - ptr_u = skb_header_pointer(skb, skb->nh.iph->ihl*4, 8, &hdr_u); - if (!ptr_u) - return 1; - - switch (skb->nh.iph->protocol) { - case IPPROTO_TCP: - ports[0] = ptr_u->th.source; - ports[1] = ptr_u->th.dest; - break; - case IPPROTO_UDP: - ports[0] = ptr_u->uh.source; - ports[1] = ptr_u->uh.dest; - break; - case IPPROTO_SCTP: - ports[0] = ptr_u->sctph.source; - ports[1] = ptr_u->sctph.dest; - break; - default: - /* all other protocols don't supprot per-port hash - * buckets */ - ports[0] = ports[1] = 0; - break; - } - - return 0; -} - - -static int -hashlimit_match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - struct ipt_hashlimit_info *r = - ((struct ipt_hashlimit_info *)matchinfo)->u.master; - struct ipt_hashlimit_htable *hinfo = r->hinfo; - unsigned long now = jiffies; - struct dsthash_ent *dh; - struct dsthash_dst dst; - - /* build 'dst' according to hinfo->cfg and current packet */ - memset(&dst, 0, sizeof(dst)); - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DIP) - dst.dst_ip = skb->nh.iph->daddr; - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SIP) - dst.src_ip = skb->nh.iph->saddr; - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT - ||hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) { - u_int16_t ports[2]; - if (get_ports(skb, offset, ports)) { - /* We've been asked to examine this packet, and we - can't. Hence, no choice but to drop. */ - *hotdrop = 1; - return 0; - } - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_SPT) - dst.src_port = ports[0]; - if (hinfo->cfg.mode & IPT_HASHLIMIT_HASH_DPT) - dst.dst_port = ports[1]; - } - - spin_lock_bh(&hinfo->lock); - dh = __dsthash_find(hinfo, &dst); - if (!dh) { - dh = __dsthash_alloc_init(hinfo, &dst); - - if (!dh) { - /* enomem... don't match == DROP */ - if (net_ratelimit()) - printk(KERN_ERR "%s: ENOMEM\n", __FUNCTION__); - spin_unlock_bh(&hinfo->lock); - return 0; - } - - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - - dh->rateinfo.prev = jiffies; - dh->rateinfo.credit = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg * - hinfo->cfg.burst); - dh->rateinfo.cost = user2credits(hinfo->cfg.avg); - - spin_unlock_bh(&hinfo->lock); - return 1; - } - - /* update expiration timeout */ - dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); - - rateinfo_recalc(dh, now); - if (dh->rateinfo.credit >= dh->rateinfo.cost) { - /* We're underlimit. */ - dh->rateinfo.credit -= dh->rateinfo.cost; - spin_unlock_bh(&hinfo->lock); - return 1; - } - - spin_unlock_bh(&hinfo->lock); - - /* default case: we're overlimit, thus don't match */ - return 0; -} - -static int -hashlimit_checkentry(const char *tablename, - const void *inf, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - struct ipt_hashlimit_info *r = matchinfo; - - if (matchsize != IPT_ALIGN(sizeof(struct ipt_hashlimit_info))) - return 0; - - /* Check for overflow. */ - if (r->cfg.burst == 0 - || user2credits(r->cfg.avg * r->cfg.burst) < - user2credits(r->cfg.avg)) { - printk(KERN_ERR "ipt_hashlimit: Overflow, try lower: %u/%u\n", - r->cfg.avg, r->cfg.burst); - return 0; - } - - if (r->cfg.mode == 0 - || r->cfg.mode > (IPT_HASHLIMIT_HASH_DPT - |IPT_HASHLIMIT_HASH_DIP - |IPT_HASHLIMIT_HASH_SIP - |IPT_HASHLIMIT_HASH_SPT)) - return 0; - - if (!r->cfg.gc_interval) - return 0; - - if (!r->cfg.expire) - return 0; - - /* This is the best we've got: We cannot release and re-grab lock, - * since checkentry() is called before ip_tables.c grabs ipt_mutex. - * We also cannot grab the hashtable spinlock, since htable_create will - * call vmalloc, and that can sleep. And we cannot just re-search - * the list of htable's in htable_create(), since then we would - * create duplicate proc files. -HW */ - down(&hlimit_mutex); - r->hinfo = htable_find_get(r->name); - if (!r->hinfo && (htable_create(r) != 0)) { - up(&hlimit_mutex); - return 0; - } - up(&hlimit_mutex); - - /* Ugly hack: For SMP, we only want to use one set */ - r->u.master = r; - - return 1; -} - -static void -hashlimit_destroy(void *matchinfo, unsigned int matchsize) -{ - struct ipt_hashlimit_info *r = (struct ipt_hashlimit_info *) matchinfo; - - htable_put(r->hinfo); -} - -static struct ipt_match ipt_hashlimit = { - .name = "hashlimit", - .match = hashlimit_match, - .checkentry = hashlimit_checkentry, - .destroy = hashlimit_destroy, - .me = THIS_MODULE -}; - -/* PROC stuff */ - -static void *dl_seq_start(struct seq_file *s, loff_t *pos) -{ - struct proc_dir_entry *pde = s->private; - struct ipt_hashlimit_htable *htable = pde->data; - unsigned int *bucket; - - spin_lock_bh(&htable->lock); - if (*pos >= htable->cfg.size) - return NULL; - - bucket = kmalloc(sizeof(unsigned int), GFP_ATOMIC); - if (!bucket) - return ERR_PTR(-ENOMEM); - - *bucket = *pos; - return bucket; -} - -static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos) -{ - struct proc_dir_entry *pde = s->private; - struct ipt_hashlimit_htable *htable = pde->data; - unsigned int *bucket = (unsigned int *)v; - - *pos = ++(*bucket); - if (*pos >= htable->cfg.size) { - kfree(v); - return NULL; - } - return bucket; -} - -static void dl_seq_stop(struct seq_file *s, void *v) -{ - struct proc_dir_entry *pde = s->private; - struct ipt_hashlimit_htable *htable = pde->data; - unsigned int *bucket = (unsigned int *)v; - - kfree(bucket); - - spin_unlock_bh(&htable->lock); -} - -static inline int dl_seq_real_show(struct dsthash_ent *ent, struct seq_file *s) -{ - /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies); - - return seq_printf(s, "%ld %u.%u.%u.%u:%u->%u.%u.%u.%u:%u %u %u %u\n", - (long)(ent->expires - jiffies)/HZ, - NIPQUAD(ent->dst.src_ip), ntohs(ent->dst.src_port), - NIPQUAD(ent->dst.dst_ip), ntohs(ent->dst.dst_port), - ent->rateinfo.credit, ent->rateinfo.credit_cap, - ent->rateinfo.cost); -} - -static int dl_seq_show(struct seq_file *s, void *v) -{ - struct proc_dir_entry *pde = s->private; - struct ipt_hashlimit_htable *htable = pde->data; - unsigned int *bucket = (unsigned int *)v; - struct dsthash_ent *ent; - struct hlist_node *pos; - - if (!hlist_empty(&htable->hash[*bucket])) - hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node) { - if (dl_seq_real_show(ent, s)) { - /* buffer was filled and unable to print that tuple */ - return 1; - } - } - - return 0; -} - -static struct seq_operations dl_seq_ops = { - .start = dl_seq_start, - .next = dl_seq_next, - .stop = dl_seq_stop, - .show = dl_seq_show -}; - -static int dl_proc_open(struct inode *inode, struct file *file) -{ - int ret = seq_open(file, &dl_seq_ops); - - if (!ret) { - struct seq_file *sf = file->private_data; - sf->private = PDE(inode); - } - return ret; -} - -static struct file_operations dl_file_ops = { - .owner = THIS_MODULE, - .open = dl_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - -static int init_or_fini(int fini) -{ - int ret = 0; - - if (fini) - goto cleanup; - - if (ipt_register_match(&ipt_hashlimit)) { - ret = -EINVAL; - goto cleanup_nothing; - } - - hashlimit_cachep = kmem_cache_create("ipt_hashlimit", - sizeof(struct dsthash_ent), 0, - 0, NULL, NULL); - if (!hashlimit_cachep) { - printk(KERN_ERR "Unable to create ipt_hashlimit slab cache\n"); - ret = -ENOMEM; - goto cleanup_unreg_match; - } - - hashlimit_procdir = proc_mkdir("ipt_hashlimit", proc_net); - if (!hashlimit_procdir) { - printk(KERN_ERR "Unable to create proc dir entry\n"); - ret = -ENOMEM; - goto cleanup_free_slab; - } - - return ret; - -cleanup: - remove_proc_entry("ipt_hashlimit", proc_net); -cleanup_free_slab: - kmem_cache_destroy(hashlimit_cachep); -cleanup_unreg_match: - ipt_unregister_match(&ipt_hashlimit); -cleanup_nothing: - return ret; - -} - -static int __init init(void) -{ - return init_or_fini(0); -} - -static void __exit fini(void) -{ - init_or_fini(1); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_iprange.c b/net/ipv4/netfilter/ipt_iprange.c deleted file mode 100644 index 13fb16fb789..00000000000 --- a/net/ipv4/netfilter/ipt_iprange.c +++ /dev/null @@ -1,99 +0,0 @@ -/* - * iptables module to match IP address ranges - * - * (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/ip.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_iprange.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>"); -MODULE_DESCRIPTION("iptables arbitrary IP range match module"); - -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_iprange_info *info = matchinfo; - const struct iphdr *iph = skb->nh.iph; - - if (info->flags & IPRANGE_SRC) { - if (((ntohl(iph->saddr) < ntohl(info->src.min_ip)) - || (ntohl(iph->saddr) > ntohl(info->src.max_ip))) - ^ !!(info->flags & IPRANGE_SRC_INV)) { - DEBUGP("src IP %u.%u.%u.%u NOT in range %s" - "%u.%u.%u.%u-%u.%u.%u.%u\n", - NIPQUAD(iph->saddr), - info->flags & IPRANGE_SRC_INV ? "(INV) " : "", - NIPQUAD(info->src.min_ip), - NIPQUAD(info->src.max_ip)); - return 0; - } - } - if (info->flags & IPRANGE_DST) { - if (((ntohl(iph->daddr) < ntohl(info->dst.min_ip)) - || (ntohl(iph->daddr) > ntohl(info->dst.max_ip))) - ^ !!(info->flags & IPRANGE_DST_INV)) { - DEBUGP("dst IP %u.%u.%u.%u NOT in range %s" - "%u.%u.%u.%u-%u.%u.%u.%u\n", - NIPQUAD(iph->daddr), - info->flags & IPRANGE_DST_INV ? "(INV) " : "", - NIPQUAD(info->dst.min_ip), - NIPQUAD(info->dst.max_ip)); - return 0; - } - } - return 1; -} - -static int check(const char *tablename, - const void *inf, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - /* verify size */ - if (matchsize != IPT_ALIGN(sizeof(struct ipt_iprange_info))) - return 0; - - return 1; -} - -static struct ipt_match iprange_match = -{ - .list = { NULL, NULL }, - .name = "iprange", - .match = &match, - .checkentry = &check, - .destroy = NULL, - .me = THIS_MODULE -}; - -static int __init init(void) -{ - return ipt_register_match(&iprange_match); -} - -static void __exit fini(void) -{ - ipt_unregister_match(&iprange_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c deleted file mode 100644 index 2d52326553f..00000000000 --- a/net/ipv4/netfilter/ipt_multiport.c +++ /dev/null @@ -1,214 +0,0 @@ -/* Kernel module to match one of a list of TCP/UDP ports: ports are in - the same place so we can treat them as equal. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/udp.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv4/ipt_multiport.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); -MODULE_DESCRIPTION("iptables multiple port match module"); - -#if 0 -#define duprintf(format, args...) printk(format , ## args) -#else -#define duprintf(format, args...) -#endif - -/* Returns 1 if the port is matched by the test, 0 otherwise. */ -static inline int -ports_match(const u_int16_t *portlist, enum ipt_multiport_flags flags, - u_int8_t count, u_int16_t src, u_int16_t dst) -{ - unsigned int i; - for (i=0; i<count; i++) { - if (flags != IPT_MULTIPORT_DESTINATION - && portlist[i] == src) - return 1; - - if (flags != IPT_MULTIPORT_SOURCE - && portlist[i] == dst) - return 1; - } - - return 0; -} - -/* Returns 1 if the port is matched by the test, 0 otherwise. */ -static inline int -ports_match_v1(const struct ipt_multiport_v1 *minfo, - u_int16_t src, u_int16_t dst) -{ - unsigned int i; - u_int16_t s, e; - - for (i=0; i < minfo->count; i++) { - s = minfo->ports[i]; - - if (minfo->pflags[i]) { - /* range port matching */ - e = minfo->ports[++i]; - duprintf("src or dst matches with %d-%d?\n", s, e); - - if (minfo->flags == IPT_MULTIPORT_SOURCE - && src >= s && src <= e) - return 1 ^ minfo->invert; - if (minfo->flags == IPT_MULTIPORT_DESTINATION - && dst >= s && dst <= e) - return 1 ^ minfo->invert; - if (minfo->flags == IPT_MULTIPORT_EITHER - && ((dst >= s && dst <= e) - || (src >= s && src <= e))) - return 1 ^ minfo->invert; - } else { - /* exact port matching */ - duprintf("src or dst matches with %d?\n", s); - - if (minfo->flags == IPT_MULTIPORT_SOURCE - && src == s) - return 1 ^ minfo->invert; - if (minfo->flags == IPT_MULTIPORT_DESTINATION - && dst == s) - return 1 ^ minfo->invert; - if (minfo->flags == IPT_MULTIPORT_EITHER - && (src == s || dst == s)) - return 1 ^ minfo->invert; - } - } - - return minfo->invert; -} - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - u16 _ports[2], *pptr; - const struct ipt_multiport *multiinfo = matchinfo; - - if (offset) - return 0; - - pptr = skb_header_pointer(skb, protoff, - sizeof(_ports), _ports); - if (pptr == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - duprintf("ipt_multiport:" - " Dropping evil offset=0 tinygram.\n"); - *hotdrop = 1; - return 0; - } - - return ports_match(multiinfo->ports, - multiinfo->flags, multiinfo->count, - ntohs(pptr[0]), ntohs(pptr[1])); -} - -static int -match_v1(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - u16 _ports[2], *pptr; - const struct ipt_multiport_v1 *multiinfo = matchinfo; - - if (offset) - return 0; - - pptr = skb_header_pointer(skb, protoff, - sizeof(_ports), _ports); - if (pptr == NULL) { - /* We've been asked to examine this packet, and we - * can't. Hence, no choice but to drop. - */ - duprintf("ipt_multiport:" - " Dropping evil offset=0 tinygram.\n"); - *hotdrop = 1; - return 0; - } - - return ports_match_v1(multiinfo, ntohs(pptr[0]), ntohs(pptr[1])); -} - -/* Called when user tries to insert an entry of this type. */ -static int -checkentry(const char *tablename, - const void *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport))); -} - -static int -checkentry_v1(const char *tablename, - const void *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - return (matchsize == IPT_ALIGN(sizeof(struct ipt_multiport_v1))); -} - -static struct ipt_match multiport_match = { - .name = "multiport", - .revision = 0, - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static struct ipt_match multiport_match_v1 = { - .name = "multiport", - .revision = 1, - .match = &match_v1, - .checkentry = &checkentry_v1, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - int err; - - err = ipt_register_match(&multiport_match); - if (!err) { - err = ipt_register_match(&multiport_match_v1); - if (err) - ipt_unregister_match(&multiport_match); - } - - return err; -} - -static void __exit fini(void) -{ - ipt_unregister_match(&multiport_match); - ipt_unregister_match(&multiport_match_v1); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_owner.c b/net/ipv4/netfilter/ipt_owner.c deleted file mode 100644 index 4843d0c9734..00000000000 --- a/net/ipv4/netfilter/ipt_owner.c +++ /dev/null @@ -1,101 +0,0 @@ -/* Kernel module to match various things tied to sockets associated with - locally generated outgoing packets. */ - -/* (C) 2000 Marc Boucher <marc@mbsi.ca> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/file.h> -#include <linux/rcupdate.h> -#include <net/sock.h> - -#include <linux/netfilter_ipv4/ipt_owner.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>"); -MODULE_DESCRIPTION("iptables owner match"); - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct ipt_owner_info *info = matchinfo; - - if (!skb->sk || !skb->sk->sk_socket || !skb->sk->sk_socket->file) - return 0; - - if(info->match & IPT_OWNER_UID) { - if ((skb->sk->sk_socket->file->f_uid != info->uid) ^ - !!(info->invert & IPT_OWNER_UID)) - return 0; - } - - if(info->match & IPT_OWNER_GID) { - if ((skb->sk->sk_socket->file->f_gid != info->gid) ^ - !!(info->invert & IPT_OWNER_GID)) - return 0; - } - - return 1; -} - -static int -checkentry(const char *tablename, - const void *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - const struct ipt_owner_info *info = matchinfo; - - if (hook_mask - & ~((1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_POST_ROUTING))) { - printk("ipt_owner: only valid for LOCAL_OUT or POST_ROUTING.\n"); - return 0; - } - - if (matchsize != IPT_ALIGN(sizeof(struct ipt_owner_info))) { - printk("Matchsize %u != %Zu\n", matchsize, - IPT_ALIGN(sizeof(struct ipt_owner_info))); - return 0; - } - - if (info->match & (IPT_OWNER_PID|IPT_OWNER_SID|IPT_OWNER_COMM)) { - printk("ipt_owner: pid, sid and command matching " - "not supported anymore\n"); - return 0; - } - - return 1; -} - -static struct ipt_match owner_match = { - .name = "owner", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_match(&owner_match); -} - -static void __exit fini(void) -{ - ipt_unregister_match(&owner_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_policy.c b/net/ipv4/netfilter/ipt_policy.c deleted file mode 100644 index 18ca8258a1c..00000000000 --- a/net/ipv4/netfilter/ipt_policy.c +++ /dev/null @@ -1,173 +0,0 @@ -/* IP tables module for matching IPsec policy - * - * Copyright (c) 2004,2005 Patrick McHardy, <kaber@trash.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/config.h> -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/init.h> -#include <net/xfrm.h> - -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_policy.h> - -MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); -MODULE_DESCRIPTION("IPtables IPsec policy matching module"); -MODULE_LICENSE("GPL"); - - -static inline int -match_xfrm_state(struct xfrm_state *x, const struct ipt_policy_elem *e) -{ -#define MATCH(x,y) (!e->match.x || ((e->x == (y)) ^ e->invert.x)) - - return MATCH(saddr, x->props.saddr.a4 & e->smask) && - MATCH(daddr, x->id.daddr.a4 & e->dmask) && - MATCH(proto, x->id.proto) && - MATCH(mode, x->props.mode) && - MATCH(spi, x->id.spi) && - MATCH(reqid, x->props.reqid); -} - -static int -match_policy_in(const struct sk_buff *skb, const struct ipt_policy_info *info) -{ - const struct ipt_policy_elem *e; - struct sec_path *sp = skb->sp; - int strict = info->flags & IPT_POLICY_MATCH_STRICT; - int i, pos; - - if (sp == NULL) - return -1; - if (strict && info->len != sp->len) - return 0; - - for (i = sp->len - 1; i >= 0; i--) { - pos = strict ? i - sp->len + 1 : 0; - if (pos >= info->len) - return 0; - e = &info->pol[pos]; - - if (match_xfrm_state(sp->x[i].xvec, e)) { - if (!strict) - return 1; - } else if (strict) - return 0; - } - - return strict ? 1 : 0; -} - -static int -match_policy_out(const struct sk_buff *skb, const struct ipt_policy_info *info) -{ - const struct ipt_policy_elem *e; - struct dst_entry *dst = skb->dst; - int strict = info->flags & IPT_POLICY_MATCH_STRICT; - int i, pos; - - if (dst->xfrm == NULL) - return -1; - - for (i = 0; dst && dst->xfrm; dst = dst->child, i++) { - pos = strict ? i : 0; - if (pos >= info->len) - return 0; - e = &info->pol[pos]; - - if (match_xfrm_state(dst->xfrm, e)) { - if (!strict) - return 1; - } else if (strict) - return 0; - } - - return strict ? 1 : 0; -} - -static int match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct ipt_policy_info *info = matchinfo; - int ret; - - if (info->flags & IPT_POLICY_MATCH_IN) - ret = match_policy_in(skb, info); - else - ret = match_policy_out(skb, info); - - if (ret < 0) - ret = info->flags & IPT_POLICY_MATCH_NONE ? 1 : 0; - else if (info->flags & IPT_POLICY_MATCH_NONE) - ret = 0; - - return ret; -} - -static int checkentry(const char *tablename, const void *ip_void, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) -{ - struct ipt_policy_info *info = matchinfo; - - if (matchsize != IPT_ALIGN(sizeof(*info))) { - printk(KERN_ERR "ipt_policy: matchsize %u != %zu\n", - matchsize, IPT_ALIGN(sizeof(*info))); - return 0; - } - if (!(info->flags & (IPT_POLICY_MATCH_IN|IPT_POLICY_MATCH_OUT))) { - printk(KERN_ERR "ipt_policy: neither incoming nor " - "outgoing policy selected\n"); - return 0; - } - if (hook_mask & (1 << NF_IP_PRE_ROUTING | 1 << NF_IP_LOCAL_IN) - && info->flags & IPT_POLICY_MATCH_OUT) { - printk(KERN_ERR "ipt_policy: output policy not valid in " - "PRE_ROUTING and INPUT\n"); - return 0; - } - if (hook_mask & (1 << NF_IP_POST_ROUTING | 1 << NF_IP_LOCAL_OUT) - && info->flags & IPT_POLICY_MATCH_IN) { - printk(KERN_ERR "ipt_policy: input policy not valid in " - "POST_ROUTING and OUTPUT\n"); - return 0; - } - if (info->len > IPT_POLICY_MAX_ELEM) { - printk(KERN_ERR "ipt_policy: too many policy elements\n"); - return 0; - } - - return 1; -} - -static struct ipt_match policy_match = { - .name = "policy", - .match = match, - .checkentry = checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_match(&policy_match); -} - -static void __exit fini(void) -{ - ipt_unregister_match(&policy_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_recent.c b/net/ipv4/netfilter/ipt_recent.c deleted file mode 100644 index 44611d6d14f..00000000000 --- a/net/ipv4/netfilter/ipt_recent.c +++ /dev/null @@ -1,1005 +0,0 @@ -/* Kernel module to check if the source address has been seen recently. */ -/* Copyright 2002-2003, Stephen Frost, 2.5.x port by laforge@netfilter.org */ -/* Author: Stephen Frost <sfrost@snowman.net> */ -/* Project Page: http://snowman.net/projects/ipt_recent/ */ -/* This software is distributed under the terms of the GPL, Version 2 */ -/* This copyright does not cover user programs that use kernel services - * by normal system calls. */ - -#include <linux/module.h> -#include <linux/skbuff.h> -#include <linux/proc_fs.h> -#include <linux/spinlock.h> -#include <linux/interrupt.h> -#include <asm/uaccess.h> -#include <linux/ctype.h> -#include <linux/ip.h> -#include <linux/vmalloc.h> -#include <linux/moduleparam.h> - -#include <linux/netfilter_ipv4/ip_tables.h> -#include <linux/netfilter_ipv4/ipt_recent.h> - -#undef DEBUG -#define HASH_LOG 9 - -/* Defaults, these can be overridden on the module command-line. */ -static unsigned int ip_list_tot = 100; -static unsigned int ip_pkt_list_tot = 20; -static unsigned int ip_list_hash_size = 0; -static unsigned int ip_list_perms = 0644; -#ifdef DEBUG -static int debug = 1; -#endif - -static char version[] = -KERN_INFO RECENT_NAME " " RECENT_VER ": Stephen Frost <sfrost@snowman.net>. http://snowman.net/projects/ipt_recent/\n"; - -MODULE_AUTHOR("Stephen Frost <sfrost@snowman.net>"); -MODULE_DESCRIPTION("IP tables recently seen matching module " RECENT_VER); -MODULE_LICENSE("GPL"); -module_param(ip_list_tot, uint, 0400); -module_param(ip_pkt_list_tot, uint, 0400); -module_param(ip_list_hash_size, uint, 0400); -module_param(ip_list_perms, uint, 0400); -#ifdef DEBUG -module_param(debug, bool, 0600); -MODULE_PARM_DESC(debug,"enable debugging output"); -#endif -MODULE_PARM_DESC(ip_list_tot,"number of IPs to remember per list"); -MODULE_PARM_DESC(ip_pkt_list_tot,"number of packets per IP to remember"); -MODULE_PARM_DESC(ip_list_hash_size,"size of hash table used to look up IPs"); -MODULE_PARM_DESC(ip_list_perms,"permissions on /proc/net/ipt_recent/* files"); - -/* Structure of our list of recently seen addresses. */ -struct recent_ip_list { - u_int32_t addr; - u_int8_t ttl; - unsigned long last_seen; - unsigned long *last_pkts; - u_int32_t oldest_pkt; - u_int32_t hash_entry; - u_int32_t time_pos; -}; - -struct time_info_list { - u_int32_t position; - u_int32_t time; -}; - -/* Structure of our linked list of tables of recent lists. */ -struct recent_ip_tables { - char name[IPT_RECENT_NAME_LEN]; - int count; - int time_pos; - struct recent_ip_list *table; - struct recent_ip_tables *next; - spinlock_t list_lock; - int *hash_table; - struct time_info_list *time_info; -#ifdef CONFIG_PROC_FS - struct proc_dir_entry *status_proc; -#endif /* CONFIG_PROC_FS */ -}; - -/* Our current list of addresses we have recently seen. - * Only added to on a --set, and only updated on --set || --update - */ -static struct recent_ip_tables *r_tables = NULL; - -/* We protect r_list with this spinlock so two processors are not modifying - * the list at the same time. - */ -static DEFINE_SPINLOCK(recent_lock); - -#ifdef CONFIG_PROC_FS -/* Our /proc/net/ipt_recent entry */ -static struct proc_dir_entry *proc_net_ipt_recent = NULL; -#endif - -/* Function declaration for later. */ -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop); - -/* Function to hash a given address into the hash table of table_size size */ -static int hash_func(unsigned int addr, int table_size) -{ - int result = 0; - unsigned int value = addr; - do { result ^= value; } while((value >>= HASH_LOG)); - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": %d = hash_func(%u,%d)\n", - result & (table_size - 1), - addr, - table_size); -#endif - - return(result & (table_size - 1)); -} - -#ifdef CONFIG_PROC_FS -/* This is the function which produces the output for our /proc output - * interface which lists each IP address, the last seen time and the - * other recent times the address was seen. - */ - -static int ip_recent_get_info(char *buffer, char **start, off_t offset, int length, int *eof, void *data) -{ - int len = 0, count, last_len = 0, pkt_count; - off_t pos = 0; - off_t begin = 0; - struct recent_ip_tables *curr_table; - - curr_table = (struct recent_ip_tables*) data; - - spin_lock_bh(&curr_table->list_lock); - for(count = 0; count < ip_list_tot; count++) { - if(!curr_table->table[count].addr) continue; - last_len = len; - len += sprintf(buffer+len,"src=%u.%u.%u.%u ",NIPQUAD(curr_table->table[count].addr)); - len += sprintf(buffer+len,"ttl: %u ",curr_table->table[count].ttl); - len += sprintf(buffer+len,"last_seen: %lu ",curr_table->table[count].last_seen); - len += sprintf(buffer+len,"oldest_pkt: %u ",curr_table->table[count].oldest_pkt); - len += sprintf(buffer+len,"last_pkts: %lu",curr_table->table[count].last_pkts[0]); - for(pkt_count = 1; pkt_count < ip_pkt_list_tot; pkt_count++) { - if(!curr_table->table[count].last_pkts[pkt_count]) break; - len += sprintf(buffer+len,", %lu",curr_table->table[count].last_pkts[pkt_count]); - } - len += sprintf(buffer+len,"\n"); - pos = begin + len; - if(pos < offset) { len = 0; begin = pos; } - if(pos > offset + length) { len = last_len; break; } - } - - *start = buffer + (offset - begin); - len -= (offset - begin); - if(len > length) len = length; - - spin_unlock_bh(&curr_table->list_lock); - return len; -} - -/* ip_recent_ctrl provides an interface for users to modify the table - * directly. This allows adding entries, removing entries, and - * flushing the entire table. - * This is done by opening up the appropriate table for writing and - * sending one of: - * xx.xx.xx.xx -- Add entry to table with current time - * +xx.xx.xx.xx -- Add entry to table with current time - * -xx.xx.xx.xx -- Remove entry from table - * clear -- Flush table, remove all entries - */ - -static int ip_recent_ctrl(struct file *file, const char __user *input, unsigned long size, void *data) -{ - static const u_int32_t max[4] = { 0xffffffff, 0xffffff, 0xffff, 0xff }; - u_int32_t val; - int base, used = 0; - char c, *cp; - union iaddr { - uint8_t bytes[4]; - uint32_t word; - } res; - uint8_t *pp = res.bytes; - int digit; - - char buffer[20]; - int len, check_set = 0, count; - u_int32_t addr = 0; - struct sk_buff *skb; - struct ipt_recent_info *info; - struct recent_ip_tables *curr_table; - - curr_table = (struct recent_ip_tables*) data; - - if(size > 20) len = 20; else len = size; - - if(copy_from_user(buffer,input,len)) return -EFAULT; - - if(len < 20) buffer[len] = '\0'; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl len: %d, input: `%.20s'\n",len,buffer); -#endif - - cp = buffer; - while(isspace(*cp)) { cp++; used++; if(used >= len-5) return used; } - - /* Check if we are asked to flush the entire table */ - if(!memcmp(cp,"clear",5)) { - used += 5; - spin_lock_bh(&curr_table->list_lock); - curr_table->time_pos = 0; - for(count = 0; count < ip_list_hash_size; count++) { - curr_table->hash_table[count] = -1; - } - for(count = 0; count < ip_list_tot; count++) { - curr_table->table[count].last_seen = 0; - curr_table->table[count].addr = 0; - curr_table->table[count].ttl = 0; - memset(curr_table->table[count].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); - curr_table->table[count].oldest_pkt = 0; - curr_table->table[count].time_pos = 0; - curr_table->time_info[count].position = count; - curr_table->time_info[count].time = 0; - } - spin_unlock_bh(&curr_table->list_lock); - return used; - } - - check_set = IPT_RECENT_SET; - switch(*cp) { - case '+': check_set = IPT_RECENT_SET; cp++; used++; break; - case '-': check_set = IPT_RECENT_REMOVE; cp++; used++; break; - default: if(!isdigit(*cp)) return (used+1); break; - } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl cp: `%c', check_set: %d\n",*cp,check_set); -#endif - /* Get addr (effectively inet_aton()) */ - /* Shamelessly stolen from libc, a function in the kernel for doing - * this would, of course, be greatly preferred, but our options appear - * to be rather limited, so we will just do it ourselves here. - */ - res.word = 0; - - c = *cp; - for(;;) { - if(!isdigit(c)) return used; - val = 0; base = 10; digit = 0; - if(c == '0') { - c = *++cp; - if(c == 'x' || c == 'X') base = 16, c = *++cp; - else { base = 8; digit = 1; } - } - for(;;) { - if(isascii(c) && isdigit(c)) { - if(base == 8 && (c == '8' || c == '0')) return used; - val = (val * base) + (c - '0'); - c = *++cp; - digit = 1; - } else if(base == 16 && isascii(c) && isxdigit(c)) { - val = (val << 4) | (c + 10 - (islower(c) ? 'a' : 'A')); - c = *++cp; - digit = 1; - } else break; - } - if(c == '.') { - if(pp > res.bytes + 2 || val > 0xff) return used; - *pp++ = val; - c = *++cp; - } else break; - } - used = cp - buffer; - if(c != '\0' && (!isascii(c) || !isspace(c))) return used; - if(c == '\n') used++; - if(!digit) return used; - - if(val > max[pp - res.bytes]) return used; - addr = res.word | htonl(val); - - if(!addr && check_set == IPT_RECENT_SET) return used; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": ip_recent_ctrl c: %c, addr: %u used: %d\n",c,addr,used); -#endif - - /* Set up and just call match */ - info = kmalloc(sizeof(struct ipt_recent_info),GFP_KERNEL); - if(!info) { return -ENOMEM; } - info->seconds = 0; - info->hit_count = 0; - info->check_set = check_set; - info->invert = 0; - info->side = IPT_RECENT_SOURCE; - strncpy(info->name,curr_table->name,IPT_RECENT_NAME_LEN); - info->name[IPT_RECENT_NAME_LEN-1] = '\0'; - - skb = kmalloc(sizeof(struct sk_buff),GFP_KERNEL); - if (!skb) { - used = -ENOMEM; - goto out_free_info; - } - skb->nh.iph = kmalloc(sizeof(struct iphdr),GFP_KERNEL); - if (!skb->nh.iph) { - used = -ENOMEM; - goto out_free_skb; - } - - skb->nh.iph->saddr = addr; - skb->nh.iph->daddr = 0; - /* Clear ttl since we have no way of knowing it */ - skb->nh.iph->ttl = 0; - match(skb,NULL,NULL,info,0,0,NULL); - - kfree(skb->nh.iph); -out_free_skb: - kfree(skb); -out_free_info: - kfree(info); - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": Leaving ip_recent_ctrl addr: %u used: %d\n",addr,used); -#endif - return used; -} - -#endif /* CONFIG_PROC_FS */ - -/* 'match' is our primary function, called by the kernel whenever a rule is - * hit with our module as an option to it. - * What this function does depends on what was specifically asked of it by - * the user: - * --set -- Add or update last seen time of the source address of the packet - * -- matchinfo->check_set == IPT_RECENT_SET - * --rcheck -- Just check if the source address is in the list - * -- matchinfo->check_set == IPT_RECENT_CHECK - * --update -- If the source address is in the list, update last_seen - * -- matchinfo->check_set == IPT_RECENT_UPDATE - * --remove -- If the source address is in the list, remove it - * -- matchinfo->check_set == IPT_RECENT_REMOVE - * --seconds -- Option to --rcheck/--update, only match if last_seen within seconds - * -- matchinfo->seconds - * --hitcount -- Option to --rcheck/--update, only match if seen hitcount times - * -- matchinfo->hit_count - * --seconds and --hitcount can be combined - */ -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - int pkt_count, hits_found, ans; - unsigned long now; - const struct ipt_recent_info *info = matchinfo; - u_int32_t addr = 0, time_temp; - u_int8_t ttl = skb->nh.iph->ttl; - int *hash_table; - int orig_hash_result, hash_result, temp, location = 0, time_loc, end_collision_chain = -1; - struct time_info_list *time_info; - struct recent_ip_tables *curr_table; - struct recent_ip_tables *last_table; - struct recent_ip_list *r_list; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match() called\n"); -#endif - - /* Default is false ^ info->invert */ - ans = info->invert; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): name = '%s'\n",info->name); -#endif - - /* if out != NULL then routing has been done and TTL changed. - * We change it back here internally for match what came in before routing. */ - if(out) ttl++; - - /* Find the right table */ - spin_lock_bh(&recent_lock); - curr_table = r_tables; - while( (last_table = curr_table) && strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (curr_table = curr_table->next) ); - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): table found('%s')\n",info->name); -#endif - - spin_unlock_bh(&recent_lock); - - /* Table with this name not found, match impossible */ - if(!curr_table) { return ans; } - - /* Make sure no one is changing the list while we work with it */ - spin_lock_bh(&curr_table->list_lock); - - r_list = curr_table->table; - if(info->side == IPT_RECENT_DEST) addr = skb->nh.iph->daddr; else addr = skb->nh.iph->saddr; - - if(!addr) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match() address (%u) invalid, leaving.\n",addr); -#endif - spin_unlock_bh(&curr_table->list_lock); - return ans; - } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): checking table, addr: %u, ttl: %u, orig_ttl: %u\n",addr,ttl,skb->nh.iph->ttl); -#endif - - /* Get jiffies now in case they changed while we were waiting for a lock */ - now = jiffies; - hash_table = curr_table->hash_table; - time_info = curr_table->time_info; - - orig_hash_result = hash_result = hash_func(addr,ip_list_hash_size); - /* Hash entry at this result used */ - /* Check for TTL match if requested. If TTL is zero then a match would never - * happen, so match regardless of existing TTL in that case. Zero means the - * entry was added via the /proc interface anyway, so we will just use the - * first TTL we get for that IP address. */ - if(info->check_set & IPT_RECENT_TTL) { - while(hash_table[hash_result] != -1 && !(r_list[hash_table[hash_result]].addr == addr && - (!r_list[hash_table[hash_result]].ttl || r_list[hash_table[hash_result]].ttl == ttl))) { - /* Collision in hash table */ - hash_result = (hash_result + 1) % ip_list_hash_size; - } - } else { - while(hash_table[hash_result] != -1 && r_list[hash_table[hash_result]].addr != addr) { - /* Collision in hash table */ - hash_result = (hash_result + 1) % ip_list_hash_size; - } - } - - if(hash_table[hash_result] == -1 && !(info->check_set & IPT_RECENT_SET)) { - /* IP not in list and not asked to SET */ - spin_unlock_bh(&curr_table->list_lock); - return ans; - } - - /* Check if we need to handle the collision, do not need to on REMOVE */ - if(orig_hash_result != hash_result && !(info->check_set & IPT_RECENT_REMOVE)) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision in hash table. (or: %d,hr: %d,oa: %u,ha: %u)\n", - orig_hash_result, - hash_result, - r_list[hash_table[orig_hash_result]].addr, - addr); -#endif - - /* We had a collision. - * orig_hash_result is where we started, hash_result is where we ended up. - * So, swap them because we are likely to see the same guy again sooner */ -#ifdef DEBUG - if(debug) { - printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[orig_hash_result] = %d\n",hash_table[orig_hash_result]); - printk(KERN_INFO RECENT_NAME ": match(): Collision; r_list[hash_table[orig_hash_result]].hash_entry = %d\n", - r_list[hash_table[orig_hash_result]].hash_entry); - } -#endif - - r_list[hash_table[orig_hash_result]].hash_entry = hash_result; - - - temp = hash_table[orig_hash_result]; -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision; hash_table[hash_result] = %d\n",hash_table[hash_result]); -#endif - hash_table[orig_hash_result] = hash_table[hash_result]; - hash_table[hash_result] = temp; - temp = hash_result; - hash_result = orig_hash_result; - orig_hash_result = temp; - time_info[r_list[hash_table[orig_hash_result]].time_pos].position = hash_table[orig_hash_result]; - if(hash_table[hash_result] != -1) { - r_list[hash_table[hash_result]].hash_entry = hash_result; - time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; - } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): Collision handled.\n"); -#endif - } - - if(hash_table[hash_result] == -1) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): New table entry. (hr: %d,ha: %u)\n", - hash_result, addr); -#endif - - /* New item found and IPT_RECENT_SET, so we need to add it */ - location = time_info[curr_table->time_pos].position; - hash_table[r_list[location].hash_entry] = -1; - hash_table[hash_result] = location; - memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); - r_list[location].time_pos = curr_table->time_pos; - r_list[location].addr = addr; - r_list[location].ttl = ttl; - r_list[location].last_seen = now; - r_list[location].oldest_pkt = 1; - r_list[location].last_pkts[0] = now; - r_list[location].hash_entry = hash_result; - time_info[curr_table->time_pos].time = r_list[location].last_seen; - curr_table->time_pos = (curr_table->time_pos + 1) % ip_list_tot; - - ans = !info->invert; - } else { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): Existing table entry. (hr: %d,ha: %u)\n", - hash_result, - addr); -#endif - - /* Existing item found */ - location = hash_table[hash_result]; - /* We have a match on address, now to make sure it meets all requirements for a - * full match. */ - if(info->check_set & IPT_RECENT_CHECK || info->check_set & IPT_RECENT_UPDATE) { - if(!info->seconds && !info->hit_count) ans = !info->invert; else ans = info->invert; - if(info->seconds && !info->hit_count) { - if(time_before_eq(now,r_list[location].last_seen+info->seconds*HZ)) ans = !info->invert; else ans = info->invert; - } - if(info->seconds && info->hit_count) { - for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) { - if(r_list[location].last_pkts[pkt_count] == 0) break; - if(time_before_eq(now,r_list[location].last_pkts[pkt_count]+info->seconds*HZ)) hits_found++; - } - if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert; - } - if(info->hit_count && !info->seconds) { - for(pkt_count = 0, hits_found = 0; pkt_count < ip_pkt_list_tot; pkt_count++) { - if(r_list[location].last_pkts[pkt_count] == 0) break; - hits_found++; - } - if(hits_found >= info->hit_count) ans = !info->invert; else ans = info->invert; - } - } -#ifdef DEBUG - if(debug) { - if(ans) - printk(KERN_INFO RECENT_NAME ": match(): match addr: %u\n",addr); - else - printk(KERN_INFO RECENT_NAME ": match(): no match addr: %u\n",addr); - } -#endif - - /* If and only if we have been asked to SET, or to UPDATE (on match) do we add the - * current timestamp to the last_seen. */ - if((info->check_set & IPT_RECENT_SET && (ans = !info->invert)) || (info->check_set & IPT_RECENT_UPDATE && ans)) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): SET or UPDATE; updating time info.\n"); -#endif - /* Have to update our time info */ - time_loc = r_list[location].time_pos; - time_info[time_loc].time = now; - time_info[time_loc].position = location; - while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) { - time_temp = time_info[time_loc].time; - time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time; - time_info[(time_loc+1)%ip_list_tot].time = time_temp; - time_temp = time_info[time_loc].position; - time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position; - time_info[(time_loc+1)%ip_list_tot].position = time_temp; - r_list[time_info[time_loc].position].time_pos = time_loc; - r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot; - time_loc = (time_loc+1) % ip_list_tot; - } - r_list[location].time_pos = time_loc; - r_list[location].ttl = ttl; - r_list[location].last_pkts[r_list[location].oldest_pkt] = now; - r_list[location].oldest_pkt = ++r_list[location].oldest_pkt % ip_pkt_list_tot; - r_list[location].last_seen = now; - } - /* If we have been asked to remove the entry from the list, just set it to 0 */ - if(info->check_set & IPT_RECENT_REMOVE) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; clearing entry (or: %d, hr: %d).\n",orig_hash_result,hash_result); -#endif - /* Check if this is part of a collision chain */ - while(hash_table[(orig_hash_result+1) % ip_list_hash_size] != -1) { - orig_hash_result++; - if(hash_func(r_list[hash_table[orig_hash_result]].addr,ip_list_hash_size) == hash_result) { - /* Found collision chain, how deep does this rabbit hole go? */ -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; found collision chain.\n"); -#endif - end_collision_chain = orig_hash_result; - } - } - if(end_collision_chain != -1) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match(): REMOVE; part of collision chain, moving to end.\n"); -#endif - /* Part of a collision chain, swap it with the end of the chain - * before removing. */ - r_list[hash_table[end_collision_chain]].hash_entry = hash_result; - temp = hash_table[end_collision_chain]; - hash_table[end_collision_chain] = hash_table[hash_result]; - hash_table[hash_result] = temp; - time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; - hash_result = end_collision_chain; - r_list[hash_table[hash_result]].hash_entry = hash_result; - time_info[r_list[hash_table[hash_result]].time_pos].position = hash_table[hash_result]; - } - location = hash_table[hash_result]; - hash_table[r_list[location].hash_entry] = -1; - time_loc = r_list[location].time_pos; - time_info[time_loc].time = 0; - time_info[time_loc].position = location; - while((time_info[(time_loc+1) % ip_list_tot].time < time_info[time_loc].time) && ((time_loc+1) % ip_list_tot) != curr_table->time_pos) { - time_temp = time_info[time_loc].time; - time_info[time_loc].time = time_info[(time_loc+1)%ip_list_tot].time; - time_info[(time_loc+1)%ip_list_tot].time = time_temp; - time_temp = time_info[time_loc].position; - time_info[time_loc].position = time_info[(time_loc+1)%ip_list_tot].position; - time_info[(time_loc+1)%ip_list_tot].position = time_temp; - r_list[time_info[time_loc].position].time_pos = time_loc; - r_list[time_info[(time_loc+1)%ip_list_tot].position].time_pos = (time_loc+1)%ip_list_tot; - time_loc = (time_loc+1) % ip_list_tot; - } - r_list[location].time_pos = time_loc; - r_list[location].last_seen = 0; - r_list[location].addr = 0; - r_list[location].ttl = 0; - memset(r_list[location].last_pkts,0,ip_pkt_list_tot*sizeof(unsigned long)); - r_list[location].oldest_pkt = 0; - ans = !info->invert; - } - spin_unlock_bh(&curr_table->list_lock); - return ans; - } - - spin_unlock_bh(&curr_table->list_lock); -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": match() left.\n"); -#endif - return ans; -} - -/* This function is to verify that the rule given during the userspace iptables - * command is correct. - * If the command is valid then we check if the table name referred to by the - * rule exists, if not it is created. - */ -static int -checkentry(const char *tablename, - const void *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - int flag = 0, c; - unsigned long *hold; - const struct ipt_recent_info *info = matchinfo; - struct recent_ip_tables *curr_table, *find_table, *last_table; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() entered.\n"); -#endif - - if (matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return 0; - - /* seconds and hit_count only valid for CHECK/UPDATE */ - if(info->check_set & IPT_RECENT_SET) { flag++; if(info->seconds || info->hit_count) return 0; } - if(info->check_set & IPT_RECENT_REMOVE) { flag++; if(info->seconds || info->hit_count) return 0; } - if(info->check_set & IPT_RECENT_CHECK) flag++; - if(info->check_set & IPT_RECENT_UPDATE) flag++; - - /* One and only one of these should ever be set */ - if(flag != 1) return 0; - - /* Name must be set to something */ - if(!info->name || !info->name[0]) return 0; - - /* Things look good, create a list for this if it does not exist */ - /* Lock the linked list while we play with it */ - spin_lock_bh(&recent_lock); - - /* Look for an entry with this name already created */ - /* Finds the end of the list and the entry before the end if current name does not exist */ - find_table = r_tables; - while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) ); - - /* If a table already exists just increment the count on that table and return */ - if(find_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), incrementing count.\n",info->name); -#endif - find_table->count++; - spin_unlock_bh(&recent_lock); - return 1; - } - - spin_unlock_bh(&recent_lock); - - /* Table with this name not found */ - /* Allocate memory for new linked list item */ - -#ifdef DEBUG - if(debug) { - printk(KERN_INFO RECENT_NAME ": checkentry: no table found (%s)\n",info->name); - printk(KERN_INFO RECENT_NAME ": checkentry: Allocationg %d for link-list entry.\n",sizeof(struct recent_ip_tables)); - } -#endif - - curr_table = vmalloc(sizeof(struct recent_ip_tables)); - if(curr_table == NULL) return 0; - - spin_lock_init(&curr_table->list_lock); - curr_table->next = NULL; - curr_table->count = 1; - curr_table->time_pos = 0; - strncpy(curr_table->name,info->name,IPT_RECENT_NAME_LEN); - curr_table->name[IPT_RECENT_NAME_LEN-1] = '\0'; - - /* Allocate memory for this table and the list of packets in each entry. */ -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for table (%s).\n", - sizeof(struct recent_ip_list)*ip_list_tot, - info->name); -#endif - - curr_table->table = vmalloc(sizeof(struct recent_ip_list)*ip_list_tot); - if(curr_table->table == NULL) { vfree(curr_table); return 0; } - memset(curr_table->table,0,sizeof(struct recent_ip_list)*ip_list_tot); -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for pkt_list.\n", - sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot); -#endif - - hold = vmalloc(sizeof(unsigned long)*ip_pkt_list_tot*ip_list_tot); -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: After pkt_list allocation.\n"); -#endif - if(hold == NULL) { - printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for pkt_list.\n"); - vfree(curr_table->table); - vfree(curr_table); - return 0; - } - for(c = 0; c < ip_list_tot; c++) { - curr_table->table[c].last_pkts = hold + c*ip_pkt_list_tot; - } - - /* Allocate memory for the hash table */ -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for hash_table.\n", - sizeof(int)*ip_list_hash_size); -#endif - - curr_table->hash_table = vmalloc(sizeof(int)*ip_list_hash_size); - if(!curr_table->hash_table) { - printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for hash_table.\n"); - vfree(hold); - vfree(curr_table->table); - vfree(curr_table); - return 0; - } - - for(c = 0; c < ip_list_hash_size; c++) { - curr_table->hash_table[c] = -1; - } - - /* Allocate memory for the time info */ -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: Allocating %d for time_info.\n", - sizeof(struct time_info_list)*ip_list_tot); -#endif - - curr_table->time_info = vmalloc(sizeof(struct time_info_list)*ip_list_tot); - if(!curr_table->time_info) { - printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for time_info.\n"); - vfree(curr_table->hash_table); - vfree(hold); - vfree(curr_table->table); - vfree(curr_table); - return 0; - } - for(c = 0; c < ip_list_tot; c++) { - curr_table->time_info[c].position = c; - curr_table->time_info[c].time = 0; - } - - /* Put the new table in place */ - spin_lock_bh(&recent_lock); - find_table = r_tables; - while( (last_table = find_table) && strncmp(info->name,find_table->name,IPT_RECENT_NAME_LEN) && (find_table = find_table->next) ); - - /* If a table already exists just increment the count on that table and return */ - if(find_table) { - find_table->count++; - spin_unlock_bh(&recent_lock); -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry: table found (%s), created by other process.\n",info->name); -#endif - vfree(curr_table->time_info); - vfree(curr_table->hash_table); - vfree(hold); - vfree(curr_table->table); - vfree(curr_table); - return 1; - } - if(!last_table) r_tables = curr_table; else last_table->next = curr_table; - - spin_unlock_bh(&recent_lock); - -#ifdef CONFIG_PROC_FS - /* Create our proc 'status' entry. */ - curr_table->status_proc = create_proc_entry(curr_table->name, ip_list_perms, proc_net_ipt_recent); - if (!curr_table->status_proc) { - printk(KERN_INFO RECENT_NAME ": checkentry: unable to allocate for /proc entry.\n"); - /* Destroy the created table */ - spin_lock_bh(&recent_lock); - last_table = NULL; - curr_table = r_tables; - if(!curr_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, no tables.\n"); -#endif - spin_unlock_bh(&recent_lock); - return 0; - } - while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) ); - if(!curr_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() create_proc failed, table already destroyed.\n"); -#endif - spin_unlock_bh(&recent_lock); - return 0; - } - if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next; - spin_unlock_bh(&recent_lock); - vfree(curr_table->time_info); - vfree(curr_table->hash_table); - vfree(hold); - vfree(curr_table->table); - vfree(curr_table); - return 0; - } - - curr_table->status_proc->owner = THIS_MODULE; - curr_table->status_proc->data = curr_table; - wmb(); - curr_table->status_proc->read_proc = ip_recent_get_info; - curr_table->status_proc->write_proc = ip_recent_ctrl; -#endif /* CONFIG_PROC_FS */ - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": checkentry() left.\n"); -#endif - - return 1; -} - -/* This function is called in the event that a rule matching this module is - * removed. - * When this happens we need to check if there are no other rules matching - * the table given. If that is the case then we remove the table and clean - * up its memory. - */ -static void -destroy(void *matchinfo, unsigned int matchsize) -{ - const struct ipt_recent_info *info = matchinfo; - struct recent_ip_tables *curr_table, *last_table; - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() entered.\n"); -#endif - - if(matchsize != IPT_ALIGN(sizeof(struct ipt_recent_info))) return; - - /* Lock the linked list while we play with it */ - spin_lock_bh(&recent_lock); - - /* Look for an entry with this name already created */ - /* Finds the end of the list and the entry before the end if current name does not exist */ - last_table = NULL; - curr_table = r_tables; - if(!curr_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() No tables found, leaving.\n"); -#endif - spin_unlock_bh(&recent_lock); - return; - } - while( strncmp(info->name,curr_table->name,IPT_RECENT_NAME_LEN) && (last_table = curr_table) && (curr_table = curr_table->next) ); - - /* If a table does not exist then do nothing and return */ - if(!curr_table) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table not found, leaving.\n"); -#endif - spin_unlock_bh(&recent_lock); - return; - } - - curr_table->count--; - - /* If count is still non-zero then there are still rules referenceing it so we do nothing */ - if(curr_table->count) { -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, non-zero count, leaving.\n"); -#endif - spin_unlock_bh(&recent_lock); - return; - } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() table found, zero count, removing.\n"); -#endif - - /* Count must be zero so we remove this table from the list */ - if(last_table) last_table->next = curr_table->next; else r_tables = curr_table->next; - - spin_unlock_bh(&recent_lock); - - /* lock to make sure any late-runners still using this after we removed it from - * the list finish up then remove everything */ - spin_lock_bh(&curr_table->list_lock); - spin_unlock_bh(&curr_table->list_lock); - -#ifdef CONFIG_PROC_FS - if(curr_table->status_proc) remove_proc_entry(curr_table->name,proc_net_ipt_recent); -#endif /* CONFIG_PROC_FS */ - vfree(curr_table->table[0].last_pkts); - vfree(curr_table->table); - vfree(curr_table->hash_table); - vfree(curr_table->time_info); - vfree(curr_table); - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": destroy() left.\n"); -#endif - - return; -} - -/* This is the structure we pass to ipt_register to register our - * module with iptables. - */ -static struct ipt_match recent_match = { - .name = "recent", - .match = &match, - .checkentry = &checkentry, - .destroy = &destroy, - .me = THIS_MODULE -}; - -/* Kernel module initialization. */ -static int __init init(void) -{ - int err, count; - - printk(version); -#ifdef CONFIG_PROC_FS - proc_net_ipt_recent = proc_mkdir("ipt_recent",proc_net); - if(!proc_net_ipt_recent) return -ENOMEM; -#endif - - if(ip_list_hash_size && ip_list_hash_size <= ip_list_tot) { - printk(KERN_WARNING RECENT_NAME ": ip_list_hash_size too small, resetting to default.\n"); - ip_list_hash_size = 0; - } - - if(!ip_list_hash_size) { - ip_list_hash_size = ip_list_tot*3; - count = 2*2; - while(ip_list_hash_size > count) count = count*2; - ip_list_hash_size = count; - } - -#ifdef DEBUG - if(debug) printk(KERN_INFO RECENT_NAME ": ip_list_hash_size: %d\n",ip_list_hash_size); -#endif - - err = ipt_register_match(&recent_match); - if (err) - remove_proc_entry("ipt_recent", proc_net); - return err; -} - -/* Kernel module destruction. */ -static void __exit fini(void) -{ - ipt_unregister_match(&recent_match); - - remove_proc_entry("ipt_recent",proc_net); -} - -/* Register our module with the kernel. */ -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c new file mode 100644 index 00000000000..4bfaedf9b34 --- /dev/null +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -0,0 +1,144 @@ +/* + * Copyright (c) 2011 Florian Westphal <fw@strlen.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <net/ip_fib.h> +#include <net/route.h> + +#include <linux/netfilter/xt_rpfilter.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match"); + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 rpfilter_get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool rpfilter_lookup_reverse(struct flowi4 *fl4, + const struct net_device *dev, u8 flags) +{ + struct fib_result res; + bool dev_match; + struct net *net = dev_net(dev); + int ret __maybe_unused; + + if (fib_lookup(net, fl4, &res)) + return false; + + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL)) + return false; + } + dev_match = false; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + for (ret = 0; ret < res.fi->fib_nhs; ret++) { + struct fib_nh *nh = &res.fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } + } +#else + if (FIB_RES_DEV(res) == dev) + dev_match = true; +#endif + if (dev_match || flags & XT_RPFILTER_LOOSE) + return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; + return dev_match; +} + +static bool rpfilter_is_local(const struct sk_buff *skb) +{ + const struct rtable *rt = skb_rtable(skb); + return rt && (rt->rt_flags & RTCF_LOCAL); +} + +static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_rpfilter_info *info; + const struct iphdr *iph; + struct flowi4 flow; + bool invert; + + info = par->matchinfo; + invert = info->flags & XT_RPFILTER_INVERT; + + if (rpfilter_is_local(skb)) + return true ^ invert; + + iph = ip_hdr(skb); + if (ipv4_is_multicast(iph->daddr)) { + if (ipv4_is_zeronet(iph->saddr)) + return ipv4_is_local_multicast(iph->daddr) ^ invert; + } + flow.flowi4_iif = LOOPBACK_IFINDEX; + flow.daddr = iph->saddr; + flow.saddr = rpfilter_get_saddr(iph->daddr); + flow.flowi4_oif = 0; + flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; + flow.flowi4_tos = RT_TOS(iph->tos); + flow.flowi4_scope = RT_SCOPE_UNIVERSE; + + return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert; +} + +static int rpfilter_check(const struct xt_mtchk_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + unsigned int options = ~XT_RPFILTER_OPTION_MASK; + if (info->flags & options) { + pr_info("unknown options encountered"); + return -EINVAL; + } + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "raw") != 0) { + pr_info("match only valid in the \'raw\' " + "or \'mangle\' tables, not \'%s\'.\n", par->table); + return -EINVAL; + } + + return 0; +} + +static struct xt_match rpfilter_mt_reg __read_mostly = { + .name = "rpfilter", + .family = NFPROTO_IPV4, + .checkentry = rpfilter_check, + .match = rpfilter_mt, + .matchsize = sizeof(struct xt_rpfilter_info), + .hooks = (1 << NF_INET_PRE_ROUTING), + .me = THIS_MODULE +}; + +static int __init rpfilter_mt_init(void) +{ + return xt_register_match(&rpfilter_mt_reg); +} + +static void __exit rpfilter_mt_exit(void) +{ + xt_unregister_match(&rpfilter_mt_reg); +} + +module_init(rpfilter_mt_init); +module_exit(rpfilter_mt_exit); diff --git a/net/ipv4/netfilter/ipt_tos.c b/net/ipv4/netfilter/ipt_tos.c deleted file mode 100644 index 9ab765e126f..00000000000 --- a/net/ipv4/netfilter/ipt_tos.c +++ /dev/null @@ -1,65 +0,0 @@ -/* Kernel module to match TOS values. */ - -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv4/ipt_tos.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("iptables TOS match module"); - -static int -match(const struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - const void *matchinfo, - int offset, - unsigned int protoff, - int *hotdrop) -{ - const struct ipt_tos_info *info = matchinfo; - - return (skb->nh.iph->tos == info->tos) ^ info->invert; -} - -static int -checkentry(const char *tablename, - const void *ip, - void *matchinfo, - unsigned int matchsize, - unsigned int hook_mask) -{ - if (matchsize != IPT_ALIGN(sizeof(struct ipt_tos_info))) - return 0; - - return 1; -} - -static struct ipt_match tos_match = { - .name = "tos", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_match(&tos_match); -} - -static void __exit fini(void) -{ - ipt_unregister_match(&tos_match); -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/ipt_ttl.c b/net/ipv4/netfilter/ipt_ttl.c deleted file mode 100644 index 82da53f430a..00000000000 --- a/net/ipv4/netfilter/ipt_ttl.c +++ /dev/null @@ -1,79 +0,0 @@ -/* IP tables module for matching the value of the TTL - * - * ipt_ttl.c,v 1.5 2000/11/13 11:16:08 laforge Exp - * - * (C) 2000,2001 by Harald Welte <laforge@netfilter.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/skbuff.h> - -#include <linux/netfilter_ipv4/ipt_ttl.h> -#include <linux/netfilter_ipv4/ip_tables.h> - -MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); -MODULE_DESCRIPTION("IP tables TTL matching module"); -MODULE_LICENSE("GPL"); - -static int match(const struct sk_buff *skb, const struct net_device *in, - const struct net_device *out, const void *matchinfo, - int offset, unsigned int protoff, int *hotdrop) -{ - const struct ipt_ttl_info *info = matchinfo; - - switch (info->mode) { - case IPT_TTL_EQ: - return (skb->nh.iph->ttl == info->ttl); - break; - case IPT_TTL_NE: - return (!(skb->nh.iph->ttl == info->ttl)); - break; - case IPT_TTL_LT: - return (skb->nh.iph->ttl < info->ttl); - break; - case IPT_TTL_GT: - return (skb->nh.iph->ttl > info->ttl); - break; - default: - printk(KERN_WARNING "ipt_ttl: unknown mode %d\n", - info->mode); - return 0; - } - - return 0; -} - -static int checkentry(const char *tablename, const void *ip, - void *matchinfo, unsigned int matchsize, - unsigned int hook_mask) -{ - if (matchsize != IPT_ALIGN(sizeof(struct ipt_ttl_info))) - return 0; - - return 1; -} - -static struct ipt_match ttl_match = { - .name = "ttl", - .match = &match, - .checkentry = &checkentry, - .me = THIS_MODULE, -}; - -static int __init init(void) -{ - return ipt_register_match(&ttl_match); -} - -static void __exit fini(void) -{ - ipt_unregister_match(&ttl_match); - -} - -module_init(init); -module_exit(fini); diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 212a3079085..e08a74a243a 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -13,183 +13,99 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/slab.h> +#include <net/ip.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables filter table"); -#define FILTER_VALID_HOOKS ((1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) | (1 << NF_IP_LOCAL_OUT)) +#define FILTER_VALID_HOOKS ((1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ipt_replace repl; - struct ipt_standard entries[3]; - struct ipt_error term; -} initial_table __initdata -= { { "filter", FILTER_VALID_HOOKS, 4, - sizeof(struct ipt_standard) * 3 + sizeof(struct ipt_error), - { [NF_IP_LOCAL_IN] = 0, - [NF_IP_FORWARD] = sizeof(struct ipt_standard), - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, - { [NF_IP_LOCAL_IN] = 0, - [NF_IP_FORWARD] = sizeof(struct ipt_standard), - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 2 }, - 0, NULL, { } }, - { - /* LOCAL_IN */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* FORWARD */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_OUT */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } } - }, - /* ERROR */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_error), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, - { } }, - "ERROR" - } - } -}; - -static struct ipt_table packet_filter = { +static const struct xt_table packet_filter = { .name = "filter", .valid_hooks = FILTER_VALID_HOOKS, - .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, - .af = AF_INET, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_FILTER, }; -/* The work comes in here from netfilter.c. */ static unsigned int -ipt_hook(unsigned int hook, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); -} + const struct net *net; -static unsigned int -ipt_local_out_hook(unsigned int hook, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); + if (ops->hooknum == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* root is playing with raw sockets. */ return NF_ACCEPT; - } - return ipt_do_table(pskb, hook, in, out, &packet_filter, NULL); + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_filter); } -static struct nf_hook_ops ipt_ops[] = { - { - .hook = ipt_hook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_IN, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = ipt_hook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_FORWARD, - .priority = NF_IP_PRI_FILTER, - }, - { - .hook = ipt_local_out_hook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_FILTER, - }, -}; +static struct nf_hook_ops *filter_ops __read_mostly; /* Default to forward because I got too much mail already. */ -static int forward = NF_ACCEPT; +static bool forward = true; module_param(forward, bool, 0000); -static int __init init(void) +static int __net_init iptable_filter_net_init(struct net *net) { - int ret; - - if (forward < 0 || forward > NF_MAX_VERDICT) { - printk("iptables forward must be 0 or 1\n"); - return -EINVAL; - } + struct ipt_replace *repl; + repl = ipt_alloc_initial_table(&packet_filter); + if (repl == NULL) + return -ENOMEM; /* Entry 1 is the FORWARD hook */ - initial_table.entries[1].target.verdict = -forward - 1; + ((struct ipt_standard *)repl->entries)[1].target.verdict = + forward ? -NF_ACCEPT - 1 : -NF_DROP - 1; - /* Register table */ - ret = ipt_register_table(&packet_filter, &initial_table.repl); - if (ret < 0) - return ret; + net->ipv4.iptable_filter = + ipt_register_table(net, &packet_filter, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter); +} - /* Register hooks */ - ret = nf_register_hook(&ipt_ops[0]); - if (ret < 0) - goto cleanup_table; +static void __net_exit iptable_filter_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.iptable_filter); +} - ret = nf_register_hook(&ipt_ops[1]); - if (ret < 0) - goto cleanup_hook0; +static struct pernet_operations iptable_filter_net_ops = { + .init = iptable_filter_net_init, + .exit = iptable_filter_net_exit, +}; - ret = nf_register_hook(&ipt_ops[2]); - if (ret < 0) - goto cleanup_hook1; +static int __init iptable_filter_init(void) +{ + int ret; - return ret; + ret = register_pernet_subsys(&iptable_filter_net_ops); + if (ret < 0) + return ret; - cleanup_hook1: - nf_unregister_hook(&ipt_ops[1]); - cleanup_hook0: - nf_unregister_hook(&ipt_ops[0]); - cleanup_table: - ipt_unregister_table(&packet_filter); + /* Register hooks */ + filter_ops = xt_hook_link(&packet_filter, iptable_filter_hook); + if (IS_ERR(filter_ops)) { + ret = PTR_ERR(filter_ops); + unregister_pernet_subsys(&iptable_filter_net_ops); + } return ret; } -static void __exit fini(void) +static void __exit iptable_filter_fini(void) { - unsigned int i; - - for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) - nf_unregister_hook(&ipt_ops[i]); - - ipt_unregister_table(&packet_filter); + xt_hook_unlink(&packet_filter, filter_ops); + unregister_pernet_subsys(&iptable_filter_net_ops); } -module_init(init); -module_exit(fini); +module_init(iptable_filter_init); +module_exit(iptable_filter_fini); diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index 3212a5cc4b6..6a5079c34bb 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -7,255 +7,142 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * Extended to all five netfilter hooks by Brad Chapman & Harald Welte */ -#include <linux/config.h> #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> #include <linux/netdevice.h> #include <linux/skbuff.h> +#include <linux/slab.h> #include <net/sock.h> #include <net/route.h> #include <linux/ip.h> +#include <net/ip.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("iptables mangle table"); -#define MANGLE_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | \ - (1 << NF_IP_LOCAL_IN) | \ - (1 << NF_IP_FORWARD) | \ - (1 << NF_IP_LOCAL_OUT) | \ - (1 << NF_IP_POST_ROUTING)) +#define MANGLE_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | \ + (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) | \ + (1 << NF_INET_POST_ROUTING)) -/* Ouch - five different hooks? Maybe this should be a config option..... -- BC */ -static struct -{ - struct ipt_replace repl; - struct ipt_standard entries[5]; - struct ipt_error term; -} initial_table __initdata -= { { "mangle", MANGLE_VALID_HOOKS, 6, - sizeof(struct ipt_standard) * 5 + sizeof(struct ipt_error), - { [NF_IP_PRE_ROUTING] = 0, - [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4 }, - { [NF_IP_PRE_ROUTING] = 0, - [NF_IP_LOCAL_IN] = sizeof(struct ipt_standard), - [NF_IP_FORWARD] = sizeof(struct ipt_standard) * 2, - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) * 3, - [NF_IP_POST_ROUTING] = sizeof(struct ipt_standard) * 4 }, - 0, NULL, { } }, - { - /* PRE_ROUTING */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_IN */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* FORWARD */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* LOCAL_OUT */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - /* POST_ROUTING */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_standard), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_standard_target)), "" } }, { } }, - -NF_ACCEPT - 1 } }, - }, - /* ERROR */ - { { { { 0 }, { 0 }, { 0 }, { 0 }, "", "", { 0 }, { 0 }, 0, 0, 0 }, - 0, - sizeof(struct ipt_entry), - sizeof(struct ipt_error), - 0, { 0, 0 }, { } }, - { { { { IPT_ALIGN(sizeof(struct ipt_error_target)), IPT_ERROR_TARGET } }, - { } }, - "ERROR" - } - } -}; - -static struct ipt_table packet_mangler = { +static const struct xt_table packet_mangler = { .name = "mangle", .valid_hooks = MANGLE_VALID_HOOKS, - .lock = RW_LOCK_UNLOCKED, .me = THIS_MODULE, - .af = AF_INET, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_MANGLE, }; -/* The work comes in here from netfilter.c. */ -static unsigned int -ipt_route_hook(unsigned int hook, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - return ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); -} - static unsigned int -ipt_local_hook(unsigned int hook, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ipt_mangle_out(struct sk_buff *skb, const struct net_device *out) { unsigned int ret; + const struct iphdr *iph; u_int8_t tos; - u_int32_t saddr, daddr; - unsigned long nfmark; + __be32 saddr, daddr; + u_int32_t mark; + int err; /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } /* Save things which could affect route */ - nfmark = (*pskb)->nfmark; - saddr = (*pskb)->nh.iph->saddr; - daddr = (*pskb)->nh.iph->daddr; - tos = (*pskb)->nh.iph->tos; - - ret = ipt_do_table(pskb, hook, in, out, &packet_mangler, NULL); + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = ipt_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, + dev_net(out)->ipv4.iptable_mangle); /* Reroute for ANY change. */ - if (ret != NF_DROP && ret != NF_STOLEN && ret != NF_QUEUE - && ((*pskb)->nh.iph->saddr != saddr - || (*pskb)->nh.iph->daddr != daddr -#ifdef CONFIG_IP_ROUTE_FWMARK - || (*pskb)->nfmark != nfmark -#endif - || (*pskb)->nh.iph->tos != tos)) - return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP; + if (ret != NF_DROP && ret != NF_STOLEN) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) { + err = ip_route_me_harder(skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } return ret; } -static struct nf_hook_ops ipt_ops[] = { - { - .hook = ipt_route_hook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = ipt_route_hook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_IN, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = ipt_route_hook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_FORWARD, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = ipt_local_hook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_MANGLE, - }, - { - .hook = ipt_route_hook, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_MANGLE, - }, -}; - -static int __init init(void) +/* The work comes in here from netfilter.c. */ +static unsigned int +iptable_mangle_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - int ret; + if (ops->hooknum == NF_INET_LOCAL_OUT) + return ipt_mangle_out(skb, out); + if (ops->hooknum == NF_INET_POST_ROUTING) + return ipt_do_table(skb, ops->hooknum, in, out, + dev_net(out)->ipv4.iptable_mangle); + /* PREROUTING/INPUT/FORWARD: */ + return ipt_do_table(skb, ops->hooknum, in, out, + dev_net(in)->ipv4.iptable_mangle); +} - /* Register table */ - ret = ipt_register_table(&packet_mangler, &initial_table.repl); - if (ret < 0) - return ret; +static struct nf_hook_ops *mangle_ops __read_mostly; - /* Register hooks */ - ret = nf_register_hook(&ipt_ops[0]); - if (ret < 0) - goto cleanup_table; +static int __net_init iptable_mangle_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_mangler); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_mangle = + ipt_register_table(net, &packet_mangler, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle); +} - ret = nf_register_hook(&ipt_ops[1]); - if (ret < 0) - goto cleanup_hook0; +static void __net_exit iptable_mangle_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.iptable_mangle); +} - ret = nf_register_hook(&ipt_ops[2]); - if (ret < 0) - goto cleanup_hook1; +static struct pernet_operations iptable_mangle_net_ops = { + .init = iptable_mangle_net_init, + .exit = iptable_mangle_net_exit, +}; - ret = nf_register_hook(&ipt_ops[3]); - if (ret < 0) - goto cleanup_hook2; +static int __init iptable_mangle_init(void) +{ + int ret; - ret = nf_register_hook(&ipt_ops[4]); + ret = register_pernet_subsys(&iptable_mangle_net_ops); if (ret < 0) - goto cleanup_hook3; - - return ret; + return ret; - cleanup_hook3: - nf_unregister_hook(&ipt_ops[3]); - cleanup_hook2: - nf_unregister_hook(&ipt_ops[2]); - cleanup_hook1: - nf_unregister_hook(&ipt_ops[1]); - cleanup_hook0: - nf_unregister_hook(&ipt_ops[0]); - cleanup_table: - ipt_unregister_table(&packet_mangler); + /* Register hooks */ + mangle_ops = xt_hook_link(&packet_mangler, iptable_mangle_hook); + if (IS_ERR(mangle_ops)) { + ret = PTR_ERR(mangle_ops); + unregister_pernet_subsys(&iptable_mangle_net_ops); + } return ret; } -static void __exit fini(void) +static void __exit iptable_mangle_fini(void) { - unsigned int i; - - for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) - nf_unregister_hook(&ipt_ops[i]); - - ipt_unregister_table(&packet_mangler); + xt_hook_unlink(&packet_mangler, mangle_ops); + unregister_pernet_subsys(&iptable_mangle_net_ops); } -module_init(init); -module_exit(fini); +module_init(iptable_mangle_init); +module_exit(iptable_mangle_fini); diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c new file mode 100644 index 00000000000..f1787c04a4d --- /dev/null +++ b/net/ipv4/netfilter/iptable_nat.c @@ -0,0 +1,328 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/ip.h> +#include <net/ip.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l3proto.h> + +static const struct xt_table nf_nat_ipv4_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV4, +}; + +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + */ + struct nf_nat_range range; + + range.flags = 0; + pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); + + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} + +static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned int ret; + + ret = ipt_do_table(skb, hooknum, in, out, net->ipv4.nat_table); + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + ret = alloc_null_binding(ct, hooknum); + } + return ret; +} + +static unsigned int +nf_nat_ipv4_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + /* maniptype == SRC for postrouting. */ + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + + /* We never see fragments: conntrack defrags on pre-routing + * and local-out, and nf_nat_out protects post-routing. + */ + NF_CT_ASSERT(!ip_is_fragment(ip_hdr(skb))); + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + ops->hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct); + if (ret != NF_ACCEPT) + return ret; + } else { + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + goto oif_changed; + } + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + goto oif_changed; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; +} + +static unsigned int +nf_nat_ipv4_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + __be32 daddr = ip_hdr(skb)->daddr; + + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + daddr != ip_hdr(skb)->daddr) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv4_out(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + int err; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if ((ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip) || + (ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) { + err = nf_xfrm_me_harder(skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } + } +#endif + return ret; +} + +static unsigned int +nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + int err; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + err = ip_route_me_harder(skb, RTN_UNSPEC); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#ifdef CONFIG_XFRM + else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) { + err = nf_xfrm_me_harder(skb, AF_INET); + if (err < 0) + ret = NF_DROP_ERR(err); + } +#endif + } + return ret; +} + +static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv4_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv4_out, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv4_local_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv4_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_NAT_SRC, + }, +}; + +static int __net_init iptable_nat_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&nf_nat_ipv4_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.nat_table); +} + +static void __net_exit iptable_nat_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.nat_table); +} + +static struct pernet_operations iptable_nat_net_ops = { + .init = iptable_nat_net_init, + .exit = iptable_nat_net_exit, +}; + +static int __init iptable_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&iptable_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&iptable_nat_net_ops); +err1: + return err; +} + +static void __exit iptable_nat_exit(void) +{ + nf_unregister_hooks(nf_nat_ipv4_ops, ARRAY_SIZE(nf_nat_ipv4_ops)); + unregister_pernet_subsys(&iptable_nat_net_ops); +} + +module_init(iptable_nat_init); +module_exit(iptable_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index fdb9e9c81e8..b2f7e8f9831 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -1,159 +1,90 @@ -/* +/* * 'raw' table, which is the very first hooked in at PRE_ROUTING and LOCAL_OUT . * * Copyright (C) 2003 Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> */ #include <linux/module.h> #include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/slab.h> +#include <net/ip.h> -#define RAW_VALID_HOOKS ((1 << NF_IP_PRE_ROUTING) | (1 << NF_IP_LOCAL_OUT)) +#define RAW_VALID_HOOKS ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT)) -static struct -{ - struct ipt_replace repl; - struct ipt_standard entries[2]; - struct ipt_error term; -} initial_table __initdata = { - .repl = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .num_entries = 3, - .size = sizeof(struct ipt_standard) * 2 + sizeof(struct ipt_error), - .hook_entry = { - [NF_IP_PRE_ROUTING] = 0, - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) }, - .underflow = { - [NF_IP_PRE_ROUTING] = 0, - [NF_IP_LOCAL_OUT] = sizeof(struct ipt_standard) }, - }, - .entries = { - /* PRE_ROUTING */ - { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_standard), - }, - .target = { - .target = { - .u = { - .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, - - /* LOCAL_OUT */ - { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_standard), - }, - .target = { - .target = { - .u = { - .target_size = IPT_ALIGN(sizeof(struct ipt_standard_target)), - }, - }, - .verdict = -NF_ACCEPT - 1, - }, - }, - }, - /* ERROR */ - .term = { - .entry = { - .target_offset = sizeof(struct ipt_entry), - .next_offset = sizeof(struct ipt_error), - }, - .target = { - .target = { - .u = { - .user = { - .target_size = IPT_ALIGN(sizeof(struct ipt_error_target)), - .name = IPT_ERROR_TARGET, - }, - }, - }, - .errorname = "ERROR", - }, - } -}; - -static struct ipt_table packet_raw = { - .name = "raw", - .valid_hooks = RAW_VALID_HOOKS, - .lock = RW_LOCK_UNLOCKED, +static const struct xt_table packet_raw = { + .name = "raw", + .valid_hooks = RAW_VALID_HOOKS, .me = THIS_MODULE, - .af = AF_INET, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_RAW, }; /* The work comes in here from netfilter.c. */ static unsigned int -ipt_hook(unsigned int hook, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct net *net; + + if (ops->hooknum == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* root is playing with raw sockets. */ + return NF_ACCEPT; + + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw); +} + +static struct nf_hook_ops *rawtable_ops __read_mostly; + +static int __net_init iptable_raw_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&packet_raw); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_raw = + ipt_register_table(net, &packet_raw, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw); +} + +static void __net_exit iptable_raw_net_exit(struct net *net) { - return ipt_do_table(pskb, hook, in, out, &packet_raw, NULL); + ipt_unregister_table(net, net->ipv4.iptable_raw); } -/* 'raw' is the very first table. */ -static struct nf_hook_ops ipt_ops[] = { - { - .hook = ipt_hook, - .pf = PF_INET, - .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_RAW, - .owner = THIS_MODULE, - }, - { - .hook = ipt_hook, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_RAW, - .owner = THIS_MODULE, - }, +static struct pernet_operations iptable_raw_net_ops = { + .init = iptable_raw_net_init, + .exit = iptable_raw_net_exit, }; -static int __init init(void) +static int __init iptable_raw_init(void) { int ret; - /* Register table */ - ret = ipt_register_table(&packet_raw, &initial_table.repl); + ret = register_pernet_subsys(&iptable_raw_net_ops); if (ret < 0) return ret; /* Register hooks */ - ret = nf_register_hook(&ipt_ops[0]); - if (ret < 0) - goto cleanup_table; - - ret = nf_register_hook(&ipt_ops[1]); - if (ret < 0) - goto cleanup_hook0; - - return ret; - - cleanup_hook0: - nf_unregister_hook(&ipt_ops[0]); - cleanup_table: - ipt_unregister_table(&packet_raw); + rawtable_ops = xt_hook_link(&packet_raw, iptable_raw_hook); + if (IS_ERR(rawtable_ops)) { + ret = PTR_ERR(rawtable_ops); + unregister_pernet_subsys(&iptable_raw_net_ops); + } return ret; } -static void __exit fini(void) +static void __exit iptable_raw_fini(void) { - unsigned int i; - - for (i = 0; i < sizeof(ipt_ops)/sizeof(struct nf_hook_ops); i++) - nf_unregister_hook(&ipt_ops[i]); - - ipt_unregister_table(&packet_raw); + xt_hook_unlink(&packet_raw, rawtable_ops); + unregister_pernet_subsys(&iptable_raw_net_ops); } -module_init(init); -module_exit(fini); +module_init(iptable_raw_init); +module_exit(iptable_raw_fini); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c new file mode 100644 index 00000000000..c86647ed207 --- /dev/null +++ b/net/ipv4/netfilter/iptable_security.c @@ -0,0 +1,111 @@ +/* + * "security" table + * + * This is for use by Mandatory Access Control (MAC) security models, + * which need to be able to manage security policy in separate context + * to DAC. + * + * Based on iptable_mangle.c + * + * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling + * Copyright (C) 2000-2004 Netfilter Core Team <coreteam <at> netfilter.org> + * Copyright (C) 2008 Red Hat, Inc., James Morris <jmorris <at> redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/netfilter_ipv4/ip_tables.h> +#include <linux/slab.h> +#include <net/ip.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("James Morris <jmorris <at> redhat.com>"); +MODULE_DESCRIPTION("iptables security table, for MAC rules"); + +#define SECURITY_VALID_HOOKS (1 << NF_INET_LOCAL_IN) | \ + (1 << NF_INET_FORWARD) | \ + (1 << NF_INET_LOCAL_OUT) + +static const struct xt_table security_table = { + .name = "security", + .valid_hooks = SECURITY_VALID_HOOKS, + .me = THIS_MODULE, + .af = NFPROTO_IPV4, + .priority = NF_IP_PRI_SECURITY, +}; + +static unsigned int +iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct net *net; + + if (ops->hooknum == NF_INET_LOCAL_OUT && + (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr))) + /* Somebody is playing with raw sockets. */ + return NF_ACCEPT; + + net = dev_net((in != NULL) ? in : out); + return ipt_do_table(skb, ops->hooknum, in, out, + net->ipv4.iptable_security); +} + +static struct nf_hook_ops *sectbl_ops __read_mostly; + +static int __net_init iptable_security_net_init(struct net *net) +{ + struct ipt_replace *repl; + + repl = ipt_alloc_initial_table(&security_table); + if (repl == NULL) + return -ENOMEM; + net->ipv4.iptable_security = + ipt_register_table(net, &security_table, repl); + kfree(repl); + return PTR_ERR_OR_ZERO(net->ipv4.iptable_security); +} + +static void __net_exit iptable_security_net_exit(struct net *net) +{ + ipt_unregister_table(net, net->ipv4.iptable_security); +} + +static struct pernet_operations iptable_security_net_ops = { + .init = iptable_security_net_init, + .exit = iptable_security_net_exit, +}; + +static int __init iptable_security_init(void) +{ + int ret; + + ret = register_pernet_subsys(&iptable_security_net_ops); + if (ret < 0) + return ret; + + sectbl_ops = xt_hook_link(&security_table, iptable_security_hook); + if (IS_ERR(sectbl_ops)) { + ret = PTR_ERR(sectbl_ops); + goto cleanup_table; + } + + return ret; + +cleanup_table: + unregister_pernet_subsys(&iptable_security_net_ops); + return ret; +} + +static void __exit iptable_security_fini(void) +{ + xt_hook_unlink(&security_table, sectbl_ops); + unregister_pernet_subsys(&iptable_security_net_ops); +} + +module_init(iptable_security_init); +module_exit(iptable_security_fini); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 167619f638c..8127dc80286 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -1,20 +1,13 @@ + /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> - * - move L3 protocol dependent part to this file. - * 23 Mar 2004: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> - * - add get_features() to support various size of conntrack - * structures. - * - * Derived from net/ipv4/netfilter/ip_conntrack_standalone.c */ -#include <linux/config.h> #include <linux/types.h> #include <linux/ip.h> #include <linux/netfilter.h> @@ -28,290 +21,241 @@ #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack.h> #include <net/netfilter/nf_conntrack_helper.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_zones.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_seqadj.h> #include <net/netfilter/ipv4/nf_conntrack_ipv4.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#include <net/netfilter/nf_log.h> -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif - -DECLARE_PER_CPU(struct nf_conntrack_stat, nf_conntrack_stat); - -static int ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, - struct nf_conntrack_tuple *tuple) +static bool ipv4_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, + struct nf_conntrack_tuple *tuple) { - u_int32_t _addrs[2], *ap; + const __be32 *ap; + __be32 _addrs[2]; ap = skb_header_pointer(skb, nhoff + offsetof(struct iphdr, saddr), sizeof(u_int32_t) * 2, _addrs); if (ap == NULL) - return 0; + return false; tuple->src.u3.ip = ap[0]; tuple->dst.u3.ip = ap[1]; - return 1; + return true; } -static int ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) +static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) { tuple->src.u3.ip = orig->dst.u3.ip; tuple->dst.u3.ip = orig->src.u3.ip; - return 1; + return true; } static int ipv4_print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple) { - return seq_printf(s, "src=%u.%u.%u.%u dst=%u.%u.%u.%u ", - NIPQUAD(tuple->src.u3.ip), - NIPQUAD(tuple->dst.u3.ip)); -} - -static int ipv4_print_conntrack(struct seq_file *s, - const struct nf_conn *conntrack) -{ - return 0; + return seq_printf(s, "src=%pI4 dst=%pI4 ", + &tuple->src.u3.ip, &tuple->dst.u3.ip); } -/* Returns new sk_buff, or NULL */ -static struct sk_buff * -nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, + unsigned int *dataoff, u_int8_t *protonum) { - skb_orphan(skb); - - local_bh_disable(); - skb = ip_defrag(skb, user); - local_bh_enable(); - - if (skb) - ip_send_check(skb->nh.iph); + const struct iphdr *iph; + struct iphdr _iph; + + iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + if (iph == NULL) + return -NF_ACCEPT; + + /* Conntrack defragments packets, we might still see fragments + * inside ICMP packets though. */ + if (iph->frag_off & htons(IP_OFFSET)) + return -NF_ACCEPT; + + *dataoff = nhoff + (iph->ihl << 2); + *protonum = iph->protocol; + + /* Check bogus IP headers */ + if (*dataoff > skb->len) { + pr_debug("nf_conntrack_ipv4: bogus IPv4 packet: " + "nhoff %u, ihl %u, skblen %u\n", + nhoff, iph->ihl << 2, skb->len); + return -NF_ACCEPT; + } - return skb; + return NF_ACCEPT; } -static int -ipv4_prepare(struct sk_buff **pskb, unsigned int hooknum, unsigned int *dataoff, - u_int8_t *protonum) +static unsigned int ipv4_helper(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { - /* Never happen */ - if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) { - if (net_ratelimit()) { - printk(KERN_ERR "ipv4_prepare: Frag of proto %u (hook=%u)\n", - (*pskb)->nh.iph->protocol, hooknum); - } - return -NF_DROP; - } + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + const struct nf_conn_help *help; + const struct nf_conntrack_helper *helper; - *dataoff = (*pskb)->nh.raw - (*pskb)->data + (*pskb)->nh.iph->ihl*4; - *protonum = (*pskb)->nh.iph->protocol; + /* This is where we call the helper: as the packet goes out. */ + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + return NF_ACCEPT; - return NF_ACCEPT; -} + help = nfct_help(ct); + if (!help) + return NF_ACCEPT; -int nat_module_is_loaded = 0; -static u_int32_t ipv4_get_features(const struct nf_conntrack_tuple *tuple) -{ - if (nat_module_is_loaded) - return NF_CT_F_NAT; + /* rcu_read_lock()ed by nf_hook_slow */ + helper = rcu_dereference(help->helper); + if (!helper) + return NF_ACCEPT; - return NF_CT_F_BASIC; + return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb), + ct, ctinfo); } -static unsigned int ipv4_confirm(unsigned int hooknum, - struct sk_buff **pskb, +static unsigned int ipv4_confirm(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - /* We've seen it coming out the other side: confirm it */ - return nf_conntrack_confirm(pskb); -} - -static unsigned int ipv4_conntrack_help(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ struct nf_conn *ct; enum ip_conntrack_info ctinfo; - /* This is where we call the helper: as the packet goes out. */ - ct = nf_ct_get(*pskb, &ctinfo); - if (ct && ct->helper) { - unsigned int ret; - ret = ct->helper->help(pskb, - (*pskb)->nh.raw - (*pskb)->data - + (*pskb)->nh.iph->ihl*4, - ct, ctinfo); - if (ret != NF_ACCEPT) - return ret; - } - return NF_ACCEPT; -} + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; -static unsigned int ipv4_conntrack_defrag(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ -#if !defined(CONFIG_IP_NF_NAT) && !defined(CONFIG_IP_NF_NAT_MODULE) - /* Previously seen (loopback)? Ignore. Do this before - fragment check. */ - if ((*pskb)->nfct) - return NF_ACCEPT; -#endif - - /* Gather fragments. */ - if ((*pskb)->nh.iph->frag_off & htons(IP_MF|IP_OFFSET)) { - *pskb = nf_ct_ipv4_gather_frags(*pskb, - hooknum == NF_IP_PRE_ROUTING ? - IP_DEFRAG_CONNTRACK_IN : - IP_DEFRAG_CONNTRACK_OUT); - if (!*pskb) - return NF_STOLEN; + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return NF_DROP; + } } - return NF_ACCEPT; +out: + /* We've seen it coming out the other side: confirm it */ + return nf_conntrack_confirm(skb); } -static unsigned int ipv4_conntrack_in(unsigned int hooknum, - struct sk_buff **pskb, +static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops, + struct sk_buff *skb, const struct net_device *in, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return nf_conntrack_in(PF_INET, hooknum, pskb); + return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb); } -static unsigned int ipv4_conntrack_local(unsigned int hooknum, - struct sk_buff **pskb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) +static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) { /* root is playing with raw sockets. */ - if ((*pskb)->len < sizeof(struct iphdr) - || (*pskb)->nh.iph->ihl * 4 < sizeof(struct iphdr)) { - if (net_ratelimit()) - printk("ipt_hook: happy cracking.\n"); + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) return NF_ACCEPT; - } - return nf_conntrack_in(PF_INET, hooknum, pskb); + return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb); } /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ -static struct nf_hook_ops ipv4_conntrack_defrag_ops = { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, -}; - -static struct nf_hook_ops ipv4_conntrack_in_ops = { - .hook = ipv4_conntrack_in, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_PRE_ROUTING, - .priority = NF_IP_PRI_CONNTRACK, -}; - -static struct nf_hook_ops ipv4_conntrack_defrag_local_out_ops = { - .hook = ipv4_conntrack_defrag, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK_DEFRAG, -}; - -static struct nf_hook_ops ipv4_conntrack_local_out_ops = { - .hook = ipv4_conntrack_local, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_OUT, - .priority = NF_IP_PRI_CONNTRACK, -}; - -/* helpers */ -static struct nf_hook_ops ipv4_conntrack_helper_out_ops = { - .hook = ipv4_conntrack_help, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_HELPER, -}; - -static struct nf_hook_ops ipv4_conntrack_helper_in_ops = { - .hook = ipv4_conntrack_help, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_HELPER, -}; - - -/* Refragmenter; last chance. */ -static struct nf_hook_ops ipv4_conntrack_out_ops = { - .hook = ipv4_confirm, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_POST_ROUTING, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM, -}; - -static struct nf_hook_ops ipv4_conntrack_local_in_ops = { - .hook = ipv4_confirm, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_IP_LOCAL_IN, - .priority = NF_IP_PRI_CONNTRACK_CONFIRM, +static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { + { + .hook = ipv4_conntrack_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK, + }, + { + .hook = ipv4_conntrack_local, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK, + }, + { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, + { + .hook = ipv4_confirm, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, + { + .hook = ipv4_helper, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_HELPER, + }, + { + .hook = ipv4_confirm, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP_PRI_CONNTRACK_CONFIRM, + }, }; -#ifdef CONFIG_SYSCTL -/* From nf_conntrack_proto_icmp.c */ -extern unsigned int nf_ct_icmp_timeout; -static struct ctl_table_header *nf_ct_ipv4_sysctl_header; +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) +static int log_invalid_proto_min = 0; +static int log_invalid_proto_max = 255; -static ctl_table nf_ct_sysctl_table[] = { +static struct ctl_table ip_ct_sysctl_table[] = { { - .ctl_name = NET_NF_CONNTRACK_ICMP_TIMEOUT, - .procname = "nf_conntrack_icmp_timeout", - .data = &nf_ct_icmp_timeout, - .maxlen = sizeof(unsigned int), + .procname = "ip_conntrack_max", + .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec_jiffies, + .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } -}; - -static ctl_table nf_ct_netfilter_table[] = { { - .ctl_name = NET_NETFILTER, - .procname = "netfilter", - .mode = 0555, - .child = nf_ct_sysctl_table, + .procname = "ip_conntrack_count", + .maxlen = sizeof(int), + .mode = 0444, + .proc_handler = proc_dointvec, + }, + { + .procname = "ip_conntrack_buckets", + .maxlen = sizeof(unsigned int), + .mode = 0444, + .proc_handler = proc_dointvec, }, - { .ctl_name = 0 } -}; - -static ctl_table nf_ct_net_table[] = { { - .ctl_name = CTL_NET, - .procname = "net", - .mode = 0555, - .child = nf_ct_netfilter_table, + .procname = "ip_conntrack_checksum", + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "ip_conntrack_log_invalid", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &log_invalid_proto_min, + .extra2 = &log_invalid_proto_max, }, - { .ctl_name = 0 } + { } }; -#endif +#endif /* CONFIG_SYSCTL && CONFIG_NF_CONNTRACK_PROC_COMPAT */ /* Fast function for those who don't want to parse /proc (and I don't blame them). */ @@ -320,31 +264,31 @@ static ctl_table nf_ct_net_table[] = { static int getorigdst(struct sock *sk, int optval, void __user *user, int *len) { - struct inet_sock *inet = inet_sk(sk); - struct nf_conntrack_tuple_hash *h; + const struct inet_sock *inet = inet_sk(sk); + const struct nf_conntrack_tuple_hash *h; struct nf_conntrack_tuple tuple; - - NF_CT_TUPLE_U_BLANK(&tuple); - tuple.src.u3.ip = inet->rcv_saddr; - tuple.src.u.tcp.port = inet->sport; - tuple.dst.u3.ip = inet->daddr; - tuple.dst.u.tcp.port = inet->dport; + + memset(&tuple, 0, sizeof(tuple)); + tuple.src.u3.ip = inet->inet_rcv_saddr; + tuple.src.u.tcp.port = inet->inet_sport; + tuple.dst.u3.ip = inet->inet_daddr; + tuple.dst.u.tcp.port = inet->inet_dport; tuple.src.l3num = PF_INET; - tuple.dst.protonum = IPPROTO_TCP; + tuple.dst.protonum = sk->sk_protocol; - /* We only do TCP at the moment: is there a better way? */ - if (strcmp(sk->sk_prot->name, "TCP")) { - DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n"); + /* We only do TCP and SCTP at the moment: is there a better way? */ + if (sk->sk_protocol != IPPROTO_TCP && sk->sk_protocol != IPPROTO_SCTP) { + pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n"); return -ENOPROTOOPT; } if ((unsigned int) *len < sizeof(struct sockaddr_in)) { - DEBUGP("SO_ORIGINAL_DST: len %u not %u\n", - *len, sizeof(struct sockaddr_in)); + pr_debug("SO_ORIGINAL_DST: len %d not %Zu\n", + *len, sizeof(struct sockaddr_in)); return -EINVAL; } - h = nf_conntrack_find_get(&tuple, NULL); + h = nf_conntrack_find_get(sock_net(sk), NF_CT_DEFAULT_ZONE, &tuple); if (h) { struct sockaddr_in sin; struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); @@ -354,61 +298,60 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len) .tuple.dst.u.tcp.port; sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL] .tuple.dst.u3.ip; + memset(sin.sin_zero, 0, sizeof(sin.sin_zero)); - DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n", - NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port)); + pr_debug("SO_ORIGINAL_DST: %pI4 %u\n", + &sin.sin_addr.s_addr, ntohs(sin.sin_port)); nf_ct_put(ct); if (copy_to_user(user, &sin, sizeof(sin)) != 0) return -EFAULT; else return 0; } - DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n", - NIPQUAD(tuple.src.u3.ip), ntohs(tuple.src.u.tcp.port), - NIPQUAD(tuple.dst.u3.ip), ntohs(tuple.dst.u.tcp.port)); + pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n", + &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port), + &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port)); return -ENOENT; } -#if defined(CONFIG_NF_CT_NETLINK) || \ - defined(CONFIG_NF_CT_NETLINK_MODULE) +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int ipv4_tuple_to_nfattr(struct sk_buff *skb, +static int ipv4_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - NFA_PUT(skb, CTA_IP_V4_SRC, sizeof(u_int32_t), - &tuple->src.u3.ip); - NFA_PUT(skb, CTA_IP_V4_DST, sizeof(u_int32_t), - &tuple->dst.u3.ip); + if (nla_put_be32(skb, CTA_IP_V4_SRC, tuple->src.u3.ip) || + nla_put_be32(skb, CTA_IP_V4_DST, tuple->dst.u3.ip)) + goto nla_put_failure; return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_ip[CTA_IP_MAX] = { - [CTA_IP_V4_SRC-1] = sizeof(u_int32_t), - [CTA_IP_V4_DST-1] = sizeof(u_int32_t), +static const struct nla_policy ipv4_nla_policy[CTA_IP_MAX+1] = { + [CTA_IP_V4_SRC] = { .type = NLA_U32 }, + [CTA_IP_V4_DST] = { .type = NLA_U32 }, }; -static int ipv4_nfattr_to_tuple(struct nfattr *tb[], +static int ipv4_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *t) { - if (!tb[CTA_IP_V4_SRC-1] || !tb[CTA_IP_V4_DST-1]) + if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST]) return -EINVAL; - if (nfattr_bad_size(tb, CTA_IP_MAX, cta_min_ip)) - return -EINVAL; - - t->src.u3.ip = - *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_SRC-1]); - t->dst.u3.ip = - *(u_int32_t *)NFA_DATA(tb[CTA_IP_V4_DST-1]); + t->src.u3.ip = nla_get_be32(tb[CTA_IP_V4_SRC]); + t->dst.u3.ip = nla_get_be32(tb[CTA_IP_V4_DST]); return 0; } + +static int ipv4_nlattr_tuple_size(void) +{ + return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1); +} #endif static struct nf_sockopt_ops so_getorigdst = { @@ -416,175 +359,192 @@ static struct nf_sockopt_ops so_getorigdst = { .get_optmin = SO_ORIGINAL_DST, .get_optmax = SO_ORIGINAL_DST+1, .get = &getorigdst, + .owner = THIS_MODULE, }; -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { +static int ipv4_init_net(struct net *net) +{ +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + struct nf_ip_net *in = &net->ct.nf_ct_proto; + in->ctl_table = kmemdup(ip_ct_sysctl_table, + sizeof(ip_ct_sysctl_table), + GFP_KERNEL); + if (!in->ctl_table) + return -ENOMEM; + + in->ctl_table[0].data = &nf_conntrack_max; + in->ctl_table[1].data = &net->ct.count; + in->ctl_table[2].data = &net->ct.htable_size; + in->ctl_table[3].data = &net->ct.sysctl_checksum; + in->ctl_table[4].data = &net->ct.sysctl_log_invalid; +#endif + return 0; +} + +struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { .l3proto = PF_INET, .name = "ipv4", .pkt_to_tuple = ipv4_pkt_to_tuple, .invert_tuple = ipv4_invert_tuple, .print_tuple = ipv4_print_tuple, - .print_conntrack = ipv4_print_conntrack, - .prepare = ipv4_prepare, - .get_features = ipv4_get_features, -#if defined(CONFIG_NF_CT_NETLINK) || \ - defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = ipv4_tuple_to_nfattr, - .nfattr_to_tuple = ipv4_nfattr_to_tuple, + .get_l4proto = ipv4_get_l4proto, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nlattr = ipv4_tuple_to_nlattr, + .nlattr_tuple_size = ipv4_nlattr_tuple_size, + .nlattr_to_tuple = ipv4_nlattr_to_tuple, + .nla_policy = ipv4_nla_policy, #endif +#if defined(CONFIG_SYSCTL) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + .ctl_table_path = "net/ipv4/netfilter", +#endif + .init_net = ipv4_init_net, .me = THIS_MODULE, }; -extern struct nf_conntrack_protocol nf_conntrack_protocol_tcp4; -extern struct nf_conntrack_protocol nf_conntrack_protocol_udp4; -extern struct nf_conntrack_protocol nf_conntrack_protocol_icmp; -static int init_or_cleanup(int init) +module_param_call(hashsize, nf_conntrack_set_hashsize, param_get_uint, + &nf_conntrack_htable_size, 0600); + +MODULE_ALIAS("nf_conntrack-" __stringify(AF_INET)); +MODULE_ALIAS("ip_conntrack"); +MODULE_LICENSE("GPL"); + +static int ipv4_net_init(struct net *net) { int ret = 0; - if (!init) goto cleanup; - - ret = nf_register_sockopt(&so_getorigdst); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4); if (ret < 0) { - printk(KERN_ERR "Unable to register netfilter socket option\n"); - goto cleanup_nothing; + pr_err("nf_conntrack_tcp4: pernet registration failed\n"); + goto out_tcp; } - - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_tcp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register tcp.\n"); - goto cleanup_sockopt; + pr_err("nf_conntrack_udp4: pernet registration failed\n"); + goto out_udp; } - - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_udp4); + ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register udp.\n"); - goto cleanup_tcp; + pr_err("nf_conntrack_icmp4: pernet registration failed\n"); + goto out_icmp; } - - ret = nf_conntrack_protocol_register(&nf_conntrack_protocol_icmp); + ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register icmp.\n"); - goto cleanup_udp; + pr_err("nf_conntrack_ipv4: pernet registration failed\n"); + goto out_ipv4; } + return 0; +out_ipv4: + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); +out_icmp: + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); +out_udp: + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); +out_tcp: + return ret; +} - ret = nf_conntrack_l3proto_register(&nf_conntrack_l3proto_ipv4); - if (ret < 0) { - printk("nf_conntrack_ipv4: can't register ipv4\n"); - goto cleanup_icmp; - } +static void ipv4_net_exit(struct net *net) +{ + nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4); + nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4); +} - ret = nf_register_hook(&ipv4_conntrack_defrag_ops); - if (ret < 0) { - printk("nf_conntrack_ipv4: can't register pre-routing defrag hook.\n"); - goto cleanup_ipv4; - } - ret = nf_register_hook(&ipv4_conntrack_defrag_local_out_ops); +static struct pernet_operations ipv4_net_ops = { + .init = ipv4_net_init, + .exit = ipv4_net_exit, +}; + +static int __init nf_conntrack_l3proto_ipv4_init(void) +{ + int ret = 0; + + need_conntrack(); + nf_defrag_ipv4_enable(); + + ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register local_out defrag hook.\n"); - goto cleanup_defragops; + printk(KERN_ERR "Unable to register netfilter socket option\n"); + return ret; } - ret = nf_register_hook(&ipv4_conntrack_in_ops); + ret = register_pernet_subsys(&ipv4_net_ops); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register pre-routing hook.\n"); - goto cleanup_defraglocalops; + pr_err("nf_conntrack_ipv4: can't register pernet ops\n"); + goto cleanup_sockopt; } - ret = nf_register_hook(&ipv4_conntrack_local_out_ops); + ret = nf_register_hooks(ipv4_conntrack_ops, + ARRAY_SIZE(ipv4_conntrack_ops)); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register local out hook.\n"); - goto cleanup_inops; + pr_err("nf_conntrack_ipv4: can't register hooks.\n"); + goto cleanup_pernet; } - ret = nf_register_hook(&ipv4_conntrack_helper_in_ops); + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register local helper hook.\n"); - goto cleanup_inandlocalops; + pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n"); + goto cleanup_hooks; } - ret = nf_register_hook(&ipv4_conntrack_helper_out_ops); + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register postrouting helper hook.\n"); - goto cleanup_helperinops; + pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n"); + goto cleanup_tcp4; } - ret = nf_register_hook(&ipv4_conntrack_out_ops); + ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register post-routing hook.\n"); - goto cleanup_helperoutops; + pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n"); + goto cleanup_udp4; } - ret = nf_register_hook(&ipv4_conntrack_local_in_ops); + ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4); if (ret < 0) { - printk("nf_conntrack_ipv4: can't register local in hook.\n"); - goto cleanup_inoutandlocalops; + pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n"); + goto cleanup_icmpv4; } -#ifdef CONFIG_SYSCTL - nf_ct_ipv4_sysctl_header = register_sysctl_table(nf_ct_net_table, 0); - if (nf_ct_ipv4_sysctl_header == NULL) { - printk("nf_conntrack: can't register to sysctl.\n"); - ret = -ENOMEM; - goto cleanup_localinops; - } +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + ret = nf_conntrack_ipv4_compat_init(); + if (ret < 0) + goto cleanup_proto; #endif - - /* For use by REJECT target */ - ip_ct_attach = __nf_conntrack_attach; - return ret; - - cleanup: - synchronize_net(); - ip_ct_attach = NULL; -#ifdef CONFIG_SYSCTL - unregister_sysctl_table(nf_ct_ipv4_sysctl_header); - cleanup_localinops: +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + cleanup_proto: + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); #endif - nf_unregister_hook(&ipv4_conntrack_local_in_ops); - cleanup_inoutandlocalops: - nf_unregister_hook(&ipv4_conntrack_out_ops); - cleanup_helperoutops: - nf_unregister_hook(&ipv4_conntrack_helper_out_ops); - cleanup_helperinops: - nf_unregister_hook(&ipv4_conntrack_helper_in_ops); - cleanup_inandlocalops: - nf_unregister_hook(&ipv4_conntrack_local_out_ops); - cleanup_inops: - nf_unregister_hook(&ipv4_conntrack_in_ops); - cleanup_defraglocalops: - nf_unregister_hook(&ipv4_conntrack_defrag_local_out_ops); - cleanup_defragops: - nf_unregister_hook(&ipv4_conntrack_defrag_ops); - cleanup_ipv4: - nf_conntrack_l3proto_unregister(&nf_conntrack_l3proto_ipv4); - cleanup_icmp: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_icmp); - cleanup_udp: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_udp4); - cleanup_tcp: - nf_conntrack_protocol_unregister(&nf_conntrack_protocol_tcp4); + cleanup_icmpv4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + cleanup_udp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + cleanup_tcp4: + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + cleanup_hooks: + nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); + cleanup_pernet: + unregister_pernet_subsys(&ipv4_net_ops); cleanup_sockopt: nf_unregister_sockopt(&so_getorigdst); - cleanup_nothing: return ret; } -MODULE_LICENSE("GPL"); - -static int __init init(void) -{ - need_conntrack(); - return init_or_cleanup(1); -} - -static void __exit fini(void) +static void __exit nf_conntrack_l3proto_ipv4_fini(void) { - init_or_cleanup(0); + synchronize_net(); +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT) + nf_conntrack_ipv4_compat_fini(); +#endif + nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4); + nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4); + nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops)); + unregister_pernet_subsys(&ipv4_net_ops); + nf_unregister_sockopt(&so_getorigdst); } -module_init(init); -module_exit(fini); - -EXPORT_SYMBOL(nf_ct_ipv4_gather_frags); +module_init(nf_conntrack_l3proto_ipv4_init); +module_exit(nf_conntrack_l3proto_ipv4_fini); diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c new file mode 100644 index 00000000000..4c48e434bb1 --- /dev/null +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -0,0 +1,464 @@ +/* ip_conntrack proc compat - based on ip_conntrack_standalone.c + * + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2010 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/types.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/percpu.h> +#include <linux/security.h> +#include <net/net_namespace.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_l3proto.h> +#include <net/netfilter/nf_conntrack_l4proto.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_acct.h> +#include <linux/rculist_nulls.h> +#include <linux/export.h> + +struct ct_iter_state { + struct seq_net_private p; + unsigned int bucket; +}; + +static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) +{ + struct net *net = seq_file_net(seq); + struct ct_iter_state *st = seq->private; + struct hlist_nulls_node *n; + + for (st->bucket = 0; + st->bucket < net->ct.htable_size; + st->bucket++) { + n = rcu_dereference( + hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + if (!is_a_nulls(n)) + return n; + } + return NULL; +} + +static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, + struct hlist_nulls_node *head) +{ + struct net *net = seq_file_net(seq); + struct ct_iter_state *st = seq->private; + + head = rcu_dereference(hlist_nulls_next_rcu(head)); + while (is_a_nulls(head)) { + if (likely(get_nulls_value(head) == st->bucket)) { + if (++st->bucket >= net->ct.htable_size) + return NULL; + } + head = rcu_dereference( + hlist_nulls_first_rcu(&net->ct.hash[st->bucket])); + } + return head; +} + +static struct hlist_nulls_node *ct_get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_nulls_node *head = ct_get_first(seq); + + if (head) + while (pos && (head = ct_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *ct_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return ct_get_idx(seq, *pos); +} + +static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos) +{ + (*pos)++; + return ct_get_next(s, v); +} + +static void ct_seq_stop(struct seq_file *s, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +#ifdef CONFIG_NF_CONNTRACK_SECMARK +static int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) +{ + int ret; + u32 len; + char *secctx; + + ret = security_secid_to_secctx(ct->secmark, &secctx, &len); + if (ret) + return 0; + + ret = seq_printf(s, "secctx=%s ", secctx); + + security_release_secctx(secctx, len); + return ret; +} +#else +static inline int ct_show_secctx(struct seq_file *s, const struct nf_conn *ct) +{ + return 0; +} +#endif + +static int ct_seq_show(struct seq_file *s, void *v) +{ + struct nf_conntrack_tuple_hash *hash = v; + struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(hash); + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; + int ret = 0; + + NF_CT_ASSERT(ct); + if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) + return 0; + + + /* we only want to print DIR_ORIGINAL */ + if (NF_CT_DIRECTION(hash)) + goto release; + if (nf_ct_l3num(ct) != AF_INET) + goto release; + + l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); + NF_CT_ASSERT(l3proto); + l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); + NF_CT_ASSERT(l4proto); + + ret = -ENOSPC; + if (seq_printf(s, "%-8s %u %ld ", + l4proto->name, nf_ct_protonum(ct), + timer_pending(&ct->timeout) + ? (long)(ct->timeout.expires - jiffies)/HZ : 0) != 0) + goto release; + + if (l4proto->print_conntrack && l4proto->print_conntrack(s, ct)) + goto release; + + if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, + l3proto, l4proto)) + goto release; + + if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL)) + goto release; + + if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status))) + if (seq_printf(s, "[UNREPLIED] ")) + goto release; + + if (print_tuple(s, &ct->tuplehash[IP_CT_DIR_REPLY].tuple, + l3proto, l4proto)) + goto release; + + if (seq_print_acct(s, ct, IP_CT_DIR_REPLY)) + goto release; + + if (test_bit(IPS_ASSURED_BIT, &ct->status)) + if (seq_printf(s, "[ASSURED] ")) + goto release; + +#ifdef CONFIG_NF_CONNTRACK_MARK + if (seq_printf(s, "mark=%u ", ct->mark)) + goto release; +#endif + + if (ct_show_secctx(s, ct)) + goto release; + + if (seq_printf(s, "use=%u\n", atomic_read(&ct->ct_general.use))) + goto release; + ret = 0; +release: + nf_ct_put(ct); + return ret; +} + +static const struct seq_operations ct_seq_ops = { + .start = ct_seq_start, + .next = ct_seq_next, + .stop = ct_seq_stop, + .show = ct_seq_show +}; + +static int ct_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ct_seq_ops, + sizeof(struct ct_iter_state)); +} + +static const struct file_operations ct_file_ops = { + .owner = THIS_MODULE, + .open = ct_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +/* expects */ +struct ct_expect_iter_state { + struct seq_net_private p; + unsigned int bucket; +}; + +static struct hlist_node *ct_expect_get_first(struct seq_file *seq) +{ + struct net *net = seq_file_net(seq); + struct ct_expect_iter_state *st = seq->private; + struct hlist_node *n; + + for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) { + n = rcu_dereference( + hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + if (n) + return n; + } + return NULL; +} + +static struct hlist_node *ct_expect_get_next(struct seq_file *seq, + struct hlist_node *head) +{ + struct net *net = seq_file_net(seq); + struct ct_expect_iter_state *st = seq->private; + + head = rcu_dereference(hlist_next_rcu(head)); + while (head == NULL) { + if (++st->bucket >= nf_ct_expect_hsize) + return NULL; + head = rcu_dereference( + hlist_first_rcu(&net->ct.expect_hash[st->bucket])); + } + return head; +} + +static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos) +{ + struct hlist_node *head = ct_expect_get_first(seq); + + if (head) + while (pos && (head = ct_expect_get_next(seq, head))) + pos--; + return pos ? NULL : head; +} + +static void *exp_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(RCU) +{ + rcu_read_lock(); + return ct_expect_get_idx(seq, *pos); +} + +static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + (*pos)++; + return ct_expect_get_next(seq, v); +} + +static void exp_seq_stop(struct seq_file *seq, void *v) + __releases(RCU) +{ + rcu_read_unlock(); +} + +static int exp_seq_show(struct seq_file *s, void *v) +{ + struct nf_conntrack_expect *exp; + const struct hlist_node *n = v; + + exp = hlist_entry(n, struct nf_conntrack_expect, hnode); + + if (exp->tuple.src.l3num != AF_INET) + return 0; + + if (exp->timeout.function) + seq_printf(s, "%ld ", timer_pending(&exp->timeout) + ? (long)(exp->timeout.expires - jiffies)/HZ : 0); + else + seq_printf(s, "- "); + + seq_printf(s, "proto=%u ", exp->tuple.dst.protonum); + + print_tuple(s, &exp->tuple, + __nf_ct_l3proto_find(exp->tuple.src.l3num), + __nf_ct_l4proto_find(exp->tuple.src.l3num, + exp->tuple.dst.protonum)); + return seq_putc(s, '\n'); +} + +static const struct seq_operations exp_seq_ops = { + .start = exp_seq_start, + .next = exp_seq_next, + .stop = exp_seq_stop, + .show = exp_seq_show +}; + +static int exp_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &exp_seq_ops, + sizeof(struct ct_expect_iter_state)); +} + +static const struct file_operations ip_exp_file_ops = { + .owner = THIS_MODULE, + .open = exp_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static void *ct_cpu_seq_start(struct seq_file *seq, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + int cpu; + + if (*pos == 0) + return SEQ_START_TOKEN; + + for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu+1; + return per_cpu_ptr(net->ct.stat, cpu); + } + + return NULL; +} + +static void *ct_cpu_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct net *net = seq_file_net(seq); + int cpu; + + for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) { + if (!cpu_possible(cpu)) + continue; + *pos = cpu+1; + return per_cpu_ptr(net->ct.stat, cpu); + } + + return NULL; +} + +static void ct_cpu_seq_stop(struct seq_file *seq, void *v) +{ +} + +static int ct_cpu_seq_show(struct seq_file *seq, void *v) +{ + struct net *net = seq_file_net(seq); + unsigned int nr_conntracks = atomic_read(&net->ct.count); + const struct ip_conntrack_stat *st = v; + + if (v == SEQ_START_TOKEN) { + seq_printf(seq, "entries searched found new invalid ignore delete delete_list insert insert_failed drop early_drop icmp_error expect_new expect_create expect_delete search_restart\n"); + return 0; + } + + seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x " + "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n", + nr_conntracks, + st->searched, + st->found, + st->new, + st->invalid, + st->ignore, + st->delete, + st->delete_list, + st->insert, + st->insert_failed, + st->drop, + st->early_drop, + st->error, + + st->expect_new, + st->expect_create, + st->expect_delete, + st->search_restart + ); + return 0; +} + +static const struct seq_operations ct_cpu_seq_ops = { + .start = ct_cpu_seq_start, + .next = ct_cpu_seq_next, + .stop = ct_cpu_seq_stop, + .show = ct_cpu_seq_show, +}; + +static int ct_cpu_seq_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &ct_cpu_seq_ops, + sizeof(struct seq_net_private)); +} + +static const struct file_operations ct_cpu_seq_fops = { + .owner = THIS_MODULE, + .open = ct_cpu_seq_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; + +static int __net_init ip_conntrack_net_init(struct net *net) +{ + struct proc_dir_entry *proc, *proc_exp, *proc_stat; + + proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops); + if (!proc) + goto err1; + + proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net, + &ip_exp_file_ops); + if (!proc_exp) + goto err2; + + proc_stat = proc_create("ip_conntrack", S_IRUGO, + net->proc_net_stat, &ct_cpu_seq_fops); + if (!proc_stat) + goto err3; + return 0; + +err3: + remove_proc_entry("ip_conntrack_expect", net->proc_net); +err2: + remove_proc_entry("ip_conntrack", net->proc_net); +err1: + return -ENOMEM; +} + +static void __net_exit ip_conntrack_net_exit(struct net *net) +{ + remove_proc_entry("ip_conntrack", net->proc_net_stat); + remove_proc_entry("ip_conntrack_expect", net->proc_net); + remove_proc_entry("ip_conntrack", net->proc_net); +} + +static struct pernet_operations ip_conntrack_net_ops = { + .init = ip_conntrack_net_init, + .exit = ip_conntrack_net_exit, +}; + +int __init nf_conntrack_ipv4_compat_init(void) +{ + return register_pernet_subsys(&ip_conntrack_net_ops); +} + +void __exit nf_conntrack_ipv4_compat_fini(void) +{ + unregister_pernet_subsys(&ip_conntrack_net_ops); +} diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 52dc175be39..a338dad41b7 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -1,18 +1,13 @@ /* (C) 1999-2001 Paul `Rusty' Russell * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2006-2010 Patrick McHardy <kaber@trash.net> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. - * - * 16 Dec 2003: Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp> - * - enable working with Layer 3 protocol independent connection tracking. - * - * Derived from net/ipv4/netfilter/ip_conntrack_proto_icmp.c */ #include <linux/types.h> -#include <linux/sched.h> #include <linux/timer.h> #include <linux/netfilter.h> #include <linux/in.h> @@ -22,32 +17,33 @@ #include <net/checksum.h> #include <linux/netfilter_ipv4.h> #include <net/netfilter/nf_conntrack_tuple.h> -#include <net/netfilter/nf_conntrack_protocol.h> +#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <net/netfilter/nf_log.h> -unsigned long nf_ct_icmp_timeout = 30*HZ; +static unsigned int nf_ct_icmp_timeout __read_mostly = 30*HZ; -#if 0 -#define DEBUGP printk -#else -#define DEBUGP(format, args...) -#endif +static inline struct nf_icmp_net *icmp_pernet(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp; +} -static int icmp_pkt_to_tuple(const struct sk_buff *skb, - unsigned int dataoff, - struct nf_conntrack_tuple *tuple) +static bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, + struct nf_conntrack_tuple *tuple) { - struct icmphdr _hdr, *hp; + const struct icmphdr *hp; + struct icmphdr _hdr; hp = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hp == NULL) - return 0; + return false; tuple->dst.u.icmp.type = hp->type; tuple->src.u.icmp.id = hp->un.echo.id; tuple->dst.u.icmp.code = hp->code; - return 1; + return true; } /* Add 1; spaces filled with 0. */ @@ -62,17 +58,17 @@ static const u_int8_t invmap[] = { [ICMP_ADDRESSREPLY] = ICMP_ADDRESS + 1 }; -static int icmp_invert_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_conntrack_tuple *orig) +static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple *orig) { - if (orig->dst.u.icmp.type >= sizeof(invmap) - || !invmap[orig->dst.u.icmp.type]) - return 0; + if (orig->dst.u.icmp.type >= sizeof(invmap) || + !invmap[orig->dst.u.icmp.type]) + return false; tuple->src.u.icmp.id = orig->src.u.icmp.id; tuple->dst.u.icmp.type = invmap[orig->dst.u.icmp.type] - 1; tuple->dst.u.icmp.code = orig->dst.u.icmp.code; - return 1; + return true; } /* Print out the per-protocol part of the tuple. */ @@ -85,11 +81,9 @@ static int icmp_print_tuple(struct seq_file *s, ntohs(tuple->src.u.icmp.id)); } -/* Print out the private part of the conntrack. */ -static int icmp_print_conntrack(struct seq_file *s, - const struct nf_conn *conntrack) +static unsigned int *icmp_get_timeouts(struct net *net) { - return 0; + return &icmp_pernet(net)->timeout; } /* Returns verdict for packet, or -1 for invalid. */ @@ -97,29 +91,21 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - int pf, - unsigned int hooknum) + u_int8_t pf, + unsigned int hooknum, + unsigned int *timeout) { - /* Try to delete connection immediately after all replies: - won't actually vanish as we still have skb, and del_timer - means this will only run once even if count hits zero twice - (theoretically possible with SMP) */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) { - if (atomic_dec_and_test(&ct->proto.icmp.count) - && del_timer(&ct->timeout)) - ct->timeout.function((unsigned long)ct); - } else { - atomic_inc(&ct->proto.icmp.count); - nf_conntrack_event_cache(IPCT_PROTOINFO_VOLATILE, skb); - nf_ct_refresh_acct(ct, ctinfo, skb, nf_ct_icmp_timeout); - } + /* Do not immediately delete the connection after the first + successful reply to avoid excessive conntrackd traffic + and also to handle correctly ICMP echo reply duplicates. */ + nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); return NF_ACCEPT; } /* Called when a new connection for this protocol found. */ -static int icmp_new(struct nf_conn *conntrack, - const struct sk_buff *skb, unsigned int dataoff) +static bool icmp_new(struct nf_conn *ct, const struct sk_buff *skb, + unsigned int dataoff, unsigned int *timeouts) { static const u_int8_t valid_new[] = { [ICMP_ECHO] = 1, @@ -128,137 +114,94 @@ static int icmp_new(struct nf_conn *conntrack, [ICMP_ADDRESS] = 1 }; - if (conntrack->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) - || !valid_new[conntrack->tuplehash[0].tuple.dst.u.icmp.type]) { + if (ct->tuplehash[0].tuple.dst.u.icmp.type >= sizeof(valid_new) || + !valid_new[ct->tuplehash[0].tuple.dst.u.icmp.type]) { /* Can't create a new ICMP `conn' with this. */ - DEBUGP("icmp: can't create new conn with type %u\n", - conntrack->tuplehash[0].tuple.dst.u.icmp.type); - NF_CT_DUMP_TUPLE(&conntrack->tuplehash[0].tuple); - return 0; + pr_debug("icmp: can't create new conn with type %u\n", + ct->tuplehash[0].tuple.dst.u.icmp.type); + nf_ct_dump_tuple_ip(&ct->tuplehash[0].tuple); + return false; } - atomic_set(&conntrack->proto.icmp.count, 0); - return 1; + return true; } -extern struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4; /* Returns conntrack if it dealt with ICMP, and filled in skb fields */ static int -icmp_error_message(struct sk_buff *skb, - enum ip_conntrack_info *ctinfo, - unsigned int hooknum) +icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, + enum ip_conntrack_info *ctinfo, + unsigned int hooknum) { struct nf_conntrack_tuple innertuple, origtuple; - struct { - struct icmphdr icmp; - struct iphdr ip; - } _in, *inside; - struct nf_conntrack_protocol *innerproto; - struct nf_conntrack_tuple_hash *h; - int dataoff; + const struct nf_conntrack_l4proto *innerproto; + const struct nf_conntrack_tuple_hash *h; + u16 zone = tmpl ? nf_ct_zone(tmpl) : NF_CT_DEFAULT_ZONE; NF_CT_ASSERT(skb->nfct == NULL); - /* Not enough header? */ - inside = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_in), &_in); - if (inside == NULL) - return -NF_ACCEPT; - - /* Ignore ICMP's containing fragments (shouldn't happen) */ - if (inside->ip.frag_off & htons(IP_OFFSET)) { - DEBUGP("icmp_error_message: fragment of proto %u\n", - inside->ip.protocol); - return -NF_ACCEPT; - } - - innerproto = __nf_ct_proto_find(PF_INET, inside->ip.protocol); - dataoff = skb->nh.iph->ihl*4 + sizeof(inside->icmp); /* Are they talking about one of our connections? */ - if (!nf_ct_get_tuple(skb, dataoff, dataoff + inside->ip.ihl*4, PF_INET, - inside->ip.protocol, &origtuple, - &nf_conntrack_l3proto_ipv4, innerproto)) { - DEBUGP("icmp_error_message: ! get_tuple p=%u", - inside->ip.protocol); + if (!nf_ct_get_tuplepr(skb, + skb_network_offset(skb) + ip_hdrlen(skb) + + sizeof(struct icmphdr), + PF_INET, &origtuple)) { + pr_debug("icmp_error_message: failed to get tuple\n"); return -NF_ACCEPT; } - /* Ordinarily, we'd expect the inverted tupleproto, but it's - been preserved inside the ICMP. */ - if (!nf_ct_invert_tuple(&innertuple, &origtuple, + /* rcu_read_lock()ed by nf_hook_slow */ + innerproto = __nf_ct_l4proto_find(PF_INET, origtuple.dst.protonum); + + /* Ordinarily, we'd expect the inverted tupleproto, but it's + been preserved inside the ICMP. */ + if (!nf_ct_invert_tuple(&innertuple, &origtuple, &nf_conntrack_l3proto_ipv4, innerproto)) { - DEBUGP("icmp_error_message: no match\n"); + pr_debug("icmp_error_message: no match\n"); return -NF_ACCEPT; } *ctinfo = IP_CT_RELATED; - h = nf_conntrack_find_get(&innertuple, NULL); + h = nf_conntrack_find_get(net, zone, &innertuple); if (!h) { - /* Locally generated ICMPs will match inverted if they - haven't been SNAT'ed yet */ - /* FIXME: NAT code has to handle half-done double NAT --RR */ - if (hooknum == NF_IP_LOCAL_OUT) - h = nf_conntrack_find_get(&origtuple, NULL); - - if (!h) { - DEBUGP("icmp_error_message: no match\n"); - return -NF_ACCEPT; - } - - /* Reverse direction from that found */ - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; - } else { - if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) - *ctinfo += IP_CT_IS_REPLY; + pr_debug("icmp_error_message: no match\n"); + return -NF_ACCEPT; } - /* Update skb to refer to this connection */ - skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; - skb->nfctinfo = *ctinfo; - return -NF_ACCEPT; + if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) + *ctinfo += IP_CT_IS_REPLY; + + /* Update skb to refer to this connection */ + skb->nfct = &nf_ct_tuplehash_to_ctrack(h)->ct_general; + skb->nfctinfo = *ctinfo; + return NF_ACCEPT; } /* Small and modified version of icmp_rcv */ static int -icmp_error(struct sk_buff *skb, unsigned int dataoff, - enum ip_conntrack_info *ctinfo, int pf, unsigned int hooknum) +icmp_error(struct net *net, struct nf_conn *tmpl, + struct sk_buff *skb, unsigned int dataoff, + enum ip_conntrack_info *ctinfo, u_int8_t pf, unsigned int hooknum) { - struct icmphdr _ih, *icmph; + const struct icmphdr *icmph; + struct icmphdr _ih; /* Not enough header? */ - icmph = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ih), &_ih); + icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: short packet "); + if (LOG_INVALID(net, IPPROTO_ICMP)) + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, + NULL, "nf_ct_icmp: short packet "); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ - if (hooknum != NF_IP_PRE_ROUTING) - goto checksum_skipped; - - switch (skb->ip_summed) { - case CHECKSUM_HW: - if (!(u16)csum_fold(skb->csum)) - break; - if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && + nf_ip_checksum(skb, hooknum, dataoff, 0)) { + if (LOG_INVALID(net, IPPROTO_ICMP)) + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: bad HW ICMP checksum "); return -NF_ACCEPT; - case CHECKSUM_NONE: - if ((u16)csum_fold(skb_checksum(skb, 0, skb->len, 0))) { - if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, - NULL, - "nf_ct_icmp: bad ICMP checksum "); - return -NF_ACCEPT; - } - default: - break; } -checksum_skipped: /* * 18 is the highest 'known' ICMP type. Anything else is a mystery * @@ -266,97 +209,220 @@ checksum_skipped: * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(IPPROTO_ICMP)) - nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL, + if (LOG_INVALID(net, IPPROTO_ICMP)) + nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, "nf_ct_icmp: invalid ICMP type "); return -NF_ACCEPT; } /* Need to track icmp error message? */ - if (icmph->type != ICMP_DEST_UNREACH - && icmph->type != ICMP_SOURCE_QUENCH - && icmph->type != ICMP_TIME_EXCEEDED - && icmph->type != ICMP_PARAMETERPROB - && icmph->type != ICMP_REDIRECT) + if (icmph->type != ICMP_DEST_UNREACH && + icmph->type != ICMP_SOURCE_QUENCH && + icmph->type != ICMP_TIME_EXCEEDED && + icmph->type != ICMP_PARAMETERPROB && + icmph->type != ICMP_REDIRECT) return NF_ACCEPT; - return icmp_error_message(skb, ctinfo, hooknum); + return icmp_error_message(net, tmpl, skb, ctinfo, hooknum); } -#if defined(CONFIG_NF_CT_NETLINK) || \ - defined(CONFIG_NF_CT_NETLINK_MODULE) +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static int icmp_tuple_to_nfattr(struct sk_buff *skb, +static int icmp_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *t) { - NFA_PUT(skb, CTA_PROTO_ICMP_ID, sizeof(u_int16_t), - &t->src.u.icmp.id); - NFA_PUT(skb, CTA_PROTO_ICMP_TYPE, sizeof(u_int8_t), - &t->dst.u.icmp.type); - NFA_PUT(skb, CTA_PROTO_ICMP_CODE, sizeof(u_int8_t), - &t->dst.u.icmp.code); - + if (nla_put_be16(skb, CTA_PROTO_ICMP_ID, t->src.u.icmp.id) || + nla_put_u8(skb, CTA_PROTO_ICMP_TYPE, t->dst.u.icmp.type) || + nla_put_u8(skb, CTA_PROTO_ICMP_CODE, t->dst.u.icmp.code)) + goto nla_put_failure; return 0; -nfattr_failure: +nla_put_failure: return -1; } -static const size_t cta_min_proto[CTA_PROTO_MAX] = { - [CTA_PROTO_ICMP_TYPE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMP_CODE-1] = sizeof(u_int8_t), - [CTA_PROTO_ICMP_ID-1] = sizeof(u_int16_t) +static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = { + [CTA_PROTO_ICMP_TYPE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMP_CODE] = { .type = NLA_U8 }, + [CTA_PROTO_ICMP_ID] = { .type = NLA_U16 }, }; -static int icmp_nfattr_to_tuple(struct nfattr *tb[], +static int icmp_nlattr_to_tuple(struct nlattr *tb[], struct nf_conntrack_tuple *tuple) { - if (!tb[CTA_PROTO_ICMP_TYPE-1] - || !tb[CTA_PROTO_ICMP_CODE-1] - || !tb[CTA_PROTO_ICMP_ID-1]) + if (!tb[CTA_PROTO_ICMP_TYPE] || + !tb[CTA_PROTO_ICMP_CODE] || + !tb[CTA_PROTO_ICMP_ID]) return -EINVAL; - if (nfattr_bad_size(tb, CTA_PROTO_MAX, cta_min_proto)) + tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]); + tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]); + tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]); + + if (tuple->dst.u.icmp.type >= sizeof(invmap) || + !invmap[tuple->dst.u.icmp.type]) return -EINVAL; - tuple->dst.u.icmp.type = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_TYPE-1]); - tuple->dst.u.icmp.code = - *(u_int8_t *)NFA_DATA(tb[CTA_PROTO_ICMP_CODE-1]); - tuple->src.u.icmp.id = - *(u_int16_t *)NFA_DATA(tb[CTA_PROTO_ICMP_ID-1]); + return 0; +} - if (tuple->dst.u.icmp.type >= sizeof(invmap) - || !invmap[tuple->dst.u.icmp.type]) - return -EINVAL; +static int icmp_nlattr_tuple_size(void) +{ + return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); +} +#endif + +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_cttimeout.h> + +static int icmp_timeout_nlattr_to_obj(struct nlattr *tb[], + struct net *net, void *data) +{ + unsigned int *timeout = data; + struct nf_icmp_net *in = icmp_pernet(net); + + if (tb[CTA_TIMEOUT_ICMP_TIMEOUT]) { + *timeout = + ntohl(nla_get_be32(tb[CTA_TIMEOUT_ICMP_TIMEOUT])) * HZ; + } else { + /* Set default ICMP timeout. */ + *timeout = in->timeout; + } + return 0; +} +static int +icmp_timeout_obj_to_nlattr(struct sk_buff *skb, const void *data) +{ + const unsigned int *timeout = data; + + if (nla_put_be32(skb, CTA_TIMEOUT_ICMP_TIMEOUT, htonl(*timeout / HZ))) + goto nla_put_failure; return 0; + +nla_put_failure: + return -ENOSPC; } + +static const struct nla_policy +icmp_timeout_nla_policy[CTA_TIMEOUT_ICMP_MAX+1] = { + [CTA_TIMEOUT_ICMP_TIMEOUT] = { .type = NLA_U32 }, +}; +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + +#ifdef CONFIG_SYSCTL +static struct ctl_table icmp_sysctl_table[] = { + { + .procname = "nf_conntrack_icmp_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT +static struct ctl_table icmp_compat_sysctl_table[] = { + { + .procname = "ip_conntrack_icmp_timeout", + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { } +}; +#endif /* CONFIG_NF_CONNTRACK_PROC_COMPAT */ +#endif /* CONFIG_SYSCTL */ + +static int icmp_kmemdup_sysctl_table(struct nf_proto_net *pn, + struct nf_icmp_net *in) +{ +#ifdef CONFIG_SYSCTL + pn->ctl_table = kmemdup(icmp_sysctl_table, + sizeof(icmp_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_table) + return -ENOMEM; + + pn->ctl_table[0].data = &in->timeout; #endif + return 0; +} -struct nf_conntrack_protocol nf_conntrack_protocol_icmp = +static int icmp_kmemdup_compat_sysctl_table(struct nf_proto_net *pn, + struct nf_icmp_net *in) +{ +#ifdef CONFIG_SYSCTL +#ifdef CONFIG_NF_CONNTRACK_PROC_COMPAT + pn->ctl_compat_table = kmemdup(icmp_compat_sysctl_table, + sizeof(icmp_compat_sysctl_table), + GFP_KERNEL); + if (!pn->ctl_compat_table) + return -ENOMEM; + + pn->ctl_compat_table[0].data = &in->timeout; +#endif +#endif + return 0; +} + +static int icmp_init_net(struct net *net, u_int16_t proto) +{ + int ret; + struct nf_icmp_net *in = icmp_pernet(net); + struct nf_proto_net *pn = &in->pn; + + in->timeout = nf_ct_icmp_timeout; + + ret = icmp_kmemdup_compat_sysctl_table(pn, in); + if (ret < 0) + return ret; + + ret = icmp_kmemdup_sysctl_table(pn, in); + if (ret < 0) + nf_ct_kfree_compat_sysctl_table(pn); + + return ret; +} + +static struct nf_proto_net *icmp_get_net_proto(struct net *net) +{ + return &net->ct.nf_ct_proto.icmp.pn; +} + +struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { - .list = { NULL, NULL }, .l3proto = PF_INET, - .proto = IPPROTO_ICMP, + .l4proto = IPPROTO_ICMP, .name = "icmp", .pkt_to_tuple = icmp_pkt_to_tuple, .invert_tuple = icmp_invert_tuple, .print_tuple = icmp_print_tuple, - .print_conntrack = icmp_print_conntrack, .packet = icmp_packet, + .get_timeouts = icmp_get_timeouts, .new = icmp_new, .error = icmp_error, .destroy = NULL, .me = NULL, -#if defined(CONFIG_NF_CT_NETLINK) || \ - defined(CONFIG_NF_CT_NETLINK_MODULE) - .tuple_to_nfattr = icmp_tuple_to_nfattr, - .nfattr_to_tuple = icmp_nfattr_to_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .tuple_to_nlattr = icmp_tuple_to_nlattr, + .nlattr_tuple_size = icmp_nlattr_tuple_size, + .nlattr_to_tuple = icmp_nlattr_to_tuple, + .nla_policy = icmp_nla_policy, #endif +#if IS_ENABLED(CONFIG_NF_CT_NETLINK_TIMEOUT) + .ctnl_timeout = { + .nlattr_to_obj = icmp_timeout_nlattr_to_obj, + .obj_to_nlattr = icmp_timeout_obj_to_nlattr, + .nlattr_max = CTA_TIMEOUT_ICMP_MAX, + .obj_size = sizeof(unsigned int), + .nla_policy = icmp_timeout_nla_policy, + }, +#endif /* CONFIG_NF_CT_NETLINK_TIMEOUT */ + .init_net = icmp_init_net, + .get_net_proto = icmp_get_net_proto, }; - -EXPORT_SYMBOL(nf_conntrack_protocol_icmp); diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c new file mode 100644 index 00000000000..b8f6381c7d0 --- /dev/null +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -0,0 +1,131 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <net/route.h> +#include <net/ip.h> + +#include <linux/netfilter_bridge.h> +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/ipv4/nf_defrag_ipv4.h> +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#include <net/netfilter/nf_conntrack.h> +#endif +#include <net/netfilter/nf_conntrack_zones.h> + +static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user) +{ + int err; + + skb_orphan(skb); + + local_bh_disable(); + err = ip_defrag(skb, user); + local_bh_enable(); + + if (!err) { + ip_send_check(ip_hdr(skb)); + skb->ignore_df = 1; + } + + return err; +} + +static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ + u16 zone = NF_CT_DEFAULT_ZONE; + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); +#endif + +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP_DEFRAG_CONNTRACK_BRIDGE_IN + zone; +#endif + if (hooknum == NF_INET_PRE_ROUTING) + return IP_DEFRAG_CONNTRACK_IN + zone; + else + return IP_DEFRAG_CONNTRACK_OUT + zone; +} + +static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(skb->sk); + + if (sk && (sk->sk_family == PF_INET) && + inet->nodefrag) + return NF_ACCEPT; + +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) +#if !defined(CONFIG_NF_NAT) && !defined(CONFIG_NF_NAT_MODULE) + /* Previously seen (loopback)? Ignore. Do this before + fragment check. */ + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + return NF_ACCEPT; +#endif +#endif + /* Gather fragments. */ + if (ip_is_fragment(ip_hdr(skb))) { + enum ip_defrag_users user = + nf_ct_defrag_user(ops->hooknum, skb); + + if (nf_ct_ipv4_gather_frags(skb, user)) + return NF_STOLEN; + } + return NF_ACCEPT; +} + +static struct nf_hook_ops ipv4_defrag_ops[] = { + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv4_conntrack_defrag, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV4, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + return nf_register_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv4_defrag_ops, ARRAY_SIZE(ipv4_defrag_ops)); +} + +void nf_defrag_ipv4_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv4_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c new file mode 100644 index 00000000000..574f7ebba0b --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -0,0 +1,631 @@ +/* + * H.323 extension for NAT alteration. + * + * Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net> + * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net> + * + * This source code is licensed under General Public License version 2. + * + * Based on the 'brute force' H.323 NAT module by + * Jozsef Kadlecsik <kadlec@blackhole.kfki.hu> + */ + +#include <linux/module.h> +#include <linux/tcp.h> +#include <net/tcp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <linux/netfilter/nf_conntrack_h323.h> + +/****************************************************************************/ +static int set_addr(struct sk_buff *skb, unsigned int protoff, + unsigned char **data, int dataoff, + unsigned int addroff, __be32 ip, __be16 port) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct { + __be32 ip; + __be16 port; + } __attribute__ ((__packed__)) buf; + const struct tcphdr *th; + struct tcphdr _tcph; + + buf.ip = ip; + buf.port = port; + addroff += dataoff; + + if (ip_hdr(skb)->protocol == IPPROTO_TCP) { + if (!nf_nat_mangle_tcp_packet(skb, ct, ctinfo, + protoff, addroff, sizeof(buf), + (char *) &buf, sizeof(buf))) { + net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_tcp_packet error\n"); + return -1; + } + + /* Relocate data pointer */ + th = skb_header_pointer(skb, ip_hdrlen(skb), + sizeof(_tcph), &_tcph); + if (th == NULL) + return -1; + *data = skb->data + ip_hdrlen(skb) + th->doff * 4 + dataoff; + } else { + if (!nf_nat_mangle_udp_packet(skb, ct, ctinfo, + protoff, addroff, sizeof(buf), + (char *) &buf, sizeof(buf))) { + net_notice_ratelimited("nf_nat_h323: nf_nat_mangle_udp_packet error\n"); + return -1; + } + /* nf_nat_mangle_udp_packet uses skb_make_writable() to copy + * or pull everything in a linear buffer, so we can safely + * use the skb pointers now */ + *data = skb->data + ip_hdrlen(skb) + sizeof(struct udphdr); + } + + return 0; +} + +/****************************************************************************/ +static int set_h225_addr(struct sk_buff *skb, unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port) +{ + return set_addr(skb, protoff, data, dataoff, taddr->ipAddress.ip, + addr->ip, port); +} + +/****************************************************************************/ +static int set_h245_addr(struct sk_buff *skb, unsigned protoff, + unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + union nf_inet_addr *addr, __be16 port) +{ + return set_addr(skb, protoff, data, dataoff, + taddr->unicastAddress.iPAddress.network, + addr->ip, port); +} + +/****************************************************************************/ +static int set_sig_addr(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count) +{ + const struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + int i; + __be16 port; + union nf_inet_addr addr; + + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port)) { + if (addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && + port == info->sig_port[dir]) { + /* GW->GK */ + + /* Fix for Gnomemeeting */ + if (i > 0 && + get_h225_addr(ct, *data, &taddr[0], + &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) + i = 0; + + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.dst.u3.ip, + info->sig_port[!dir]); + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], + &ct->tuplehash[!dir]. + tuple.dst.u3, + info->sig_port[!dir]); + } else if (addr.ip == ct->tuplehash[dir].tuple.dst.u3.ip && + port == info->sig_port[dir]) { + /* GK->GW */ + pr_debug("nf_nat_ras: set signal address %pI4:%hu->%pI4:%hu\n", + &addr.ip, port, + &ct->tuplehash[!dir].tuple.src.u3.ip, + info->sig_port[!dir]); + return set_h225_addr(skb, protoff, data, 0, + &taddr[i], + &ct->tuplehash[!dir]. + tuple.src.u3, + info->sig_port[!dir]); + } + } + } + + return 0; +} + +/****************************************************************************/ +static int set_ras_addr(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int count) +{ + int dir = CTINFO2DIR(ctinfo); + int i; + __be16 port; + union nf_inet_addr addr; + + for (i = 0; i < count; i++) { + if (get_h225_addr(ct, *data, &taddr[i], &addr, &port) && + addr.ip == ct->tuplehash[dir].tuple.src.u3.ip && + port == ct->tuplehash[dir].tuple.src.u.udp.port) { + pr_debug("nf_nat_ras: set rasAddress %pI4:%hu->%pI4:%hu\n", + &addr.ip, ntohs(port), + &ct->tuplehash[!dir].tuple.dst.u3.ip, + ntohs(ct->tuplehash[!dir].tuple.dst.u.udp.port)); + return set_h225_addr(skb, protoff, data, 0, &taddr[i], + &ct->tuplehash[!dir].tuple.dst.u3, + ct->tuplehash[!dir].tuple. + dst.u.udp.port); + } + } + + return 0; +} + +/****************************************************************************/ +static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + H245_TransportAddress *taddr, + __be16 port, __be16 rtp_port, + struct nf_conntrack_expect *rtp_exp, + struct nf_conntrack_expect *rtcp_exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + int i; + u_int16_t nated_port; + + /* Set expectations for NAT */ + rtp_exp->saved_proto.udp.port = rtp_exp->tuple.dst.u.udp.port; + rtp_exp->expectfn = nf_nat_follow_master; + rtp_exp->dir = !dir; + rtcp_exp->saved_proto.udp.port = rtcp_exp->tuple.dst.u.udp.port; + rtcp_exp->expectfn = nf_nat_follow_master; + rtcp_exp->dir = !dir; + + /* Lookup existing expects */ + for (i = 0; i < H323_RTP_CHANNEL_MAX; i++) { + if (info->rtp_port[i][dir] == rtp_port) { + /* Expected */ + + /* Use allocated ports first. This will refresh + * the expects */ + rtp_exp->tuple.dst.u.udp.port = info->rtp_port[i][dir]; + rtcp_exp->tuple.dst.u.udp.port = + htons(ntohs(info->rtp_port[i][dir]) + 1); + break; + } else if (info->rtp_port[i][dir] == 0) { + /* Not expected */ + break; + } + } + + /* Run out of expectations */ + if (i >= H323_RTP_CHANNEL_MAX) { + net_notice_ratelimited("nf_nat_h323: out of expectations\n"); + return 0; + } + + /* Try to get a pair of ports. */ + for (nated_port = ntohs(rtp_exp->tuple.dst.u.udp.port); + nated_port != 0; nated_port += 2) { + int ret; + + rtp_exp->tuple.dst.u.udp.port = htons(nated_port); + ret = nf_ct_expect_related(rtp_exp); + if (ret == 0) { + rtcp_exp->tuple.dst.u.udp.port = + htons(nated_port + 1); + ret = nf_ct_expect_related(rtcp_exp); + if (ret == 0) + break; + else if (ret == -EBUSY) { + nf_ct_unexpect_related(rtp_exp); + continue; + } else if (ret < 0) { + nf_ct_unexpect_related(rtp_exp); + nated_port = 0; + break; + } + } else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_h323: out of RTP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h245_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons((port & htons(1)) ? nated_port + 1 : + nated_port)) == 0) { + /* Save ports */ + info->rtp_port[i][dir] = rtp_port; + info->rtp_port[i][!dir] = htons(nated_port); + } else { + nf_ct_unexpect_related(rtp_exp); + nf_ct_unexpect_related(rtcp_exp); + return -1; + } + + /* Success */ + pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n", + &rtp_exp->tuple.src.u3.ip, + ntohs(rtp_exp->tuple.src.u.udp.port), + &rtp_exp->tuple.dst.u3.ip, + ntohs(rtp_exp->tuple.dst.u.udp.port)); + pr_debug("nf_nat_h323: expect RTCP %pI4:%hu->%pI4:%hu\n", + &rtcp_exp->tuple.src.u3.ip, + ntohs(rtcp_exp->tuple.src.u.udp.port), + &rtcp_exp->tuple.dst.u3.ip, + ntohs(rtcp_exp->tuple.dst.u.udp.port)); + + return 0; +} + +/****************************************************************************/ +static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + H245_TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = nf_nat_follow_master; + exp->dir = !dir; + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(nated_port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_h323: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h245_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) < 0) { + nf_ct_unexpect_related(exp); + return -1; + } + + pr_debug("nf_nat_h323: expect T.120 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = nf_nat_follow_master; + exp->dir = !dir; + + /* Check existing expects */ + if (info->sig_port[dir] == port) + nated_port = ntohs(info->sig_port[!dir]); + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(nated_port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + } else { + nf_ct_unexpect_related(exp); + return -1; + } + + pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/**************************************************************************** + * This conntrack expect function replaces nf_conntrack_q931_expect() + * which was set by nf_conntrack_h323.c. + ****************************************************************************/ +static void ip_nat_q931_expect(struct nf_conn *new, + struct nf_conntrack_expect *this) +{ + struct nf_nat_range range; + + if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ + nf_nat_follow_master(new, this); + return; + } + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = + new->master->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); +} + +/****************************************************************************/ +static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, unsigned char **data, + TransportAddress *taddr, int idx, + __be16 port, struct nf_conntrack_expect *exp) +{ + struct nf_ct_h323_master *info = nfct_help_data(ct); + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port = ntohs(port); + union nf_inet_addr addr; + + /* Set expectations for NAT */ + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_q931_expect; + exp->dir = !dir; + + /* Check existing expects */ + if (info->sig_port[dir] == port) + nated_port = ntohs(info->sig_port[!dir]); + + /* Try to get same port: if not, try to change it. */ + for (; nated_port != 0; nated_port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(nated_port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_ras: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (set_h225_addr(skb, protoff, data, 0, &taddr[idx], + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + + /* Fix for Gnomemeeting */ + if (idx > 0 && + get_h225_addr(ct, *data, &taddr[0], &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { + set_h225_addr(skb, protoff, data, 0, &taddr[0], + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir]); + } + } else { + nf_ct_unexpect_related(exp); + return -1; + } + + /* Success */ + pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +/****************************************************************************/ +static void ip_nat_callforwarding_expect(struct nf_conn *new, + struct nf_conntrack_expect *this) +{ + struct nf_nat_range range; + + /* This must be a fresh one. */ + BUG_ON(new->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr = + new->tuplehash[!this->dir].tuple.src.u3; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); + range.min_proto = range.max_proto = this->saved_proto; + range.min_addr = range.max_addr = this->saved_addr; + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); +} + +/****************************************************************************/ +static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + unsigned char **data, int dataoff, + TransportAddress *taddr, __be16 port, + struct nf_conntrack_expect *exp) +{ + int dir = CTINFO2DIR(ctinfo); + u_int16_t nated_port; + + /* Set expectations for NAT */ + exp->saved_addr = exp->tuple.dst.u3; + exp->tuple.dst.u3.ip = ct->tuplehash[!dir].tuple.dst.u3.ip; + exp->saved_proto.tcp.port = exp->tuple.dst.u.tcp.port; + exp->expectfn = ip_nat_callforwarding_expect; + exp->dir = !dir; + + /* Try to get same port: if not, try to change it. */ + for (nated_port = ntohs(port); nated_port != 0; nated_port++) { + int ret; + + exp->tuple.dst.u.tcp.port = htons(nated_port); + ret = nf_ct_expect_related(exp); + if (ret == 0) + break; + else if (ret != -EBUSY) { + nated_port = 0; + break; + } + } + + if (nated_port == 0) { /* No port available */ + net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); + return 0; + } + + /* Modify signal */ + if (!set_h225_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port)) == 0) { + nf_ct_unexpect_related(exp); + return -1; + } + + /* Success */ + pr_debug("nf_nat_q931: expect Call Forwarding %pI4:%hu->%pI4:%hu\n", + &exp->tuple.src.u3.ip, + ntohs(exp->tuple.src.u.tcp.port), + &exp->tuple.dst.u3.ip, + ntohs(exp->tuple.dst.u.tcp.port)); + + return 0; +} + +static struct nf_ct_helper_expectfn q931_nat = { + .name = "Q.931", + .expectfn = ip_nat_q931_expect, +}; + +static struct nf_ct_helper_expectfn callforwarding_nat = { + .name = "callforwarding", + .expectfn = ip_nat_callforwarding_expect, +}; + +/****************************************************************************/ +static int __init init(void) +{ + BUG_ON(set_h245_addr_hook != NULL); + BUG_ON(set_h225_addr_hook != NULL); + BUG_ON(set_sig_addr_hook != NULL); + BUG_ON(set_ras_addr_hook != NULL); + BUG_ON(nat_rtp_rtcp_hook != NULL); + BUG_ON(nat_t120_hook != NULL); + BUG_ON(nat_h245_hook != NULL); + BUG_ON(nat_callforwarding_hook != NULL); + BUG_ON(nat_q931_hook != NULL); + + RCU_INIT_POINTER(set_h245_addr_hook, set_h245_addr); + RCU_INIT_POINTER(set_h225_addr_hook, set_h225_addr); + RCU_INIT_POINTER(set_sig_addr_hook, set_sig_addr); + RCU_INIT_POINTER(set_ras_addr_hook, set_ras_addr); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, nat_rtp_rtcp); + RCU_INIT_POINTER(nat_t120_hook, nat_t120); + RCU_INIT_POINTER(nat_h245_hook, nat_h245); + RCU_INIT_POINTER(nat_callforwarding_hook, nat_callforwarding); + RCU_INIT_POINTER(nat_q931_hook, nat_q931); + nf_ct_helper_expectfn_register(&q931_nat); + nf_ct_helper_expectfn_register(&callforwarding_nat); + return 0; +} + +/****************************************************************************/ +static void __exit fini(void) +{ + RCU_INIT_POINTER(set_h245_addr_hook, NULL); + RCU_INIT_POINTER(set_h225_addr_hook, NULL); + RCU_INIT_POINTER(set_sig_addr_hook, NULL); + RCU_INIT_POINTER(set_ras_addr_hook, NULL); + RCU_INIT_POINTER(nat_rtp_rtcp_hook, NULL); + RCU_INIT_POINTER(nat_t120_hook, NULL); + RCU_INIT_POINTER(nat_h245_hook, NULL); + RCU_INIT_POINTER(nat_callforwarding_hook, NULL); + RCU_INIT_POINTER(nat_q931_hook, NULL); + nf_ct_helper_expectfn_unregister(&q931_nat); + nf_ct_helper_expectfn_unregister(&callforwarding_nat); + synchronize_rcu(); +} + +/****************************************************************************/ +module_init(init); +module_exit(fini); + +MODULE_AUTHOR("Jing Min Zhao <zhaojingmin@users.sourceforge.net>"); +MODULE_DESCRIPTION("H.323 NAT helper"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ip_nat_h323"); diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c new file mode 100644 index 00000000000..d8b2e14efdd --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -0,0 +1,281 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * (C) 2011 Patrick McHardy <kaber@trash.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/icmp.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <net/secure_seq.h> +#include <net/checksum.h> +#include <net/route.h> +#include <net/ip.h> + +#include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/netfilter/nf_nat_l4proto.h> + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv4_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi4 *fl4 = &fl->u.ip4; + + if (ct->status & statusbit) { + fl4->daddr = t->dst.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl4->saddr = t->src.u3.ip; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl4->fl4_sport = t->src.u.all; + } +} +#endif /* CONFIG_XFRM */ + +static bool nf_nat_ipv4_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ntohl(t->src.u3.ip) >= ntohl(range->min_addr.ip) && + ntohl(t->src.u3.ip) <= ntohl(range->max_addr.ip); +} + +static u32 nf_nat_ipv4_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv4_port_ephemeral(t->src.u3.ip, t->dst.u3.ip, dport); +} + +static bool nf_nat_ipv4_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph; + unsigned int hdroff; + + if (!skb_make_writable(skb, iphdroff + sizeof(*iph))) + return false; + + iph = (void *)skb->data + iphdroff; + hdroff = iphdroff + iph->ihl * 4; + + if (!l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv4, iphdroff, hdroff, + target, maniptype)) + return false; + iph = (void *)skb->data + iphdroff; + + if (maniptype == NF_NAT_MANIP_SRC) { + csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); + iph->saddr = target->src.u3.ip; + } else { + csum_replace4(&iph->check, iph->daddr, target->dst.u3.ip); + iph->daddr = target->dst.u3.ip; + } + return true; +} + +static void nf_nat_ipv4_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + struct iphdr *iph = (struct iphdr *)(skb->data + iphdroff); + __be32 oldip, newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = iph->saddr; + newip = t->src.u3.ip; + } else { + oldip = iph->daddr; + newip = t->dst.u3.ip; + } + inet_proto_csum_replace4(check, skb, oldip, newip, 1); +} + +static void nf_nat_ipv4_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct iphdr *iph = ip_hdr(skb); + struct rtable *rt = skb_rtable(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt_flags & RTCF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V4_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + ip_hdrlen(skb); + skb->csum_offset = (void *)check - data; + *check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_tcpudp_magic(iph->saddr, iph->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +static int nf_nat_ipv4_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V4_MINIP]) { + range->min_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MINIP]); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V4_MAXIP]) + range->max_addr.ip = nla_get_be32(tb[CTA_NAT_V4_MAXIP]); + else + range->max_addr.ip = range->min_addr.ip; + + return 0; +} + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv4 = { + .l3proto = NFPROTO_IPV4, + .in_range = nf_nat_ipv4_in_range, + .secure_port = nf_nat_ipv4_secure_port, + .manip_pkt = nf_nat_ipv4_manip_pkt, + .csum_update = nf_nat_ipv4_csum_update, + .csum_recalc = nf_nat_ipv4_csum_recalc, + .nlattr_to_range = nf_nat_ipv4_nlattr_to_range, +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv4_decode_session, +#endif +}; + +int nf_nat_icmp_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum) +{ + struct { + struct icmphdr icmp; + struct iphdr ip; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + unsigned int hdrlen = ip_hdrlen(skb); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip_checksum(skb, hooknum, hdrlen, 0)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp.type == ICMP_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, inside->ip.protocol); + if (!nf_nat_ipv4_manip_pkt(skb, hdrlen + sizeof(inside->icmp), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + /* Reloading "inside" here since manip_pkt may reallocate */ + inside = (void *)skb->data + hdrlen; + inside->icmp.checksum = 0; + inside->icmp.checksum = + csum_fold(skb_checksum(skb, hdrlen, + skb->len - hdrlen, 0)); + } + + /* Change outer to look like the reply to an incoming packet */ + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV4, 0); + if (!nf_nat_ipv4_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmp_reply_translation); + +static int __init nf_nat_l3proto_ipv4_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV4, &nf_nat_l4proto_icmp); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv4); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv4_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv4); + nf_nat_l4proto_unregister(NFPROTO_IPV4, &nf_nat_l4proto_icmp); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET)); + +module_init(nf_nat_l3proto_ipv4_init); +module_exit(nf_nat_l3proto_ipv4_exit); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c new file mode 100644 index 00000000000..657d2307f03 --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -0,0 +1,311 @@ +/* + * nf_nat_pptp.c + * + * NAT support for PPTP (Point to Point Tunneling Protocol). + * PPTP is a a protocol for creating virtual private networks. + * It is a specification defined by Microsoft and some vendors + * working with Microsoft. PPTP is built on top of a modified + * version of the Internet Generic Routing Encapsulation Protocol. + * GRE is defined in RFC 1701 and RFC 1702. Documentation of + * PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * + * TODO: - NAT to a unique tuple, not to TCP source port + * (needs netfilter tuple reservation) + */ + +#include <linux/module.h> +#include <linux/tcp.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_helper.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_zones.h> +#include <linux/netfilter/nf_conntrack_proto_gre.h> +#include <linux/netfilter/nf_conntrack_pptp.h> + +#define NF_NAT_PPTP_VERSION "3.0" + +#define REQ_CID(req, off) (*(__be16 *)((char *)(req) + (off))) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT helper module for PPTP"); +MODULE_ALIAS("ip_nat_pptp"); + +static void pptp_nat_expected(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct net *net = nf_ct_net(ct); + const struct nf_conn *master = ct->master; + struct nf_conntrack_expect *other_exp; + struct nf_conntrack_tuple t; + const struct nf_ct_pptp_master *ct_pptp_info; + const struct nf_nat_pptp *nat_pptp_info; + struct nf_nat_range range; + + ct_pptp_info = nfct_help_data(master); + nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; + + /* And here goes the grand finale of corrosion... */ + if (exp->dir == IP_CT_DIR_ORIGINAL) { + pr_debug("we are PNS->PAC\n"); + /* therefore, build tuple for PAC->PNS */ + t.src.l3num = AF_INET; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; + t.src.u.gre.key = ct_pptp_info->pac_call_id; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; + t.dst.u.gre.key = ct_pptp_info->pns_call_id; + t.dst.protonum = IPPROTO_GRE; + } else { + pr_debug("we are PAC->PNS\n"); + /* build tuple for PNS->PAC */ + t.src.l3num = AF_INET; + t.src.u3.ip = master->tuplehash[!exp->dir].tuple.src.u3.ip; + t.src.u.gre.key = nat_pptp_info->pns_call_id; + t.dst.u3.ip = master->tuplehash[!exp->dir].tuple.dst.u3.ip; + t.dst.u.gre.key = nat_pptp_info->pac_call_id; + t.dst.protonum = IPPROTO_GRE; + } + + pr_debug("trying to unexpect other dir: "); + nf_ct_dump_tuple_ip(&t); + other_exp = nf_ct_expect_find_get(net, nf_ct_zone(ct), &t); + if (other_exp) { + nf_ct_unexpect_related(other_exp); + nf_ct_expect_put(other_exp); + pr_debug("success\n"); + } else { + pr_debug("not found!\n"); + } + + /* This must be a fresh one. */ + BUG_ON(ct->status & IPS_NAT_DONE_MASK); + + /* Change src to where master sends to */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.dst.u3; + if (exp->dir == IP_CT_DIR_ORIGINAL) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto = range.max_proto = exp->saved_proto; + } + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); + + /* For DST manip, map port here to where it's expected. */ + range.flags = NF_NAT_RANGE_MAP_IPS; + range.min_addr = range.max_addr + = ct->master->tuplehash[!exp->dir].tuple.src.u3; + if (exp->dir == IP_CT_DIR_REPLY) { + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; + range.min_proto = range.max_proto = exp->saved_proto; + } + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); +} + +/* outbound packets == from PNS to PAC */ +static int +pptp_outbound_pkt(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) + +{ + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_nat_pptp *nat_pptp_info; + u_int16_t msg; + __be16 new_callid; + unsigned int cid_off; + + ct_pptp_info = nfct_help_data(ct); + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + + new_callid = ct_pptp_info->pns_call_id; + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, ocreq.callID); + /* FIXME: ideally we would want to reserve a call ID + * here. current netfilter NAT core is not able to do + * this :( For now we use TCP source port. This breaks + * multiple calls within one control session */ + + /* save original call ID in nat_info */ + nat_pptp_info->pns_call_id = ct_pptp_info->pns_call_id; + + /* don't use tcph->source since we are at a DSTmanip + * hook (e.g. PREROUTING) and pkt is not mangled yet */ + new_callid = ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u.tcp.port; + + /* save new call ID in ct info */ + ct_pptp_info->pns_call_id = new_callid; + break; + case PPTP_IN_CALL_REPLY: + cid_off = offsetof(union pptp_ctrl_union, icack.callID); + break; + case PPTP_CALL_CLEAR_REQUEST: + cid_off = offsetof(union pptp_ctrl_union, clrreq.callID); + break; + default: + pr_debug("unknown outbound packet 0x%04x:%s\n", msg, + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : + pptp_msg_name[0]); + /* fall through */ + case PPTP_SET_LINK_INFO: + /* only need to NAT in case PAC is behind NAT box */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REQUEST, IN_CALL_REPLY, CALL_CLEAR_REQUEST pass + * down to here */ + pr_debug("altering call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, cid_off)), ntohs(new_callid)); + + /* mangle packet */ + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + cid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_callid), (char *)&new_callid, + sizeof(new_callid)) == 0) + return NF_DROP; + return NF_ACCEPT; +} + +static void +pptp_exp_gre(struct nf_conntrack_expect *expect_orig, + struct nf_conntrack_expect *expect_reply) +{ + const struct nf_conn *ct = expect_orig->master; + struct nf_ct_pptp_master *ct_pptp_info; + struct nf_nat_pptp *nat_pptp_info; + + ct_pptp_info = nfct_help_data(ct); + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + + /* save original PAC call ID in nat_info */ + nat_pptp_info->pac_call_id = ct_pptp_info->pac_call_id; + + /* alter expectation for PNS->PAC direction */ + expect_orig->saved_proto.gre.key = ct_pptp_info->pns_call_id; + expect_orig->tuple.src.u.gre.key = nat_pptp_info->pns_call_id; + expect_orig->tuple.dst.u.gre.key = ct_pptp_info->pac_call_id; + expect_orig->dir = IP_CT_DIR_ORIGINAL; + + /* alter expectation for PAC->PNS direction */ + expect_reply->saved_proto.gre.key = nat_pptp_info->pns_call_id; + expect_reply->tuple.src.u.gre.key = nat_pptp_info->pac_call_id; + expect_reply->tuple.dst.u.gre.key = ct_pptp_info->pns_call_id; + expect_reply->dir = IP_CT_DIR_REPLY; +} + +/* inbound packets == from PAC to PNS */ +static int +pptp_inbound_pkt(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int protoff, + struct PptpControlHeader *ctlh, + union pptp_ctrl_union *pptpReq) +{ + const struct nf_nat_pptp *nat_pptp_info; + u_int16_t msg; + __be16 new_pcid; + unsigned int pcid_off; + + nat_pptp_info = &nfct_nat(ct)->help.nat_pptp_info; + new_pcid = nat_pptp_info->pns_call_id; + + switch (msg = ntohs(ctlh->messageType)) { + case PPTP_OUT_CALL_REPLY: + pcid_off = offsetof(union pptp_ctrl_union, ocack.peersCallID); + break; + case PPTP_IN_CALL_CONNECT: + pcid_off = offsetof(union pptp_ctrl_union, iccon.peersCallID); + break; + case PPTP_IN_CALL_REQUEST: + /* only need to nat in case PAC is behind NAT box */ + return NF_ACCEPT; + case PPTP_WAN_ERROR_NOTIFY: + pcid_off = offsetof(union pptp_ctrl_union, wanerr.peersCallID); + break; + case PPTP_CALL_DISCONNECT_NOTIFY: + pcid_off = offsetof(union pptp_ctrl_union, disc.callID); + break; + case PPTP_SET_LINK_INFO: + pcid_off = offsetof(union pptp_ctrl_union, setlink.peersCallID); + break; + default: + pr_debug("unknown inbound packet %s\n", + msg <= PPTP_MSG_MAX ? pptp_msg_name[msg] : + pptp_msg_name[0]); + /* fall through */ + case PPTP_START_SESSION_REQUEST: + case PPTP_START_SESSION_REPLY: + case PPTP_STOP_SESSION_REQUEST: + case PPTP_STOP_SESSION_REPLY: + case PPTP_ECHO_REQUEST: + case PPTP_ECHO_REPLY: + /* no need to alter packet */ + return NF_ACCEPT; + } + + /* only OUT_CALL_REPLY, IN_CALL_CONNECT, IN_CALL_REQUEST, + * WAN_ERROR_NOTIFY, CALL_DISCONNECT_NOTIFY pass down here */ + + /* mangle packet */ + pr_debug("altering peer call id from 0x%04x to 0x%04x\n", + ntohs(REQ_CID(pptpReq, pcid_off)), ntohs(new_pcid)); + + if (nf_nat_mangle_tcp_packet(skb, ct, ctinfo, protoff, + pcid_off + sizeof(struct pptp_pkt_hdr) + + sizeof(struct PptpControlHeader), + sizeof(new_pcid), (char *)&new_pcid, + sizeof(new_pcid)) == 0) + return NF_DROP; + return NF_ACCEPT; +} + +static int __init nf_nat_helper_pptp_init(void) +{ + nf_nat_need_gre(); + + BUG_ON(nf_nat_pptp_hook_outbound != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, pptp_outbound_pkt); + + BUG_ON(nf_nat_pptp_hook_inbound != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, pptp_inbound_pkt); + + BUG_ON(nf_nat_pptp_hook_exp_gre != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, pptp_exp_gre); + + BUG_ON(nf_nat_pptp_hook_expectfn != NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, pptp_nat_expected); + return 0; +} + +static void __exit nf_nat_helper_pptp_fini(void) +{ + RCU_INIT_POINTER(nf_nat_pptp_hook_expectfn, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_exp_gre, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_inbound, NULL); + RCU_INIT_POINTER(nf_nat_pptp_hook_outbound, NULL); + synchronize_rcu(); +} + +module_init(nf_nat_helper_pptp_init); +module_exit(nf_nat_helper_pptp_fini); diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c new file mode 100644 index 00000000000..690d890111b --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -0,0 +1,149 @@ +/* + * nf_nat_proto_gre.c + * + * NAT protocol helper module for GRE. + * + * GRE is a generic encapsulation protocol, which is generally not very + * suited for NAT, as it has no protocol-specific part as port numbers. + * + * It has an optional key field, which may help us distinguishing two + * connections between the same two hosts. + * + * GRE is defined in RFC 1701 and RFC 1702, as well as RFC 2784 + * + * PPTP is built on top of a modified version of GRE, and has a mandatory + * field called "CallID", which serves us for the same purpose as the key + * field in plain GRE. + * + * Documentation about PPTP can be found in RFC 2637 + * + * (C) 2000-2005 by Harald Welte <laforge@gnumonks.org> + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + * + * (C) 2006-2012 Patrick McHardy <kaber@trash.net> + * + */ + +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/ip.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_l4proto.h> +#include <linux/netfilter/nf_conntrack_proto_gre.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Harald Welte <laforge@gnumonks.org>"); +MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); + +/* generate unique tuple ... */ +static void +gre_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u_int16_t key; + __be16 *keyptr; + unsigned int min, i, range_size; + + /* If there is no master conntrack we are not PPTP, + do not change tuples */ + if (!ct->master) + return; + + if (maniptype == NF_NAT_MANIP_SRC) + keyptr = &tuple->src.u.gre.key; + else + keyptr = &tuple->dst.u.gre.key; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { + pr_debug("%p: NATing GRE PPTP\n", ct); + min = 1; + range_size = 0xffff; + } else { + min = ntohs(range->min_proto.gre.key); + range_size = ntohs(range->max_proto.gre.key) - min + 1; + } + + pr_debug("min = %u, range_size = %u\n", min, range_size); + + for (i = 0; ; ++key) { + *keyptr = htons(min + key % range_size); + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; + } + + pr_debug("%p: no NAT mapping\n", ct); + return; +} + +/* manipulate a GRE packet according to maniptype */ +static bool +gre_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + const struct gre_hdr *greh; + struct gre_hdr_pptp *pgreh; + + /* pgreh includes two optional 32bit fields which are not required + * to be there. That's where the magic '8' comes from */ + if (!skb_make_writable(skb, hdroff + sizeof(*pgreh) - 8)) + return false; + + greh = (void *)skb->data + hdroff; + pgreh = (struct gre_hdr_pptp *)greh; + + /* we only have destination manip of a packet, since 'source key' + * is not present in the packet itself */ + if (maniptype != NF_NAT_MANIP_DST) + return true; + switch (greh->version) { + case GRE_VERSION_1701: + /* We do not currently NAT any GREv0 packets. + * Try to behave like "nf_nat_proto_unknown" */ + break; + case GRE_VERSION_PPTP: + pr_debug("call_id -> 0x%04x\n", ntohs(tuple->dst.u.gre.key)); + pgreh->call_id = tuple->dst.u.gre.key; + break; + default: + pr_debug("can't nat unknown GRE version\n"); + return false; + } + return true; +} + +static const struct nf_nat_l4proto gre = { + .l4proto = IPPROTO_GRE, + .manip_pkt = gre_manip_pkt, + .in_range = nf_nat_l4proto_in_range, + .unique_tuple = gre_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; + +static int __init nf_nat_proto_gre_init(void) +{ + return nf_nat_l4proto_register(NFPROTO_IPV4, &gre); +} + +static void __exit nf_nat_proto_gre_fini(void) +{ + nf_nat_l4proto_unregister(NFPROTO_IPV4, &gre); +} + +module_init(nf_nat_proto_gre_init); +module_exit(nf_nat_proto_gre_fini); + +void nf_nat_need_gre(void) +{ + return; +} +EXPORT_SYMBOL_GPL(nf_nat_need_gre); diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c new file mode 100644 index 00000000000..eb303471bcf --- /dev/null +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -0,0 +1,83 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/init.h> +#include <linux/export.h> +#include <linux/ip.h> +#include <linux/icmp.h> + +#include <linux/netfilter.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_nat_l4proto.h> + +static bool +icmp_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); +} + +static void +icmp_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u_int16_t id; + unsigned int range_size; + unsigned int i; + + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + /* If no range specified... */ + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xFFFF; + + for (i = 0; ; ++id) { + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + + (id % range_size)); + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; + } + return; +} + +static bool +icmp_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmphdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmphdr *)(skb->data + hdroff); + inet_proto_csum_replace2(&hdr->checksum, skb, + hdr->un.echo.id, tuple->src.u.icmp.id, 0); + hdr->un.echo.id = tuple->src.u.icmp.id; + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_icmp = { + .l4proto = IPPROTO_ICMP, + .manip_pkt = icmp_manip_pkt, + .in_range = icmp_in_range, + .unique_tuple = icmp_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/ipv4/netfilter/ip_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c index 4f95d477805..7c676671329 100644 --- a/net/ipv4/netfilter/ip_nat_snmp_basic.c +++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c @@ -1,13 +1,13 @@ /* - * ip_nat_snmp_basic.c + * nf_nat_snmp_basic.c * * Basic SNMP Application Layer Gateway * - * This IP NAT module is intended for use with SNMP network - * discovery and monitoring applications where target networks use + * This IP NAT module is intended for use with SNMP network + * discovery and monitoring applications where target networks use * conflicting private address realms. * - * Static NAT is used to remap the networks from the view of the network + * Static NAT is used to remap the networks from the view of the network * management system at the IP layer, and this module remaps some application * layer addresses to match. * @@ -20,7 +20,7 @@ * More information on ALG and associated issues can be found in * RFC 2962 * - * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory + * The ASB.1/BER parsing code is derived from the gxsnmp package by Gregory * McLean & Jochen Friedrich, stripped down for use in the kernel. * * Copyright (c) 2000 RP Internet (www.rpi.net.au). @@ -34,44 +34,43 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * Author: James Morris <jmorris@intercode.com.au> * - * Updates: - * 2000-08-06: Convert to new helper API (Harald Welte). - * + * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net> */ -#include <linux/config.h> -#include <linux/in.h> #include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/moduleparam.h> -#include <linux/netfilter_ipv4.h> -#include <linux/netfilter_ipv4/ip_nat.h> -#include <linux/netfilter_ipv4/ip_conntrack_helper.h> -#include <linux/netfilter_ipv4/ip_nat_helper.h> +#include <linux/slab.h> +#include <linux/in.h> #include <linux/ip.h> #include <linux/udp.h> #include <net/checksum.h> #include <net/udp.h> -#include <asm/uaccess.h> + +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_conntrack_expect.h> +#include <net/netfilter/nf_conntrack_helper.h> +#include <net/netfilter/nf_nat_helper.h> +#include <linux/netfilter/nf_conntrack_snmp.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>"); MODULE_DESCRIPTION("Basic SNMP Application Layer Gateway"); +MODULE_ALIAS("ip_nat_snmp_basic"); #define SNMP_PORT 161 #define SNMP_TRAP_PORT 162 -#define NOCT1(n) (u_int8_t )((n) & 0xff) +#define NOCT1(n) (*(u8 *)(n)) static int debug; static DEFINE_SPINLOCK(snmp_lock); -/* - * Application layer address mapping mimics the NAT mapping, but +/* + * Application layer address mapping mimics the NAT mapping, but * only for the first octet in this case (a more flexible system * can be implemented if needed). */ @@ -81,7 +80,7 @@ struct oct1_map u_int8_t to; }; - + /***************************************************************************** * * Basic ASN.1 decoding routines (gxsnmp author Dirk Wisse) @@ -130,7 +129,7 @@ struct oct1_map #define ASN1_ERR_DEC_LENGTH_MISMATCH 4 #define ASN1_ERR_DEC_BADVALUE 5 -/* +/* * ASN.1 context. */ struct asn1_ctx @@ -149,10 +148,10 @@ struct asn1_octstr unsigned char *data; unsigned int len; }; - + static void asn1_open(struct asn1_ctx *ctx, - unsigned char *buf, - unsigned int len) + unsigned char *buf, + unsigned int len) { ctx->begin = buf; ctx->end = buf + len; @@ -173,9 +172,9 @@ static unsigned char asn1_octet_decode(struct asn1_ctx *ctx, unsigned char *ch) static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) { unsigned char ch; - + *tag = 0; - + do { if (!asn1_octet_decode(ctx, &ch)) @@ -186,20 +185,20 @@ static unsigned char asn1_tag_decode(struct asn1_ctx *ctx, unsigned int *tag) return 1; } -static unsigned char asn1_id_decode(struct asn1_ctx *ctx, - unsigned int *cls, - unsigned int *con, - unsigned int *tag) +static unsigned char asn1_id_decode(struct asn1_ctx *ctx, + unsigned int *cls, + unsigned int *con, + unsigned int *tag) { unsigned char ch; - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + *cls = (ch & 0xC0) >> 6; *con = (ch & 0x20) >> 5; *tag = (ch & 0x1F); - + if (*tag == 0x1F) { if (!asn1_tag_decode(ctx, tag)) return 0; @@ -208,25 +207,25 @@ static unsigned char asn1_id_decode(struct asn1_ctx *ctx, } static unsigned char asn1_length_decode(struct asn1_ctx *ctx, - unsigned int *def, - unsigned int *len) + unsigned int *def, + unsigned int *len) { unsigned char ch, cnt; - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + if (ch == 0x80) *def = 0; else { *def = 1; - + if (ch < 0x80) *len = ch; else { - cnt = (unsigned char) (ch & 0x7F); + cnt = ch & 0x7F; *len = 0; - + while (cnt > 0) { if (!asn1_octet_decode(ctx, &ch)) return 0; @@ -236,23 +235,33 @@ static unsigned char asn1_length_decode(struct asn1_ctx *ctx, } } } + + /* don't trust len bigger than ctx buffer */ + if (*len > ctx->end - ctx->pointer) + return 0; + return 1; } static unsigned char asn1_header_decode(struct asn1_ctx *ctx, - unsigned char **eoc, - unsigned int *cls, - unsigned int *con, - unsigned int *tag) + unsigned char **eoc, + unsigned int *cls, + unsigned int *con, + unsigned int *tag) { unsigned int def, len; - + if (!asn1_id_decode(ctx, cls, con, tag)) return 0; - + + def = len = 0; if (!asn1_length_decode(ctx, &def, &len)) return 0; - + + /* primitive shall be definite, indefinite shall be constructed */ + if (*con == ASN1_PRI && !def) + return 0; + if (def) *eoc = ctx->pointer + len; else @@ -263,19 +272,19 @@ static unsigned char asn1_header_decode(struct asn1_ctx *ctx, static unsigned char asn1_eoc_decode(struct asn1_ctx *ctx, unsigned char *eoc) { unsigned char ch; - - if (eoc == 0) { + + if (eoc == NULL) { if (!asn1_octet_decode(ctx, &ch)) return 0; - + if (ch != 0x00) { ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; return 0; } - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + if (ch != 0x00) { ctx->error = ASN1_ERR_DEC_EOC_MISMATCH; return 0; @@ -297,27 +306,27 @@ static unsigned char asn1_null_decode(struct asn1_ctx *ctx, unsigned char *eoc) } static unsigned char asn1_long_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - long *integer) + unsigned char *eoc, + long *integer) { unsigned char ch; unsigned int len; - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + *integer = (signed char) ch; len = 1; - + while (ctx->pointer < eoc) { if (++len > sizeof (long)) { ctx->error = ASN1_ERR_DEC_BADVALUE; return 0; } - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + *integer <<= 8; *integer |= ch; } @@ -325,28 +334,28 @@ static unsigned char asn1_long_decode(struct asn1_ctx *ctx, } static unsigned char asn1_uint_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned int *integer) + unsigned char *eoc, + unsigned int *integer) { unsigned char ch; unsigned int len; - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + *integer = ch; if (ch == 0) len = 0; else len = 1; - + while (ctx->pointer < eoc) { if (++len > sizeof (unsigned int)) { ctx->error = ASN1_ERR_DEC_BADVALUE; return 0; } - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + *integer <<= 8; *integer |= ch; } @@ -354,28 +363,28 @@ static unsigned char asn1_uint_decode(struct asn1_ctx *ctx, } static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned long *integer) + unsigned char *eoc, + unsigned long *integer) { unsigned char ch; unsigned int len; - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + *integer = ch; if (ch == 0) len = 0; else len = 1; - + while (ctx->pointer < eoc) { if (++len > sizeof (unsigned long)) { ctx->error = ASN1_ERR_DEC_BADVALUE; return 0; } - + if (!asn1_octet_decode(ctx, &ch)) return 0; - + *integer <<= 8; *integer |= ch; } @@ -383,24 +392,21 @@ static unsigned char asn1_ulong_decode(struct asn1_ctx *ctx, } static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned char **octets, - unsigned int *len) + unsigned char *eoc, + unsigned char **octets, + unsigned int *len) { unsigned char *ptr; - + *len = 0; - + *octets = kmalloc(eoc - ctx->pointer, GFP_ATOMIC); - if (*octets == NULL) { - if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + if (*octets == NULL) return 0; - } - + ptr = *octets; while (ctx->pointer < eoc) { - if (!asn1_octet_decode(ctx, (unsigned char *)ptr++)) { + if (!asn1_octet_decode(ctx, ptr++)) { kfree(*octets); *octets = NULL; return 0; @@ -411,16 +417,16 @@ static unsigned char asn1_octets_decode(struct asn1_ctx *ctx, } static unsigned char asn1_subid_decode(struct asn1_ctx *ctx, - unsigned long *subid) + unsigned long *subid) { unsigned char ch; - + *subid = 0; - + do { if (!asn1_octet_decode(ctx, &ch)) return 0; - + *subid <<= 7; *subid |= ch & 0x7F; } while ((ch & 0x80) == 0x80); @@ -428,44 +434,46 @@ static unsigned char asn1_subid_decode(struct asn1_ctx *ctx, } static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, - unsigned char *eoc, - unsigned long **oid, - unsigned int *len) + unsigned char *eoc, + unsigned long **oid, + unsigned int *len) { unsigned long subid; - unsigned int size; unsigned long *optr; - + size_t size; + size = eoc - ctx->pointer + 1; + + /* first subid actually encodes first two subids */ + if (size < 2 || size > ULONG_MAX/sizeof(unsigned long)) + return 0; + *oid = kmalloc(size * sizeof(unsigned long), GFP_ATOMIC); - if (*oid == NULL) { - if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); + if (*oid == NULL) return 0; - } - + optr = *oid; - + if (!asn1_subid_decode(ctx, &subid)) { kfree(*oid); *oid = NULL; return 0; } - + if (subid < 40) { - optr [0] = 0; - optr [1] = subid; + optr[0] = 0; + optr[1] = subid; } else if (subid < 80) { - optr [0] = 1; - optr [1] = subid - 40; + optr[0] = 1; + optr[1] = subid - 40; } else { - optr [0] = 2; - optr [1] = subid - 80; + optr[0] = 2; + optr[1] = subid - 80; } - + *len = 2; optr += 2; - + while (ctx->pointer < eoc) { if (++(*len) > size) { ctx->error = ASN1_ERR_DEC_BADVALUE; @@ -473,7 +481,7 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx, *oid = NULL; return 0; } - + if (!asn1_subid_decode(ctx, optr++)) { kfree(*oid); *oid = NULL; @@ -611,9 +619,9 @@ struct snmp_v1_trap #define SERR_EOM 2 static inline void mangle_address(unsigned char *begin, - unsigned char *addr, - const struct oct1_map *map, - u_int16_t *check); + unsigned char *addr, + const struct oct1_map *map, + __sum16 *check); struct snmp_cnv { unsigned int class; @@ -621,8 +629,7 @@ struct snmp_cnv int syntax; }; -static struct snmp_cnv snmp_conv [] = -{ +static const struct snmp_cnv snmp_conv[] = { {ASN1_UNI, ASN1_NUL, SNMP_NULL}, {ASN1_UNI, ASN1_INT, SNMP_INTEGER}, {ASN1_UNI, ASN1_OTS, SNMP_OCTETSTR}, @@ -633,7 +640,7 @@ static struct snmp_cnv snmp_conv [] = {ASN1_APL, SNMP_GGE, SNMP_GAUGE}, /* Gauge32 == Unsigned32 */ {ASN1_APL, SNMP_TIT, SNMP_TIMETICKS}, {ASN1_APL, SNMP_OPQ, SNMP_OPAQUE}, - + /* SNMPv2 data types and errors */ {ASN1_UNI, ASN1_BTS, SNMP_BITSTR}, {ASN1_APL, SNMP_C64, SNMP_COUNTER64}, @@ -644,13 +651,13 @@ static struct snmp_cnv snmp_conv [] = }; static unsigned char snmp_tag_cls2syntax(unsigned int tag, - unsigned int cls, - unsigned short *syntax) + unsigned int cls, + unsigned short *syntax) { - struct snmp_cnv *cnv; - + const struct snmp_cnv *cnv; + cnv = snmp_conv; - + while (cnv->syntax != -1) { if (cnv->tag == tag && cnv->class == cls) { *syntax = cnv->syntax; @@ -662,165 +669,155 @@ static unsigned char snmp_tag_cls2syntax(unsigned int tag, } static unsigned char snmp_object_decode(struct asn1_ctx *ctx, - struct snmp_object **obj) + struct snmp_object **obj) { unsigned int cls, con, tag, len, idlen; unsigned short type; unsigned char *eoc, *end, *p; unsigned long *lp, *id; unsigned long ul; - long l; - + long l; + *obj = NULL; id = NULL; - + if (!asn1_header_decode(ctx, &eoc, &cls, &con, &tag)) return 0; - + if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) return 0; - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) return 0; - + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) return 0; - + if (!asn1_oid_decode(ctx, end, &id, &idlen)) return 0; - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) { kfree(id); return 0; } - + if (con != ASN1_PRI) { kfree(id); return 0; } - + + type = 0; if (!snmp_tag_cls2syntax(tag, cls, &type)) { kfree(id); return 0; } - + + l = 0; switch (type) { - case SNMP_INTEGER: - len = sizeof(long); - if (!asn1_long_decode(ctx, end, &l)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, - GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - (*obj)->syntax.l[0] = l; - break; - case SNMP_OCTETSTR: - case SNMP_OPAQUE: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, - GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.c, p, len); + case SNMP_INTEGER: + len = sizeof(long); + if (!asn1_long_decode(ctx, end, &l)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + return 0; + } + (*obj)->syntax.l[0] = l; + break; + case SNMP_OCTETSTR: + case SNMP_OPAQUE: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { kfree(p); - break; - case SNMP_NULL: - case SNMP_NOSUCHOBJECT: - case SNMP_NOSUCHINSTANCE: - case SNMP_ENDOFMIBVIEW: - len = 0; - *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - if (!asn1_null_decode(ctx, end)) { - kfree(id); - kfree(*obj); - *obj = NULL; - return 0; - } - break; - case SNMP_OBJECTID: - if (!asn1_oid_decode(ctx, end, (unsigned long **)&lp, &len)) { - kfree(id); - return 0; - } - len *= sizeof(unsigned long); - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.ul, lp, len); + kfree(id); + return 0; + } + memcpy((*obj)->syntax.c, p, len); + kfree(p); + break; + case SNMP_NULL: + case SNMP_NOSUCHOBJECT: + case SNMP_NOSUCHINSTANCE: + case SNMP_ENDOFMIBVIEW: + len = 0; + *obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + return 0; + } + if (!asn1_null_decode(ctx, end)) { + kfree(id); + kfree(*obj); + *obj = NULL; + return 0; + } + break; + case SNMP_OBJECTID: + if (!asn1_oid_decode(ctx, end, &lp, &len)) { + kfree(id); + return 0; + } + len *= sizeof(unsigned long); + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { kfree(lp); - break; - case SNMP_IPADDR: - if (!asn1_octets_decode(ctx, end, &p, &len)) { - kfree(id); - return 0; - } - if (len != 4) { - kfree(p); - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(p); - kfree(id); - if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - memcpy((*obj)->syntax.uc, p, len); + kfree(id); + return 0; + } + memcpy((*obj)->syntax.ul, lp, len); + kfree(lp); + break; + case SNMP_IPADDR: + if (!asn1_octets_decode(ctx, end, &p, &len)) { + kfree(id); + return 0; + } + if (len != 4) { kfree(p); - break; - case SNMP_COUNTER: - case SNMP_GAUGE: - case SNMP_TIMETICKS: - len = sizeof(unsigned long); - if (!asn1_ulong_decode(ctx, end, &ul)) { - kfree(id); - return 0; - } - *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); - if (*obj == NULL) { - kfree(id); - if (net_ratelimit()) - printk("OOM in bsalg (%d)\n", __LINE__); - return 0; - } - (*obj)->syntax.ul[0] = ul; - break; - default: kfree(id); return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(p); + kfree(id); + return 0; + } + memcpy((*obj)->syntax.uc, p, len); + kfree(p); + break; + case SNMP_COUNTER: + case SNMP_GAUGE: + case SNMP_TIMETICKS: + len = sizeof(unsigned long); + if (!asn1_ulong_decode(ctx, end, &ul)) { + kfree(id); + return 0; + } + *obj = kmalloc(sizeof(struct snmp_object) + len, GFP_ATOMIC); + if (*obj == NULL) { + kfree(id); + return 0; + } + (*obj)->syntax.ul[0] = ul; + break; + default: + kfree(id); + return 0; } - + (*obj)->syntax_len = len; (*obj)->type = type; (*obj)->id = id; (*obj)->id_len = idlen; - + if (!asn1_eoc_decode(ctx, eoc)) { kfree(id); kfree(*obj); @@ -831,181 +828,167 @@ static unsigned char snmp_object_decode(struct asn1_ctx *ctx, } static unsigned char snmp_request_decode(struct asn1_ctx *ctx, - struct snmp_request *request) + struct snmp_request *request) { unsigned int cls, con, tag; unsigned char *end; - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) return 0; - + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) return 0; - + if (!asn1_ulong_decode(ctx, end, &request->id)) return 0; - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) return 0; - + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) return 0; - + if (!asn1_uint_decode(ctx, end, &request->error_status)) return 0; - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) return 0; - + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) return 0; - + if (!asn1_uint_decode(ctx, end, &request->error_index)) return 0; - + return 1; } -/* +/* * Fast checksum update for possibly oddly-aligned UDP byte, from the * code example in the draft. */ -static void fast_csum(unsigned char *csum, - const unsigned char *optr, - const unsigned char *nptr, - int odd) +static void fast_csum(__sum16 *csum, + const unsigned char *optr, + const unsigned char *nptr, + int offset) { - long x, old, new; - - x = csum[0] * 256 + csum[1]; - - x =~ x & 0xFFFF; - - if (odd) old = optr[0] * 256; - else old = optr[0]; - - x -= old & 0xFFFF; - if (x <= 0) { - x--; - x &= 0xFFFF; - } - - if (odd) new = nptr[0] * 256; - else new = nptr[0]; - - x += new & 0xFFFF; - if (x & 0x10000) { - x++; - x &= 0xFFFF; + unsigned char s[4]; + + if (offset & 1) { + s[0] = ~0; + s[1] = ~*optr; + s[2] = 0; + s[3] = *nptr; + } else { + s[0] = ~*optr; + s[1] = ~0; + s[2] = *nptr; + s[3] = 0; } - - x =~ x & 0xFFFF; - csum[0] = x / 256; - csum[1] = x & 0xFF; + + *csum = csum_fold(csum_partial(s, 4, ~csum_unfold(*csum))); } -/* +/* * Mangle IP address. * - begin points to the start of the snmp messgae * - addr points to the start of the address */ static inline void mangle_address(unsigned char *begin, - unsigned char *addr, - const struct oct1_map *map, - u_int16_t *check) + unsigned char *addr, + const struct oct1_map *map, + __sum16 *check) { - if (map->from == NOCT1(*addr)) { + if (map->from == NOCT1(addr)) { u_int32_t old; - + if (debug) - memcpy(&old, (unsigned char *)addr, sizeof(old)); - + memcpy(&old, addr, sizeof(old)); + *addr = map->to; - + /* Update UDP checksum if being used */ if (*check) { - unsigned char odd = !((addr - begin) % 2); - - fast_csum((unsigned char *)check, - &map->from, &map->to, odd); - + fast_csum(check, + &map->from, &map->to, addr - begin); + } - + if (debug) - printk(KERN_DEBUG "bsalg: mapped %u.%u.%u.%u to " - "%u.%u.%u.%u\n", NIPQUAD(old), NIPQUAD(*addr)); + printk(KERN_DEBUG "bsalg: mapped %pI4 to %pI4\n", + &old, addr); } } static unsigned char snmp_trap_decode(struct asn1_ctx *ctx, - struct snmp_v1_trap *trap, - const struct oct1_map *map, - u_int16_t *check) + struct snmp_v1_trap *trap, + const struct oct1_map *map, + __sum16 *check) { unsigned int cls, con, tag, len; unsigned char *end; if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) return 0; - + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_OJI) return 0; - + if (!asn1_oid_decode(ctx, end, &trap->id, &trap->id_len)) return 0; - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) goto err_id_free; if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_IPA) || (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_OTS))) goto err_id_free; - + if (!asn1_octets_decode(ctx, end, (unsigned char **)&trap->ip_address, &len)) goto err_id_free; - + /* IPv4 only */ if (len != 4) goto err_addr_free; - + mangle_address(ctx->begin, ctx->pointer - 4, map, check); - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) goto err_addr_free; - + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) goto err_addr_free; - + if (!asn1_uint_decode(ctx, end, &trap->general)) goto err_addr_free; - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) goto err_addr_free; - + if (cls != ASN1_UNI || con != ASN1_PRI || tag != ASN1_INT) goto err_addr_free; - + if (!asn1_uint_decode(ctx, end, &trap->specific)) goto err_addr_free; - + if (!asn1_header_decode(ctx, &end, &cls, &con, &tag)) goto err_addr_free; - + if (!((cls == ASN1_APL && con == ASN1_PRI && tag == SNMP_TIT) || (cls == ASN1_UNI && con == ASN1_PRI && tag == ASN1_INT))) goto err_addr_free; - + if (!asn1_ulong_decode(ctx, end, &trap->time)) goto err_addr_free; - + return 1; +err_addr_free: + kfree((unsigned long *)trap->ip_address); + err_id_free: kfree(trap->id); -err_addr_free: - kfree((unsigned long *)trap->ip_address); - return 0; } @@ -1015,10 +998,10 @@ err_addr_free: * *****************************************************************************/ -static void hex_dump(unsigned char *buf, size_t len) +static void hex_dump(const unsigned char *buf, size_t len) { size_t i; - + for (i = 0; i < len; i++) { if (i && !(i % 16)) printk("\n"); @@ -1032,30 +1015,30 @@ static void hex_dump(unsigned char *buf, size_t len) * (And this is the fucking 'basic' method). */ static int snmp_parse_mangle(unsigned char *msg, - u_int16_t len, - const struct oct1_map *map, - u_int16_t *check) + u_int16_t len, + const struct oct1_map *map, + __sum16 *check) { unsigned char *eoc, *end; unsigned int cls, con, tag, vers, pdutype; struct asn1_ctx ctx; struct asn1_octstr comm; - struct snmp_object **obj; - + struct snmp_object *obj; + if (debug > 1) hex_dump(msg, len); asn1_open(&ctx, msg, len); - - /* + + /* * Start of SNMP message. */ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) return 0; if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) return 0; - - /* + + /* * Version 1 or 2 handled. */ if (!asn1_header_decode(&ctx, &end, &cls, &con, &tag)) @@ -1068,7 +1051,7 @@ static int snmp_parse_mangle(unsigned char *msg, printk(KERN_DEBUG "bsalg: snmp version: %u\n", vers + 1); if (vers > 1) return 1; - + /* * Community. */ @@ -1080,14 +1063,14 @@ static int snmp_parse_mangle(unsigned char *msg, return 0; if (debug > 1) { unsigned int i; - + printk(KERN_DEBUG "bsalg: community: "); for (i = 0; i < comm.len; i++) printk("%c", comm.data[i]); printk("\n"); } kfree(comm.data); - + /* * PDU type */ @@ -1096,7 +1079,7 @@ static int snmp_parse_mangle(unsigned char *msg, if (cls != ASN1_CTX || con != ASN1_CON) return 0; if (debug > 1) { - unsigned char *pdus[] = { + static const unsigned char *const pdus[] = { [SNMP_PDU_GET] = "get", [SNMP_PDU_NEXT] = "get-next", [SNMP_PDU_RESPONSE] = "response", @@ -1106,7 +1089,7 @@ static int snmp_parse_mangle(unsigned char *msg, [SNMP_PDU_INFORM] = "inform", [SNMP_PDU_TRAP2] = "trapv2" }; - + if (pdutype > SNMP_PDU_TRAP2) printk(KERN_DEBUG "bsalg: bad pdu type %u\n", pdutype); else @@ -1115,83 +1098,73 @@ static int snmp_parse_mangle(unsigned char *msg, if (pdutype != SNMP_PDU_RESPONSE && pdutype != SNMP_PDU_TRAP1 && pdutype != SNMP_PDU_TRAP2) return 1; - + /* * Request header or v1 trap */ if (pdutype == SNMP_PDU_TRAP1) { struct snmp_v1_trap trap; unsigned char ret = snmp_trap_decode(&ctx, &trap, map, check); - - /* Discard trap allocations regardless */ - kfree(trap.id); - kfree((unsigned long *)trap.ip_address); - - if (!ret) + + if (ret) { + kfree(trap.id); + kfree((unsigned long *)trap.ip_address); + } else return ret; - + } else { struct snmp_request req; - + if (!snmp_request_decode(&ctx, &req)) return 0; - + if (debug > 1) printk(KERN_DEBUG "bsalg: request: id=0x%lx error_status=%u " "error_index=%u\n", req.id, req.error_status, req.error_index); } - + /* * Loop through objects, look for IP addresses to mangle. */ if (!asn1_header_decode(&ctx, &eoc, &cls, &con, &tag)) return 0; - + if (cls != ASN1_UNI || con != ASN1_CON || tag != ASN1_SEQ) return 0; - - obj = kmalloc(sizeof(struct snmp_object), GFP_ATOMIC); - if (obj == NULL) { - if (net_ratelimit()) - printk(KERN_WARNING "OOM in bsalg(%d)\n", __LINE__); - return 0; - } while (!asn1_eoc_decode(&ctx, eoc)) { unsigned int i; - - if (!snmp_object_decode(&ctx, obj)) { - if (*obj) { - kfree((*obj)->id); - kfree(*obj); - } - kfree(obj); + + if (!snmp_object_decode(&ctx, &obj)) { + if (obj) { + kfree(obj->id); + kfree(obj); + } return 0; } if (debug > 1) { printk(KERN_DEBUG "bsalg: object: "); - for (i = 0; i < (*obj)->id_len; i++) { + for (i = 0; i < obj->id_len; i++) { if (i > 0) printk("."); - printk("%lu", (*obj)->id[i]); + printk("%lu", obj->id[i]); } - printk(": type=%u\n", (*obj)->type); - + printk(": type=%u\n", obj->type); + } - if ((*obj)->type == SNMP_IPADDR) + if (obj->type == SNMP_IPADDR) mangle_address(ctx.begin, ctx.pointer - 4 , map, check); - - kfree((*obj)->id); - kfree(*obj); + + kfree(obj->id); + kfree(obj); } - kfree(obj); - + if (!asn1_eoc_decode(&ctx, eoc)) return 0; - + return 1; } @@ -1201,15 +1174,15 @@ static int snmp_parse_mangle(unsigned char *msg, * *****************************************************************************/ -/* +/* * SNMP translation routine. */ -static int snmp_translate(struct ip_conntrack *ct, - enum ip_conntrack_info ctinfo, - struct sk_buff **pskb) +static int snmp_translate(struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + struct sk_buff *skb) { - struct iphdr *iph = (*pskb)->nh.iph; - struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + struct iphdr *iph = ip_hdr(skb); + struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); u_int16_t udplen = ntohs(udph->len); u_int16_t paylen = udplen - sizeof(struct udphdr); int dir = CTINFO2DIR(ctinfo); @@ -1221,21 +1194,20 @@ static int snmp_translate(struct ip_conntrack *ct, */ if (dir == IP_CT_DIR_ORIGINAL) { /* SNAT traps */ - map.from = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.ip); - map.to = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.ip); + map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip); + map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip); } else { /* DNAT replies */ - map.from = NOCT1(ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.ip); - map.to = NOCT1(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.dst.ip); + map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip); + map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip); } - + if (map.from == map.to) return NF_ACCEPT; - + if (!snmp_parse_mangle((unsigned char *)udph + sizeof(struct udphdr), - paylen, &map, &udph->check)) { - if (net_ratelimit()) - printk(KERN_WARNING "bsalg: parser failed\n"); + paylen, &map, &udph->check)) { + net_warn_ratelimited("bsalg: parser failed\n"); return NF_DROP; } return NF_ACCEPT; @@ -1243,76 +1215,69 @@ static int snmp_translate(struct ip_conntrack *ct, /* We don't actually set up expectations, just adjust internal IP * addresses if this is being NATted */ -static int help(struct sk_buff **pskb, - struct ip_conntrack *ct, +static int help(struct sk_buff *skb, unsigned int protoff, + struct nf_conn *ct, enum ip_conntrack_info ctinfo) { int dir = CTINFO2DIR(ctinfo); unsigned int ret; - struct iphdr *iph = (*pskb)->nh.iph; - struct udphdr *udph = (struct udphdr *)((u_int32_t *)iph + iph->ihl); + const struct iphdr *iph = ip_hdr(skb); + const struct udphdr *udph = (struct udphdr *)((__be32 *)iph + iph->ihl); /* SNMP replies and originating SNMP traps get mangled */ - if (udph->source == ntohs(SNMP_PORT) && dir != IP_CT_DIR_REPLY) + if (udph->source == htons(SNMP_PORT) && dir != IP_CT_DIR_REPLY) return NF_ACCEPT; - if (udph->dest == ntohs(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) + if (udph->dest == htons(SNMP_TRAP_PORT) && dir != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; /* No NAT? */ if (!(ct->status & IPS_NAT_MASK)) return NF_ACCEPT; - /* + /* * Make sure the packet length is ok. So far, we were only guaranteed * to have a valid length IP header plus 8 bytes, which means we have * enough room for a UDP header. Just verify the UDP length field so we * can mess around with the payload. */ - if (ntohs(udph->len) != (*pskb)->len - (iph->ihl << 2)) { - if (net_ratelimit()) - printk(KERN_WARNING "SNMP: dropping malformed packet " - "src=%u.%u.%u.%u dst=%u.%u.%u.%u\n", - NIPQUAD(iph->saddr), NIPQUAD(iph->daddr)); + if (ntohs(udph->len) != skb->len - (iph->ihl << 2)) { + net_warn_ratelimited("SNMP: dropping malformed packet src=%pI4 dst=%pI4\n", + &iph->saddr, &iph->daddr); return NF_DROP; } - if (!skb_make_writable(pskb, (*pskb)->len)) + if (!skb_make_writable(skb, skb->len)) return NF_DROP; spin_lock_bh(&snmp_lock); - ret = snmp_translate(ct, ctinfo, pskb); + ret = snmp_translate(ct, ctinfo, skb); spin_unlock_bh(&snmp_lock); return ret; } -static struct ip_conntrack_helper snmp_helper = { - .max_expected = 0, - .timeout = 180, - .me = THIS_MODULE, - .help = help, - .name = "snmp", - - .tuple = { .src = { .u = { __constant_htons(SNMP_PORT) } }, - .dst = { .protonum = IPPROTO_UDP }, - }, - .mask = { .src = { .u = { 0xFFFF } }, - .dst = { .protonum = 0xFF }, - }, +static const struct nf_conntrack_expect_policy snmp_exp_policy = { + .max_expected = 0, + .timeout = 180, +}; + +static struct nf_conntrack_helper snmp_helper __read_mostly = { + .me = THIS_MODULE, + .help = help, + .expect_policy = &snmp_exp_policy, + .name = "snmp", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = cpu_to_be16(SNMP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, }; -static struct ip_conntrack_helper snmp_trap_helper = { - .max_expected = 0, - .timeout = 180, - .me = THIS_MODULE, - .help = help, - .name = "snmp_trap", - - .tuple = { .src = { .u = { __constant_htons(SNMP_TRAP_PORT) } }, - .dst = { .protonum = IPPROTO_UDP }, - }, - .mask = { .src = { .u = { 0xFFFF } }, - .dst = { .protonum = 0xFF }, - }, +static struct nf_conntrack_helper snmp_trap_helper __read_mostly = { + .me = THIS_MODULE, + .help = help, + .expect_policy = &snmp_exp_policy, + .name = "snmp_trap", + .tuple.src.l3num = AF_INET, + .tuple.src.u.udp.port = cpu_to_be16(SNMP_TRAP_PORT), + .tuple.dst.protonum = IPPROTO_UDP, }; /***************************************************************************** @@ -1320,29 +1285,29 @@ static struct ip_conntrack_helper snmp_trap_helper = { * Module stuff. * *****************************************************************************/ - -static int __init init(void) + +static int __init nf_nat_snmp_basic_init(void) { int ret = 0; - ret = ip_conntrack_helper_register(&snmp_helper); - if (ret < 0) - return ret; - ret = ip_conntrack_helper_register(&snmp_trap_helper); + BUG_ON(nf_nat_snmp_hook != NULL); + RCU_INIT_POINTER(nf_nat_snmp_hook, help); + + ret = nf_conntrack_helper_register(&snmp_trap_helper); if (ret < 0) { - ip_conntrack_helper_unregister(&snmp_helper); + nf_conntrack_helper_unregister(&snmp_helper); return ret; } return ret; } -static void __exit fini(void) +static void __exit nf_nat_snmp_basic_fini(void) { - ip_conntrack_helper_unregister(&snmp_helper); - ip_conntrack_helper_unregister(&snmp_trap_helper); + RCU_INIT_POINTER(nf_nat_snmp_hook, NULL); + nf_conntrack_helper_unregister(&snmp_trap_helper); } -module_init(init); -module_exit(fini); +module_init(nf_nat_snmp_basic_init); +module_exit(nf_nat_snmp_basic_fini); -module_param(debug, bool, 0600); +module_param(debug, int, 0600); diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c new file mode 100644 index 00000000000..19412a4063f --- /dev/null +++ b/net/ipv4/netfilter/nf_tables_arp.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/netfilter_arp.h> +#include <net/netfilter/nf_tables.h> + +static unsigned int +nft_do_chain_arp(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + +static struct nft_af_info nft_af_arp __read_mostly = { + .family = NFPROTO_ARP, + .nhooks = NF_ARP_NUMHOOKS, + .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_ARP_IN] = nft_do_chain_arp, + [NF_ARP_OUT] = nft_do_chain_arp, + [NF_ARP_FORWARD] = nft_do_chain_arp, + }, +}; + +static int nf_tables_arp_init_net(struct net *net) +{ + net->nft.arp = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.arp== NULL) + return -ENOMEM; + + memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp)); + + if (nft_register_afinfo(net, net->nft.arp) < 0) + goto err; + + return 0; +err: + kfree(net->nft.arp); + return -ENOMEM; +} + +static void nf_tables_arp_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.arp); + kfree(net->nft.arp); +} + +static struct pernet_operations nf_tables_arp_net_ops = { + .init = nf_tables_arp_init_net, + .exit = nf_tables_arp_exit_net, +}; + +static const struct nf_chain_type filter_arp = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_ARP, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_ARP_IN) | + (1 << NF_ARP_OUT) | + (1 << NF_ARP_FORWARD), +}; + +static int __init nf_tables_arp_init(void) +{ + int ret; + + nft_register_chain_type(&filter_arp); + ret = register_pernet_subsys(&nf_tables_arp_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_arp); + + return ret; +} + +static void __exit nf_tables_arp_exit(void) +{ + unregister_pernet_subsys(&nf_tables_arp_net_ops); + nft_unregister_chain_type(&filter_arp); +} + +module_init(nf_tables_arp_init); +module_exit(nf_tables_arp_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */ diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c new file mode 100644 index 00000000000..6820c8c4084 --- /dev/null +++ b/net/ipv4/netfilter/nf_tables_ipv4.c @@ -0,0 +1,129 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/ip.h> +#include <linux/netfilter_ipv4.h> +#include <net/netfilter/nf_tables.h> +#include <net/net_namespace.h> +#include <net/ip.h> +#include <net/netfilter/nf_tables_ipv4.h> + +static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nft_pktinfo pkt; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + return nft_do_chain(&pkt, ops); +} + +static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (unlikely(skb->len < sizeof(struct iphdr) || + ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4)) { + if (net_ratelimit()) + pr_info("nf_tables_ipv4: ignoring short SOCK_RAW " + "packet\n"); + return NF_ACCEPT; + } + + return nft_do_chain_ipv4(ops, skb, in, out, okfn); +} + +struct nft_af_info nft_af_ipv4 __read_mostly = { + .family = NFPROTO_IPV4, + .nhooks = NF_INET_NUMHOOKS, + .owner = THIS_MODULE, + .nops = 1, + .hooks = { + [NF_INET_LOCAL_IN] = nft_do_chain_ipv4, + [NF_INET_LOCAL_OUT] = nft_ipv4_output, + [NF_INET_FORWARD] = nft_do_chain_ipv4, + [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4, + [NF_INET_POST_ROUTING] = nft_do_chain_ipv4, + }, +}; +EXPORT_SYMBOL_GPL(nft_af_ipv4); + +static int nf_tables_ipv4_init_net(struct net *net) +{ + net->nft.ipv4 = kmalloc(sizeof(struct nft_af_info), GFP_KERNEL); + if (net->nft.ipv4 == NULL) + return -ENOMEM; + + memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4)); + + if (nft_register_afinfo(net, net->nft.ipv4) < 0) + goto err; + + return 0; +err: + kfree(net->nft.ipv4); + return -ENOMEM; +} + +static void nf_tables_ipv4_exit_net(struct net *net) +{ + nft_unregister_afinfo(net->nft.ipv4); + kfree(net->nft.ipv4); +} + +static struct pernet_operations nf_tables_ipv4_net_ops = { + .init = nf_tables_ipv4_init_net, + .exit = nf_tables_ipv4_exit_net, +}; + +static const struct nf_chain_type filter_ipv4 = { + .name = "filter", + .type = NFT_CHAIN_T_DEFAULT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_FORWARD) | + (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING), +}; + +static int __init nf_tables_ipv4_init(void) +{ + int ret; + + nft_register_chain_type(&filter_ipv4); + ret = register_pernet_subsys(&nf_tables_ipv4_net_ops); + if (ret < 0) + nft_unregister_chain_type(&filter_ipv4); + + return ret; +} + +static void __exit nf_tables_ipv4_exit(void) +{ + unregister_pernet_subsys(&nf_tables_ipv4_net_ops); + nft_unregister_chain_type(&filter_ipv4); +} + +module_init(nf_tables_ipv4_init); +module_exit(nf_tables_ipv4_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_FAMILY(AF_INET); diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c new file mode 100644 index 00000000000..3964157d826 --- /dev/null +++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c @@ -0,0 +1,199 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * Copyright (c) 2012 Intel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_conntrack.h> +#include <net/netfilter/nf_nat.h> +#include <net/netfilter/nf_nat_core.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/netfilter/nf_nat_l3proto.h> +#include <net/ip.h> + +/* + * NAT chains + */ + +static unsigned int nf_nat_fn(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = nf_ct_get(skb, &ctinfo); + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum); + struct nft_pktinfo pkt; + unsigned int ret; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))); + + nat = nf_ct_nat_ext_add(ct); + if (nat == NULL) + return NF_ACCEPT; + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED + IP_CT_IS_REPLY: + if (ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + ops->hooknum)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall through */ + case IP_CT_NEW: + if (nf_nat_initialized(ct, maniptype)) + break; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + ret = nft_do_chain(&pkt, ops); + if (ret != NF_ACCEPT) + return ret; + if (!nf_nat_initialized(ct, maniptype)) { + ret = nf_nat_alloc_null_binding(ct, ops->hooknum); + if (ret != NF_ACCEPT) + return ret; + } + default: + break; + } + + return nf_nat_packet(ct, ctinfo, ops->hooknum, skb); +} + +static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + __be32 daddr = ip_hdr(skb)->daddr; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ip_hdr(skb)->daddr != daddr) { + skb_dst_drop(skb); + } + return ret; +} + +static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo __maybe_unused; + const struct nf_conn *ct __maybe_unused; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.src.u3.ip != + ct->tuplehash[!dir].tuple.dst.u3.ip || + ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all) + return nf_xfrm_me_harder(skb, AF_INET) == 0 ? + ret : NF_DROP; + } +#endif + return ret; +} + +static unsigned int nf_nat_output(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + enum ip_conntrack_info ctinfo; + const struct nf_conn *ct; + unsigned int ret; + + ret = nf_nat_fn(ops, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (ct->tuplehash[dir].tuple.dst.u3.ip != + ct->tuplehash[!dir].tuple.src.u3.ip) { + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET)) + ret = NF_DROP; +#endif + } + return ret; +} + +static const struct nf_chain_type nft_chain_nat_ipv4 = { + .name = "nat", + .type = NFT_CHAIN_T_NAT, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .hooks = { + [NF_INET_PRE_ROUTING] = nf_nat_prerouting, + [NF_INET_POST_ROUTING] = nf_nat_postrouting, + [NF_INET_LOCAL_OUT] = nf_nat_output, + [NF_INET_LOCAL_IN] = nf_nat_fn, + }, +}; + +static int __init nft_chain_nat_init(void) +{ + int err; + + err = nft_register_chain_type(&nft_chain_nat_ipv4); + if (err < 0) + return err; + + return 0; +} + +static void __exit nft_chain_nat_exit(void) +{ + nft_unregister_chain_type(&nft_chain_nat_ipv4); +} + +module_init(nft_chain_nat_init); +module_exit(nft_chain_nat_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat"); diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c new file mode 100644 index 00000000000..125b66766c0 --- /dev/null +++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2008 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/list.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter_ipv4.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_ipv4.h> +#include <net/route.h> +#include <net/ip.h> + +static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct nft_pktinfo pkt; + u32 mark; + __be32 saddr, daddr; + u_int8_t tos; + const struct iphdr *iph; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct iphdr) || + ip_hdrlen(skb) < sizeof(struct iphdr)) + return NF_ACCEPT; + + nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out); + + mark = skb->mark; + iph = ip_hdr(skb); + saddr = iph->saddr; + daddr = iph->daddr; + tos = iph->tos; + + ret = nft_do_chain(&pkt, ops); + if (ret != NF_DROP && ret != NF_QUEUE) { + iph = ip_hdr(skb); + + if (iph->saddr != saddr || + iph->daddr != daddr || + skb->mark != mark || + iph->tos != tos) + if (ip_route_me_harder(skb, RTN_UNSPEC)) + ret = NF_DROP; + } + return ret; +} + +static const struct nf_chain_type nft_chain_route_ipv4 = { + .name = "route", + .type = NFT_CHAIN_T_ROUTE, + .family = NFPROTO_IPV4, + .owner = THIS_MODULE, + .hook_mask = (1 << NF_INET_LOCAL_OUT), + .hooks = { + [NF_INET_LOCAL_OUT] = nf_route_table_hook, + }, +}; + +static int __init nft_chain_route_init(void) +{ + return nft_register_chain_type(&nft_chain_route_ipv4); +} + +static void __exit nft_chain_route_exit(void) +{ + nft_unregister_chain_type(&nft_chain_route_ipv4); +} + +module_init(nft_chain_route_init); +module_exit(nft_chain_route_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_CHAIN(AF_INET, "route"); diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c new file mode 100644 index 00000000000..e79718a382f --- /dev/null +++ b/net/ipv4/netfilter/nft_reject_ipv4.c @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net> + * Copyright (c) 2013 Eric Leblond <eric@regit.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of this code funded by Astaro AG (http://www.astaro.com/) + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables.h> +#include <net/icmp.h> +#include <net/netfilter/ipv4/nf_reject.h> +#include <net/netfilter/nft_reject.h> + +void nft_reject_ipv4_eval(const struct nft_expr *expr, + struct nft_data data[NFT_REG_MAX + 1], + const struct nft_pktinfo *pkt) +{ + struct nft_reject *priv = nft_expr_priv(expr); + + switch (priv->type) { + case NFT_REJECT_ICMP_UNREACH: + nf_send_unreach(pkt->skb, priv->icmp_code); + break; + case NFT_REJECT_TCP_RST: + nf_send_reset(pkt->skb, pkt->ops->hooknum); + break; + } + + data[NFT_REG_VERDICT].verdict = NF_DROP; +} +EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval); + +static struct nft_expr_type nft_reject_ipv4_type; +static const struct nft_expr_ops nft_reject_ipv4_ops = { + .type = &nft_reject_ipv4_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_reject)), + .eval = nft_reject_ipv4_eval, + .init = nft_reject_init, + .dump = nft_reject_dump, +}; + +static struct nft_expr_type nft_reject_ipv4_type __read_mostly = { + .family = NFPROTO_IPV4, + .name = "reject", + .ops = &nft_reject_ipv4_ops, + .policy = nft_reject_policy, + .maxattr = NFTA_REJECT_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_reject_ipv4_module_init(void) +{ + return nft_register_expr(&nft_reject_ipv4_type); +} + +static void __exit nft_reject_ipv4_module_exit(void) +{ + nft_unregister_expr(&nft_reject_ipv4_type); +} + +module_init(nft_reject_ipv4_module_init); +module_exit(nft_reject_ipv4_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); +MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject"); |
