about | summary | refs | log | tree | commit | diff
path: root/net/ipv4/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/netfilter')
-rw-r--r-- net/ipv4/netfilter/Kconfig 62
-rw-r--r-- net/ipv4/netfilter/Makefile 7
-rw-r--r-- net/ipv4/netfilter/arp_tables.c 22
-rw-r--r-- net/ipv4/netfilter/arptable_filter.c 9
-rw-r--r-- net/ipv4/netfilter/ip_tables.c 31
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c 120
-rw-r--r-- net/ipv4/netfilter/ipt_MASQUERADE.c 9
-rw-r--r-- net/ipv4/netfilter/ipt_REJECT.c 120
-rw-r--r-- net/ipv4/netfilter/ipt_SYNPROXY.c 482
-rw-r--r-- net/ipv4/netfilter/ipt_ULOG.c 182
-rw-r--r-- net/ipv4/netfilter/ipt_rpfilter.c 13
-rw-r--r-- net/ipv4/netfilter/iptable_filter.c 9
-rw-r--r-- net/ipv4/netfilter/iptable_mangle.c 21
-rw-r--r-- net/ipv4/netfilter/iptable_nat.c 76
-rw-r--r-- net/ipv4/netfilter/iptable_raw.c 8
-rw-r--r-- net/ipv4/netfilter/iptable_security.c 9
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c 120
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c 15
-rw-r--r-- net/ipv4/netfilter/nf_conntrack_proto_icmp.c 9
-rw-r--r-- net/ipv4/netfilter/nf_defrag_ipv4.c 11
-rw-r--r-- net/ipv4/netfilter/nf_nat_h323.c 6
-rw-r--r-- net/ipv4/netfilter/nf_nat_pptp.c 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_proto_gre.c 2
-rw-r--r-- net/ipv4/netfilter/nf_nat_snmp_basic.c 21
-rw-r--r-- net/ipv4/netfilter/nf_tables_arp.c 104
-rw-r--r-- net/ipv4/netfilter/nf_tables_ipv4.c 129
-rw-r--r-- net/ipv4/netfilter/nft_chain_nat_ipv4.c 199
-rw-r--r-- net/ipv4/netfilter/nft_chain_route_ipv4.c 90
-rw-r--r-- net/ipv4/netfilter/nft_reject_ipv4.c 75
29 files changed, 1568 insertions, 395 deletions
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index d8d6f2a5bf1..a26ce035e3f 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -36,18 +36,41 @@ config NF_CONNTRACK_PROC_COMPAT
If unsure, say Y.
-config IP_NF_QUEUE
- tristate "IP Userspace queueing via NETLINK (OBSOLETE)"
- depends on NETFILTER_ADVANCED
+config NF_TABLES_IPV4
+ depends on NF_TABLES
+ tristate "IPv4 nf_tables support"
help
- Netfilter has the ability to queue packets to user space: the
- netlink device can be used to access them using this driver.
+ This option enables the IPv4 support for nf_tables.
- This option enables the old IPv4-only "ip_queue" implementation
- which has been obsoleted by the new "nfnetlink_queue" code (see
- CONFIG_NETFILTER_NETLINK_QUEUE).
+config NFT_CHAIN_ROUTE_IPV4
+ depends on NF_TABLES_IPV4
+ tristate "IPv4 nf_tables route chain support"
+ help
+ This option enables the "route" chain for IPv4 in nf_tables. This
+ chain type is used to force packet re-routing after mangling header
+ fields such as the source, destination, type of service and
+ the packet mark.
+
+config NFT_CHAIN_NAT_IPV4
+ depends on NF_TABLES_IPV4
+ depends on NF_NAT_IPV4 && NFT_NAT
+ tristate "IPv4 nf_tables nat chain support"
+ help
+ This option enables the "nat" chain for IPv4 in nf_tables. This
+ chain type is used to perform Network Address Translation (NAT)
+ packet transformations such as rewriting the source and destination
+ addresses and the source and destination ports.
+
+config NFT_REJECT_IPV4
+ depends on NF_TABLES_IPV4
+ default NFT_REJECT
+ tristate
- To compile it as a module, choose M here. If unsure, say N.
+config NF_TABLES_ARP
+ depends on NF_TABLES
+ tristate "ARP nf_tables support"
+ help
+ This option enables the ARP support for nf_tables.
config IP_NF_IPTABLES
tristate "IP tables support (required for filtering/masq/NAT)"
@@ -84,7 +107,7 @@ config IP_NF_MATCH_ECN
config IP_NF_MATCH_RPFILTER
tristate '"rpfilter" reverse path filter match support'
- depends on NETFILTER_ADVANCED
+ depends on NETFILTER_ADVANCED && (IP_NF_MANGLE || IP_NF_RAW)
---help---
This option allows you to match packets whose replies would
go out via the interface the packet came in.
@@ -123,8 +146,21 @@ config IP_NF_TARGET_REJECT
To compile it as a module, choose M here. If unsure, say N.
+config IP_NF_TARGET_SYNPROXY
+ tristate "SYNPROXY target support"
+ depends on NF_CONNTRACK && NETFILTER_ADVANCED
+ select NETFILTER_SYNPROXY
+ select SYN_COOKIES
+ help
+ The SYNPROXY target allows you to intercept TCP connections and
+ establish them using syncookies before they are passed on to the
+ server. This avoids conntrack and server resource usage
+ during SYN-flood attacks.
+
+ To compile it as a module, choose M here. If unsure, say N.
+
config IP_NF_TARGET_ULOG
- tristate "ULOG target support"
+ tristate "ULOG target support (obsolete)"
default m if NETFILTER_ADVANCED=n
---help---
@@ -241,8 +277,8 @@ config IP_NF_MANGLE
To compile it as a module, choose M here. If unsure, say N.
config IP_NF_TARGET_CLUSTERIP
- tristate "CLUSTERIP target support (EXPERIMENTAL)"
- depends on IP_NF_MANGLE && EXPERIMENTAL
+ tristate "CLUSTERIP target support"
+ depends on IP_NF_MANGLE
depends on NF_CONNTRACK_IPV4
depends on NETFILTER_ADVANCED
select NF_CONNTRACK_MARK
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 007b128eecc..90b82405331 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -27,6 +27,12 @@ obj-$(CONFIG_NF_NAT_SNMP_BASIC) += nf_nat_snmp_basic.o
# NAT protocols (nf_nat)
obj-$(CONFIG_NF_NAT_PROTO_GRE) += nf_nat_proto_gre.o
+obj-$(CONFIG_NF_TABLES_IPV4) += nf_tables_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_ROUTE_IPV4) += nft_chain_route_ipv4.o
+obj-$(CONFIG_NFT_CHAIN_NAT_IPV4) += nft_chain_nat_ipv4.o
+obj-$(CONFIG_NFT_REJECT_IPV4) += nft_reject_ipv4.o
+obj-$(CONFIG_NF_TABLES_ARP) += nf_tables_arp.o
+
# generic IP tables
obj-$(CONFIG_IP_NF_IPTABLES) += ip_tables.o
@@ -46,6 +52,7 @@ obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
+obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
# generic ARP tables
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 3ea4127404d..f95b6f93814 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -6,6 +6,7 @@
* Some ARP specific bits are:
*
* Copyright (C) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (C) 2006-2009 Patrick McHardy <kaber@trash.net>
*
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -270,6 +271,11 @@ unsigned int arpt_do_table(struct sk_buff *skb,
local_bh_disable();
addend = xt_write_recseq_begin();
private = table->private;
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -901,7 +907,7 @@ static int get_info(struct net *net, void __user *user,
#endif
t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
"arptable_%s", name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
struct arpt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -958,7 +964,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
}
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n",
@@ -1001,7 +1007,7 @@ static int __do_replace(struct net *net, const char *name,
t = try_then_request_module(xt_find_table_lock(net, NFPROTO_ARP, name),
"arptable_%s", name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1038,8 +1044,10 @@ static int __do_replace(struct net *net, const char *name,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error: we must not fail here, the new table is already in place */
+ net_warn_ratelimited("arptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
@@ -1158,7 +1166,7 @@ static int do_add_counters(struct net *net, const void __user *user,
}
t = xt_find_table_lock(net, NFPROTO_ARP, name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
@@ -1646,7 +1654,7 @@ static int compat_get_entries(struct net *net,
xt_compat_lock(NFPROTO_ARP);
t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index 79ca5e70d49..802ddecb30b 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -27,13 +27,14 @@ static const struct xt_table packet_filter = {
/* The work comes in here from netfilter.c */
static unsigned int
-arptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+arptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net = dev_net((in != NULL) ? in : out);
- return arpt_do_table(skb, hook, in, out, net->ipv4.arptable_filter);
+ return arpt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.arptable_filter);
}
static struct nf_hook_ops *arpfilter_ops __read_mostly;
@@ -48,9 +49,7 @@ static int __net_init arptable_filter_net_init(struct net *net)
net->ipv4.arptable_filter =
arpt_register_table(net, &packet_filter, repl);
kfree(repl);
- if (IS_ERR(net->ipv4.arptable_filter))
- return PTR_ERR(net->ipv4.arptable_filter);
- return 0;
+ return PTR_ERR_OR_ZERO(net->ipv4.arptable_filter);
}
static void __net_exit arptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 17c5e06da66..99e810f8467 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -3,6 +3,7 @@
*
* Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
* Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
+ * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -182,8 +183,7 @@ ipt_get_target_c(const struct ipt_entry *e)
return ipt_get_target((struct ipt_entry *)e);
}
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
static const char *const hooknames[] = {
[NF_INET_PRE_ROUTING] = "PREROUTING",
[NF_INET_LOCAL_IN] = "INPUT",
@@ -259,6 +259,7 @@ static void trace_packet(const struct sk_buff *skb,
const char *hookname, *chainname, *comment;
const struct ipt_entry *iter;
unsigned int rulenum = 0;
+ struct net *net = dev_net(in ? in : out);
table_base = private->entries[smp_processor_id()];
root = get_entry(table_base, private->hook_entry[hook]);
@@ -271,7 +272,7 @@ static void trace_packet(const struct sk_buff *skb,
&chainname, &comment, &rulenum) != 0)
break;
- nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
+ nf_log_packet(net, AF_INET, hook, skb, in, out, &trace_loginfo,
"TRACE: %s:%s:%s:%u ",
tablename, chainname, comment, rulenum);
}
@@ -326,6 +327,11 @@ ipt_do_table(struct sk_buff *skb,
addend = xt_write_recseq_begin();
private = table->private;
cpu = smp_processor_id();
+ /*
+ * Ensure we load private-> members after we've fetched the base
+ * pointer.
+ */
+ smp_read_barrier_depends();
table_base = private->entries[cpu];
jumpstack = (struct ipt_entry **)private->jumpstack[cpu];
stackptr = per_cpu_ptr(private->stackptr, cpu);
@@ -361,8 +367,7 @@ ipt_do_table(struct sk_buff *skb,
t = ipt_get_target(e);
IP_NF_ASSERT(t->u.kernel.target);
-#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
- defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
+#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
/* The packet is traced: log it */
if (unlikely(skb->nf_trace))
trace_packet(skb, hook, in, out,
@@ -1090,7 +1095,7 @@ static int get_info(struct net *net, void __user *user,
#endif
t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
"iptable_%s", name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
struct ipt_getinfo info;
const struct xt_table_info *private = t->private;
#ifdef CONFIG_COMPAT
@@ -1149,7 +1154,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
}
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
duprintf("t->private->number = %u\n", private->number);
if (get.size == private->size)
@@ -1189,7 +1194,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
"iptable_%s", name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free_newinfo_counters_untrans;
}
@@ -1226,8 +1231,10 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
xt_free_table_info(oldinfo);
if (copy_to_user(counters_ptr, counters,
- sizeof(struct xt_counters) * num_counters) != 0)
- ret = -EFAULT;
+ sizeof(struct xt_counters) * num_counters) != 0) {
+ /* Silent error: we must not fail here, the new table is already in place */
+ net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
+ }
vfree(counters);
xt_table_unlock(t);
return ret;
@@ -1347,7 +1354,7 @@ do_add_counters(struct net *net, const void __user *user,
}
t = xt_find_table_lock(net, AF_INET, name);
- if (!t || IS_ERR(t)) {
+ if (IS_ERR_OR_NULL(t)) {
ret = t ? PTR_ERR(t) : -ENOENT;
goto free;
}
@@ -1931,7 +1938,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
xt_compat_lock(AF_INET);
t = xt_find_table_lock(net, AF_INET, get.name);
- if (t && !IS_ERR(t)) {
+ if (!IS_ERR_OR_NULL(t)) {
const struct xt_table_info *private = t->private;
struct xt_table_info info;
duprintf("t->private->number = %u\n", private->number);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 75e33a7048f..2510c02c2d2 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -28,6 +28,7 @@
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/net_namespace.h>
+#include <net/netns/generic.h>
#include <net/checksum.h>
#include <net/ip.h>
@@ -57,15 +58,21 @@ struct clusterip_config {
struct rcu_head rcu;
};
-static LIST_HEAD(clusterip_configs);
+#ifdef CONFIG_PROC_FS
+static const struct file_operations clusterip_proc_fops;
+#endif
-/* clusterip_lock protects the clusterip_configs list */
-static DEFINE_SPINLOCK(clusterip_lock);
+static int clusterip_net_id __read_mostly;
+
+struct clusterip_net {
+ struct list_head configs;
+ /* lock protects the configs list */
+ spinlock_t lock;
#ifdef CONFIG_PROC_FS
-static const struct file_operations clusterip_proc_fops;
-static struct proc_dir_entry *clusterip_procdir;
+ struct proc_dir_entry *procdir;
#endif
+};
static inline void
clusterip_config_get(struct clusterip_config *c)
@@ -92,10 +99,13 @@ clusterip_config_put(struct clusterip_config *c)
static inline void
clusterip_config_entry_put(struct clusterip_config *c)
{
+ struct net *net = dev_net(c->dev);
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
local_bh_disable();
- if (atomic_dec_and_lock(&c->entries, &clusterip_lock)) {
+ if (atomic_dec_and_lock(&c->entries, &cn->lock)) {
list_del_rcu(&c->list);
- spin_unlock(&clusterip_lock);
+ spin_unlock(&cn->lock);
local_bh_enable();
dev_mc_del(c->dev, c->clustermac);
@@ -105,7 +115,7 @@ clusterip_config_entry_put(struct clusterip_config *c)
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
- remove_proc_entry(c->pde->name, c->pde->parent);
+ proc_remove(c->pde);
#endif
return;
}
@@ -113,11 +123,12 @@ clusterip_config_entry_put(struct clusterip_config *c)
}
static struct clusterip_config *
-__clusterip_config_find(__be32 clusterip)
+__clusterip_config_find(struct net *net, __be32 clusterip)
{
struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
- list_for_each_entry_rcu(c, &clusterip_configs, list) {
+ list_for_each_entry_rcu(c, &cn->configs, list) {
if (c->clusterip == clusterip)
return c;
}
@@ -126,12 +137,12 @@ __clusterip_config_find(__be32 clusterip)
}
static inline struct clusterip_config *
-clusterip_config_find_get(__be32 clusterip, int entry)
+clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
{
struct clusterip_config *c;
rcu_read_lock_bh();
- c = __clusterip_config_find(clusterip);
+ c = __clusterip_config_find(net, clusterip);
if (c) {
if (unlikely(!atomic_inc_not_zero(&c->refcount)))
c = NULL;
@@ -158,6 +169,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
struct net_device *dev)
{
struct clusterip_config *c;
+ struct clusterip_net *cn = net_generic(dev_net(dev), clusterip_net_id);
c = kzalloc(sizeof(*c), GFP_ATOMIC);
if (!c)
@@ -180,7 +192,7 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
/* create proc dir entry */
sprintf(buffer, "%pI4", &ip);
c->pde = proc_create_data(buffer, S_IWUSR|S_IRUSR,
- clusterip_procdir,
+ cn->procdir,
&clusterip_proc_fops, c);
if (!c->pde) {
kfree(c);
@@ -189,9 +201,9 @@ clusterip_config_init(const struct ipt_clusterip_tgt_info *i, __be32 ip,
}
#endif
- spin_lock_bh(&clusterip_lock);
- list_add_rcu(&c->list, &clusterip_configs);
- spin_unlock_bh(&clusterip_lock);
+ spin_lock_bh(&cn->lock);
+ list_add_rcu(&c->list, &cn->configs);
+ spin_unlock_bh(&cn->lock);
return c;
}
@@ -370,7 +382,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
/* FIXME: further sanity checks */
- config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
+ config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
if (!config) {
if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
pr_info("no config found for %pI4, need 'new'\n",
@@ -384,7 +396,7 @@ static int clusterip_tg_check(const struct xt_tgchk_param *par)
return -EINVAL;
}
- dev = dev_get_by_name(&init_net, e->ip.iniface);
+ dev = dev_get_by_name(par->net, e->ip.iniface);
if (!dev) {
pr_info("no such interface %s\n",
e->ip.iniface);
@@ -483,7 +495,7 @@ static void arp_print(struct arp_payload *payload)
#endif
static unsigned int
-arp_mangle(unsigned int hook,
+arp_mangle(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -492,6 +504,7 @@ arp_mangle(unsigned int hook,
struct arphdr *arp = arp_hdr(skb);
struct arp_payload *payload;
struct clusterip_config *c;
+ struct net *net = dev_net(in ? in : out);
/* we don't care about non-ethernet and non-ipv4 ARP */
if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
@@ -508,7 +521,7 @@ arp_mangle(unsigned int hook,
/* if there is no clusterip configuration for the arp reply's
* source ip, we don't want to mangle it */
- c = clusterip_config_find_get(payload->src_ip, 0);
+ c = clusterip_config_find_get(net, payload->src_ip, 0);
if (!c)
return NF_ACCEPT;
@@ -631,7 +644,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
if (!ret) {
struct seq_file *sf = file->private_data;
- struct clusterip_config *c = PDE(inode)->data;
+ struct clusterip_config *c = PDE_DATA(inode);
sf->private = c;
@@ -643,7 +656,7 @@ static int clusterip_proc_open(struct inode *inode, struct file *file)
static int clusterip_proc_release(struct inode *inode, struct file *file)
{
- struct clusterip_config *c = PDE(inode)->data;
+ struct clusterip_config *c = PDE_DATA(inode);
int ret;
ret = seq_release(inode, file);
@@ -657,7 +670,7 @@ static int clusterip_proc_release(struct inode *inode, struct file *file)
static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
size_t size, loff_t *ofs)
{
- struct clusterip_config *c = PDE(file->f_path.dentry->d_inode)->data;
+ struct clusterip_config *c = PDE_DATA(file_inode(file));
#define PROC_WRITELEN 10
char buffer[PROC_WRITELEN+1];
unsigned long nodenum;
@@ -698,48 +711,75 @@ static const struct file_operations clusterip_proc_fops = {
#endif /* CONFIG_PROC_FS */
+static int clusterip_net_init(struct net *net)
+{
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+
+ INIT_LIST_HEAD(&cn->configs);
+
+ spin_lock_init(&cn->lock);
+
+#ifdef CONFIG_PROC_FS
+ cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
+ if (!cn->procdir) {
+ pr_err("Unable to proc dir entry\n");
+ return -ENOMEM;
+ }
+#endif /* CONFIG_PROC_FS */
+
+ return 0;
+}
+
+static void clusterip_net_exit(struct net *net)
+{
+#ifdef CONFIG_PROC_FS
+ struct clusterip_net *cn = net_generic(net, clusterip_net_id);
+ proc_remove(cn->procdir);
+#endif
+}
+
+static struct pernet_operations clusterip_net_ops = {
+ .init = clusterip_net_init,
+ .exit = clusterip_net_exit,
+ .id = &clusterip_net_id,
+ .size = sizeof(struct clusterip_net),
+};
+
static int __init clusterip_tg_init(void)
{
int ret;
- ret = xt_register_target(&clusterip_tg_reg);
+ ret = register_pernet_subsys(&clusterip_net_ops);
if (ret < 0)
return ret;
+ ret = xt_register_target(&clusterip_tg_reg);
+ if (ret < 0)
+ goto cleanup_subsys;
+
ret = nf_register_hook(&cip_arp_ops);
if (ret < 0)
goto cleanup_target;
-#ifdef CONFIG_PROC_FS
- clusterip_procdir = proc_mkdir("ipt_CLUSTERIP", init_net.proc_net);
- if (!clusterip_procdir) {
- pr_err("Unable to proc dir entry\n");
- ret = -ENOMEM;
- goto cleanup_hook;
- }
-#endif /* CONFIG_PROC_FS */
-
pr_info("ClusterIP Version %s loaded successfully\n",
CLUSTERIP_VERSION);
+
return 0;
-#ifdef CONFIG_PROC_FS
-cleanup_hook:
- nf_unregister_hook(&cip_arp_ops);
-#endif /* CONFIG_PROC_FS */
cleanup_target:
xt_unregister_target(&clusterip_tg_reg);
+cleanup_subsys:
+ unregister_pernet_subsys(&clusterip_net_ops);
return ret;
}
static void __exit clusterip_tg_exit(void)
{
pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
-#ifdef CONFIG_PROC_FS
- remove_proc_entry(clusterip_procdir->name, clusterip_procdir->parent);
-#endif
+
nf_unregister_hook(&cip_arp_ops);
xt_unregister_target(&clusterip_tg_reg);
+ unregister_pernet_subsys(&clusterip_net_ops);
/* Wait for completion of call_rcu_bh()'s (clusterip_config_rcu_free) */
rcu_barrier_bh();
diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
index 5d5d4d1be9c..00352ce0f0d 100644
--- a/net/ipv4/netfilter/ipt_MASQUERADE.c
+++ b/net/ipv4/netfilter/ipt_MASQUERADE.c
@@ -108,7 +108,7 @@ static int masq_device_event(struct notifier_block *this,
unsigned long event,
void *ptr)
{
- const struct net_device *dev = ptr;
+ const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
struct net *net = dev_net(dev);
if (event == NETDEV_DOWN) {
@@ -118,7 +118,7 @@ static int masq_device_event(struct notifier_block *this,
NF_CT_ASSERT(dev->ifindex != 0);
nf_ct_iterate_cleanup(net, device_cmp,
- (void *)(long)dev->ifindex);
+ (void *)(long)dev->ifindex, 0, 0);
}
return NOTIFY_DONE;
@@ -129,7 +129,10 @@ static int masq_inet_event(struct notifier_block *this,
void *ptr)
{
struct net_device *dev = ((struct in_ifaddr *)ptr)->ifa_dev->dev;
- return masq_device_event(this, event, dev);
+ struct netdev_notifier_info info;
+
+ netdev_notifier_info_init(&info, dev);
+ return masq_device_event(this, event, &info);
}
static struct notifier_block masq_dev_notifier = {
diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
index 51f13f8ec72..5b6e0df4ccf 100644
--- a/net/ipv4/netfilter/ipt_REJECT.c
+++ b/net/ipv4/netfilter/ipt_REJECT.c
@@ -17,10 +17,6 @@
#include <linux/udp.h>
#include <linux/icmp.h>
#include <net/icmp.h>
-#include <net/ip.h>
-#include <net/tcp.h>
-#include <net/route.h>
-#include <net/dst.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_REJECT.h>
@@ -28,108 +24,12 @@
#include <linux/netfilter_bridge.h>
#endif
+#include <net/netfilter/ipv4/nf_reject.h>
+
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
MODULE_DESCRIPTION("Xtables: packet \"rejection\" target for IPv4");
-/* Send RST reply */
-static void send_reset(struct sk_buff *oldskb, int hook)
-{
- struct sk_buff *nskb;
- const struct iphdr *oiph;
- struct iphdr *niph;
- const struct tcphdr *oth;
- struct tcphdr _otcph, *tcph;
-
- /* IP header checks: fragment. */
- if (ip_hdr(oldskb)->frag_off & htons(IP_OFFSET))
- return;
-
- oth = skb_header_pointer(oldskb, ip_hdrlen(oldskb),
- sizeof(_otcph), &_otcph);
- if (oth == NULL)
- return;
-
- /* No RST for RST. */
- if (oth->rst)
- return;
-
- if (skb_rtable(oldskb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
- return;
-
- /* Check checksum */
- if (nf_ip_checksum(oldskb, hook, ip_hdrlen(oldskb), IPPROTO_TCP))
- return;
- oiph = ip_hdr(oldskb);
-
- nskb = alloc_skb(sizeof(struct iphdr) + sizeof(struct tcphdr) +
- LL_MAX_HEADER, GFP_ATOMIC);
- if (!nskb)
- return;
-
- skb_reserve(nskb, LL_MAX_HEADER);
-
- skb_reset_network_header(nskb);
- niph = (struct iphdr *)skb_put(nskb, sizeof(struct iphdr));
- niph->version = 4;
- niph->ihl = sizeof(struct iphdr) / 4;
- niph->tos = 0;
- niph->id = 0;
- niph->frag_off = htons(IP_DF);
- niph->protocol = IPPROTO_TCP;
- niph->check = 0;
- niph->saddr = oiph->daddr;
- niph->daddr = oiph->saddr;
-
- tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
- memset(tcph, 0, sizeof(*tcph));
- tcph->source = oth->dest;
- tcph->dest = oth->source;
- tcph->doff = sizeof(struct tcphdr) / 4;
-
- if (oth->ack)
- tcph->seq = oth->ack_seq;
- else {
- tcph->ack_seq = htonl(ntohl(oth->seq) + oth->syn + oth->fin +
- oldskb->len - ip_hdrlen(oldskb) -
- (oth->doff << 2));
- tcph->ack = 1;
- }
-
- tcph->rst = 1;
- tcph->check = ~tcp_v4_check(sizeof(struct tcphdr), niph->saddr,
- niph->daddr, 0);
- nskb->ip_summed = CHECKSUM_PARTIAL;
- nskb->csum_start = (unsigned char *)tcph - nskb->head;
- nskb->csum_offset = offsetof(struct tcphdr, check);
-
- /* ip_route_me_harder expects skb->dst to be set */
- skb_dst_set_noref(nskb, skb_dst(oldskb));
-
- nskb->protocol = htons(ETH_P_IP);
- if (ip_route_me_harder(nskb, RTN_UNSPEC))
- goto free_nskb;
-
- niph->ttl = ip4_dst_hoplimit(skb_dst(nskb));
-
- /* "Never happens" */
- if (nskb->len > dst_mtu(skb_dst(nskb)))
- goto free_nskb;
-
- nf_ct_attach(nskb, oldskb);
-
- ip_local_out(nskb);
- return;
-
- free_nskb:
- kfree_skb(nskb);
-}
-
-static inline void send_unreach(struct sk_buff *skb_in, int code)
-{
- icmp_send(skb_in, ICMP_DEST_UNREACH, code, 0);
-}
-
static unsigned int
reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
@@ -137,28 +37,28 @@ reject_tg(struct sk_buff *skb, const struct xt_action_param *par)
switch (reject->with) {
case IPT_ICMP_NET_UNREACHABLE:
- send_unreach(skb, ICMP_NET_UNREACH);
+ nf_send_unreach(skb, ICMP_NET_UNREACH);
break;
case IPT_ICMP_HOST_UNREACHABLE:
- send_unreach(skb, ICMP_HOST_UNREACH);
+ nf_send_unreach(skb, ICMP_HOST_UNREACH);
break;
case IPT_ICMP_PROT_UNREACHABLE:
- send_unreach(skb, ICMP_PROT_UNREACH);
+ nf_send_unreach(skb, ICMP_PROT_UNREACH);
break;
case IPT_ICMP_PORT_UNREACHABLE:
- send_unreach(skb, ICMP_PORT_UNREACH);
+ nf_send_unreach(skb, ICMP_PORT_UNREACH);
break;
case IPT_ICMP_NET_PROHIBITED:
- send_unreach(skb, ICMP_NET_ANO);
+ nf_send_unreach(skb, ICMP_NET_ANO);
break;
case IPT_ICMP_HOST_PROHIBITED:
- send_unreach(skb, ICMP_HOST_ANO);
+ nf_send_unreach(skb, ICMP_HOST_ANO);
break;
case IPT_ICMP_ADMIN_PROHIBITED:
- send_unreach(skb, ICMP_PKT_FILTERED);
+ nf_send_unreach(skb, ICMP_PKT_FILTERED);
break;
case IPT_TCP_RESET:
- send_reset(skb, par->hooknum);
+ nf_send_reset(skb, par->hooknum);
case IPT_ICMP_ECHOREPLY:
/* Doesn't happen. */
break;
diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c
new file mode 100644
index 00000000000..a313c3fbeb4
--- /dev/null
+++ b/net/ipv4/netfilter/ipt_SYNPROXY.c
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2013 Patrick McHardy <kaber@trash.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/tcp.h>
+
+#include <linux/netfilter_ipv4/ip_tables.h>
+#include <linux/netfilter/x_tables.h>
+#include <linux/netfilter/xt_SYNPROXY.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/nf_conntrack_synproxy.h>
+
+/*
+ * Append an option-less IPv4 header to @skb and fill it in for a TCP
+ * packet going from @saddr to @daddr. The header is placed at the current
+ * tail of @skb, which is also recorded as the network header offset.
+ * The checksum field is left at zero by this function.
+ *
+ * Returns a pointer to the freshly written header.
+ */
+static struct iphdr *
+synproxy_build_ip(struct sk_buff *skb, u32 saddr, u32 daddr)
+{
+ struct iphdr *iph;
+
+ skb_reset_network_header(skb);
+ iph = (struct iphdr *)skb_put(skb, sizeof(*iph));
+ iph->version = 4;
+ /* ihl is expressed in 32-bit words; no IP options are emitted */
+ iph->ihl = sizeof(*iph) / 4;
+ iph->tos = 0;
+ iph->id = 0;
+ iph->frag_off = htons(IP_DF);
+ iph->ttl = sysctl_ip_default_ttl;
+ iph->protocol = IPPROTO_TCP;
+ iph->check = 0;
+ iph->saddr = saddr;
+ iph->daddr = daddr;
+
+ return iph;
+}
+
+/*
+ * Finish and transmit a TCP packet built in @nskb.
+ *
+ * @skb:          packet that triggered the transmission; its dst entry is
+ *                borrowed as the starting point for routing @nskb
+ * @nskb:         packet to send; freed here if routing fails
+ * @nfct/@ctinfo: optional conntrack entry to attach to @nskb (may be NULL)
+ * @niph/@nth:    IP and TCP headers already written into @nskb
+ * @tcp_hdr_size: length of the TCP header including options
+ */
+static void
+synproxy_send_tcp(const struct sk_buff *skb, struct sk_buff *nskb,
+ struct nf_conntrack *nfct, enum ip_conntrack_info ctinfo,
+ struct iphdr *niph, struct tcphdr *nth,
+ unsigned int tcp_hdr_size)
+{
+ /* Store the pseudo-header sum and mark the checksum as partial,
+ * recording where the TCP checksum field lives in the buffer.
+ */
+ nth->check = ~tcp_v4_check(tcp_hdr_size, niph->saddr, niph->daddr, 0);
+ nskb->ip_summed = CHECKSUM_PARTIAL;
+ nskb->csum_start = (unsigned char *)nth - nskb->head;
+ nskb->csum_offset = offsetof(struct tcphdr, check);
+
+ /* ip_route_me_harder() is called with the dst of the original skb set */
+ skb_dst_set_noref(nskb, skb_dst(skb));
+ nskb->protocol = htons(ETH_P_IP);
+ if (ip_route_me_harder(nskb, RTN_UNSPEC))
+ goto free_nskb;
+
+ if (nfct) {
+ nskb->nfct = nfct;
+ nskb->nfctinfo = ctinfo;
+ nf_conntrack_get(nfct);
+ }
+
+ ip_local_out(nskb);
+ return;
+
+free_nskb:
+ kfree_skb(nskb);
+}
+
+/*
+ * Answer the SYN in @skb with a SYN/ACK.
+ *
+ * Addresses and ports are swapped relative to @skb, the sequence number
+ * comes from __cookie_v4_init_sequence() and the advertised window is
+ * zero. ECE is set when @opts carries XT_SYNPROXY_OPT_ECN. The reply is
+ * attached to the conntrack entry of @skb as IP_CT_ESTABLISHED_REPLY.
+ * Silently does nothing if the allocation fails.
+ */
+static void
+synproxy_send_client_synack(const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+ /* local copy: __cookie_v4_init_sequence() takes a writable pointer */
+ u16 mss = opts->mss;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(__cookie_v4_init_sequence(iph, th, &mss));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = 0;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, skb->nfct, IP_CT_ESTABLISHED_REPLY,
+ niph, nth, tcp_hdr_size);
+}
+
+/*
+ * Build and send a SYN that travels in the same direction as @skb
+ * (same source/destination addresses and ports).
+ *
+ * The sequence number is @recv_seq - 1 and the window is copied from @th.
+ * ECE and CWR are set when @opts carries XT_SYNPROXY_OPT_ECN. The packet
+ * is attached to the per-netns template conntrack (@snet->tmpl) as
+ * IP_CT_NEW. Silently does nothing if the allocation fails.
+ */
+static void
+synproxy_send_server_syn(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts, u32 recv_seq)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(recv_seq - 1);
+ /* ack_seq is used to relay our ISN to the synproxy hook to initialize
+ * sequence number translation once a connection tracking entry exists.
+ */
+ nth->ack_seq = htonl(ntohl(th->ack_seq) - 1);
+ tcp_flag_word(nth) = TCP_FLAG_SYN;
+ if (opts->options & XT_SYNPROXY_OPT_ECN)
+ tcp_flag_word(nth) |= TCP_FLAG_ECE | TCP_FLAG_CWR;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = th->window;
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, &snet->tmpl->ct_general, IP_CT_NEW,
+ niph, nth, tcp_hdr_size);
+}
+
+/*
+ * Reply to the packet in @skb with a bare ACK (addresses and ports
+ * swapped). The hook in this file calls it for a SYN/ACK seen in
+ * TCP_CONNTRACK_SYN_RECV state.
+ *
+ * The sequence number echoes @th->ack_seq, the acknowledgement is
+ * @th->seq + 1, and the window is taken from the original-direction
+ * td_maxwin recorded in @state. No conntrack entry is attached to the
+ * generated packet. @snet is not used by the body.
+ */
+static void
+synproxy_send_server_ack(const struct synproxy_net *snet,
+ const struct ip_ct_tcp *state,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->daddr, iph->saddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->dest;
+ nth->dest = th->source;
+ nth->seq = htonl(ntohl(th->ack_seq));
+ nth->ack_seq = htonl(ntohl(th->seq) + 1);
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ nth->window = htons(state->seen[IP_CT_DIR_ORIGINAL].td_maxwin);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+/*
+ * Build and send a bare ACK that travels in the same direction as @skb
+ * (same source/destination addresses and ports). The hook in this file
+ * calls it for a SYN/ACK seen in TCP_CONNTRACK_SYN_RECV state.
+ *
+ * The sequence number is @th->seq + 1, the acknowledgement number is
+ * copied unchanged, and the window from @th is shifted right by
+ * @opts->wscale. No conntrack entry is attached to the generated packet.
+ * @snet is not used by the body. Silently does nothing if the allocation
+ * fails.
+ */
+static void
+synproxy_send_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ const struct synproxy_options *opts)
+{
+ struct sk_buff *nskb;
+ struct iphdr *iph, *niph;
+ struct tcphdr *nth;
+ unsigned int tcp_hdr_size;
+
+ iph = ip_hdr(skb);
+
+ tcp_hdr_size = sizeof(*nth) + synproxy_options_size(opts);
+ nskb = alloc_skb(sizeof(*niph) + tcp_hdr_size + MAX_TCP_HEADER,
+ GFP_ATOMIC);
+ if (nskb == NULL)
+ return;
+ skb_reserve(nskb, MAX_TCP_HEADER);
+
+ niph = synproxy_build_ip(nskb, iph->saddr, iph->daddr);
+
+ skb_reset_transport_header(nskb);
+ nth = (struct tcphdr *)skb_put(nskb, tcp_hdr_size);
+ nth->source = th->source;
+ nth->dest = th->dest;
+ nth->seq = htonl(ntohl(th->seq) + 1);
+ nth->ack_seq = th->ack_seq;
+ tcp_flag_word(nth) = TCP_FLAG_ACK;
+ nth->doff = tcp_hdr_size / 4;
+ /* th->window is in network order: convert to host order before
+ * shifting, then back to network order for the wire.
+ */
+ nth->window = htons(ntohs(th->window) >> opts->wscale);
+ nth->check = 0;
+ nth->urg_ptr = 0;
+
+ synproxy_build_options(nth, opts);
+
+ synproxy_send_tcp(skb, nskb, NULL, 0, niph, nth, tcp_hdr_size);
+}
+
+/*
+ * Validate the SYN cookie acknowledged by the ACK in @skb.
+ *
+ * On a valid cookie the recovered MSS is stored in @opts, the timestamp
+ * cookie is checked when the timestamp option is present, and a SYN is
+ * sent onwards using @recv_seq. Per-CPU cookie statistics are updated in
+ * both outcomes.
+ *
+ * Returns true if the cookie was valid, false otherwise.
+ */
+static bool
+synproxy_recv_client_ack(const struct synproxy_net *snet,
+ const struct sk_buff *skb, const struct tcphdr *th,
+ struct synproxy_options *opts, u32 recv_seq)
+{
+ int cookie_mss = __cookie_v4_check(ip_hdr(skb), th,
+ ntohl(th->ack_seq) - 1);
+
+ if (!cookie_mss) {
+ this_cpu_inc(snet->stats->cookie_invalid);
+ return false;
+ }
+
+ this_cpu_inc(snet->stats->cookie_valid);
+
+ opts->mss = cookie_mss;
+ opts->options |= XT_SYNPROXY_OPT_MSS;
+ if (opts->options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_check_timestamp_cookie(opts);
+
+ synproxy_send_server_syn(snet, skb, th, opts, recv_seq);
+
+ return true;
+}
+
+/*
+ * SYNPROXY target handler.
+ *
+ * Drops packets with a bad TCP checksum, an unreadable TCP header or
+ * unparsable options. A pure SYN is answered with a SYN/ACK and dropped;
+ * a pure ACK is run through the cookie check and dropped. Any other flag
+ * combination continues rule traversal (XT_CONTINUE).
+ */
+static unsigned int
+synproxy_tg4(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_synproxy_info *info = par->targinfo;
+ struct synproxy_net *snet = synproxy_pernet(dev_net(par->in));
+ struct synproxy_options opts = {};
+ struct tcphdr *th, _th;
+
+ if (nf_ip_checksum(skb, par->hooknum, par->thoff, IPPROTO_TCP))
+ return NF_DROP;
+
+ th = skb_header_pointer(skb, par->thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ if (!synproxy_parse_options(skb, par->thoff, th, &opts))
+ return NF_DROP;
+
+ if (th->syn && !(th->ack || th->fin || th->rst)) {
+ /* Initial SYN from client */
+ this_cpu_inc(snet->stats->syn_received);
+
+ if (th->ece && th->cwr)
+ opts.options |= XT_SYNPROXY_OPT_ECN;
+
+ /* keep only the options that the rule configuration enables */
+ opts.options &= info->options;
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy_init_timestamp_cookie(info, &opts);
+ else
+ /* without timestamps these options are not offered */
+ opts.options &= ~(XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM |
+ XT_SYNPROXY_OPT_ECN);
+
+ synproxy_send_client_synack(skb, th, &opts);
+ return NF_DROP;
+
+ } else if (th->ack && !(th->fin || th->rst || th->syn)) {
+ /* ACK from client */
+ synproxy_recv_client_ack(snet, skb, th, &opts, ntohl(th->seq));
+ return NF_DROP;
+ }
+
+ return XT_CONTINUE;
+}
+
+/*
+ * Netfilter hook driving the proxied handshake, keyed on the TCP
+ * conntrack state of the packet's connection.
+ *
+ * Packets without a conntrack entry, without the synproxy extension, or
+ * identified as loopback are accepted untouched. A packet whose TCP
+ * header cannot be read, or whose options cannot be parsed when needed,
+ * is dropped. A SYN/ACK handled in SYN_RECV state is consumed here and
+ * NF_STOLEN is returned; everything else that is not dropped goes
+ * through synproxy_tstamp_adjust() and is accepted.
+ */
+static unsigned int ipv4_synproxy_hook(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out));
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct;
+ struct nf_conn_synproxy *synproxy;
+ struct synproxy_options opts = {};
+ const struct ip_ct_tcp *state;
+ struct tcphdr *th, _th;
+ unsigned int thoff;
+
+ ct = nf_ct_get(skb, &ctinfo);
+ if (ct == NULL)
+ return NF_ACCEPT;
+
+ synproxy = nfct_synproxy(ct);
+ if (synproxy == NULL)
+ return NF_ACCEPT;
+
+ if (nf_is_loopback_packet(skb))
+ return NF_ACCEPT;
+
+ thoff = ip_hdrlen(skb);
+ th = skb_header_pointer(skb, thoff, sizeof(_th), &_th);
+ if (th == NULL)
+ return NF_DROP;
+
+ state = &ct->proto.tcp;
+ switch (state->state) {
+ case TCP_CONNTRACK_CLOSE:
+ /* RST seen before any reply was recorded for this connection:
+ * set up the sequence adjustment from the stored ISN.
+ */
+ if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
+ ntohl(th->seq) + 1);
+ break;
+ }
+
+ /* only a pure SYN in the original direction reopens the entry */
+ if (!th->syn || th->ack ||
+ CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
+ break;
+
+ /* Reopened connection - reset the sequence number and timestamp
+ * adjustments, they will get initialized once the connection is
+ * reestablished.
+ */
+ nf_ct_seqadj_init(ct, ctinfo, 0);
+ synproxy->tsoff = 0;
+ this_cpu_inc(snet->stats->conn_reopened);
+
+ /* fall through */
+ case TCP_CONNTRACK_SYN_SENT:
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (!th->syn && th->ack &&
+ CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) {
+ /* Keep-Alives are sent with SEG.SEQ = SND.NXT-1,
+ * therefore we need to add 1 to make the SYN sequence
+ * number match the one of first SYN.
+ */
+ if (synproxy_recv_client_ack(snet, skb, th, &opts,
+ ntohl(th->seq) + 1))
+ this_cpu_inc(snet->stats->cookie_retrans);
+
+ return NF_DROP;
+ }
+
+ /* remember the ISN (and timestamp echo) carried by this packet */
+ synproxy->isn = ntohl(th->ack_seq);
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->its = opts.tsecr;
+ break;
+ case TCP_CONNTRACK_SYN_RECV:
+ /* only a SYN/ACK is handled in this state */
+ if (!th->syn || !th->ack)
+ break;
+
+ if (!synproxy_parse_options(skb, thoff, th, &opts))
+ return NF_DROP;
+
+ if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
+ synproxy->tsoff = opts.tsval - synproxy->its;
+
+ /* the generated ACKs carry none of these options */
+ opts.options &= ~(XT_SYNPROXY_OPT_MSS |
+ XT_SYNPROXY_OPT_WSCALE |
+ XT_SYNPROXY_OPT_SACK_PERM);
+
+ /* timestamps are swapped for the ACK sent back to the sender of
+ * this SYN/ACK, then swapped back for the ACK sent onwards.
+ */
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_server_ack(snet, state, skb, th, &opts);
+
+ nf_ct_seqadj_init(ct, ctinfo, synproxy->isn - ntohl(th->seq));
+
+ swap(opts.tsval, opts.tsecr);
+ synproxy_send_client_ack(snet, skb, th, &opts);
+
+ /* the SYN/ACK itself is not forwarded */
+ consume_skb(skb);
+ return NF_STOLEN;
+ default:
+ break;
+ }
+
+ synproxy_tstamp_adjust(skb, thoff, th, ct, ctinfo, synproxy);
+ return NF_ACCEPT;
+}
+
+/*
+ * Rule validation: the target is only accepted on rules that match
+ * protocol TCP without inversion. On success the result of taking a
+ * reference on the layer 3 conntrack module for the rule's family is
+ * returned.
+ */
+static int synproxy_tg4_check(const struct xt_tgchk_param *par)
+{
+ const struct ipt_entry *entry = par->entryinfo;
+ bool tcp_only = entry->ip.proto == IPPROTO_TCP &&
+ !(entry->ip.invflags & XT_INV_PROTO);
+
+ if (!tcp_only)
+ return -EINVAL;
+
+ return nf_ct_l3proto_try_module_get(par->family);
+}
+
+/* Rule teardown: drop the module reference taken in synproxy_tg4_check(). */
+static void synproxy_tg4_destroy(const struct xt_tgdtor_param *par)
+{
+ nf_ct_l3proto_module_put(par->family);
+}
+
+/* xtables registration for the IPv4 "SYNPROXY" target; usable only from
+ * the LOCAL_IN and FORWARD hooks.
+ */
+static struct xt_target synproxy_tg4_reg __read_mostly = {
+ .name = "SYNPROXY",
+ .family = NFPROTO_IPV4,
+ .hooks = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD),
+ .target = synproxy_tg4,
+ .targetsize = sizeof(struct xt_synproxy_info),
+ .checkentry = synproxy_tg4_check,
+ .destroy = synproxy_tg4_destroy,
+ .me = THIS_MODULE,
+};
+
+/* ipv4_synproxy_hook() is installed on LOCAL_IN and POST_ROUTING, both at
+ * priority NF_IP_PRI_CONNTRACK_CONFIRM - 1.
+ */
+static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = {
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_LOCAL_IN,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+ {
+ .hook = ipv4_synproxy_hook,
+ .owner = THIS_MODULE,
+ .pf = NFPROTO_IPV4,
+ .hooknum = NF_INET_POST_ROUTING,
+ .priority = NF_IP_PRI_CONNTRACK_CONFIRM - 1,
+ },
+};
+
+/*
+ * Module init: register the netfilter hooks first, then the xtables
+ * target. If the target cannot be registered the hooks are removed again.
+ * Returns 0 on success or the negative error of the failing step.
+ */
+static int __init synproxy_tg4_init(void)
+{
+ int ret;
+
+ ret = nf_register_hooks(ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ if (ret < 0)
+ return ret;
+
+ ret = xt_register_target(&synproxy_tg4_reg);
+ if (ret < 0) {
+ nf_unregister_hooks(ipv4_synproxy_ops,
+ ARRAY_SIZE(ipv4_synproxy_ops));
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Module exit: undo synproxy_tg4_init() in reverse order (target first,
+ * then the hooks).
+ */
+static void __exit synproxy_tg4_exit(void)
+{
+ xt_unregister_target(&synproxy_tg4_reg);
+ nf_unregister_hooks(ipv4_synproxy_ops, ARRAY_SIZE(ipv4_synproxy_ops));
+}
+
+module_init(synproxy_tg4_init);
+module_exit(synproxy_tg4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
diff --git a/net/ipv4/netfilter/ipt_ULOG.c b/net/ipv4/netfilter/ipt_ULOG.c
index b5ef3cba225..9cb993cd224 100644
--- a/net/ipv4/netfilter/ipt_ULOG.c
+++ b/net/ipv4/netfilter/ipt_ULOG.c
@@ -4,6 +4,7 @@
* (C) 2000-2004 by Harald Welte <laforge@netfilter.org>
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2005-2007 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -37,7 +38,7 @@
#include <linux/skbuff.h>
#include <linux/kernel.h>
#include <linux/timer.h>
-#include <linux/netlink.h>
+#include <net/netlink.h>
#include <linux/netdevice.h>
#include <linux/mm.h>
#include <linux/moduleparam.h>
@@ -45,6 +46,7 @@
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>
#include <net/netfilter/nf_log.h>
+#include <net/netns/generic.h>
#include <net/sock.h>
#include <linux/bitops.h>
#include <asm/unaligned.h>
@@ -78,20 +80,26 @@ typedef struct {
struct timer_list timer; /* the timer function */
} ulog_buff_t;
-static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
+/* id used with net_generic() to locate the per-netns struct ulog_net */
+static int ulog_net_id __read_mostly;
+/* Per network namespace ULOG state. */
+struct ulog_net {
+ /* nlgroup[i] holds i; its address is handed to the timer of buffer i
+ * so the timer callback can recover both the group number and the
+ * enclosing ulog_net.
+ */
+ unsigned int nlgroup[ULOG_MAXNLGROUPS];
+ /* one queueing buffer per netlink group */
+ ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS];
+ /* netlink socket the queued messages are broadcast on */
+ struct sock *nflognl;
+ /* taken around buffer manipulation and sending */
+ spinlock_t lock;
+};
-static struct sock *nflognl; /* our socket */
-static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
+/* Return the ULOG state belonging to network namespace @net. */
+static struct ulog_net *ulog_pernet(struct net *net)
+{
+ return net_generic(net, ulog_net_id);
+}
/* send one ulog_buff_t to userspace */
-static void ulog_send(unsigned int nlgroupnum)
+static void ulog_send(struct ulog_net *ulog, unsigned int nlgroupnum)
{
- ulog_buff_t *ub = &ulog_buffers[nlgroupnum];
+ ulog_buff_t *ub = &ulog->ulog_buffers[nlgroupnum];
- if (timer_pending(&ub->timer)) {
- pr_debug("ulog_send: timer was pending, deleting\n");
- del_timer(&ub->timer);
- }
+ pr_debug("ulog_send: timer is deleting\n");
+ del_timer(&ub->timer);
if (!ub->skb) {
pr_debug("ulog_send: nothing to send\n");
@@ -105,7 +113,8 @@ static void ulog_send(unsigned int nlgroupnum)
NETLINK_CB(ub->skb).dst_group = nlgroupnum + 1;
pr_debug("throwing %d packets to netlink group %u\n",
ub->qlen, nlgroupnum + 1);
- netlink_broadcast(nflognl, ub->skb, 0, nlgroupnum + 1, GFP_ATOMIC);
+ netlink_broadcast(ulog->nflognl, ub->skb, 0, nlgroupnum + 1,
+ GFP_ATOMIC);
ub->qlen = 0;
ub->skb = NULL;
@@ -116,13 +125,17 @@ static void ulog_send(unsigned int nlgroupnum)
/* timer function to flush queue in flushtimeout time */
static void ulog_timer(unsigned long data)
{
+ unsigned int groupnum = *((unsigned int *)data);
+ struct ulog_net *ulog = container_of((void *)data,
+ struct ulog_net,
+ nlgroup[groupnum]);
pr_debug("timer function called, calling ulog_send\n");
/* lock to protect against somebody modifying our structure
* from ipt_ulog_target at the same time */
- spin_lock_bh(&ulog_lock);
- ulog_send(data);
- spin_unlock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
+ ulog_send(ulog, groupnum);
+ spin_unlock_bh(&ulog->lock);
}
static struct sk_buff *ulog_alloc_skb(unsigned int size)
@@ -150,7 +163,8 @@ static struct sk_buff *ulog_alloc_skb(unsigned int size)
return skb;
}
-static void ipt_ulog_packet(unsigned int hooknum,
+static void ipt_ulog_packet(struct net *net,
+ unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -162,6 +176,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
size_t size, copy_len;
struct nlmsghdr *nlh;
struct timeval tv;
+ struct ulog_net *ulog = ulog_pernet(net);
/* ffs == find first bit set, necessary because userspace
* is already shifting groupnumber, but we need unshifted.
@@ -174,11 +189,11 @@ static void ipt_ulog_packet(unsigned int hooknum,
else
copy_len = loginfo->copy_range;
- size = NLMSG_SPACE(sizeof(*pm) + copy_len);
+ size = nlmsg_total_size(sizeof(*pm) + copy_len);
- ub = &ulog_buffers[groupnum];
+ ub = &ulog->ulog_buffers[groupnum];
- spin_lock_bh(&ulog_lock);
+ spin_lock_bh(&ulog->lock);
if (!ub->skb) {
if (!(ub->skb = ulog_alloc_skb(size)))
@@ -188,7 +203,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
/* either the queue len is too high or we don't have
* enough room in nlskb left. send it to userspace. */
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
if (!(ub->skb = ulog_alloc_skb(size)))
goto alloc_failure;
@@ -205,6 +220,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
ub->qlen++;
pm = nlmsg_data(nlh);
+ memset(pm, 0, sizeof(*pm));
/* We might not have a timestamp, get one */
if (skb->tstamp.tv64 == 0)
@@ -217,12 +233,12 @@ static void ipt_ulog_packet(unsigned int hooknum,
put_unaligned(tv.tv_usec, &pm->timestamp_usec);
put_unaligned(skb->mark, &pm->mark);
pm->hook = hooknum;
- if (prefix != NULL)
- strncpy(pm->prefix, prefix, sizeof(pm->prefix));
+ if (prefix != NULL) {
+ strncpy(pm->prefix, prefix, sizeof(pm->prefix) - 1);
+ pm->prefix[sizeof(pm->prefix) - 1] = '\0';
+ }
else if (loginfo->prefix[0] != '\0')
strncpy(pm->prefix, loginfo->prefix, sizeof(pm->prefix));
- else
- *(pm->prefix) = '\0';
if (in && in->hard_header_len > 0 &&
skb->mac_header != skb->network_header &&
@@ -234,13 +250,9 @@ static void ipt_ulog_packet(unsigned int hooknum,
if (in)
strncpy(pm->indev_name, in->name, sizeof(pm->indev_name));
- else
- pm->indev_name[0] = '\0';
if (out)
strncpy(pm->outdev_name, out->name, sizeof(pm->outdev_name));
- else
- pm->outdev_name[0] = '\0';
/* copy_len <= skb->len, so can't fail. */
if (skb_copy_bits(skb, 0, pm->payload, copy_len) < 0)
@@ -262,27 +274,30 @@ static void ipt_ulog_packet(unsigned int hooknum,
if (ub->qlen >= loginfo->qthreshold) {
if (loginfo->qthreshold > 1)
nlh->nlmsg_type = NLMSG_DONE;
- ulog_send(groupnum);
+ ulog_send(ulog, groupnum);
}
out_unlock:
- spin_unlock_bh(&ulog_lock);
+ spin_unlock_bh(&ulog->lock);
return;
alloc_failure:
pr_debug("Error building netlink message\n");
- spin_unlock_bh(&ulog_lock);
+ spin_unlock_bh(&ulog->lock);
}
static unsigned int
ulog_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
- ipt_ulog_packet(par->hooknum, skb, par->in, par->out,
+ struct net *net = dev_net(par->in ? par->in : par->out);
+
+ ipt_ulog_packet(net, par->hooknum, skb, par->in, par->out,
par->targinfo, NULL);
return XT_CONTINUE;
}
-static void ipt_logfn(u_int8_t pf,
+static void ipt_logfn(struct net *net,
+ u_int8_t pf,
unsigned int hooknum,
const struct sk_buff *skb,
const struct net_device *in,
@@ -304,13 +319,19 @@ static void ipt_logfn(u_int8_t pf,
strlcpy(loginfo.prefix, prefix, sizeof(loginfo.prefix));
}
- ipt_ulog_packet(hooknum, skb, in, out, &loginfo, prefix);
+ ipt_ulog_packet(net, hooknum, skb, in, out, &loginfo, prefix);
}
static int ulog_tg_check(const struct xt_tgchk_param *par)
{
const struct ipt_ulog_info *loginfo = par->targinfo;
+ if (!par->net->xt.ulog_warn_deprecated) {
+ pr_info("ULOG is deprecated and it will be removed soon, "
+ "use NFLOG instead\n");
+ par->net->xt.ulog_warn_deprecated = true;
+ }
+
if (loginfo->prefix[sizeof(loginfo->prefix) - 1] != '\0') {
pr_debug("prefix not null-terminated\n");
return -EINVAL;
@@ -378,58 +399,48 @@ static struct nf_logger ipt_ulog_logger __read_mostly = {
.me = THIS_MODULE,
};
-static int __init ulog_tg_init(void)
+static int __net_init ulog_tg_net_init(struct net *net)
{
- int ret, i;
+ int i;
+ struct ulog_net *ulog = ulog_pernet(net);
struct netlink_kernel_cfg cfg = {
.groups = ULOG_MAXNLGROUPS,
};
- pr_debug("init module\n");
-
- if (nlbufsiz > 128*1024) {
- pr_warning("Netlink buffer has to be <= 128kB\n");
- return -EINVAL;
- }
-
+ spin_lock_init(&ulog->lock);
/* initialize ulog_buffers */
- for (i = 0; i < ULOG_MAXNLGROUPS; i++)
- setup_timer(&ulog_buffers[i].timer, ulog_timer, i);
+ for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
+ ulog->nlgroup[i] = i;
+ setup_timer(&ulog->ulog_buffers[i].timer, ulog_timer,
+ (unsigned long)&ulog->nlgroup[i]);
+ }
- nflognl = netlink_kernel_create(&init_net, NETLINK_NFLOG, &cfg);
- if (!nflognl)
+ ulog->nflognl = netlink_kernel_create(net, NETLINK_NFLOG, &cfg);
+ if (!ulog->nflognl)
return -ENOMEM;
- ret = xt_register_target(&ulog_tg_reg);
- if (ret < 0) {
- netlink_kernel_release(nflognl);
- return ret;
- }
if (nflog)
- nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+ nf_log_set(net, NFPROTO_IPV4, &ipt_ulog_logger);
return 0;
}
-static void __exit ulog_tg_exit(void)
+static void __net_exit ulog_tg_net_exit(struct net *net)
{
ulog_buff_t *ub;
int i;
-
- pr_debug("cleanup_module\n");
+ struct ulog_net *ulog = ulog_pernet(net);
if (nflog)
- nf_log_unregister(&ipt_ulog_logger);
- xt_unregister_target(&ulog_tg_reg);
- netlink_kernel_release(nflognl);
+ nf_log_unset(net, &ipt_ulog_logger);
+
+ netlink_kernel_release(ulog->nflognl);
/* remove pending timers and free allocated skb's */
for (i = 0; i < ULOG_MAXNLGROUPS; i++) {
- ub = &ulog_buffers[i];
- if (timer_pending(&ub->timer)) {
- pr_debug("timer was pending, deleting\n");
- del_timer(&ub->timer);
- }
+ ub = &ulog->ulog_buffers[i];
+ pr_debug("timer is deleting\n");
+ del_timer(&ub->timer);
if (ub->skb) {
kfree_skb(ub->skb);
@@ -438,5 +449,50 @@ static void __exit ulog_tg_exit(void)
}
}
+/* Per-netns callbacks; .id and .size describe the struct ulog_net storage
+ * that ulog_pernet() looks up through ulog_net_id.
+ */
+static struct pernet_operations ulog_tg_net_ops = {
+ .init = ulog_tg_net_init,
+ .exit = ulog_tg_net_exit,
+ .id = &ulog_net_id,
+ .size = sizeof(struct ulog_net),
+};
+
+/*
+ * Module init: reject an oversized nlbufsiz, then register the per-netns
+ * operations and the xtables target. If the target cannot be registered
+ * the per-netns operations are unregistered again. The logger is
+ * registered only when the nflog parameter is set; its result is not
+ * checked.
+ */
+static int __init ulog_tg_init(void)
+{
+ int err;
+
+ pr_debug("init module\n");
+
+ if (nlbufsiz > 128*1024) {
+ pr_warn("Netlink buffer has to be <= 128kB\n");
+ return -EINVAL;
+ }
+
+ err = register_pernet_subsys(&ulog_tg_net_ops);
+ if (err != 0)
+ return err;
+
+ err = xt_register_target(&ulog_tg_reg);
+ if (err < 0) {
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+ return err;
+ }
+
+ if (nflog)
+ nf_log_register(NFPROTO_IPV4, &ipt_ulog_logger);
+
+ return 0;
+}
+
+/* Module exit: undo ulog_tg_init() in reverse order - logger (if it was
+ * registered), xtables target, then the per-netns operations.
+ */
+static void __exit ulog_tg_exit(void)
+{
+ pr_debug("cleanup_module\n");
+ if (nflog)
+ nf_log_unregister(&ipt_ulog_logger);
+ xt_unregister_target(&ulog_tg_reg);
+ unregister_pernet_subsys(&ulog_tg_net_ops);
+}
+
module_init(ulog_tg_init);
module_exit(ulog_tg_exit);
diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c
index c30130062cd..4bfaedf9b34 100644
--- a/net/ipv4/netfilter/ipt_rpfilter.c
+++ b/net/ipv4/netfilter/ipt_rpfilter.c
@@ -66,6 +66,12 @@ static bool rpfilter_lookup_reverse(struct flowi4 *fl4,
return dev_match;
}
+/* True when @skb has a route attached and that route has RTCF_LOCAL set. */
+static bool rpfilter_is_local(const struct sk_buff *skb)
+{
+ const struct rtable *route = skb_rtable(skb);
+
+ if (!route)
+ return false;
+
+ return (route->rt_flags & RTCF_LOCAL) != 0;
+}
+
static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_rpfilter_info *info;
@@ -76,18 +82,15 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par)
info = par->matchinfo;
invert = info->flags & XT_RPFILTER_INVERT;
- if (par->in->flags & IFF_LOOPBACK)
+ if (rpfilter_is_local(skb))
return true ^ invert;
iph = ip_hdr(skb);
if (ipv4_is_multicast(iph->daddr)) {
if (ipv4_is_zeronet(iph->saddr))
return ipv4_is_local_multicast(iph->daddr) ^ invert;
- flow.flowi4_iif = 0;
- } else {
- flow.flowi4_iif = LOOPBACK_IFINDEX;
}
-
+ flow.flowi4_iif = LOOPBACK_IFINDEX;
flow.daddr = iph->saddr;
flow.saddr = rpfilter_get_saddr(iph->daddr);
flow.flowi4_oif = 0;
diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c
index 6b3da5cf54e..e08a74a243a 100644
--- a/net/ipv4/netfilter/iptable_filter.c
+++ b/net/ipv4/netfilter/iptable_filter.c
@@ -33,20 +33,21 @@ static const struct xt_table packet_filter = {
};
static unsigned int
-iptable_filter_hook(unsigned int hook, struct sk_buff *skb,
+iptable_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* root is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_filter);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_filter);
}
static struct nf_hook_ops *filter_ops __read_mostly;
@@ -69,7 +70,7 @@ static int __net_init iptable_filter_net_init(struct net *net)
net->ipv4.iptable_filter =
ipt_register_table(net, &packet_filter, repl);
kfree(repl);
- return PTR_RET(net->ipv4.iptable_filter);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_filter);
}
static void __net_exit iptable_filter_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c
index 85d88f20644..6a5079c34bb 100644
--- a/net/ipv4/netfilter/iptable_mangle.c
+++ b/net/ipv4/netfilter/iptable_mangle.c
@@ -44,6 +44,7 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
u_int8_t tos;
__be32 saddr, daddr;
u_int32_t mark;
+ int err;
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
@@ -66,9 +67,11 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
if (iph->saddr != saddr ||
iph->daddr != daddr ||
skb->mark != mark ||
- iph->tos != tos)
- if (ip_route_me_harder(skb, RTN_UNSPEC))
- ret = NF_DROP;
+ iph->tos != tos) {
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
}
return ret;
@@ -76,19 +79,19 @@ ipt_mangle_out(struct sk_buff *skb, const struct net_device *out)
/* The work comes in here from netfilter.c. */
static unsigned int
-iptable_mangle_hook(unsigned int hook,
+iptable_mangle_hook(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- if (hook == NF_INET_LOCAL_OUT)
+ if (ops->hooknum == NF_INET_LOCAL_OUT)
return ipt_mangle_out(skb, out);
- if (hook == NF_INET_POST_ROUTING)
- return ipt_do_table(skb, hook, in, out,
+ if (ops->hooknum == NF_INET_POST_ROUTING)
+ return ipt_do_table(skb, ops->hooknum, in, out,
dev_net(out)->ipv4.iptable_mangle);
/* PREROUTING/INPUT/FORWARD: */
- return ipt_do_table(skb, hook, in, out,
+ return ipt_do_table(skb, ops->hooknum, in, out,
dev_net(in)->ipv4.iptable_mangle);
}
@@ -104,7 +107,7 @@ static int __net_init iptable_mangle_net_init(struct net *net)
net->ipv4.iptable_mangle =
ipt_register_table(net, &packet_mangler, repl);
kfree(repl);
- return PTR_RET(net->ipv4.iptable_mangle);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_mangle);
}
static void __net_exit iptable_mangle_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c
index da2c8a368f6..f1787c04a4d 100644
--- a/net/ipv4/netfilter/iptable_nat.c
+++ b/net/ipv4/netfilter/iptable_nat.c
@@ -61,7 +61,7 @@ static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv4_fn(unsigned int hooknum,
+nf_nat_ipv4_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -71,7 +71,7 @@ nf_nat_ipv4_fn(unsigned int hooknum,
enum ip_conntrack_info ctinfo;
struct nf_conn_nat *nat;
/* maniptype == SRC for postrouting. */
- enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum);
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
/* We never see fragments: conntrack defrags on pre-routing
* and local-out, and nf_nat_out protects post-routing.
@@ -91,24 +91,16 @@ nf_nat_ipv4_fn(unsigned int hooknum,
if (nf_ct_is_untracked(ct))
return NF_ACCEPT;
- nat = nfct_nat(ct);
- if (!nat) {
- /* NAT module was loaded late. */
- if (nf_ct_is_confirmed(ct))
- return NF_ACCEPT;
- nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC);
- if (nat == NULL) {
- pr_debug("failed to add NAT extension\n");
- return NF_ACCEPT;
- }
- }
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
switch (ctinfo) {
case IP_CT_RELATED:
case IP_CT_RELATED_REPLY:
if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
- hooknum))
+ ops->hooknum))
return NF_DROP;
else
return NF_ACCEPT;
@@ -121,30 +113,35 @@ nf_nat_ipv4_fn(unsigned int hooknum,
if (!nf_nat_initialized(ct, maniptype)) {
unsigned int ret;
- ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
+ ret = nf_nat_rule_find(skb, ops->hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
- } else
+ } else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
+ }
break;
default:
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
- if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
- nf_ct_kill_acct(ct, ctinfo, skb);
- return NF_DROP;
- }
+ if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out))
+ goto oif_changed;
}
- return nf_nat_packet(ct, ctinfo, hooknum, skb);
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+
+oif_changed:
+ nf_ct_kill_acct(ct, ctinfo, skb);
+ return NF_DROP;
}
static unsigned int
-nf_nat_ipv4_in(unsigned int hooknum,
+nf_nat_ipv4_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -153,7 +150,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
unsigned int ret;
__be32 daddr = ip_hdr(skb)->daddr;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
daddr != ip_hdr(skb)->daddr)
skb_dst_drop(skb);
@@ -162,7 +159,7 @@ nf_nat_ipv4_in(unsigned int hooknum,
}
static unsigned int
-nf_nat_ipv4_out(unsigned int hooknum,
+nf_nat_ipv4_out(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -171,6 +168,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
#ifdef CONFIG_XFRM
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
+ int err;
#endif
unsigned int ret;
@@ -179,7 +177,7 @@ nf_nat_ipv4_out(unsigned int hooknum,
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
#ifdef CONFIG_XFRM
if (ret != NF_DROP && ret != NF_STOLEN &&
!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
@@ -190,16 +188,18 @@ nf_nat_ipv4_out(unsigned int hooknum,
ct->tuplehash[!dir].tuple.dst.u3.ip) ||
(ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.src.u.all !=
- ct->tuplehash[!dir].tuple.dst.u.all))
- if (nf_xfrm_me_harder(skb, AF_INET) < 0)
- ret = NF_DROP;
+ ct->tuplehash[!dir].tuple.dst.u.all)) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
}
#endif
return ret;
}
static unsigned int
-nf_nat_ipv4_local_fn(unsigned int hooknum,
+nf_nat_ipv4_local_fn(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -208,29 +208,33 @@ nf_nat_ipv4_local_fn(unsigned int hooknum,
const struct nf_conn *ct;
enum ip_conntrack_info ctinfo;
unsigned int ret;
+ int err;
/* root is playing with raw sockets. */
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- ret = nf_nat_ipv4_fn(hooknum, skb, in, out, okfn);
+ ret = nf_nat_ipv4_fn(ops, skb, in, out, okfn);
if (ret != NF_DROP && ret != NF_STOLEN &&
(ct = nf_ct_get(skb, &ctinfo)) != NULL) {
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
if (ct->tuplehash[dir].tuple.dst.u3.ip !=
ct->tuplehash[!dir].tuple.src.u3.ip) {
- if (ip_route_me_harder(skb, RTN_UNSPEC))
- ret = NF_DROP;
+ err = ip_route_me_harder(skb, RTN_UNSPEC);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
}
#ifdef CONFIG_XFRM
else if (!(IPCB(skb)->flags & IPSKB_XFRM_TRANSFORMED) &&
ct->tuplehash[dir].tuple.dst.protonum != IPPROTO_ICMP &&
ct->tuplehash[dir].tuple.dst.u.all !=
- ct->tuplehash[!dir].tuple.src.u.all)
- if (nf_xfrm_me_harder(skb, AF_INET) < 0)
- ret = NF_DROP;
+ ct->tuplehash[!dir].tuple.src.u.all) {
+ err = nf_xfrm_me_harder(skb, AF_INET);
+ if (err < 0)
+ ret = NF_DROP_ERR(err);
+ }
#endif
}
return ret;
@@ -280,7 +284,7 @@ static int __net_init iptable_nat_net_init(struct net *net)
return -ENOMEM;
net->ipv4.nat_table = ipt_register_table(net, &nf_nat_ipv4_table, repl);
kfree(repl);
- return PTR_RET(net->ipv4.nat_table);
+ return PTR_ERR_OR_ZERO(net->ipv4.nat_table);
}
static void __net_exit iptable_nat_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c
index 03d9696d3c6..b2f7e8f9831 100644
--- a/net/ipv4/netfilter/iptable_raw.c
+++ b/net/ipv4/netfilter/iptable_raw.c
@@ -20,20 +20,20 @@ static const struct xt_table packet_raw = {
/* The work comes in here from netfilter.c. */
static unsigned int
-iptable_raw_hook(unsigned int hook, struct sk_buff *skb,
+iptable_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in, const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* root is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_raw);
+ return ipt_do_table(skb, ops->hooknum, in, out, net->ipv4.iptable_raw);
}
static struct nf_hook_ops *rawtable_ops __read_mostly;
@@ -48,7 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net)
net->ipv4.iptable_raw =
ipt_register_table(net, &packet_raw, repl);
kfree(repl);
- return PTR_RET(net->ipv4.iptable_raw);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_raw);
}
static void __net_exit iptable_raw_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c
index b283d8e2601..c86647ed207 100644
--- a/net/ipv4/netfilter/iptable_security.c
+++ b/net/ipv4/netfilter/iptable_security.c
@@ -37,21 +37,22 @@ static const struct xt_table security_table = {
};
static unsigned int
-iptable_security_hook(unsigned int hook, struct sk_buff *skb,
+iptable_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
const struct net *net;
- if (hook == NF_INET_LOCAL_OUT &&
+ if (ops->hooknum == NF_INET_LOCAL_OUT &&
(skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr)))
/* Somebody is playing with raw sockets. */
return NF_ACCEPT;
net = dev_net((in != NULL) ? in : out);
- return ipt_do_table(skb, hook, in, out, net->ipv4.iptable_security);
+ return ipt_do_table(skb, ops->hooknum, in, out,
+ net->ipv4.iptable_security);
}
static struct nf_hook_ops *sectbl_ops __read_mostly;
@@ -66,7 +67,7 @@ static int __net_init iptable_security_net_init(struct net *net)
net->ipv4.iptable_security =
ipt_register_table(net, &security_table, repl);
kfree(repl);
- return PTR_RET(net->ipv4.iptable_security);
+ return PTR_ERR_OR_ZERO(net->ipv4.iptable_security);
}
static void __net_exit iptable_security_net_exit(struct net *net)
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
index fcdd0c2406e..8127dc80286 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c
@@ -1,6 +1,7 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -24,6 +25,7 @@
#include <net/netfilter/nf_conntrack_l3proto.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_core.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
#include <net/netfilter/ipv4/nf_conntrack_ipv4.h>
#include <net/netfilter/nf_nat_helper.h>
#include <net/netfilter/ipv4/nf_defrag_ipv4.h>
@@ -90,7 +92,7 @@ static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
return NF_ACCEPT;
}
-static unsigned int ipv4_helper(unsigned int hooknum,
+static unsigned int ipv4_helper(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -100,7 +102,6 @@ static unsigned int ipv4_helper(unsigned int hooknum,
enum ip_conntrack_info ctinfo;
const struct nf_conn_help *help;
const struct nf_conntrack_helper *helper;
- unsigned int ret;
/* This is where we call the helper: as the packet goes out. */
ct = nf_ct_get(skb, &ctinfo);
@@ -116,16 +117,11 @@ static unsigned int ipv4_helper(unsigned int hooknum,
if (!helper)
return NF_ACCEPT;
- ret = helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
- ct, ctinfo);
- if (ret != NF_ACCEPT && (ret & NF_VERDICT_MASK) != NF_QUEUE) {
- nf_log_packet(NFPROTO_IPV4, hooknum, skb, in, out, NULL,
- "nf_ct_%s: dropping packet", helper->name);
- }
- return ret;
+ return helper->help(skb, skb_network_offset(skb) + ip_hdrlen(skb),
+ ct, ctinfo);
}
-static unsigned int ipv4_confirm(unsigned int hooknum,
+static unsigned int ipv4_confirm(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -141,11 +137,7 @@ static unsigned int ipv4_confirm(unsigned int hooknum,
/* adjust seqs for loopback traffic only in outgoing direction */
if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
!nf_is_loopback_packet(skb)) {
- typeof(nf_nat_seq_adjust_hook) seq_adjust;
-
- seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook);
- if (!seq_adjust ||
- !seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
+ if (!nf_ct_seq_adjust(skb, ct, ctinfo, ip_hdrlen(skb))) {
NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop);
return NF_DROP;
}
@@ -155,16 +147,16 @@ out:
return nf_conntrack_confirm(skb);
}
-static unsigned int ipv4_conntrack_in(unsigned int hooknum,
+static unsigned int ipv4_conntrack_in(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
int (*okfn)(struct sk_buff *))
{
- return nf_conntrack_in(dev_net(in), PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(in), PF_INET, ops->hooknum, skb);
}
-static unsigned int ipv4_conntrack_local(unsigned int hooknum,
+static unsigned int ipv4_conntrack_local(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -174,7 +166,7 @@ static unsigned int ipv4_conntrack_local(unsigned int hooknum,
if (skb->len < sizeof(struct iphdr) ||
ip_hdrlen(skb) < sizeof(struct iphdr))
return NF_ACCEPT;
- return nf_conntrack_in(dev_net(out), PF_INET, hooknum, skb);
+ return nf_conntrack_in(dev_net(out), PF_INET, ops->hooknum, skb);
}
/* Connection tracking may drop packets, but never alters them, so
@@ -228,7 +220,7 @@ static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = {
static int log_invalid_proto_min = 0;
static int log_invalid_proto_max = 255;
-static ctl_table ip_ct_sysctl_table[] = {
+static struct ctl_table ip_ct_sysctl_table[] = {
{
.procname = "ip_conntrack_max",
.maxlen = sizeof(int),
@@ -420,54 +412,43 @@ static int ipv4_net_init(struct net *net)
{
int ret = 0;
- ret = nf_conntrack_l4proto_register(net,
- &nf_conntrack_l4proto_tcp4);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_tcp4);
if (ret < 0) {
- pr_err("nf_conntrack_l4proto_tcp4 :protocol register failed\n");
+ pr_err("nf_conntrack_tcp4: pernet registration failed\n");
goto out_tcp;
}
- ret = nf_conntrack_l4proto_register(net,
- &nf_conntrack_l4proto_udp4);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_udp4);
if (ret < 0) {
- pr_err("nf_conntrack_l4proto_udp4 :protocol register failed\n");
+ pr_err("nf_conntrack_udp4: pernet registration failed\n");
goto out_udp;
}
- ret = nf_conntrack_l4proto_register(net,
- &nf_conntrack_l4proto_icmp);
+ ret = nf_ct_l4proto_pernet_register(net, &nf_conntrack_l4proto_icmp);
if (ret < 0) {
- pr_err("nf_conntrack_l4proto_icmp4 :protocol register failed\n");
+ pr_err("nf_conntrack_icmp4: pernet registration failed\n");
goto out_icmp;
}
- ret = nf_conntrack_l3proto_register(net,
- &nf_conntrack_l3proto_ipv4);
+ ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4);
if (ret < 0) {
- pr_err("nf_conntrack_l3proto_ipv4 :protocol register failed\n");
+ pr_err("nf_conntrack_ipv4: pernet registration failed\n");
goto out_ipv4;
}
return 0;
out_ipv4:
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
out_icmp:
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
out_udp:
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_tcp4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
out_tcp:
return ret;
}
static void ipv4_net_exit(struct net *net)
{
- nf_conntrack_l3proto_unregister(net,
- &nf_conntrack_l3proto_ipv4);
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_icmp);
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_udp4);
- nf_conntrack_l4proto_unregister(net,
- &nf_conntrack_l4proto_tcp4);
+ nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_pernet_unregister(net, &nf_conntrack_l4proto_tcp4);
}
static struct pernet_operations ipv4_net_ops = {
@@ -500,16 +481,49 @@ static int __init nf_conntrack_l3proto_ipv4_init(void)
pr_err("nf_conntrack_ipv4: can't register hooks.\n");
goto cleanup_pernet;
}
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_tcp4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register tcp4 proto.\n");
+ goto cleanup_hooks;
+ }
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_udp4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register udp4 proto.\n");
+ goto cleanup_tcp4;
+ }
+
+ ret = nf_ct_l4proto_register(&nf_conntrack_l4proto_icmp);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register icmpv4 proto.\n");
+ goto cleanup_udp4;
+ }
+
+ ret = nf_ct_l3proto_register(&nf_conntrack_l3proto_ipv4);
+ if (ret < 0) {
+ pr_err("nf_conntrack_ipv4: can't register ipv4 proto.\n");
+ goto cleanup_icmpv4;
+ }
+
#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
ret = nf_conntrack_ipv4_compat_init();
if (ret < 0)
- goto cleanup_hooks;
+ goto cleanup_proto;
#endif
return ret;
#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
+ cleanup_proto:
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+#endif
+ cleanup_icmpv4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ cleanup_udp4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ cleanup_tcp4:
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
cleanup_hooks:
nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
-#endif
cleanup_pernet:
unregister_pernet_subsys(&ipv4_net_ops);
cleanup_sockopt:
@@ -523,6 +537,10 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
#if defined(CONFIG_PROC_FS) && defined(CONFIG_NF_CONNTRACK_PROC_COMPAT)
nf_conntrack_ipv4_compat_fini();
#endif
+ nf_ct_l3proto_unregister(&nf_conntrack_l3proto_ipv4);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_icmp);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_udp4);
+ nf_ct_l4proto_unregister(&nf_conntrack_l4proto_tcp4);
nf_unregister_hooks(ipv4_conntrack_ops, ARRAY_SIZE(ipv4_conntrack_ops));
unregister_pernet_subsys(&ipv4_net_ops);
nf_unregister_sockopt(&so_getorigdst);
@@ -530,9 +548,3 @@ static void __exit nf_conntrack_l3proto_ipv4_fini(void)
module_init(nf_conntrack_l3proto_ipv4_init);
module_exit(nf_conntrack_l3proto_ipv4_fini);
-
-void need_ipv4_conntrack(void)
-{
- return;
-}
-EXPORT_SYMBOL_GPL(need_ipv4_conntrack);
diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
index 9682b36df38..4c48e434bb1 100644
--- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
+++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c
@@ -2,6 +2,7 @@
*
* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -417,12 +418,12 @@ static int __net_init ip_conntrack_net_init(struct net *net)
{
struct proc_dir_entry *proc, *proc_exp, *proc_stat;
- proc = proc_net_fops_create(net, "ip_conntrack", 0440, &ct_file_ops);
+ proc = proc_create("ip_conntrack", 0440, net->proc_net, &ct_file_ops);
if (!proc)
goto err1;
- proc_exp = proc_net_fops_create(net, "ip_conntrack_expect", 0440,
- &ip_exp_file_ops);
+ proc_exp = proc_create("ip_conntrack_expect", 0440, net->proc_net,
+ &ip_exp_file_ops);
if (!proc_exp)
goto err2;
@@ -433,9 +434,9 @@ static int __net_init ip_conntrack_net_init(struct net *net)
return 0;
err3:
- proc_net_remove(net, "ip_conntrack_expect");
+ remove_proc_entry("ip_conntrack_expect", net->proc_net);
err2:
- proc_net_remove(net, "ip_conntrack");
+ remove_proc_entry("ip_conntrack", net->proc_net);
err1:
return -ENOMEM;
}
@@ -443,8 +444,8 @@ err1:
static void __net_exit ip_conntrack_net_exit(struct net *net)
{
remove_proc_entry("ip_conntrack", net->proc_net_stat);
- proc_net_remove(net, "ip_conntrack_expect");
- proc_net_remove(net, "ip_conntrack");
+ remove_proc_entry("ip_conntrack_expect", net->proc_net);
+ remove_proc_entry("ip_conntrack", net->proc_net);
}
static struct pernet_operations ip_conntrack_net_ops = {
diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
index 5241d997ab7..a338dad41b7 100644
--- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c
@@ -1,5 +1,6 @@
/* (C) 1999-2001 Paul `Rusty' Russell
* (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
+ * (C) 2006-2010 Patrick McHardy <kaber@trash.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -187,8 +188,8 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih);
if (icmph == NULL) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
- "nf_ct_icmp: short packet ");
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL,
+ NULL, "nf_ct_icmp: short packet ");
return -NF_ACCEPT;
}
@@ -196,7 +197,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, hooknum, dataoff, 0)) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: bad HW ICMP checksum ");
return -NF_ACCEPT;
}
@@ -209,7 +210,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl,
*/
if (icmph->type > NR_ICMP_TYPES) {
if (LOG_INVALID(net, IPPROTO_ICMP))
- nf_log_packet(PF_INET, 0, skb, NULL, NULL, NULL,
+ nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL,
"nf_ct_icmp: invalid ICMP type ");
return -NF_ACCEPT;
}
diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c
index 742815518b0..b8f6381c7d0 100644
--- a/net/ipv4/netfilter/nf_defrag_ipv4.c
+++ b/net/ipv4/netfilter/nf_defrag_ipv4.c
@@ -22,7 +22,6 @@
#endif
#include <net/netfilter/nf_conntrack_zones.h>
-/* Returns new sk_buff, or NULL */
static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
{
int err;
@@ -33,8 +32,10 @@ static int nf_ct_ipv4_gather_frags(struct sk_buff *skb, u_int32_t user)
err = ip_defrag(skb, user);
local_bh_enable();
- if (!err)
+ if (!err) {
ip_send_check(ip_hdr(skb));
+ skb->ignore_df = 1;
+ }
return err;
}
@@ -60,7 +61,7 @@ static enum ip_defrag_users nf_ct_defrag_user(unsigned int hooknum,
return IP_DEFRAG_CONNTRACK_OUT + zone;
}
-static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
+static unsigned int ipv4_conntrack_defrag(const struct nf_hook_ops *ops,
struct sk_buff *skb,
const struct net_device *in,
const struct net_device *out,
@@ -83,7 +84,9 @@ static unsigned int ipv4_conntrack_defrag(unsigned int hooknum,
#endif
/* Gather fragments. */
if (ip_is_fragment(ip_hdr(skb))) {
- enum ip_defrag_users user = nf_ct_defrag_user(hooknum, skb);
+ enum ip_defrag_users user =
+ nf_ct_defrag_user(ops->hooknum, skb);
+
if (nf_ct_ipv4_gather_frags(skb, user))
return NF_STOLEN;
}
diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c
index 9c3db10b22d..574f7ebba0b 100644
--- a/net/ipv4/netfilter/nf_nat_h323.c
+++ b/net/ipv4/netfilter/nf_nat_h323.c
@@ -2,6 +2,7 @@
* H.323 extension for NAT alteration.
*
* Copyright (c) 2006 Jing Min Zhao <zhaojingmin@users.sourceforge.net>
+ * Copyright (c) 2006-2012 Patrick McHardy <kaber@trash.net>
*
* This source code is licensed under General Public License version 2.
*
@@ -228,7 +229,10 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct,
ret = nf_ct_expect_related(rtcp_exp);
if (ret == 0)
break;
- else if (ret != -EBUSY) {
+ else if (ret == -EBUSY) {
+ nf_ct_unexpect_related(rtp_exp);
+ continue;
+ } else if (ret < 0) {
nf_ct_unexpect_related(rtp_exp);
nated_port = 0;
break;
diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c
index a06d7d74817..657d2307f03 100644
--- a/net/ipv4/netfilter/nf_nat_pptp.c
+++ b/net/ipv4/netfilter/nf_nat_pptp.c
@@ -13,6 +13,8 @@
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
* TODO: - NAT to a unique tuple, not to TCP source port
* (needs netfilter tuple reservation)
*/
diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c
index ea44f02563b..690d890111b 100644
--- a/net/ipv4/netfilter/nf_nat_proto_gre.c
+++ b/net/ipv4/netfilter/nf_nat_proto_gre.c
@@ -21,6 +21,8 @@
*
* Development of this code funded by Astaro AG (http://www.astaro.com/)
*
+ * (C) 2006-2012 Patrick McHardy <kaber@trash.net>
+ *
*/
#include <linux/module.h>
diff --git a/net/ipv4/netfilter/nf_nat_snmp_basic.c b/net/ipv4/netfilter/nf_nat_snmp_basic.c
index bac712293fd..7c676671329 100644
--- a/net/ipv4/netfilter/nf_nat_snmp_basic.c
+++ b/net/ipv4/netfilter/nf_nat_snmp_basic.c
@@ -34,10 +34,11 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
* You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
*
* Author: James Morris <jmorris@intercode.com.au>
+ *
+ * Copyright (c) 2006-2010 Patrick McHardy <kaber@trash.net>
*/
#include <linux/module.h>
#include <linux/moduleparam.h>
@@ -460,14 +461,14 @@ static unsigned char asn1_oid_decode(struct asn1_ctx *ctx,
}
if (subid < 40) {
- optr [0] = 0;
- optr [1] = subid;
+ optr[0] = 0;
+ optr[1] = subid;
} else if (subid < 80) {
- optr [0] = 1;
- optr [1] = subid - 40;
+ optr[0] = 1;
+ optr[1] = subid - 40;
} else {
- optr [0] = 2;
- optr [1] = subid - 80;
+ optr[0] = 2;
+ optr[1] = subid - 80;
}
*len = 2;
@@ -1197,8 +1198,8 @@ static int snmp_translate(struct nf_conn *ct,
map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
} else {
/* DNAT replies */
- map.from = NOCT1(&ct->tuplehash[dir].tuple.src.u3.ip);
- map.to = NOCT1(&ct->tuplehash[!dir].tuple.dst.u3.ip);
+ map.from = NOCT1(&ct->tuplehash[!dir].tuple.src.u3.ip);
+ map.to = NOCT1(&ct->tuplehash[dir].tuple.dst.u3.ip);
}
if (map.from == map.to)
diff --git a/net/ipv4/netfilter/nf_tables_arp.c b/net/ipv4/netfilter/nf_tables_arp.c
new file mode 100644
index 00000000000..19412a4063f
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_arp.c
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2008-2010 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/netfilter_arp.h>
+#include <net/netfilter/nf_tables.h>
+
+/*
+ * ARP hook entry point: fill in the packet info for @skb and run the
+ * nf_tables chain attached to @ops, returning the verdict of nft_do_chain().
+ * @okfn is accepted for the hook signature but is not used here.
+ */
+static unsigned int
+nft_do_chain_arp(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo(&pkt, ops, skb, in, out);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+/*
+ * ARP family description. This instance is only a template: it is copied
+ * into each network namespace by nf_tables_arp_init_net(). All three ARP
+ * hooks are served by nft_do_chain_arp().
+ */
+static struct nft_af_info nft_af_arp __read_mostly = {
+ .family = NFPROTO_ARP,
+ .nhooks = NF_ARP_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_ARP_IN] = nft_do_chain_arp,
+ [NF_ARP_OUT] = nft_do_chain_arp,
+ [NF_ARP_FORWARD] = nft_do_chain_arp,
+ },
+};
+
+/*
+ * Per-namespace init: allocate a private copy of the ARP family template
+ * and register it for @net.
+ *
+ * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or the
+ * negative error reported by nft_register_afinfo().
+ */
+static int nf_tables_arp_init_net(struct net *net)
+{
+	int err;
+
+	/* Size the allocation from the object that is copied into it. */
+	net->nft.arp = kmalloc(sizeof(nft_af_arp), GFP_KERNEL);
+	if (net->nft.arp == NULL)
+		return -ENOMEM;
+
+	memcpy(net->nft.arp, &nft_af_arp, sizeof(nft_af_arp));
+
+	err = nft_register_afinfo(net, net->nft.arp);
+	if (err < 0)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	kfree(net->nft.arp);
+	/* Do not leave a dangling pointer behind in the namespace. */
+	net->nft.arp = NULL;
+	/* Report the real failure instead of always claiming -ENOMEM. */
+	return err;
+}
+
+/*
+ * Per-namespace teardown: unregister the namespace's ARP family copy and
+ * then free it (unregister first, so the object is still valid while it is
+ * being removed).
+ */
+static void nf_tables_arp_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.arp);
+ kfree(net->nft.arp);
+}
+
+/* Per-namespace init/exit callbacks, registered from nf_tables_arp_init(). */
+static struct pernet_operations nf_tables_arp_net_ops = {
+ .init = nf_tables_arp_init_net,
+ .exit = nf_tables_arp_exit_net,
+};
+
+/*
+ * Default "filter" chain type for the ARP family. hook_mask lists the
+ * three ARP hooks (IN, OUT, FORWARD) this type is declared for.
+ */
+static const struct nf_chain_type filter_arp = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_ARP,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_ARP_IN) |
+ (1 << NF_ARP_OUT) |
+ (1 << NF_ARP_FORWARD),
+};
+
+/*
+ * Module init: register the ARP "filter" chain type, then the per-namespace
+ * operations. The chain type is rolled back if the pernet registration
+ * fails.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int __init nf_tables_arp_init(void)
+{
+	int ret;
+
+	/* A failed chain type registration must abort the module load. */
+	ret = nft_register_chain_type(&filter_arp);
+	if (ret < 0)
+		return ret;
+
+	ret = register_pernet_subsys(&nf_tables_arp_net_ops);
+	if (ret < 0)
+		nft_unregister_chain_type(&filter_arp);
+
+	return ret;
+}
+
+/* Module exit: undo nf_tables_arp_init() in reverse order. */
+static void __exit nf_tables_arp_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_arp_net_ops);
+ nft_unregister_chain_type(&filter_arp);
+}
+
+module_init(nf_tables_arp_init);
+module_exit(nf_tables_arp_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(3); /* NFPROTO_ARP */
diff --git a/net/ipv4/netfilter/nf_tables_ipv4.c b/net/ipv4/netfilter/nf_tables_ipv4.c
new file mode 100644
index 00000000000..6820c8c4084
--- /dev/null
+++ b/net/ipv4/netfilter/nf_tables_ipv4.c
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012-2013 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/ip.h>
+#include <linux/netfilter_ipv4.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/net_namespace.h>
+#include <net/ip.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+
+/*
+ * Generic IPv4 hook entry point: fill in the IPv4 packet info for @skb and
+ * run the nf_tables chain attached to @ops, returning the verdict of
+ * nft_do_chain(). @okfn is not used here.
+ */
+static unsigned int nft_do_chain_ipv4(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ struct nft_pktinfo pkt;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ return nft_do_chain(&pkt, ops);
+}
+
+/*
+ * LOCAL_OUT hook. Packets too short to hold an IPv4 header, or whose
+ * header length field is below the minimum, are accepted without being
+ * evaluated (with a rate-limited notice); everything else goes through
+ * nft_do_chain_ipv4().
+ */
+static unsigned int nft_ipv4_output(const struct nf_hook_ops *ops,
+				    struct sk_buff *skb,
+				    const struct net_device *in,
+				    const struct net_device *out,
+				    int (*okfn)(struct sk_buff *))
+{
+	/* The ihl field is only read once the length check has passed. */
+	const bool truncated = skb->len < sizeof(struct iphdr) ||
+			       ip_hdr(skb)->ihl < sizeof(struct iphdr) / 4;
+
+	if (likely(!truncated))
+		return nft_do_chain_ipv4(ops, skb, in, out, okfn);
+
+	if (net_ratelimit())
+		pr_info("nf_tables_ipv4: ignoring short SOCK_RAW packet\n");
+
+	return NF_ACCEPT;
+}
+
+/*
+ * IPv4 family description. It is copied into each network namespace by
+ * nf_tables_ipv4_init_net() and also exported (GPL-only) for other modules.
+ * LOCAL_OUT uses nft_ipv4_output(), which screens short packets first; the
+ * other hooks call nft_do_chain_ipv4() directly.
+ */
+struct nft_af_info nft_af_ipv4 __read_mostly = {
+ .family = NFPROTO_IPV4,
+ .nhooks = NF_INET_NUMHOOKS,
+ .owner = THIS_MODULE,
+ .nops = 1,
+ .hooks = {
+ [NF_INET_LOCAL_IN] = nft_do_chain_ipv4,
+ [NF_INET_LOCAL_OUT] = nft_ipv4_output,
+ [NF_INET_FORWARD] = nft_do_chain_ipv4,
+ [NF_INET_PRE_ROUTING] = nft_do_chain_ipv4,
+ [NF_INET_POST_ROUTING] = nft_do_chain_ipv4,
+ },
+};
+EXPORT_SYMBOL_GPL(nft_af_ipv4);
+
+/*
+ * Per-namespace init: allocate a private copy of the IPv4 family template
+ * and register it for @net.
+ *
+ * Returns 0 on success, -ENOMEM if the copy cannot be allocated, or the
+ * negative error reported by nft_register_afinfo().
+ */
+static int nf_tables_ipv4_init_net(struct net *net)
+{
+	int err;
+
+	/* Size the allocation from the object that is copied into it. */
+	net->nft.ipv4 = kmalloc(sizeof(nft_af_ipv4), GFP_KERNEL);
+	if (net->nft.ipv4 == NULL)
+		return -ENOMEM;
+
+	memcpy(net->nft.ipv4, &nft_af_ipv4, sizeof(nft_af_ipv4));
+
+	err = nft_register_afinfo(net, net->nft.ipv4);
+	if (err < 0)
+		goto err_free;
+
+	return 0;
+
+err_free:
+	kfree(net->nft.ipv4);
+	/* Do not leave a dangling pointer behind in the namespace. */
+	net->nft.ipv4 = NULL;
+	/* Report the real failure instead of always claiming -ENOMEM. */
+	return err;
+}
+
+/*
+ * Per-namespace teardown: unregister the namespace's IPv4 family copy and
+ * then free it.
+ */
+static void nf_tables_ipv4_exit_net(struct net *net)
+{
+ nft_unregister_afinfo(net->nft.ipv4);
+ kfree(net->nft.ipv4);
+}
+
+/* Per-namespace init/exit callbacks, registered from nf_tables_ipv4_init(). */
+static struct pernet_operations nf_tables_ipv4_net_ops = {
+ .init = nf_tables_ipv4_init_net,
+ .exit = nf_tables_ipv4_exit_net,
+};
+
+/*
+ * Default "filter" chain type for IPv4. hook_mask lists all five inet
+ * hooks this type is declared for.
+ */
+static const struct nf_chain_type filter_ipv4 = {
+ .name = "filter",
+ .type = NFT_CHAIN_T_DEFAULT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING),
+};
+
+/*
+ * Module init: register the IPv4 "filter" chain type, then the
+ * per-namespace operations. The chain type is rolled back if the pernet
+ * registration fails.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int __init nf_tables_ipv4_init(void)
+{
+	int ret;
+
+	/* A failed chain type registration must abort the module load. */
+	ret = nft_register_chain_type(&filter_ipv4);
+	if (ret < 0)
+		return ret;
+
+	ret = register_pernet_subsys(&nf_tables_ipv4_net_ops);
+	if (ret < 0)
+		nft_unregister_chain_type(&filter_ipv4);
+
+	return ret;
+}
+
+/* Module exit: undo nf_tables_ipv4_init() in reverse order. */
+static void __exit nf_tables_ipv4_exit(void)
+{
+ unregister_pernet_subsys(&nf_tables_ipv4_net_ops);
+ nft_unregister_chain_type(&filter_ipv4);
+}
+
+module_init(nf_tables_ipv4_init);
+module_exit(nf_tables_ipv4_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_FAMILY(AF_INET);
diff --git a/net/ipv4/netfilter/nft_chain_nat_ipv4.c b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
new file mode 100644
index 00000000000..3964157d826
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_nat_ipv4.c
@@ -0,0 +1,199 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ * Copyright (c) 2012 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/ip.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_conntrack.h>
+#include <net/netfilter/nf_nat.h>
+#include <net/netfilter/nf_nat_core.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/netfilter/nf_nat_l3proto.h>
+#include <net/ip.h>
+
+/*
+ * NAT chains
+ */
+
+/*
+ * Core NAT hook shared by all IPv4 "nat" chain hooks.
+ *
+ * Packets without a conntrack entry, with an untracked one, or for which
+ * the NAT extension cannot be added are accepted untouched. For new (and
+ * non-ICMP related) connections that have no binding yet for this hook's
+ * manipulation type, the nf_tables chain is evaluated; if it accepts
+ * without setting a binding up, a null binding is allocated. The final
+ * verdict comes from nf_nat_packet().
+ */
+static unsigned int nf_nat_fn(const struct nf_hook_ops *ops,
+ struct sk_buff *skb,
+ const struct net_device *in,
+ const struct net_device *out,
+ int (*okfn)(struct sk_buff *))
+{
+ enum ip_conntrack_info ctinfo;
+ struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
+ struct nf_conn_nat *nat;
+ /* Manipulation type is derived from the hook this call runs on. */
+ enum nf_nat_manip_type maniptype = HOOK2MANIP(ops->hooknum);
+ struct nft_pktinfo pkt;
+ unsigned int ret;
+
+ /* Nothing to translate without a tracked connection. */
+ if (ct == NULL || nf_ct_is_untracked(ct))
+ return NF_ACCEPT;
+
+ /* Fragments are not expected at this point. */
+ NF_CT_ASSERT(!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)));
+
+ nat = nf_ct_nat_ext_add(ct);
+ if (nat == NULL)
+ return NF_ACCEPT;
+
+ switch (ctinfo) {
+ case IP_CT_RELATED:
+ case IP_CT_RELATED + IP_CT_IS_REPLY:
+ /* Related ICMP is fully handled by the ICMP reply translation. */
+ if (ip_hdr(skb)->protocol == IPPROTO_ICMP) {
+ if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo,
+ ops->hooknum))
+ return NF_DROP;
+ else
+ return NF_ACCEPT;
+ }
+ /* Fall through */
+ case IP_CT_NEW:
+ /* A binding already exists for this manip type: skip the chain. */
+ if (nf_nat_initialized(ct, maniptype))
+ break;
+
+ nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+ ret = nft_do_chain(&pkt, ops);
+ if (ret != NF_ACCEPT)
+ return ret;
+ /* Chain accepted without creating a binding: use a null one. */
+ if (!nf_nat_initialized(ct, maniptype)) {
+ ret = nf_nat_alloc_null_binding(ct, ops->hooknum);
+ if (ret != NF_ACCEPT)
+ return ret;
+ }
+ /* Fall through */
+ default:
+ break;
+ }
+
+ return nf_nat_packet(ct, ctinfo, ops->hooknum, skb);
+}
+
+/*
+ * PRE_ROUTING NAT hook: run nf_nat_fn() and, when the packet survives and
+ * its destination address was changed, drop the attached dst entry.
+ */
+static unsigned int nf_nat_prerouting(const struct nf_hook_ops *ops,
+				      struct sk_buff *skb,
+				      const struct net_device *in,
+				      const struct net_device *out,
+				      int (*okfn)(struct sk_buff *))
+{
+	const __be32 orig_daddr = ip_hdr(skb)->daddr;
+	const unsigned int verdict = nf_nat_fn(ops, skb, in, out, okfn);
+
+	/* The skb must not be looked at once it is dropped or stolen. */
+	if (verdict == NF_DROP || verdict == NF_STOLEN)
+		return verdict;
+
+	if (orig_daddr != ip_hdr(skb)->daddr)
+		skb_dst_drop(skb);
+
+	return verdict;
+}
+
+/*
+ * POST_ROUTING NAT hook: run nf_nat_fn() and, with CONFIG_XFRM, redo the
+ * xfrm lookup when the source address or the source protocol part of the
+ * tuple no longer matches the reply direction.
+ *
+ * Returns the verdict of nf_nat_fn(), or a drop verdict carrying the
+ * error code when nf_xfrm_me_harder() fails.
+ */
+static unsigned int nf_nat_postrouting(const struct nf_hook_ops *ops,
+				       struct sk_buff *skb,
+				       const struct net_device *in,
+				       const struct net_device *out,
+				       int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo __maybe_unused;
+	const struct nf_conn *ct __maybe_unused;
+	unsigned int ret;
+
+	ret = nf_nat_fn(ops, skb, in, out, okfn);
+#ifdef CONFIG_XFRM
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.src.u3.ip !=
+		    ct->tuplehash[!dir].tuple.dst.u3.ip ||
+		    ct->tuplehash[dir].tuple.src.u.all !=
+		    ct->tuplehash[!dir].tuple.dst.u.all) {
+			int err = nf_xfrm_me_harder(skb, AF_INET);
+
+			/* Hand the errno to the caller with the drop. */
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+	}
+#endif
+	return ret;
+}
+
+/*
+ * LOCAL_OUT NAT hook: run nf_nat_fn() and, if the packet survives,
+ * re-route it when the destination address was rewritten; otherwise
+ * (CONFIG_XFRM only) redo the xfrm lookup when the destination protocol
+ * part of the tuple changed.
+ *
+ * Returns the verdict of nf_nat_fn(), or a drop verdict carrying the
+ * error code when re-routing or the xfrm lookup fails.
+ */
+static unsigned int nf_nat_output(const struct nf_hook_ops *ops,
+				  struct sk_buff *skb,
+				  const struct net_device *in,
+				  const struct net_device *out,
+				  int (*okfn)(struct sk_buff *))
+{
+	enum ip_conntrack_info ctinfo;
+	const struct nf_conn *ct;
+	unsigned int ret;
+	int err;
+
+	ret = nf_nat_fn(ops, skb, in, out, okfn);
+	if (ret != NF_DROP && ret != NF_STOLEN &&
+	    (ct = nf_ct_get(skb, &ctinfo)) != NULL) {
+		enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
+
+		if (ct->tuplehash[dir].tuple.dst.u3.ip !=
+		    ct->tuplehash[!dir].tuple.src.u3.ip) {
+			err = ip_route_me_harder(skb, RTN_UNSPEC);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+#ifdef CONFIG_XFRM
+		else if (ct->tuplehash[dir].tuple.dst.u.all !=
+			 ct->tuplehash[!dir].tuple.src.u.all) {
+			err = nf_xfrm_me_harder(skb, AF_INET);
+			if (err < 0)
+				ret = NF_DROP_ERR(err);
+		}
+#endif
+	}
+	return ret;
+}
+
+/*
+ * IPv4 "nat" chain type. Each supported hook has its own wrapper around
+ * nf_nat_fn(); LOCAL_IN calls nf_nat_fn() directly.
+ */
+static const struct nf_chain_type nft_chain_nat_ipv4 = {
+ .name = "nat",
+ .type = NFT_CHAIN_T_NAT,
+ .family = NFPROTO_IPV4,
+ .owner = THIS_MODULE,
+ .hook_mask = (1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_POST_ROUTING) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_LOCAL_IN),
+ .hooks = {
+ [NF_INET_PRE_ROUTING] = nf_nat_prerouting,
+ [NF_INET_POST_ROUTING] = nf_nat_postrouting,
+ [NF_INET_LOCAL_OUT] = nf_nat_output,
+ [NF_INET_LOCAL_IN] = nf_nat_fn,
+ },
+};
+
+/*
+ * Module init: register the IPv4 "nat" chain type.
+ * Returns 0 on success or the negative error from the registration.
+ */
+static int __init nft_chain_nat_init(void)
+{
+	const int rc = nft_register_chain_type(&nft_chain_nat_ipv4);
+
+	/* Only negative values are errors; anything else is success. */
+	return rc < 0 ? rc : 0;
+}
+
+/* Module exit: unregister the IPv4 "nat" chain type. */
+static void __exit nft_chain_nat_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_nat_ipv4);
+}
+
+module_init(nft_chain_nat_init);
+module_exit(nft_chain_nat_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "nat");
diff --git a/net/ipv4/netfilter/nft_chain_route_ipv4.c b/net/ipv4/netfilter/nft_chain_route_ipv4.c
new file mode 100644
index 00000000000..125b66766c0
--- /dev/null
+++ b/net/ipv4/netfilter/nft_chain_route_ipv4.c
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2012 Pablo Neira Ayuso <pablo@netfilter.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/skbuff.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter_ipv4.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_ipv4.h>
+#include <net/route.h>
+#include <net/ip.h>
+
+/*
+ * Hook function of the IPv4 "route" chain type.
+ *
+ * Records the packet mark and the IPv4 source address, destination
+ * address and TOS, runs the chain through nft_do_chain(), and re-routes
+ * the packet with ip_route_me_harder() if any of those four values
+ * differs afterwards.  A re-routing failure turns the verdict into
+ * NF_DROP.
+ *
+ * Packets shorter than an IPv4 header, or with a header length smaller
+ * than struct iphdr, are accepted without running the chain.
+ *
+ * Returns the chain verdict, or NF_DROP if re-routing failed.
+ */
+static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops,
+					struct sk_buff *skb,
+					const struct net_device *in,
+					const struct net_device *out,
+					int (*okfn)(struct sk_buff *))
+{
+	unsigned int ret;
+	struct nft_pktinfo pkt;
+	u32 mark;
+	__be32 saddr, daddr;
+	u_int8_t tos;
+	const struct iphdr *iph;
+
+	/* root is playing with raw sockets. */
+	if (skb->len < sizeof(struct iphdr) ||
+	    ip_hdrlen(skb) < sizeof(struct iphdr))
+		return NF_ACCEPT;
+
+	nft_set_pktinfo_ipv4(&pkt, ops, skb, in, out);
+
+	/* Remember the values that are compared after the chain has run. */
+	mark = skb->mark;
+	iph = ip_hdr(skb);
+	saddr = iph->saddr;
+	daddr = iph->daddr;
+	tos = iph->tos;
+
+	ret = nft_do_chain(&pkt, ops);
+
+	/*
+	 * Leave the skb alone for these verdicts.  NF_STOLEN in particular
+	 * means the packet is no longer owned by this hook, so its header
+	 * must not be read and it must not be re-routed.
+	 */
+	if (ret == NF_DROP || ret == NF_STOLEN || ret == NF_QUEUE)
+		return ret;
+
+	/* Re-read the header pointer now that the chain has run. */
+	iph = ip_hdr(skb);
+
+	if (iph->saddr != saddr ||
+	    iph->daddr != daddr ||
+	    skb->mark != mark ||
+	    iph->tos != tos) {
+		if (ip_route_me_harder(skb, RTN_UNSPEC))
+			ret = NF_DROP;
+	}
+	return ret;
+}
+
+/*
+ * Descriptor of the IPv4 "route" chain type.  Only NF_INET_LOCAL_OUT is
+ * set in hook_mask and has a handler in hooks[].
+ */
+static const struct nf_chain_type nft_chain_route_ipv4 = {
+	.owner		= THIS_MODULE,
+	.family		= NFPROTO_IPV4,
+	.type		= NFT_CHAIN_T_ROUTE,
+	.name		= "route",
+	.hook_mask	= (1 << NF_INET_LOCAL_OUT),
+	.hooks		= {
+		[NF_INET_LOCAL_OUT]	= nf_route_table_hook,
+	},
+};
+
+/*
+ * Module init: register the IPv4 "route" chain type and return the
+ * registration result unchanged.
+ */
+static int __init nft_chain_route_init(void)
+{
+ return nft_register_chain_type(&nft_chain_route_ipv4);
+}
+
+/* Module exit: unregister the IPv4 "route" chain type. */
+static void __exit nft_chain_route_exit(void)
+{
+ nft_unregister_chain_type(&nft_chain_route_ipv4);
+}
+
+/* Module entry and exit points. */
+module_init(nft_chain_route_init);
+module_exit(nft_chain_route_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+/*
+ * NOTE(review): presumably lets a request for the AF_INET "route" chain
+ * type autoload this module - confirm against the macro definition.
+ */
+MODULE_ALIAS_NFT_CHAIN(AF_INET, "route");
diff --git a/net/ipv4/netfilter/nft_reject_ipv4.c b/net/ipv4/netfilter/nft_reject_ipv4.c
new file mode 100644
index 00000000000..e79718a382f
--- /dev/null
+++ b/net/ipv4/netfilter/nft_reject_ipv4.c
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2008-2009 Patrick McHardy <kaber@trash.net>
+ * Copyright (c) 2013 Eric Leblond <eric@regit.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Development of this code funded by Astaro AG (http://www.astaro.com/)
+ */
+
+#include <linux/kernel.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/netlink.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables.h>
+#include <net/icmp.h>
+#include <net/netfilter/ipv4/nf_reject.h>
+#include <net/netfilter/nft_reject.h>
+
+/*
+ * Evaluate an IPv4 "reject" expression.
+ *
+ * Depending on the configured reject type, sends an ICMP unreachable
+ * with the configured code (nf_send_unreach()) or a TCP reset
+ * (nf_send_reset()) for the packet; any other type sends nothing.
+ * The verdict register is set to NF_DROP in all cases.
+ */
+void nft_reject_ipv4_eval(const struct nft_expr *expr,
+			  struct nft_data data[NFT_REG_MAX + 1],
+			  const struct nft_pktinfo *pkt)
+{
+	struct nft_reject *reject = nft_expr_priv(expr);
+
+	if (reject->type == NFT_REJECT_ICMP_UNREACH)
+		nf_send_unreach(pkt->skb, reject->icmp_code);
+	else if (reject->type == NFT_REJECT_TCP_RST)
+		nf_send_reset(pkt->skb, pkt->ops->hooknum);
+
+	data[NFT_REG_VERDICT].verdict = NF_DROP;
+}
+EXPORT_SYMBOL_GPL(nft_reject_ipv4_eval);
+
+/* Forward declaration: the ops and the type refer to each other. */
+static struct nft_expr_type nft_reject_ipv4_type;
+
+/*
+ * Operations of the IPv4 "reject" expression: evaluation is done by
+ * nft_reject_ipv4_eval(), init and dump use nft_reject_init() and
+ * nft_reject_dump().
+ */
+static const struct nft_expr_ops nft_reject_ipv4_ops = {
+	.type		= &nft_reject_ipv4_type,
+	.size		= NFT_EXPR_SIZE(sizeof(struct nft_reject)),
+	.init		= nft_reject_init,
+	.dump		= nft_reject_dump,
+	.eval		= nft_reject_ipv4_eval,
+};
+
+/*
+ * Expression type "reject" for NFPROTO_IPV4, backed by
+ * nft_reject_ipv4_ops and using nft_reject_policy as its attribute policy.
+ */
+static struct nft_expr_type nft_reject_ipv4_type __read_mostly = {
+	.owner		= THIS_MODULE,
+	.family		= NFPROTO_IPV4,
+	.name		= "reject",
+	.policy		= nft_reject_policy,
+	.maxattr	= NFTA_REJECT_MAX,
+	.ops		= &nft_reject_ipv4_ops,
+};
+
+/*
+ * Module init: register the IPv4 "reject" expression type and return the
+ * registration result unchanged.
+ */
+static int __init nft_reject_ipv4_module_init(void)
+{
+ return nft_register_expr(&nft_reject_ipv4_type);
+}
+
+/* Module exit: unregister the IPv4 "reject" expression type. */
+static void __exit nft_reject_ipv4_module_exit(void)
+{
+ nft_unregister_expr(&nft_reject_ipv4_type);
+}
+
+/* Module entry and exit points. */
+module_init(nft_reject_ipv4_module_init);
+module_exit(nft_reject_ipv4_module_exit);
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
+/*
+ * NOTE(review): presumably lets a request for the AF_INET "reject"
+ * expression autoload this module - confirm against the macro definition.
+ */
+MODULE_ALIAS_NFT_AF_EXPR(AF_INET, "reject");