diff options
author | David S. Miller <davem@davemloft.net> | 2011-12-25 02:21:45 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2011-12-25 02:21:45 -0500 |
commit | c5e1fd8ccae09f574d6f978c90c2b968ee29030c (patch) | |
tree | e4485dc086ce76c4ff2ff551246255f5de0a250b /net | |
parent | 60b778ce519625102d3f72a2071ea72a05e990ce (diff) | |
parent | ceb98d03eac5704820f2ac1f370c9ff385e3a9f5 (diff) |
Merge branch 'nf-next' of git://1984.lsi.us.es/net-next
Diffstat (limited to 'net')
41 files changed, 1019 insertions, 294 deletions
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 46339ba7a2d..799fc790b3c 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -67,6 +67,7 @@ int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) return err; } +EXPORT_SYMBOL_GPL(fib_lookup); static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, struct fib_lookup_arg *arg) diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 37b671185c8..d04b13ae18f 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1607,6 +1607,7 @@ found: rcu_read_unlock(); return ret; } +EXPORT_SYMBOL_GPL(fib_table_lookup); /* * Remove the leaf and return parent. diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index f19f2182894..7e1f5cdaf11 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -82,6 +82,16 @@ config IP_NF_MATCH_ECN To compile it as a module, choose M here. If unsure, say N. +config IP_NF_MATCH_RPFILTER + tristate '"rpfilter" reverse path filter match support' + depends on NETFILTER_ADVANCED + ---help--- + This option allows you to match packets whose replies would + go out via the interface the packet came in. + + To compile it as a module, choose M here. If unsure, say N. + The module will be called ipt_rpfilter. + config IP_NF_MATCH_TTL tristate '"ttl" match support' depends on NETFILTER_ADVANCED diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index dca2082ec68..123dd88cea5 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -50,6 +50,7 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o # matches obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o +obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o # targets obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 9931152a78b..2f210c79dc8 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -30,9 +30,9 @@ MODULE_DESCRIPTION("Xtables: automatic-address SNAT"); /* FIXME: Multiple targets. --RR */ static int masquerade_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { pr_debug("bad MAP_IPS.\n"); return -EINVAL; } @@ -49,8 +49,8 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; struct nf_conn_nat *nat; enum ip_conntrack_info ctinfo; - struct nf_nat_range newrange; - const struct nf_nat_multi_range_compat *mr; + struct nf_nat_ipv4_range newrange; + const struct nf_nat_ipv4_multi_range_compat *mr; const struct rtable *rt; __be32 newsrc; @@ -79,13 +79,13 @@ masquerade_tg(struct sk_buff *skb, const struct xt_action_param *par) nat->masq_index = par->out->ifindex; /* Transfer from original range. */ - newrange = ((struct nf_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, + newrange = ((struct nf_nat_ipv4_range) + { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, newsrc, newsrc, mr->range[0].min, mr->range[0].max }); /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_SRC); + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); } static int @@ -139,7 +139,7 @@ static struct xt_target masquerade_tg_reg __read_mostly = { .name = "MASQUERADE", .family = NFPROTO_IPV4, .target = masquerade_tg, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = 1 << NF_INET_POST_ROUTING, .checkentry = masquerade_tg_check, diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c index 6cdb298f103..b5bfbbabf70 100644 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ b/net/ipv4/netfilter/ipt_NETMAP.c @@ -24,9 +24,9 @@ MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); static int netmap_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { + if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { pr_debug("bad MAP_IPS.\n"); return -EINVAL; } @@ -43,8 +43,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 new_ip, netmask; - const struct nf_nat_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_ipv4_range newrange; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_POST_ROUTING || @@ -61,8 +61,8 @@ netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) new_ip = ip_hdr(skb)->saddr & ~netmask; new_ip |= mr->range[0].min_ip & netmask; - newrange = ((struct nf_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, + newrange = ((struct nf_nat_ipv4_range) + { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, new_ip, new_ip, mr->range[0].min, mr->range[0].max }); @@ -74,7 +74,7 @@ static struct xt_target netmap_tg_reg __read_mostly = { .name = "NETMAP", .family = NFPROTO_IPV4, .target = netmap_tg, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_POST_ROUTING) | diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c index 18a0656505a..7c0103a5203 100644 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ b/net/ipv4/netfilter/ipt_REDIRECT.c @@ -28,9 +28,9 @@ MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); /* FIXME: Take multiple ranges --RR */ static int redirect_tg_check(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - if (mr->range[0].flags & IP_NAT_RANGE_MAP_IPS) { + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { pr_debug("bad MAP_IPS.\n"); return -EINVAL; } @@ -47,8 +47,8 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; enum ip_conntrack_info ctinfo; __be32 newdst; - const struct nf_nat_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_ipv4_range newrange; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_LOCAL_OUT); @@ -76,20 +76,20 @@ redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) } /* Transfer from original range. */ - newrange = ((struct nf_nat_range) - { mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, + newrange = ((struct nf_nat_ipv4_range) + { mr->range[0].flags | NF_NAT_RANGE_MAP_IPS, newdst, newdst, mr->range[0].min, mr->range[0].max }); /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, IP_NAT_MANIP_DST); + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); } static struct xt_target redirect_tg_reg __read_mostly = { .name = "REDIRECT", .family = NFPROTO_IPV4, .target = redirect_tg, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .checkentry = redirect_tg_check, diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c new file mode 100644 index 00000000000..31371be8174 --- /dev/null +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -0,0 +1,141 @@ +/* + * Copyright (c) 2011 Florian Westphal <fw@strlen.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * based on fib_frontend.c; Author: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru> + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/ip.h> +#include <net/ip.h> +#include <net/ip_fib.h> +#include <net/route.h> + +#include <linux/netfilter/xt_rpfilter.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("iptables: ipv4 reverse path filter match"); + +/* don't try to find route from mcast/bcast/zeronet */ +static __be32 rpfilter_get_saddr(__be32 addr) +{ + if (ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) || + ipv4_is_zeronet(addr)) + return 0; + return addr; +} + +static bool rpfilter_lookup_reverse(struct flowi4 *fl4, + const struct net_device *dev, u8 flags) +{ + struct fib_result res; + bool dev_match; + struct net *net = dev_net(dev); + int ret __maybe_unused; + + if (fib_lookup(net, fl4, &res)) + return false; + + if (res.type != RTN_UNICAST) { + if (res.type != RTN_LOCAL || !(flags & XT_RPFILTER_ACCEPT_LOCAL)) + return false; + } + dev_match = false; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + for (ret = 0; ret < res.fi->fib_nhs; ret++) { + struct fib_nh *nh = &res.fi->fib_nh[ret]; + + if (nh->nh_dev == dev) { + dev_match = true; + break; + } + } +#else + if (FIB_RES_DEV(res) == dev) + dev_match = true; +#endif + if (dev_match || flags & XT_RPFILTER_LOOSE) + return FIB_RES_NH(res).nh_scope <= RT_SCOPE_HOST; + return dev_match; +} + +static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_rpfilter_info *info; + const struct iphdr *iph; + struct flowi4 flow; + bool invert; + + info = par->matchinfo; + invert = info->flags & XT_RPFILTER_INVERT; + + if (par->in->flags & IFF_LOOPBACK) + return true ^ invert; + + iph = ip_hdr(skb); + if (ipv4_is_multicast(iph->daddr)) { + if (ipv4_is_zeronet(iph->saddr)) + return ipv4_is_local_multicast(iph->daddr) ^ invert; + flow.flowi4_iif = 0; + } else { + flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex; + } + + flow.daddr = iph->saddr; + flow.saddr = rpfilter_get_saddr(iph->daddr); + flow.flowi4_oif = 0; + flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; + flow.flowi4_tos = RT_TOS(iph->tos); + flow.flowi4_scope = RT_SCOPE_UNIVERSE; + + return rpfilter_lookup_reverse(&flow, par->in, info->flags) ^ invert; +} + +static int rpfilter_check(const struct xt_mtchk_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + unsigned int options = ~XT_RPFILTER_OPTION_MASK; + if (info->flags & options) { + pr_info("unknown options encountered"); + return -EINVAL; + } + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "raw") != 0) { + pr_info("match only valid in the \'raw\' " + "or \'mangle\' tables, not \'%s\'.\n", par->table); + return -EINVAL; + } + + return 0; +} + +static struct xt_match rpfilter_mt_reg __read_mostly = { + .name = "rpfilter", + .family = NFPROTO_IPV4, + .checkentry = rpfilter_check, + .match = rpfilter_mt, + .matchsize = sizeof(struct xt_rpfilter_info), + .hooks = (1 << NF_INET_PRE_ROUTING), + .me = THIS_MODULE +}; + +static int __init rpfilter_mt_init(void) +{ + return xt_register_match(&rpfilter_mt_reg); +} + +static void __exit rpfilter_mt_exit(void) +{ + xt_unregister_match(&rpfilter_mt_reg); +} + +module_init(rpfilter_mt_init); +module_exit(rpfilter_mt_exit); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 447bc5cfdc6..acdd002bb54 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -30,7 +30,6 @@ #include <net/netfilter/nf_nat_helper.h> #include <net/netfilter/nf_conntrack_helper.h> #include <net/netfilter/nf_conntrack_l3proto.h> -#include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_zones.h> static DEFINE_SPINLOCK(nf_nat_lock); @@ -57,7 +56,7 @@ hash_by_src(const struct net *net, u16 zone, /* Original src, to ensure we map it consistently if poss. */ hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all ^ zone, - tuple->dst.protonum, 0); + tuple->dst.protonum, nf_conntrack_hash_rnd); return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } @@ -82,14 +81,14 @@ EXPORT_SYMBOL(nf_nat_used_tuple); * that meet the constraints of range. */ static int in_range(const struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range) + const struct nf_nat_ipv4_range *range) { const struct nf_nat_protocol *proto; int ret = 0; /* If we are supposed to map IPs, then we must be in the range specified, otherwise let this drag us onto a new src IP. */ - if (range->flags & IP_NAT_RANGE_MAP_IPS) { + if (range->flags & NF_NAT_RANGE_MAP_IPS) { if (ntohl(tuple->src.u3.ip) < ntohl(range->min_ip) || ntohl(tuple->src.u3.ip) > ntohl(range->max_ip)) return 0; @@ -97,8 +96,8 @@ in_range(const struct nf_conntrack_tuple *tuple, rcu_read_lock(); proto = __nf_nat_proto_find(tuple->dst.protonum); - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || - proto->in_range(tuple, IP_NAT_MANIP_SRC, + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) || + proto->in_range(tuple, NF_NAT_MANIP_SRC, &range->min, &range->max)) ret = 1; rcu_read_unlock(); @@ -123,7 +122,7 @@ static int find_appropriate_src(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple, struct nf_conntrack_tuple *result, - const struct nf_nat_range *range) + const struct nf_nat_ipv4_range *range) { unsigned int h = hash_by_src(net, zone, tuple); const struct nf_conn_nat *nat; @@ -157,7 +156,7 @@ find_appropriate_src(struct net *net, u16 zone, */ static void find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, const struct nf_conn *ct, enum nf_nat_manip_type maniptype) { @@ -166,10 +165,10 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, u_int32_t minip, maxip, j; /* No IP mapping? Do nothing. */ - if (!(range->flags & IP_NAT_RANGE_MAP_IPS)) + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) return; - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) var_ipp = &tuple->src.u3.ip; else var_ipp = &tuple->dst.u3.ip; @@ -189,7 +188,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, minip = ntohl(range->min_ip); maxip = ntohl(range->max_ip); j = jhash_2words((__force u32)tuple->src.u3.ip, - range->flags & IP_NAT_RANGE_PERSISTENT ? + range->flags & NF_NAT_RANGE_PERSISTENT ? 0 : (__force u32)tuple->dst.u3.ip ^ zone, 0); j = ((u64)j * (maxip - minip + 1)) >> 32; *var_ipp = htonl(minip + j); @@ -204,7 +203,7 @@ find_best_ips_proto(u16 zone, struct nf_conntrack_tuple *tuple, static void get_unique_tuple(struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple *orig_tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, struct nf_conn *ct, enum nf_nat_manip_type maniptype) { @@ -219,8 +218,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, This is only required for source (ie. NAT/masq) mappings. So far, we don't do local source mappings, so multiple manips not an issue. */ - if (maniptype == IP_NAT_MANIP_SRC && - !(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { + if (maniptype == NF_NAT_MANIP_SRC && + !(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { /* try the original tuple first */ if (in_range(orig_tuple, range)) { if (!nf_nat_used_tuple(orig_tuple, ct)) { @@ -247,8 +246,8 @@ get_unique_tuple(struct nf_conntrack_tuple *tuple, proto = __nf_nat_proto_find(orig_tuple->dst.protonum); /* Only bother mapping if it's not already in range and unique */ - if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) { - if (range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) { + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) { + if (range->flags & NF_NAT_RANGE_PROTO_SPECIFIED) { if (proto->in_range(tuple, maniptype, &range->min, &range->max) && (range->min.all == range->max.all || @@ -267,7 +266,7 @@ out: unsigned int nf_nat_setup_info(struct nf_conn *ct, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype) { struct net *net = nf_ct_net(ct); @@ -284,8 +283,8 @@ nf_nat_setup_info(struct nf_conn *ct, } } - NF_CT_ASSERT(maniptype == IP_NAT_MANIP_SRC || - maniptype == IP_NAT_MANIP_DST); + NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || + maniptype == NF_NAT_MANIP_DST); BUG_ON(nf_nat_initialized(ct, maniptype)); /* What we've got will look like inverse of reply. Normally @@ -306,13 +305,13 @@ nf_nat_setup_info(struct nf_conn *ct, nf_conntrack_alter_reply(ct, &reply); /* Non-atomic: we own this at the moment. */ - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) ct->status |= IPS_SRC_NAT; else ct->status |= IPS_DST_NAT; } - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { unsigned int srchash; srchash = hash_by_src(net, nf_ct_zone(ct), @@ -327,7 +326,7 @@ nf_nat_setup_info(struct nf_conn *ct, } /* It's done. */ - if (maniptype == IP_NAT_MANIP_DST) + if (maniptype == NF_NAT_MANIP_DST) ct->status |= IPS_DST_NAT_DONE; else ct->status |= IPS_SRC_NAT_DONE; @@ -361,7 +360,7 @@ manip_pkt(u_int16_t proto, iph = (void *)skb->data + iphdroff; - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { csum_replace4(&iph->check, iph->saddr, target->src.u3.ip); iph->saddr = target->src.u3.ip; } else { @@ -381,7 +380,7 @@ unsigned int nf_nat_packet(struct nf_conn *ct, unsigned long statusbit; enum nf_nat_manip_type mtype = HOOK2MANIP(hooknum); - if (mtype == IP_NAT_MANIP_SRC) + if (mtype == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; @@ -414,8 +413,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, struct icmphdr icmp; struct iphdr ip; } *inside; - const struct nf_conntrack_l4proto *l4proto; - struct nf_conntrack_tuple inner, target; + struct nf_conntrack_tuple target; int hdrlen = ip_hdrlen(skb); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); unsigned long statusbit; @@ -447,7 +445,7 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, return 0; } - if (manip == IP_NAT_MANIP_SRC) + if (manip == NF_NAT_MANIP_SRC) statusbit = IPS_SRC_NAT; else statusbit = IPS_DST_NAT; @@ -463,16 +461,6 @@ int nf_nat_icmp_reply_translation(struct nf_conn *ct, "dir %s\n", skb, manip, dir == IP_CT_DIR_ORIGINAL ? "ORIG" : "REPLY"); - /* rcu_read_lock()ed by nf_hook_slow */ - l4proto = __nf_ct_l4proto_find(PF_INET, inside->ip.protocol); - - if (!nf_ct_get_tuple(skb, hdrlen + sizeof(struct icmphdr), - (hdrlen + - sizeof(struct icmphdr) + inside->ip.ihl * 4), - (u_int16_t)AF_INET, inside->ip.protocol, - &inner, l3proto, l4proto)) - return 0; - /* Change inner back to look like incoming packet. We do the opposite manip on this hook to normal, because it might not pass all hooks (locally-generated ICMP). Consider incoming @@ -575,26 +563,6 @@ static struct nf_ct_ext_type nat_extend __read_mostly = { #include <linux/netfilter/nfnetlink.h> #include <linux/netfilter/nfnetlink_conntrack.h> -static const struct nf_nat_protocol * -nf_nat_proto_find_get(u_int8_t protonum) -{ - const struct nf_nat_protocol *p; - - rcu_read_lock(); - p = __nf_nat_proto_find(protonum); - if (!try_module_get(p->me)) - p = &nf_nat_unknown_protocol; - rcu_read_unlock(); - - return p; -} - -static void -nf_nat_proto_put(const struct nf_nat_protocol *p) -{ - module_put(p->me); -} - static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { [CTA_PROTONAT_PORT_MIN] = { .type = NLA_U16 }, [CTA_PROTONAT_PORT_MAX] = { .type = NLA_U16 }, @@ -602,7 +570,7 @@ static const struct nla_policy protonat_nla_policy[CTA_PROTONAT_MAX+1] = { static int nfnetlink_parse_nat_proto(struct nlattr *attr, const struct nf_conn *ct, - struct nf_nat_range *range) + struct nf_nat_ipv4_range *range) { struct nlattr *tb[CTA_PROTONAT_MAX+1]; const struct nf_nat_protocol *npt; @@ -612,21 +580,23 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, if (err < 0) return err; - npt = nf_nat_proto_find_get(nf_ct_protonum(ct)); + rcu_read_lock(); + npt = __nf_nat_proto_find(nf_ct_protonum(ct)); if (npt->nlattr_to_range) err = npt->nlattr_to_range(tb, range); - nf_nat_proto_put(npt); + rcu_read_unlock(); return err; } static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { [CTA_NAT_MINIP] = { .type = NLA_U32 }, [CTA_NAT_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_PROTO] = { .type = NLA_NESTED }, }; static int nfnetlink_parse_nat(const struct nlattr *nat, - const struct nf_conn *ct, struct nf_nat_range *range) + const struct nf_conn *ct, struct nf_nat_ipv4_range *range) { struct nlattr *tb[CTA_NAT_MAX+1]; int err; @@ -646,7 +616,7 @@ nfnetlink_parse_nat(const struct nlattr *nat, range->max_ip = nla_get_be32(tb[CTA_NAT_MAXIP]); if (range->min_ip) - range->flags |= IP_NAT_RANGE_MAP_IPS; + range->flags |= NF_NAT_RANGE_MAP_IPS; if (!tb[CTA_NAT_PROTO]) return 0; @@ -663,7 +633,7 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, enum nf_nat_manip_type manip, const struct nlattr *attr) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; if (nfnetlink_parse_nat(attr, ct, &range) < 0) return -EINVAL; diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index b9a1136addb..dc1dd912baf 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -398,7 +398,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_q931_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; if (this->tuple.src.u3.ip != 0) { /* Only accept calls from GK */ nf_nat_follow_master(new, this); @@ -409,16 +409,16 @@ static void ip_nat_q931_expect(struct nf_conn *new, BUG_ON(new->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; - nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); range.min = range.max = this->saved_proto; range.min_ip = range.max_ip = new->master->tuplehash[!this->dir].tuple.src.u3.ip; - nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } /****************************************************************************/ @@ -496,21 +496,21 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, static void ip_nat_callforwarding_expect(struct nf_conn *new, struct nf_conntrack_expect *this) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; /* This must be a fresh one. */ BUG_ON(new->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = new->tuplehash[!this->dir].tuple.src.u3.ip; - nf_nat_setup_info(new, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(new, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); range.min = range.max = this->saved_proto; range.min_ip = range.max_ip = this->saved_ip; - nf_nat_setup_info(new, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(new, &range, NF_NAT_MANIP_DST); } /****************************************************************************/ diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c index ebc5f8894f9..af65958f630 100644 --- a/net/ipv4/netfilter/nf_nat_helper.c +++ b/net/ipv4/netfilter/nf_nat_helper.c @@ -253,12 +253,6 @@ nf_nat_mangle_udp_packet(struct sk_buff *skb, struct udphdr *udph; int datalen, oldlen; - /* UDP helpers might accidentally mangle the wrong packet */ - iph = ip_hdr(skb); - if (skb->len < iph->ihl*4 + sizeof(*udph) + - match_offset + match_len) - return 0; - if (!skb_make_writable(skb, skb->len)) return 0; @@ -430,22 +424,22 @@ nf_nat_seq_adjust(struct sk_buff *skb, void nf_nat_follow_master(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); range.min = range.max = exp->saved_proto; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } EXPORT_SYMBOL(nf_nat_follow_master); diff --git a/net/ipv4/netfilter/nf_nat_pptp.c b/net/ipv4/netfilter/nf_nat_pptp.c index 3e8284ba46b..c273d58980a 100644 --- a/net/ipv4/netfilter/nf_nat_pptp.c +++ b/net/ipv4/netfilter/nf_nat_pptp.c @@ -47,7 +47,7 @@ static void pptp_nat_expected(struct nf_conn *ct, struct nf_conntrack_tuple t; const struct nf_ct_pptp_master *ct_pptp_info; const struct nf_nat_pptp *nat_pptp_info; - struct nf_nat_range range; + struct nf_nat_ipv4_range range; ct_pptp_info = &nfct_help(master)->help.ct_pptp_info; nat_pptp_info = &nfct_nat(master)->help.nat_pptp_info; @@ -88,24 +88,24 @@ static void pptp_nat_expected(struct nf_conn *ct, BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* Change src to where master sends to */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; if (exp->dir == IP_CT_DIR_ORIGINAL) { - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; range.min = range.max = exp->saved_proto; } - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); /* For DST manip, map port here to where it's expected. */ - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.src.u3.ip; if (exp->dir == IP_CT_DIR_REPLY) { - range.flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; range.min = range.max = exp->saved_proto; } - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } /* outbound packets == from PNS to PAC */ diff --git a/net/ipv4/netfilter/nf_nat_proto_common.c b/net/ipv4/netfilter/nf_nat_proto_common.c index a3d99761860..9993bc93e10 100644 --- a/net/ipv4/netfilter/nf_nat_proto_common.c +++ b/net/ipv4/netfilter/nf_nat_proto_common.c @@ -26,7 +26,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, { __be16 port; - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) port = tuple->src.u.all; else port = tuple->dst.u.all; @@ -37,7 +37,7 @@ bool nf_nat_proto_in_range(const struct nf_conntrack_tuple *tuple, EXPORT_SYMBOL_GPL(nf_nat_proto_in_range); void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct, u_int16_t *rover) @@ -46,15 +46,15 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, __be16 *portptr; u_int16_t off; - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) portptr = &tuple->src.u.all; else portptr = &tuple->dst.u.all; /* If no range specified... */ - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { /* If it's dst rewrite, can't change port */ - if (maniptype == IP_NAT_MANIP_DST) + if (maniptype == NF_NAT_MANIP_DST) return; if (ntohs(*portptr) < 1024) { @@ -75,9 +75,9 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.all) - min + 1; } - if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) + if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) off = secure_ipv4_port_ephemeral(tuple->src.u3.ip, tuple->dst.u3.ip, - maniptype == IP_NAT_MANIP_SRC + maniptype == NF_NAT_MANIP_SRC ? tuple->dst.u.all : tuple->src.u.all); else @@ -87,7 +87,7 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, *portptr = htons(min + off % range_size); if (++i != range_size && nf_nat_used_tuple(tuple, ct)) continue; - if (!(range->flags & IP_NAT_RANGE_PROTO_RANDOM)) + if (!(range->flags & NF_NAT_RANGE_PROTO_RANDOM)) *rover = off; return; } @@ -96,31 +96,19 @@ void nf_nat_proto_unique_tuple(struct nf_conntrack_tuple *tuple, EXPORT_SYMBOL_GPL(nf_nat_proto_unique_tuple); #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) -int nf_nat_proto_range_to_nlattr(struct sk_buff *skb, - const struct nf_nat_range *range) -{ - NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MIN, range->min.all); - NLA_PUT_BE16(skb, CTA_PROTONAT_PORT_MAX, range->max.all); - return 0; - -nla_put_failure: - return -1; -} -EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); - int nf_nat_proto_nlattr_to_range(struct nlattr *tb[], - struct nf_nat_range *range) + struct nf_nat_ipv4_range *range) { if (tb[CTA_PROTONAT_PORT_MIN]) { range->min.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MIN]); range->max.all = range->min.tcp.port; - range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } if (tb[CTA_PROTONAT_PORT_MAX]) { range->max.all = nla_get_be16(tb[CTA_PROTONAT_PORT_MAX]); - range->flags |= IP_NAT_RANGE_PROTO_SPECIFIED; + range->flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } return 0; } -EXPORT_SYMBOL_GPL(nf_nat_proto_range_to_nlattr); +EXPORT_SYMBOL_GPL(nf_nat_proto_nlattr_to_range); #endif diff --git a/net/ipv4/netfilter/nf_nat_proto_dccp.c b/net/ipv4/netfilter/nf_nat_proto_dccp.c index 570faf2667b..3f67138d187 100644 --- a/net/ipv4/netfilter/nf_nat_proto_dccp.c +++ b/net/ipv4/netfilter/nf_nat_proto_dccp.c @@ -24,7 +24,7 @@ static u_int16_t dccp_port_rover; static void dccp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -54,7 +54,7 @@ dccp_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct dccp_hdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { oldip = iph->saddr; newip = tuple->src.u3.ip; newport = tuple->src.u.dccp.port; @@ -80,12 +80,10 @@ dccp_manip_pkt(struct sk_buff *skb, static const struct nf_nat_protocol nf_nat_protocol_dccp = { .protonum = IPPROTO_DCCP, - .me = THIS_MODULE, .manip_pkt = dccp_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = dccp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_gre.c b/net/ipv4/netfilter/nf_nat_proto_gre.c index bc8d83a31c7..46ba0b9ab98 100644 --- a/net/ipv4/netfilter/nf_nat_proto_gre.c +++ b/net/ipv4/netfilter/nf_nat_proto_gre.c @@ -39,7 +39,7 @@ MODULE_DESCRIPTION("Netfilter NAT protocol helper module for GRE"); /* generate unique tuple ... */ static void gre_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -52,12 +52,12 @@ gre_unique_tuple(struct nf_conntrack_tuple *tuple, if (!ct->master) return; - if (maniptype == IP_NAT_MANIP_SRC) + if (maniptype == NF_NAT_MANIP_SRC) keyptr = &tuple->src.u.gre.key; else keyptr = &tuple->dst.u.gre.key; - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) { + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) { pr_debug("%p: NATing GRE PPTP\n", ct); min = 1; range_size = 0xffff; @@ -99,7 +99,7 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, /* we only have destination manip of a packet, since 'source key' * is not present in the packet itself */ - if (maniptype != IP_NAT_MANIP_DST) + if (maniptype != NF_NAT_MANIP_DST) return true; switch (greh->version) { case GRE_VERSION_1701: @@ -119,12 +119,10 @@ gre_manip_pkt(struct sk_buff *skb, unsigned int iphdroff, static const struct nf_nat_protocol gre = { .protonum = IPPROTO_GRE, - .me = THIS_MODULE, .manip_pkt = gre_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = gre_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_icmp.c b/net/ipv4/netfilter/nf_nat_proto_icmp.c index 9f4dc1235dc..b35172851ba 100644 --- a/net/ipv4/netfilter/nf_nat_proto_icmp.c +++ b/net/ipv4/netfilter/nf_nat_proto_icmp.c @@ -30,7 +30,7 @@ icmp_in_range(const struct nf_conntrack_tuple *tuple, static void icmp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -40,7 +40,7 @@ icmp_unique_tuple(struct nf_conntrack_tuple *tuple, range_size = ntohs(range->max.icmp.id) - ntohs(range->min.icmp.id) + 1; /* If no range specified... */ - if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED)) + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) range_size = 0xFFFF; for (i = 0; ; ++id) { @@ -74,12 +74,10 @@ icmp_manip_pkt(struct sk_buff *skb, const struct nf_nat_protocol nf_nat_protocol_icmp = { .protonum = IPPROTO_ICMP, - .me = THIS_MODULE, .manip_pkt = icmp_manip_pkt, .in_range = icmp_in_range, .unique_tuple = icmp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_sctp.c b/net/ipv4/netfilter/nf_nat_proto_sctp.c index bd5a80a62a5..3cce9b6c1c2 100644 --- a/net/ipv4/netfilter/nf_nat_proto_sctp.c +++ b/net/ipv4/netfilter/nf_nat_proto_sctp.c @@ -19,7 +19,7 @@ static u_int16_t nf_sctp_port_rover; static void sctp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -46,7 +46,7 @@ sctp_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct sctphdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ oldip = iph->saddr; newip = tuple->src.u3.ip; @@ -70,12 +70,10 @@ sctp_manip_pkt(struct sk_buff *skb, static const struct nf_nat_protocol nf_nat_protocol_sctp = { .protonum = IPPROTO_SCTP, - .me = THIS_MODULE, .manip_pkt = sctp_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = sctp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_tcp.c b/net/ipv4/netfilter/nf_nat_proto_tcp.c index 0d67bb80130..9fb4b4e72bb 100644 --- a/net/ipv4/netfilter/nf_nat_proto_tcp.c +++ b/net/ipv4/netfilter/nf_nat_proto_tcp.c @@ -23,7 +23,7 @@ static u_int16_t tcp_port_rover; static void tcp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -55,7 +55,7 @@ tcp_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct tcphdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ oldip = iph->saddr; newip = tuple->src.u3.ip; @@ -82,12 +82,10 @@ tcp_manip_pkt(struct sk_buff *skb, const struct nf_nat_protocol nf_nat_protocol_tcp = { .protonum = IPPROTO_TCP, - .me = THIS_MODULE, .manip_pkt = tcp_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = tcp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_udp.c b/net/ipv4/netfilter/nf_nat_proto_udp.c index 0b1b8601cba..9883336e628 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udp.c +++ b/net/ipv4/netfilter/nf_nat_proto_udp.c @@ -22,7 +22,7 @@ static u_int16_t udp_port_rover; static void udp_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -47,7 +47,7 @@ udp_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct udphdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ oldip = iph->saddr; newip = tuple->src.u3.ip; @@ -73,12 +73,10 @@ udp_manip_pkt(struct sk_buff *skb, const struct nf_nat_protocol nf_nat_protocol_udp = { .protonum = IPPROTO_UDP, - .me = THIS_MODULE, .manip_pkt = udp_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = udp_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_udplite.c b/net/ipv4/netfilter/nf_nat_proto_udplite.c index f83ef23e2ab..d24d10a7beb 100644 --- a/net/ipv4/netfilter/nf_nat_proto_udplite.c +++ b/net/ipv4/netfilter/nf_nat_proto_udplite.c @@ -21,7 +21,7 @@ static u_int16_t udplite_port_rover; static void udplite_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -47,7 +47,7 @@ udplite_manip_pkt(struct sk_buff *skb, iph = (struct iphdr *)(skb->data + iphdroff); hdr = (struct udphdr *)(skb->data + hdroff); - if (maniptype == IP_NAT_MANIP_SRC) { + if (maniptype == NF_NAT_MANIP_SRC) { /* Get rid of src ip and src pt */ oldip = iph->saddr; newip = tuple->src.u3.ip; @@ -72,12 +72,10 @@ udplite_manip_pkt(struct sk_buff *skb, static const struct nf_nat_protocol nf_nat_protocol_udplite = { .protonum = IPPROTO_UDPLITE, - .me = THIS_MODULE, .manip_pkt = udplite_manip_pkt, .in_range = nf_nat_proto_in_range, .unique_tuple = udplite_unique_tuple, #if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) - .range_to_nlattr = nf_nat_proto_range_to_nlattr, .nlattr_to_range = nf_nat_proto_nlattr_to_range, #endif }; diff --git a/net/ipv4/netfilter/nf_nat_proto_unknown.c b/net/ipv4/netfilter/nf_nat_proto_unknown.c index a50f2bc1c73..e0afe8112b1 100644 --- a/net/ipv4/netfilter/nf_nat_proto_unknown.c +++ b/net/ipv4/netfilter/nf_nat_proto_unknown.c @@ -27,7 +27,7 @@ static bool unknown_in_range(const struct nf_conntrack_tuple *tuple, } static void unknown_unique_tuple(struct nf_conntrack_tuple *tuple, - const struct nf_nat_range *range, + const struct nf_nat_ipv4_range *range, enum nf_nat_manip_type maniptype, const struct nf_conn *ct) { @@ -46,7 +46,6 @@ unknown_manip_pkt(struct sk_buff *skb, } const struct nf_nat_protocol nf_nat_unknown_protocol = { - /* .me isn't set: getting a ref to this cannot fail. */ .manip_pkt = unknown_manip_pkt, .in_range = unknown_in_range, .unique_tuple = unknown_unique_tuple, diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c index 733c9abc1cb..d2a9dc314e0 100644 --- a/net/ipv4/netfilter/nf_nat_rule.c +++ b/net/ipv4/netfilter/nf_nat_rule.c @@ -44,7 +44,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || par->hooknum == NF_INET_LOCAL_IN); @@ -56,7 +56,7 @@ ipt_snat_target(struct sk_buff *skb, const struct xt_action_param *par) ctinfo == IP_CT_RELATED_REPLY)); NF_CT_ASSERT(par->out != NULL); - return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_SRC); + return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_SRC); } static unsigned int @@ -64,7 +64,7 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || par->hooknum == NF_INET_LOCAL_OUT); @@ -74,12 +74,12 @@ ipt_dnat_target(struct sk_buff *skb, const struct xt_action_param *par) /* Connection must be valid and new. */ NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - return nf_nat_setup_info(ct, &mr->range[0], IP_NAT_MANIP_DST); + return nf_nat_setup_info(ct, &mr->range[0], NF_NAT_MANIP_DST); } static int ipt_snat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -91,7 +91,7 @@ static int ipt_snat_checkentry(const struct xt_tgchk_param *par) static int ipt_dnat_checkentry(const struct xt_tgchk_param *par) { - const struct nf_nat_multi_range_compat *mr = par->targinfo; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; /* Must be a valid range */ if (mr->rangesize != 1) { @@ -105,13 +105,13 @@ static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) { /* Force range to this IP; let proto decide mapping for - per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + per-proto parts (hence not NF_NAT_RANGE_PROTO_SPECIFIED). */ - struct nf_nat_range range; + struct nf_nat_ipv4_range range; range.flags = 0; pr_debug("Allocating NULL binding for %p (%pI4)\n", ct, - HOOK2MANIP(hooknum) == IP_NAT_MANIP_SRC ? + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip : &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip); @@ -140,7 +140,7 @@ int nf_nat_rule_find(struct sk_buff *skb, static struct xt_target ipt_snat_reg __read_mostly = { .name = "SNAT", .target = ipt_snat_target, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_IN), .checkentry = ipt_snat_checkentry, @@ -150,7 +150,7 @@ static struct xt_target ipt_snat_reg __read_mostly = { static struct xt_target ipt_dnat_reg __read_mostly = { .name = "DNAT", .target = ipt_dnat_target, - .targetsize = sizeof(struct nf_nat_multi_range_compat), + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), .table = "nat", .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), .checkentry = ipt_dnat_checkentry, diff --git a/net/ipv4/netfilter/nf_nat_sip.c b/net/ipv4/netfilter/nf_nat_sip.c index 78844d9208f..d0319f96269 100644 --- a/net/ipv4/netfilter/nf_nat_sip.c +++ b/net/ipv4/netfilter/nf_nat_sip.c @@ -249,25 +249,25 @@ static void ip_nat_sip_seq_adjust(struct sk_buff *skb, s16 off) static void ip_nat_sip_expected(struct nf_conn *ct, struct nf_conntrack_expect *exp) { - struct nf_nat_range range; + struct nf_nat_ipv4_range range; /* This must be a fresh one. */ BUG_ON(ct->status & IPS_NAT_DONE_MASK); /* For DST manip, map port here to where it's expected. */ - range.flags = (IP_NAT_RANGE_MAP_IPS | IP_NAT_RANGE_PROTO_SPECIFIED); + range.flags = (NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED); range.min = range.max = exp->saved_proto; range.min_ip = range.max_ip = exp->saved_ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_DST); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); /* Change src to where master sends to, but only if the connection * actually came from the same source. */ if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == ct->master->tuplehash[exp->dir].tuple.src.u3.ip) { - range.flags = IP_NAT_RANGE_MAP_IPS; + range.flags = NF_NAT_RANGE_MAP_IPS; range.min_ip = range.max_ip = ct->master->tuplehash[!exp->dir].tuple.dst.u3.ip; - nf_nat_setup_info(ct, &range, IP_NAT_MANIP_SRC); + nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); } } diff --git a/net/ipv4/netfilter/nf_nat_standalone.c b/net/ipv4/netfilter/nf_nat_standalone.c index 92900482ede..3828a422982 100644 --- a/net/ipv4/netfilter/nf_nat_standalone.c +++ b/net/ipv4/netfilter/nf_nat_standalone.c @@ -137,7 +137,7 @@ nf_nat_fn(unsigned int hooknum, return ret; } else pr_debug("Already setup manip %s for ct %p\n", - maniptype == IP_NAT_MANIP_SRC ? "SRC" : "DST", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); break; diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index f792b34cbe9..9a68fb5b9e7 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -125,6 +125,16 @@ config IP6_NF_MATCH_MH To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_MATCH_RPFILTER + tristate '"rpfilter" reverse path filter match support' + depends on NETFILTER_ADVANCED + ---help--- + This option allows you to match packets whose replies would + go out via the interface the packet came in. + + To compile it as a module, choose M here. If unsure, say N. + The module will be called ip6t_rpfilter. + config IP6_NF_MATCH_RT tristate '"rt" Routing header match support' depends on NETFILTER_ADVANCED diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index abfee91ce81..2eaed96db02 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -27,6 +27,7 @@ obj-$(CONFIG_IP6_NF_MATCH_FRAG) += ip6t_frag.o obj-$(CONFIG_IP6_NF_MATCH_IPV6HEADER) += ip6t_ipv6header.o obj-$(CONFIG_IP6_NF_MATCH_MH) += ip6t_mh.o obj-$(CONFIG_IP6_NF_MATCH_OPTS) += ip6t_hbh.o +obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c new file mode 100644 index 00000000000..5d1d8b04d69 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011 Florian Westphal <fw@strlen.de> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/module.h> +#include <linux/skbuff.h> +#include <linux/netdevice.h> +#include <linux/route.h> +#include <net/ip6_fib.h> +#include <net/ip6_route.h> + +#include <linux/netfilter/xt_rpfilter.h> +#include <linux/netfilter/x_tables.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Florian Westphal <fw@strlen.de>"); +MODULE_DESCRIPTION("Xtables: IPv6 reverse path filter match"); + +static bool rpfilter_addr_unicast(const struct in6_addr *addr) +{ + int addr_type = ipv6_addr_type(addr); + return addr_type & IPV6_ADDR_UNICAST; +} + +static bool rpfilter_lookup_reverse6(const struct sk_buff *skb, + const struct net_device *dev, u8 flags) +{ + struct rt6_info *rt; + struct ipv6hdr *iph = ipv6_hdr(skb); + bool ret = false; + struct flowi6 fl6 = { + .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, + .flowi6_proto = iph->nexthdr, + .daddr = iph->saddr, + }; + int lookup_flags; + + if (rpfilter_addr_unicast(&iph->daddr)) { + memcpy(&fl6.saddr, &iph->daddr, sizeof(struct in6_addr)); + lookup_flags = RT6_LOOKUP_F_HAS_SADDR; + } else { + lookup_flags = 0; + } + + fl6.flowi6_mark = flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; + if ((flags & XT_RPFILTER_LOOSE) == 0) { + fl6.flowi6_oif = dev->ifindex; + lookup_flags |= RT6_LOOKUP_F_IFACE; + } + + rt = (void *) ip6_route_lookup(dev_net(dev), &fl6, lookup_flags); + if (rt->dst.error) + goto out; + + if (rt->rt6i_flags & (RTF_REJECT|RTF_ANYCAST)) + goto out; + + if (rt->rt6i_flags & RTF_LOCAL) { + ret = flags & XT_RPFILTER_ACCEPT_LOCAL; + goto out; + } + + if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) + ret = true; + out: + dst_release(&rt->dst); + return ret; +} + +static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + int saddrtype; + struct ipv6hdr *iph; + bool invert = info->flags & XT_RPFILTER_INVERT; + + if (par->in->flags & IFF_LOOPBACK) + return true ^ invert; + + iph = ipv6_hdr(skb); + saddrtype = ipv6_addr_type(&iph->saddr); + if (unlikely(saddrtype == IPV6_ADDR_ANY)) + return true ^ invert; /* not routable: forward path will drop it */ + + return rpfilter_lookup_reverse6(skb, par->in, info->flags) ^ invert; +} + +static int rpfilter_check(const struct xt_mtchk_param *par) +{ + const struct xt_rpfilter_info *info = par->matchinfo; + unsigned int options = ~XT_RPFILTER_OPTION_MASK; + + if (info->flags & options) { + pr_info("unknown options encountered"); + return -EINVAL; + } + + if (strcmp(par->table, "mangle") != 0 && + strcmp(par->table, "raw") != 0) { + pr_info("match only valid in the \'raw\' " + "or \'mangle\' tables, not \'%s\'.\n", par->table); + return -EINVAL; + } + + return 0; +} + +static struct xt_match rpfilter_mt_reg __read_mostly = { + .name = "rpfilter", + .family = NFPROTO_IPV6, + .checkentry = rpfilter_check, + .match = rpfilter_mt, + .matchsize = sizeof(struct xt_rpfilter_info), + .hooks = (1 << NF_INET_PRE_ROUTING), + .me = THIS_MODULE +}; + +static int __init rpfilter_mt_init(void) +{ + return xt_register_match(&rpfilter_mt_reg); +} + +static void __exit rpfilter_mt_exit(void) +{ + xt_unregister_match(&rpfilter_mt_reg); +} + +module_init(rpfilter_mt_init); +module_exit(rpfilter_mt_exit); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ad438546d91..5855e9ede3c 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -658,6 +658,13 @@ out: } +struct dst_entry * ip6_route_lookup(struct net *net, struct flowi6 *fl6, + int flags) +{ + return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_lookup); +} +EXPORT_SYMBOL_GPL(ip6_route_lookup); + struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr, int oif, int strict) { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index d5597b759ba..bac93ba6077 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -4,6 +4,14 @@ menu "Core Netfilter Configuration" config NETFILTER_NETLINK tristate +config NETFILTER_NETLINK_ACCT +tristate "Netfilter NFACCT over NFNETLINK interface" + depends on NETFILTER_ADVANCED + select NETFILTER_NETLINK + help + If this option is enabled, the kernel will include support + for extended accounting via NFNETLINK. + config NETFILTER_NETLINK_QUEUE tristate "Netfilter NFQUEUE over NFNETLINK interface" depends on NETFILTER_ADVANCED @@ -879,6 +887,16 @@ config NETFILTER_XT_MATCH_MULTIPORT To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_MATCH_NFACCT + tristate '"nfacct" match support' + default m if NETFILTER_ADVANCED=n + select NETFILTER_NETLINK_ACCT + help + This option allows you to use the extended accounting through + nfnetlink_acct. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_MATCH_OSF tristate '"osf" Passive OS fingerprint match' depends on NETFILTER_ADVANCED && NETFILTER_NETLINK diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1a02853df86..b2eee4df816 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -7,6 +7,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o obj-$(CONFIG_NETFILTER) = netfilter.o obj-$(CONFIG_NETFILTER_NETLINK) += nfnetlink.o +obj-$(CONFIG_NETFILTER_NETLINK_ACCT) += nfnetlink_acct.o obj-$(CONFIG_NETFILTER_NETLINK_QUEUE) += nfnetlink_queue.o obj-$(CONFIG_NETFILTER_NETLINK_LOG) += nfnetlink_log.o @@ -90,6 +91,7 @@ obj-$(CONFIG_NETFILTER_XT_MATCH_LENGTH) += xt_length.o obj-$(CONFIG_NETFILTER_XT_MATCH_LIMIT) += xt_limit.o obj-$(CONFIG_NETFILTER_XT_MATCH_MAC) += xt_mac.o obj-$(CONFIG_NETFILTER_XT_MATCH_MULTIPORT) += xt_multiport.o +obj-$(CONFIG_NETFILTER_XT_MATCH_NFACCT) += xt_nfacct.o obj-$(CONFIG_NETFILTER_XT_MATCH_OSF) += xt_osf.o obj-$(CONFIG_NETFILTER_XT_MATCH_OWNER) += xt_owner.o obj-$(CONFIG_NETFILTER_XT_MATCH_PHYSDEV) += xt_physdev.o diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 70bd1d0774c..af4c0b8c527 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -232,6 +232,21 @@ config IP_VS_NQ If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +comment 'IPVS SH scheduler' + +config IP_VS_SH_TAB_BITS + int "IPVS source hashing table size (the Nth power of 2)" + range 4 20 + default 8 + ---help--- + The source hashing scheduler maps source IPs to destinations + stored in a hash table. This table is tiled by each destination + until all slots in the table are filled. When using weights to + allow destinations to receive more connections, the table is + tiled an amount proportional to the weights specified. The table + needs to be large enough to effectively fit all the destinations + multiplied by their respective weights. + comment 'IPVS application helper' config IP_VS_FTP diff --git a/net/netfilter/ipvs/ip_vs_sh.c b/net/netfilter/ipvs/ip_vs_sh.c index 33815f4fb45..069e8d4d5c0 100644 --- a/net/netfilter/ipvs/ip_vs_sh.c +++ b/net/netfilter/ipvs/ip_vs_sh.c @@ -30,6 +30,11 @@ * server is dead or overloaded, the load balancer can bypass the cache * server and send requests to the original server directly. * + * The weight destination attribute can be used to control the + * distribution of connections to the destinations in servernode. The + * greater the weight, the more connections the destination + * will receive. + * */ #define KMSG_COMPONENT "IPVS" @@ -99,9 +104,11 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) struct ip_vs_sh_bucket *b; struct list_head *p; struct ip_vs_dest *dest; + int d_count; b = tbl; p = &svc->destinations; + d_count = 0; for (i=0; i<IP_VS_SH_TAB_SIZE; i++) { if (list_empty(p)) { b->dest = NULL; @@ -113,7 +120,16 @@ ip_vs_sh_assign(struct ip_vs_sh_bucket *tbl, struct ip_vs_service *svc) atomic_inc(&dest->refcnt); b->dest = dest; - p = p->next; + IP_VS_DBG_BUF(6, "assigned i: %d dest: %s weight: %d\n", + i, IP_VS_DBG_ADDR(svc->af, &dest->addr), + atomic_read(&dest->weight)); + + /* Don't move to next dest until filling weight */ + if (++d_count >= atomic_read(&dest->weight)) { + p = p->next; + d_count = 0; + } + } b++; } diff --git a/net/netfilter/nf_conntrack_acct.c b/net/netfilter/nf_conntrack_acct.c index bffa6b03bb7..f4f8cda0598 100644 --- a/net/netfilter/nf_conntrack_acct.c +++ b/net/netfilter/nf_conntrack_acct.c @@ -46,8 +46,8 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir) return 0; return seq_printf(s, "packets=%llu bytes=%llu ", - (unsigned long long)acct[dir].packets, - (unsigned long long)acct[dir].bytes); + (unsigned long long)atomic64_read(&acct[dir].packets), + (unsigned long long)atomic64_read(&acct[dir].bytes)); }; EXPORT_SYMBOL_GPL(seq_print_acct); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index deeef74e775..e875f8902db 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -67,6 +67,7 @@ DEFINE_PER_CPU(struct nf_conn, nf_conntrack_untracked); EXPORT_PER_CPU_SYMBOL(nf_conntrack_untracked); unsigned int nf_conntrack_hash_rnd __read_mostly; +EXPORT_SYMBOL_GPL(nf_conntrack_hash_rnd); static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple, u16 zone) { @@ -1044,10 +1045,8 @@ acct: acct = nf_conn_acct_find(ct); if (acct) { - spin_lock_bh(&ct->lock); - acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += skb->len; - spin_unlock_bh(&ct->lock); + atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len, &acct[CTINFO2DIR(ctinfo)].bytes); } } } @@ -1063,11 +1062,9 @@ bool __nf_ct_kill_acct(struct nf_conn *ct, acct = nf_conn_acct_find(ct); if (acct) { - spin_lock_bh(&ct->lock); - acct[CTINFO2DIR(ctinfo)].packets++; - acct[CTINFO2DIR(ctinfo)].bytes += - skb->len - skb_network_offset(skb); - spin_unlock_bh(&ct->lock); + atomic64_inc(&acct[CTINFO2DIR(ctinfo)].packets); + atomic64_add(skb->len - skb_network_offset(skb), + &acct[CTINFO2DIR(ctinfo)].bytes); } } diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 340c80d968d..bebb1675e6f 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -38,8 +38,6 @@ unsigned int nf_ct_expect_max __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; -static HLIST_HEAD(nf_ct_userspace_expect_list); - /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, u32 pid, int report) @@ -47,14 +45,14 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); + NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); net->ct.expect_count--; hlist_del(&exp->lnode); - if (!(exp->flags & NF_CT_EXPECT_USERSPACE)) - master_help->expecting[exp->class]--; + master_help->expecting[exp->class]--; nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); nf_ct_expect_put(exp); @@ -314,37 +312,34 @@ void nf_ct_expect_put(struct nf_conntrack_expect *exp) } EXPORT_SYMBOL_GPL(nf_ct_expect_put); -static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) +static int nf_ct_expect_insert(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); + struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(exp); - const struct nf_conntrack_expect_policy *p; unsigned int h = nf_ct_expect_dst_hash(&exp->tuple); /* two references : one for hash insert, one for the timer */ atomic_add(2, &exp->use); - if (master_help) { - hlist_add_head(&exp->lnode, &master_help->expectations); - master_help->expecting[exp->class]++; - } else if (exp->flags & NF_CT_EXPECT_USERSPACE) - hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list); + hlist_add_head(&exp->lnode, &master_help->expectations); + master_help->expecting[exp->class]++; hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); - if (master_help) { - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[exp->class]; - exp->timeout.expires = jiffies + p->timeout * HZ; + helper = rcu_dereference_protected(master_help->helper, + lockdep_is_held(&nf_conntrack_lock)); + if (helper) { + exp->timeout.expires = jiffies + + helper->expect_policy[exp->class].timeout * HZ; } add_timer(&exp->timeout); NF_CT_STAT_INC(net, expect_create); + return 0; } /* Race with expectations being used means we could have none to find; OK. */ @@ -389,14 +384,13 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) struct nf_conntrack_expect *i; struct nf_conn *master = expect->master; struct nf_conn_help *master_help = nfct_help(master); + struct nf_conntrack_helper *helper; struct net *net = nf_ct_exp_net(expect); struct hlist_node *n; unsigned int h; int ret = 1; - /* Don't allow expectations created from kernel-space with no helper */ - if (!(expect->flags & NF_CT_EXPECT_USERSPACE) && - (!master_help || (master_help && !master_help->helper))) { + if (!master_help) { ret = -ESHUTDOWN; goto out; } @@ -414,11 +408,10 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) } } /* Will be over limit? */ - if (master_help) { - p = &rcu_dereference_protected( - master_help->helper, - lockdep_is_held(&nf_conntrack_lock) - )->expect_policy[expect->class]; + helper = rcu_dereference_protected(master_help->helper, + lockdep_is_held(&nf_conntrack_lock)); + if (helper) { + p = &helper->expect_policy[expect->class]; if (p->max_expected && master_help->expecting[expect->class] >= p->max_expected) { evict_oldest_expect(master, expect); @@ -450,8 +443,9 @@ int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, if (ret <= 0) goto out; - ret = 0; - nf_ct_expect_insert(expect); + ret = nf_ct_expect_insert(expect); + if (ret < 0) + goto out; spin_unlock_bh(&nf_conntrack_lock); nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report); return ret; @@ -461,21 +455,6 @@ out: } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); -void nf_ct_remove_userspace_expectations(void) -{ - struct nf_conntrack_expect *exp; - struct hlist_node *n, *next; - - hlist_for_each_entry_safe(exp, n, next, - &nf_ct_userspace_expect_list, lnode) { - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); - nf_ct_expect_put(exp); - } - } -} -EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations); - #ifdef CONFIG_PROC_FS struct ct_expect_iter_state { struct seq_net_private p; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 93c4bdbfc1a..c9e0de08aa8 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -121,6 +121,18 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, int ret = 0; if (tmpl != NULL) { + /* we've got a userspace helper. */ + if (tmpl->status & IPS_USERSPACE_HELPER) { + help = nf_ct_helper_ext_add(ct, flags); + if (help == NULL) { + ret = -ENOMEM; + goto out; + } + rcu_assign_pointer(help->helper, NULL); + __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status); + ret = 0; + goto out; + } help = nfct_help(tmpl); if (help != NULL) helper = help->helper; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index ef21b221f03..85033344aed 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -203,25 +203,18 @@ nla_put_failure: } static int -ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, - enum ip_conntrack_dir dir) +dump_counters(struct sk_buff *skb, u64 pkts, u64 bytes, + enum ip_conntrack_dir dir) { enum ctattr_type type = dir ? CTA_COUNTERS_REPLY: CTA_COUNTERS_ORIG; struct nlattr *nest_count; - const struct nf_conn_counter *acct; - - acct = nf_conn_acct_find(ct); - if (!acct) - return 0; nest_count = nla_nest_start(skb, type | NLA_F_NESTED); if (!nest_count) goto nla_put_failure; - NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, - cpu_to_be64(acct[dir].packets)); - NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, - cpu_to_be64(acct[dir].bytes)); + NLA_PUT_BE64(skb, CTA_COUNTERS_PACKETS, cpu_to_be64(pkts)); + NLA_PUT_BE64(skb, CTA_COUNTERS_BYTES, cpu_to_be64(bytes)); nla_nest_end(skb, nest_count); @@ -232,6 +225,27 @@ nla_put_failure: } static int +ctnetlink_dump_counters(struct sk_buff *skb, const struct nf_conn *ct, + enum ip_conntrack_dir dir, int type) +{ + struct nf_conn_counter *acct; + u64 pkts, bytes; + + acct = nf_conn_acct_find(ct); + if (!acct) + return 0; + + if (type == IPCTNL_MSG_CT_GET_CTRZERO) { + pkts = atomic64_xchg(&acct[dir].packets, 0); + bytes = atomic64_xchg(&acct[dir].bytes, 0); + } else { + pkts = atomic64_read(&acct[dir].packets); + bytes = atomic64_read(&acct[dir].bytes); + } + return dump_counters(skb, pkts, bytes, dir); +} + +static int ctnetlink_dump_timestamp(struct sk_buff *skb, const struct nf_conn *ct) { struct nlattr *nest_count; @@ -393,15 +407,15 @@ nla_put_failure: } static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, - int event, struct nf_conn *ct) +ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = pid ? NLM_F_MULTI : 0, event; - event |= NFNL_SUBSYS_CTNETLINK << 8; + event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -430,8 +444,8 @@ ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, if (ctnetlink_dump_status(skb, ct) < 0 || ctnetlink_dump_timeout(skb, ct) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL, type) < 0 || + ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0 || ctnetlink_dump_protoinfo(skb, ct) < 0 || ctnetlink_dump_helpinfo(skb, ct) < 0 || @@ -612,8 +626,10 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto nla_put_failure; if (events & (1 << IPCT_DESTROY)) { - if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 || - ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0 || + if (ctnetlink_dump_counters(skb, ct, + IP_CT_DIR_ORIGINAL, type) < 0 || + ctnetlink_dump_counters(skb, ct, + IP_CT_DIR_REPLY, type) < 0 || ctnetlink_dump_timestamp(skb, ct) < 0) goto nla_put_failure; } else { @@ -709,20 +725,13 @@ restart: } if (ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, ct) < 0) { + NFNL_MSG_TYPE( + cb->nlh->nlmsg_type), + ct) < 0) { nf_conntrack_get(&ct->ct_general); cb->args[1] = (unsigned long)ct; goto out; } - - if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == - IPCTNL_MSG_CT_GET_CTRZERO) { - struct nf_conn_counter *acct; - - acct = nf_conn_acct_find(ct); - if (acct) - memset(acct, 0, sizeof(struct nf_conn_counter[IP_CT_DIR_MAX])); - } } if (cb->args[1]) { cb->args[1] = 0; @@ -1001,7 +1010,7 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, rcu_read_lock(); err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, - IPCTNL_MSG_CT_NEW, ct); + NFNL_MSG_TYPE(nlh->nlmsg_type), ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) @@ -1087,14 +1096,14 @@ ctnetlink_change_nat(struct nf_conn *ct, const struct nlattr * const cda[]) if (cda[CTA_NAT_DST]) { ret = ctnetlink_parse_nat_setup(ct, - IP_NAT_MANIP_DST, + NF_NAT_MANIP_DST, cda[CTA_NAT_DST]); if (ret < 0) return ret; } if (cda[CTA_NAT_SRC]) { ret = ctnetlink_parse_nat_setup(ct, - IP_NAT_MANIP_SRC, + NF_NAT_MANIP_SRC, cda[CTA_NAT_SRC]); if (ret < 0) return ret; @@ -1847,7 +1856,9 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, if (err < 0) return err; - if (cda[CTA_EXPECT_MASTER]) + if (cda[CTA_EXPECT_TUPLE]) + err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_TUPLE, u3); + else if (cda[CTA_EXPECT_MASTER]) err = ctnetlink_parse_tuple(cda, &tuple, CTA_EXPECT_MASTER, u3); else return -EINVAL; @@ -2023,6 +2034,10 @@ ctnetlink_create_expect(struct net *net, u16 zone, } help = nfct_help(ct); if (!help) { + err = -EOPNOTSUPP; + goto out; + } + if (test_bit(IPS_USERSPACE_HELPER_BIT, &ct->status)) { if (!cda[CTA_EXPECT_TIMEOUT]) { err = -EINVAL; goto out; @@ -2247,7 +2262,6 @@ static void __exit ctnetlink_exit(void) { pr_info("ctnetlink: unregistering from nfnetlink.\n"); - nf_ct_remove_userspace_expectations(); unregister_pernet_subsys(&ctnetlink_net_ops); nfnetlink_subsys_unregister(&ctnl_exp_subsys); nfnetlink_subsys_unregister(&ctnl_subsys); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c new file mode 100644 index 00000000000..362ab6ca3dc --- /dev/null +++ b/net/netfilter/nfnetlink_acct.c @@ -0,0 +1,352 @@ +/* + * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 Intra2net AG <http://www.intra2net.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation (or any later at your option). + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> +#include <linux/rculist.h> +#include <linux/slab.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <net/netlink.h> +#include <net/sock.h> +#include <asm/atomic.h> + +#include <linux/netfilter.h> +#include <linux/netfilter/nfnetlink.h> +#include <linux/netfilter/nfnetlink_acct.h> + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("nfacct: Extended Netfilter accounting infrastructure"); + +static LIST_HEAD(nfnl_acct_list); + +struct nf_acct { + atomic64_t pkts; + atomic64_t bytes; + struct list_head head; + atomic_t refcnt; + char name[NFACCT_NAME_MAX]; + struct rcu_head rcu_head; +}; + +static int +nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + struct nf_acct *nfacct, *matching = NULL; + char *acct_name; + + if (!tb[NFACCT_NAME]) + return -EINVAL; + + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(nfacct, &nfnl_acct_list, head) { + if (strncmp(nfacct->name, acct_name, NFACCT_NAME_MAX) != 0) + continue; + + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + + matching = nfacct; + break; + } + + if (matching) { + if (nlh->nlmsg_flags & NLM_F_REPLACE) { + /* reset counters if you request a replacement. */ + atomic64_set(&matching->pkts, 0); + atomic64_set(&matching->bytes, 0); + return 0; + } + return -EBUSY; + } + + nfacct = kzalloc(sizeof(struct nf_acct), GFP_KERNEL); + if (nfacct == NULL) + return -ENOMEM; + + strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); + + if (tb[NFACCT_BYTES]) { + atomic64_set(&nfacct->bytes, + be64_to_cpu(nla_get_u64(tb[NFACCT_BYTES]))); + } + if (tb[NFACCT_PKTS]) { + atomic64_set(&nfacct->pkts, + be64_to_cpu(nla_get_u64(tb[NFACCT_PKTS]))); + } + atomic_set(&nfacct->refcnt, 1); + list_add_tail_rcu(&nfacct->head, &nfnl_acct_list); + return 0; +} + +static int +nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, + int event, struct nf_acct *acct) +{ + struct nlmsghdr *nlh; + struct nfgenmsg *nfmsg; + unsigned int flags = pid ? NLM_F_MULTI : 0; + u64 pkts, bytes; + + event |= NFNL_SUBSYS_ACCT << 8; + nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + if (nlh == NULL) + goto nlmsg_failure; + + nfmsg = nlmsg_data(nlh); + nfmsg->nfgen_family = AF_UNSPEC; + nfmsg->version = NFNETLINK_V0; + nfmsg->res_id = 0; + + NLA_PUT_STRING(skb, NFACCT_NAME, acct->name); + + if (type == NFNL_MSG_ACCT_GET_CTRZERO) { + pkts = atomic64_xchg(&acct->pkts, 0); + bytes = atomic64_xchg(&acct->bytes, 0); + } else { + pkts = atomic64_read(&acct->pkts); + bytes = atomic64_read(&acct->bytes); + } + NLA_PUT_BE64(skb, NFACCT_PKTS, cpu_to_be64(pkts)); + NLA_PUT_BE64(skb, NFACCT_BYTES, cpu_to_be64(bytes)); + NLA_PUT_BE32(skb, NFACCT_USE, htonl(atomic_read(&acct->refcnt))); + + nlmsg_end(skb, nlh); + return skb->len; + +nlmsg_failure: +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -1; +} + +static int +nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct nf_acct *cur, *last; + + if (cb->args[2]) + return 0; + + last = (struct nf_acct *)cb->args[1]; + if (cb->args[1]) + cb->args[1] = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + if (last && cur != last) + continue; + + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, + NFNL_MSG_TYPE(cb->nlh->nlmsg_type), + NFNL_MSG_ACCT_NEW, cur) < 0) { + cb->args[1] = (unsigned long)cur; + break; + } + } + if (!cb->args[1]) + cb->args[2] = 1; + rcu_read_unlock(); + return skb->len; +} + +static int +nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + int ret = 0; + struct nf_acct *cur; + char *acct_name; + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump, + NULL, 0); + } + + if (!tb[NFACCT_NAME]) + return -EINVAL; + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(cur, &nfnl_acct_list, head) { + struct sk_buff *skb2; + + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) + continue; + + skb2 = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (skb2 == NULL) + break; + + ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, + nlh->nlmsg_seq, + NFNL_MSG_TYPE(nlh->nlmsg_type), + NFNL_MSG_ACCT_NEW, cur); + if (ret <= 0) + kfree_skb(skb2); + + break; + } + return ret; +} + +/* try to delete object, fail if it is still in use. */ +static int nfnl_acct_try_del(struct nf_acct *cur) +{ + int ret = 0; + + /* we want to avoid races with nfnl_acct_find_get. */ + if (atomic_dec_and_test(&cur->refcnt)) { + /* We are protected by nfnl mutex. */ + list_del_rcu(&cur->head); + kfree_rcu(cur, rcu_head); + } else { + /* still in use, restore reference counter. */ + atomic_inc(&cur->refcnt); + ret = -EBUSY; + } + return ret; +} + +static int +nfnl_acct_del(struct sock *nfnl, struct sk_buff *skb, + const struct nlmsghdr *nlh, const struct nlattr * const tb[]) +{ + char *acct_name; + struct nf_acct *cur; + int ret = -ENOENT; + + if (!tb[NFACCT_NAME]) { + list_for_each_entry(cur, &nfnl_acct_list, head) + nfnl_acct_try_del(cur); + + return 0; + } + acct_name = nla_data(tb[NFACCT_NAME]); + + list_for_each_entry(cur, &nfnl_acct_list, head) { + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX) != 0) + continue; + + ret = nfnl_acct_try_del(cur); + if (ret < 0) + return ret; + + break; + } + return ret; +} + +static const struct nla_policy nfnl_acct_policy[NFACCT_MAX+1] = { + [NFACCT_NAME] = { .type = NLA_NUL_STRING, .len = NFACCT_NAME_MAX-1 }, + [NFACCT_BYTES] = { .type = NLA_U64 }, + [NFACCT_PKTS] = { .type = NLA_U64 }, +}; + +static const struct nfnl_callback nfnl_acct_cb[NFNL_MSG_ACCT_MAX] = { + [NFNL_MSG_ACCT_NEW] = { .call = nfnl_acct_new, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_GET] = { .call = nfnl_acct_get, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_GET_CTRZERO] = { .call = nfnl_acct_get, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, + [NFNL_MSG_ACCT_DEL] = { .call = nfnl_acct_del, + .attr_count = NFACCT_MAX, + .policy = nfnl_acct_policy }, +}; + +static const struct nfnetlink_subsystem nfnl_acct_subsys = { + .name = "acct", + .subsys_id = NFNL_SUBSYS_ACCT, + .cb_count = NFNL_MSG_ACCT_MAX, + .cb = nfnl_acct_cb, +}; + +MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_ACCT); + +struct nf_acct *nfnl_acct_find_get(const char *acct_name) +{ + struct nf_acct *cur, *acct = NULL; + + rcu_read_lock(); + list_for_each_entry_rcu(cur, &nfnl_acct_list, head) { + if (strncmp(cur->name, acct_name, NFACCT_NAME_MAX)!= 0) + continue; + + if (!try_module_get(THIS_MODULE)) + goto err; + + if (!atomic_inc_not_zero(&cur->refcnt)) { + module_put(THIS_MODULE); + goto err; + } + + acct = cur; + break; + } +err: + rcu_read_unlock(); + return acct; +} +EXPORT_SYMBOL_GPL(nfnl_acct_find_get); + +void nfnl_acct_put(struct nf_acct *acct) +{ + atomic_dec(&acct->refcnt); + module_put(THIS_MODULE); +} +EXPORT_SYMBOL_GPL(nfnl_acct_put); + +void nfnl_acct_update(const struct sk_buff *skb, struct nf_acct *nfacct) +{ + atomic64_inc(&nfacct->pkts); + atomic64_add(skb->len, &nfacct->bytes); +} +EXPORT_SYMBOL_GPL(nfnl_acct_update); + +static int __init nfnl_acct_init(void) +{ + int ret; + + pr_info("nfnl_acct: registering with nfnetlink.\n"); + ret = nfnetlink_subsys_register(&nfnl_acct_subsys); + if (ret < 0) { + pr_err("nfnl_acct_init: cannot register with nfnetlink.\n"); + goto err_out; + } + return 0; +err_out: + return ret; +} + +static void __exit nfnl_acct_exit(void) +{ + struct nf_acct *cur, *tmp; + + pr_info("nfnl_acct: unregistering from nfnetlink.\n"); + nfnetlink_subsys_unregister(&nfnl_acct_subsys); + + list_for_each_entry_safe(cur, tmp, &nfnl_acct_list, head) { + list_del_rcu(&cur->head); + /* We are sure that our objects have no clients at this point, + * it's safe to release them all without checking refcnt. */ + kfree_rcu(cur, rcu_head); + } +} + +module_init(nfnl_acct_init); +module_exit(nfnl_acct_exit); diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 0221d10de75..8e87123f137 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -62,8 +62,8 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) int ret = 0; u8 proto; - if (info->flags & ~XT_CT_NOTRACK) - return -EINVAL; + if (info->flags & ~(XT_CT_NOTRACK | XT_CT_USERSPACE_HELPER)) + return -EOPNOTSUPP; if (info->flags & XT_CT_NOTRACK) { ct = nf_ct_untracked_get(); @@ -92,7 +92,9 @@ static int xt_ct_tg_check(const struct xt_tgchk_param *par) GFP_KERNEL)) goto err3; - if (info->helper[0]) { + if (info->flags & XT_CT_USERSPACE_HELPER) { + __set_bit(IPS_USERSPACE_HELPER_BIT, &ct->status); + } else if (info->helper[0]) { ret = -ENOENT; proto = xt_ct_find_proto(par); if (!proto) { diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index 9ddf1c3bfb3..e595e07a759 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -40,46 +40,46 @@ connbytes_mt(const struct sk_buff *skb, struct xt_action_param *par) case XT_CONNBYTES_PKTS: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - what = counters[IP_CT_DIR_ORIGINAL].packets; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: - what = counters[IP_CT_DIR_REPLY].packets; + what = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: - what = counters[IP_CT_DIR_ORIGINAL].packets; - what += counters[IP_CT_DIR_REPLY].packets; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); + what += atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } break; case XT_CONNBYTES_BYTES: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - what = counters[IP_CT_DIR_ORIGINAL].bytes; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); break; case XT_CONNBYTES_DIR_REPLY: - what = counters[IP_CT_DIR_REPLY].bytes; + what = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; case XT_CONNBYTES_DIR_BOTH: - what = counters[IP_CT_DIR_ORIGINAL].bytes; - what += counters[IP_CT_DIR_REPLY].bytes; + what = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); + what += atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); break; } break; case XT_CONNBYTES_AVGPKT: switch (sinfo->direction) { case XT_CONNBYTES_DIR_ORIGINAL: - bytes = counters[IP_CT_DIR_ORIGINAL].bytes; - pkts = counters[IP_CT_DIR_ORIGINAL].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets); break; case XT_CONNBYTES_DIR_REPLY: - bytes = counters[IP_CT_DIR_REPLY].bytes; - pkts = counters[IP_CT_DIR_REPLY].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; case XT_CONNBYTES_DIR_BOTH: - bytes = counters[IP_CT_DIR_ORIGINAL].bytes + - counters[IP_CT_DIR_REPLY].bytes; - pkts = counters[IP_CT_DIR_ORIGINAL].packets + - counters[IP_CT_DIR_REPLY].packets; + bytes = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].bytes) + + atomic64_read(&counters[IP_CT_DIR_REPLY].bytes); + pkts = atomic64_read(&counters[IP_CT_DIR_ORIGINAL].packets) + + atomic64_read(&counters[IP_CT_DIR_REPLY].packets); break; } if (pkts != 0) diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c new file mode 100644 index 00000000000..b3be0ef21f1 --- /dev/null +++ b/net/netfilter/xt_nfacct.c @@ -0,0 +1,76 @@ +/* + * (C) 2011 Pablo Neira Ayuso <pablo@netfilter.org> + * (C) 2011 Intra2net AG <http://www.intra2net.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 (or any + * later at your option) as published by the Free Software Foundation. + */ +#include <linux/module.h> +#include <linux/skbuff.h> + +#include <linux/netfilter/x_tables.h> +#include <linux/netfilter/nfnetlink_acct.h> +#include <linux/netfilter/xt_nfacct.h> + +MODULE_AUTHOR("Pablo Neira Ayuso <pablo@netfilter.org>"); +MODULE_DESCRIPTION("Xtables: match for the extended accounting infrastructure"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS("ipt_nfacct"); +MODULE_ALIAS("ip6t_nfacct"); + +static bool nfacct_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_nfacct_match_info *info = par->targinfo; + + nfnl_acct_update(skb, info->nfacct); + + return true; +} + +static int +nfacct_mt_checkentry(const struct xt_mtchk_param *par) +{ + struct xt_nfacct_match_info *info = par->matchinfo; + struct nf_acct *nfacct; + + nfacct = nfnl_acct_find_get(info->name); + if (nfacct == NULL) { + pr_info("xt_nfacct: accounting object with name `%s' " + "does not exists\n", info->name); + return -ENOENT; + } + info->nfacct = nfacct; + return 0; +} + +static void +nfacct_mt_destroy(const struct xt_mtdtor_param *par) +{ + const struct xt_nfacct_match_info *info = par->matchinfo; + + nfnl_acct_put(info->nfacct); +} + +static struct xt_match nfacct_mt_reg __read_mostly = { + .name = "nfacct", + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info), + .me = THIS_MODULE, +}; + +static int __init nfacct_mt_init(void) +{ + return xt_register_match(&nfacct_mt_reg); +} + +static void __exit nfacct_mt_exit(void) +{ + xt_unregister_match(&nfacct_mt_reg); +} + +module_init(nfacct_mt_init); +module_exit(nfacct_mt_exit); |