author    | David S. Miller <davem@davemloft.net> | 2009-03-24 13:24:36 -0700
committer | David S. Miller <davem@davemloft.net> | 2009-03-24 13:24:36 -0700
commit    | b5bb14386eabcb4229ade2bc0a2b237ca166d37d (patch)
tree      | 1966e65479f0d12cec0a204443a95b8eb57946db /net
parent    | bb4f92b3a33bfc31f55098da85be44702bea2d16 (diff)
parent    | 1d45209d89e647e9f27e4afa1f47338df73bc112 (diff)
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6
Diffstat (limited to 'net')
58 files changed, 1555 insertions, 756 deletions
diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c
index 8604dfc1fc3..c751111440f 100644
--- a/net/bridge/netfilter/ebtable_broute.c
+++ b/net/bridge/netfilter/ebtable_broute.c
@@ -46,7 +46,6 @@ static struct ebt_table broute_table =
 	.name = "broute",
 	.table = &initial_table,
 	.valid_hooks = 1 << NF_BR_BROUTING,
-	.lock = __RW_LOCK_UNLOCKED(broute_table.lock),
 	.check = check,
 	.me = THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c
index 2b2e8040a9c..a5eea72938a 100644
--- a/net/bridge/netfilter/ebtable_filter.c
+++ b/net/bridge/netfilter/ebtable_filter.c
@@ -55,7 +55,6 @@ static struct ebt_table frame_filter =
 	.name = "filter",
 	.table = &initial_table,
 	.valid_hooks = FILTER_VALID_HOOKS,
-	.lock = __RW_LOCK_UNLOCKED(frame_filter.lock),
 	.check = check,
 	.me = THIS_MODULE,
 };
diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c
index 3fe1ae87e35..6024c551f9a 100644
--- a/net/bridge/netfilter/ebtable_nat.c
+++ b/net/bridge/netfilter/ebtable_nat.c
@@ -55,7 +55,6 @@ static struct ebt_table frame_nat =
 	.name = "nat",
 	.table = &initial_table,
 	.valid_hooks = NAT_VALID_HOOKS,
-	.lock = __RW_LOCK_UNLOCKED(frame_nat.lock),
 	.check = check,
 	.me = THIS_MODULE,
 };
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index 3816e1dc929..1833bdbf980 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -31,7 +31,7 @@ config NF_CONNTRACK_PROC_COMPAT
 	default y
 	help
 	  This option enables /proc and sysctl compatibility with the old
-	  layer 3 dependant connection tracking. This is needed to keep
+	  layer 3 dependent connection tracking. This is needed to keep
 	  old programs that have not been adapted to the new names working.
 
 	  If unsure, say Y.
@@ -95,11 +95,11 @@ config IP_NF_MATCH_ECN
 config IP_NF_MATCH_TTL
 	tristate '"ttl" match support'
 	depends on NETFILTER_ADVANCED
-	help
-	  This adds CONFIG_IP_NF_MATCH_TTL option, which enabled the user
-	  to match packets by their TTL value.
-
-	  To compile it as a module, choose M here. If unsure, say N.
+	select NETFILTER_XT_MATCH_HL
+	---help---
+	  This is a backwards-compat option for the user's convenience
+	  (e.g. when running oldconfig). It selects
+	  CONFIG_NETFILTER_XT_MATCH_HL.
 
 # `filter', generic and specific targets
 config IP_NF_FILTER
@@ -323,19 +323,13 @@ config IP_NF_TARGET_ECN
 	  To compile it as a module, choose M here. If unsure, say N.
 
 config IP_NF_TARGET_TTL
-	tristate 'TTL target support'
-	depends on IP_NF_MANGLE
+	tristate '"TTL" target support'
 	depends on NETFILTER_ADVANCED
-	help
-	  This option adds a `TTL' target, which enables the user to modify
-	  the TTL value of the IP header.
-
-	  While it is safe to decrement/lower the TTL, this target also enables
-	  functionality to increment and set the TTL value of the IP header to
-	  arbitrary values. This is EXTREMELY DANGEROUS since you can easily
-	  create immortal packets that loop forever on the network.
-
-	  To compile it as a module, choose M here. If unsure, say N.
+	select NETFILTER_XT_TARGET_HL
+	---help---
+	  This is a backwards-compat option for the user's convenience
+	  (e.g. when running oldconfig). It selects
+	  CONFIG_NETFILTER_XT_TARGET_HL.
 
 # raw + specific targets
 config IP_NF_RAW
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 5f9b650d90f..48111594ee9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -51,7 +51,6 @@ obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
 obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
 obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
 obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
-obj-$(CONFIG_IP_NF_MATCH_TTL) += ipt_ttl.o
 
 # targets
 obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
@@ -61,7 +60,6 @@ obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o
 obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o
 obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o
 obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
-obj-$(CONFIG_IP_NF_TARGET_TTL) += ipt_TTL.o
 obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o
 
 # generic ARP tables
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 7ea88b61cb0..64a7c6ce0b9 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -73,6 +73,36 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap,
 	return (ret != 0);
 }
 
+/*
+ * Unfortunatly, _b and _mask are not aligned to an int (or long int)
+ * Some arches dont care, unrolling the loop is a win on them.
+ */
+static unsigned long ifname_compare(const char *_a, const char *_b, const char *_mask)
+{
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	const unsigned long *a = (const unsigned long *)_a;
+	const unsigned long *b = (const unsigned long *)_b;
+	const unsigned long *mask = (const unsigned long *)_mask;
+	unsigned long ret;
+
+	ret = (a[0] ^ b[0]) & mask[0];
+	if (IFNAMSIZ > sizeof(unsigned long))
+		ret |= (a[1] ^ b[1]) & mask[1];
+	if (IFNAMSIZ > 2 * sizeof(unsigned long))
+		ret |= (a[2] ^ b[2]) & mask[2];
+	if (IFNAMSIZ > 3 * sizeof(unsigned long))
+		ret |= (a[3] ^ b[3]) & mask[3];
+	BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
+#else
+	unsigned long ret = 0;
+	int i;
+
+	for (i = 0; i < IFNAMSIZ; i++)
+		ret |= (_a[i] ^ _b[i]) & _mask[i];
+#endif
+	return ret;
+}
+
 /* Returns whether packet matches rule or not. */
 static inline int arp_packet_match(const struct arphdr *arphdr,
 				   struct net_device *dev,
@@ -83,7 +113,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	const char *arpptr = (char *)(arphdr + 1);
 	const char *src_devaddr, *tgt_devaddr;
 	__be32 src_ipaddr, tgt_ipaddr;
-	int i, ret;
+	long ret;
 
 #define FWINV(bool, invflg) ((bool) ^ !!(arpinfo->invflags & (invflg)))
 
@@ -156,10 +186,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 	}
 
 	/* Look for ifname matches. */
-	for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
-		ret |= (indev[i] ^ arpinfo->iniface[i])
-			& arpinfo->iniface_mask[i];
-	}
+	ret = ifname_compare(indev, arpinfo->iniface, arpinfo->iniface_mask);
 
 	if (FWINV(ret != 0, ARPT_INV_VIA_IN)) {
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -168,10 +195,7 @@ static inline int arp_packet_match(const struct arphdr *arphdr,
 		return 0;
 	}
 
-	for (i = 0, ret = 0; i < IFNAMSIZ; i++) {
-		ret |= (outdev[i] ^ arpinfo->outiface[i])
-			& arpinfo->outiface_mask[i];
-	}
+	ret = ifname_compare(outdev, arpinfo->outiface, arpinfo->outiface_mask);
 
 	if (FWINV(ret != 0, ARPT_INV_VIA_OUT)) {
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
@@ -221,7 +245,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			   const struct net_device *out,
 			   struct xt_table *table)
 {
-	static const char nulldevname[IFNAMSIZ];
+	static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
 	unsigned int verdict = NF_DROP;
 	const struct arphdr *arp;
 	bool hotdrop = false;
@@ -237,9 +261,10 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 	indev = in ? in->name : nulldevname;
 	outdev = out ? out->name : nulldevname;
 
-	read_lock_bh(&table->lock);
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 	back = get_entry(table_base, private->underflow[hook]);
 
@@ -311,7 +336,8 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 			e = (void *)e + e->next_offset;
 		}
 	} while (!hotdrop);
-	read_unlock_bh(&table->lock);
+
+	rcu_read_unlock();
 
 	if (hotdrop)
 		return NF_DROP;
@@ -714,11 +740,65 @@ static void get_counters(const struct xt_table_info *t,
 	}
 }
 
-static inline struct xt_counters *alloc_counters(struct xt_table *table)
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	ARPT_ENTRY_ITERATE(t->entries[cpu],
+			   t->size,
+			   add_counter_to_entry,
+			   counters,
+			   &i);
+	local_bh_enable();
+}
+
+static inline int
+zero_entry_counter(struct arpt_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				   zero_entry_counter, NULL);
+	}
+}
+
+static struct xt_counters *alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	 * (other than comefrom, which userspace doesn't care
@@ -728,14 +808,30 @@ static inline struct xt_counters *alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
+
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
+
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int copy_entries_to_user(unsigned int total_size,
@@ -1075,20 +1171,6 @@ static int do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK.
- */
-static inline int add_counter_to_entry(struct arpt_entry *e,
-				       const struct xt_counters addme[],
-				       unsigned int *i)
-{
-
-	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
-	(*i)++;
-	return 0;
-}
-
 static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   int compat)
 {
@@ -1148,13 +1230,14 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 		goto free;
 	}
 
-	write_lock_bh(&t->lock);
+	mutex_lock(&t->lock);
 	private = t->private;
 	if (private->number != num_counters) {
 		ret = -EINVAL;
 		goto unlock_up_free;
 	}
 
+	preempt_disable();
 	i = 0;
 	/* Choose the copy that is on our node */
 	loc_cpu_entry = private->entries[smp_processor_id()];
@@ -1163,8 +1246,10 @@ static int do_add_counters(struct net *net, void __user *user, unsigned int len,
 			   add_counter_to_entry,
 			   paddc,
 			   &i);
+	preempt_enable();
  unlock_up_free:
-	write_unlock_bh(&t->lock);
+	mutex_unlock(&t->lock);
+	xt_table_unlock(t);
 
 	module_put(t->me);
  free:
diff --git a/net/ipv4/netfilter/arptable_filter.c b/net/ipv4/netfilter/arptable_filter.c
index e091187e864..6ecfdae7c58 100644
--- a/net/ipv4/netfilter/arptable_filter.c
+++ b/net/ipv4/netfilter/arptable_filter.c
@@ -48,8 +48,6 @@ static struct
 static struct xt_table packet_filter = {
 	.name = "filter",
 	.valid_hooks = FILTER_VALID_HOOKS,
-	.lock = __RW_LOCK_UNLOCKED(packet_filter.lock),
-	.private = NULL,
 	.me = THIS_MODULE,
 	.af = NFPROTO_ARP,
 };
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index 432ce9d1c11..5f22c91c6e1 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -24,6 +24,7 @@
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
 #include <linux/security.h>
+#include <linux/net.h>
 #include <linux/mutex.h>
 #include <net/net_namespace.h>
 #include <net/sock.h>
@@ -640,6 +641,7 @@ static void __exit ip_queue_fini(void)
 MODULE_DESCRIPTION("IPv4 packet queue handler");
 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
 MODULE_LICENSE("GPL");
+MODULE_ALIAS_NET_PF_PROTO(PF_NETLINK, NETLINK_FIREWALL);
 
 module_init(ip_queue_init);
 module_exit(ip_queue_fini);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index ef8b6ca068b..e5294aec967 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -74,6 +74,25 @@ do { \
 
    Hence the start of any table is given by get_table() below.  */
 
+static unsigned long ifname_compare(const char *_a, const char *_b,
+				    const unsigned char *_mask)
+{
+	const unsigned long *a = (const unsigned long *)_a;
+	const unsigned long *b = (const unsigned long *)_b;
+	const unsigned long *mask = (const unsigned long *)_mask;
+	unsigned long ret;
+
+	ret = (a[0] ^ b[0]) & mask[0];
+	if (IFNAMSIZ > sizeof(unsigned long))
+		ret |= (a[1] ^ b[1]) & mask[1];
+	if (IFNAMSIZ > 2 * sizeof(unsigned long))
+		ret |= (a[2] ^ b[2]) & mask[2];
+	if (IFNAMSIZ > 3 * sizeof(unsigned long))
+		ret |= (a[3] ^ b[3]) & mask[3];
+	BUILD_BUG_ON(IFNAMSIZ > 4 * sizeof(unsigned long));
+	return ret;
+}
+
 /* Returns whether matches rule or not. */
 /* Performance critical - called for every packet */
 static inline bool
@@ -83,7 +102,6 @@ ip_packet_match(const struct iphdr *ip,
 		const struct ipt_ip *ipinfo,
 		int isfrag)
 {
-	size_t i;
 	unsigned long ret;
 
 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
@@ -103,12 +121,7 @@ ip_packet_match(const struct iphdr *ip,
 		return false;
 	}
 
-	/* Look for ifname matches; this should unroll nicely. */
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
-		ret |= (((const unsigned long *)indev)[i]
-			^ ((const unsigned long *)ipinfo->iniface)[i])
-			& ((const unsigned long *)ipinfo->iniface_mask)[i];
-	}
+	ret = ifname_compare(indev, ipinfo->iniface, ipinfo->iniface_mask);
 
 	if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
 		dprintf("VIA in mismatch (%s vs %s).%s\n",
@@ -117,11 +130,7 @@ ip_packet_match(const struct iphdr *ip,
 		return false;
 	}
 
-	for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
-		ret |= (((const unsigned long *)outdev)[i]
-			^ ((const unsigned long *)ipinfo->outiface)[i])
-			& ((const unsigned long *)ipinfo->outiface_mask)[i];
-	}
+	ret = ifname_compare(outdev, ipinfo->outiface, ipinfo->outiface_mask);
 
 	if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
 		dprintf("VIA out mismatch (%s vs %s).%s\n",
@@ -347,10 +356,12 @@ ipt_do_table(struct sk_buff *skb,
 	mtpar.family = tgpar.family = NFPROTO_IPV4;
 	tgpar.hooknum = hook;
 
-	read_lock_bh(&table->lock);
 	IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-	private = table->private;
-	table_base = (void *)private->entries[smp_processor_id()];
+
+	rcu_read_lock();
+	private = rcu_dereference(table->private);
+	table_base = rcu_dereference(private->entries[smp_processor_id()]);
+
 	e = get_entry(table_base, private->hook_entry[hook]);
 
 	/* For return from builtin chain */
@@ -445,7 +456,7 @@ ipt_do_table(struct sk_buff *skb,
 		}
 	} while (!hotdrop);
 
-	read_unlock_bh(&table->lock);
+	rcu_read_unlock();
 
 #ifdef DEBUG_ALLOW_ALL
 	return NF_ACCEPT;
@@ -924,13 +935,68 @@ get_counters(const struct xt_table_info *t,
 				  counters,
 				  &i);
 	}
+
+}
+
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+		     const struct xt_counters addme[],
+		     unsigned int *i)
+{
+	ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+	(*i)++;
+	return 0;
+}
+
+/* Take values from counters and add them back onto the current cpu */
+static void put_counters(struct xt_table_info *t,
+			 const struct xt_counters counters[])
+{
+	unsigned int i, cpu;
+
+	local_bh_disable();
+	cpu = smp_processor_id();
+	i = 0;
+	IPT_ENTRY_ITERATE(t->entries[cpu],
+			  t->size,
+			  add_counter_to_entry,
+			  counters,
+			  &i);
+	local_bh_enable();
+}
+
+
+static inline int
+zero_entry_counter(struct ipt_entry *e, void *arg)
+{
+	e->counters.bcnt = 0;
+	e->counters.pcnt = 0;
+	return 0;
+}
+
+static void
+clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
+{
+	unsigned int cpu;
+	const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
+
+	memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
+	for_each_possible_cpu(cpu) {
+		memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
+		IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
+				  zero_entry_counter, NULL);
+	}
 }
 
 static struct xt_counters * alloc_counters(struct xt_table *table)
 {
 	unsigned int countersize;
 	struct xt_counters *counters;
-	const struct xt_table_info *private = table->private;
+	struct xt_table_info *private = table->private;
+	struct xt_table_info *info;
 
 	/* We need atomic snapshot of counters: rest doesn't change
 	   (other than comefrom, which userspace doesn't care
@@ -939,14 +1005,30 @@ static struct xt_counters * alloc_counters(struct xt_table *table)
 	counters = vmalloc_node(countersize, numa_node_id());
 
 	if (counters == NULL)
-		return ERR_PTR(-ENOMEM);
+		goto nomem;
+
+	info = xt_alloc_table_info(private->size);
+	if (!info)
+		goto free_counters;
 
-	/* First, sum counters... */
-	write_lock_bh(&table->lock);
-	get_counters(private, counters);
-	write_unlock_bh(&table->lock);
+	clone_counters(info, private);
+
+	mutex_lock(&table->lock);
+	xt_table_entry_swap_rcu(private, info);
+	synchronize_net();	/* Wait until smoke has cleared */
+
+	get_counters(info, counters);
+	put_counters(private, counters);
+	mutex_unlock(&table->lock);
+
+	xt_free_table_info(info);
 
 	return counters;
+
+ free_counters:
+	vfree(counters);
+ nomem:
+	return ERR_PTR(-ENOMEM);
 }
 
 static int
@@ -1312,27 +1394,6 @@ do_replace(struct net *net, void __user *user, unsigned int len)
 	return ret;
 }
 
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ip
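
The recurring ifname_compare() hunks above all implement the same idea: rather than walking the interface name byte by byte, the packet's device name is XORed against the rule's name one unsigned long at a time and masked, and any non-zero result means a mismatch. Below is a minimal, self-contained userspace sketch of that technique, not the kernel code itself: the IFNAMSIZ value, the ifname_cmp_bytes()/ifname_cmp_words() helper names, the sample device names and the 4-byte mask are illustrative assumptions, and a plain loop stands in for the patch's manually unrolled, alignment-aware version.

/*
 * Sketch of the masked interface-name comparison technique (assumptions:
 * IFNAMSIZ is 16, buffers are zero-padded and long-aligned, and only
 * zero vs. non-zero matters to the caller).
 */
#include <stdio.h>
#include <string.h>

#define IFNAMSIZ 16

/* Byte-at-a-time reference, equivalent to the loops the patch removes. */
static unsigned long ifname_cmp_bytes(const char *a, const char *b,
				      const char *mask)
{
	unsigned long ret = 0;
	int i;

	for (i = 0; i < IFNAMSIZ; i++)
		ret |= (a[i] ^ b[i]) & mask[i];
	return ret;
}

/*
 * Word-at-a-time version in the spirit of the patch's ifname_compare();
 * a plain loop here instead of the kernel's manual unrolling.
 */
static unsigned long ifname_cmp_words(const unsigned long *a,
				      const unsigned long *b,
				      const unsigned long *mask)
{
	unsigned long ret = (a[0] ^ b[0]) & mask[0];
	unsigned int i;

	for (i = 1; i < IFNAMSIZ / sizeof(unsigned long); i++)
		ret |= (a[i] ^ b[i]) & mask[i];
	return ret;	/* zero means "matches under the mask" */
}

int main(void)
{
	/* The union guarantees the alignment the word-wise version needs. */
	union ifname {
		char c[IFNAMSIZ];
		unsigned long l[IFNAMSIZ / sizeof(unsigned long)];
	} dev, rule, mask;

	memset(&dev, 0, sizeof(dev));
	memset(&rule, 0, sizeof(rule));
	memset(&mask, 0, sizeof(mask));

	strcpy(dev.c, "eth0");		/* name seen on the packet */
	strcpy(rule.c, "eth1");		/* name stored in the rule */
	memset(mask.c, 0xff, 4);	/* compare only the first 4 bytes */

	printf("byte-wise: %s, word-wise: %s\n",
	       ifname_cmp_bytes(dev.c, rule.c, mask.c) ? "mismatch" : "match",
	       ifname_cmp_words(dev.l, rule.l, mask.l) ? "mismatch" : "match");
	return 0;
}

Both calls report a mismatch for this example; the word-wise form is attractive in the packet path because IFNAMSIZ is a small constant, so the whole masked comparison collapses into a handful of register operations.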