diff options
author | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-12 15:34:17 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@woody.linux-foundation.org> | 2007-02-12 15:34:17 -0800 |
commit | 89697f1d715e20fff0361cca79efd5a371623af7 (patch) | |
tree | d64d279f6805fd3d5fb53f059cbe7ea1ae348592 | |
parent | fd19e44f449f7e2e58d42d7bb6813e2292c38fba (diff) | |
parent | 13fcfbb0675bf87da694f55dec11cada489a205c (diff) |
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6: (25 commits)
[XFRM]: Fix OOPSes in xfrm_audit_log().
[TCP]: cleanup of htcp (resend)
[TCP]: Use read mostly for CUBIC parameters.
[NETFILTER]: nf_conntrack_tcp: make sysctl variables static
[NETFILTER]: ip6t_mh: drop piggyback payload packet on MH packets
[NETFILTER]: Fix whitespace errors
[NETFILTER]: Kconfig: improve dependency handling
[NETFILTER]: xt_mac/xt_CLASSIFY: use IPv6 hook names for IPv6 registration
[NETFILTER]: nf_conntrack: change nf_conntrack_l[34]proto_unregister to void
[NETFILTER]: nf_conntrack: properly use RCU for nf_conntrack_destroyed callback
[NETFILTER]: ip_conntrack: properly use RCU for ip_conntrack_destroyed callback
[NETFILTER]: nf_conntrack: fix invalid conntrack statistics RCU assumption
[NETFILTER]: ip_conntrack: fix invalid conntrack statistics RCU assumption
[NETFILTER]: nf_conntrack: properly use RCU API for nf_ct_protos/nf_ct_l3protos arrays
[NETFILTER]: ip_conntrack: properly use RCU API for ip_ct_protos array
[NETFILTER]: nf_nat: properly use RCU API for nf_nat_protos array
[NETFILTER]: ip_nat: properly use RCU API for ip_nat_protos array
[NETFILTER]: nf_log: minor cleanups
[NETFILTER]: nf_log: switch logger registration/unregistration to mutex
[NETFILTER]: nf_log: make nf_log_unregister_pf return void
...
71 files changed, 768 insertions, 709 deletions
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index d4c4c5120bc..70d3b4f1e48 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -172,8 +172,8 @@ struct nf_logger { /* Function to register/unregister log function. */ int nf_log_register(int pf, struct nf_logger *logger); -int nf_log_unregister_pf(int pf); -void nf_log_unregister_logger(struct nf_logger *logger); +void nf_log_unregister(struct nf_logger *logger); +void nf_log_unregister_pf(int pf); /* Calls the registered backend logging function */ void nf_log_packet(int pf, diff --git a/include/linux/netfilter_ipv4/ip_conntrack.h b/include/linux/netfilter_ipv4/ip_conntrack.h index 33581c13d94..da9274e6bf1 100644 --- a/include/linux/netfilter_ipv4/ip_conntrack.h +++ b/include/linux/netfilter_ipv4/ip_conntrack.h @@ -301,6 +301,12 @@ extern unsigned int ip_conntrack_htable_size; extern int ip_conntrack_checksum; #define CONNTRACK_STAT_INC(count) (__get_cpu_var(ip_conntrack_stat).count++) +#define CONNTRACK_STAT_INC_ATOMIC(count) \ +do { \ + local_bh_disable(); \ + __get_cpu_var(ip_conntrack_stat).count++; \ + local_bh_enable(); \ +} while (0) #ifdef CONFIG_IP_NF_CONNTRACK_EVENTS #include <linux/notifier.h> diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index 68ec27490c2..0e690e34c00 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -257,6 +257,12 @@ extern int nf_conntrack_max; DECLARE_PER_CPU(struct ip_conntrack_stat, nf_conntrack_stat); #define NF_CT_STAT_INC(count) (__get_cpu_var(nf_conntrack_stat).count++) +#define NF_CT_STAT_INC_ATOMIC(count) \ +do { \ + local_bh_disable(); \ + __get_cpu_var(nf_conntrack_stat).count++; \ + local_bh_enable(); \ +} while (0) /* no helper, no nat */ #define NF_CT_F_BASIC 0 diff --git a/include/net/netfilter/nf_conntrack_l3proto.h b/include/net/netfilter/nf_conntrack_l3proto.h index 664ddcffe00..eb575cbd4c9 100644 --- a/include/net/netfilter/nf_conntrack_l3proto.h +++ b/include/net/netfilter/nf_conntrack_l3proto.h @@ -89,7 +89,7 @@ extern struct nf_conntrack_l3proto *nf_ct_l3protos[AF_MAX]; /* Protocol registration. */ extern int nf_conntrack_l3proto_register(struct nf_conntrack_l3proto *proto); -extern int nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); +extern void nf_conntrack_l3proto_unregister(struct nf_conntrack_l3proto *proto); extern struct nf_conntrack_l3proto * nf_ct_l3proto_find_get(u_int16_t l3proto); @@ -106,7 +106,7 @@ __nf_ct_l3proto_find(u_int16_t l3proto) { if (unlikely(l3proto >= AF_MAX)) return &nf_conntrack_l3proto_generic; - return nf_ct_l3protos[l3proto]; + return rcu_dereference(nf_ct_l3protos[l3proto]); } #endif /*_NF_CONNTRACK_L3PROTO_H*/ diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h index fc8af08ff54..8415182ec12 100644 --- a/include/net/netfilter/nf_conntrack_l4proto.h +++ b/include/net/netfilter/nf_conntrack_l4proto.h @@ -109,7 +109,7 @@ extern void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p); /* Protocol registration. */ extern int nf_conntrack_l4proto_register(struct nf_conntrack_l4proto *proto); -extern int nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto); +extern void nf_conntrack_l4proto_unregister(struct nf_conntrack_l4proto *proto); /* Generic netlink helpers */ extern int nf_ct_port_tuple_to_nfattr(struct sk_buff *skb, diff --git a/net/bridge/netfilter/ebt_log.c b/net/bridge/netfilter/ebt_log.c index f9a5ae9d5b6..45712aec6a0 100644 --- a/net/bridge/netfilter/ebt_log.c +++ b/net/bridge/netfilter/ebt_log.c @@ -208,7 +208,7 @@ static int __init ebt_log_init(void) static void __exit ebt_log_fini(void) { - nf_log_unregister_logger(&ebt_log_logger); + nf_log_unregister(&ebt_log_logger); ebt_unregister_watcher(&log); } diff --git a/net/bridge/netfilter/ebt_ulog.c b/net/bridge/netfilter/ebt_ulog.c index 2e4cb24e191..8e15cc47f6c 100644 --- a/net/bridge/netfilter/ebt_ulog.c +++ b/net/bridge/netfilter/ebt_ulog.c @@ -323,7 +323,7 @@ static void __exit ebt_ulog_fini(void) ebt_ulog_buff_t *ub; int i; - nf_log_unregister_logger(&ebt_ulog_logger); + nf_log_unregister(&ebt_ulog_logger); ebt_unregister_watcher(&ulog); for (i = 0; i < EBT_ULOG_MAXNLGROUPS; i++) { ub = &ulog_buffers[i]; diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 9b08e7ad71b..601808c796e 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -226,7 +226,7 @@ config IP_NF_QUEUE config IP_NF_IPTABLES tristate "IP tables support (required for filtering/masq/NAT)" - depends on NETFILTER_XTABLES + select NETFILTER_XTABLES help iptables is a general, extensible packet identification framework. The packet filtering and full NAT (masquerading, port forwarding, @@ -606,7 +606,9 @@ config IP_NF_TARGET_TTL config IP_NF_TARGET_CLUSTERIP tristate "CLUSTERIP target support (EXPERIMENTAL)" depends on IP_NF_MANGLE && EXPERIMENTAL - depends on (IP_NF_CONNTRACK && IP_NF_CONNTRACK_MARK) || (NF_CONNTRACK_MARK && NF_CONNTRACK_IPV4) + depends on IP_NF_CONNTRACK || NF_CONNTRACK_IPV4 + select IP_NF_CONNTRACK_MARK if IP_NF_CONNTRACK + select NF_CONNTRACK_MARK if NF_CONNTRACK_IPV4 help The CLUSTERIP target allows you to build load-balancing clusters of network servers without having a dedicated load-balancing @@ -629,7 +631,7 @@ config IP_NF_RAW # ARP tables config IP_NF_ARPTABLES tristate "ARP tables support" - depends on NETFILTER_XTABLES + select NETFILTER_XTABLES help arptables is a general, extensible packet identification framework. The ARP packet filtering and mangling (manipulation)subsystems diff --git a/net/ipv4/netfilter/ip_conntrack_core.c b/net/ipv4/netfilter/ip_conntrack_core.c index 04e466d53c0..07ba1dd136b 100644 --- a/net/ipv4/netfilter/ip_conntrack_core.c +++ b/net/ipv4/netfilter/ip_conntrack_core.c @@ -303,6 +303,7 @@ destroy_conntrack(struct nf_conntrack *nfct) struct ip_conntrack *ct = (struct ip_conntrack *)nfct; struct ip_conntrack_protocol *proto; struct ip_conntrack_helper *helper; + typeof(ip_conntrack_destroyed) destroyed; DEBUGP("destroy_conntrack(%p)\n", ct); IP_NF_ASSERT(atomic_read(&nfct->use) == 0); @@ -318,12 +319,16 @@ destroy_conntrack(struct nf_conntrack *nfct) /* To make sure we don't get any weird locking issues here: * destroy_conntrack() MUST NOT be called with a write lock * to ip_conntrack_lock!!! -HW */ + rcu_read_lock(); proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum); if (proto && proto->destroy) proto->destroy(ct); - if (ip_conntrack_destroyed) - ip_conntrack_destroyed(ct); + destroyed = rcu_dereference(ip_conntrack_destroyed); + if (destroyed) + destroyed(ct); + + rcu_read_unlock(); write_lock_bh(&ip_conntrack_lock); /* Expectations will have been removed in clean_from_lists, @@ -536,7 +541,7 @@ static int early_drop(struct list_head *chain) if (del_timer(&ct->timeout)) { death_by_timeout((unsigned long)ct); dropped = 1; - CONNTRACK_STAT_INC(early_drop); + CONNTRACK_STAT_INC_ATOMIC(early_drop); } ip_conntrack_put(ct); return dropped; @@ -595,13 +600,13 @@ ip_conntrack_proto_find_get(u_int8_t protocol) { struct ip_conntrack_protocol *p; - preempt_disable(); + rcu_read_lock(); p = __ip_conntrack_proto_find(protocol); if (p) { if (!try_module_get(p->me)) p = &ip_conntrack_generic_protocol; } - preempt_enable(); + rcu_read_unlock(); return p; } @@ -802,7 +807,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, /* Previously seen (loopback or untracked)? Ignore. */ if ((*pskb)->nfct) { - CONNTRACK_STAT_INC(ignore); + CONNTRACK_STAT_INC_ATOMIC(ignore); return NF_ACCEPT; } @@ -830,6 +835,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, } #endif + /* rcu_read_lock()ed by nf_hook_slow */ proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol); /* It may be an special packet, error, unclean... @@ -837,20 +843,20 @@ unsigned int ip_conntrack_in(unsigned int hooknum, * core what to do with the packet. */ if (proto->error != NULL && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) { - CONNTRACK_STAT_INC(error); - CONNTRACK_STAT_INC(invalid); + CONNTRACK_STAT_INC_ATOMIC(error); + CONNTRACK_STAT_INC_ATOMIC(invalid); return -ret; } if (!(ct = resolve_normal_ct(*pskb, proto,&set_reply,hooknum,&ctinfo))) { /* Not valid part of a connection */ - CONNTRACK_STAT_INC(invalid); + CONNTRACK_STAT_INC_ATOMIC(invalid); return NF_ACCEPT; } if (IS_ERR(ct)) { /* Too stressed to deal. */ - CONNTRACK_STAT_INC(drop); + CONNTRACK_STAT_INC_ATOMIC(drop); return NF_DROP; } @@ -862,7 +868,7 @@ unsigned int ip_conntrack_in(unsigned int hooknum, * the netfilter core what to do*/ nf_conntrack_put((*pskb)->nfct); (*pskb)->nfct = NULL; - CONNTRACK_STAT_INC(invalid); + CONNTRACK_STAT_INC_ATOMIC(invalid); return -ret; } @@ -875,8 +881,15 @@ unsigned int ip_conntrack_in(unsigned int hooknum, int invert_tuplepr(struct ip_conntrack_tuple *inverse, const struct ip_conntrack_tuple *orig) { - return ip_ct_invert_tuple(inverse, orig, - __ip_conntrack_proto_find(orig->dst.protonum)); + struct ip_conntrack_protocol *proto; + int ret; + + rcu_read_lock(); + proto = __ip_conntrack_proto_find(orig->dst.protonum); + ret = ip_ct_invert_tuple(inverse, orig, proto); + rcu_read_unlock(); + + return ret; } /* Would two expected things clash? */ @@ -1354,7 +1367,7 @@ static void free_conntrack_hash(struct list_head *hash, int vmalloced,int size) supposed to kill the mall. */ void ip_conntrack_cleanup(void) { - ip_ct_attach = NULL; + rcu_assign_pointer(ip_ct_attach, NULL); /* This makes sure all current packets have passed through netfilter framework. Roll on, two-stage module @@ -1507,15 +1520,15 @@ int __init ip_conntrack_init(void) /* Don't NEED lock here, but good form anyway. */ write_lock_bh(&ip_conntrack_lock); for (i = 0; i < MAX_IP_CT_PROTO; i++) - ip_ct_protos[i] = &ip_conntrack_generic_protocol; + rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol); /* Sew in builtin protocols. */ - ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp; - ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp; - ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp; + rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp); + rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp); + rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp); write_unlock_bh(&ip_conntrack_lock); /* For use by ipt_REJECT */ - ip_ct_attach = ip_conntrack_attach; + rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach); /* Set up fake conntrack: - to never be deleted, not in any hashes */ diff --git a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c index 300ccbbbdac..c7c1ec61b0f 100644 --- a/net/ipv4/netfilter/ip_conntrack_standalone.c +++ b/net/ipv4/netfilter/ip_conntrack_standalone.c @@ -796,7 +796,7 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) ret = -EBUSY; goto out; } - ip_ct_protos[proto->proto] = proto; + rcu_assign_pointer(ip_ct_protos[proto->proto], proto); out: write_unlock_bh(&ip_conntrack_lock); return ret; @@ -805,11 +805,10 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto) void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto) { write_lock_bh(&ip_conntrack_lock); - ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol; + rcu_assign_pointer(ip_ct_protos[proto->proto], + &ip_conntrack_generic_protocol); write_unlock_bh(&ip_conntrack_lock); - - /* Somebody could be still looking at the proto in bh. */ - synchronize_net(); + synchronize_rcu(); /* Remove all contrack entries for this protocol */ ip_ct_iterate_cleanup(kill_proto, &proto->proto); diff --git a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c index 275a4d3faf0..40737fdbe9a 100644 --- a/net/ipv4/netfilter/ip_nat_core.c +++ b/net/ipv4/netfilter/ip_nat_core.c @@ -50,7 +50,7 @@ static struct ip_nat_protocol *ip_nat_protos[MAX_IP_NAT_PROTO]; static inline struct ip_nat_protocol * __ip_nat_proto_find(u_int8_t protonum) { - return ip_nat_protos[protonum]; + return rcu_dereference(ip_nat_protos[protonum]); } struct ip_nat_protocol * @@ -58,13 +58,11 @@ ip_nat_proto_find_get(u_int8_t protonum) { struct ip_nat_protocol *p; - /* we need to disable preemption to make sure 'p' doesn't get - * removed until we've grabbed the reference */ - preempt_disable(); + rcu_read_lock(); p = __ip_nat_proto_find(protonum); if (!try_module_get(p->me)) p = &ip_nat_unknown_protocol; - preempt_enable(); + rcu_read_unlock(); return p; } @@ -120,8 +118,8 @@ static int in_range(const struct ip_conntrack_tuple *tuple, const struct ip_nat_range *range) { - struct ip_nat_protocol *proto = - __ip_nat_proto_find(tuple->dst.protonum); + struct ip_nat_protocol *proto; + int ret = 0; /* If we are supposed to map IPs, then we must be in the range specified, otherwise let this drag us onto a new src IP. */ @@ -131,12 +129,15 @@ in_range(const struct ip_conntrack_tuple *tuple, return 0; } + rcu_read_lock(); + proto = __ip_nat_proto_find(tuple->dst.protonum); if (!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, IP_NAT_MANIP_SRC, &range->min, &range->max)) - return 1; + ret = 1; + rcu_read_unlock(); - return 0; + return ret; } static inline int @@ -260,27 +261,25 @@ get_unique_tuple(struct ip_conntrack_tuple *tuple, /* 3) The per-protocol part of the manip is made to map into the range to make a unique tuple. */ - proto = ip_nat_proto_find_get(orig_tuple->dst.protonum); + rcu_read_lock(); + proto = __ip_nat_proto_find(orig_tuple->dst.protonum); /* Change protocol info to have some randomization */ if (range->flags & IP_NAT_RANGE_PROTO_RANDOM) { proto->unique_tuple(tuple, range, maniptype, conntrack); - ip_nat_proto_put(proto); - return; + goto out; } /* Only bother mapping if it's not already in range and unique */ if ((!(range->flags & IP_NAT_RANGE_PROTO_SPECIFIED) || proto->in_range(tuple, maniptype, &range->min, &range->max)) - && !ip_nat_used_tuple(tuple, conntrack)) { - ip_nat_proto_put(proto); - return; - } + && !ip_nat_used_tuple(tuple, conntrack)) + goto out; /* Last change: get protocol to try to obtain unique tuple. */ proto->unique_tuple(tuple, range, maniptype, conntrack); - - ip_nat_proto_put(proto); +out: + rcu_read_unlock(); } unsigned int @@ -360,12 +359,11 @@ manip_pkt(u_int16_t proto, iph = (void *)(*pskb)->data + iphdroff; /* Manipulate protcol part. */ - p = ip_nat_proto_find_get(proto); - if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) { - ip_nat_proto_put(p); + + /* rcu_read_lock()ed by nf_hook_slow */ + p = __ip_nat_proto_find(proto); + if (!p->manip_pkt(pskb, iphdroff, target, maniptype)) return 0; - } - ip_nat_proto_put(p); iph = (void *)(*pskb)->data + iphdroff; @@ -422,6 +420,7 @@ int ip_nat_icmp_reply_translation(struct ip_conntrack *ct, struct icmphdr icmp; struct iphdr ip; } *inside; + struct ip_conntrack_protocol *proto; struct ip_conntrack_tuple inner, target; int hdrlen = (*pskb)->nh.iph->ihl * 4; enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); @@ -457,10 +456,11 |