From c8a627ed06d6d49bf65015a2185c519335c4c83f Mon Sep 17 00:00:00 2001 From: Gao feng Date: Fri, 8 Jun 2012 01:20:41 +0000 Subject: inetpeer: add namespace support for inetpeer now inetpeer doesn't support namespace,the information will be leaking across namespace. this patch move the global vars v4_peers and v6_peers to netns_ipv4 and netns_ipv6 as a field peers. add struct pernet_operations inetpeer_ops to initial pernet inetpeer data. and change family_to_base and inet_getpeer to support namespace. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 68 +++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 50 insertions(+), 18 deletions(-) (limited to 'net/ipv4/inetpeer.c') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index dfba343b250..1c8527349c8 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -88,18 +88,6 @@ struct inet_peer_base { int total; }; -static struct inet_peer_base v4_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v4_peers.lock), - .total = 0, -}; - -static struct inet_peer_base v6_peers = { - .root = peer_avl_empty_rcu, - .lock = __SEQLOCK_UNLOCKED(v6_peers.lock), - .total = 0, -}; - #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -153,6 +141,46 @@ static void inetpeer_gc_worker(struct work_struct *work) schedule_delayed_work(&gc_work, gc_delay); } +static int __net_init inetpeer_net_init(struct net *net) +{ + net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base), + GFP_KERNEL); + if (net->ipv4.peers == NULL) + return -ENOMEM; + + net->ipv4.peers->root = peer_avl_empty_rcu; + seqlock_init(&net->ipv4.peers->lock); + + net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base), + GFP_KERNEL); + if (net->ipv6.peers == NULL) + goto out_ipv6; + + net->ipv6.peers->root = peer_avl_empty_rcu; + seqlock_init(&net->ipv6.peers->lock); + + return 0; +out_ipv6: + kfree(net->ipv4.peers); + return -ENOMEM; +} + +static void __net_exit inetpeer_net_exit(struct net *net) +{ + inetpeer_invalidate_tree(net, AF_INET); + kfree(net->ipv4.peers); + net->ipv4.peers = NULL; + + inetpeer_invalidate_tree(net, AF_INET6); + kfree(net->ipv6.peers); + net->ipv6.peers = NULL; +} + +static struct pernet_operations inetpeer_ops = { + .init = inetpeer_net_init, + .exit = inetpeer_net_exit, +}; + /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -177,6 +205,7 @@ void __init inet_initpeers(void) NULL); INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); + register_pernet_subsys(&inetpeer_ops); } static int addr_compare(const struct inetpeer_addr *a, @@ -401,9 +430,10 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, call_rcu(&p->rcu, inetpeer_free_rcu); } -static struct inet_peer_base *family_to_base(int family) +static struct inet_peer_base *family_to_base(struct net *net, + int family) { - return family == AF_INET ? &v4_peers : &v6_peers; + return family == AF_INET ? net->ipv4.peers : net->ipv6.peers; } /* perform garbage collect on all items stacked during a lookup */ @@ -443,10 +473,12 @@ static int inet_peer_gc(struct inet_peer_base *base, return cnt; } -struct inet_peer *inet_getpeer(const struct inetpeer_addr *daddr, int create) +struct inet_peer *inet_getpeer(struct net *net, + const struct inetpeer_addr *daddr, + int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(daddr->family); + struct inet_peer_base *base = family_to_base(net, daddr->family); struct inet_peer *p; unsigned int sequence; int invalidated, gccnt = 0; @@ -571,10 +603,10 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void inetpeer_invalidate_tree(int family) +void inetpeer_invalidate_tree(struct net *net, int family) { struct inet_peer *old, *new, *prev; - struct inet_peer_base *base = family_to_base(family); + struct inet_peer_base *base = family_to_base(net, family); write_seqlock_bh(&base->lock); -- cgit v1.2.3-18-g5258 From c3426b47190d7c6785230c91a706fd42208b4120 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 16:27:05 -0700 Subject: inet: Initialize per-netns inetpeer roots in net/ipv{4,6}/route.c Instead of net/ipv4/inetpeer.c Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 64 ++++++++++++++--------------------------------------- 1 file changed, 16 insertions(+), 48 deletions(-) (limited to 'net/ipv4/inetpeer.c') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 1c8527349c8..d0d72f89b27 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -82,11 +82,13 @@ static const struct inet_peer peer_fake_node = { .avl_height = 0 }; -struct inet_peer_base { - struct inet_peer __rcu *root; - seqlock_t lock; - int total; -}; +void inet_peer_base_init(struct inet_peer_base *bp) +{ + bp->root = peer_avl_empty_rcu; + seqlock_init(&bp->lock); + bp->total = 0; +} +EXPORT_SYMBOL_GPL(inet_peer_base_init); #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ @@ -141,46 +143,6 @@ static void inetpeer_gc_worker(struct work_struct *work) schedule_delayed_work(&gc_work, gc_delay); } -static int __net_init inetpeer_net_init(struct net *net) -{ - net->ipv4.peers = kzalloc(sizeof(struct inet_peer_base), - GFP_KERNEL); - if (net->ipv4.peers == NULL) - return -ENOMEM; - - net->ipv4.peers->root = peer_avl_empty_rcu; - seqlock_init(&net->ipv4.peers->lock); - - net->ipv6.peers = kzalloc(sizeof(struct inet_peer_base), - GFP_KERNEL); - if (net->ipv6.peers == NULL) - goto out_ipv6; - - net->ipv6.peers->root = peer_avl_empty_rcu; - seqlock_init(&net->ipv6.peers->lock); - - return 0; -out_ipv6: - kfree(net->ipv4.peers); - return -ENOMEM; -} - -static void __net_exit inetpeer_net_exit(struct net *net) -{ - inetpeer_invalidate_tree(net, AF_INET); - kfree(net->ipv4.peers); - net->ipv4.peers = NULL; - - inetpeer_invalidate_tree(net, AF_INET6); - kfree(net->ipv6.peers); - net->ipv6.peers = NULL; -} - -static struct pernet_operations inetpeer_ops = { - .init = inetpeer_net_init, - .exit = inetpeer_net_exit, -}; - /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -205,7 +167,6 @@ void __init inet_initpeers(void) NULL); INIT_DELAYED_WORK_DEFERRABLE(&gc_work, inetpeer_gc_worker); - register_pernet_subsys(&inetpeer_ops); } static int addr_compare(const struct inetpeer_addr *a, @@ -603,10 +564,9 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void inetpeer_invalidate_tree(struct net *net, int family) +void __inetpeer_invalidate_tree(struct inet_peer_base *base) { struct inet_peer *old, *new, *prev; - struct inet_peer_base *base = family_to_base(net, family); write_seqlock_bh(&base->lock); @@ -625,4 +585,12 @@ void inetpeer_invalidate_tree(struct net *net, int family) out: write_sequnlock_bh(&base->lock); } +EXPORT_SYMBOL(__inetpeer_invalidate_tree); + +void inetpeer_invalidate_tree(struct net *net, int family) +{ + struct inet_peer_base *base = family_to_base(net, family); + + __inetpeer_invalidate_tree(base); +} EXPORT_SYMBOL(inetpeer_invalidate_tree); -- cgit v1.2.3-18-g5258 From 56a6b248eb345c1948ee60bf426de1ff7dd81509 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 16:32:41 -0700 Subject: inet: Consolidate inetpeer_invalidate_tree() interfaces. We only need one interface for this operation, since we always know which inetpeer root we want to flush. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'net/ipv4/inetpeer.c') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index d0d72f89b27..9d89a381f0e 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -564,7 +564,7 @@ static void inetpeer_inval_rcu(struct rcu_head *head) schedule_delayed_work(&gc_work, gc_delay); } -void __inetpeer_invalidate_tree(struct inet_peer_base *base) +void inetpeer_invalidate_tree(struct inet_peer_base *base) { struct inet_peer *old, *new, *prev; @@ -585,12 +585,4 @@ void __inetpeer_invalidate_tree(struct inet_peer_base *base) out: write_sequnlock_bh(&base->lock); } -EXPORT_SYMBOL(__inetpeer_invalidate_tree); - -void inetpeer_invalidate_tree(struct net *net, int family) -{ - struct inet_peer_base *base = family_to_base(net, family); - - __inetpeer_invalidate_tree(base); -} EXPORT_SYMBOL(inetpeer_invalidate_tree); -- cgit v1.2.3-18-g5258 From c0efc887dcadbdbfe171f028acfab9c7c00e9dde Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 9 Jun 2012 19:12:36 -0700 Subject: inet: Pass inetpeer root into inet_getpeer*() interfaces. Otherwise we reference potentially non-existing members when ipv6 is disabled. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'net/ipv4/inetpeer.c') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 9d89a381f0e..e4cba56a534 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -391,12 +391,6 @@ static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, call_rcu(&p->rcu, inetpeer_free_rcu); } -static struct inet_peer_base *family_to_base(struct net *net, - int family) -{ - return family == AF_INET ? net->ipv4.peers : net->ipv6.peers; -} - /* perform garbage collect on all items stacked during a lookup */ static int inet_peer_gc(struct inet_peer_base *base, struct inet_peer __rcu **stack[PEER_MAXDEPTH], @@ -434,12 +428,11 @@ static int inet_peer_gc(struct inet_peer_base *base, return cnt; } -struct inet_peer *inet_getpeer(struct net *net, +struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create) { struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer_base *base = family_to_base(net, daddr->family); struct inet_peer *p; unsigned int sequence; int invalidated, gccnt = 0; -- cgit v1.2.3-18-g5258 From b48c80ece973e9eddb042f6685b482b261ff0d47 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 10 Jun 2012 00:24:21 -0700 Subject: inet: Add family scope inetpeer flushes. This implementation can deal with having many inetpeer roots, which is a necessary prerequisite for per-FIB table rooted peer tables. Each family (AF_INET, AF_INET6) has a sequence number which we bump when we get a family invalidation request. Each peer lookup cheaply checks whether the flush sequence of the root we are using is out of date, and if so flushes it and updates the sequence number. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) (limited to 'net/ipv4/inetpeer.c') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index e4cba56a534..cac02ad1425 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -86,10 +86,36 @@ void inet_peer_base_init(struct inet_peer_base *bp) { bp->root = peer_avl_empty_rcu; seqlock_init(&bp->lock); + bp->flush_seq = ~0U; bp->total = 0; } EXPORT_SYMBOL_GPL(inet_peer_base_init); +static atomic_t v4_seq = ATOMIC_INIT(0); +static atomic_t v6_seq = ATOMIC_INIT(0); + +static atomic_t *inetpeer_seq_ptr(int family) +{ + return (family == AF_INET ? &v4_seq : &v6_seq); +} + +static inline void flush_check(struct inet_peer_base *base, int family) +{ + atomic_t *fp = inetpeer_seq_ptr(family); + + if (unlikely(base->flush_seq != atomic_read(fp))) { + inetpeer_invalidate_tree(base); + base->flush_seq = atomic_read(fp); + } +} + +void inetpeer_invalidate_family(int family) +{ + atomic_t *fp = inetpeer_seq_ptr(family); + + atomic_inc(fp); +} + #define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ /* Exported for sysctl_net_ipv4. */ @@ -437,6 +463,8 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, unsigned int sequence; int invalidated, gccnt = 0; + flush_check(base, daddr->family); + /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ -- cgit v1.2.3-18-g5258 From da55737467c1c3bc02271039c088171d82e0796f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 20 Jun 2012 04:02:10 +0000 Subject: inetpeer: inetpeer_invalidate_tree() cleanup No need to use cmpxchg() in inetpeer_invalidate_tree() since we hold base lock. Also use correct rcu annotations to remove sparse errors (CONFIG_SPARSE_RCU_POINTER=y) net/ipv4/inetpeer.c:144:19: error: incompatible types in comparison expression (different address spaces) net/ipv4/inetpeer.c:149:20: error: incompatible types in comparison expression (different address spaces) net/ipv4/inetpeer.c:595:10: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Eric Dumazet Cc: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) (limited to 'net/ipv4/inetpeer.c') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index cac02ad1425..da90a8cab61 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -126,7 +126,7 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min static void inetpeer_gc_worker(struct work_struct *work) { - struct inet_peer *p, *n; + struct inet_peer *p, *n, *c; LIST_HEAD(list); spin_lock_bh(&gc_lock); @@ -138,17 +138,19 @@ static void inetpeer_gc_worker(struct work_struct *work) list_for_each_entry_safe(p, n, &list, gc_list) { - if(need_resched()) + if (need_resched()) cond_resched(); - if (p->avl_left != peer_avl_empty) { - list_add_tail(&p->avl_left->gc_list, &list); - p->avl_left = peer_avl_empty; + c = rcu_dereference_protected(p->avl_left, 1); + if (c != peer_avl_empty) { + list_add_tail(&c->gc_list, &list); + p->avl_left = peer_avl_empty_rcu; } - if (p->avl_right != peer_avl_empty) { - list_add_tail(&p->avl_right->gc_list, &list); - p->avl_right = peer_avl_empty; + c = rcu_dereference_protected(p->avl_right, 1); + if (c != peer_avl_empty) { + list_add_tail(&c->gc_list, &list); + p->avl_right = peer_avl_empty_rcu; } n = list_entry(p->gc_list.next, struct inet_peer, gc_list); @@ -587,23 +589,17 @@ static void inetpeer_inval_rcu(struct rcu_head *head) void inetpeer_invalidate_tree(struct inet_peer_base *base) { - struct inet_peer *old, *new, *prev; + struct inet_peer *root; write_seqlock_bh(&base->lock); - old = base->root; - if (old == peer_avl_empty_rcu) - goto out; - - new = peer_avl_empty_rcu; - - prev = cmpxchg(&base->root, old, new); - if (prev == old) { + root = rcu_deref_locked(base->root, base); + if (root != peer_avl_empty) { + base->root = peer_avl_empty_rcu; base->total = 0; - call_rcu(&prev->gc_rcu, inetpeer_inval_rcu); + call_rcu(&root->gc_rcu, inetpeer_inval_rcu); } -out: write_sequnlock_bh(&base->lock); } EXPORT_SYMBOL(inetpeer_invalidate_tree); -- cgit v1.2.3-18-g5258 From 81166dd6fa8eb780b2132d32fbc77eb6ac04e44e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 03:14:24 -0700 Subject: tcp: Move timestamps from inetpeer to metrics cache. With help from Lin Ming. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/ipv4/inetpeer.c') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index da90a8cab61..f457bcb4135 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -508,7 +508,6 @@ relookup: (daddr->family == AF_INET) ? secure_ip_id(daddr->addr.a4) : secure_ipv6_id(daddr->addr.a6)); - p->tcp_ts_stamp = 0; p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; -- cgit v1.2.3-18-g5258 From 5943634fc5592037db0693b261f7f4bea6bb9457 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 10 Jul 2012 06:58:42 -0700 Subject: ipv4: Maintain redirect and PMTU info in struct rtable again. Maintaining this in the inetpeer entries was not the right way to do this at all. Signed-off-by: David S. Miller --- net/ipv4/inetpeer.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'net/ipv4/inetpeer.c') diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index f457bcb4135..e1e0a4e8fd3 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -511,9 +511,6 @@ relookup: p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; p->rate_tokens = 0; p->rate_last = 0; - p->pmtu_expires = 0; - p->pmtu_orig = 0; - memset(&p->redirect_learned, 0, sizeof(p->redirect_learned)); INIT_LIST_HEAD(&p->gc_list); /* Link the node. */ -- cgit v1.2.3-18-g5258