From c3059477fce2d956a0bb3e04357324780c5d8eeb Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 14 Jul 2009 08:33:08 +0000 Subject: ipv4: Use synchronize_rcu() during trie_rebalance() During trie_rebalance() we free memory after resizing with call_rcu(), but large updates, especially with PREEMPT_NONE configs, can cause memory stresses, so this patch calls synchronize_rcu() in tnode_free_flush() after each sync_pages to guarantee such freeing (especially before resizing the root node). The value of sync_pages = 128 is based on Pawel Staszewski's tests as the lowest which doesn't hinder updating times. (For testing purposes there was a sysfs module parameter to change it on demand, but it's removed until we're sure it could be really useful.) The patch is based on suggestions by: Paul E. McKenney Reported-by: Pawel Staszewski Tested-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'net/ipv4/fib_trie.c') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 63c2fa7b68c..58ba9f4f2c9 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -164,6 +164,14 @@ static struct tnode *inflate(struct trie *t, struct tnode *tn); static struct tnode *halve(struct trie *t, struct tnode *tn); /* tnodes to free after resize(); protected by RTNL */ static struct tnode *tnode_free_head; +static size_t tnode_free_size; + +/* + * synchronize_rcu after call_rcu for that many pages; it should be especially + * useful before resizing the root node with PREEMPT_NONE configs; the value was + * obtained experimentally, aiming to avoid visible slowdown. + */ +static const int sync_pages = 128; static struct kmem_cache *fn_alias_kmem __read_mostly; static struct kmem_cache *trie_leaf_kmem __read_mostly; @@ -393,6 +401,8 @@ static void tnode_free_safe(struct tnode *tn) BUG_ON(IS_LEAF(tn)); tn->tnode_free = tnode_free_head; tnode_free_head = tn; + tnode_free_size += sizeof(struct tnode) + + (sizeof(struct node *) << tn->bits); } static void tnode_free_flush(void) @@ -404,6 +414,11 @@ static void tnode_free_flush(void) tn->tnode_free = NULL; tnode_free(tn); } + + if (tnode_free_size >= PAGE_SIZE * sync_pages) { + tnode_free_size = 0; + synchronize_rcu(); + } } static struct leaf *leaf_new(void) -- cgit v1.2.3-18-g5258 From be916cdebe4dc720a23b1a9bb589f2c22afd6589 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 14 Jul 2009 09:41:00 +0000 Subject: ipv4: Fix inflate_threshold_root automatically During large updates there could be triggered warnings like: "Fix inflate_threshold_root. Now=25 size=11 bits" if inflate() of the root node isn't finished in 10 loops. It should be much rarer now, after changing the threshold from 15 to 25, and a temporary problem, so this patch tries to handle it automatically using a fix variable to increase by one inflate threshold for next root resizes (up to the 35 limit, max fix = 10). The fix variable is decreased when root's inflate() finishes below 7 loops (even if some other, smaller table/ trie is updated -- for simplicity the fix variable is global for now). Reported-by: Pawel Staszewski Reported-by: Jorge Boncompte [DTI2] Tested-by: Pawel Staszewski Signed-off-by: Jarek Poplawski Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'net/ipv4/fib_trie.c') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 58ba9f4f2c9..5741d1306cc 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -327,6 +327,8 @@ static const int inflate_threshold = 50; static const int halve_threshold_root = 15; static const int inflate_threshold_root = 25; +static int inflate_threshold_root_fix; +#define INFLATE_FIX_MAX 10 /* a comment in resize() */ static void __alias_free_mem(struct rcu_head *head) { @@ -617,7 +619,8 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Keep root node larger */ if (!tn->parent) - inflate_threshold_use = inflate_threshold_root; + inflate_threshold_use = inflate_threshold_root + + inflate_threshold_root_fix; else inflate_threshold_use = inflate_threshold; @@ -641,15 +644,27 @@ static struct node *resize(struct trie *t, struct tnode *tn) } if (max_resize < 0) { - if (!tn->parent) - pr_warning("Fix inflate_threshold_root." - " Now=%d size=%d bits\n", - inflate_threshold_root, tn->bits); - else + if (!tn->parent) { + /* + * It was observed that during large updates even + * inflate_threshold_root = 35 might be needed to avoid + * this warning; but it should be temporary, so let's + * try to handle this automatically. + */ + if (inflate_threshold_root_fix < INFLATE_FIX_MAX) + inflate_threshold_root_fix++; + else + pr_warning("Fix inflate_threshold_root." + " Now=%d size=%d bits fix=%d\n", + inflate_threshold_root, tn->bits, + inflate_threshold_root_fix); + } else { pr_warning("Fix inflate_threshold." " Now=%d size=%d bits\n", inflate_threshold, tn->bits); - } + } + } else if (max_resize > 3 && !tn->parent && inflate_threshold_root_fix) + inflate_threshold_root_fix--; check_tnode(tn); -- cgit v1.2.3-18-g5258 From b902e5735272b6a79fe2853180b2ad6658aa9678 Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Tue, 14 Jul 2009 11:20:32 +0000 Subject: ipv4: fib_trie: Use tnode_get_child_rcu() and node_parent_rcu() in lookups While looking for other fib_trie problems reported by Pawel Staszewski I noticed there are a few uses of tnode_get_child() and node_parent() in lookups instead of their rcu versions. Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv4/fib_trie.c') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 5741d1306cc..d58b4911538 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1465,7 +1465,7 @@ static int fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, cindex = tkey_extract_bits(mask_pfx(key, current_prefix_length), pos, bits); - n = tnode_get_child(pn, cindex); + n = tnode_get_child_rcu(pn, cindex); if (n == NULL) { #ifdef CONFIG_IP_FIB_TRIE_STATS @@ -1600,7 +1600,7 @@ backtrace: if (chopped_off <= pn->bits) { cindex &= ~(1 << (chopped_off-1)); } else { - struct tnode *parent = node_parent((struct node *) pn); + struct tnode *parent = node_parent_rcu((struct node *) pn); if (!parent) goto failed; @@ -1813,7 +1813,7 @@ static struct leaf *trie_firstleaf(struct trie *t) static struct leaf *trie_nextleaf(struct leaf *l) { struct node *c = (struct node *) l; - struct tnode *p = node_parent(c); + struct tnode *p = node_parent_rcu(c); if (!p) return NULL; /* trie with just one leaf */ -- cgit v1.2.3-18-g5258 From 36cbd3dcc10384f813ec0814255f576c84f2bcd4 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Wed, 5 Aug 2009 10:42:58 -0700 Subject: net: mark read-only arrays as const String literals are constant, and usually, we can also tag the array of pointers const too, moving it to the .rodata section. Signed-off-by: Jan Engelhardt Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv4/fib_trie.c') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index d58b4911538..fe3c846b99a 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2421,7 +2421,7 @@ static inline const char *rtn_scope(char *buf, size_t len, enum rt_scope_t s) } } -static const char *rtn_type_names[__RTN_MAX] = { +static const char *const rtn_type_names[__RTN_MAX] = { [RTN_UNSPEC] = "UNSPEC", [RTN_UNICAST] = "UNICAST", [RTN_LOCAL] = "LOCAL", -- cgit v1.2.3-18-g5258 From 80b71b80df14d885f7e50e115c1348398f418759 Mon Sep 17 00:00:00 2001 From: Jens Låås Date: Fri, 28 Aug 2009 23:57:15 -0700 Subject: fib_trie: resize rework MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Here is rework and cleanup of the resize function. Some bugs we had. We were using ->parent when we should use node_parent(). Also we used ->parent which is not assigned by inflate in inflate loop. Also a fix to set thresholds to power 2 to fit halve and double strategy. max_resize is renamed to max_work which better indicates it's function. Reaching max_work is not an error, so warning is removed. max_work only limits amount of work done per resize. (limits CPU-usage, outstanding memory etc). The clean-up makes it relatively easy to add fixed sized root-nodes if we would like to decrease the memory pressure on routers with large routing tables and dynamic routing. If we'll need that... Its been tested with 280k routes. Work done together with Robert Olsson. Signed-off-by: Jens Låås Signed-off-by: Robert Olsson Signed-off-by: David S. Miller --- net/ipv4/fib_trie.c | 95 +++++++++++++---------------------------------------- 1 file changed, 23 insertions(+), 72 deletions(-) (limited to 'net/ipv4/fib_trie.c') diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index fe3c846b99a..291bdf50a21 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -48,7 +48,7 @@ * Patrick McHardy */ -#define VERSION "0.408" +#define VERSION "0.409" #include #include @@ -325,10 +325,7 @@ static inline void check_tnode(const struct tnode *tn) static const int halve_threshold = 25; static const int inflate_threshold = 50; static const int halve_threshold_root = 15; -static const int inflate_threshold_root = 25; - -static int inflate_threshold_root_fix; -#define INFLATE_FIX_MAX 10 /* a comment in resize() */ +static const int inflate_threshold_root = 30; static void __alias_free_mem(struct rcu_head *head) { @@ -516,14 +513,14 @@ static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, rcu_assign_pointer(tn->child[i], n); } +#define MAX_WORK 10 static struct node *resize(struct trie *t, struct tnode *tn) { int i; - int err = 0; struct tnode *old_tn; int inflate_threshold_use; int halve_threshold_use; - int max_resize; + int max_work; if (!tn) return NULL; @@ -538,18 +535,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) } /* One child */ if (tn->empty_children == tnode_child_length(tn) - 1) - for (i = 0; i < tnode_child_length(tn); i++) { - struct node *n; - - n = tn->child[i]; - if (!n) - continue; - - /* compress one level */ - node_set_parent(n, NULL); - tnode_free_safe(tn); - return n; - } + goto one_child; /* * Double as long as the resulting node has a number of * nonempty nodes that are above the threshold. @@ -618,15 +604,17 @@ static struct node *resize(struct trie *t, struct tnode *tn) /* Keep root node larger */ - if (!tn->parent) - inflate_threshold_use = inflate_threshold_root + - inflate_threshold_root_fix; - else + if (!node_parent((struct node*) tn)) { + inflate_threshold_use = inflate_threshold_root; + halve_threshold_use = halve_threshold_root; + } + else { inflate_threshold_use = inflate_threshold; + halve_threshold_use = halve_threshold; + } - err = 0; - max_resize = 10; - while ((tn->full_children > 0 && max_resize-- && + max_work = MAX_WORK; + while ((tn->full_children > 0 && max_work-- && 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >= inflate_threshold_use * tnode_child_length(tn))) { @@ -643,47 +631,19 @@ static struct node *resize(struct trie *t, struct tnode *tn) } } - if (max_resize < 0) { - if (!tn->parent) { - /* - * It was observed that during large updates even - * inflate_threshold_root = 35 might be needed to avoid - * this warning; but it should be temporary, so let's - * try to handle this automatically. - */ - if (inflate_threshold_root_fix < INFLATE_FIX_MAX) - inflate_threshold_root_fix++; - else - pr_warning("Fix inflate_threshold_root." - " Now=%d size=%d bits fix=%d\n", - inflate_threshold_root, tn->bits, - inflate_threshold_root_fix); - } else { - pr_warning("Fix inflate_threshold." - " Now=%d size=%d bits\n", - inflate_threshold, tn->bits); - } - } else if (max_resize > 3 && !tn->parent && inflate_threshold_root_fix) - inflate_threshold_root_fix--; - check_tnode(tn); + /* Return if at least one inflate is run */ + if( max_work != MAX_WORK) + return (struct node *) tn; + /* * Halve as long as the number of empty children in this * node is above threshold. */ - - /* Keep root node larger */ - - if (!tn->parent) - halve_threshold_use = halve_threshold_root; - else - halve_threshold_use = halve_threshold; - - err = 0; - max_resize = 10; - while (tn->bits > 1 && max_resize-- && + max_work = MAX_WORK; + while (tn->bits > 1 && max_work-- && 100 * (tnode_child_length(tn) - tn->empty_children) < halve_threshold_use * tnode_child_length(tn)) { @@ -698,19 +658,10 @@ static struct node *resize(struct trie *t, struct tnode *tn) } } - if (max_resize < 0) { - if (!tn->parent) - pr_warning("Fix halve_threshold_root." - " Now=%d size=%d bits\n", - halve_threshold_root, tn->bits); - else - pr_warning("Fix halve_threshold." - " Now=%d size=%d bits\n", - halve_threshold, tn->bits); - } /* Only one child remains */ - if (tn->empty_children == tnode_child_length(tn) - 1) + if (tn->empty_children == tnode_child_length(tn) - 1) { +one_child: for (i = 0; i < tnode_child_length(tn); i++) { struct node *n; @@ -724,7 +675,7 @@ static struct node *resize(struct trie *t, struct tnode *tn) tnode_free_safe(tn); return n; } - + } return (struct node *) tn; } -- cgit v1.2.3-18-g5258