diff options
Diffstat (limited to 'net/core/flow.c')
| -rw-r--r-- | net/core/flow.c | 267 |
1 files changed, 172 insertions, 95 deletions
diff --git a/net/core/flow.c b/net/core/flow.c index 127c8a7ffd6..a0348fde1fd 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -22,14 +22,16 @@ #include <linux/cpumask.h> #include <linux/mutex.h> #include <net/flow.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <linux/security.h> +#include <net/net_namespace.h> struct flow_cache_entry { union { struct hlist_node hlist; struct list_head gc_list; } u; + struct net *net; u16 family; u8 dir; u32 genid; @@ -37,37 +39,14 @@ struct flow_cache_entry { struct flow_cache_object *object; }; -struct flow_cache_percpu { - struct hlist_head *hash_table; - int hash_count; - u32 hash_rnd; - int hash_rnd_recalc; - struct tasklet_struct flush_tasklet; -}; - struct flow_flush_info { struct flow_cache *cache; atomic_t cpuleft; struct completion completion; }; -struct flow_cache { - u32 hash_shift; - struct flow_cache_percpu __percpu *percpu; - struct notifier_block hotcpu_notifier; - int low_watermark; - int high_watermark; - struct timer_list rnd_timer; -}; - -atomic_t flow_cache_genid = ATOMIC_INIT(0); -EXPORT_SYMBOL(flow_cache_genid); -static struct flow_cache flow_cache_global; static struct kmem_cache *flow_cachep __read_mostly; -static DEFINE_SPINLOCK(flow_cache_gc_lock); -static LIST_HEAD(flow_cache_gc_list); - #define flow_cache_hash_size(cache) (1 << (cache)->hash_shift) #define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) @@ -83,16 +62,18 @@ static void flow_cache_new_hashrnd(unsigned long arg) add_timer(&fc->rnd_timer); } -static int flow_entry_valid(struct flow_cache_entry *fle) +static int flow_entry_valid(struct flow_cache_entry *fle, + struct netns_xfrm *xfrm) { - if (atomic_read(&flow_cache_genid) != fle->genid) + if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) return 0; if (fle->object && !fle->object->ops->check(fle->object)) return 0; return 1; } -static void flow_entry_kill(struct flow_cache_entry *fle) +static void flow_entry_kill(struct flow_cache_entry *fle, + struct netns_xfrm *xfrm) { if (fle->object) fle->object->ops->delete(fle->object); @@ -103,26 +84,28 @@ static void flow_cache_gc_task(struct work_struct *work) { struct list_head gc_list; struct flow_cache_entry *fce, *n; + struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, + flow_cache_gc_work); INIT_LIST_HEAD(&gc_list); - spin_lock_bh(&flow_cache_gc_lock); - list_splice_tail_init(&flow_cache_gc_list, &gc_list); - spin_unlock_bh(&flow_cache_gc_lock); + spin_lock_bh(&xfrm->flow_cache_gc_lock); + list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); + spin_unlock_bh(&xfrm->flow_cache_gc_lock); list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) - flow_entry_kill(fce); + flow_entry_kill(fce, xfrm); } -static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task); static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - int deleted, struct list_head *gc_list) + int deleted, struct list_head *gc_list, + struct netns_xfrm *xfrm) { if (deleted) { fcp->hash_count -= deleted; - spin_lock_bh(&flow_cache_gc_lock); - list_splice_tail(gc_list, &flow_cache_gc_list); - spin_unlock_bh(&flow_cache_gc_lock); - schedule_work(&flow_cache_gc_work); + spin_lock_bh(&xfrm->flow_cache_gc_lock); + list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); + spin_unlock_bh(&xfrm->flow_cache_gc_lock); + schedule_work(&xfrm->flow_cache_gc_work); } } @@ -131,17 +114,19 @@ static void __flow_cache_shrink(struct flow_cache *fc, int shrink_to) { struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; + struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, + flow_cache_global); for (i = 0; i < flow_cache_hash_size(fc); i++) { int saved = 0; - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { if (saved < shrink_to && - flow_entry_valid(fle)) { + flow_entry_valid(fle, xfrm)) { saved++; } else { deleted++; @@ -151,7 +136,7 @@ static void __flow_cache_shrink(struct flow_cache *fc, } } - flow_cache_queue_garbage(fcp, deleted, &gc_list); + flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); } static void flow_cache_shrink(struct flow_cache *fc, @@ -172,31 +157,28 @@ static void flow_new_hash_rnd(struct flow_cache *fc, static u32 flow_hash_code(struct flow_cache *fc, struct flow_cache_percpu *fcp, - struct flowi *key) + const struct flowi *key, + size_t keysize) { - u32 *k = (u32 *) key; + const u32 *k = (const u32 *) key; + const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + return jhash2(k, length, fcp->hash_rnd) & (flow_cache_hash_size(fc) - 1); } -typedef unsigned long flow_compare_t; - /* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment and - * constant size. + * important assumptions that we can here, such as alignment. */ -static int flow_key_compare(struct flowi *key1, struct flowi *key2) +static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, + size_t keysize) { - flow_compare_t *k1, *k1_lim, *k2; - const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t); + const flow_compare_t *k1, *k1_lim, *k2; - BUILD_BUG_ON(sizeof(struct flowi) % sizeof(flow_compare_t)); + k1 = (const flow_compare_t *) key1; + k1_lim = k1 + keysize; - k1 = (flow_compare_t *) key1; - k1_lim = k1 + n_elem; - - k2 = (flow_compare_t *) key2; + k2 = (const flow_compare_t *) key2; do { if (*k1++ != *k2++) @@ -207,14 +189,14 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) } struct flow_cache_object * -flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, +flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver, void *ctx) { - struct flow_cache *fc = &flow_cache_global; + struct flow_cache *fc = &net->xfrm.flow_cache_global; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle, *tfle; - struct hlist_node *entry; struct flow_cache_object *flo; + size_t keysize; unsigned int hash; local_bh_disable(); @@ -222,6 +204,11 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, fle = NULL; flo = NULL; + + keysize = flow_key_size(family); + if (!keysize) + goto nocache; + /* Packet really early in init? Making flow_cache_init a * pre-smp initcall would solve this. --RR */ if (!fcp->hash_table) @@ -230,11 +217,12 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, if (fcp->hash_rnd_recalc) flow_new_hash_rnd(fc, fcp); - hash = flow_hash_code(fc, fcp, key); - hlist_for_each_entry(tfle, entry, &fcp->hash_table[hash], u.hlist) { - if (tfle->family == family && + hash = flow_hash_code(fc, fcp, key, keysize); + hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { + if (tfle->net == net && + tfle->family == family && tfle->dir == dir && - flow_key_compare(key, &tfle->key) == 0) { + flow_key_compare(key, &tfle->key, keysize) == 0) { fle = tfle; break; } @@ -246,14 +234,15 @@ flow_cache_lookup(struct net *net, struct flowi *key, u16 family, u8 dir, fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); if (fle) { + fle->net = net; fle->family = family; fle->dir = dir; - memcpy(&fle->key, key, sizeof(*key)); + memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); fle->object = NULL; hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); fcp->hash_count++; } - } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) { + } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { flo = fle->object; if (!flo) goto ret_object; @@ -274,13 +263,13 @@ nocache: } flo = resolver(net, key, family, dir, flo, ctx); if (fle) { - fle->genid = atomic_read(&flow_cache_genid); + fle->genid = atomic_read(&net->xfrm.flow_cache_genid); if (!IS_ERR(flo)) fle->object = flo; else fle->genid--; } else { - if (flo && !IS_ERR(flo)) + if (!IS_ERR_OR_NULL(flo)) flo->ops->delete(flo); } ret_object: @@ -295,15 +284,17 @@ static void flow_cache_flush_tasklet(unsigned long data) struct flow_cache *fc = info->cache; struct flow_cache_percpu *fcp; struct flow_cache_entry *fle; - struct hlist_node *entry, *tmp; + struct hlist_node *tmp; LIST_HEAD(gc_list); int i, deleted = 0; + struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, + flow_cache_global); fcp = this_cpu_ptr(fc->percpu); for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, entry, tmp, + hlist_for_each_entry_safe(fle, tmp, &fcp->hash_table[i], u.hlist) { - if (flow_entry_valid(fle)) + if (flow_entry_valid(fle, xfrm)) continue; deleted++; @@ -312,47 +303,94 @@ static void flow_cache_flush_tasklet(unsigned long data) } } - flow_cache_queue_garbage(fcp, deleted, &gc_list); + flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); if (atomic_dec_and_test(&info->cpuleft)) complete(&info->completion); } +/* + * Return whether a cpu needs flushing. Conservatively, we assume + * the presence of any entries means the core may require flushing, + * since the flow_cache_ops.check() function may assume it's running + * on the same core as the per-cpu cache component. + */ +static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) +{ + struct flow_cache_percpu *fcp; + int i; + + fcp = per_cpu_ptr(fc->percpu, cpu); + for (i = 0; i < flow_cache_hash_size(fc); i++) + if (!hlist_empty(&fcp->hash_table[i])) + return 0; + return 1; +} + static void flow_cache_flush_per_cpu(void *data) { struct flow_flush_info *info = data; - int cpu; struct tasklet_struct *tasklet; - cpu = smp_processor_id(); - tasklet = &per_cpu_ptr(info->cache->percpu, cpu)->flush_tasklet; + tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; tasklet->data = (unsigned long)info; tasklet_schedule(tasklet); } -void flow_cache_flush(void) +void flow_cache_flush(struct net *net) { struct flow_flush_info info; - static DEFINE_MUTEX(flow_flush_sem); + cpumask_var_t mask; + int i, self; + + /* Track which cpus need flushing to avoid disturbing all cores. */ + if (!alloc_cpumask_var(&mask, GFP_KERNEL)) + return; + cpumask_clear(mask); /* Don't want cpus going down or up during this. */ get_online_cpus(); - mutex_lock(&flow_flush_sem); - info.cache = &flow_cache_global; - atomic_set(&info.cpuleft, num_online_cpus()); + mutex_lock(&net->xfrm.flow_flush_sem); + info.cache = &net->xfrm.flow_cache_global; + for_each_online_cpu(i) + if (!flow_cache_percpu_empty(info.cache, i)) + cpumask_set_cpu(i, mask); + atomic_set(&info.cpuleft, cpumask_weight(mask)); + if (atomic_read(&info.cpuleft) == 0) + goto done; + init_completion(&info.completion); local_bh_disable(); - smp_call_function(flow_cache_flush_per_cpu, &info, 0); - flow_cache_flush_tasklet((unsigned long)&info); + self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); + on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); + if (self) + flow_cache_flush_tasklet((unsigned long)&info); local_bh_enable(); wait_for_completion(&info.completion); - mutex_unlock(&flow_flush_sem); + +done: + mutex_unlock(&net->xfrm.flow_flush_sem); put_online_cpus(); + free_cpumask_var(mask); } -static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) +static void flow_cache_flush_task(struct work_struct *work) +{ + struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, + flow_cache_gc_work); + struct net *net = container_of(xfrm, struct net, xfrm); + + flow_cache_flush(net); +} + +void flow_cache_flush_deferred(struct net *net) +{ + schedule_work(&net->xfrm.flow_cache_flush_work); +} + +static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) { struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); size_t sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); @@ -370,11 +408,12 @@ static int __cpuinit flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) return 0; } -static int __cpuinit flow_cache_cpu(struct notifier_block *nfb, +static int flow_cache_cpu(struct notifier_block *nfb, unsigned long action, void *hcpu) { - struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier); + struct flow_cache *fc = container_of(nfb, struct flow_cache, + hotcpu_notifier); int res, cpu = (unsigned long) hcpu; struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); @@ -393,9 +432,20 @@ static int __cpuinit flow_cache_cpu(struct notifier_block *nfb, return NOTIFY_OK; } -static int __init flow_cache_init(struct flow_cache *fc) +int flow_cache_init(struct net *net) { int i; + struct flow_cache *fc = &net->xfrm.flow_cache_global; + + if (!flow_cachep) + flow_cachep = kmem_cache_create("flow_cache", + sizeof(struct flow_cache_entry), + 0, SLAB_PANIC, NULL); + spin_lock_init(&net->xfrm.flow_cache_gc_lock); + INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); + INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); + INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); + mutex_init(&net->xfrm.flow_flush_sem); fc->hash_shift = 10; fc->low_watermark = 2 * flow_cache_hash_size(fc); @@ -405,14 +455,18 @@ static int __init flow_cache_init(struct flow_cache *fc) if (!fc->percpu) return -ENOMEM; + cpu_notifier_register_begin(); + for_each_online_cpu(i) { if (flow_cache_cpu_prepare(fc, i)) - return -ENOMEM; + goto err; } fc->hotcpu_notifier = (struct notifier_block){ .notifier_call = flow_cache_cpu, }; - register_hotcpu_notifier(&fc->hotcpu_notifier); + __register_hotcpu_notifier(&fc->hotcpu_notifier); + + cpu_notifier_register_done(); setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, (unsigned long) fc); @@ -420,15 +474,38 @@ static int __init flow_cache_init(struct flow_cache *fc) add_timer(&fc->rnd_timer); return 0; + +err: + for_each_possible_cpu(i) { + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); + kfree(fcp->hash_table); + fcp->hash_table = NULL; + } + + cpu_notifier_register_done(); + + free_percpu(fc->percpu); + fc->percpu = NULL; + + return -ENOMEM; } +EXPORT_SYMBOL(flow_cache_init); -static int __init flow_cache_init_global(void) +void flow_cache_fini(struct net *net) { - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, NULL); + int i; + struct flow_cache *fc = &net->xfrm.flow_cache_global; - return flow_cache_init(&flow_cache_global); -} + del_timer_sync(&fc->rnd_timer); + unregister_hotcpu_notifier(&fc->hotcpu_notifier); -module_init(flow_cache_init_global); + for_each_possible_cpu(i) { + struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); + kfree(fcp->hash_table); + fcp->hash_table = NULL; + } + + free_percpu(fc->percpu); + fc->percpu = NULL; +} +EXPORT_SYMBOL(flow_cache_fini); |
