Diffstat (limited to 'mm/slub.c')
-rw-r--r--	mm/slub.c	420
1 file changed, 225 insertions(+), 195 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c index 7e3e0458bce..73004808537 100644 --- a/mm/slub.c +++ b/mm/slub.c @@ -210,21 +210,22 @@ enum track_item { TRACK_ALLOC, TRACK_FREE }; #ifdef CONFIG_SYSFS static int sysfs_slab_add(struct kmem_cache *); static int sysfs_slab_alias(struct kmem_cache *, const char *); -static void sysfs_slab_remove(struct kmem_cache *); static void memcg_propagate_slab_attrs(struct kmem_cache *s); #else static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; } static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p) { return 0; } -static inline void sysfs_slab_remove(struct kmem_cache *s) { } - static inline void memcg_propagate_slab_attrs(struct kmem_cache *s) { } #endif static inline void stat(const struct kmem_cache *s, enum stat_item si) { #ifdef CONFIG_SLUB_STATS - __this_cpu_inc(s->cpu_slab->stat[si]); + /* + * The rmw is racy on a preemptible kernel but this is acceptable, so + * avoid this_cpu_add()'s irq-disable overhead. + */ + raw_cpu_inc(s->cpu_slab->stat[si]); #endif } @@ -402,7 +403,7 @@ static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page stat(s, CMPXCHG_DOUBLE_FAIL); #ifdef SLUB_DEBUG_CMPXCHG - printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name); + pr_info("%s %s: cmpxchg double redo ", n, s->name); #endif return 0; @@ -443,7 +444,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page, stat(s, CMPXCHG_DOUBLE_FAIL); #ifdef SLUB_DEBUG_CMPXCHG - printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name); + pr_info("%s %s: cmpxchg double redo ", n, s->name); #endif return 0; @@ -545,14 +546,14 @@ static void print_track(const char *s, struct track *t) if (!t->addr) return; - printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n", - s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); + pr_err("INFO: %s in %pS age=%lu cpu=%u pid=%d\n", + s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid); #ifdef CONFIG_STACKTRACE { int i; for (i = 0; i < TRACK_ADDRS_COUNT; i++) if (t->addrs[i]) - printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]); + pr_err("\t%pS\n", (void *)t->addrs[i]); else break; } @@ -570,38 +571,37 @@ static void print_tracking(struct kmem_cache *s, void *object) static void print_page_info(struct page *page) { - printk(KERN_ERR - "INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", + pr_err("INFO: Slab 0x%p objects=%u used=%u fp=0x%p flags=0x%04lx\n", page, page->objects, page->inuse, page->freelist, page->flags); } static void slab_bug(struct kmem_cache *s, char *fmt, ...) { + struct va_format vaf; va_list args; - char buf[100]; va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); - va_end(args); - printk(KERN_ERR "========================================" - "=====================================\n"); - printk(KERN_ERR "BUG %s (%s): %s\n", s->name, print_tainted(), buf); - printk(KERN_ERR "----------------------------------------" - "-------------------------------------\n\n"); + vaf.fmt = fmt; + vaf.va = &args; + pr_err("=============================================================================\n"); + pr_err("BUG %s (%s): %pV\n", s->name, print_tainted(), &vaf); + pr_err("-----------------------------------------------------------------------------\n\n"); add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE); + va_end(args); } static void slab_fix(struct kmem_cache *s, char *fmt, ...) 
{ + struct va_format vaf; va_list args; - char buf[100]; va_start(args, fmt); - vsnprintf(buf, sizeof(buf), fmt, args); + vaf.fmt = fmt; + vaf.va = &args; + pr_err("FIX %s: %pV\n", s->name, &vaf); va_end(args); - printk(KERN_ERR "FIX %s: %s\n", s->name, buf); } static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) @@ -613,8 +613,8 @@ static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) print_page_info(page); - printk(KERN_ERR "INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", - p, p - addr, get_freepointer(s, p)); + pr_err("INFO: Object 0x%p @offset=%tu fp=0x%p\n\n", + p, p - addr, get_freepointer(s, p)); if (p > addr + 16) print_section("Bytes b4 ", p - 16, 16); @@ -697,7 +697,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page, end--; slab_bug(s, "%s overwritten", what); - printk(KERN_ERR "INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n", + pr_err("INFO: 0x%p-0x%p. First byte 0x%x instead of 0x%x\n", fault, end - 1, fault[0], value); print_trailer(s, page, object); @@ -930,7 +930,7 @@ static void trace(struct kmem_cache *s, struct page *page, void *object, int alloc) { if (s->flags & SLAB_TRACE) { - printk(KERN_INFO "TRACE %s %s 0x%p inuse=%d fp=0x%p\n", + pr_info("TRACE %s %s 0x%p inuse=%d fp=0x%p\n", s->name, alloc ? "alloc" : "free", object, page->inuse, @@ -1004,21 +1004,19 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) static void add_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page) { - lockdep_assert_held(&n->list_lock); - if (!(s->flags & SLAB_STORE_USER)) return; + lockdep_assert_held(&n->list_lock); list_add(&page->lru, &n->full); } static void remove_full(struct kmem_cache *s, struct kmem_cache_node *n, struct page *page) { - lockdep_assert_held(&n->list_lock); - if (!(s->flags & SLAB_STORE_USER)) return; + lockdep_assert_held(&n->list_lock); list_del(&page->lru); } @@ -1135,9 +1133,8 @@ static noinline struct kmem_cache_node *free_debug_processing( slab_err(s, page, "Attempt to free object(0x%p) " "outside of slab", object); } else if (!page->slab_cache) { - printk(KERN_ERR - "SLUB <none>: no slab for object 0x%p.\n", - object); + pr_err("SLUB <none>: no slab for object 0x%p.\n", + object); dump_stack(); } else object_err(s, page, object, @@ -1220,8 +1217,8 @@ static int __init setup_slub_debug(char *str) slub_debug |= SLAB_FAILSLAB; break; default: - printk(KERN_ERR "slub_debug option '%c' " - "unknown. skipped\n", *str); + pr_err("slub_debug option '%c' unknown. 
skipped\n", + *str); } } @@ -1315,17 +1312,26 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x) /* * Slab allocation and freeing */ -static inline struct page *alloc_slab_page(gfp_t flags, int node, - struct kmem_cache_order_objects oo) +static inline struct page *alloc_slab_page(struct kmem_cache *s, + gfp_t flags, int node, struct kmem_cache_order_objects oo) { + struct page *page; int order = oo_order(oo); flags |= __GFP_NOTRACK; + if (memcg_charge_slab(s, flags, order)) + return NULL; + if (node == NUMA_NO_NODE) - return alloc_pages(flags, order); + page = alloc_pages(flags, order); else - return alloc_pages_exact_node(node, flags, order); + page = alloc_pages_exact_node(node, flags, order); + + if (!page) + memcg_uncharge_slab(s, order); + + return page; } static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) @@ -1347,14 +1353,15 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) */ alloc_gfp = (flags | __GFP_NOWARN | __GFP_NORETRY) & ~__GFP_NOFAIL; - page = alloc_slab_page(alloc_gfp, node, oo); + page = alloc_slab_page(s, alloc_gfp, node, oo); if (unlikely(!page)) { oo = s->min; + alloc_gfp = flags; /* * Allocation may have failed due to fragmentation. * Try a lower order alloc if possible */ - page = alloc_slab_page(flags, node, oo); + page = alloc_slab_page(s, alloc_gfp, node, oo); if (page) stat(s, ORDER_FALLBACK); @@ -1364,7 +1371,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { int pages = 1 << oo_order(oo); - kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); + kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node); /* * Objects from caches that have a constructor don't get @@ -1415,7 +1422,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node) order = compound_order(page); inc_slabs_node(s, page_to_nid(page), page->objects); - memcg_bind_pages(s, order); page->slab_cache = s; __SetPageSlab(page); if (page->pfmemalloc) @@ -1466,11 +1472,11 @@ static void __free_slab(struct kmem_cache *s, struct page *page) __ClearPageSlabPfmemalloc(page); __ClearPageSlab(page); - memcg_release_pages(s, order); page_mapcount_reset(page); if (current->reclaim_state) current->reclaim_state->reclaimed_slab += pages; - __free_memcg_kmem_pages(page, order); + __free_pages(page, order); + memcg_uncharge_slab(s, order); } #define need_reserve_slab_rcu \ @@ -1520,11 +1526,9 @@ static void discard_slab(struct kmem_cache *s, struct page *page) /* * Management of partially allocated slabs. 
*/ -static inline void add_partial(struct kmem_cache_node *n, - struct page *page, int tail) +static inline void +__add_partial(struct kmem_cache_node *n, struct page *page, int tail) { - lockdep_assert_held(&n->list_lock); - n->nr_partial++; if (tail == DEACTIVATE_TO_TAIL) list_add_tail(&page->lru, &n->partial); @@ -1532,15 +1536,27 @@ static inline void add_partial(struct kmem_cache_node *n, list_add(&page->lru, &n->partial); } -static inline void remove_partial(struct kmem_cache_node *n, - struct page *page) +static inline void add_partial(struct kmem_cache_node *n, + struct page *page, int tail) { lockdep_assert_held(&n->list_lock); + __add_partial(n, page, tail); +} +static inline void +__remove_partial(struct kmem_cache_node *n, struct page *page) +{ list_del(&page->lru); n->nr_partial--; } +static inline void remove_partial(struct kmem_cache_node *n, + struct page *page) +{ + lockdep_assert_held(&n->list_lock); + __remove_partial(n, page); +} + /* * Remove slab from the partial list, freeze it and * return the pointer to the freelist. @@ -1676,8 +1692,8 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, return NULL; do { - cpuset_mems_cookie = get_mems_allowed(); - zonelist = node_zonelist(slab_node(), flags); + cpuset_mems_cookie = read_mems_allowed_begin(); + zonelist = node_zonelist(mempolicy_slab_node(), flags); for_each_zone_zonelist(zone, z, zonelist, high_zoneidx) { struct kmem_cache_node *n; @@ -1688,19 +1704,17 @@ static void *get_any_partial(struct kmem_cache *s, gfp_t flags, object = get_partial_node(s, n, c, flags); if (object) { /* - * Return the object even if - * put_mems_allowed indicated that - * the cpuset mems_allowed was - * updated in parallel. It's a - * harmless race between the alloc - * and the cpuset update. + * Don't check read_mems_allowed_retry() + * here - if mems_allowed was updated in + * parallel, that was a harmless race + * between allocation and the cpuset + * update */ - put_mems_allowed(cpuset_mems_cookie); return object; } } } - } while (!put_mems_allowed(cpuset_mems_cookie)); + } while (read_mems_allowed_retry(cpuset_mems_cookie)); #endif return NULL; } @@ -1712,7 +1726,7 @@ static void *get_partial(struct kmem_cache *s, gfp_t flags, int node, struct kmem_cache_cpu *c) { void *object; - int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; + int searchnode = (node == NUMA_NO_NODE) ? numa_mem_id() : node; object = get_partial_node(s, get_node(s, searchnode), c, flags); if (object || node != NUMA_NO_NODE) @@ -1762,19 +1776,19 @@ static inline void note_cmpxchg_failure(const char *n, #ifdef SLUB_DEBUG_CMPXCHG unsigned long actual_tid = __this_cpu_read(s->cpu_slab->tid); - printk(KERN_INFO "%s %s: cmpxchg redo ", n, s->name); + pr_info("%s %s: cmpxchg redo ", n, s->name); #ifdef CONFIG_PREEMPT if (tid_to_cpu(tid) != tid_to_cpu(actual_tid)) - printk("due to cpu change %d -> %d\n", + pr_warn("due to cpu change %d -> %d\n", tid_to_cpu(tid), tid_to_cpu(actual_tid)); else #endif if (tid_to_event(tid) != tid_to_event(actual_tid)) - printk("due to cpu running other code. Event %ld->%ld\n", + pr_warn("due to cpu running other code. 
Event %ld->%ld\n", tid_to_event(tid), tid_to_event(actual_tid)); else - printk("for unknown reason: actual=%lx was=%lx target=%lx\n", + pr_warn("for unknown reason: actual=%lx was=%lx target=%lx\n", actual_tid, tid, next_tid(tid)); #endif stat(s, CMPXCHG_DOUBLE_CPU_FAIL); @@ -1867,7 +1881,7 @@ redo: new.frozen = 0; - if (!new.inuse && n->nr_partial > s->min_partial) + if (!new.inuse && n->nr_partial >= s->min_partial) m = M_FREE; else if (new.freelist) { m = M_PARTIAL; @@ -1978,7 +1992,7 @@ static void unfreeze_partials(struct kmem_cache *s, new.freelist, new.counters, "unfreezing slab")); - if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) { + if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) { page->next = discard_page; discard_page = page; } else { @@ -2113,11 +2127,19 @@ static inline int node_match(struct page *page, int node) return 1; } +#ifdef CONFIG_SLUB_DEBUG static int count_free(struct page *page) { return page->objects - page->inuse; } +static inline unsigned long node_nr_objs(struct kmem_cache_node *n) +{ + return atomic_long_read(&n->total_objects); +} +#endif /* CONFIG_SLUB_DEBUG */ + +#if defined(CONFIG_SLUB_DEBUG) || defined(CONFIG_SYSFS) static unsigned long count_partial(struct kmem_cache_node *n, int (*get_count)(struct page *)) { @@ -2131,31 +2153,28 @@ static unsigned long count_partial(struct kmem_cache_node *n, spin_unlock_irqrestore(&n->list_lock, flags); return x; } - -static inline unsigned long node_nr_objs(struct kmem_cache_node *n) -{ -#ifdef CONFIG_SLUB_DEBUG - return atomic_long_read(&n->total_objects); -#else - return 0; -#endif -} +#endif /* CONFIG_SLUB_DEBUG || CONFIG_SYSFS */ static noinline void slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) { +#ifdef CONFIG_SLUB_DEBUG + static DEFINE_RATELIMIT_STATE(slub_oom_rs, DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); int node; - printk(KERN_WARNING - "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", + if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slub_oom_rs)) + return; + + pr_warn("SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n", nid, gfpflags); - printk(KERN_WARNING " cache: %s, object size: %d, buffer size: %d, " - "default order: %d, min order: %d\n", s->name, s->object_size, - s->size, oo_order(s->oo), oo_order(s->min)); + pr_warn(" cache: %s, object size: %d, buffer size: %d, default order: %d, min order: %d\n", + s->name, s->object_size, s->size, oo_order(s->oo), + oo_order(s->min)); if (oo_order(s->min) > get_order(s->object_size)) - printk(KERN_WARNING " %s debugging increased min order, use " - "slub_debug=O to disable.\n", s->name); + pr_warn(" %s debugging increased min order, use slub_debug=O to disable.\n", + s->name); for_each_online_node(node) { struct kmem_cache_node *n = get_node(s, node); @@ -2170,10 +2189,10 @@ slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid) nr_slabs = node_nr_slabs(n); nr_objs = node_nr_objs(n); - printk(KERN_WARNING - " node %d: slabs: %ld, objs: %ld, free: %ld\n", + pr_warn(" node %d: slabs: %ld, objs: %ld, free: %ld\n", node, nr_slabs, nr_objs, nr_free); } +#endif } static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, @@ -2190,7 +2209,7 @@ static inline void *new_slab_objects(struct kmem_cache *s, gfp_t flags, page = new_slab(s, flags, node); if (page) { - c = __this_cpu_ptr(s->cpu_slab); + c = raw_cpu_ptr(s->cpu_slab); if (c->page) flush_slab(s, c); @@ -2315,8 +2334,6 @@ redo: if (freelist) goto load_freelist; - stat(s, ALLOC_SLOWPATH); - freelist = get_freelist(s, 
page); if (!freelist) { @@ -2352,9 +2369,7 @@ new_slab: freelist = new_slab_objects(s, gfpflags, node, &c); if (unlikely(!freelist)) { - if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) - slab_out_of_memory(s, gfpflags, node); - + slab_out_of_memory(s, gfpflags, node); local_irq_restore(flags); return NULL; } @@ -2410,7 +2425,7 @@ redo: * and the retrieval of the tid. */ preempt_disable(); - c = __this_cpu_ptr(s->cpu_slab); + c = this_cpu_ptr(s->cpu_slab); /* * The transaction ids are globally unique per cpu and per operation on @@ -2423,10 +2438,10 @@ redo: object = c->freelist; page = c->page; - if (unlikely(!object || !node_match(page, node))) + if (unlikely(!object || !node_match(page, node))) { object = __slab_alloc(s, gfpflags, node, addr, c); - - else { + stat(s, ALLOC_SLOWPATH); + } else { void *next_object = get_freepointer_safe(s, object); /* @@ -2605,7 +2620,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page, return; } - if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) + if (unlikely(!new.inuse && n->nr_partial >= s->min_partial)) goto slab_empty; /* @@ -2666,7 +2681,7 @@ redo: * during the cmpxchg then the free will succedd. */ preempt_disable(); - c = __this_cpu_ptr(s->cpu_slab); + c = this_cpu_ptr(s->cpu_slab); tid = c->tid; preempt_enable(); @@ -2886,10 +2901,8 @@ static void early_kmem_cache_node_alloc(int node) BUG_ON(!page); if (page_to_nid(page) != node) { - printk(KERN_ERR "SLUB: Unable to allocate memory from " - "node %d\n", node); - printk(KERN_ERR "SLUB: Allocating a useless per node structure " - "in order to be able to continue\n"); + pr_err("SLUB: Unable to allocate memory from node %d\n", node); + pr_err("SLUB: Allocating a useless per node structure in order to be able to continue\n"); } n = page->freelist; @@ -2906,12 +2919,10 @@ static void early_kmem_cache_node_alloc(int node) inc_slabs_node(kmem_cache_node, node, page->objects); /* - * the lock is for lockdep's sake, not for any actual - * race protection + * No locks need to be taken here as it has just been + * initialized and there is no concurrent access. */ - spin_lock(&n->list_lock); - add_partial(n, page, DEACTIVATE_TO_HEAD); - spin_unlock(&n->list_lock); + __add_partial(n, page, DEACTIVATE_TO_HEAD); } static void free_kmem_cache_nodes(struct kmem_cache *s) @@ -3176,8 +3187,7 @@ static void list_slab_objects(struct kmem_cache *s, struct page *page, for_each_object(p, s, addr, page->objects) { if (!test_bit(slab_index(p, s, addr), map)) { - printk(KERN_ERR "INFO: Object 0x%p @offset=%tu\n", - p, p - addr); + pr_err("INFO: Object 0x%p @offset=%tu\n", p, p - addr); print_tracking(s, p); } } @@ -3197,7 +3207,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n) list_for_each_entry_safe(page, h, &n->partial, lru) { if (!page->inuse) { - remove_partial(n, page); + __remove_partial(n, page); discard_slab(s, page); } else { list_slab_objects(s, page, @@ -3229,23 +3239,7 @@ static inline int kmem_cache_close(struct kmem_cache *s) int __kmem_cache_shutdown(struct kmem_cache *s) { - int rc = kmem_cache_close(s); - - if (!rc) { - /* - * We do the same lock strategy around sysfs_slab_add, see - * __kmem_cache_create. Because this is pretty much the last - * operation we do and the lock will be released shortly after - * that in slab_common.c, we could just move sysfs_slab_remove - * to a later point in common code. We should do that when we - * have a common sysfs framework for all allocators. 
- */ - mutex_unlock(&slab_mutex); - sysfs_slab_remove(s); - mutex_lock(&slab_mutex); - } - - return rc; + return kmem_cache_close(s); } /******************************************************************** @@ -3315,8 +3309,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node) struct page *page; void *ptr = NULL; - flags |= __GFP_COMP | __GFP_NOTRACK | __GFP_KMEMCG; - page = alloc_pages_node(node, flags, get_order(size)); + flags |= __GFP_COMP | __GFP_NOTRACK; + page = alloc_kmem_pages_node(node, flags, get_order(size)); if (page) ptr = page_address(page); @@ -3385,7 +3379,7 @@ void kfree(const void *x) if (unlikely(!PageSlab(page))) { BUG_ON(!PageCompound(page)); kfree_hook(x); - __free_memcg_kmem_pages(page, compound_order(page)); + __free_kmem_pages(page, compound_order(page)); return; } slab_free(page->slab_cache, page, object, _RET_IP_); @@ -3402,7 +3396,7 @@ EXPORT_SYMBOL(kfree); * being allocated from last increasing the chance that the last objects * are freed in them. */ -int kmem_cache_shrink(struct kmem_cache *s) +int __kmem_cache_shrink(struct kmem_cache *s) { int node; int i; @@ -3458,7 +3452,6 @@ int kmem_cache_shrink(struct kmem_cache *s) kfree(slabs_by_inuse); return 0; } -EXPORT_SYMBOL(kmem_cache_shrink); static int slab_mem_going_offline_callback(void *arg) { @@ -3466,7 +3459,7 @@ static int slab_mem_going_offline_callback(void *arg) mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_caches, list) - kmem_cache_shrink(s); + __kmem_cache_shrink(s); mutex_unlock(&slab_mutex); return 0; @@ -3660,9 +3653,7 @@ void __init kmem_cache_init(void) register_cpu_notifier(&slab_notifier); #endif - printk(KERN_INFO - "SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d," - " CPUs=%d, Nodes=%d\n", + pr_info("SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d, CPUs=%d, Nodes=%d\n", cache_line_size(), slub_min_order, slub_max_order, slub_min_objects, nr_cpu_ids, nr_node_ids); @@ -3680,6 +3671,9 @@ static int slab_unmergeable(struct kmem_cache *s) if (slub_nomerge || (s->flags & SLUB_NEVER_MERGE)) return 1; + if (!is_root_cache(s)) + return 1; + if (s->ctor) return 1; @@ -3692,9 +3686,8 @@ static int slab_unmergeable(struct kmem_cache *s) return 0; } -static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size, - size_t align, unsigned long flags, const char *name, - void (*ctor)(void *)) +static struct kmem_cache *find_mergeable(size_t size, size_t align, + unsigned long flags, const char *name, void (*ctor)(void *)) { struct kmem_cache *s; @@ -3717,7 +3710,7 @@ static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size, continue; if ((flags & SLUB_MERGE_SAME) != (s->flags & SLUB_MERGE_SAME)) - continue; + continue; /* * Check if alignment is compatible. 
* Courtesy of Adrian Drzewiecki @@ -3728,23 +3721,24 @@ static struct kmem_cache *find_mergeable(struct mem_cgroup *memcg, size_t size, if (s->size - size >= sizeof(void *)) continue; - if (!cache_match_memcg(s, memcg)) - continue; - return s; } return NULL; } struct kmem_cache * -__kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size, - size_t align, unsigned long flags, void (*ctor)(void *)) +__kmem_cache_alias(const char *name, size_t size, size_t align, + unsigned long flags, void (*ctor)(void *)) { struct kmem_cache *s; - s = find_mergeable(memcg, size, align, flags, name, ctor); + s = find_mergeable(size, align, flags, name, ctor); if (s) { + int i; + struct kmem_cache *c; + s->refcount++; + /* * Adjust the object sizes so that we clear * the complete object on kzalloc. @@ -3752,6 +3746,15 @@ __kmem_cache_alias(struct mem_cgroup *memcg, const char *name, size_t size, s->object_size = max(s->object_size, (int)size); s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); + for_each_memcg_cache_index(i) { + c = cache_from_memcg_idx(s, i); + if (!c) + continue; + c->object_size = s->object_size; + c->inuse = max_t(int, c->inuse, + ALIGN(size, sizeof(void *))); + } + if (sysfs_slab_alias(s, name)) { s->refcount--; s = NULL; @@ -3774,10 +3777,7 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags) return 0; memcg_propagate_slab_attrs(s); - mutex_unlock(&slab_mutex); err = sysfs_slab_add(s); - mutex_lock(&slab_mutex); - if (err) kmem_cache_close(s); @@ -3935,8 +3935,8 @@ static int validate_slab_node(struct kmem_cache *s, count++; } if (count != n->nr_partial) - printk(KERN_ERR "SLUB %s: %ld partial slabs counted but " - "counter=%ld\n", s->name, count, n->nr_partial); + pr_err("SLUB %s: %ld partial slabs counted but counter=%ld\n", + s->name, count, n->nr_partial); if (!(s->flags & SLAB_STORE_USER)) goto out; @@ -3946,9 +3946,8 @@ static int validate_slab_node(struct kmem_cache *s, count++; } if (count != atomic_long_read(&n->nr_slabs)) - printk(KERN_ERR "SLUB: %s %ld slabs counted but " - "counter=%ld\n", s->name, count, - atomic_long_read(&n->nr_slabs)); + pr_err("SLUB: %s %ld slabs counted but counter=%ld\n", + s->name, count, atomic_long_read(&n->nr_slabs)); out: spin_unlock_irqrestore(&n->list_lock, flags); @@ -4212,53 +4211,50 @@ static void resiliency_test(void) BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10); - printk(KERN_ERR "SLUB resiliency testing\n"); - printk(KERN_ERR "-----------------------\n"); - printk(KERN_ERR "A. Corruption after allocation\n"); + pr_err("SLUB resiliency testing\n"); + pr_err("-----------------------\n"); + pr_err("A. Corruption after allocation\n"); p = kzalloc(16, GFP_KERNEL); p[16] = 0x12; - printk(KERN_ERR "\n1. kmalloc-16: Clobber Redzone/next pointer" - " 0x12->0x%p\n\n", p + 16); + pr_err("\n1. kmalloc-16: Clobber Redzone/next pointer 0x12->0x%p\n\n", + p + 16); validate_slab_cache(kmalloc_caches[4]); /* Hmmm... The next two are dangerous */ p = kzalloc(32, GFP_KERNEL); p[32 + sizeof(void *)] = 0x34; - printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab" - " 0x34 -> -0x%p\n", p); - printk(KERN_ERR - "If allocated object is overwritten then not detectable\n\n"); + pr_err("\n2. kmalloc-32: Clobber next pointer/next slab 0x34 -> -0x%p\n", + p); + pr_err("If allocated object is overwritten then not detectable\n\n"); validate_slab_cache(kmalloc_caches[5]); p = kzalloc(64, GFP_KERNEL); p += 64 + (get_cycles() & 0xff) * sizeof(void *); *p = 0x56; - printk(KERN_ERR "\n3. 
kmalloc-64: corrupting random byte 0x56->0x%p\n", - p); - printk(KERN_ERR - "If allocated object is overwritten then not detectable\n\n"); + pr_err("\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n", + p); + pr_err("If allocated object is overwritten then not detectable\n\n"); validate_slab_cache(kmalloc_caches[6]); - printk(KERN_ERR "\nB. Corruption after free\n"); + pr_err("\nB. Corruption after free\n"); p = kzalloc(128, GFP_KERNEL); kfree(p); *p = 0x78; - printk(KERN_ERR "1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); + pr_err("1. kmalloc-128: Clobber first word 0x78->0x%p\n\n", p); validate_slab_cache(kmalloc_caches[7]); p = kzalloc(256, GFP_KERNEL); kfree(p); p[50] = 0x9a; - printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", - p); + pr_err("\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p); validate_slab_cache(kmalloc_caches[8]); p = kzalloc(512, GFP_KERNEL); kfree(p); p[512] = 0xab; - printk(KERN_ERR "\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); + pr_err("\n3. kmalloc-512: Clobber redzone 0xab->0x%p\n\n", p); validate_slab_cache(kmalloc_caches[9]); } #else @@ -4333,7 +4329,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, } } - lock_memory_hotplug(); + get_online_mems(); #ifdef CONFIG_SLUB_DEBUG if (flags & SO_ALL) { for_each_node_state(node, N_NORMAL_MEMORY) { @@ -4373,7 +4369,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s, x += sprintf(buf + x, " N%d=%lu", node, nodes[node]); #endif - unlock_memory_hotplug(); + put_online_mems(); kfree(nodes); return x + sprintf(buf + x, "\n"); } @@ -5052,15 +5048,18 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) #ifdef CONFIG_MEMCG_KMEM int i; char *buffer = NULL; + struct kmem_cache *root_cache; - if (!is_root_cache(s)) + if (is_root_cache(s)) return; + root_cache = s->memcg_params->root_cache; + /* * This mean this cache had no attribute written. 
Therefore, no point * in copying default values around */ - if (!s->max_attr_size) + if (!root_cache->max_attr_size) return; for (i = 0; i < ARRAY_SIZE(slab_attrs); i++) { @@ -5082,7 +5081,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) */ if (buffer) buf = buffer; - else if (s->max_attr_size < ARRAY_SIZE(mbuf)) + else if (root_cache->max_attr_size < ARRAY_SIZE(mbuf)) buf = mbuf; else { buffer = (char *) get_zeroed_page(GFP_KERNEL); @@ -5091,7 +5090,7 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) buf = buffer; } - attr->show(s->memcg_params->root_cache, buf); + attr->show(root_cache, buf); attr->store(s, buf, strlen(buf)); } @@ -5100,6 +5099,11 @@ static void memcg_propagate_slab_attrs(struct kmem_cache *s) #endif } +static void kmem_cache_release(struct kobject *k) +{ + slab_kmem_cache_release(to_slab(k)); +} + static const struct sysfs_ops slab_sysfs_ops = { .show = slab_attr_show, .store = slab_attr_store, @@ -5107,6 +5111,7 @@ static const struct sysfs_ops slab_sysfs_ops = { static struct kobj_type slab_ktype = { .sysfs_ops = &slab_sysfs_ops, + .release = kmem_cache_release, }; static int uevent_filter(struct kset *kset, struct kobject *kobj) @@ -5124,6 +5129,15 @@ static const struct kset_uevent_ops slab_uevent_ops = { static struct kset *slab_kset; +static inline struct kset *cache_kset(struct kmem_cache *s) +{ +#ifdef CONFIG_MEMCG_KMEM + if (!is_root_cache(s)) + return s->memcg_params->root_cache->memcg_kset; +#endif + return slab_kset; +} + #define ID_STR_LENGTH 64 /* Create a unique string id for a slab cache: @@ -5189,29 +5203,42 @@ static int sysfs_slab_add(struct kmem_cache *s) name = create_unique_id(s); } - s->kobj.kset = slab_kset; + s->kobj.kset = cache_kset(s); err = kobject_init_and_add(&s->kobj, &slab_ktype, NULL, "%s", name); - if (err) { - kobject_put(&s->kobj); - return err; - } + if (err) + goto out_put_kobj; err = sysfs_create_group(&s->kobj, &slab_attr_group); - if (err) { - kobject_del(&s->kobj); - kobject_put(&s->kobj); - return err; + if (err) + goto out_del_kobj; + +#ifdef CONFIG_MEMCG_KMEM + if (is_root_cache(s)) { + s->memcg_kset = kset_create_and_add("cgroup", NULL, &s->kobj); + if (!s->memcg_kset) { + err = -ENOMEM; + goto out_del_kobj; + } } +#endif + kobject_uevent(&s->kobj, KOBJ_ADD); if (!unmergeable) { /* Setup first alias */ sysfs_slab_alias(s, s->name); - kfree(name); } - return 0; +out: + if (!unmergeable) + kfree(name); + return err; +out_del_kobj: + kobject_del(&s->kobj); +out_put_kobj: + kobject_put(&s->kobj); + goto out; } -static void sysfs_slab_remove(struct kmem_cache *s) +void sysfs_slab_remove(struct kmem_cache *s) { if (slab_state < FULL) /* @@ -5220,6 +5247,9 @@ static void sysfs_slab_remove(struct kmem_cache *s) */ return; +#ifdef CONFIG_MEMCG_KMEM + kset_unregister(s->memcg_kset); +#endif kobject_uevent(&s->kobj, KOBJ_REMOVE); kobject_del(&s->kobj); kobject_put(&s->kobj); @@ -5270,7 +5300,7 @@ static int __init slab_sysfs_init(void) slab_kset = kset_create_and_add("slab", &slab_uevent_ops, kernel_kobj); if (!slab_kset) { mutex_unlock(&slab_mutex); - printk(KERN_ERR "Cannot register slab subsystem.\n"); + pr_err("Cannot register slab subsystem.\n"); return -ENOSYS; } @@ -5279,8 +5309,8 @@ static int __init slab_sysfs_init(void) list_for_each_entry(s, &slab_caches, list) { err = sysfs_slab_add(s); if (err) - printk(KERN_ERR "SLUB: Unable to add boot slab %s" - " to sysfs\n", s->name); + pr_err("SLUB: Unable to add boot slab %s to sysfs\n", + s->name); } while (alias_list) { @@ -5289,8 +5319,8 @@ static 
int __init slab_sysfs_init(void) alias_list = alias_list->next; err = sysfs_slab_alias(al->s, al->name); if (err) - printk(KERN_ERR "SLUB: Unable to add boot slab alias" - " %s to sysfs\n", al->name); + pr_err("SLUB: Unable to add boot slab alias %s to sysfs\n", + al->name); kfree(al); } |
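The stat() hunk near the top of the patch switches __this_cpu_inc() to raw_cpu_inc(): on a preemptible kernel the read-modify-write can now race, but an occasionally lost statistics event is acceptable and the irq/preemption-safe variant's overhead is avoided. The following is a minimal userspace analogue of that trade-off, with illustrative names (exact_inc/racy_inc are not taken from mm/slub.c):

/*
 * Two ways to bump an event counter.  exact_inc() never loses an event but
 * pays for an atomic read-modify-write; racy_inc() is a plain RMW that two
 * concurrent callers can occasionally collide on, which is tolerable for
 * statistics.  This mirrors the __this_cpu_inc() -> raw_cpu_inc() change.
 */
#include <stdatomic.h>
#include <stdio.h>

enum stat_item { ALLOC_FASTPATH, ALLOC_SLOWPATH, NR_STAT_ITEMS };

static atomic_ulong exact_stat[NR_STAT_ITEMS];	/* never loses an event */
static unsigned long racy_stat[NR_STAT_ITEMS];	/* plain RMW, may lose one */

static void exact_inc(enum stat_item si)
{
	/* Atomic read-modify-write: correct under concurrency, but costlier. */
	atomic_fetch_add_explicit(&exact_stat[si], 1, memory_order_relaxed);
}

static void racy_inc(enum stat_item si)
{
	/* Plain read-modify-write: two concurrent callers may drop a count. */
	racy_stat[si]++;
}

int main(void)
{
	exact_inc(ALLOC_FASTPATH);
	racy_inc(ALLOC_SLOWPATH);
	printf("fastpath=%lu slowpath=%lu\n",
	       (unsigned long)atomic_load(&exact_stat[ALLOC_FASTPATH]),
	       racy_stat[ALLOC_SLOWPATH]);
	return 0;
}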
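The slab_bug()/slab_fix() hunks replace a vsnprintf() into a 100-byte stack buffer with printk's %pV and struct va_format, so the message is printed straight from the caller's format string and va_list with no copy and no 100-byte truncation. %pV is printk-specific, so the portable sketch below forwards the va_list to vfprintf() instead; report_bug() is a made-up name, not a kernel function:

/*
 * Varargs forwarding without an intermediate buffer: accept fmt/..., then
 * hand the va_list to the underlying print routine.  The kernel patch does
 * the same thing by pointing a struct va_format at the arguments and
 * printing it with %pV.
 */
#include <stdarg.h>
#include <stdio.h>

static void report_bug(const char *cache_name, const char *fmt, ...)
{
	va_list args;

	fprintf(stderr, "=============================================\n");
	fprintf(stderr, "BUG %s: ", cache_name);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);	/* no fixed-size buffer, no truncation */
	va_end(args);

	fprintf(stderr, "\n---------------------------------------------\n");
}

int main(void)
{
	report_bug("kmalloc-64", "Redzone overwritten at offset %d", 48);
	return 0;
}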
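alloc_slab_page() now takes the cache as an argument, charges the memory cgroup via memcg_charge_slab() before allocating the pages, and uncharges with memcg_uncharge_slab() if the allocation fails, while __free_slab() uncharges after __free_pages(). The sketch below shows that charge/try/roll-back pairing in isolation, assuming hypothetical budget_charge()/budget_uncharge() helpers in place of the memcg interfaces:

/*
 * Charge an accounting budget first, attempt the allocation second, and
 * roll the charge back if the allocation fails; the matching free path
 * releases the memory and then uncharges, as __free_slab() does.
 */
#include <stdio.h>
#include <stdlib.h>

static long budget = 8;			/* remaining allowance, in blocks */

static int budget_charge(int blocks)
{
	if (budget < blocks)
		return -1;		/* over the limit: refuse the charge */
	budget -= blocks;
	return 0;
}

static void budget_uncharge(int blocks)
{
	budget += blocks;
}

static void *charged_alloc(size_t size, int blocks)
{
	void *p;

	if (budget_charge(blocks))
		return NULL;		/* charge refused, nothing to undo */

	p = malloc(size);		/* stands in for alloc_pages() */
	if (!p)
		budget_uncharge(blocks); /* roll back the charge on failure */

	return p;
}

static void charged_free(void *p, int blocks)
{
	free(p);			/* stands in for __free_pages() */
	budget_uncharge(blocks);	/* mirrors the uncharge in __free_slab() */
}

int main(void)
{
	void *obj = charged_alloc(4096, 1);

	printf("alloc %s, budget left %ld\n", obj ? "ok" : "failed", budget);
	if (obj)
		charged_free(obj, 1);
	return 0;
}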
