diff options
Diffstat (limited to 'mm/slab_common.c')
| -rw-r--r-- | mm/slab_common.c | 499 |
1 files changed, 411 insertions, 88 deletions
diff --git a/mm/slab_common.c b/mm/slab_common.c index 3f3cd97d3fd..d31c4bacc6a 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -19,6 +19,7 @@ #include <asm/tlbflush.h> #include <asm/page.h> #include <linux/memcontrol.h> +#include <trace/events/kmem.h> #include "slab.h" @@ -28,8 +29,7 @@ DEFINE_MUTEX(slab_mutex); struct kmem_cache *kmem_cache; #ifdef CONFIG_DEBUG_VM -static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, - size_t size) +static int kmem_cache_sanity_check(const char *name, size_t size) { struct kmem_cache *s = NULL; @@ -55,27 +55,22 @@ static int kmem_cache_sanity_check(struct mem_cgroup *memcg, const char *name, continue; } - /* - * For simplicity, we won't check this in the list of memcg - * caches. We have control over memcg naming, and if there - * aren't duplicates in the global list, there won't be any - * duplicates in the memcg lists as well. - */ - if (!memcg && !strcmp(s->name, name)) { +#if !defined(CONFIG_SLUB) + if (!strcmp(s->name, name)) { pr_err("%s (%s): Cache name already exists.\n", __func__, name); dump_stack(); s = NULL; return -EINVAL; } +#endif } WARN_ON(strchr(name, ' ')); /* It confuses parsers */ return 0; } #else -static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg, - const char *name, size_t size) +static inline int kmem_cache_sanity_check(const char *name, size_t size) { return 0; } @@ -136,6 +131,45 @@ unsigned long calculate_alignment(unsigned long flags, return ALIGN(align, sizeof(void *)); } +static struct kmem_cache * +do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align, + unsigned long flags, void (*ctor)(void *), + struct mem_cgroup *memcg, struct kmem_cache *root_cache) +{ + struct kmem_cache *s; + int err; + + err = -ENOMEM; + s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); + if (!s) + goto out; + + s->name = name; + s->object_size = object_size; + s->size = size; + s->align = align; + s->ctor = ctor; + + err = memcg_alloc_cache_params(memcg, s, root_cache); + if (err) + goto out_free_cache; + + err = __kmem_cache_create(s, flags); + if (err) + goto out_free_cache; + + s->refcount = 1; + list_add(&s->list, &slab_caches); +out: + if (err) + return ERR_PTR(err); + return s; + +out_free_cache: + memcg_free_cache_params(s); + kfree(s); + goto out; +} /* * kmem_cache_create - Create a cache. @@ -161,20 +195,22 @@ unsigned long calculate_alignment(unsigned long flags, * cacheline. This can be beneficial if you're counting cycles as closely * as davem. */ - struct kmem_cache * -kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, - size_t align, unsigned long flags, void (*ctor)(void *), - struct kmem_cache *parent_cache) +kmem_cache_create(const char *name, size_t size, size_t align, + unsigned long flags, void (*ctor)(void *)) { - struct kmem_cache *s = NULL; - int err = 0; + struct kmem_cache *s; + char *cache_name; + int err; get_online_cpus(); + get_online_mems(); + mutex_lock(&slab_mutex); - if (!kmem_cache_sanity_check(memcg, name, size) == 0) - goto out_locked; + err = kmem_cache_sanity_check(name, size); + if (err) + goto out_unlock; /* * Some allocators will constraint the set of valid flags to a subset @@ -184,47 +220,31 @@ kmem_cache_create_memcg(struct mem_cgroup *memcg, const char *name, size_t size, */ flags &= CACHE_CREATE_MASK; - s = __kmem_cache_alias(memcg, name, size, align, flags, ctor); + s = __kmem_cache_alias(name, size, align, flags, ctor); if (s) - goto out_locked; - - s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL); - if (s) { - s->object_size = s->size = size; - s->align = calculate_alignment(flags, align, size); - s->ctor = ctor; - - if (memcg_register_cache(memcg, s, parent_cache)) { - kmem_cache_free(kmem_cache, s); - err = -ENOMEM; - goto out_locked; - } + goto out_unlock; - s->name = kstrdup(name, GFP_KERNEL); - if (!s->name) { - kmem_cache_free(kmem_cache, s); - err = -ENOMEM; - goto out_locked; - } - - err = __kmem_cache_create(s, flags); - if (!err) { - s->refcount = 1; - list_add(&s->list, &slab_caches); - memcg_cache_list_add(memcg, s); - } else { - kfree(s->name); - kmem_cache_free(kmem_cache, s); - } - } else + cache_name = kstrdup(name, GFP_KERNEL); + if (!cache_name) { err = -ENOMEM; + goto out_unlock; + } + + s = do_kmem_cache_create(cache_name, size, size, + calculate_alignment(flags, align, size), + flags, ctor, NULL, NULL); + if (IS_ERR(s)) { + err = PTR_ERR(s); + kfree(cache_name); + } -out_locked: +out_unlock: mutex_unlock(&slab_mutex); + + put_online_mems(); put_online_cpus(); if (err) { - if (flags & SLAB_PANIC) panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n", name, err); @@ -233,54 +253,148 @@ out_locked: name, err); dump_stack(); } - return NULL; } + return s; +} +EXPORT_SYMBOL(kmem_cache_create); + +#ifdef CONFIG_MEMCG_KMEM +/* + * memcg_create_kmem_cache - Create a cache for a memory cgroup. + * @memcg: The memory cgroup the new cache is for. + * @root_cache: The parent of the new cache. + * @memcg_name: The name of the memory cgroup (used for naming the new cache). + * + * This function attempts to create a kmem cache that will serve allocation + * requests going from @memcg to @root_cache. The new cache inherits properties + * from its parent. + */ +struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg, + struct kmem_cache *root_cache, + const char *memcg_name) +{ + struct kmem_cache *s = NULL; + char *cache_name; + + get_online_cpus(); + get_online_mems(); + + mutex_lock(&slab_mutex); + + cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name, + memcg_cache_id(memcg), memcg_name); + if (!cache_name) + goto out_unlock; + + s = do_kmem_cache_create(cache_name, root_cache->object_size, + root_cache->size, root_cache->align, + root_cache->flags, root_cache->ctor, + memcg, root_cache); + if (IS_ERR(s)) { + kfree(cache_name); + s = NULL; + } + +out_unlock: + mutex_unlock(&slab_mutex); + + put_online_mems(); + put_online_cpus(); return s; } -struct kmem_cache * -kmem_cache_create(const char *name, size_t size, size_t align, - unsigned long flags, void (*ctor)(void *)) +static int memcg_cleanup_cache_params(struct kmem_cache *s) +{ + int rc; + + if (!s->memcg_params || + !s->memcg_params->is_root_cache) + return 0; + + mutex_unlock(&slab_mutex); + rc = __memcg_cleanup_cache_params(s); + mutex_lock(&slab_mutex); + + return rc; +} +#else +static int memcg_cleanup_cache_params(struct kmem_cache *s) { - return kmem_cache_create_memcg(NULL, name, size, align, flags, ctor, NULL); + return 0; } -EXPORT_SYMBOL(kmem_cache_create); +#endif /* CONFIG_MEMCG_KMEM */ -void kmem_cache_destroy(struct kmem_cache *s) +void slab_kmem_cache_release(struct kmem_cache *s) { - /* Destroy all the children caches if we aren't a memcg cache */ - kmem_cache_destroy_memcg_children(s); + kfree(s->name); + kmem_cache_free(kmem_cache, s); +} +void kmem_cache_destroy(struct kmem_cache *s) +{ get_online_cpus(); + get_online_mems(); + mutex_lock(&slab_mutex); + s->refcount--; - if (!s->refcount) { - list_del(&s->list); - - if (!__kmem_cache_shutdown(s)) { - mutex_unlock(&slab_mutex); - if (s->flags & SLAB_DESTROY_BY_RCU) - rcu_barrier(); - - memcg_release_cache(s); - kfree(s->name); - kmem_cache_free(kmem_cache, s); - } else { - list_add(&s->list, &slab_caches); - mutex_unlock(&slab_mutex); - printk(KERN_ERR "kmem_cache_destroy %s: Slab cache still has objects\n", - s->name); - dump_stack(); - } - } else { - mutex_unlock(&slab_mutex); + if (s->refcount) + goto out_unlock; + + if (memcg_cleanup_cache_params(s) != 0) + goto out_unlock; + + if (__kmem_cache_shutdown(s) != 0) { + printk(KERN_ERR "kmem_cache_destroy %s: " + "Slab cache still has objects\n", s->name); + dump_stack(); + goto out_unlock; } + + list_del(&s->list); + + mutex_unlock(&slab_mutex); + if (s->flags & SLAB_DESTROY_BY_RCU) + rcu_barrier(); + + memcg_free_cache_params(s); +#ifdef SLAB_SUPPORTS_SYSFS + sysfs_slab_remove(s); +#else + slab_kmem_cache_release(s); +#endif + goto out; + +out_unlock: + mutex_unlock(&slab_mutex); +out: + put_online_mems(); put_online_cpus(); } EXPORT_SYMBOL(kmem_cache_destroy); +/** + * kmem_cache_shrink - Shrink a cache. + * @cachep: The cache to shrink. + * + * Releases as many slabs as possible for a cache. + * To help debugging, a zero exit status indicates all slabs were released. + */ +int kmem_cache_shrink(struct kmem_cache *cachep) +{ + int ret; + + get_online_cpus(); + get_online_mems(); + ret = __kmem_cache_shrink(cachep); + put_online_mems(); + put_online_cpus(); + return ret; +} +EXPORT_SYMBOL(kmem_cache_shrink); + int slab_is_available(void) { return slab_state >= UP; @@ -299,7 +413,7 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz err = __kmem_cache_create(s, flags); if (err) - panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n", + panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n", name, size, err); s->refcount = -1; /* Exempt from merging for now */ @@ -319,10 +433,218 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size, return s; } +struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_caches); + +#ifdef CONFIG_ZONE_DMA +struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1]; +EXPORT_SYMBOL(kmalloc_dma_caches); +#endif + +/* + * Conversion table for small slabs sizes / 8 to the index in the + * kmalloc array. This is necessary for slabs < 192 since we have non power + * of two cache sizes there. The size of larger slabs can be determined using + * fls. + */ +static s8 size_index[24] = { + 3, /* 8 */ + 4, /* 16 */ + 5, /* 24 */ + 5, /* 32 */ + 6, /* 40 */ + 6, /* 48 */ + 6, /* 56 */ + 6, /* 64 */ + 1, /* 72 */ + 1, /* 80 */ + 1, /* 88 */ + 1, /* 96 */ + 7, /* 104 */ + 7, /* 112 */ + 7, /* 120 */ + 7, /* 128 */ + 2, /* 136 */ + 2, /* 144 */ + 2, /* 152 */ + 2, /* 160 */ + 2, /* 168 */ + 2, /* 176 */ + 2, /* 184 */ + 2 /* 192 */ +}; + +static inline int size_index_elem(size_t bytes) +{ + return (bytes - 1) / 8; +} + +/* + * Find the kmem_cache structure that serves a given size of + * allocation + */ +struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags) +{ + int index; + + if (unlikely(size > KMALLOC_MAX_SIZE)) { + WARN_ON_ONCE(!(flags & __GFP_NOWARN)); + return NULL; + } + + if (size <= 192) { + if (!size) + return ZERO_SIZE_PTR; + + index = size_index[size_index_elem(size)]; + } else + index = fls(size - 1); + +#ifdef CONFIG_ZONE_DMA + if (unlikely((flags & GFP_DMA))) + return kmalloc_dma_caches[index]; + +#endif + return kmalloc_caches[index]; +} + +/* + * Create the kmalloc array. Some of the regular kmalloc arrays + * may already have been created because they were needed to + * enable allocations for slab creation. + */ +void __init create_kmalloc_caches(unsigned long flags) +{ + int i; + + /* + * Patch up the size_index table if we have strange large alignment + * requirements for the kmalloc array. This is only the case for + * MIPS it seems. The standard arches will not generate any code here. + * + * Largest permitted alignment is 256 bytes due to the way we + * handle the index determination for the smaller caches. + * + * Make sure that nothing crazy happens if someone starts tinkering + * around with ARCH_KMALLOC_MINALIGN + */ + BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 || + (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1))); + + for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) { + int elem = size_index_elem(i); + + if (elem >= ARRAY_SIZE(size_index)) + break; + size_index[elem] = KMALLOC_SHIFT_LOW; + } + + if (KMALLOC_MIN_SIZE >= 64) { + /* + * The 96 byte size cache is not used if the alignment + * is 64 byte. + */ + for (i = 64 + 8; i <= 96; i += 8) + size_index[size_index_elem(i)] = 7; + + } + + if (KMALLOC_MIN_SIZE >= 128) { + /* + * The 192 byte sized cache is not used if the alignment + * is 128 byte. Redirect kmalloc to use the 256 byte cache + * instead. + */ + for (i = 128 + 8; i <= 192; i += 8) + size_index[size_index_elem(i)] = 8; + } + for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) { + if (!kmalloc_caches[i]) { + kmalloc_caches[i] = create_kmalloc_cache(NULL, + 1 << i, flags); + } + + /* + * Caches that are not of the two-to-the-power-of size. + * These have to be created immediately after the + * earlier power of two caches + */ + if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6) + kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags); + + if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7) + kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags); + } + + /* Kmalloc array is now usable */ + slab_state = UP; + + for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { + struct kmem_cache *s = kmalloc_caches[i]; + char *n; + + if (s) { + n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i)); + + BUG_ON(!n); + s->name = n; + } + } + +#ifdef CONFIG_ZONE_DMA + for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) { + struct kmem_cache *s = kmalloc_caches[i]; + + if (s) { + int size = kmalloc_size(i); + char *n = kasprintf(GFP_NOWAIT, + "dma-kmalloc-%d", size); + + BUG_ON(!n); + kmalloc_dma_caches[i] = create_kmalloc_cache(n, + size, SLAB_CACHE_DMA | flags); + } + } +#endif +} #endif /* !CONFIG_SLOB */ +/* + * To avoid unnecessary overhead, we pass through large allocation requests + * directly to the page allocator. We use __GFP_COMP, because we will need to + * know the allocation order to free the pages properly in kfree. + */ +void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) +{ + void *ret; + struct page *page; + + flags |= __GFP_COMP; + page = alloc_kmem_pages(flags, order); + ret = page ? page_address(page) : NULL; + kmemleak_alloc(ret, size, 1, flags); + return ret; +} +EXPORT_SYMBOL(kmalloc_order); + +#ifdef CONFIG_TRACING +void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) +{ + void *ret = kmalloc_order(size, flags, order); + trace_kmalloc(_RET_IP_, ret, size, PAGE_SIZE << order, flags); + return ret; +} +EXPORT_SYMBOL(kmalloc_order_trace); +#endif #ifdef CONFIG_SLABINFO + +#ifdef CONFIG_SLAB +#define SLABINFO_RIGHTS (S_IWUSR | S_IRUSR) +#else +#define SLABINFO_RIGHTS S_IRUSR +#endif + void print_slabinfo_header(struct seq_file *m) { /* @@ -357,12 +679,12 @@ static void *s_start(struct seq_file *m, loff_t *pos) return seq_list_start(&slab_caches, *pos); } -static void *s_next(struct seq_file *m, void *p, loff_t *pos) +void *slab_next(struct seq_file *m, void *p, loff_t *pos) { return seq_list_next(p, &slab_caches, pos); } -static void s_stop(struct seq_file *m, void *p) +void slab_stop(struct seq_file *m, void *p) { mutex_unlock(&slab_mutex); } @@ -378,7 +700,7 @@ memcg_accumulate_slabinfo(struct kmem_cache *s, struct slabinfo *info) return; for_each_memcg_cache_index(i) { - c = cache_from_memcg(s, i); + c = cache_from_memcg_idx(s, i); if (!c) continue; @@ -439,8 +761,8 @@ static int s_show(struct seq_file *m, void *p) */ static const struct seq_operations slabinfo_op = { .start = s_start, - .next = s_next, - .stop = s_stop, + .next = slab_next, + .stop = slab_stop, .show = s_show, }; @@ -459,7 +781,8 @@ static const struct file_operations proc_slabinfo_operations = { static int __init slab_proc_init(void) { - proc_create("slabinfo", S_IRUSR, NULL, &proc_slabinfo_operations); + proc_create("slabinfo", SLABINFO_RIGHTS, NULL, + &proc_slabinfo_operations); return 0; } module_init(slab_proc_init); |
