| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-30 11:32:24 -0700 |
| --- | --- | --- |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-30 11:32:24 -0700 |
| commit | 720d85075b7ed3617de8ca8d9097390e303e9f60 | |
| tree | 3ce3911aa3f948b94949440954503c9f1b10ee64 | |
| parent | 637e49ae4f5b4a82b418dae8435e16132b298b7e | |
| parent | 73a1180e140d45cb9ef5fbab103d3bbfc4c84606 | |
Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
Pull SLAB changes from Pekka Enberg:
"Most of the changes included are from Christoph Lameter's "common
slab" patch series that unifies common parts of SLUB, SLAB, and SLOB
allocators. The unification is needed for Glauber Costa's "kmem
memcg" work that will hopefully appear for v3.7.
The rest of the changes are fixes and speedups by various people."
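The unification boils down to every allocator exposing the same core `kmem_cache` fields. As a quick orientation (not part of the commit message), here is the common layout the series settles on, taken from the `include/linux/slab.h` hunk in the diff below. SLOB uses this struct directly; SLAB and SLUB keep their own larger definitions but rename their private fields to match (SLAB's `buffer_size`/`obj_size` and SLUB's `objsize` become `size`/`object_size`, and each allocator's cache list is now linked through `list` onto a shared `slab_caches` list).

```c
/*
 * Common fields every allocator's struct kmem_cache now carries.
 * Under CONFIG_SLOB this is the whole definition; SLAB and SLUB embed
 * the same fields in their allocator-specific structs.
 */
#include <linux/list.h>

struct kmem_cache {
	unsigned int object_size;/* The original size of the object */
	unsigned int size;	/* The aligned/padded/added on size */
	unsigned int align;	/* Alignment as calculated */
	unsigned long flags;	/* Active flags on the slab */
	const char *name;	/* Slab name for sysfs */
	int refcount;		/* Use counter */
	void (*ctor)(void *);	/* Called on object slot creation */
	struct list_head list;	/* List of all slab caches on the system */
};
```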
* 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux: (32 commits)
mm: Fix build warning in kmem_cache_create()
slob: Fix early boot kernel crash
mm, slub: ensure irqs are enabled for kmemcheck
mm, sl[aou]b: Move kmem_cache_create mutex handling to common code
mm, sl[aou]b: Use a common mutex definition
mm, sl[aou]b: Common definition for boot state of the slab allocators
mm, sl[aou]b: Extract common code for kmem_cache_create()
slub: remove invalid reference to list iterator variable
mm: Fix signal SIGFPE in slabinfo.c.
slab: move FULL state transition to an initcall
slab: Fix a typo in commit 8c138b "slab: Get rid of obj_size macro"
mm, slab: Build fix for recent kmem_cache changes
slab: rename gfpflags to allocflags
slub: refactoring unfreeze_partials()
slub: use __cmpxchg_double_slab() at interrupt disabled place
slab/mempolicy: always use local policy from interrupt context
slab: Get rid of obj_size macro
mm, sl[aou]b: Extract common fields from struct kmem_cache
slab: Remove some accessors
slab: Use page struct fields instead of casting
...
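Two of the commits above, "mm, sl[aou]b: Use a common mutex definition" and "mm, sl[aou]b: Common definition for boot state of the slab allocators", replace SLAB's private `cache_chain_mutex`/`cache_chain` and `g_cpucache_up` with state shared by all three allocators. The new internal header `mm/slab.h` (33 lines in the diffstat below) is not included in this excerpt, so the following is only a sketch of what it plausibly declares, inferred from the identifiers the visible `mm/slab.c` hunks use (`slab_mutex`, `slab_caches`, `slab_state`, and the `DOWN`/`PARTIAL_ARRAYCACHE`/`PARTIAL_L3`/`UP`/`FULL` states); the `PARTIAL` state and the exact comments are assumptions.

```c
/* mm/slab.h -- sketch only; the real file is not shown in this diff.
 * Internal state shared by SLAB, SLUB and SLOB. */
#ifndef MM_SLAB_H
#define MM_SLAB_H

#include <linux/types.h>
#include <linux/list.h>
#include <linux/mutex.h>

struct kmem_cache;

/* How far slab bootstrap has progressed (replaces SLAB's
 * g_cpucache_up and the per-allocator slab_is_available() logic). */
enum slab_state {
	DOWN,			/* No slab functionality yet */
	PARTIAL,		/* SLUB: kmem_cache_node cache available */
	PARTIAL_ARRAYCACHE,	/* SLAB: kmalloc size for arraycache available */
	PARTIAL_L3,		/* SLAB: kmalloc size for l3 struct available */
	UP,			/* Slab caches usable, but not all features yet */
	FULL			/* Everything is working */
};

extern enum slab_state slab_state;

/* slab_mutex (replaces cache_chain_mutex) protects the slab_caches
 * list (replaces cache_chain). */
extern struct mutex slab_mutex;
extern struct list_head slab_caches;

/* Allocator-specific part of cache creation, called by the common
 * kmem_cache_create() with slab_mutex held. */
struct kmem_cache *__kmem_cache_create(const char *name, size_t size,
	size_t align, unsigned long flags, void (*ctor)(void *));

#endif
```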
| Mode | File | Lines changed |
| --- | --- | --- |
| -rw-r--r-- | include/linux/mempolicy.h | 2 |
| -rw-r--r-- | include/linux/mm_types.h | 11 |
| -rw-r--r-- | include/linux/slab.h | 24 |
| -rw-r--r-- | include/linux/slab_def.h | 12 |
| -rw-r--r-- | include/linux/slub_def.h | 3 |
| -rw-r--r-- | mm/Makefile | 3 |
| -rw-r--r-- | mm/mempolicy.c | 8 |
| -rw-r--r-- | mm/slab.c | 406 |
| -rw-r--r-- | mm/slab.h | 33 |
| -rw-r--r-- | mm/slab_common.c | 120 |
| -rw-r--r-- | mm/slob.c | 152 |
| -rw-r--r-- | mm/slub.c | 436 |
| -rw-r--r-- | tools/vm/slabinfo.c | 14 |
13 files changed, 608 insertions, 616 deletions
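`mm/slab_common.c` is new here (120 lines) and is where the work that the `mm/slab.c` hunks below delete out of the SLAB-specific `kmem_cache_create()` now lives: the argument sanity checks, the `slab_mutex`/`get_online_cpus()` handling, and the `SLAB_PANIC` fallback, wrapped around a call to the allocator's `__kmem_cache_create()`. The file's body is not part of this excerpt, so the sketch below is a reconstruction from the code being removed from `slab.c`, not the verbatim upstream file; the duplicate-name and lost-name diagnostics that `slab.c` used to run under the mutex are omitted for brevity.

```c
/*
 * Sketch of the common front end in mm/slab_common.c (reconstructed
 * from the code removed from mm/slab.c below; not the verbatim file).
 * kmem_cache_create() does the generic work and hands the
 * allocator-specific part to __kmem_cache_create().
 */
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/cpu.h>
#include <linux/mutex.h>
#include "slab.h"

enum slab_state slab_state;	/* shared boot state */
LIST_HEAD(slab_caches);		/* replaces SLAB's cache_chain */
DEFINE_MUTEX(slab_mutex);	/* replaces cache_chain_mutex */

struct kmem_cache *kmem_cache_create(const char *name, size_t size,
		size_t align, unsigned long flags, void (*ctor)(void *))
{
	struct kmem_cache *s = NULL;

	/* Sanity checks... these are all serious usage bugs. */
	if (!name || in_interrupt() || size < sizeof(void *) ||
	    size > KMALLOC_MAX_SIZE)
		goto out;

	/*
	 * Hold slab_mutex for a consistent view of cpu_online_mask as
	 * well; see cpuup_callback() in mm/slab.c.
	 */
	get_online_cpus();
	mutex_lock(&slab_mutex);

	/* Allocator-specific part (SLAB, SLUB or SLOB). */
	s = __kmem_cache_create(name, size, align, flags, ctor);

	mutex_unlock(&slab_mutex);
	put_online_cpus();
out:
	if (!s && (flags & SLAB_PANIC))
		panic("kmem_cache_create: failed to create slab '%s'\n", name);
	return s;
}
EXPORT_SYMBOL(kmem_cache_create);
```

Early-boot callers (for example `kmem_cache_init()` in the `mm/slab.c` hunks below) call `__kmem_cache_create()` directly, which is why the common wrapper can take `slab_mutex` unconditionally.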
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 4aa42732e47..95b738c7abf 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -215,7 +215,7 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, const nodemask_t *mask); -extern unsigned slab_node(struct mempolicy *policy); +extern unsigned slab_node(void); extern enum zone_type policy_zone; diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 704a626d94a..074eb98fe15 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -53,7 +53,7 @@ struct page { struct { union { pgoff_t index; /* Our offset within mapping. */ - void *freelist; /* slub first free object */ + void *freelist; /* slub/slob first free object */ }; union { @@ -91,11 +91,12 @@ struct page { */ atomic_t _mapcount; - struct { + struct { /* SLUB */ unsigned inuse:16; unsigned objects:15; unsigned frozen:1; }; + int units; /* SLOB */ }; atomic_t _count; /* Usage count, see below. */ }; @@ -117,6 +118,12 @@ struct page { short int pobjects; #endif }; + + struct list_head list; /* slobs list of pages */ + struct { /* slab fields */ + struct kmem_cache *slab_cache; + struct slab *slab_page; + }; }; /* Remainder is not double word aligned */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 67d5d94b783..0dd2dfa7bec 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -93,6 +93,30 @@ (unsigned long)ZERO_SIZE_PTR) /* + * Common fields provided in kmem_cache by all slab allocators + * This struct is either used directly by the allocator (SLOB) + * or the allocator must include definitions for all fields + * provided in kmem_cache_common in their definition of kmem_cache. + * + * Once we can do anonymous structs (C11 standard) we could put a + * anonymous struct definition in these allocators so that the + * separate allocations in the kmem_cache structure of SLAB and + * SLUB is no longer needed. + */ +#ifdef CONFIG_SLOB +struct kmem_cache { + unsigned int object_size;/* The original size of the object */ + unsigned int size; /* The aligned/padded/added on size */ + unsigned int align; /* Alignment as calculated */ + unsigned long flags; /* Active flags on the slab */ + const char *name; /* Slab name for sysfs */ + int refcount; /* Use counter */ + void (*ctor)(void *); /* Called on object slot creation */ + struct list_head list; /* List of all slab caches on the system */ +}; +#endif + +/* * struct kmem_cache related prototypes */ void __init kmem_cache_init(void); diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index fbd1117fdfd..0c634fa376c 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -27,7 +27,7 @@ struct kmem_cache { unsigned int limit; unsigned int shared; - unsigned int buffer_size; + unsigned int size; u32 reciprocal_buffer_size; /* 2) touched by every alloc & free from the backend */ @@ -39,7 +39,7 @@ struct kmem_cache { unsigned int gfporder; /* force GFP flags, e.g. 
GFP_DMA */ - gfp_t gfpflags; + gfp_t allocflags; size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ @@ -52,7 +52,10 @@ struct kmem_cache { /* 4) cache creation/removal */ const char *name; - struct list_head next; + struct list_head list; + int refcount; + int object_size; + int align; /* 5) statistics */ #ifdef CONFIG_DEBUG_SLAB @@ -73,12 +76,11 @@ struct kmem_cache { /* * If debugging is enabled, then the allocator can add additional - * fields and/or padding to every object. buffer_size contains the total + * fields and/or padding to every object. size contains the total * object size including these internal fields, the following two * variables contain the offset to the user object and its size. */ int obj_offset; - int obj_size; #endif /* CONFIG_DEBUG_SLAB */ /* 6) per-cpu/per-node data, touched during every alloc/free */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index c2f8c8bc56e..df448adb728 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -48,7 +48,6 @@ struct kmem_cache_cpu { unsigned long tid; /* Globally unique transaction id */ struct page *page; /* The slab from which we are allocating */ struct page *partial; /* Partially allocated frozen slabs */ - int node; /* The node of the page (or -1 for debug) */ #ifdef CONFIG_SLUB_STATS unsigned stat[NR_SLUB_STAT_ITEMS]; #endif @@ -83,7 +82,7 @@ struct kmem_cache { unsigned long flags; unsigned long min_partial; int size; /* The size of an object including meta data */ - int objsize; /* The size of an object without meta data */ + int object_size; /* The size of an object without meta data */ int offset; /* Free pointer offset. */ int cpu_partial; /* Number of per cpu partial objects to keep around */ struct kmem_cache_order_objects oo; diff --git a/mm/Makefile b/mm/Makefile index 2e2fbbefb99..8e81fe263c9 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -16,7 +16,8 @@ obj-y := filemap.o mempool.o oom_kill.o fadvise.o \ readahead.o swap.o truncate.o vmscan.o shmem.o \ prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ page_isolation.o mm_init.o mmu_context.o percpu.o \ - compaction.o $(mmu-y) + compaction.o slab_common.o $(mmu-y) + obj-y += init-mm.o ifdef CONFIG_NO_BOOTMEM diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 1d771e4200d..bd92431d4c4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1602,8 +1602,14 @@ static unsigned interleave_nodes(struct mempolicy *policy) * task can change it's policy. The system default policy requires no * such protection. */ -unsigned slab_node(struct mempolicy *policy) +unsigned slab_node(void) { + struct mempolicy *policy; + + if (in_interrupt()) + return numa_node_id(); + + policy = current->mempolicy; if (!policy || policy->flags & MPOL_F_LOCAL) return numa_node_id(); diff --git a/mm/slab.c b/mm/slab.c index e901a36e252..1fcf3ac94b6 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -68,7 +68,7 @@ * Further notes from the original documentation: * * 11 April '97. Started multi-threading - markhe - * The global cache-chain is protected by the mutex 'cache_chain_mutex'. + * The global cache-chain is protected by the mutex 'slab_mutex'. * The sem is only needed when accessing/extending the cache-chain, which * can never happen inside an interrupt (kmem_cache_create(), * kmem_cache_shrink() and kmem_cache_reap()). 
@@ -87,6 +87,7 @@ */ #include <linux/slab.h> +#include "slab.h" #include <linux/mm.h> #include <linux/poison.h> #include <linux/swap.h> @@ -424,8 +425,8 @@ static void kmem_list3_init(struct kmem_list3 *parent) * cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1: * redzone word. * cachep->obj_offset: The real object. - * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] - * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address + * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] + * cachep->size - 1* BYTES_PER_WORD: last caller address * [BYTES_PER_WORD long] */ static int obj_offset(struct kmem_cache *cachep) @@ -433,11 +434,6 @@ static int obj_offset(struct kmem_cache *cachep) return cachep->obj_offset; } -static int obj_size(struct kmem_cache *cachep) -{ - return cachep->obj_size; -} - static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); @@ -449,23 +445,22 @@ static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_RED_ZONE)); if (cachep->flags & SLAB_STORE_USER) - return (unsigned long long *)(objp + cachep->buffer_size - + return (unsigned long long *)(objp + cachep->size - sizeof(unsigned long long) - REDZONE_ALIGN); - return (unsigned long long *) (objp + cachep->buffer_size - + return (unsigned long long *) (objp + cachep->size - sizeof(unsigned long long)); } static void **dbg_userword(struct kmem_cache *cachep, void *objp) { BUG_ON(!(cachep->flags & SLAB_STORE_USER)); - return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD); + return (void **)(objp + cachep->size - BYTES_PER_WORD); } #else #define obj_offset(x) 0 -#define obj_size(cachep) (cachep->buffer_size) #define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) #define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;}) #define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;}) @@ -475,7 +470,7 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) #ifdef CONFIG_TRACING size_t slab_buffer_size(struct kmem_cache *cachep) { - return cachep->buffer_size; + return cachep->size; } EXPORT_SYMBOL(slab_buffer_size); #endif @@ -489,56 +484,37 @@ EXPORT_SYMBOL(slab_buffer_size); static int slab_max_order = SLAB_MAX_ORDER_LO; static bool slab_max_order_set __initdata; -/* - * Functions for storing/retrieving the cachep and or slab from the page - * allocator. These are used to find the slab an obj belongs to. With kfree(), - * these are used to find the cache which an obj belongs to. 
- */ -static inline void page_set_cache(struct page *page, struct kmem_cache *cache) -{ - page->lru.next = (struct list_head *)cache; -} - static inline struct kmem_cache *page_get_cache(struct page *page) { page = compound_head(page); BUG_ON(!PageSlab(page)); - return (struct kmem_cache *)page->lru.next; -} - -static inline void page_set_slab(struct page *page, struct slab *slab) -{ - page->lru.prev = (struct list_head *)slab; -} - -static inline struct slab *page_get_slab(struct page *page) -{ - BUG_ON(!PageSlab(page)); - return (struct slab *)page->lru.prev; + return page->slab_cache; } static inline struct kmem_cache *virt_to_cache(const void *obj) { struct page *page = virt_to_head_page(obj); - return page_get_cache(page); + return page->slab_cache; } static inline struct slab *virt_to_slab(const void *obj) { struct page *page = virt_to_head_page(obj); - return page_get_slab(page); + + VM_BUG_ON(!PageSlab(page)); + return page->slab_page; } static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, unsigned int idx) { - return slab->s_mem + cache->buffer_size * idx; + return slab->s_mem + cache->size * idx; } /* - * We want to avoid an expensive divide : (offset / cache->buffer_size) - * Using the fact that buffer_size is a constant for a particular cache, - * we can replace (offset / cache->buffer_size) by + * We want to avoid an expensive divide : (offset / cache->size) + * Using the fact that size is a constant for a particular cache, + * we can replace (offset / cache->size) by * reciprocal_divide(offset, cache->reciprocal_buffer_size) */ static inline unsigned int obj_to_index(const struct kmem_cache *cache, @@ -584,33 +560,12 @@ static struct kmem_cache cache_cache = { .batchcount = 1, .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, - .buffer_size = sizeof(struct kmem_cache), + .size = sizeof(struct kmem_cache), .name = "kmem_cache", }; #define BAD_ALIEN_MAGIC 0x01020304ul -/* - * chicken and egg problem: delay the per-cpu array allocation - * until the general caches are up. - */ -static enum { - NONE, - PARTIAL_AC, - PARTIAL_L3, - EARLY, - LATE, - FULL -} g_cpucache_up; - -/* - * used by boot code to determine if it can use slab based allocator - */ -int slab_is_available(void) -{ - return g_cpucache_up >= EARLY; -} - #ifdef CONFIG_LOCKDEP /* @@ -676,7 +631,7 @@ static void init_node_lock_keys(int q) { struct cache_sizes *s = malloc_sizes; - if (g_cpucache_up < LATE) + if (slab_state < UP) return; for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { @@ -716,12 +671,6 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep) } #endif -/* - * Guard access to the cache-chain. - */ -static DEFINE_MUTEX(cache_chain_mutex); -static struct list_head cache_chain; - static DEFINE_PER_CPU(struct delayed_work, slab_reap_work); static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) @@ -1145,7 +1094,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp) * When hotplugging memory or a cpu, existing nodelists are not replaced if * already in use. * - * Must hold cache_chain_mutex. + * Must hold slab_mutex. */ static int init_cache_nodelists_node(int node) { @@ -1153,7 +1102,7 @@ static int init_cache_nodelists_node(int node) struct kmem_list3 *l3; const int memsize = sizeof(struct kmem_list3); - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { /* * Set up the size64 kmemlist for cpu before we can * begin anything. 
Make sure some other cpu on this @@ -1169,7 +1118,7 @@ static int init_cache_nodelists_node(int node) /* * The l3s don't come and go as CPUs come and - * go. cache_chain_mutex is sufficient + * go. slab_mutex is sufficient * protection here. */ cachep->nodelists[node] = l3; @@ -1191,7 +1140,7 @@ static void __cpuinit cpuup_canceled(long cpu) int node = cpu_to_mem(cpu); const struct cpumask *mask = cpumask_of_node(node); - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { struct array_cache *nc; struct array_cache *shared; struct array_cache **alien; @@ -1241,7 +1190,7 @@ free_array_cache: * the respective cache's slabs, now we can go ahead and * shrink each nodelist to its limit. */ - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { l3 = cachep->nodelists[node]; if (!l3) continue; @@ -1270,7 +1219,7 @@ static int __cpuinit cpuup_prepare(long cpu) * Now we can go ahead with allocating the shared arrays and * array caches */ - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { struct array_cache *nc; struct array_cache *shared = NULL; struct array_cache **alien = NULL; @@ -1338,9 +1287,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); err = cpuup_prepare(cpu); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); break; case CPU_ONLINE: case CPU_ONLINE_FROZEN: @@ -1350,7 +1299,7 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, case CPU_DOWN_PREPARE: case CPU_DOWN_PREPARE_FROZEN: /* - * Shutdown cache reaper. Note that the cache_chain_mutex is + * Shutdown cache reaper. Note that the slab_mutex is * held so that if cache_reap() is invoked it cannot do * anything expensive but will only modify reap_work * and reschedule the timer. @@ -1377,9 +1326,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb, #endif case CPU_UP_CANCELED: case CPU_UP_CANCELED_FROZEN: - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); cpuup_canceled(cpu); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); break; } return notifier_from_errno(err); @@ -1395,14 +1344,14 @@ static struct notifier_block __cpuinitdata cpucache_notifier = { * Returns -EBUSY if all objects cannot be drained so that the node is not * removed. * - * Must hold cache_chain_mutex. + * Must hold slab_mutex. 
*/ static int __meminit drain_cache_nodelists_node(int node) { struct kmem_cache *cachep; int ret = 0; - list_for_each_entry(cachep, &cache_chain, next) { + list_for_each_entry(cachep, &slab_caches, list) { struct kmem_list3 *l3; l3 = cachep->nodelists[node]; @@ -1433,14 +1382,14 @@ static int __meminit slab_memory_callback(struct notifier_block *self, switch (action) { case MEM_GOING_ONLINE: - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); ret = init_cache_nodelists_node(nid); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); break; case MEM_GOING_OFFLINE: - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); ret = drain_cache_nodelists_node(nid); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); break; case MEM_ONLINE: case MEM_OFFLINE: @@ -1544,8 +1493,8 @@ void __init kmem_cache_init(void) node = numa_mem_id(); /* 1) create the cache_cache */ - INIT_LIST_HEAD(&cache_chain); - list_add(&cache_cache.next, &cache_chain); + INIT_LIST_HEAD(&slab_caches); + list_add(&cache_cache.list, &slab_caches); cache_cache.colour_off = cache_line_size(); cache_cache.array[smp_processor_id()] = &initarray_cache.cache; cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; @@ -1553,18 +1502,16 @@ void __init kmem_cache_init(void) /* * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids */ - cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + + cache_cache.size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + nr_node_ids * sizeof(struct kmem_list3 *); -#if DEBUG - cache_cache.obj_size = cache_cache.buffer_size; -#endif - cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, + cache_cache.object_size = cache_cache.size; + cache_cache.size = ALIGN(cache_cache.size, cache_line_size()); cache_cache.reciprocal_buffer_size = - reciprocal_value(cache_cache.buffer_size); + reciprocal_value(cache_cache.size); for (order = 0; order < MAX_ORDER; order++) { - cache_estimate(order, cache_cache.buffer_size, + cache_estimate(order, cache_cache.size, cache_line_size(), 0, &left_over, &cache_cache.num); if (cache_cache.num) break; @@ -1585,7 +1532,7 @@ void __init kmem_cache_init(void) * bug. */ - sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, + sizes[INDEX_AC].cs_cachep = __kmem_cache_create(names[INDEX_AC].name, sizes[INDEX_AC].cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, @@ -1593,7 +1540,7 @@ void __init kmem_cache_init(void) if (INDEX_AC != INDEX_L3) { sizes[INDEX_L3].cs_cachep = - kmem_cache_create(names[INDEX_L3].name, + __kmem_cache_create(names[INDEX_L3].name, sizes[INDEX_L3].cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, @@ -1611,14 +1558,14 @@ void __init kmem_cache_init(void) * allow tighter packing of the smaller caches. 
*/ if (!sizes->cs_cachep) { - sizes->cs_cachep = kmem_cache_create(names->name, + sizes->cs_cachep = __kmem_cache_create(names->name, sizes->cs_size, ARCH_KMALLOC_MINALIGN, ARCH_KMALLOC_FLAGS|SLAB_PANIC, NULL); } #ifdef CONFIG_ZONE_DMA - sizes->cs_dmacachep = kmem_cache_create( + sizes->cs_dmacachep = __kmem_cache_create( names->name_dma, sizes->cs_size, ARCH_KMALLOC_MINALIGN, @@ -1676,27 +1623,27 @@ void __init kmem_cache_init(void) } } - g_cpucache_up = EARLY; + slab_state = UP; } void __init kmem_cache_init_late(void) { struct kmem_cache *cachep; - g_cpucache_up = LATE; + slab_state = UP; /* Annotate slab for lockdep -- annotate the malloc caches */ init_lock_keys(); /* 6) resize the head arrays to their final sizes */ - mutex_lock(&cache_chain_mutex); - list_for_each_entry(cachep, &cache_chain, next) + mutex_lock(&slab_mutex); + list_for_each_entry(cachep, &slab_caches, list) if (enable_cpucache(cachep, GFP_NOWAIT)) BUG(); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); /* Done! */ - g_cpucache_up = FULL; + slab_state = FULL; /* * Register a cpu startup notifier callback that initializes @@ -1727,6 +1674,9 @@ static int __init cpucache_init(void) */ for_each_online_cpu(cpu) start_cpu_timer(cpu); + + /* Done! */ + slab_state = FULL; return 0; } __initcall(cpucache_init); @@ -1743,7 +1693,7 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n", nodeid, gfpflags); printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n", - cachep->name, cachep->buffer_size, cachep->gfporder); + cachep->name, cachep->size, cachep->gfporder); for_each_online_node(node) { unsigned long active_objs = 0, num_objs = 0, free_objects = 0; @@ -1798,7 +1748,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) flags |= __GFP_COMP; #endif - flags |= cachep->gfpflags; + flags |= cachep->allocflags; if (cachep->flags & SLAB_RECLAIM_ACCOUNT) flags |= __GFP_RECLAIMABLE; @@ -1874,7 +1824,7 @@ static void kmem_rcu_free(struct rcu_head *head) static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, unsigned long caller) { - int size = obj_size(cachep); + int size = cachep->object_size; addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)]; @@ -1906,7 +1856,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr, static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val) { - int size = obj_size(cachep); + int size = cachep->object_size; addr = &((char *)addr)[obj_offset(cachep)]; memset(addr, val, size); @@ -1966,7 +1916,7 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) printk("\n"); } realobj = (char *)objp + obj_offset(cachep); - size = obj_size(cachep); + size = cachep->object_size; for (i = 0; i < size && lines; i += 16, lines--) { int limit; limit = 16; @@ -1983,7 +1933,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) int lines = 0; realobj = (char *)objp + obj_offset(cachep); - size = obj_size(cachep); + size = cachep->object_size; for (i = 0; i < size; i++) { char exp = POISON_FREE; @@ -2047,10 +1997,10 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if (cachep->buffer_size % PAGE_SIZE == 0 && + if (cachep->size % PAGE_SIZE == 0 && OFF_SLAB(cachep)) kernel_map_pages(virt_to_page(objp), - cachep->buffer_size / PAGE_SIZE, 1); + cachep->size / PAGE_SIZE, 1); else 
check_poison_obj(cachep, objp); #else @@ -2194,10 +2144,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) { - if (g_cpucache_up == FULL) + if (slab_state >= FULL) return enable_cpucache(cachep, gfp); - if (g_cpucache_up == NONE) { + if (slab_state == DOWN) { /* * Note: the first kmem_cache_create must create the cache * that's used by kmalloc(24), otherwise the creation of @@ -2212,16 +2162,16 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) */ set_up_list3s(cachep, SIZE_AC); if (INDEX_AC == INDEX_L3) - g_cpucache_up = PARTIAL_L3; + slab_state = PARTIAL_L3; else - g_cpucache_up = PARTIAL_AC; + slab_state = PARTIAL_ARRAYCACHE; } else { cachep->array[smp_processor_id()] = kmalloc(sizeof(struct arraycache_init), gfp); - if (g_cpucache_up == PARTIAL_AC) { + if (slab_state == PARTIAL_ARRAYCACHE) { set_up_list3s(cachep, SIZE_L3); - g_cpucache_up = PARTIAL_L3; + slab_state = PARTIAL_L3; } else { int node; for_each_online_node(node) { @@ -2247,7 +2197,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) } /** - * kmem_cache_create - Create a cache. + * __kmem_cache_create - Create a cache. * @name: A string which is used in /proc/slabinfo to identify this cache. * @size: The size of objects to be created in this cache. * @align: The required alignment for the objects. @@ -2274,59 +2224,14 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) * as davem. */ struct kmem_cache * -kmem_cache_create (const char *name, size_t size, size_t align, +__kmem_cache_create (const char *name, size_t size, size_t align, unsigned long flags, void (*ctor)(void *)) { size_t left_over, slab_size, ralign; - struct kmem_cache *cachep = NULL, *pc; + struct kmem_cache *cachep = NULL; gfp_t gfp; - /* - * Sanity checks... these are all serious usage bugs. - */ - if (!name || in_interrupt() || (size < BYTES_PER_WORD) || - size > KMALLOC_MAX_SIZE) { - printk(KERN_ERR "%s: Early error in slab %s\n", __func__, - name); - BUG(); - } - - /* - * We use cache_chain_mutex to ensure a consistent view of - * cpu_online_mask as well. Please see cpuup_callback - */ - if (slab_is_available()) { - get_online_cpus(); - mutex_lock(&cache_chain_mutex); - } - - list_for_each_entry(pc, &cache_chain, next) { - char tmp; - int res; - - /* - * This happens when the module gets unloaded and doesn't - * destroy its slab cache and no-one else reuses the vmalloc - * area of the module. Print a warning. - */ - res = probe_kernel_address(pc->name, tmp); - if (res) { - printk(KERN_ERR - "SLAB: cache with size %d has lost its name\n", - pc->buffer_size); - continue; - } - - if (!strcmp(pc->name, name)) { - printk(KERN_ERR - "kmem_cache_create: duplicate cache %s\n", name); - dump_stack(); - goto oops; - } - } - #if DEBUG - WARN_ON(strchr(name, ' ')); /* It confuses parsers */ #if FORCED_DEBUG /* * Enable redzoning and last user accounting, except for caches with @@ -2415,11 +2320,12 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* Get cache's description obj. 
*/ cachep = kmem_cache_zalloc(&cache_cache, gfp); if (!cachep) - goto oops; + return NULL; cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; + cachep->object_size = size; + cachep->align = align; #if DEBUG - cachep->obj_size = size; /* * Both debugging options require word-alignment which is calculated @@ -2442,7 +2348,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, } #if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) if (size >= malloc_sizes[INDEX_L3 + 1].cs_size - && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { + && cachep->object_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) { cachep->obj_offset += PAGE_SIZE - ALIGN(size, align); size = PAGE_SIZE; } @@ -2471,8 +2377,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, printk(KERN_ERR "kmem_cache_create: couldn't create cache %s.\n", name); kmem_cache_free(&cache_cache, cachep); - cachep = NULL; - goto oops; + return NULL; } slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab), align); @@ -2508,10 +2413,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, cachep->colour = left_over / cachep->colour_off; cachep->slab_size = slab_size; cachep->flags = flags; - cachep->gfpflags = 0; + cachep->allocflags = 0; if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA)) - cachep->gfpflags |= GFP_DMA; - cachep->buffer_size = size; + cachep->allocflags |= GFP_DMA; + cachep->size = size; cachep->reciprocal_buffer_size = reciprocal_value(size); if (flags & CFLGS_OFF_SLAB) { @@ -2530,8 +2435,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, if (setup_cpu_cache(cachep, gfp)) { __kmem_cache_destroy(cachep); - cachep = NULL; - goto oops; + return NULL; } if (flags & SLAB_DEBUG_OBJECTS) { @@ -2545,18 +2449,9 @@ kmem_cache_create (const char *name, size_t size, size_t align, } /* cache setup completed, link it into the list */ - list_add(&cachep->next, &cache_chain); -oops: - if (!cachep && (flags & SLAB_PANIC)) - panic("kmem_cache_create(): failed to create slab `%s'\n", - name); - if (slab_is_available()) { - mutex_unlock(&cache_chain_mutex); - put_online_cpus(); - } + list_add(&cachep->list, &slab_caches); return cachep; } -EXPORT_SYMBOL(kmem_cache_create); #if DEBUG static void check_irq_off(void) @@ -2671,7 +2566,7 @@ out: return nr_freed; } -/* Called with cache_chain_mutex held to protect against cpu hotplug */ +/* Called with slab_mutex held to protect against cpu hotplug */ static int __cache_shrink(struct kmem_cache *cachep) { int ret = 0, i = 0; @@ -2706,9 +2601,9 @@ int kmem_cache_shrink(struct kmem_cache *cachep) BUG_ON(!cachep || in_interrupt()); get_online_cpus(); - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); ret = __cache_shrink(cachep); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); put_online_cpus(); return ret; } @@ -2736,15 +2631,15 @@ void kmem_cache_destroy(struct kmem_cache *cachep) /* Find the cache in the chain of caches. 
*/ get_online_cpus(); - mutex_lock(&cache_chain_mutex); + mutex_lock(&slab_mutex); /* * the chain is never empty, cache_cache is never destroyed */ - list_del(&cachep->next); + list_del(&cachep->list); if (__cache_shrink(cachep)) { slab_error(cachep, "Can't free all objects"); - list_add(&cachep->next, &cache_chain); - mutex_unlock(&cache_chain_mutex); + list_add(&cachep->list, &slab_caches); + mutex_unlock(&slab_mutex); put_online_cpus(); return; } @@ -2753,7 +2648,7 @@ void kmem_cache_destroy(struct kmem_cache *cachep) rcu_barrier(); __kmem_cache_destroy(cachep); - mutex_unlock(&cache_chain_mutex); + mutex_unlock(&slab_mutex); put_online_cpus(); } EXPORT_SYMBOL(kmem_cache_destroy); @@ -2840,10 +2735,10 @@ static void cache_init_objs(struct kmem_cache *cachep, slab_error(cachep, "constructor overwrote the" " start of an object"); } - if ((cachep->buffer_size % PAGE_SIZE) == 0 && + if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) kernel_map_pages(virt_to_page(objp), - cachep->buffer_size / PAGE_SIZE, 0); + cachep->size / PAGE_SIZE |