diff options
author | Steven Whitehouse <swhiteho@redhat.com> | 2006-03-31 15:34:58 -0500 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2006-03-31 15:34:58 -0500 |
commit | 86579dd06deecfa6ac88d5e84e4d63c397cd6f6d (patch) | |
tree | b4475d3ccde53015ad84a06e4e55e64591171b75 /mm/slab.c | |
parent | 7ea9ea832212c4a755650f7c7cc1ff0b63292a41 (diff) | |
parent | a0f067802576d4eb4c65d40b8ee7d6ea3c81dd61 (diff) |
Merge branch 'master'
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- | mm/slab.c | 1233 |
1 files changed, 760 insertions, 473 deletions
diff --git a/mm/slab.c b/mm/slab.c index d0bd7f07ab0..4cbf8bb1355 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -50,7 +50,7 @@ * The head array is strictly LIFO and should improve the cache hit rates. * On SMP, it additionally reduces the spinlock operations. * - * The c_cpuarray may not be read with enabled local interrupts - + * The c_cpuarray may not be read with enabled local interrupts - * it's changed with a smp_call_function(). * * SMP synchronization: @@ -94,6 +94,7 @@ #include <linux/interrupt.h> #include <linux/init.h> #include <linux/compiler.h> +#include <linux/cpuset.h> #include <linux/seq_file.h> #include <linux/notifier.h> #include <linux/kallsyms.h> @@ -170,15 +171,15 @@ #if DEBUG # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ SLAB_POISON | SLAB_HWCACHE_ALIGN | \ - SLAB_NO_REAP | SLAB_CACHE_DMA | \ + SLAB_CACHE_DMA | \ SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU) + SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) #else -# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ +# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ - SLAB_DESTROY_BY_RCU) + SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) #endif /* @@ -203,7 +204,8 @@ typedef unsigned int kmem_bufctl_t; #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) -#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2) +#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) +#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) /* Max number of objs-per-slab for caches which use off-slab slabs. * Needed to avoid a possible looping condition in cache_grow(). @@ -266,16 +268,17 @@ struct array_cache { unsigned int batchcount; unsigned int touched; spinlock_t lock; - void *entry[0]; /* - * Must have this definition in here for the proper - * alignment of array_cache. Also simplifies accessing - * the entries. - * [0] is for gcc 2.95. It should really be []. - */ + void *entry[0]; /* + * Must have this definition in here for the proper + * alignment of array_cache. Also simplifies accessing + * the entries. + * [0] is for gcc 2.95. It should really be []. + */ }; -/* bootstrap: The caches do not work without cpuarrays anymore, - * but the cpuarrays are allocated from the generic caches... +/* + * bootstrap: The caches do not work without cpuarrays anymore, but the + * cpuarrays are allocated from the generic caches... */ #define BOOT_CPUCACHE_ENTRIES 1 struct arraycache_init { @@ -291,13 +294,13 @@ struct kmem_list3 { struct list_head slabs_full; struct list_head slabs_free; unsigned long free_objects; - unsigned long next_reap; - int free_touched; unsigned int free_limit; unsigned int colour_next; /* Per-node cache coloring */ spinlock_t list_lock; struct array_cache *shared; /* shared per node */ struct array_cache **alien; /* on other nodes */ + unsigned long next_reap; /* updated without locking */ + int free_touched; /* updated without locking */ }; /* @@ -310,10 +313,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; #define SIZE_L3 (1 + MAX_NUMNODES) /* - * This function must be completely optimized away if - * a constant is passed to it. Mostly the same as - * what is in linux/slab.h except it returns an - * index. + * This function must be completely optimized away if a constant is passed to + * it. Mostly the same as what is in linux/slab.h except it returns an index. */ static __always_inline int index_of(const size_t size) { @@ -351,14 +352,14 @@ static void kmem_list3_init(struct kmem_list3 *parent) parent->free_touched = 0; } -#define MAKE_LIST(cachep, listp, slab, nodeid) \ - do { \ - INIT_LIST_HEAD(listp); \ - list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ +#define MAKE_LIST(cachep, listp, slab, nodeid) \ + do { \ + INIT_LIST_HEAD(listp); \ + list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ } while (0) -#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ - do { \ +#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ + do { \ MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ @@ -373,28 +374,30 @@ static void kmem_list3_init(struct kmem_list3 *parent) struct kmem_cache { /* 1) per-cpu data, touched during every alloc/free */ struct array_cache *array[NR_CPUS]; +/* 2) Cache tunables. Protected by cache_chain_mutex */ unsigned int batchcount; unsigned int limit; unsigned int shared; + unsigned int buffer_size; -/* 2) touched by every alloc & free from the backend */ +/* 3) touched by every alloc & free from the backend */ struct kmem_list3 *nodelists[MAX_NUMNODES]; - unsigned int flags; /* constant flags */ - unsigned int num; /* # of objs per slab */ - spinlock_t spinlock; -/* 3) cache_grow/shrink */ + unsigned int flags; /* constant flags */ + unsigned int num; /* # of objs per slab */ + +/* 4) cache_grow/shrink */ /* order of pgs per slab (2^n) */ unsigned int gfporder; /* force GFP flags, e.g. GFP_DMA */ gfp_t gfpflags; - size_t colour; /* cache colouring range */ + size_t colour; /* cache colouring range */ unsigned int colour_off; /* colour offset */ struct kmem_cache *slabp_cache; unsigned int slab_size; - unsigned int dflags; /* dynamic flags */ + unsigned int dflags; /* dynamic flags */ /* constructor func */ void (*ctor) (void *, struct kmem_cache *, unsigned long); @@ -402,11 +405,11 @@ struct kmem_cache { /* de-constructor func */ void (*dtor) (void *, struct kmem_cache *, unsigned long); -/* 4) cache creation/removal */ +/* 5) cache creation/removal */ const char *name; struct list_head next; -/* 5) statistics */ +/* 6) statistics */ #if STATS unsigned long num_active; unsigned long num_allocations; @@ -438,8 +441,9 @@ struct kmem_cache { #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) #define BATCHREFILL_LIMIT 16 -/* Optimization question: fewer reaps means less - * probability for unnessary cpucache drain/refill cycles. +/* + * Optimization question: fewer reaps means less probability for unnessary + * cpucache drain/refill cycles. * * OTOH the cpuarrays can contain lots of objects, * which could lock up otherwise freeable slabs. @@ -453,17 +457,19 @@ struct kmem_cache { #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) #define STATS_INC_GROWN(x) ((x)->grown++) #define STATS_INC_REAPED(x) ((x)->reaped++) -#define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \ - (x)->high_mark = (x)->num_active; \ - } while (0) +#define STATS_SET_HIGH(x) \ + do { \ + if ((x)->num_active > (x)->high_mark) \ + (x)->high_mark = (x)->num_active; \ + } while (0) #define STATS_INC_ERR(x) ((x)->errors++) #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) #define STATS_INC_NODEFREES(x) ((x)->node_frees++) -#define STATS_SET_FREEABLE(x, i) \ - do { if ((x)->max_freeable < i) \ - (x)->max_freeable = i; \ - } while (0) - +#define STATS_SET_FREEABLE(x, i) \ + do { \ + if ((x)->max_freeable < i) \ + (x)->max_freeable = i; \ + } while (0) #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) @@ -478,9 +484,7 @@ struct kmem_cache { #define STATS_INC_ERR(x) do { } while (0) #define STATS_INC_NODEALLOCS(x) do { } while (0) #define STATS_INC_NODEFREES(x) do { } while (0) -#define STATS_SET_FREEABLE(x, i) \ - do { } while (0) - +#define STATS_SET_FREEABLE(x, i) do { } while (0) #define STATS_INC_ALLOCHIT(x) do { } while (0) #define STATS_INC_ALLOCMISS(x) do { } while (0) #define STATS_INC_FREEHIT(x) do { } while (0) @@ -488,7 +492,8 @@ struct kmem_cache { #endif #if DEBUG -/* Magic nums for obj red zoning. +/* + * Magic nums for obj red zoning. * Placed in the first word before and the first word after an obj. */ #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ @@ -499,7 +504,8 @@ struct kmem_cache { #define POISON_FREE 0x6b /* for use-after-free poisoning */ #define POISON_END 0xa5 /* end-byte of poisoning */ -/* memory layout of objects: +/* + * memory layout of objects: * 0 : objp * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that * the end of an object is aligned with the end of the real @@ -508,7 +514,8 @@ struct kmem_cache { * redzone word. * cachep->obj_offset: The real object. * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] - * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] + * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address + * [BYTES_PER_WORD long] */ static int obj_offset(struct kmem_cache *cachep) { @@ -552,8 +559,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) #endif /* - * Maximum size of an obj (in 2^order pages) - * and absolute limit for the gfp order. + * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp + * order. */ #if defined(CONFIG_LARGE_ALLOCS) #define MAX_OBJ_ORDER 13 /* up to 32Mb */ @@ -573,9 +580,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) #define BREAK_GFP_ORDER_LO 0 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; -/* Functions for storing/retrieving the cachep and or slab from the - * global 'mem_map'. These are used to find the slab an obj belongs to. - * With kfree(), these are used to find the cache which an obj belongs to. +/* + * Functions for storing/retrieving the cachep and or slab from the page + * allocator. These are used to find the slab an obj belongs to. With kfree(), + * these are used to find the cache which an obj belongs to. */ static inline void page_set_cache(struct page *page, struct kmem_cache *cache) { @@ -584,6 +592,8 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache) static inline struct kmem_cache *page_get_cache(struct page *page) { + if (unlikely(PageCompound(page))) + page = (struct page *)page_private(page); return (struct kmem_cache *)page->lru.next; } @@ -594,6 +604,8 @@ static inline void page_set_slab(struct page *page, struct slab *slab) static inline struct slab *page_get_slab(struct page *page) { + if (unlikely(PageCompound(page))) + page = (struct page *)page_private(page); return (struct slab *)page->lru.prev; } @@ -609,7 +621,21 @@ static inline struct slab *virt_to_slab(const void *obj) return page_get_slab(page); } -/* These are the default caches for kmalloc. Custom caches can have other sizes. */ +static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, + unsigned int idx) +{ + return slab->s_mem + cache->buffer_size * idx; +} + +static inline unsigned int obj_to_index(struct kmem_cache *cache, + struct slab *slab, void *obj) +{ + return (unsigned)(obj - slab->s_mem) / cache->buffer_size; +} + +/* + * These are the default caches for kmalloc. Custom caches can have other sizes. + */ struct cache_sizes malloc_sizes[] = { #define CACHE(x) { .cs_size = (x) }, #include <linux/kmalloc_sizes.h> @@ -642,8 +668,6 @@ static struct kmem_cache cache_cache = { .limit = BOOT_CPUCACHE_ENTRIES, .shared = 1, .buffer_size = sizeof(struct kmem_cache), - .flags = SLAB_NO_REAP, - .spinlock = SPIN_LOCK_UNLOCKED, .name = "kmem_cache", #if DEBUG .obj_size = sizeof(struct kmem_cache), @@ -655,8 +679,8 @@ static DEFINE_MUTEX(cache_chain_mutex); static struct list_head cache_chain; /* - * vm_enough_memory() looks at this to determine how many - * slab-allocated pages are possibly freeable under pressure + * vm_enough_memory() looks at this to determine how many slab-allocated pages + * are possibly freeable under pressure * * SLAB_RECLAIM_ACCOUNT turns this on per-slab */ @@ -675,7 +699,8 @@ static enum { static DEFINE_PER_CPU(struct work_struct, reap_work); -static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); +static void free_block(struct kmem_cache *cachep, void **objpp, int len, + int node); static void enable_cpucache(struct kmem_cache *cachep); static void cache_reap(void *unused); static int __node_shrink(struct kmem_cache *cachep, int node); @@ -685,7 +710,8 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) return cachep->array[smp_processor_id()]; } -static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) +static inline struct kmem_cache *__find_general_cachep(size_t size, + gfp_t gfpflags) { struct cache_sizes *csizep = malloc_sizes; @@ -720,8 +746,9 @@ static size_t slab_mgmt_size(size_t nr_objs, size_t align) return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); } -/* Calculate the number of objects and left-over bytes for a given - buffer size. */ +/* + * Calculate the number of objects and left-over bytes for a given buffer size. + */ static void cache_estimate(unsigned long gfporder, size_t buffer_size, size_t align, int flags, size_t *left_over, unsigned int *num) @@ -782,7 +809,8 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) -static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg) +static void __slab_error(const char *function, struct kmem_cache *cachep, + char *msg) { printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", function, cachep->name, msg); @@ -804,7 +832,7 @@ static void init_reap_node(int cpu) node = next_node(cpu_to_node(cpu), node_online_map); if (node == MAX_NUMNODES) - node = 0; + node = first_node(node_online_map); __get_cpu_var(reap_node) = node; } @@ -870,8 +898,33 @@ static struct array_cache *alloc_arraycache(int node, int entries, return nc; } +/* + * Transfer objects in one arraycache to another. + * Locking must be handled by the caller. + * + * Return the number of entries transferred. + */ +static int transfer_objects(struct array_cache *to, + struct array_cache *from, unsigned int max) +{ + /* Figure out how many entries to transfer */ + int nr = min(min(from->avail, max), to->limit - to->avail); + + if (!nr) + return 0; + + memcpy(to->entry + to->avail, from->entry + from->avail -nr, + sizeof(void *) *nr); + + from->avail -= nr; + to->avail += nr; + to->touched = 1; + return nr; +} + #ifdef CONFIG_NUMA static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); +static void *alternate_node_alloc(struct kmem_cache *, gfp_t); static struct array_cache **alloc_alien_cache(int node, int limit) { @@ -906,10 +959,8 @@ static void free_alien_cache(struct array_cache **ac_ptr) if (!ac_ptr) return; - for_each_node(i) kfree(ac_ptr[i]); - kfree(ac_ptr); } @@ -920,6 +971,13 @@ static void __drain_alien_cache(struct kmem_cache *cachep, if (ac->avail) { spin_lock(&rl3->list_lock); + /* + * Stuff objects into the remote nodes shared array first. + * That way we could avoid the overhead of putting the objects + * into the free lists and getting them back later. + */ + transfer_objects(rl3->shared, ac, ac->limit); + free_block(cachep, ac->entry, ac->avail, node); ac->avail = 0; spin_unlock(&rl3->list_lock); @@ -935,15 +993,16 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) if (l3->alien) { struct array_cache *ac = l3->alien[node]; - if (ac && ac->avail) { - spin_lock_irq(&ac->lock); + + if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { __drain_alien_cache(cachep, ac, node); spin_unlock_irq(&ac->lock); } } } -static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) +static void drain_alien_cache(struct kmem_cache *cachep, + struct array_cache **alien) { int i = 0; struct array_cache *ac; @@ -986,20 +1045,22 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, switch (action) { case CPU_UP_PREPARE: mutex_lock(&cache_chain_mutex); - /* we need to do this right in the beginning since + /* + * We need to do this right in the beginning since * alloc_arraycache's are going to use this list. * kmalloc_node allows us to add the slab to the right * kmem_list3 and not this cpu's kmem_list3 */ list_for_each_entry(cachep, &cache_chain, next) { - /* setup the size64 kmemlist for cpu before we can + /* + * Set up the size64 kmemlist for cpu before we can * begin anything. Make sure some other cpu on this * node has not already allocated this */ if (!cachep->nodelists[node]) { - if (!(l3 = kmalloc_node(memsize, - GFP_KERNEL, node))) + l3 = kmalloc_node(memsize, GFP_KERNEL, node); + if (!l3) goto bad; kmem_list3_init(l3); l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + @@ -1015,13 +1076,15 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, spin_lock_irq(&cachep->nodelists[node]->list_lock); cachep->nodelists[node]->free_limit = - (1 + nr_cpus_node(node)) * - cachep->batchcount + cachep->num; + (1 + nr_cpus_node(node)) * + cachep->batchcount + cachep->num; spin_unlock_irq(&cachep->nodelists[node]->list_lock); } - /* Now we can go ahead with allocating the shared array's - & array cache's */ + /* + * Now we can go ahead with allocating the shared arrays and + * array caches + */ list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; @@ -1041,7 +1104,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, if (!alien) goto bad; cachep->array[cpu] = nc; - l3 = cachep->nodelists[node]; BUG_ON(!l3); @@ -1061,7 +1123,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, } #endif spin_unlock_irq(&l3->list_lock); - kfree(shared); free_alien_cache(alien); } @@ -1083,7 +1144,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, /* fall thru */ case CPU_UP_CANCELED: mutex_lock(&cache_chain_mutex); - list_for_each_entry(cachep, &cache_chain, next) { struct array_cache *nc; struct array_cache *shared; @@ -1150,7 +1210,7 @@ free_array_cache: #endif } return NOTIFY_OK; - bad: +bad: mutex_unlock(&cache_chain_mutex); return NOTIFY_BAD; } @@ -1160,7 +1220,8 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; /* * swap the static kmem_list3 with kmalloced memory */ -static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid) +static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, + int nodeid) { struct kmem_list3 *ptr; @@ -1175,8 +1236,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int no local_irq_enable(); } -/* Initialisation. - * Called after the gfp() functions have been enabled, and before smp_init(). +/* + * Initialisation. Called after the page allocator have been initialised and + * before smp_init(). */ void __init kmem_cache_init(void) { @@ -1201,9 +1263,9 @@ void __init kmem_cache_init(void) /* Bootstrap is tricky, because several objects are allocated * from caches that do not exist yet: - * 1) initialize the cache_cache cache: it contains the struct kmem_cache - * structures of all caches, except cache_cache itself: cache_cache - * is statically allocated. + * 1) initialize the cache_cache cache: it contains the struct + * kmem_cache structures of all caches, except cache_cache itself: + * cache_cache is statically allocated. * Initially an __init data area is used for the head array and the * kmem_list3 structures, it's replaced with a kmalloc allocated * array at the end of the bootstrap. @@ -1226,7 +1288,8 @@ void __init kmem_cache_init(void) cache_cache.array[smp_processor_id()] = &initarray_cache.cache; cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; - cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); + cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, + cache_line_size()); for (order = 0; order < MAX_ORDER; order++) { cache_estimate(order, cache_cache.buffer_size, @@ -1245,24 +1308,26 @@ void __init kmem_cache_init(void) sizes = malloc_sizes; names = cache_names; - /* Initialize the caches that provide memory for the array cache - * and the kmem_list3 structures first. - * Without this, further allocations will bug + /* + * Initialize the caches that provide memory for the array cache and the + * kmem_list3 structures first. Without this, further allocations will + * bug. */ sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, - sizes[INDEX_AC].cs_size, - ARCH_KMALLOC_MINALIGN, - (ARCH_KMALLOC_FLAGS | - SLAB_PANIC), NULL, NULL); + sizes[INDEX_AC].cs_size, + ARCH_KMALLOC_MINALIGN, + ARCH_KMALLOC_FLAGS|SLAB_PANIC, + NULL, NULL); - if (INDEX_AC != INDEX_L3) + if (INDEX_AC != INDEX_L3) { sizes[INDEX_L3].cs_cachep = - kmem_cache_create(names[INDEX_L3].name, - sizes[INDEX_L3].cs_size, - ARCH_KMALLOC_MINALIGN, - (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, - NULL); + kmem_cache_create(names[INDEX_L3].name, + sizes[INDEX_L3].cs_size, + ARCH_KMALLOC_MINALIGN, + ARCH_KMALLOC_FLAGS|SLAB_PANIC, + NULL, NULL); + } while (sizes->cs_size != ULONG_MAX) { /* @@ -1272,13 +1337,13 @@ void __init kmem_cache_init(void) * Note for systems short on memory removing the alignment will * allow tighter packing of the smaller caches. */ - if (!sizes->cs_cachep) + if (!sizes->cs_cachep) { sizes->cs_cachep = kmem_cache_create(names->name, - sizes->cs_size, - ARCH_KMALLOC_MINALIGN, - (ARCH_KMALLOC_FLAGS - | SLAB_PANIC), - NULL, NULL); + sizes->cs_size, + ARCH_KMALLOC_MINALIGN, + ARCH_KMALLOC_FLAGS|SLAB_PANIC, + NULL, NULL); + } /* Inc off-slab bufctl limit until the ceiling is hit. */ if (!(OFF_SLAB(sizes->cs_cachep))) { @@ -1287,13 +1352,11 @@ void __init kmem_cache_init(void) } sizes->cs_dmacachep = kmem_cache_create(names->name_dma, - sizes->cs_size, - ARCH_KMALLOC_MINALIGN, - (ARCH_KMALLOC_FLAGS | - SLAB_CACHE_DMA | - SLAB_PANIC), NULL, - NULL); - + sizes->cs_size, + ARCH_KMALLOC_MINALIGN, + ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| + SLAB_PANIC, + NULL, NULL); sizes++; names++; } @@ -1345,20 +1408,22 @@ void __init kmem_cache_init(void) struct kmem_cache *cachep; mutex_lock(&cache_chain_mutex); list_for_each_entry(cachep, &cache_chain, next) - enable_cpucache(cachep); + enable_cpucache(cachep); mutex_unlock(&cache_chain_mutex); } /* Done! */ g_cpucache_up = FULL; - /* Register a cpu startup notifier callback - * that initializes cpu_cache_get for all new cpus + /* + * Register a cpu startup notifier callback that initializes + * cpu_cache_get for all new cpus */ register_cpu_notifier(&cpucache_notifier); - /* The reap timers are started later, with a module init call: - * That part of the kernel is not yet operational. + /* + * The reap timers are started later, with a module init call: That part + * of the kernel is not yet operational. */ } @@ -1366,16 +1431,13 @@ static int __init cpucache_init(void) { int cpu; - /* - * Register the timers that return unneeded - * pages to gfp. + /* + * Register the timers that return unneeded pages to the page allocator */ for_each_online_cpu(cpu) - start_cpu_timer(cpu); - + start_cpu_timer(cpu); return 0; } - __initcall(cpucache_init); /* @@ -1402,7 +1464,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) atomic_add(i, &slab_reclaim_pages); add_page_state(nr_slab, i); while (i--) { - SetPageSlab(page); + __SetPageSlab(page); page++; } return addr; @@ -1418,8 +1480,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) const unsigned long nr_freed = i; while (i--) { - if (!TestClearPageSlab(page)) - BUG(); + BUG_ON(!PageSlab(page)); + __ClearPageSlab(page); page++; } sub_page_state(nr_slab, nr_freed); @@ -1489,9 +1551,8 @@ static void dump_line(char *data, int offset, int limit) { int i; printk(KERN_ERR "%03x:", offset); - for (i = 0; i < limit; i++) { + for (i = 0; i < limit; i++) printk(" %02x", (unsigned char)data[offset + i]); - } printk("\n"); } #endif @@ -1505,15 +1566,15 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) if (cachep->flags & SLAB_RED_ZONE) { printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", - *dbg_redzone1(cachep, objp), - *dbg_redzone2(cachep, objp)); + *dbg_redzone1(cachep, objp), + *dbg_redzone2(cachep, objp)); } if (cachep->flags & SLAB_STORE_USER) { printk(KERN_ERR "Last user: [<%p>]", - *dbg_userword(cachep, objp)); + *dbg_userword(cachep, objp)); print_symbol("(%s)", - (unsigned long)*dbg_userword(cachep, objp)); + (unsigned long)*dbg_userword(cachep, objp)); printk("\n"); } realobj = (char *)objp + obj_offset(cachep); @@ -1546,8 +1607,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) /* Print header */ if (lines == 0) { printk(KERN_ERR - "Slab corruption: start=%p, len=%d\n", - realobj, size); + "Slab corruption: start=%p, len=%d\n", + realobj, size); print_objinfo(cachep, objp, 0); } /* Hexdump the affected line */ @@ -1568,18 +1629,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) * exist: */ struct slab *slabp = virt_to_slab(objp); - int objnr; + unsigned int objnr; - objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; + objnr = obj_to_index(cachep, slabp, objp); if (objnr) { - objp = slabp->s_mem + (objnr - 1) * cachep->buffer_size; + objp = index_to_obj(cachep, slabp, objnr - 1); realobj = (char *)objp + obj_offset(cachep); printk(KERN_ERR "Prev obj: start=%p, len=%d\n", realobj, size); print_objinfo(cachep, objp, 2); } if (objnr + 1 < cachep->num) { - objp = slabp->s_mem + (objnr + 1) * cachep->buffer_size; + objp = index_to_obj(cachep, slabp, objnr + 1); realobj = (char *)objp + obj_offset(cachep); printk(KERN_ERR "Next obj: start=%p, len=%d\n", realobj, size); @@ -1591,22 +1652,25 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) #if DEBUG /** - * slab_destroy_objs - call the registered destructor for each object in - * a slab that is to be destroyed. + * slab_destroy_objs - destroy a slab and its objects + * @cachep: cache pointer being destroyed + * @slabp: slab pointer being destroyed + * + * Call the registered destructor for each object in a slab that is being + * destroyed. */ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) { int i; for (i = 0; i < cachep->num; i++) { - void *objp = slabp->s_mem + cachep->buffer_size * i; + void *objp = index_to_obj(cachep, slabp, i); if (cachep->flags & SLAB_POISON) { #ifdef CONFIG_DEBUG_PAGEALLOC - if ((cachep->buffer_size % PAGE_SIZE) == 0 - && OFF_SLAB(cachep)) + if (cachep->buffer_size % PAGE_SIZE == 0 && + OFF_SLAB(cachep)) kernel_map_pages(virt_to_page(objp), - cachep->buffer_size / PAGE_SIZE, - 1); + cachep->buffer_size / PAGE_SIZE, 1); else check_poison_obj(cachep, objp); #else @@ -1631,7 +1695,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) if (cachep->dtor) { int i; for (i = 0; i < cachep->num; i++) { - void *objp = slabp->s_mem + cachep->buffer_size * i; + void *objp = index_to_obj(cachep, slabp, i); (cachep->dtor) (objp, cachep, 0); } } @@ -1639,9 +1703,13 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) #endif /** + * slab_destroy - destroy and release all objects in a slab + * @cachep: cache pointer being destroyed + * @slabp: slab pointer being destroyed + * * Destroy all the objs in a slab, and release the mem back to the system. - * Before calling the slab must have been unlinked from the cache. - * The cache-lock is not held/needed. + * Before calling the slab must have been unlinked from the cache. The + * cache-lock is not held/needed. */ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) { @@ -1662,8 +1730,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) } } -/* For setting up all the kmem_list3s for cache whose buffer_size is same - as size of kmem_list3. */ +/* + * For setting up all the kmem_list3s for cache whose buffer_size is same as + * size of kmem_list3. + */ static void set_up_list3s(struct kmem_cache *cachep, int index) { int node; @@ -1689,13 +1759,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) * high order pages for slabs. When the gfp() functions are more friendly * towards high-order requests, this should be changed. */ -static inline size_t calculate_slab_order(struct kmem_cache *cachep, +static size_t calculate_slab_order(struct kmem_cache *cachep, size_t size, size_t align, unsigned long flags) { size_t left_over = 0; int gfporder; - for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { + for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { unsigned int num; size_t remainder; @@ -1730,12 +1800,66 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, /* * Acceptable internal fragmentation? */ - if ((left_over * 8) <= (PAGE_SIZE << gfporder)) + if (left_over * 8 <= (PAGE_SIZE << gfporder)) break; } return left_over; } +static void setup_cpu_cache(struct kmem_cache *cachep) +{ + if (g_cpucache_up == FULL) { + enable_cpucache(cachep); + return; + } + if (g_cpucache_up == NONE) { + /* + * Note: the first kmem_cache_create must create the cache + * that's used by kmalloc(24), otherwise the creation of + * further caches will BUG(). + */ + cachep->array[smp_processor_id()] = &initarray_generic.cache; + + /* + * If the cache that's used by kmalloc(sizeof(kmem_list3)) is + * the first cache, then we need to set up all its list3s, + * otherwise the creation of further caches will BUG(). + */ + set_up_list3s(cachep, SIZE_AC); + if (INDEX_AC == INDEX_L3) + g_cpucache_up = PARTIAL_L3; + else + g_cpucache_up = PARTIAL_AC; + } else { + cachep->array[smp_processor_id()] = + kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); + + if (g_cpucache_up == PARTIAL_AC) { + set_up_list3s(cachep, SIZE_L3); + g_cpucache_up = PARTIAL_L3; + } else { + int node; + for_each_online_node(node) { + cachep->nodelists[node] = + kmalloc_node(sizeof(struct kmem_list3), + GFP_KERNEL, node); + BUG_ON(!cachep->nodelists[node]); + kmem_list3_init(cachep->nodelists[node]); + } + } + } + cachep->nodelists[numa_node_id()]->next_reap = + jiffies + REAPTIMEOUT_LIST3 + + ((unsigned long)cachep) % REAPTIMEOUT_LIST3; + + cpu_cache_get(cachep)->avail = 0; + cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; + cpu_cache_get(cachep)->batchcount = 1; + cpu_cache_get(cachep)->touched = 0; + cachep->batchcount = 1; + cachep->limit = BOOT_CPUCACHE_ENTRIES; +} + /** * kmem_cache_create - Create a cache. * @name: A string which is used in /proc/slabinfo to identify this cache. @@ -1751,9 +1875,8 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, * and the @dtor is run before the pages are handed back. * * @name must be valid until the cache is destroyed. This implies that - * the module calling this has to destroy the cache before getting - * unloaded. - * + * the module calling this has to destroy the cache before getting unloaded. + * * The flags are * * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) @@ -1762,16 +1885,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check * for buffer overruns. * - * %SLAB_NO_REAP - Don't automatically reap this cache when we're under - * memory pressure. - * * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware * cacheline. This can be beneficial if you're counting cycles as closely * as davem. */ struct kmem_cache * kmem_cache_create (const char *name, size_t size, size_t align, - unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long), + unsigned long flags, + void (*ctor)(void*, struct kmem_cache *, unsigned long), void (*dtor)(void*, struct kmem_cache *, unsigned long)) { size_t left_over, slab_size, ralign; @@ -1781,12 +1902,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, /* * Sanity checks... these are all serious usage bugs. */ - if ((!name) || - in_interrupt() || - (size < BYTES_PER_WORD) || + if (!name || in_interrupt() || (size < BYTES_PER_WORD) || (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { - printk(KERN_ERR "%s: Early error in slab %s\n", - __FUNCTION__, name); + printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, + name); BUG(); } @@ -1840,8 +1959,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, * above the next power of two: caches with object sizes just above a * power of two have a significant amount of internal fragmentation. */ - if ((size < 4096 - || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))) + if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) flags |= SLAB_RED_ZONE | SLAB_STORE_USER; if (!(flags & SLAB_DESTROY_BY_RCU)) flags |= SLAB_POISON; @@ -1853,13 +1971,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, BUG_ON(dtor); /* - * Always checks flags, a caller might be expecting debug - * support which isn't available. + * Always checks flags, a caller might be expecting debug support which + * isn't available. */ if (flags & ~CREATE_MASK) BUG(); - /* Check that size is in terms of words. This is needed to avoid + /* + * Check that size is in terms of words. This is needed to avoid * unaligned accesses for some archs when redzoning is used, and makes * sure any on-slab bufctl's are also correctly aligned. |