Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c  2817
1 files changed, 1331 insertions, 1486 deletions
diff --git a/mm/slab.c b/mm/slab.c
index b1e40dafbab..3070b929a1b 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -68,7 +68,7 @@
* Further notes from the original documentation:
*
* 11 April '97. Started multi-threading - markhe
- * The global cache-chain is protected by the mutex 'cache_chain_mutex'.
+ * The global cache-chain is protected by the mutex 'slab_mutex'.
* The sem is only needed when accessing/extending the cache-chain, which
* can never happen inside an interrupt (kmem_cache_create(),
* kmem_cache_shrink() and kmem_cache_reap()).
@@ -115,11 +115,20 @@
#include <linux/debugobjects.h>
#include <linux/kmemcheck.h>
#include <linux/memory.h>
+#include <linux/prefetch.h>
+
+#include <net/sock.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/page.h>
+#include <trace/events/kmem.h>
+
+#include "internal.h"
+
+#include "slab.h"
+
/*
* DEBUG - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
* 0 for faster, smaller code (especially in the critical paths).
@@ -148,85 +157,22 @@
#define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN
#endif
-/* Legal flag mask for kmem_cache_create(). */
-#if DEBUG
-# define CREATE_MASK (SLAB_RED_ZONE | \
- SLAB_POISON | SLAB_HWCACHE_ALIGN | \
- SLAB_CACHE_DMA | \
- SLAB_STORE_USER | \
- SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
- SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
- SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
+#define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \
+ <= SLAB_OBJ_MIN_SIZE) ? 1 : 0)
+
+#if FREELIST_BYTE_INDEX
+typedef unsigned char freelist_idx_t;
#else
-# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
- SLAB_CACHE_DMA | \
- SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
- SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
- SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
+typedef unsigned short freelist_idx_t;
#endif
-/*
- * kmem_bufctl_t:
- *
- * Bufctl's are used for linking objs within a slab
- * linked offsets.
- *
- * This implementation relies on "struct page" for locating the cache &
- * slab an object belongs to.
- * This allows the bufctl structure to be small (one int), but limits
- * the number of objects a slab (not a cache) can contain when off-slab
- * bufctls are used. The limit is the size of the largest general cache
- * that does not use off-slab slabs.
- * For 32bit archs with 4 kB pages, is this 56.
- * This is not serious, as it is only for large objects, when it is unwise
- * to have too many per slab.
- * Note: This limit can be raised by introducing a general cache whose size
- * is less than 512 (PAGE_SIZE<<3), but greater than 256.
- */
-
-typedef unsigned int kmem_bufctl_t;
-#define BUFCTL_END (((kmem_bufctl_t)(~0U))-0)
-#define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1)
-#define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2)
-#define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3)
+#define SLAB_OBJ_MAX_NUM ((1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) - 1)
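Worked arithmetic for the index-width choice above, assuming 4 KiB pages and taking SLAB_OBJ_MIN_SIZE (defined elsewhere in slab.c, not in this hunk) as 16 bytes:

    PAGE_SIZE >> BITS_PER_BYTE  =  4096 >> 8  =  16  <=  SLAB_OBJ_MIN_SIZE
        =>  FREELIST_BYTE_INDEX = 1 and freelist_idx_t is unsigned char
    SLAB_OBJ_MAX_NUM  =  (1 << 1 * 8) - 1  =  255

With objects at least 16 bytes, an order-0 slab holds at most 4096 / 16 = 256 of them, and the slab-order calculation elsewhere in slab.c keeps the per-slab object count within SLAB_OBJ_MAX_NUM, so every object index fits in a single byte.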
/*
- * struct slab
- *
- * Manages the objs in a slab. Placed either at the beginning of mem allocated
- * for a slab, or allocated from an general cache.
- * Slabs are chained into three list: fully used, partial, fully free slabs.
+ * true if a page was allocated from pfmemalloc reserves for network-based
+ * swap
*/
-struct slab {
- struct list_head list;
- unsigned long colouroff;
- void *s_mem; /* including colour offset */
- unsigned int inuse; /* num of objs active in slab */
- kmem_bufctl_t free;
- unsigned short nodeid;
-};
-
-/*
- * struct slab_rcu
- *
- * slab_destroy on a SLAB_DESTROY_BY_RCU cache uses this structure to
- * arrange for kmem_freepages to be called via RCU. This is useful if
- * we need to approach a kernel structure obliquely, from its address
- * obtained without the usual locking. We can lock the structure to
- * stabilize it and check it's still at the given address, only if we
- * can be sure that the memory has not been meanwhile reused for some
- * other kind of object (which our subsystem's lock might corrupt).
- *
- * rcu_read_lock before reading the address, then rcu_read_unlock after
- * taking the spinlock within the structure expected at that address.
- *
- * We assume struct slab_rcu can overlay struct slab when destroying.
- */
-struct slab_rcu {
- struct rcu_head head;
- struct kmem_cache *cachep;
- void *addr;
-};
+static bool pfmemalloc_active __read_mostly;
/*
* struct array_cache
@@ -250,9 +196,30 @@ struct array_cache {
* Must have this definition in here for the proper
* alignment of array_cache. Also simplifies accessing
* the entries.
+ *
+ * Entries should not be directly dereferenced as
+ * entries belonging to slabs marked pfmemalloc will
+ * have the lower bits set SLAB_OBJ_PFMEMALLOC
*/
};
+#define SLAB_OBJ_PFMEMALLOC 1
+static inline bool is_obj_pfmemalloc(void *objp)
+{
+ return (unsigned long)objp & SLAB_OBJ_PFMEMALLOC;
+}
+
+static inline void set_obj_pfmemalloc(void **objp)
+{
+ *objp = (void *)((unsigned long)*objp | SLAB_OBJ_PFMEMALLOC);
+ return;
+}
+
+static inline void clear_obj_pfmemalloc(void **objp)
+{
+ *objp = (void *)((unsigned long)*objp & ~SLAB_OBJ_PFMEMALLOC);
+}
+
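The three helpers above use the classic low-bit pointer-tagging trick: slab objects are always at least word aligned, so bit 0 of an object pointer is guaranteed to be clear and can carry the "this object came from a pfmemalloc slab" flag until the pointer is handed out. A minimal user-space sketch of the same trick (stand-alone illustrative code, not taken from the kernel):

    #include <assert.h>
    #include <stdint.h>

    #define OBJ_PFMEMALLOC 1UL		/* plays the role of SLAB_OBJ_PFMEMALLOC */

    int main(void)
    {
    	long storage;			/* stand-in for an aligned slab object */
    	void *obj = &storage;

    	/* tag: set bit 0, which the alignment guarantees is clear */
    	obj = (void *)((uintptr_t)obj | OBJ_PFMEMALLOC);
    	assert(((uintptr_t)obj & OBJ_PFMEMALLOC) != 0);

    	/* untag before the pointer may be used as a real address again */
    	obj = (void *)((uintptr_t)obj & ~OBJ_PFMEMALLOC);
    	assert(obj == (void *)&storage);
    	return 0;
    }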
/*
* bootstrap: The caches do not work without cpuarrays anymore, but the
* cpuarrays are allocated from the generic caches...
@@ -264,68 +231,27 @@ struct arraycache_init {
};
/*
- * The slab lists for all objects.
- */
-struct kmem_list3 {
- struct list_head slabs_partial; /* partial list first, better asm code */
- struct list_head slabs_full;
- struct list_head slabs_free;
- unsigned long free_objects;
- unsigned int free_limit;
- unsigned int colour_next; /* Per-node cache coloring */
- spinlock_t list_lock;
- struct array_cache *shared; /* shared per node */
- struct array_cache **alien; /* on other nodes */
- unsigned long next_reap; /* updated without locking */
- int free_touched; /* updated without locking */
-};
-
-/*
* Need this for bootstrapping a per node allocator.
*/
#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
-struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
+static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
#define CACHE_CACHE 0
#define SIZE_AC MAX_NUMNODES
-#define SIZE_L3 (2 * MAX_NUMNODES)
+#define SIZE_NODE (2 * MAX_NUMNODES)
static int drain_freelist(struct kmem_cache *cache,
- struct kmem_list3 *l3, int tofree);
+ struct kmem_cache_node *n, int tofree);
static void free_block(struct kmem_cache *cachep, void **objpp, int len,
int node);
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
static void cache_reap(struct work_struct *unused);
-/*
- * This function must be completely optimized away if a constant is passed to
- * it. Mostly the same as what is in linux/slab.h except it returns an index.
- */
-static __always_inline int index_of(const size_t size)
-{
- extern void __bad_size(void);
-
- if (__builtin_constant_p(size)) {
- int i = 0;
-
-#define CACHE(x) \
- if (size <=x) \
- return i; \
- else \
- i++;
-#include <linux/kmalloc_sizes.h>
-#undef CACHE
- __bad_size();
- } else
- __bad_size();
- return 0;
-}
-
static int slab_early_init = 1;
-#define INDEX_AC index_of(sizeof(struct arraycache_init))
-#define INDEX_L3 index_of(sizeof(struct kmem_list3))
+#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
+#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
-static void kmem_list3_init(struct kmem_list3 *parent)
+static void kmem_cache_node_init(struct kmem_cache_node *parent)
{
INIT_LIST_HEAD(&parent->slabs_full);
INIT_LIST_HEAD(&parent->slabs_partial);
@@ -341,7 +267,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
#define MAKE_LIST(cachep, listp, slab, nodeid) \
do { \
INIT_LIST_HEAD(listp); \
- list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
+ list_splice(&(cachep->node[nodeid]->slab), listp); \
} while (0)
#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
@@ -362,8 +288,8 @@ static void kmem_list3_init(struct kmem_list3 *parent)
* OTOH the cpuarrays can contain lots of objects,
* which could lock up otherwise freeable slabs.
*/
-#define REAPTIMEOUT_CPUC (2*HZ)
-#define REAPTIMEOUT_LIST3 (4*HZ)
+#define REAPTIMEOUT_AC (2*HZ)
+#define REAPTIMEOUT_NODE (4*HZ)
#if STATS
#define STATS_INC_ACTIVE(x) ((x)->num_active++)
@@ -418,8 +344,8 @@ static void kmem_list3_init(struct kmem_list3 *parent)
* cachep->obj_offset - BYTES_PER_WORD .. cachep->obj_offset - 1:
* redzone word.
* cachep->obj_offset: The real object.
- * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
- * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
+ * cachep->size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
+ * cachep->size - 1* BYTES_PER_WORD: last caller address
* [BYTES_PER_WORD long]
*/
static int obj_offset(struct kmem_cache *cachep)
@@ -427,11 +353,6 @@ static int obj_offset(struct kmem_cache *cachep)
return cachep->obj_offset;
}
-static int obj_size(struct kmem_cache *cachep)
-{
- return cachep->obj_size;
-}
-
static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
@@ -443,163 +364,109 @@ static unsigned long long *dbg_redzone2(struct kmem_cache *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
if (cachep->flags & SLAB_STORE_USER)
- return (unsigned long long *)(objp + cachep->buffer_size -
+ return (unsigned long long *)(objp + cachep->size -
sizeof(unsigned long long) -
REDZONE_ALIGN);
- return (unsigned long long *) (objp + cachep->buffer_size -
+ return (unsigned long long *) (objp + cachep->size -
sizeof(unsigned long long));
}
static void **dbg_userword(struct kmem_cache *cachep, void *objp)
{
BUG_ON(!(cachep->flags & SLAB_STORE_USER));
- return (void **)(objp + cachep->buffer_size - BYTES_PER_WORD);
+ return (void **)(objp + cachep->size - BYTES_PER_WORD);
}
#else
#define obj_offset(x) 0
-#define obj_size(cachep) (cachep->buffer_size)
#define dbg_redzone1(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
#define dbg_redzone2(cachep, objp) ({BUG(); (unsigned long long *)NULL;})
#define dbg_userword(cachep, objp) ({BUG(); (void **)NULL;})
#endif
-#ifdef CONFIG_TRACING
-size_t slab_buffer_size(struct kmem_cache *cachep)
-{
- return cachep->buffer_size;
-}
-EXPORT_SYMBOL(slab_buffer_size);
-#endif
+#define OBJECT_FREE (0)
+#define OBJECT_ACTIVE (1)
-/*
- * Do not go above this order unless 0 objects fit into the slab.
- */
-#define BREAK_GFP_ORDER_HI 1
-#define BREAK_GFP_ORDER_LO 0
-static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
+#ifdef CONFIG_DEBUG_SLAB_LEAK
-/*
- * Functions for storing/retrieving the cachep and or slab from the page
- * allocator. These are used to find the slab an obj belongs to. With kfree(),
- * these are used to find the cache which an obj belongs to.
- */
-static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
+static void set_obj_status(struct page *page, int idx, int val)
{
- page->lru.next = (struct list_head *)cache;
-}
+ int freelist_size;
+ char *status;
+ struct kmem_cache *cachep = page->slab_cache;
-static inline struct kmem_cache *page_get_cache(struct page *page)
-{
- page = compound_head(page);
- BUG_ON(!PageSlab(page));
- return (struct kmem_cache *)page->lru.next;
+ freelist_size = cachep->num * sizeof(freelist_idx_t);
+ status = (char *)page->freelist + freelist_size;
+ status[idx] = val;
}
-static inline void page_set_slab(struct page *page, struct slab *slab)
+static inline unsigned int get_obj_status(struct page *page, int idx)
{
- page->lru.prev = (struct list_head *)slab;
-}
+ int freelist_size;
+ char *status;
+ struct kmem_cache *cachep = page->slab_cache;
-static inline struct slab *page_get_slab(struct page *page)
-{
- BUG_ON(!PageSlab(page));
- return (struct slab *)page->lru.prev;
-}
+ freelist_size = cachep->num * sizeof(freelist_idx_t);
+ status = (char *)page->freelist + freelist_size;
-static inline struct kmem_cache *virt_to_cache(const void *obj)
-{
- struct page *page = virt_to_head_page(obj);
- return page_get_cache(page);
+ return status[idx];
}
-static inline struct slab *virt_to_slab(const void *obj)
+#else
+static inline void set_obj_status(struct page *page, int idx, int val) {}
+
+#endif
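For the CONFIG_DEBUG_SLAB_LEAK case, the two helpers above imply the following layout of the management area that page->freelist points at: the object-index array first, then one status byte per object (holding OBJECT_FREE or OBJECT_ACTIVE):

    page->freelist
        |
        v
        [ idx[0] ... idx[num-1] ][ status[0] ... status[num-1] ]
          num * sizeof(freelist_idx_t)    num * sizeof(char)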
+
+/*
+ * Do not go above this order unless 0 objects fit into the slab or
+ * overridden on the command line.
+ */
+#define SLAB_MAX_ORDER_HI 1
+#define SLAB_MAX_ORDER_LO 0
+static int slab_max_order = SLAB_MAX_ORDER_LO;
+static bool slab_max_order_set __initdata;
+
+static inline struct kmem_cache *virt_to_cache(const void *obj)
{
struct page *page = virt_to_head_page(obj);
- return page_get_slab(page);
+ return page->slab_cache;
}
-static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
+static inline void *index_to_obj(struct kmem_cache *cache, struct page *page,
unsigned int idx)
{
- return slab->s_mem + cache->buffer_size * idx;
+ return page->s_mem + cache->size * idx;
}
/*
- * We want to avoid an expensive divide : (offset / cache->buffer_size)
- * Using the fact that buffer_size is a constant for a particular cache,
- * we can replace (offset / cache->buffer_size) by
+ * We want to avoid an expensive divide : (offset / cache->size)
+ * Using the fact that size is a constant for a particular cache,
+ * we can replace (offset / cache->size) by
* reciprocal_divide(offset, cache->reciprocal_buffer_size)
*/
static inline unsigned int obj_to_index(const struct kmem_cache *cache,
- const struct slab *slab, void *obj)
+ const struct page *page, void *obj)
{
- u32 offset = (obj - slab->s_mem);
+ u32 offset = (obj - page->s_mem);
return reciprocal_divide(offset, cache->reciprocal_buffer_size);
}
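A stand-alone user-space sketch of the reciprocal-divide idea used by obj_to_index() (local re-implementations under the same names, illustrative rather than the kernel's lib/reciprocal_div.c): because the divisor cache->size is fixed when the cache is created, a multiplier can be precomputed once and every lookup becomes a 64-bit multiply plus a shift instead of a division.

    #include <assert.h>
    #include <stdint.h>

    /* Precompute (2^32 + d - 1) / d for a fixed divisor d > 0. */
    static uint32_t reciprocal_value(uint32_t d)
    {
    	return (uint32_t)(((1ULL << 32) + d - 1) / d);
    }

    /* a / d evaluated as a multiply and a shift, given r = reciprocal_value(d). */
    static uint32_t reciprocal_divide(uint32_t a, uint32_t r)
    {
    	return (uint32_t)(((uint64_t)a * r) >> 32);
    }

    int main(void)
    {
    	uint32_t size = 192;			/* hypothetical cache->size */
    	uint32_t r = reciprocal_value(size);	/* done once per cache */
    	uint32_t idx;

    	for (idx = 0; idx < 128; idx++) {
    		uint32_t offset = idx * size;	/* obj - page->s_mem */
    		assert(reciprocal_divide(offset, r) == idx);
    	}
    	return 0;
    }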
-/*
- * These are the default caches for kmalloc. Custom caches can have other sizes.
- */
-struct cache_sizes malloc_sizes[] = {
-#define CACHE(x) { .cs_size = (x) },
-#include <linux/kmalloc_sizes.h>
- CACHE(ULONG_MAX)
-#undef CACHE
-};
-EXPORT_SYMBOL(malloc_sizes);
-
-/* Must match cache_sizes above. Out of line to keep cache footprint low. */
-struct cache_names {
- char *name;
- char *name_dma;
-};
-
-static struct cache_names __initdata cache_names[] = {
-#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
-#include <linux/kmalloc_sizes.h>
- {NULL,}
-#undef CACHE
-};
-
-static struct arraycache_init initarray_cache __initdata =
- { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
static struct arraycache_init initarray_generic =
{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
/* internal cache of cache description objs */
-static struct kmem_cache cache_cache = {
+static struct kmem_cache kmem_cache_boot = {
.batchcount = 1,
.limit = BOOT_CPUCACHE_ENTRIES,
.shared = 1,
- .buffer_size = sizeof(struct kmem_cache),
+ .size = sizeof(struct kmem_cache),
.name = "kmem_cache",
};
#define BAD_ALIEN_MAGIC 0x01020304ul
-/*
- * chicken and egg problem: delay the per-cpu array allocation
- * until the general caches are up.
- */
-static enum {
- NONE,
- PARTIAL_AC,
- PARTIAL_L3,
- EARLY,
- FULL
-} g_cpucache_up;
-
-/*
- * used by boot code to determine if it can use slab based allocator
- */
-int slab_is_available(void)
-{
- return g_cpucache_up >= EARLY;
-}
-
#ifdef CONFIG_LOCKDEP
/*
@@ -616,40 +483,92 @@ int slab_is_available(void)
static struct lock_class_key on_slab_l3_key;
static struct lock_class_key on_slab_alc_key;
+static struct lock_class_key debugobj_l3_key;
+static struct lock_class_key debugobj_alc_key;
+
+static void slab_set_lock_classes(struct kmem_cache *cachep,
+ struct lock_class_key *l3_key, struct lock_class_key *alc_key,
+ int q)
+{
+ struct array_cache **alc;
+ struct kmem_cache_node *n;
+ int r;
+
+ n = cachep->node[q];
+ if (!n)
+ return;
+
+ lockdep_set_class(&n->list_lock, l3_key);
+ alc = n->alien;
+ /*
+ * FIXME: This check for BAD_ALIEN_MAGIC
+ * should go away when common slab code is taught to
+ * work even without alien caches.
+ * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+ * for alloc_alien_cache,
+ */
+ if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+ return;
+ for_each_node(r) {
+ if (alc[r])
+ lockdep_set_class(&alc[r]->lock, alc_key);
+ }
+}
+
+static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+ slab_set_lock_classes(cachep, &debugobj_l3_key, &debugobj_alc_key, node);
+}
+
+static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
+{
+ int node;
+
+ for_each_online_node(node)
+ slab_set_debugobj_lock_classes_node(cachep, node);
+}
+
static void init_node_lock_keys(int q)
{
- struct cache_sizes *s = malloc_sizes;
+ int i;
- if (g_cpucache_up != FULL)
+ if (slab_state < UP)
return;
- for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
- struct array_cache **alc;
- struct kmem_list3 *l3;
- int r;
+ for (i = 1; i <= KMALLOC_SHIFT_HIGH; i++) {
+ struct kmem_cache_node *n;
+ struct kmem_cache *cache = kmalloc_caches[i];
- l3 = s->cs_cachep->nodelists[q];
- if (!l3 || OFF_SLAB(s->cs_cachep))
+ if (!cache)
continue;
- lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
- alc = l3->alien;
- /*
- * FIXME: This check for BAD_ALIEN_MAGIC
- * should go away when common slab code is taught to
- * work even without alien caches.
- * Currently, non NUMA code returns BAD_ALIEN_MAGIC
- * for alloc_alien_cache,
- */
- if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+
+ n = cache->node[q];
+ if (!n || OFF_SLAB(cache))
continue;
- for_each_node(r) {
- if (alc[r])
- lockdep_set_class(&alc[r]->lock,
- &on_slab_alc_key);
- }
+
+ slab_set_lock_classes(cache, &on_slab_l3_key,
+ &on_slab_alc_key, q);
}
}
+static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
+{
+ if (!cachep->node[q])
+ return;
+
+ slab_set_lock_classes(cachep, &on_slab_l3_key,
+ &on_slab_alc_key, q);
+}
+
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+ int node;
+
+ VM_BUG_ON(OFF_SLAB(cachep));
+ for_each_node(node)
+ on_slab_lock_classes_node(cachep, node);
+}
+
static inline void init_lock_keys(void)
{
int node;
@@ -665,13 +584,23 @@ static void init_node_lock_keys(int q)
static inline void init_lock_keys(void)
{
}
-#endif
-/*
- * Guard access to the cache-chain.
- */
-static DEFINE_MUTEX(cache_chain_mutex);
-static struct list_head cache_chain;
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+}
+
+static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+}
+
+static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+}
+
+static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
+{
+}
+#endif
static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
@@ -680,44 +609,50 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
return cachep->array[smp_processor_id()];
}
-static inline struct kmem_cache *__find_general_cachep(size_t size,
- gfp_t gfpflags)
+static size_t calculate_freelist_size(int nr_objs, size_t align)
{
- struct cache_sizes *csizep = malloc_sizes;
+ size_t freelist_size;
-#if DEBUG
- /* This happens if someone tries to call
- * kmem_cache_create(), or __kmalloc(), before
- * the generic caches are initialized.
- */
- BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
-#endif
- if (!size)
- return ZERO_SIZE_PTR;
+ freelist_size = nr_objs * sizeof(freelist_idx_t);
+ if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
+ freelist_size += nr_objs * sizeof(char);
- while (size > csizep->cs_size)
- csizep++;
+ if (align)
+ freelist_size = ALIGN(freelist_size, align);
- /*
- * Really subtle: The last entry with cs->cs_size==ULONG_MAX
- * has cs_{dma,}cachep==NULL. Thus no special case
- * for large kmalloc calls required.
- */
-#ifdef CONFIG_ZONE_DMA
- if (unlikely(gfpflags & GFP_DMA))
- return csizep->cs_dmacachep;
-#endif
- return csizep->cs_cachep;
+ return freelist_size;
}
-static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
+static int calculate_nr_objs(size_t slab_size, size_t buffer_size,
+ size_t idx_size, size_t align)
{
- return __find_general_cachep(size, gfpflags);
-}
+ int nr_objs;
+ size_t remained_size;
+ size_t freelist_size;
+ int extra_space = 0;
-static size_t slab_mgmt_size(size_t nr_objs, size_t align)
-{
- return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
+ if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
+ extra_space = sizeof(char);
+ /*
+ * Ignore padding for the initial guess. The padding
+ * is at most @align-1 bytes, and @buffer_size is at
+ * least @align. In the worst case, this result will
+ * be one greater than the number of objects that fit
+ * into the memory allocation when taking the padding
+ * into account.
+ */
+ nr_objs = slab_size / (buffer_size + idx_size + extra_space);
+
+ /*
+ * This calculated number will be either the right
+ * amount, or one greater than what we want.
+ */
+ remained_size = slab_size - nr_objs * buffer_size;
+ freelist_size = calculate_freelist_size(nr_objs, align);
+ if (remained_size < freelist_size)
+ nr_objs--;
+
+ return nr_objs;
}
/*
@@ -736,8 +671,7 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
* on it. For the latter case, the memory allocated for a
* slab is used for:
*
- * - The struct slab
- * - One kmem_bufctl_t for each object
+ * - One unsigned int for each object
* - Padding to respect alignment of @align
* - @buffer_size bytes for each object
*
@@ -750,37 +684,16 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
mgmt_size = 0;
nr_objs = slab_size / buffer_size;
- if (nr_objs > SLAB_LIMIT)
- nr_objs = SLAB_LIMIT;
} else {
- /*
- * Ignore padding for the initial guess. The padding
- * is at most @align-1 bytes, and @buffer_size is at
- * least @align. In the worst case, this result will
- * be one greater than the number of objects that fit
- * into the memory allocation when taking the padding
- * into account.
- */
- nr_objs = (slab_size - sizeof(struct slab)) /
- (buffer_size + sizeof(kmem_bufctl_t));
-
- /*
- * This calculated number will be either the right
- * amount, or one greater than what we want.
- */
- if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size
- > slab_size)
- nr_objs--;
-
- if (nr_objs > SLAB_LIMIT)
- nr_objs = SLAB_LIMIT;
-
- mgmt_size = slab_mgmt_size(nr_objs, align);
+ nr_objs = calculate_nr_objs(slab_size, buffer_size,
+ sizeof(freelist_idx_t), align);
+ mgmt_size = calculate_freelist_size(nr_objs, align);
}
*num = nr_objs;
*left_over = slab_size - nr_objs*buffer_size - mgmt_size;
}
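Worked numbers for the on-slab case above, with hypothetical inputs: a 4096-byte order-0 slab, 120-byte objects, a one-byte freelist index, no DEBUG_SLAB_LEAK status byte, and align taken as 0:

    nr_objs       = 4096 / (120 + 1)       = 33   (initial guess)
    remained_size = 4096 - 33 * 120        = 136
    freelist_size = 33 * 1                 = 33   <= 136, so 33 objects stand
    mgmt_size     = 33
    left_over     = 4096 - 33*120 - 33     = 103  bytes left for cache colouring

The nr_objs-- path only triggers when aligning the freelist eats into the space the initial guess assumed was available.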
+#if DEBUG
#define slab_error(cachep, msg) __slab_error(__func__, cachep, msg)
static void __slab_error(const char *function, struct kmem_cache *cachep,
@@ -789,7 +702,9 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
function, cachep->name, msg);
dump_stack();
+ add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
}
+#endif
/*
* By default on NUMA we use alien caches to stage the freeing of
@@ -807,6 +722,17 @@ static int __init noaliencache_setup(char *s)
}
__setup("noaliencache", noaliencache_setup);
+static int __init slab_max_order_setup(char *str)
+{
+ get_option(&str, &slab_max_order);
+ slab_max_order = slab_max_order < 0 ? 0 :
+ min(slab_max_order, MAX_ORDER - 1);
+ slab_max_order_set = true;
+
+ return 1;
+}
+__setup("slab_max_order=", slab_max_order_setup);
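Both __setup() handlers in this area become kernel command-line options; a hypothetical boot line using them might look like:

    ... root=/dev/sda1 noaliencache slab_max_order=3 ...

Here slab_max_order=3 permits slabs of up to order 3 (eight contiguous pages); as the parser above shows, negative values are clamped to 0 and anything above MAX_ORDER - 1 is clamped down.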
+
#ifdef CONFIG_NUMA
/*
* Special reaping functions for NUMA systems called from cache_reap().
@@ -829,12 +755,12 @@ static void init_reap_node(int cpu)
static void next_reap_node(void)
{
- int node = __get_cpu_var(slab_reap_node);
+ int node = __this_cpu_read(slab_reap_node);
node = next_node(node, node_online_map);
if (unlikely(node >= MAX_NUMNODES))
node = first_node(node_online_map);
- __get_cpu_var(slab_reap_node) = node;
+ __this_cpu_write(slab_reap_node, node);
}
#else
@@ -849,7 +775,7 @@ static void next_reap_node(void)
* the CPUs getting into lockstep and contending for the global cache chain
* lock.
*/
-static void __cpuinit start_cpu_timer(int cpu)
+static void start_cpu_timer(int cpu)
{
struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
@@ -860,7 +786,7 @@ static void __cpuinit start_cpu_timer(int cpu)
*/
if (keventd_up() && reap_work->work.func == NULL) {
init_reap_node(cpu);
- INIT_DELAYED_WORK_DEFERRABLE(reap_work, cache_reap);
+ INIT_DEFERRABLE_WORK(reap_work, cache_reap);
schedule_delayed_work_on(cpu, reap_work,
__round_jiffies_relative(HZ, cpu));
}
@@ -875,7 +801,7 @@ static struct array_cache *alloc_arraycache(int node, int entries,
nc = kmalloc_node(memsize, gfp, node);
/*
* The array_cache structures contain pointers to free object.
- * However, when such objects are allocated or transfered to another
+ * However, when such objects are allocated or transferred to another
* cache the pointers are not cleared and they could be counted as
* valid references during a kmemleak scan. Therefore, kmemleak must
* not scan such objects.
@@ -891,6 +817,122 @@ static struct array_cache *alloc_arraycache(int node, int entries,
return nc;
}
+static inline bool is_slab_pfmemalloc(struct page *page)
+{
+ return PageSlabPfmemalloc(page);
+}
+
+/* Clears pfmemalloc_active if no slabs have pfmalloc set */
+static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
+ struct array_cache *ac)
+{
+ struct kmem_cache_node *n = cachep->node[numa_mem_id()];
+ struct page *page;
+ unsigned long flags;
+
+ if (!pfmemalloc_active)
+ return;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->slabs_full, lru)
+ if (is_slab_pfmemalloc(page))
+ goto out;
+
+ list_for_each_entry(page, &n->slabs_partial, lru)
+ if (is_slab_pfmemalloc(page))
+ goto out;
+
+ list_for_each_entry(page, &n->slabs_free, lru)
+ if (is_slab_pfmemalloc(page))
+ goto out;
+
+ pfmemalloc_active = false;
+out:
+ spin_unlock_irqrestore(&n->list_lock, flags);
+}
+
+static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
+ gfp_t flags, bool force_refill)
+{
+ int i;
+ void *objp = ac->entry[--ac->avail];
+
+ /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
+ if (unlikely(is_obj_pfmemalloc(objp))) {
+ struct kmem_cache_node *n;
+
+ if (gfp_pfmemalloc_allowed(flags)) {
+ clear_obj_pfmemalloc(&objp);
+ return objp;
+ }
+
+ /* The caller cannot use PFMEMALLOC objects, find another one */
+ for (i = 0; i < ac->avail; i++) {
+ /* If a !PFMEMALLOC object is found, swap them */
+ if (!is_obj_pfmemalloc(ac->entry[i])) {
+ objp = ac->entry[i];
+ ac->entry[i] = ac->entry[ac->avail];
+ ac->entry[ac->avail] = objp;
+ return objp;
+ }
+ }
+
+ /*
+ * If there are empty slabs on the slabs_free list and we are
+ * being forced to refill the cache, mark this one !pfmemalloc.
+ */
+ n = cachep->node[numa_mem_id()];
+ if (!list_empty(&n->slabs_free) && force_refill) {
+ struct page *page = virt_to_head_page(objp);
+ ClearPageSlabPfmemalloc(page);
+ clear_obj_pfmemalloc(&objp);
+ recheck_pfmemalloc_active(cachep, ac);
+ return objp;
+ }
+
+ /* No !PFMEMALLOC objects available */
+ ac->avail++;
+ objp = NULL;
+ }
+
+ return objp;
+}
+
+static inline void *ac_get_obj(struct kmem_cache *cachep,
+ struct array_cache *ac, gfp_t flags, bool force_refill)
+{
+ void *objp;
+
+ if (unlikely(sk_memalloc_socks()))
+ objp = __ac_get_obj(cachep, ac, flags, force_refill);
+ else
+ objp = ac->entry[--ac->avail];
+
+ return objp;
+}
+
+static void *__ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+ void *objp)
+{
+ if (unlikely(pfmemalloc_active)) {
+ /* Some pfmemalloc slabs exist, check if this is one */
+ struct page *page = virt_to_head_page(objp);
+ if (PageSlabPfmemalloc(page))
+ set_obj_pfmemalloc(&objp);
+ }
+
+ return objp;
+}
+
+static inline void ac_put_obj(struct kmem_cache *cachep, struct array_cache *ac,
+ void *objp)
+{
+ if (unlikely(sk_memalloc_socks()))
+ objp = __ac_put_obj(cachep, ac, objp);
+
+ ac->entry[ac->avail++] = objp;
+}
+
/*
* Transfer objects in one arraycache to another.
* Locking must be handled by the caller.
@@ -917,7 +959,7 @@ static int transfer_objects(struct array_cache *to,
#ifndef CONFIG_NUMA
#define drain_alien_cache(cachep, alien) do { } while (0)
-#define reap_alien(cachep, l3) do { } while (0)
+#define reap_alien(cachep, n) do { } while (0)
static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
{
@@ -989,33 +1031,33 @@ static void free_alien_cache(struct array_cache **ac_ptr)
static void __drain_alien_cache(struct kmem_cache *cachep,
struct array_cache *ac, int node)
{
- struct kmem_list3 *rl3 = cachep->nodelists[node];
+ struct kmem_cache_node *n = cachep->node[node];
if (ac->avail) {
- spin_lock(&rl3->list_lock);
+ spin_lock(&n->list_lock);
/*
* Stuff objects into the remote nodes shared array first.
* That way we could avoid the overhead of putting the objects
* into the free lists and getting them back later.
*/
- if (rl3->shared)
- transfer_objects(rl3->shared, ac, ac->limit);
+ if (n->shared)
+ transfer_objects(n->shared, ac, ac->limit);
free_block(cachep, ac->entry, ac->avail, node);
ac->avail = 0;
- spin_unlock(&rl3->list_lock);
+ spin_unlock(&n->list_lock);
}
}
/*
* Called from cache_reap() to regularly drain alien caches round robin.
*/
-static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
+static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
{
- int node = __get_cpu_var(slab_reap_node);
+ int node = __this_cpu_read(slab_reap_node);
- if (l3->alien) {
- struct array_cache *ac = l3->alien[node];
+ if (n->alien) {
+ struct array_cache *ac = n->alien[node];
if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
__drain_alien_cache(cachep, ac, node);
@@ -1043,9 +1085,8 @@ static void drain_alien_cache(struct kmem_cache *cachep,
static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
{
- struct slab *slabp = virt_to_slab(objp);
- int nodeid = slabp->nodeid;
- struct kmem_list3 *l3;
+ int nodeid = page_to_nid(virt_to_page(objp));
+ struct kmem_cache_node *n;
struct array_cache *alien = NULL;
int node;
@@ -1055,83 +1096,89 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
* Make sure we are not freeing a object from another node to the array
* cache on this cpu.
*/
- if (likely(slabp->nodeid == node))
+ if (likely(nodeid == node))
return 0;
- l3 = cachep->nodelists[node];
+ n = cachep->node[node];
STATS_INC_NODEFREES(cachep);
- if (l3->alien && l3->alien[nodeid]) {
- alien = l3->alien[nodeid];
+ if (n->alien && n->alien[nodeid]) {
+ alien = n->alien[nodeid];
spin_lock(&alien->lock);
if (unlikely(alien->avail == alien->limit)) {
STATS_INC_ACOVERFLOW(cachep);
__drain_alien_cache(cachep, alien, nodeid);
}
- alien->entry[alien->avail++] = objp;
+ ac_put_obj(cachep, alien, objp);
spin_unlock(&alien->lock);
} else {
- spin_lock(&(cachep->nodelists[nodeid])->list_lock);
+ spin_lock(&(cachep->node[nodeid])->list_lock);
free_block(cachep, &objp, 1, nodeid);
- spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
+ spin_unlock(&(cachep->node[nodeid])->list_lock);
}
return 1;
}
#endif
/*
- * Allocates and initializes nodelists for a node on each slab cache, used for
- * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3
+ * Allocates and initializes node for a node on each slab cache, used for
+ * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
* will be allocated off-node since memory is not yet online for the new node.
- * When hotplugging memory or a cpu, existing nodelists are not replaced if
+ * When hotplugging memory or a cpu, existing node are not replaced if
* already in use.
*
- * Must hold cache_chain_mutex.
+ * Must hold slab_mutex.
*/
-static int init_cache_nodelists_node(int node)
+static int init_cache_node_node(int node)
{
struct kmem_cache *cachep;
- struct kmem_list3 *l3;
- const int memsize = sizeof(struct kmem_list3);
+ struct kmem_cache_node *n;
+ const int memsize = sizeof(struct kmem_cache_node);
- list_for_each_entry(cachep, &cache_chain, next) {
+ list_for_each_entry(cachep, &slab_caches, list) {
/*
- * Set up the size64 kmemlist for cpu before we can
+ * Set up the kmem_cache_node for cpu before we can
* begin anything. Make sure some other cpu on this
* node has not already allocated this
*/
- if (!cachep->nodelists[node]) {
- l3 = kmalloc_node(memsize, GFP_KERNEL, node);
- if (!l3)
+ if (!cachep->node[node]) {
+ n = kmalloc_node(memsize, GFP_KERNEL, node);
+ if (!n)
return -ENOMEM;
- kmem_list3_init(l3);
- l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+ kmem_cache_node_init(n);
+ n->next_reap = jiffies + REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
/*
- * The l3s don't come and go as CPUs come and
- * go. cache_chain_mutex is sufficient
+ * The kmem_cache_nodes don't come and go as CPUs
+ * come and go. slab_mutex is sufficient
* protection here.
*/
- cachep->nodelists[node] = l3;
+ cachep->node[node] = n;
}
- spin_lock_irq(&cachep->nodelists[node]->list_lock);
- cachep->nodelists[node]->free_limit =
+ spin_lock_irq(&cachep->node[node]->list_lock);
+ cachep->node[node]->free_limit =
(1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&cachep->nodelists[node]->list_lock);
+ spin_unlock_irq(&cachep->node[node]->list_lock);
}
return 0;
}
-static void __cpuinit cpuup_canceled(long cpu)
+static inline int slabs_tofree(struct kmem_cache *cachep,
+ struct kmem_cache_node *n)
+{
+ return (n->free_objects + cachep->num - 1) / cachep->num;
+}
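slabs_tofree() is just a ceiling division: it asks drain_freelist() to reclaim enough slabs to cover every free object on the node. For example, with 10 free objects and 4 objects per slab, (10 + 4 - 1) / 4 = 3 slabs are requested.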
+
+static void cpuup_canceled(long cpu)
{
struct kmem_cache *cachep;
- struct kmem_list3 *l3 = NULL;
+ struct kmem_cache_node *n = NULL;
int node = cpu_to_mem(cpu);
const struct cpumask *mask = cpumask_of_node(node);
- list_for_each_entry(cachep, &cache_chain, next) {
+ list_for_each_entry(cachep, &slab_caches, list) {
struct array_cache *nc;
struct array_cache *shared;
struct array_cache **alien;
@@ -1139,34 +1186,34 @@ static void __cpuinit cpuup_canceled(long cpu)
/* cpu is dead; no one can alloc from it. */
nc = cachep->array[cpu];
cachep->array[cpu] = NULL;
- l3 = cachep->nodelists[node];
+ n = cachep->node[node];
- if (!l3)
+ if (!n)
goto free_array_cache;
- spin_lock_irq(&l3->list_lock);
+ spin_lock_irq(&n->list_lock);
- /* Free limit for this kmem_list3 */
- l3->free_limit -= cachep->batchcount;
+ /* Free limit for this kmem_cache_node */
+ n->free_limit -= cachep->batchcount;
if (nc)
free_block(cachep, nc->entry, nc->avail, node);
if (!cpumask_empty(mask)) {
- spin_unlock_irq(&l3->list_lock);
+ spin_unlock_irq(&n->list_lock);
goto free_array_cache;
}
- shared = l3->shared;
+ shared = n->shared;
if (shared) {
free_block(cachep, shared->entry,
shared->avail, node);
- l3->shared = NULL;
+ n->shared = NULL;
}
- alien = l3->alien;
- l3->alien = NULL;
+ alien = n->alien;
+ n->alien = NULL;
- spin_unlock_irq(&l3->list_lock);
+ spin_unlock_irq(&n->list_lock);
kfree(shared);
if (alien) {
@@ -1181,18 +1228,18 @@ free_array_cache:
* the respective cache's slabs, now we can go ahead and
* shrink each nodelist to its limit.
*/
- list_for_each_entry(cachep, &cache_chain, next) {
- l3 = cachep->nodelists[node];
- if (!l3)
+ list_for_each_entry(cachep, &slab_caches, list) {
+ n = cachep->node[node];
+ if (!n)
continue;
- drain_freelist(cachep, l3, l3->free_objects);
+ drain_freelist(cachep, n, slabs_tofree(cachep, n));
}
}
-static int __cpuinit cpuup_prepare(long cpu)
+static int cpuup_prepare(long cpu)
{
struct kmem_cache *cachep;
- struct kmem_list3 *l3 = NULL;
+ struct kmem_cache_node *n = NULL;
int node = cpu_to_mem(cpu);
int err;
@@ -1200,9 +1247,9 @@ static int __cpuinit cpuup_prepare(long cpu)
* We need to do this right in the beginning since
* alloc_arraycache's are going to use this list.
* kmalloc_node allows us to add the slab to the right
- * kmem_list3 and not this cpu's kmem_list3
+ * kmem_cache_node and not this cpu's kmem_cache_node
*/
- err = init_cache_nodelists_node(node);
+ err = init_cache_node_node(node);
if (err < 0)
goto bad;
@@ -1210,7 +1257,7 @@ static int __cpuinit cpuup_prepare(long cpu)
* Now we can go ahead with allocating the shared arrays and
* array caches
*/
- list_for_each_entry(cachep, &cache_chain, next) {
+ list_for_each_entry(cachep, &slab_caches, list) {
struct array_cache *nc;
struct array_cache *shared = NULL;
struct array_cache **alien = NULL;
@@ -1237,27 +1284,32 @@ static int __cpuinit cpuup_prepare(long cpu)
}
}
cachep->array[cpu] = nc;
- l3 = cachep->nodelists[node];
- BUG_ON(!l3);
+ n = cachep->node[node];
+ BUG_ON(!n);
- spin_lock_irq(&l3->list_lock);
- if (!l3->shared) {
+ spin_lock_irq(&n->list_lock);
+ if (!n->shared) {
/*
* We are serialised from CPU_DEAD or
* CPU_UP_CANCELLED by the cpucontrol lock
*/
- l3->shared = shared;
+ n->shared = shared;
shared = NULL;
}
#ifdef CONFIG_NUMA
- if (!l3->alien) {
- l3->alien = alien;
+ if (!n->alien) {
+ n->alien = alien;
alien = NULL;
}
#endif
- spin_unlock_irq(&l3->list_lock);
+ spin_unlock_irq(&n->list_lock);
kfree(shared);
free_alien_cache(alien);
+ if (cachep->flags & SLAB_DEBUG_OBJECTS)
+ slab_set_debugobj_lock_classes_node(cachep, node);
+ else if (!OFF_SLAB(cachep) &&
+ !(cachep->flags & SLAB_DESTROY_BY_RCU))
+ on_slab_lock_classes_node(cachep, node);
}
init_node_lock_keys(node);
@@ -1267,7 +1319,7 @@ bad:
return -ENOMEM;
}
-static int __cpuinit cpuup_callback(struct notifier_block *nfb,
+static int cpuup_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
long cpu = (long)hcpu;
@@ -1276,9 +1328,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
switch (action) {
case CPU_UP_PREPARE:
case CPU_UP_PREPARE_FROZEN:
- mutex_lock(&cache_chain_mutex);
+ mutex_lock(&slab_mutex);
err = cpuup_prepare(cpu);
- mutex_unlock(&cache_chain_mutex);
+ mutex_unlock(&slab_mutex);
break;
case CPU_ONLINE:
case CPU_ONLINE_FROZEN:
@@ -1288,12 +1340,12 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
case CPU_DOWN_PREPARE:
case CPU_DOWN_PREPARE_FROZEN:
/*
- * Shutdown cache reaper. Note that the cache_chain_mutex is
+ * Shutdown cache reaper. Note that the slab_mutex is
* held so that if cache_reap() is invoked it cannot do
* anything expensive but will only modify reap_work
* and reschedule the timer.
*/
- cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
+ cancel_delayed_work_sync(&per_cpu(slab_reap_work, cpu));
/* Now the cache_reaper is guaranteed to be not running. */
per_cpu(slab_reap_work, cpu).work.func = NULL;
break;
@@ -1305,9 +1357,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
case CPU_DEAD_FROZEN:
/*
* Even if all the cpus of a node are down, we don't free the
- * kmem_list3 of any cache. This to avoid a race between
+ * kmem_cache_node of any cache. This to avoid a race between
* cpu_down, and a kmalloc allocation from another cpu for
- * memory from the node of the cpu going down. The list3
+ * memory from the node of the cpu going down. The node
* structure is usually allocated from kmem_cache_create() and
* gets destroyed at kmem_cache_destroy().
*/
@@ -1315,15 +1367,15 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
#endif
case CPU_UP_CANCELED:
case CPU_UP_CANCELED_FROZEN:
- mutex_lock(&cache_chain_mutex);
+ mutex_lock(&slab_mutex);
cpuup_canceled(cpu);
- mutex_unlock(&cache_chain_mutex);
+ mutex_unlock(&slab_mutex);
break;
}
return notifier_from_errno(err);
}
-static struct notifier_block __cpuinitdata cpucache_notifier = {
+static struct notifier_block cpucache_notifier = {
&cpuup_callback, NULL, 0
};
@@ -1333,24 +1385,24 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
* Returns -EBUSY if all objects cannot be drained so that the node is not
* removed.
*
- * Must hold cache_chain_mutex.
+ * Must hold slab_mutex.
*/
-static int __meminit drain_cache_nodelists_node(int node)
+static int __meminit drain_cache_node_node(int node)
{
struct kmem_cache *cachep;
int ret = 0;
- list_for_each_entry(cachep, &cache_chain, next) {
- struct kmem_list3 *l3;
+ list_for_each_entry(cachep, &slab_caches, list) {
+ struct kmem_cache_node *n;
- l3 = cachep->nodelists[node];
- if (!l3)
+ n = cachep->node[node];
+ if (!n)
continue;
- drain_freelist(cachep, l3, l3->free_objects);
+ drain_freelist(cachep, n, slabs_tofree(cachep, n));
- if (!list_empty(&l3->slabs_full) ||
- !list_empty(&l3->slabs_partial)) {
+ if (!list_empty(&n->slabs_full) ||
+ !list_empty(&n->slabs_partial)) {
ret = -EBUSY;
break;
}
@@ -1371,14 +1423,14 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
switch (action) {
case MEM_GOING_ONLINE:
- mutex_lock(&cache_chain_mutex);
- ret = init_cache_nodelists_node(nid);
- mutex_unlock(&cache_chain_mutex);
+ mutex_lock(&slab_mutex);
+ ret = init_cache_node_node(nid);
+ mutex_unlock(&slab_mutex);
break;
case MEM_GOING_OFFLINE:
- mutex_lock(&cache_chain_mutex);
- ret = drain_cache_nodelists_node(nid);
- mutex_unlock(&cache_chain_mutex);
+ mutex_lock(&slab_mutex);
+ ret = drain_cache_node_node(nid);
+ mutex_unlock(&slab_mutex);
break;
case MEM_ONLINE:
case MEM_OFFLINE:
@@ -1387,253 +1439,201 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
break;
}
out:
- return ret ? notifier_from_errno(ret) : NOTIFY_OK;
+ return notifier_from_errno(ret);
}
#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
/*
- * swap the static kmem_list3 with kmalloced memory
+ * swap the static kmem_cache_node with kmalloced memory
*/
-static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
+static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
int nodeid)
{
- struct kmem_list3 *ptr;
+ struct kmem_cache_node *ptr;
- ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
+ ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
BUG_ON(!ptr);
- memcpy(ptr, list, sizeof(struct kmem_list3));
+ memcpy(ptr, list, sizeof(struct kmem_cache_node));
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->list_lock);
MAKE_ALL_LISTS(cachep, ptr, nodeid);
- cachep->nodelists[nodeid] = ptr;
+ cachep->node[nodeid] = ptr;
}
/*
- * For setting up all the kmem_list3s for cache whose buffer_size is same as
- * size of kmem_list3.
+ * For setting up all the kmem_cache_node for cache whose buffer_size is same as
+ * size of kmem_cache_node.
*/
-static void __init set_up_list3s(struct kmem_cache *cachep, int index)
+static void __init set_up_node(struct kmem_cache *cachep, int index)
{
int node;
for_each_online_node(node) {
- cachep->nodelists[node] = &initkmem_list3[index + node];
- cachep->nodelists[node]->next_reap = jiffies +
- REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+ cachep->node[node] = &init_kmem_cache_node[index + node];
+ cachep->node[node]->next_reap = jiffies +
+ REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
}
}
/*
+ * The memory after the last cpu cache pointer is used for the
+ * the node pointer.
+ */
+static void setup_node_pointer(struct kmem_cache *cachep)
+{
+ cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
+}
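setup_node_pointer() relies on a single allocation laid out as the fixed kmem_cache fields, then nr_cpu_ids per-cpu array_cache pointers, then nr_node_ids kmem_cache_node pointers; the size passed to create_boot_cache() further down, offsetof(struct kmem_cache, array[nr_cpu_ids]) + nr_node_ids * sizeof(struct kmem_cache_node *), reserves exactly that much. Roughly:

    [ fixed fields | array[0] ... array[nr_cpu_ids-1] | node[0] ... node[nr_node_ids-1] ]
                                                         ^
                                       cachep->node == &cachep->array[nr_cpu_ids]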
+
+/*
* Initialisation. Called after the page allocator have been initialised and
* before smp_init().
*/
void __init kmem_cache_init(void)
{
- size_t left_over;
- struct cache_sizes *sizes;
- struct cache_names *names;
int i;
- int order;
- int node;
+
+ BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
+ sizeof(struct rcu_head));
+ kmem_cache = &kmem_cache_boot;
+ setup_node_pointer(kmem_cache);
if (num_possible_nodes() == 1)
use_alien_caches = 0;
- for (i = 0; i < NUM_INIT_LISTS; i++) {
- kmem_list3_init(&initkmem_list3[i]);
- if (i < MAX_NUMNODES)
- cache_cache.nodelists[i] = NULL;
- }
- set_up_list3s(&cache_cache, CACHE_CACHE);
+ for (i = 0; i < NUM_INIT_LISTS; i++)
+ kmem_cache_node_init(&init_kmem_cache_node[i]);
+
+ set_up_node(kmem_cache, CACHE_CACHE);
/*
* Fragmentation resistance on low memory - only use bigger
- * page orders on machines with more than 32MB of memory.
+ * page orders on machines with more than 32MB of memory if
+ * not overridden on the command line.
*/
- if (totalram_pages > (32 << 20) >> PAGE_SHIFT)
- slab_break_gfp_order = BREAK_GFP_ORDER_HI;
+ if (!slab_max_order_set && totalram_pages > (32 << 20) >> PAGE_SHIFT)
+ slab_max_order = SLAB_MAX_ORDER_HI;
/* Bootstrap is tricky, because several objects are allocated
* from caches that do not exist yet:
- * 1) initialize the cache_cache cache: it contains the struct
- * kmem_cache structures of all caches, except cache_cache itself:
- * cache_cache is statically allocated.
+ * 1) initialize the kmem_cache cache: it contains the struct
+ * kmem_cache structures of all caches, except kmem_cache itself:
+ * kmem_cache is statically allocated.
* Initially an __init data area is used for the head array and the
- * kmem_list3 structures, it's replaced with a kmalloc allocated
+ * kmem_cache_node structures, it's replaced with a kmalloc allocated
* array at the end of the bootstrap.
* 2) Create the first kmalloc cache.
* The struct kmem_cache for the new cache is allocated normally.
* An __init data area is used for the head array.
* 3) Create the remaining kmalloc caches, with minimally sized
* head arrays.
- * 4) Replace the __init data head arrays for cache_cache and the first
+ * 4) Replace the __init data head arrays for kmem_cache and the first
* kmalloc cache with kmalloc allocated arrays.
- * 5) Replace the __init data for kmem_list3 for cache_cache and
+ * 5) Replace the __init data for kmem_cache_node for kmem_cache and
* the other cache's with kmalloc allocated memory.
* 6) Resize the head arrays of the kmalloc caches to their final sizes.
*/
- node = numa_mem_id();
-
- /* 1) create the cache_cache */
- INIT_LIST_HEAD(&cache_chain);
- list_add(&cache_cache.next, &cache_chain);
- cache_cache.colour_off = cache_line_size();
- cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
- cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
+ /* 1) create the kmem_cache */
/*
- * struct kmem_cache size depends on nr_node_ids, which
- * can be less than MAX_NUMNODES.
+ * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
*/
- cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
- nr_node_ids * sizeof(struct kmem_list3 *);
-#if DEBUG
- cache_cache.obj_size = cache_cache.buffer_size;
-#endif
- cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
- cache_line_size());
- cache_cache.reciprocal_buffer_size =
- reciprocal_value(cache_cache.buffer_size);
-
- for (order = 0; order < MAX_ORDER; order++) {
- cache_estimate(order, cache_cache.buffer_size,
- cache_line_size(), 0, &left_over, &cache_cache.num);
- if (cache_cache.num)
- break;
- }
- BUG_ON(!cache_cache.num);
- cache_cache.gfporder = order;
- cache_cache.colour = left_over / cache_cache.colour_off;
- cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) +
- sizeof(struct slab), cache_line_size());
+ create_boot_cache(kmem_cache, "kmem_cache",
+ offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+ nr_node_ids * sizeof(struct kmem_cache_node *),
+ SLAB_HWCACHE_ALIGN);
+ list_add(&kmem_cache->list, &slab_caches);
/* 2+3) create the kmalloc caches */
- sizes = malloc_sizes;
- names = cache_names;
/*
* Initialize the caches that provide memory for the array cache and the
- * kmem_list3 structures first. Without this, further allocations will
+ * kmem_cache_node structures first. Without this, further allocations will
* bug.
*/
- sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
- sizes[INDEX_AC].cs_size,
- ARCH_KMALLOC_MINALIGN,
- ARCH_KMALLOC_FLAGS|SLAB_PANIC,
- NULL);
-
- if (INDEX_AC != INDEX_L3) {
- sizes[INDEX_L3].cs_cachep =
- kmem_cache_create(names[INDEX_L3].name,
- sizes[INDEX_L3].cs_size,
- ARCH_KMALLOC_MINALIGN,
- ARCH_KMALLOC_FLAGS|SLAB_PANIC,
- NULL);
- }
+ kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
+ kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
+
+ if (INDEX_AC != INDEX_NODE)
+ kmalloc_caches[INDEX_NODE] =
+ create_kmalloc_cache("kmalloc-node",
+ kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
slab_early_init = 0;
- while (sizes->cs_size != ULONG_MAX) {
- /*
- * For performance, all the general caches are L1 aligned.
- * This should be particularly beneficial on SMP boxes, as it
- * eliminates "false sharing".
- * Note for systems short on memory removing the alignment will
- * allow tighter packing of the smaller caches.
- */
- if (!sizes->cs_cachep) {
- sizes->cs_cachep = kmem_cache_create(names->name,
- sizes->cs_size,
- ARCH_KMALLOC_MINALIGN,
- ARCH_KMALLOC_FLAGS|SLAB_PANIC,
- NULL);
- }
-#ifdef CONFIG_ZONE_DMA
- sizes->cs_dmacachep = kmem_cache_create(
- names->name_dma,
- sizes->cs_size,
- ARCH_KMALLOC_MINALIGN,
- ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
- SLAB_PANIC,
- NULL);
-#endif
- sizes++;
- names++;
- }
/* 4) Replace the bootstrap head arrays */
{
struct array_cache *ptr;
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
- BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
- memcpy(ptr, cpu_cache_get(&cache_cache),
+ memcpy(ptr, cpu_cache_get(kmem_cache),
sizeof(struct arraycache_init));
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->lock);
- cache_cache.array[smp_processor_id()] = ptr;
+ kmem_cache->array[smp_processor_id()] = ptr;
ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
- BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
+ BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
!= &initarray_generic.cache);
- memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
+ memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
sizeof(struct arraycache_init));
/*
* Do not assume that spinlocks can be initialized via memcpy:
*/
spin_lock_init(&ptr->lock);
- malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
- ptr;
+ kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
}
- /* 5) Replace the bootstrap kmem_list3's */
+ /* 5) Replace the bootstrap kmem_cache_node */
{
int nid;
for_each_online_node(nid) {
- init_list(&cache_cache, &initkmem_list3[CACHE_CACHE + nid], nid);
+ init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
- init_list(malloc_sizes[INDEX_AC].cs_cachep,
- &initkmem_list3[SIZE_AC + nid], nid);
+ init_list(kmalloc_caches[INDEX_AC],
+ &init_kmem_cache_node[SIZE_AC + nid], nid);
- if (INDEX_AC != INDEX_L3) {
- init_list(malloc_sizes[INDEX_L3].cs_cachep,
- &initkmem_list3[SIZE_L3 + nid], nid);
+ if (INDEX_AC != INDEX_NODE) {
+ init_list(kmalloc_caches[INDEX_NODE],
+ &init_kmem_cache_node[SIZE_NODE + nid], nid);
}
}
}
- g_cpucache_up = EARLY;
+ create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
}
void __init kmem_cache_init_late(void)
{
struct kmem_cache *cachep;
+ slab_state = UP;
+
/* 6) resize the head arrays to their final sizes */
- mutex_lock(&cache_chain_mutex);
- list_for_each_entry(cachep, &cache_chain, next)
+ mutex_lock(&slab_mutex);
+ list_for_each_entry(cachep, &slab_caches, list)
if (enable_cpucache(cachep, GFP_NOWAIT))
BUG();
- mutex_unlock(&cache_chain_mutex);
-
- /* Done! */
- g_cpucache_up = FULL;
+ mutex_unlock(&slab_mutex);
/* Annotate slab for lockdep -- annotate the malloc caches */
init_lock_keys();
+ /* Done! */
+ slab_state = FULL;
+
/*
* Register a cpu startup notifier callback that initializes
* cpu_cache_get for all new cpus
@@ -1643,7 +1643,7 @@ void __init kmem_cache_init_late(void)
#ifdef CONFIG_NUMA
/*
* Register a memory hotplug callback that initializes and frees
- * nodelists.
+ * node.
*/
hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
#endif
@@ -1663,10 +1663,66 @@ static int __init cpucache_init(void)
*/
for_each_online_cpu(cpu)
start_cpu_timer(cpu);
+
+ /* Done! */
+ slab_state = FULL;
return 0;
}
__initcall(cpucache_init);
+static noinline void
+slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
+{
+#if DEBUG
+ struct kmem_cache_node *n;
+ struct page *page;
+ unsigned long flags;
+ int node;
+ static DEFINE_RATELIMIT_STATE(slab_oom_rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
+
+ if ((gfpflags & __GFP_NOWARN) || !__ratelimit(&slab_oom_rs))
+ return;
+
+ printk(KERN_WARNING
+ "SLAB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+ nodeid, gfpflags);
+ printk(KERN_WARNING " cache: %s, object size: %d, order: %d\n",
+ cachep->name, cachep->size, cachep->gfporder);
+
+ for_each_online_node(node) {
+ unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
+ unsigned long active_slabs = 0, num_slabs = 0;
+
+ n = cachep->node[node];
+ if (!n)
+ continue;
+
+ spin_lock_irqsave(&n->list_lock, flags);
+ list_for_each_entry(page, &n->slabs_full, lru) {
+ active_objs += cachep->num;
+ active_slabs++;
+ }
+ list_for_each_entry(page, &n->slabs_partial, lru) {
+ active_objs += page->active;
+ active_slabs++;
+ }
+ list_for_each_entry(page, &n->slabs_free, lru)
+ num_slabs++;
+
+ free_objects += n->free_objects;
+ spin_unlock_irqrestore(&n->list_lock, flags);
+
+ num_slabs += active_slabs;
+ num_objs = num_slabs * cachep->num;
+ printk(KERN_WARNING
+ " node %d: slabs: %ld/%ld, objs: %ld/%ld, free: %ld\n",
+ node, active_slabs, num_slabs, active_objs, num_objs,
+ free_objects);
+ }
+#endif
+}
+
/*
* Interface to system's page allocator. No need to hold the cache-lock.
*
@@ -1674,27 +1730,29 @@ __initcall(cpucache_init);
* did not request dmaable memory, we might get it, but that
* would be relatively rare and ignorable.
*/
-static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
+ int nodeid)
{
struct page *page;
int nr_pages;
- int i;
-#ifndef CONFIG_MMU
- /*
- * Nommu uses slab's for process anonymous memory allocations, and thus
- * requires __GFP_COMP to properly refcount higher order allocations
- */
- flags |= __GFP_COMP;
-#endif
-
- flags |= cachep->gfpflags;
+ flags |= cachep->allocflags;
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
flags |= __GFP_RECLAIMABLE;
+ if (memcg_charge_slab(cachep, flags, cachep->gfporder))
+ return NULL;
+
page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
- if (!page)
+ if (!page) {
+ memcg_uncharge_slab(cachep, cachep->gfporder);
+ slab_out_of_memory(cachep, flags, nodeid);
return NULL;
+ }
+
+ /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
+ if (unlikely(page->pfmemalloc))
+ pfmemalloc_active = true;
nr_pages = (1 << cachep->gfporder);
if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
@@ -1703,8 +1761,9 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
else
add_zone_page_state(page_zone(page),
NR_SLAB_UNRECLAIMABLE, nr_pages);
- for (i = 0; i < nr_pages; i++)
- __SetPageSlab(page + i);
+ __SetPageSlab(page);
+ if (page->pfmemalloc)
+ SetPageSlabPfmemalloc(page);
if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1715,17 +1774,15 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
kmemcheck_mark_unallocated_pages(page, nr_pages);
}
- return page_address(page);
+ return page;
}
/*
* Interface to system's page release.
*/
-static void kmem_freepages(struct kmem_cache *cachep, void *addr)
+static void kmem_freepages(struct kmem_cache *cachep, struct page *page)
{
- unsigned long i = (1 << cachep->gfporder);
- struct page *page = virt_to_page(addr);
- const unsigned long nr_freed = i;
+ const unsigned long nr_freed = (1 << cachep->gfporder);
kmemcheck_free_shadow(page, cachep->gfporder);
@@ -1735,24 +1792,28 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
else
sub_zone_page_state(page_zone(page),
NR_SLAB_UNRECLAIMABLE, nr_freed);
- while (i--) {
- BUG_ON(!PageSlab(page));
- __ClearPageSlab(page);
- page++;
- }
+
+ BUG_ON(!PageSlab(page));
+ __ClearPageSlabPfmemalloc(page);
+ __ClearPageSlab(page);
+ page_mapcount_reset(page);
+ page->mapping = NULL;
+
if (current->reclaim_state)
current->reclaim_state->reclaimed_slab += nr_freed;
- free_pages((unsigned long)addr, cachep->gfporder);
+ __free_pages(page, cachep->gfporder);
+ memcg_uncharge_slab(cachep, cachep->gfporder);
}
static void kmem_rcu_free(struct rcu_head *head)
{
- struct slab_rcu *slab_rcu = (struct slab_rcu *)head;
- struct kmem_cache *cachep = slab_rcu->cachep;
+ struct kmem_cache *cachep;
+ struct page *page;
- kmem_freepages(cachep, slab_rcu->addr);
- if (OFF_SLAB(cachep))
- kmem_cache_free(cachep->slabp_cache, slab_rcu);
+ page = container_of(head, struct page, rcu_head);
+ cachep = page->slab_cache;
+
+ kmem_freepages(cachep, page);
}
#if DEBUG
@@ -1761,7 +1822,7 @@ static void kmem_rcu_free(struct rcu_head *head)
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
unsigned long caller)
{
- int size = obj_size(cachep);
+ int size = cachep->object_size;
addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
@@ -1793,7 +1854,7 @@ static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
static void poison_obj(struct kmem_cache *cachep, void *addr, unsigned char val)
{
- int size = obj_size(cachep);
+ int size = cachep->object_size;
addr = &((char *)addr)[obj_offset(cachep)];
memset(addr, val, size);
@@ -1806,15 +1867,15 @@ static void dump_line(char *data, int offset, int limit)
unsigned char error = 0;
int bad_count = 0;
- printk(KERN_ERR "%03x:", offset);
+ printk(KERN_ERR "%03x: ", offset);
for (i = 0; i < limit; i++) {
if (data[offset + i] != POISON_FREE) {
error = data[offset + i];
bad_count++;
}
- printk(" %02x", (unsigned char)data[offset + i]);
}
- printk("\n");
+ print_hex_dump(KERN_CONT, "", 0, 16, 1,
+ &data[offset], limit, 1);
if (bad_count == 1) {
error ^= POISON_FREE;
@@ -1846,14 +1907,12 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
}
if (cachep->flags & SLAB_STORE_USER) {
- printk(KERN_ERR "Last user: [<%p>]",
- *dbg_userword(cachep, objp));
- print_symbol("(%s)",
- (unsigned long)*dbg_userword(cachep, objp));
- printk("\n");
+ printk(KERN_ERR "Last user: [<%p>](%pSR)\n",
+ *dbg_userword(cachep, objp),
+ *dbg_userword(cachep, objp));
}
realobj = (char *)objp + obj_offset(cachep);
- size = obj_size(cachep);
+ size = cachep->object_size;
for (i = 0; i < size && lines; i += 16, lines--) {
int limit;
limit = 16;
@@ -1870,7 +1929,7 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
int lines = 0;
realobj = (char *)objp + obj_offset(cachep);
- size = obj_size(cachep);
+ size = cachep->object_size;
for (i = 0; i < size; i++) {
char exp = POISON_FREE;
@@ -1882,8 +1941,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
/* Print header */
if (lines == 0) {
printk(KERN_ERR
- "Slab corruption: %s start=%p, len=%d\n",
- cachep->name, realobj, size);
+ "Slab corruption (%s): %s start=%p, len=%d\n",
+ print_tainted(), cachep->name, realobj, size);
print_objinfo(cachep, objp, 0);
}
/* Hexdump the affected line */
@@ -1903,19 +1962,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
/* Print some data about the neighboring objects, if they
* exist:
*/
- struct slab *slabp = virt_to_slab(objp);
+ struct page *page = virt_to_head_page(objp);
unsigned int objnr;
- objnr = obj_to_index(cachep, slabp, objp);
+ objnr = obj_to_index(cachep, page, objp);
if (objnr) {
- objp = index_to_obj(cachep, slabp, objnr - 1);
+ objp = index_to_obj(cachep, page, objnr - 1);
realobj = (char *)objp + obj_offset(cachep);
printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
realobj, size);
print_objinfo(cachep, objp, 2);
}
if (objnr + 1 < cachep->num) {
- objp = index_to_obj(cachep, slabp, objnr + 1);
+ objp = index_to_obj(cachep, page, objnr + 1);
realobj = (char *)objp + obj_offset(cachep);
printk(KERN_ERR "Next obj: start=%p, len=%d\n",
realobj, size);
@@ -1926,18 +1985,19 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
#endif
#if DEBUG
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+ struct page *page)
{
int i;
for (i = 0; i < cachep->num; i++) {
- void *objp = index_to_obj(cachep, slabp, i);
+ void *objp = index_to_obj(cachep, page, i);
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
- if (cachep->buffer_size % PAGE_SIZE == 0 &&
+ if (cachep->size % PAGE_SIZE == 0 &&
OFF_SLAB(cachep))
kernel_map_pages(virt_to_page(objp),
- cachep->buffer_size / PAGE_SIZE, 1);
+ cachep->size / PAGE_SIZE, 1);
else
check_poison_obj(cachep, objp);
#else
@@ -1955,7 +2015,8 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
}
}
#else
-static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy_debugcheck(struct kmem_cache *cachep,
+ struct page *page)
{
}
#endif
@@ -1963,52 +2024,42 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep, struct slab *slab
/**
* slab_destroy - destroy and release all objects in a slab
* @cachep: cache pointer being destroyed
- * @slabp: slab pointer being destroyed
+ * @page: page pointer being destroyed
*
* Destroy all the objs in a slab, and release the mem back to the system.
 * Before calling, the slab must have been unlinked from the cache. The
* cache-lock is not held/needed.
*/
-static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
+static void slab_destroy(struct kmem_cache *cachep, struct page *page)
{
- void *addr = slabp->s_mem - slabp->colouroff;
+ void *freelist;
- slab_destroy_debugcheck(cachep, slabp);
+ freelist = page->freelist;
+ slab_destroy_debugcheck(cachep, page);
if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
- struct slab_rcu *slab_rcu;
+ struct rcu_head *head;
+
+ /*
+ * RCU free overloads the RCU head over the LRU.
+	 * slab_page has been overloaded over the LRU,
+	 * however it is not used from now on, so
+	 * we can use it safely.
+ */
+ head = (void *)&page->rcu_head;
+ call_rcu(head, kmem_rcu_free);
- slab_rcu = (struct slab_rcu *)slabp;
- slab_rcu->cachep = cachep;
- slab_rcu->addr = addr;
- call_rcu(&slab_rcu->head, kmem_rcu_free);
} else {
- kmem_freepages(cachep, addr);
- if (OFF_SLAB(cachep))
- kmem_cache_free(cachep->slabp_cache, slabp);
+ kmem_freepages(cachep, page);
}
-}
-static void __kmem_cache_destroy(struct kmem_cache *cachep)
-{
- int i;
- struct kmem_list3 *l3;
-
- for_each_online_cpu(i)
- kfree(cachep->array[i]);
-
- /* NUMA: free the list3 structures */
- for_each_online_node(i) {
- l3 = cachep->nodelists[i];
- if (l3) {
- kfree(l3->shared);
- free_alien_cache(l3->alien);
- kfree(l3);
- }
- }
- kmem_cache_free(&cache_cache, cachep);
+ /*
+	 * From now on, we don't use the freelist,
+	 * although the actual page can be freed in RCU context.
+ */
+ if (OFF_SLAB(cachep))
+ kmem_cache_free(cachep->freelist_cache, freelist);
}
-
/**
* calculate_slab_order - calculate size (page order) of slabs
* @cachep: pointer to the cache that is being created
@@ -2037,14 +2088,21 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
if (!num)
continue;
+ /* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */
+ if (num > SLAB_OBJ_MAX_NUM)
+ break;
+
if (flags & CFLGS_OFF_SLAB) {
+ size_t freelist_size_per_obj = sizeof(freelist_idx_t);
/*
* Max number of objs-per-slab for caches which
* use off-slab slabs. Needed to avoid a possible
* looping condition in cache_grow().
*/
- offslab_limit = size - sizeof(struct slab);
- offslab_limit /= sizeof(kmem_bufctl_t);
+ if (IS_ENABLED(CONFIG_DEBUG_SLAB_LEAK))
+ freelist_size_per_obj += sizeof(char);
+ offslab_limit = size;
+ offslab_limit /= freelist_size_per_obj;
if (num > offslab_limit)
break;
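/*
 * The off-slab limit computed above: a freelist allocation of `size` bytes
 * holds one freelist_idx_t (plus one status byte when leak tracking is on)
 * per object, so it can describe at most size / per_obj objects.  A tiny
 * userspace check of that arithmetic; the sizes and the leak-tracking flag
 * below are assumptions of this sketch, not a real config.
 */
#include <stdio.h>

int main(void)
{
	typedef unsigned char freelist_idx_t;	/* the byte-wide index variant */
	unsigned int size = 256;		/* candidate freelist size      */
	int leak_tracking = 1;			/* pretend leak debug is on     */
	unsigned int per_obj = sizeof(freelist_idx_t) +
			       (leak_tracking ? sizeof(char) : 0);
	unsigned int offslab_limit = size / per_obj;

	printf("per_obj=%u bytes -> at most %u objects per off-slab freelist\n",
	       per_obj, offslab_limit);
	return 0;
}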
@@ -2067,7 +2125,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
* Large number of objects is good, but very large slabs are
* currently bad for the gfp()s.
*/
- if (gfporder >= slab_break_gfp_order)
+ if (gfporder >= slab_max_order)
break;
/*
@@ -2081,48 +2139,57 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
{
- if (g_cpucache_up == FULL)
+ if (slab_state >= FULL)
return enable_cpucache(cachep, gfp);
- if (g_cpucache_up == NONE) {
+ if (slab_state == DOWN) {
/*
- * Note: the first kmem_cache_create must create the cache
+ * Note: Creation of first cache (kmem_cache).
+ * The setup_node is taken care
+ * of by the caller of __kmem_cache_create
+ */
+ cachep->array[smp_processor_id()] = &initarray_generic.cache;
+ slab_state = PARTIAL;
+ } else if (slab_state == PARTIAL) {
+ /*
+ * Note: the second kmem_cache_create must create the cache
* that's used by kmalloc(24), otherwise the creation of
* further caches will BUG().
*/
cachep->array[smp_processor_id()] = &initarray_generic.cache;
/*
- * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
- * the first cache, then we need to set up all its list3s,
+ * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
+	 * the second cache, then we need to set up all its nodes,
* otherwise the creation of further caches will BUG().
*/
- set_up_list3s(cachep, SIZE_AC);
- if (INDEX_AC == INDEX_L3)
- g_cpucache_up = PARTIAL_L3;
+ set_up_node(cachep, SIZE_AC);
+ if (INDEX_AC == INDEX_NODE)
+ slab_state = PARTIAL_NODE;
else
- g_cpucache_up = PARTIAL_AC;
+ slab_state = PARTIAL_ARRAYCACHE;
} else {
+ /* Remaining boot caches */
cachep->array[smp_processor_id()] =
kmalloc(sizeof(struct arraycache_init), gfp);
- if (g_cpucache_up == PARTIAL_AC) {
- set_up_list3s(cachep, SIZE_L3);
- g_cpucache_up = PARTIAL_L3;
+ if (slab_state == PARTIAL_ARRAYCACHE) {
+ set_up_node(cachep, SIZE_NODE);
+ slab_state = PARTIAL_NODE;
} else {
int node;
for_each_online_node(node) {
- cachep->nodelists[node] =
- kmalloc_node(sizeof(struct kmem_list3),
+ cachep->node[node] =
+ kmalloc_node(sizeof(struct kmem_cache_node),
gfp, node);
- BUG_ON(!cachep->nodelists[node]);
- kmem_list3_init(cachep->nodelists[node]);
+ BUG_ON(!cachep->node[node]);
+ kmem_cache_node_init(cachep->node[node]);
}
}
}
- cachep->nodelists[numa_mem_id()]->next_reap =
- jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
+ cachep->node[numa_mem_id()]->next_reap =
+ jiffies + REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
cpu_cache_get(cachep)->avail = 0;
cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
@@ -2134,22 +2201,14 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
}
/**
- * kmem_cache_create - Create a cache.
- * @name: A string which is used in /proc/slabinfo to identify this cache.
- * @size: The size of objects to be created in this cache.
- * @align: The required alignment for the objects.
+ * __kmem_cache_create - Create a cache.
+ * @cachep: cache management descriptor
* @flags: SLAB flags
- * @ctor: A constructor for the objects.
*
* Returns a ptr to the cache on success, NULL on failure.
 * Cannot be called within an interrupt, but can be interrupted.
* The @ctor is run when new pages are allocated by the cache.
*
- * @name must be valid until the cache is destroyed. This implies that
- * the module calling this has to destroy the cache before getting unloaded.
- * Note that kmem_cache_name() is not guaranteed to return the same pointer,
- * therefore applications must manage it themselves.
- *
* The flags are
*
* %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
@@ -2162,60 +2221,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
* cacheline. This can be beneficial if you're counting cycles as closely
* as davem.
*/
-struct kmem_cache *
-kmem_cache_create (const char *name, size_t size, size_t align,
- unsigned long flags, void (*ctor)(void *))
+int
+__kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
{
- size_t left_over, slab_size, ralign;
- struct kmem_cache *cachep = NULL, *pc;
+ size_t left_over, freelist_size, ralign;
gfp_t gfp;
-
- /*
- * Sanity checks... these are all serious usage bugs.
- */
- if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
- size > KMALLOC_MAX_SIZE) {
- printk(KERN_ERR "%s: Early error in slab %s\n", __func__,
- name);
- BUG();
- }
-
- /*
- * We use cache_chain_mutex to ensure a consistent view of
- * cpu_online_mask as well. Please see cpuup_callback
- */
- if (slab_is_available()) {
- get_online_cpus();
- mutex_lock(&cache_chain_mutex);
- }
-
- list_for_each_entry(pc, &cache_chain, next) {
- char tmp;
- int res;
-
- /*
- * This happens when the module gets unloaded and doesn't
- * destroy its slab cache and no-one else reuses the vmalloc
- * area of the module. Print a warning.
- */
- res = probe_kernel_address(pc->name, tmp);
- if (res) {
- printk(KERN_ERR
- "SLAB: cache with size %d has lost its name\n",
- pc->buffer_size);
- continue;
- }
-
- if (!strcmp(pc->name, name)) {
- printk(KERN_ERR
- "kmem_cache_create: duplicate cache %s\n", name);
- dump_stack();
- goto oops;
- }
- }
+ int err;
+ size_t size = cachep->size;
#if DEBUG
- WARN_ON(strchr(name, ' ')); /* It confuses parsers */
#if FORCED_DEBUG
/*
* Enable redzoning and last user accounting, except for caches with
@@ -2232,11 +2246,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
if (flags & SLAB_DESTROY_BY_RCU)
BUG_ON(flags & SLAB_POISON);
#endif
- /*
- * Always checks flags, a caller might be expecting debug support which
- * isn't available.
- */
- BUG_ON(flags & ~CREATE_MASK);
/*
* Check that size is in terms of words. This is needed to avoid
@@ -2248,22 +2257,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
size &= ~(BYTES_PER_WORD - 1);
}
- /* calculate the final buffer alignment: */
-
- /* 1) arch recommendation: can be overridden for debug */
- if (flags & SLAB_HWCACHE_ALIGN) {
- /*
- * Default alignment: as specified by the arch code. Except if
- * an object is really small, then squeeze multiple objects into
- * one cacheline.
- */
- ralign = cache_line_size();
- while (size <= ralign / 2)
- ralign /= 2;
- } else {
- ralign = BYTES_PER_WORD;
- }
-
/*
* Redzoning and user store require word alignment or possibly larger.
* Note this will be overridden by architecture or caller mandated
@@ -2280,34 +2273,25 @@ kmem_cache_create (const char *name, size_t size, size_t align,
size &= ~(REDZONE_ALIGN - 1);
}
- /* 2) arch mandated alignment */
- if (ralign < ARCH_SLAB_MINALIGN) {
- ralign = ARCH_SLAB_MINALIGN;
- }
/* 3) caller mandated alignment */
- if (ralign < align) {
- ralign = align;
+ if (ralign < cachep->align) {
+ ralign = cachep->align;
}
- /* disable debug if not aligning with REDZONE_ALIGN */
- if (ralign & (__alignof__(unsigned long long) - 1))
+ /* disable debug if necessary */
+ if (ralign > __alignof__(unsigned long long))
flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
/*
* 4) Store it.
*/
- align = ralign;
+ cachep->align = ralign;
if (slab_is_available())
gfp = GFP_KERNEL;
else
gfp = GFP_NOWAIT;
- /* Get cache's description obj. */
- cachep = kmem_cache_zalloc(&cache_cache, gfp);
- if (!cachep)
- goto oops;
-
+ setup_node_pointer(cachep);
#if DEBUG
- cachep->obj_size = size;
/*
* Both debugging options require word-alignment which is calculated
@@ -2315,8 +2299,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
*/
if (flags & SLAB_RED_ZONE) {
/* add space for red zone words */
- cachep->obj_offset += align;
- size += align + sizeof(unsigned long long);
+ cachep->obj_offset += sizeof(unsigned long long);
+ size += 2 * sizeof(unsigned long long);
}
if (flags & SLAB_STORE_USER) {
/* user store requires one word storage behind the end of
@@ -2329,9 +2313,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
size += BYTES_PER_WORD;
}
#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
- if (size >= malloc_sizes[INDEX_L3 + 1].cs_size
- && cachep->obj_size > cache_line_size() && ALIGN(size, align) < PAGE_SIZE) {
- cachep->obj_offset += PAGE_SIZE - ALIGN(size, align);
+ if (size >= kmalloc_size(INDEX_NODE + 1)
+ && cachep->object_size > cache_line_size()
+ && ALIGN(size, cachep->align) < PAGE_SIZE) {
+ cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
size = PAGE_SIZE;
}
#endif
@@ -2343,7 +2328,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
* it too early on. Always use on-slab management when
* SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
*/
- if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
+ if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init &&
!(flags & SLAB_NOLEAKTRACE))
/*
* Size is large, assume best to place the slab management obj
@@ -2351,33 +2336,33 @@ kmem_cache_create (const char *name, size_t size, size_t align,
*/
flags |= CFLGS_OFF_SLAB;
- size = ALIGN(size, align);
+ size = ALIGN(size, cachep->align);
+ /*
+ * We should restrict the number of objects in a slab to implement
+	 * a byte-sized index. See the comment on the SLAB_OBJ_MIN_SIZE definition.
+ */
+ if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE)
+ size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align);
- left_over = calculate_slab_order(cachep, size, align, flags);
+ left_over = calculate_slab_order(cachep, size, cachep->align, flags);
- if (!cachep->num) {
- printk(KERN_ERR
- "kmem_cache_create: couldn't create cache %s.\n", name);
- kmem_cache_free(&cache_cache, cachep);
- cachep = NULL;
- goto oops;
- }
- slab_size = ALIGN(cachep->num * sizeof(kmem_bufctl_t)
- + sizeof(struct slab), align);
+ if (!cachep->num)
+ return -E2BIG;
+
+ freelist_size = calculate_freelist_size(cachep->num, cachep->align);
/*
* If the slab has been placed off-slab, and we have enough space then
* move it on-slab. This is at the expense of any extra colouring.
*/
- if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
+ if (flags & CFLGS_OFF_SLAB && left_over >= freelist_size) {
flags &= ~CFLGS_OFF_SLAB;
- left_over -= slab_size;
+ left_over -= freelist_size;
}
if (flags & CFLGS_OFF_SLAB) {
/* really off slab. No need for manual alignment */
- slab_size =
- cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+ freelist_size = calculate_freelist_size(cachep->num, 0);
#ifdef CONFIG_PAGE_POISONING
/* If we're going to use the generic kernel_map_pages()
@@ -2391,50 +2376,48 @@ kmem_cache_create (const char *name, size_t size, size_t align,
cachep->colour_off = cache_line_size();
/* Offset must be a multiple of the alignment. */
- if (cachep->colour_off < align)
- cachep->colour_off = align;
+ if (cachep->colour_off < cachep->align)
+ cachep->colour_off = cachep->align;
cachep->colour = left_over / cachep->colour_off;
- cachep->slab_size = slab_size;
+ cachep->freelist_size = freelist_size;
cachep->flags = flags;
- cachep->gfpflags = 0;
+ cachep->allocflags = __GFP_COMP;
if (CONFIG_ZONE_DMA_FLAG && (flags & SLAB_CACHE_DMA))
- cachep->gfpflags |= GFP_DMA;
- cachep->buffer_size = size;
+ cachep->allocflags |= GFP_DMA;
+ cachep->size = size;
cachep->reciprocal_buffer_size = reciprocal_value(size);
if (flags & CFLGS_OFF_SLAB) {
- cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);
+ cachep->freelist_cache = kmalloc_slab(freelist_size, 0u);
/*
- * This is a possibility for one of the malloc_sizes caches.
+ * This is a possibility for one of the kmalloc_{dma,}_caches.
* But since we go off slab only for object size greater than
- * PAGE_SIZE/8, and malloc_sizes gets created in ascending order,
- * this should not happen at all.
+ * PAGE_SIZE/8, and kmalloc_{dma,}_caches get created
+		 * in ascending order, this should not happen at all.
* But leave a BUG_ON for some lucky dude.
*/
- BUG_ON(ZERO_OR_NULL_PTR(cachep->slabp_cache));
+ BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache));
}
- cachep->ctor = ctor;
- cachep->name = name;
- if (setup_cpu_cache(cachep, gfp)) {
- __kmem_cache_destroy(cachep);
- cachep = NULL;
- goto oops;
+ err = setup_cpu_cache(cachep, gfp);
+ if (err) {
+ __kmem_cache_shutdown(cachep);
+ return err;
}
- /* cache setup completed, link it into the list */
- list_add(&cachep->next, &cache_chain);
-oops:
- if (!cachep && (flags & SLAB_PANIC))
- panic("kmem_cache_create(): failed to create slab `%s'\n",
- name);
- if (slab_is_available()) {
- mutex_unlock(&cache_chain_mutex);
- put_online_cpus();
- }
- return cachep;
+ if (flags & SLAB_DEBUG_OBJECTS) {
+ /*
+ * Would deadlock through slab_destroy()->call_rcu()->
+ * debug_object_activate()->kmem_cache_alloc().
+ */
+ WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
+
+ slab_set_debugobj_lock_classes(cachep);
+ } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
+ on_slab_lock_classes(cachep);
+
+ return 0;
}
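/*
 * A simplified model of the sizing done in __kmem_cache_create() and
 * calculate_slab_order() above: round the object size up to the alignment,
 * see how many objects fit in a 2^order-page slab, and turn the leftover
 * bytes into colour slots of cache_line_size() each; cache_grow() then
 * cycles colour_next through those slots.  The page size, cache line size
 * and the one-byte on-slab freelist entry per object are assumptions of
 * this sketch, not values taken from the kernel.
 */
#include <stdio.h>

#define PAGE_SZ		4096u
#define CACHE_LINE	64u

static unsigned int align_up(unsigned int x, unsigned int a)
{
	return (x + a - 1) & ~(a - 1);
}

int main(void)
{
	unsigned int obj_size = 900, align = 8, gfporder = 0;
	unsigned int size = align_up(obj_size, align);
	unsigned int slab_bytes = PAGE_SZ << gfporder;
	unsigned int num = slab_bytes / (size + 1);	/* +1: freelist byte per object */
	unsigned int freelist = align_up(num, align);
	unsigned int left_over = slab_bytes - num * size - freelist;
	unsigned int colour = left_over / CACHE_LINE;
	unsigned int colour_next, offset;

	printf("size=%u num=%u left_over=%u colour slots=%u\n",
	       size, num, left_over, colour);

	/* cache_grow(): pick the colour offset for each new slab in turn */
	for (colour_next = 0; colour_next < 5; colour_next++) {
		offset = (colour ? colour_next % colour : 0) * CACHE_LINE;
		printf("slab %u starts objects at colour offset %u\n",
		       colour_next, offset);
	}
	return 0;
}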
-EXPORT_SYMBOL(kmem_cache_create);
#if DEBUG
static void check_irq_off(void)
@@ -2451,7 +2434,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock);
+ assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
#endif
}
@@ -2459,7 +2442,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
{
#ifdef CONFIG_SMP
check_irq_off();
- assert_spin_locked(&cachep->nodelists[node]->list_lock);
+ assert_spin_locked(&cachep->node[node]->list_lock);
#endif
}
@@ -2470,7 +2453,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
#define check_spinlock_acquired_node(x, y) do { } while(0)
#endif
-static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
struct array_cache *ac,
int force, int node);
@@ -2482,29 +2465,29 @@ static void do_drain(void *arg)
check_irq_off();
ac = cpu_cache_get(cachep);
- spin_lock(&cachep->nodelists[node]->list_lock);
+ spin_lock(&cachep->node[node]->list_lock);
free_block(cachep, ac->entry, ac->avail, node);
- spin_unlock(&cachep->nodelists[node]->list_lock);
+ spin_unlock(&cachep->node[node]->list_lock);
ac->avail = 0;
}
static void drain_cpu_caches(struct kmem_cache *cachep)
{
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
int node;
on_each_cpu(do_drain, cachep, 1);
check_irq_on();
for_each_online_node(node) {
- l3 = cachep->nodelists[node];
- if (l3 && l3->alien)
- drain_alien_cache(cachep, l3->alien);
+ n = cachep->node[node];
+ if (n && n->alien)
+ drain_alien_cache(cachep, n->alien);
}
for_each_online_node(node) {
- l3 = cachep->nodelists[node];
- if (l3)
- drain_array(cachep, l3, l3->shared, 1, node);
+ n = cachep->node[node];
+ if (n)
+ drain_array(cachep, n, n->shared, 1, node);
}
}
@@ -2515,182 +2498,139 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
* Returns the actual number of slabs released.
*/
static int drain_freelist(struct kmem_cache *cache,
- struct kmem_list3 *l3, int tofree)
+ struct kmem_cache_node *n, int tofree)
{
struct list_head *p;
int nr_freed;
- struct slab *slabp;
+ struct page *page;
nr_freed = 0;
- while (nr_freed < tofree && !list_empty(&l3->slabs_free)) {
+ while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
- spin_lock_irq(&l3->list_lock);
- p = l3->slabs_free.prev;
- if (p == &l3->slabs_free) {
- spin_unlock_irq(&l3->list_lock);
+ spin_lock_irq(&n->list_lock);
+ p = n->slabs_free.prev;
+ if (p == &n->slabs_free) {
+ spin_unlock_irq(&n->list_lock);
goto out;
}
- slabp = list_entry(p, struct slab, list);
+ page = list_entry(p, struct page, lru);
#if DEBUG
- BUG_ON(slabp->inuse);
+ BUG_ON(page->active);
#endif
- list_del(&slabp->list);
+ list_del(&page->lru);
/*
* Safe to drop the lock. The slab is no longer linked
* to the cache.
*/
- l3->free_objects -= cache->num;
- spin_unlock_irq(&l3->list_lock);
- slab_destroy(cache, slabp);
+ n->free_objects -= cache->num;
+ spin_unlock_irq(&n->list_lock);
+ slab_destroy(cache, page);
nr_freed++;
}
out:
return nr_freed;
}
-/* Called with cache_chain_mutex held to protect against cpu hotplug */
-static int __cache_shrink(struct kmem_cache *cachep)
+int __kmem_cache_shrink(struct kmem_cache *cachep)
{
int ret = 0, i = 0;
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
drain_cpu_caches(cachep);
check_irq_on();
for_each_online_node(i) {
- l3 = cachep->nodelists[i];
- if (!l3)
+ n = cachep->node[i];
+ if (!n)
continue;
- drain_freelist(cachep, l3, l3->free_objects);
+ drain_freelist(cachep, n, slabs_tofree(cachep, n));
- ret += !list_empty(&l3->slabs_full) ||
- !list_empty(&l3->slabs_partial);
+ ret += !list_empty(&n->slabs_full) ||
+ !list_empty(&n->slabs_partial);
}
return (ret ? 1 : 0);
}
-/**
- * kmem_cache_shrink - Shrink a cache.
- * @cachep: The cache to shrink.
- *
- * Releases as many slabs as possible for a cache.
- * To help debugging, a zero exit status indicates all slabs were released.
- */
-int kmem_cache_shrink(struct kmem_cache *cachep)
+int __kmem_cache_shutdown(struct kmem_cache *cachep)
{
- int ret;
- BUG_ON(!cachep || in_interrupt());
+ int i;
+ struct kmem_cache_node *n;
+ int rc = __kmem_cache_shrink(cachep);
- get_online_cpus();
- mutex_lock(&cache_chain_mutex);
- ret = __cache_shrink(cachep);
- mutex_unlock(&cache_chain_mutex);
- put_online_cpus();
- return ret;
-}
-EXPORT_SYMBOL(kmem_cache_shrink);
+ if (rc)
+ return rc;
-/**
- * kmem_cache_destroy - delete a cache
- * @cachep: the cache to destroy
- *
- * Remove a &struct kmem_cache object from the slab cache.
- *
- * It is expected this function will be called by a module when it is
- * unloaded. This will remove the cache completely, and avoid a duplicate
- * cache being allocated each time a module is loaded and unloaded, if the
- * module doesn't have persistent in-kernel storage across loads and unloads.
- *
- * The cache must be empty before calling this function.
- *
- * The caller must guarantee that noone will allocate memory from the cache
- * during the kmem_cache_destroy().
- */
-void kmem_cache_destroy(struct kmem_cache *cachep)
-{
- BUG_ON(!cachep || in_interrupt());
+ for_each_online_cpu(i)
+ kfree(cachep->array[i]);
- /* Find the cache in the chain of caches. */
- get_online_cpus();
- mutex_lock(&cache_chain_mutex);
- /*
- * the chain is never empty, cache_cache is never destroyed
- */
- list_del(&cachep->next);
- if (__cache_shrink(cachep)) {
- slab_error(cachep, "Can't free all objects");
- list_add(&cachep->next, &cache_chain);
- mutex_unlock(&cache_chain_mutex);
- put_online_cpus();
- return;
+ /* NUMA: free the node structures */
+ for_each_online_node(i) {
+ n = cachep->node[i];
+ if (n) {
+ kfree(n->shared);
+ free_alien_cache(n->alien);
+ kfree(n);
+ }
}
-
- if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
- rcu_barrier();
-
- __kmem_cache_destroy(cachep);
- mutex_unlock(&cache_chain_mutex);
- put_online_cpus();
+ return 0;
}
-EXPORT_SYMBOL(kmem_cache_destroy);
/*
* Get the memory for a slab management obj.
- * For a slab cache when the slab descriptor is off-slab, slab descriptors
- * always come from malloc_sizes caches. The slab descriptor cannot
- * come from the same cache which is getting created because,
- * when we are searching for an appropriate cache for these
- * descriptors in kmem_cache_create, we search through the malloc_sizes array.
- * If we are creating a malloc_sizes cache here it would not be visible to
- * kmem_find_general_cachep till the initialization is complete.
- * Hence we cannot have slabp_cache same as the original cache.
+ *
+ * For a slab cache when the slab descriptor is off-slab, the
+ * slab descriptor can't come from the same cache which is being created,
+ * because if that were the case, it would mean we defer the creation of
+ * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
+ * We would eventually call down to __kmem_cache_create(), which
+ * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
+ * This is a "chicken-and-egg" problem.
+ *
+ * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
+ * which are all initialized during kmem_cache_init().
*/
-static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
- int colour_off, gfp_t local_flags,
- int nodeid)
+static void *alloc_slabmgmt(struct kmem_cache *cachep,
+ struct page *page, int colour_off,
+ gfp_t local_flags, int nodeid)
{
- struct slab *slabp;
+ void *freelist;
+ void *addr = page_address(page);
if (OFF_SLAB(cachep)) {
/* Slab management obj is off-slab. */
- slabp = kmem_cache_alloc_node(cachep->slabp_cache,
+ freelist = kmem_cache_alloc_node(cachep->freelist_cache,
local_flags, nodeid);
- /*
- * If the first object in the slab is leaked (it's allocated
- * but no one has a reference to it), we want to make sure
- * kmemleak does not treat the ->s_mem pointer as a reference
- * to the object. Otherwise we will not report the leak.
- */
- kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
- local_flags);
- if (!slabp)
+ if (!freelist)
return NULL;
} else {
- slabp = objp + colour_off;
- colour_off += cachep->slab_size;
+ freelist = addr + colour_off;
+ colour_off += cachep->freelist_size;
}
- slabp->inuse = 0;
- slabp->colouroff = colour_off;
- slabp->s_mem = objp + colour_off;
- slabp->nodeid = nodeid;
- slabp->free = 0;
- return slabp;
+ page->active = 0;
+ page->s_mem = addr + colour_off;
+ return freelist;
+}
+
+static inline freelist_idx_t get_free_obj(struct page *page, unsigned int idx)
+{
+ return ((freelist_idx_t *)page->freelist)[idx];
}
-static inline kmem_bufctl_t *slab_bufctl(struct slab *slabp)
+static inline void set_free_obj(struct page *page,
+ unsigned int idx, freelist_idx_t val)
{
- return (kmem_bufctl_t *) (slabp + 1);
+ ((freelist_idx_t *)(page->freelist))[idx] = val;
}
static void cache_init_objs(struct kmem_cache *cachep,
- struct slab *slabp)
+ struct page *page)
{
int i;
for (i = 0; i < cachep->num; i++) {
- void *objp = index_to_obj(cachep, slabp, i);
+ void *objp = index_to_obj(cachep, page, i);
#if DEBUG
/* need to poison the objs? */
if (cachep->flags & SLAB_POISON)
@@ -2718,88 +2658,76 @@ static void cache_init_objs(struct kmem_cache *cachep,
slab_error(cachep, "constructor overwrote the"
" start of an object");
}
- if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
+ if ((cachep->size % PAGE_SIZE) == 0 &&
OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
kernel_map_pages(virt_to_page(objp),
- cachep->buffer_size / PAGE_SIZE, 0);
+ cachep->size / PAGE_SIZE, 0);
#else
if (cachep->ctor)
cachep->ctor(objp);
#endif
- slab_bufctl(slabp)[i] = i + 1;
+ set_obj_status(page, i, OBJECT_FREE);
+ set_free_obj(page, i, i);
}
- slab_bufctl(slabp)[i - 1] = BUFCTL_END;
}
static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
{
if (CONFIG_ZONE_DMA_FLAG) {
if (flags & GFP_DMA)
- BUG_ON(!(cachep->gfpflags & GFP_DMA));
+ BUG_ON(!(cachep->allocflags & GFP_DMA));
else
- BUG_ON(cachep->gfpflags & GFP_DMA);
+ BUG_ON(cachep->allocflags & GFP_DMA);
}
}
-static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void *slab_get_obj(struct kmem_cache *cachep, struct page *page,
int nodeid)
{
- void *objp = index_to_obj(cachep, slabp, slabp->free);
- kmem_bufctl_t next;
+ void *objp;
- slabp->inuse++;
- next = slab_bufctl(slabp)[slabp->free];
+ objp = index_to_obj(cachep, page, get_free_obj(page, page->active));
+ page->active++;
#if DEBUG
- slab_bufctl(slabp)[slabp->free] = BUFCTL_FREE;
- WARN_ON(slabp->nodeid != nodeid);
+ WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
#endif
- slabp->free = next;
return objp;
}
-static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
+static void slab_put_obj(struct kmem_cache *cachep, struct page *page,
void *objp, int nodeid)
{
- unsigned int objnr = obj_to_index(cachep, slabp, objp);
-
+ unsigned int objnr = obj_to_index(cachep, page, objp);
#if DEBUG
+ unsigned int i;
+
/* Verify that the slab belongs to the intended node */
- WARN_ON(slabp->nodeid != nodeid);
+ WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid);
- if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) {
- printk(KERN_ERR "slab: double free detected in cache "
- "'%s', objp %p\n", cachep->name, objp);
- BUG();
+	/* Check for a double free */
+ for (i = page->active; i < cachep->num; i++) {
+ if (get_free_obj(page, i) == objnr) {
+ printk(KERN_ERR "slab: double free detected in cache "
+ "'%s', objp %p\n", cachep->name, objp);
+ BUG();
+ }
}
#endif
- slab_bufctl(slabp)[objnr] = slabp->free;
- slabp->free = objnr;
- slabp->inuse--;
+ page->active--;
+ set_free_obj(page, page->active, objnr);
}
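/*
 * A userspace model of the index-array freelist that slab_get_obj() and
 * slab_put_obj() above manipulate: freelist[] holds object indices, the
 * first `active` entries name allocated objects, the rest name free ones.
 * Allocation hands out freelist[active] and bumps active; freeing pushes
 * the index back and, as in the DEBUG path, scans the free tail for a
 * double free.  The structure and names are illustrative, not the kernel
 * layout.
 */
#include <stdio.h>
#include <stdlib.h>

typedef unsigned char freelist_idx_t;

struct toy_slab {
	unsigned int num;		/* objects per slab            */
	unsigned int active;		/* objects currently allocated */
	freelist_idx_t *freelist;	/* object indices              */
};

static void slab_init(struct toy_slab *s, unsigned int num)
{
	unsigned int i;

	s->num = num;
	s->active = 0;
	s->freelist = malloc(num * sizeof(*s->freelist));
	for (i = 0; i < num; i++)	/* cache_init_objs(): identity order */
		s->freelist[i] = i;
}

static int slab_get(struct toy_slab *s)
{
	if (s->active == s->num)
		return -1;		/* slab is full */
	return s->freelist[s->active++];
}

static void slab_put(struct toy_slab *s, unsigned int objnr)
{
	unsigned int i;

	for (i = s->active; i < s->num; i++) {	/* double-free check */
		if (s->freelist[i] == objnr) {
			fprintf(stderr, "double free of object %u\n", objnr);
			abort();
		}
	}
	s->freelist[--s->active] = objnr;
}

int main(void)
{
	struct toy_slab s;
	int a, b;

	slab_init(&s, 4);
	a = slab_get(&s);
	b = slab_get(&s);
	printf("allocated objects %d and %d, active=%u\n", a, b, s.active);
	slab_put(&s, a);
	printf("freed %d, next allocation reuses index %d\n", a, slab_get(&s));
	free(s.freelist);
	return 0;
}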
/*
* Map pages beginning at addr to the given cache and slab. This is required
* for the slab allocator to be able to lookup the cache and slab of a
- * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
+ * virtual address for kfree, ksize, and slab debugging.
*/
-static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
- void *addr)
+static void slab_map_pages(struct kmem_cache *cache, struct page *page,
+ void *freelist)
{
- int nr_pages;
- struct page *page;
-
- page = virt_to_page(addr);
-
- nr_pages = 1;
- if (likely(!PageCompound(page)))
- nr_pages <<= cache->gfporder;
-
- do {
- page_set_cache(page, cache);
- page_set_slab(page, slab);
- page++;
- } while (--nr_pages);
+ page->slab_cache = cache;
+ page->freelist = freelist;
}
/*
@@ -2807,12 +2735,12 @@ static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
* kmem_cache_alloc() when there are no active objs left in a cache.
*/
static int cache_grow(struct kmem_cache *cachep,
- gfp_t flags, int nodeid, void *objp)
+ gfp_t flags, int nodeid, struct page *page)
{
- struct slab *slabp;
+ void *freelist;
size_t offset;
gfp_t local_flags;
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
/*
* Be lazy and only check for valid flags here, keeping it out of the
@@ -2821,17 +2749,17 @@ static int cache_grow(struct kmem_cache *cachep,
BUG_ON(flags & GFP_SLAB_BUG_MASK);
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
- /* Take the l3 list lock to change the colour_next on this node */
+ /* Take the node list lock to change the colour_next on this node */
check_irq_off();
- l3 = cachep->nodelists[nodeid];
- spin_lock(&l3->list_lock);
+ n = cachep->node[nodeid];
+ spin_lock(&n->list_lock);
	/* Get colour for the slab, and calculate the next value. */
- offset = l3->colour_next;
- l3->colour_next++;
- if (l3->colour_next >= cachep->colour)
- l3->colour_next = 0;
- spin_unlock(&l3->list_lock);
+ offset = n->colour_next;
+ n->colour_next++;
+ if (n->colour_next >= cachep->colour)
+ n->colour_next = 0;
+ spin_unlock(&n->list_lock);
offset *= cachep->colour_off;
@@ -2850,34 +2778,34 @@ static int cache_grow(struct kmem_cache *cachep,
* Get mem for the objs. Attempt to allocate a physical page from
* 'nodeid'.
*/
- if (!objp)
- objp = kmem_getpages(cachep, local_flags, nodeid);
- if (!objp)
+ if (!page)
+ page = kmem_getpages(cachep, local_flags, nodeid);
+ if (!page)
goto failed;
/* Get slab management. */
- slabp = alloc_slabmgmt(cachep, objp, offset,
+ freelist = alloc_slabmgmt(cachep, page, offset,
local_flags & ~GFP_CONSTRAINT_MASK, nodeid);
- if (!slabp)
+ if (!freelist)
goto opps1;
- slab_map_pages(cachep, slabp, objp);
+ slab_map_pages(cachep, page, freelist);
- cache_init_objs(cachep, slabp);
+ cache_init_objs(cachep, page);
if (local_flags & __GFP_WAIT)
local_irq_disable();
check_irq_off();
- spin_lock(&l3->list_lock);
+ spin_lock(&n->list_lock);
/* Make slab active. */
- list_add_tail(&slabp->list, &(l3->slabs_free));
+ list_add_tail(&page->lru, &(n->slabs_free));
STATS_INC_GROWN(cachep);
- l3->free_objects += cachep->num;
- spin_unlock(&l3->list_lock);
+ n->free_objects += cachep->num;
+ spin_unlock(&n->list_lock);
return 1;
opps1:
- kmem_freepages(cachep, objp);
+ kmem_freepages(cachep, page);
failed:
if (local_flags & __GFP_WAIT)
local_irq_disable();
@@ -2923,11 +2851,10 @@ static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
}
static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
- void *caller)
+ unsigned long caller)
{
- struct page *page;
unsigned int objnr;
- struct slab *slabp;
+ struct page *page;
BUG_ON(virt_to_cache(objp) != cachep);
@@ -2935,30 +2862,26 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
kfree_debugcheck(objp);
page = virt_to_head_page(objp);
- slabp = page_get_slab(page);
-
if (cachep->flags & SLAB_RED_ZONE) {
verify_redzone_free(cachep, objp);
*dbg_redzone1(cachep, objp) = RED_INACTIVE;
*dbg_redzone2(cachep, objp) = RED_INACTIVE;
}
if (cachep->flags & SLAB_STORE_USER)
- *dbg_userword(cachep, objp) = caller;
+ *dbg_userword(cachep, objp) = (void *)caller;
- objnr = obj_to_index(cachep, slabp, objp);
+ objnr = obj_to_index(cachep, page, objp);
BUG_ON(objnr >= cachep->num);
- BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
+ BUG_ON(objp != index_to_obj(cachep, page, objnr));
-#ifdef CONFIG_DEBUG_SLAB_LEAK
- slab_bufctl(slabp)[objnr] = BUFCTL_FREE;
-#endif
+ set_obj_status(page, objnr, OBJECT_FREE);
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
- store_stackinfo(cachep, objp, (unsigned long)caller);
+ if ((cachep->size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
+ store_stackinfo(cachep, objp, caller);
kernel_map_pages(virt_to_page(objp),
- cachep->buffer_size / PAGE_SIZE, 0);
+ cachep->size / PAGE_SIZE, 0);
} else {
poison_obj(cachep, objp, POISON_FREE);
}
@@ -2969,49 +2892,24 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
return objp;
}
-static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
-{
- kmem_bufctl_t i;
- int entries = 0;
-
- /* Check slab's freelist to see if this obj is there. */
- for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
- entries++;
- if (entries > cachep->num || i >= cachep->num)
- goto bad;
- }
- if (entries != cachep->num - slabp->inuse) {
-bad:
- printk(KERN_ERR "slab: Internal list corruption detected in "
- "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
- cachep->name, cachep->num, slabp, slabp->inuse);
- for (i = 0;
- i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
- i++) {
- if (i % 16 == 0)
- printk("\n%03x:", i);
- printk(" %02x", ((unsigned char *)slabp)[i]);
- }
- printk("\n");
- BUG();
- }
-}
#else
#define kfree_debugcheck(x) do { } while(0)
#define cache_free_debugcheck(x,objp,z) (objp)
-#define check_slabp(x,y) do { } while(0)
#endif
-static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
+static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
+ bool force_refill)
{
int batchcount;
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
struct array_cache *ac;
int node;
-retry:
check_irq_off();
node = numa_mem_id();
+ if (unlikely(force_refill))
+ goto force_grow;
+retry:
ac = cpu_cache_get(cachep);
batchcount = ac->batchcount;
if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
@@ -3022,31 +2920,30 @@ retry:
*/
batchcount = BATCHREFILL_LIMIT;
}
- l3 = cachep->nodelists[node];
+ n = cachep->node[node];
- BUG_ON(ac->avail > 0 || !l3);
- spin_lock(&l3->list_lock);
+ BUG_ON(ac->avail > 0 || !n);
+ spin_lock(&n->list_lock);
/* See if we can refill from the shared array */
- if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
- l3->shared->touched = 1;
+ if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
+ n->shared->touched = 1;
goto alloc_done;
}
while (batchcount > 0) {
struct list_head *entry;
- struct slab *slabp;
+ struct page *page;
/* Get slab alloc is to come from. */
- entry = l3->slabs_partial.next;
- if (entry == &l3->slabs_partial) {
- l3->free_touched = 1;
- entry = l3->slabs_free.next;
- if (entry == &l3->slabs_free)
+ entry = n->slabs_partial.next;
+ if (entry == &n->slabs_partial) {
+ n->free_touched = 1;
+ entry = n->slabs_free.next;
+ if (entry == &n->slabs_free)
goto must_grow;
}
- slabp = list_entry(entry, struct slab, list);
- check_slabp(cachep, slabp);
+ page = list_entry(entry, struct page, lru);
check_spinlock_acquired(cachep);
/*
@@ -3054,45 +2951,49 @@ retry:
* there must be at least one object available for
* allocation.
*/
- BUG_ON(slabp->inuse >= cachep->num);
+ BUG_ON(page->active >= cachep->num);
- while (slabp->inuse < cachep->num && batchcount--) {
+ while (page->active < cachep->num && batchcount--) {
STATS_INC_ALLOCED(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
- ac->entry[ac->avail++] = slab_get_obj(cachep, slabp,
- node);
+ ac_put_obj(cachep, ac, slab_get_obj(cachep, page,
+ node));
}
- check_slabp(cachep, slabp);
/* move slabp to correct slabp list: */
- list_del(&slabp->list);
- if (slabp->free == BUFCTL_END)
- list_add(&slabp->list, &l3->slabs_full);
+ list_del(&page->lru);
+ if (page->active == cachep->num)
+ list_add(&page->lru, &n->slabs_full);
else
- list_add(&slabp->list, &l3->slabs_partial);
+ list_add(&page->lru, &n->slabs_partial);
}
must_grow:
- l3->free_objects -= ac->avail;
+ n->free_objects -= ac->avail;
alloc_done:
- spin_unlock(&l3->list_lock);
+ spin_unlock(&n->list_lock);
if (unlikely(!ac->avail)) {
int x;
+force_grow:
x = cache_grow(cachep, flags | GFP_THISNODE, node, NULL);
/* cache_grow can reenable interrupts, then ac could change. */
ac = cpu_cache_get(cachep);
- if (!x && ac->avail == 0) /* no objects in sight? abort */
+ node = numa_mem_id();
+
+ /* no objects in sight? abort */
+ if (!x && (ac->avail == 0 || force_refill))
return NULL;
if (!ac->avail) /* objects refilled by interrupt? */
goto retry;
}
ac->touched = 1;
- return ac->entry[--ac->avail];
+
+ return ac_get_obj(cachep, ac, flags, force_refill);
}
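/*
 * A toy model of the per-CPU array cache that ____cache_alloc() and
 * cache_alloc_refill() above work with: a LIFO stack of pointers with a
 * limit and a batchcount.  The fast path pops entry[--avail]; on a miss
 * the cache is refilled with up to batchcount objects from a (here
 * trivially simulated) backing node.  All sizes and the backing allocation
 * are assumptions of this sketch.
 */
#include <stdio.h>
#include <stdlib.h>

#define LIMIT		8
#define BATCHCOUNT	4

struct toy_ac {
	unsigned int avail;
	void *entry[LIMIT];
};

static void *refill(struct toy_ac *ac)
{
	unsigned int i;

	/* cache_alloc_refill(): grab a batch from the node lists */
	for (i = 0; i < BATCHCOUNT; i++)
		ac->entry[ac->avail++] = malloc(32);
	return ac->entry[--ac->avail];
}

static void *toy_alloc(struct toy_ac *ac)
{
	if (ac->avail)				/* ALLOCHIT fast path */
		return ac->entry[--ac->avail];
	return refill(ac);			/* ALLOCMISS slow path */
}

int main(void)
{
	struct toy_ac ac = { .avail = 0 };
	void *obj;
	int i;

	for (i = 0; i < 6; i++) {
		obj = toy_alloc(&ac);
		printf("alloc %d -> %p, %u cached objects left\n",
		       i, obj, ac.avail);
		free(obj);	/* freed directly here; the kernel would recycle it */
	}
	while (ac.avail)	/* release whatever the refill left cached */
		free(ac.entry[--ac.avail]);
	return 0;
}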
static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
@@ -3106,15 +3007,17 @@ static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
#if DEBUG
static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
- gfp_t flags, void *objp, void *caller)
+ gfp_t flags, void *objp, unsigned long caller)
{
+ struct page *page;
+
if (!objp)
return objp;
if (cachep->flags & SLAB_POISON) {
#ifdef CONFIG_DEBUG_PAGEALLOC
- if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
+ if ((cachep->size % PAGE_SIZE) == 0 && OFF_SLAB(cachep))
kernel_map_pages(virt_to_page(objp),
- cachep->buffer_size / PAGE_SIZE, 1);
+ cachep->size / PAGE_SIZE, 1);
else
check_poison_obj(cachep, objp);
#else
@@ -3123,7 +3026,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
poison_obj(cachep, objp, POISON_INUSE);
}
if (cachep->flags & SLAB_STORE_USER)
- *dbg_userword(cachep, objp) = caller;
+ *dbg_userword(cachep, objp) = (void *)caller;
if (cachep->flags & SLAB_RED_ZONE) {
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
@@ -3138,25 +3041,17 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
*dbg_redzone1(cachep, objp) = RED_ACTIVE;
*dbg_redzone2(cachep, objp) = RED_ACTIVE;
}
-#ifdef CONFIG_DEBUG_SLAB_LEAK
- {
- struct slab *slabp;
- unsigned objnr;
- slabp = page_get_slab(virt_to_head_page(objp));
- objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size;
- slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE;
- }
-#endif
+ page = virt_to_head_page(objp);
+ set_obj_status(page, obj_to_index(cachep, page, objp), OBJECT_ACTIVE);
objp += obj_offset(cachep);
if (cachep->ctor && cachep->flags & SLAB_POISON)
cachep->ctor(objp);
-#if ARCH_SLAB_MINALIGN
- if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
+ if (ARCH_SLAB_MINALIGN &&
+ ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
- objp, ARCH_SLAB_MINALIGN);
+ objp, (int)ARCH_SLAB_MINALIGN);
}
-#endif
return objp;
}
#else
@@ -3165,33 +3060,45 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
{
- if (cachep == &cache_cache)
+ if (cachep == kmem_cache)
return false;
- return should_failslab(obj_size(cachep), flags, cachep->flags);
+ return should_failslab(cachep->object_size, flags, cachep->flags);
}
static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
void *objp;
struct array_cache *ac;
+ bool force_refill = false;
check_irq_off();
ac = cpu_cache_get(cachep);
if (likely(ac->avail)) {
- STATS_INC_ALLOCHIT(cachep);
ac->touched = 1;
- objp = ac->entry[--ac->avail];
- } else {
- STATS_INC_ALLOCMISS(cachep);
- objp = cache_alloc_refill(cachep, flags);
+ objp = ac_get_obj(cachep, ac, flags, false);
+
/*
- * the 'ac' may be updated by cache_alloc_refill(),
- * and kmemleak_erase() requires its correct value.
+		 * Allow for the possibility that all avail objects are not
+		 * allowed by the current flags
*/
- ac = cpu_cache_get(cachep);
+ if (objp) {
+ STATS_INC_ALLOCHIT(cachep);
+ goto out;
+ }
+ force_refill = true;
}
+
+ STATS_INC_ALLOCMISS(cachep);
+ objp = cache_alloc_refill(cachep, flags, force_refill);
+ /*
+ * the 'ac' may be updated by cache_alloc_refill(),
+ * and kmemleak_erase() requires its correct value.
+ */
+ ac = cpu_cache_get(cachep);
+
+out:
/*
* To avoid a false negative, if an object that is in one of the
* per-CPU caches is leaked, we need to make sure kmemleak doesn't
@@ -3204,7 +3111,7 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
#ifdef CONFIG_NUMA
/*
- * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY.
+ * Try allocating on another node if PF_SPREAD_SLAB or a mempolicy is set.
*
* If we are in_interrupt, then process context, including cpusets and
* mempolicy, may not apply and should not be used for allocation policy.
@@ -3216,12 +3123,10 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
if (in_interrupt() || (flags & __GFP_THISNODE))
return NULL;
nid_alloc = nid_here = numa_mem_id();
- get_mems_allowed();
if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD))
nid_alloc = cpuset_slab_spread_node();
else if (current->mempolicy)
- nid_alloc = slab_node(current->mempolicy);
- put_mems_allowed();
+ nid_alloc = mempolicy_slab_node();
if (nid_alloc != nid_here)
return ____cache_alloc_node(cachep, flags, nid_alloc);
return NULL;
@@ -3230,7 +3135,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
/*
* Fallback function if there was no memory available and no objects on a
* certain node and fall back is permitted. First we scan all the
- * available nodelists for available objects. If that fails then we
+ * available nodes for available objects. If that fails then we
* perform an allocation without specifying a node. This allows the page
* allocator to do its reclaim / fallback magic. We then insert the
* slab into the proper nodelist and then allocate from it.
@@ -3244,14 +3149,17 @@ static void *fallback_alloc(struct kmem_cache *cache, gfp_t flags)
enum zone_type high_zoneidx = gfp_zone(flags);
void *obj = NULL;
int nid;
+ unsigned int cpuset_mems_cookie;
if (flags & __GFP_THISNODE)
return NULL;
- get_mems_allowed();
- zonelist = node_zonelist(slab_node(current->mempolicy), flags);
local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
+retry_cpuset:
+ cpuset_mems_cookie = read_mems_allowed_begin();
+ zonelist = node_zonelist(mempolicy_slab_node(), flags);
+
retry:
/*
* Look through allowed nodes for objects available
@@ -3261,8 +3169,8 @@ retry:
nid = zone_to_nid(zone);
if (cpuset_zone_allowed_hardwall(zone, flags) &&
- cache->nodelists[nid] &&
- cache->nodelists[nid]->free_objects) {
+ cache->node[nid] &&
+ cache->node[nid]->free_objects) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
if (obj)
@@ -3277,18 +3185,20 @@ retry:
* We may trigger various forms of reclaim on the allowed
* set and go into memory reserves if necessary.
*/
+ struct page *page;
+
if (local_flags & __GFP_WAIT)
local_irq_enable();
kmem_flagcheck(cache, flags);
- obj = kmem_getpages(cache, local_flags, numa_mem_id());
+ page = kmem_getpages(cache, local_flags, numa_mem_id());
if (local_flags & __GFP_WAIT)
local_irq_disable();
- if (obj) {
+ if (page) {
/*
* Insert into the appropriate per node queues
*/
- nid = page_to_nid(virt_to_page(obj));
- if (cache_grow(cache, flags, nid, obj)) {
+ nid = page_to_nid(page);
+ if (cache_grow(cache, flags, nid, page)) {
obj = ____cache_alloc_node(cache,
flags | GFP_THISNODE, nid);
if (!obj)
@@ -3304,7 +3214,9 @@ retry:
}
}
}
- put_mems_allowed();
+
+ if (unlikely(!obj && read_mems_allowed_retry(cpuset_mems_cookie)))
+ goto retry_cpuset;
return obj;
}
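/*
 * fallback_alloc() above brackets its node scan with
 * read_mems_allowed_begin()/read_mems_allowed_retry(), a seqcount-style
 * "read a generation, do the work, retry if it changed" pattern.  A
 * single-threaded userspace sketch of that retry loop; the cookie source
 * and the simulated concurrent update are inventions of this example.
 */
#include <stdio.h>

static unsigned int mems_generation;	/* bumped by a (simulated) writer */

static unsigned int read_begin(void)
{
	return mems_generation;
}

static int read_retry(unsigned int cookie)
{
	return cookie != mems_generation;	/* changed underneath us? */
}

int main(void)
{
	int attempts = 0;
	void *obj = NULL;
	unsigned int cookie;

	do {
		cookie = read_begin();
		attempts++;

		/* ... scan the allowed nodes for an object here ... */
		if (attempts == 1) {
			mems_generation++;	/* pretend the cpuset changed mid-scan */
			obj = NULL;		/* and that the scan found nothing     */
		} else {
			obj = &attempts;	/* second pass succeeds                */
		}
	} while (!obj && read_retry(cookie));

	printf("got %p after %d attempt(s)\n", obj, attempts);
	return 0;
}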
@@ -3315,51 +3227,50 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
int nodeid)
{
struct list_head *entry;
- struct slab *slabp;
- struct kmem_list3 *l3;
+ struct page *page;
+ struct kmem_cache_node *n;
void *obj;
int x;
- l3 = cachep->nodelists[nodeid];
- BUG_ON(!l3);
+ VM_BUG_ON(nodeid > num_online_nodes());
+ n = cachep->node[nodeid];
+ BUG_ON(!n);
retry:
check_irq_off();
- spin_lock(&l3->list_lock);
- entry = l3->slabs_partial.next;
- if (entry == &l3->slabs_partial) {
- l3->free_touched = 1;
- entry = l3->slabs_free.next;
- if (entry == &l3->slabs_free)
+ spin_lock(&n->list_lock);
+ entry = n->slabs_partial.next;
+ if (entry == &n->slabs_partial) {
+ n->free_touched = 1;
+ entry = n->slabs_free.next;
+ if (entry == &n->slabs_free)
goto must_grow;
}
- slabp = list_entry(entry, struct slab, list);
+ page = list_entry(entry, struct page, lru);
check_spinlock_acquired_node(cachep, nodeid);
- check_slabp(cachep, slabp);
STATS_INC_NODEALLOCS(cachep);
STATS_INC_ACTIVE(cachep);
STATS_SET_HIGH(cachep);
- BUG_ON(slabp->inuse == cachep->num);
+ BUG_ON(page->active == cachep->num);
- obj = slab_get_obj(cachep, slabp, nodeid);
- check_slabp(cachep, slabp);
- l3->free_objects--;
+ obj = slab_get_obj(cachep, page, nodeid);
+ n->free_objects--;
/* move slabp to correct slabp list: */
- list_del(&slabp->list);
+ list_del(&page->lru);
- if (slabp->free == BUFCTL_END)
- list_add(&slabp->list, &l3->slabs_full);
+ if (page->active == cachep->num)
+ list_add(&page->lru, &n->slabs_full);
else
- list_add(&slabp->list, &l3->slabs_partial);
+ list_add(&page->lru, &n->slabs_partial);
- spin_unlock(&l3->list_lock);
+ spin_unlock(&n->list_lock);
goto done;
must_grow:
- spin_unlock(&l3->list_lock);
+ spin_unlock(&n->list_lock);
x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
if (x)
goto retry;
@@ -3370,21 +3281,9 @@ done:
return obj;
}
-/**
- * kmem_cache_alloc_node - Allocate an object on the specified node
- * @cachep: The cache to allocate from.
- * @flags: See kmalloc().
- * @nodeid: node number of the target node.
- * @caller: return address of caller, used for debug information
- *
- * Identical to kmem_cache_alloc but it will allocate memory on the given
- * node, which can improve the performance for cpu bound structures.
- *
- * Fallback to other node is possible if __GFP_THISNODE is not set.
- */
static __always_inline void *
-__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
- void *caller)
+slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+ unsigned long caller)
{
unsigned long save_flags;
void *ptr;
@@ -3397,13 +3296,15 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
if (slab_should_failslab(cachep, flags))
return NULL;
+ cachep = memcg_kmem_get_cache(cachep, flags);
+
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
- if (nodeid == -1)
+ if (nodeid == NUMA_NO_NODE)
nodeid = slab_node;
- if (unlikely(!cachep->nodelists[nodeid])) {
+ if (unlikely(!cachep->node[nodeid])) {
/* Node not bootstrapped yet */
ptr = fallback_alloc(cachep, flags);
goto out;
@@ -3425,14 +3326,14 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
out:
local_irq_restore(save_flags);
ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
- kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
+ kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags,
flags);
- if (likely(ptr))
- kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
-
- if (unlikely((flags & __GFP_ZERO) && ptr))
- memset(ptr, 0, obj_size(cachep));
+ if (likely(ptr)) {
+ kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size);
+ if (unlikely(flags & __GFP_ZERO))
+ memset(ptr, 0, cachep->object_size);
+ }
return ptr;
}
@@ -3442,7 +3343,7 @@ __do_cache_alloc(struct kmem_cache *cache, gfp_t flags)
{
void *objp;
- if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) {
+ if (current->mempolicy || unlikely(current->flags & PF_SPREAD_SLAB)) {
objp = alternate_node_alloc(cache, flags);
if (objp)
goto out;
@@ -3470,7 +3371,7 @@ __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
#endif /* CONFIG_NUMA */
static __always_inline void *
-__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
+slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
{
unsigned long save_flags;
void *objp;
@@ -3482,67 +3383,70 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
if (slab_should_failslab(cachep, flags))
return NULL;
+ cachep = memcg_kmem_get_cache(cachep, flags);
+
cache_alloc_debugcheck_before(cachep, flags);
local_irq_save(save_flags);
objp = __do_cache_alloc(cachep, flags);
local_irq_restore(save_flags);
objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
- kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
+ kmemleak_alloc_recursive(objp, cachep->object_size, 1, cachep->flags,
flags);
prefetchw(objp);
- if (likely(objp))
- kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
-
- if (unlikely((flags & __GFP_ZERO) && objp))
- memset(objp, 0, obj_size(cachep));
+ if (likely(objp)) {
+ kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size);
+ if (unlikely(flags & __GFP_ZERO))
+ memset(objp, 0, cachep->object_size);
+ }
return objp;
}
/*
- * Caller needs to acquire correct kmem_list's list_lock
+ * Caller needs to acquire correct kmem_cache_node's list_lock
*/
static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
int node)
{
int i;
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
for (i = 0; i < nr_objects; i++) {
- void *objp = objpp[i];
- struct slab *slabp;
+ void *objp;
+ struct page *page;
+
+ clear_obj_pfmemalloc(&objpp[i]);
+ objp = objpp[i];
- slabp = virt_to_slab(objp);
- l3 = cachep->nodelists[node];
- list_del(&slabp->list);
+ page = virt_to_head_page(objp);
+ n = cachep->node[node];
+ list_del(&page->lru);
check_spinlock_acquired_node(cachep, node);
- check_slabp(cachep, slabp);
- slab_put_obj(cachep, slabp, objp, node);
+ slab_put_obj(cachep, page, objp, node);
STATS_DEC_ACTIVE(cachep);
- l3->free_objects++;
- check_slabp(cachep, slabp);
+ n->free_objects++;
/* fixup slab chains */
- if (slabp->inuse == 0) {
- if (l3->free_objects > l3->free_limit) {
- l3->free_objects -= cachep->num;
+ if (page->active == 0) {
+ if (n->free_objects > n->free_limit) {
+ n->free_objects -= cachep->num;
/* No need to drop any previously held
			 * lock here, even if we have an off-slab slab
* descriptor it is guaranteed to come from
* a different cache, refer to comments before
* alloc_slabmgmt.
*/
- slab_destroy(cachep, slabp);
+ slab_destroy(cachep, page);
} else {
- list_add(&slabp->list, &l3->slabs_free);
+ list_add(&page->lru, &n->slabs_free);
}
} else {
/* Unconditionally move a slab to the end of the
* partial list on free - maximum time for the
* other objects to be freed, too.
*/
- list_add_tail(&slabp->list, &l3->slabs_partial);
+ list_add_tail(&page->lru, &n->slabs_partial);
}
}
}
@@ -3550,7 +3454,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
int batchcount;
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
int node = numa_mem_id();
batchcount = ac->batchcount;
@@ -3558,10 +3462,10 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
BUG_ON(!batchcount || batchcount > ac->avail);
#endif
check_irq_off();
- l3 = cachep->nodelists[node];
- spin_lock(&l3->list_lock);
- if (l3->shared) {
- struct array_cache *shared_array = l3->shared;
+ n = cachep->node[node];
+ spin_lock(&n->list_lock);
+ if (n->shared) {
+ struct array_cache *shared_array = n->shared;
int max = shared_array->limit - shared_array->avail;
if (max) {
if (batchcount > max)
@@ -3580,12 +3484,12 @@ free_done:
int i = 0;
struct list_head *p;
- p = l3->slabs_free.next;
- while (p != &(l3->slabs_free)) {
- struct slab *slabp;
+ p = n->slabs_free.next;
+ while (p != &(n->slabs_free)) {
+ struct page *page;
- slabp = list_entry(p, struct slab, list);
- BUG_ON(slabp->inuse);
+ page = list_entry(p, struct page, lru);
+ BUG_ON(page->active);
i++;
p = p->next;
@@ -3593,7 +3497,7 @@ free_done:
STATS_SET_FREEABLE(cachep, i);
}
#endif
- spin_unlock(&l3->list_lock);
+ spin_unlock(&n->list_lock);
ac->avail -= batchcount;
memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}
@@ -3602,15 +3506,16 @@ free_done:
* Release an obj back to its cache. If the obj has a constructed state, it must
* be in this state _before_ it is released. Called with disabled ints.
*/
-static inline void __cache_free(struct kmem_cache *cachep, void *objp)
+static inline void __cache_free(struct kmem_cache *cachep, void *objp,
+ unsigned long caller)
{
struct array_cache *ac = cpu_cache_get(cachep);
check_irq_off();
kmemleak_free_recursive(objp, cachep->flags);
- objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
+ objp = cache_free_debugcheck(cachep, objp, caller);
- kmemcheck_slab_free(cachep, objp, obj_size(cachep));
+ kmemcheck_slab_free(cachep, objp, cachep->object_size);
/*
* Skip calling cache_free_alien() when the platform is not numa.
@@ -3624,13 +3529,12 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
if (likely(ac->avail < ac->limit)) {
STATS_INC_FREEHIT(cachep);
- ac->entry[ac->avail++] = objp;
- return;
} else {
STATS_INC_FREEMISS(cachep);
cache_flusharray(cachep, ac);
- ac->entry[ac->avail++] = objp;
}
+
+ ac_put_obj(cachep, ac, objp);
}
/**
@@ -3643,61 +3547,48 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
*/
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
- void *ret = __cache_alloc(cachep, flags, __builtin_return_address(0));
+ void *ret = slab_alloc(cachep, flags, _RET_IP_);
trace_kmem_cache_alloc(_RET_IP_, ret,
- obj_size(cachep), cachep->buffer_size, flags);
+ cachep->object_size, cachep->size, flags);
return ret;
}
EXPORT_SYMBOL(kmem_cache_alloc);
#ifdef CONFIG_TRACING
-void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
+void *
+kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
{
- return __cache_alloc(cachep, flags, __builtin_return_address(0));
+ void *ret;
+
+ ret = slab_alloc(cachep, flags, _RET_IP_);
+
+ trace_kmalloc(_RET_IP_, ret,
+ size, cachep->size, flags);
+ return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_trace);
#endif
+#ifdef CONFIG_NUMA
/**
- * kmem_ptr_validate - check if an untrusted pointer might be a slab entry.
- * @cachep: the cache we're checking against
- * @ptr: pointer to validate
+ * kmem_cache_alloc_node - Allocate an object on the specified node
+ * @cachep: The cache to allocate from.
+ * @flags: See kmalloc().
+ * @nodeid: node number of the target node.
*
- * This verifies that the untrusted pointer looks sane;
- * it is _not_ a guarantee that the pointer is actually
- * part of the slab cache in question, but it at least
- * validates that the pointer can be dereferenced and
- * looks half-way sane.
+ * Identical to kmem_cache_alloc but it will allocate memory on the given
+ * node, which can improve performance for CPU-bound structures.
*
- * Currently only used for dentry validation.
+ * Falling back to another node is possible if __GFP_THISNODE is not set.
*/
-int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
-{
- unsigned long size = cachep->buffer_size;
- struct page *page;
-
- if (unlikely(!kern_ptr_validate(ptr, size)))
- goto out;
- page = virt_to_page(ptr);
- if (unlikely(!PageSlab(page)))
- goto out;
- if (unlikely(page_get_cache(page) != cachep))
- goto out;
- return 1;
-out:
- return 0;
-}
-
-#ifdef CONFIG_NUMA
void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
{
- void *ret = __cache_alloc_node(cachep, flags, nodeid,
- __builtin_return_address(0));
+ void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
trace_kmem_cache_alloc_node(_RET_IP_, ret,
- obj_size(cachep), cachep->buffer_size,
+ cachep->object_size, cachep->size,
flags, nodeid);
return ret;
@@ -3705,51 +3596,51 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
EXPORT_SYMBOL(kmem_cache_alloc_node);
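
A brief usage sketch for the node-aware interface above (illustrative only; struct my_ctl and my_ctl_alloc are hypothetical):

#include <linux/gfp.h>
#include <linux/slab.h>

struct my_ctl {
	int node;
	void *data;
};

/*
 * Allocate the control structure on the node that will access it most;
 * without __GFP_THISNODE the allocation may fall back to another node.
 */
static struct my_ctl *my_ctl_alloc(struct kmem_cache *my_ctl_cache, int node)
{
	struct my_ctl *ctl;

	ctl = kmem_cache_alloc_node(my_ctl_cache, GFP_KERNEL, node);
	if (ctl)
		ctl->node = node;
	return ctl;
}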
#ifdef CONFIG_TRACING
-void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
- gfp_t flags,
- int nodeid)
+void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid,
+ size_t size)
{
- return __cache_alloc_node(cachep, flags, nodeid,
- __builtin_return_address(0));
+ void *ret;
+
+ ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+
+ trace_kmalloc_node(_RET_IP_, ret,
+ size, cachep->size,
+ flags, nodeid);
+ return ret;
}
-EXPORT_SYMBOL(kmem_cache_alloc_node_notrace);
+EXPORT_SYMBOL(kmem_cache_alloc_node_trace);
#endif
static __always_inline void *
-__do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
+__do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
{
struct kmem_cache *cachep;
- void *ret;
- cachep = kmem_find_general_cachep(size, flags);
+ cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
-
- trace_kmalloc_node((unsigned long) caller, ret,
- size, cachep->buffer_size, flags, node);
-
- return ret;
+ return kmem_cache_alloc_node_trace(cachep, flags, node, size);
}
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
- return __do_kmalloc_node(size, flags, node,
- __builtin_return_address(0));
+ return __do_kmalloc_node(size, flags, node, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc_node);
void *__kmalloc_node_track_caller(size_t size, gfp_t flags,
int node, unsigned long caller)
{
- return __do_kmalloc_node(size, flags, node, (void *)caller);
+ return __do_kmalloc_node(size, flags, node, caller);
}
EXPORT_SYMBOL(__kmalloc_node_track_caller);
#else
void *__kmalloc_node(size_t size, gfp_t flags, int node)
{
- return __do_kmalloc_node(size, flags, node, NULL);
+ return __do_kmalloc_node(size, flags, node, 0);
}
EXPORT_SYMBOL(__kmalloc_node);
#endif /* CONFIG_DEBUG_SLAB || CONFIG_TRACING */
@@ -3762,23 +3653,18 @@ EXPORT_SYMBOL(__kmalloc_node);
* @caller: return address of the caller, used for debug tracking
*/
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
- void *caller)
+ unsigned long caller)
{
struct kmem_cache *cachep;
void *ret;
- /* If you want to save a few bytes .text space: replace
- * __ with kmem_.
- * Then kmalloc uses the uninlined functions instead of the inline
- * functions.
- */
- cachep = __find_general_cachep(size, flags);
+ cachep = kmalloc_slab(size, flags);
if (unlikely(ZERO_OR_NULL_PTR(cachep)))
return cachep;
- ret = __cache_alloc(cachep, flags, caller);
+ ret = slab_alloc(cachep, flags, caller);
- trace_kmalloc((unsigned long) caller, ret,
- size, cachep->buffer_size, flags);
+ trace_kmalloc(caller, ret,
+ size, cachep->size, flags);
return ret;
}
@@ -3787,20 +3673,20 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
void *__kmalloc(size_t size, gfp_t flags)
{
- return __do_kmalloc(size, flags, __builtin_return_address(0));
+ return __do_kmalloc(size, flags, _RET_IP_);
}
EXPORT_SYMBOL(__kmalloc);
void *__kmalloc_track_caller(size_t size, gfp_t flags, unsigned long caller)
{
- return __do_kmalloc(size, flags, (void *)caller);
+ return __do_kmalloc(size, flags, caller);
}
EXPORT_SYMBOL(__kmalloc_track_caller);
#else
void *__kmalloc(size_t size, gfp_t flags)
{
- return __do_kmalloc(size, flags, NULL);
+ return __do_kmalloc(size, flags, 0);
}
EXPORT_SYMBOL(__kmalloc);
#endif
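
As a side note on the _RET_IP_ plumbing above, a hedged sketch of how a helper typically forwards its caller to the allocator (my_dup_buf is a made-up name; kmalloc_track_caller() is the usual entry point and, when slab debugging or tracing is enabled, expands to __kmalloc_track_caller(size, flags, _RET_IP_)):

#include <linux/slab.h>
#include <linux/string.h>

/*
 * With kmalloc_track_caller(), debug options such as CONFIG_DEBUG_SLAB
 * attribute the allocation to my_dup_buf()'s caller, not to my_dup_buf().
 */
static void *my_dup_buf(const void *src, size_t len, gfp_t gfp)
{
	void *p = kmalloc_track_caller(len, gfp);

	if (p)
		memcpy(p, src, len);
	return p;
}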
@@ -3816,12 +3702,15 @@ EXPORT_SYMBOL(__kmalloc);
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
unsigned long flags;
+ cachep = cache_from_obj(cachep, objp);
+ if (!cachep)
+ return;
local_irq_save(flags);
- debug_check_no_locks_freed(objp, obj_size(cachep));
+ debug_check_no_locks_freed(objp, cachep->object_size);
if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
- debug_check_no_obj_freed(objp, obj_size(cachep));
- __cache_free(cachep, objp);
+ debug_check_no_obj_freed(objp, cachep->object_size);
+ __cache_free(cachep, objp, _RET_IP_);
local_irq_restore(flags);
trace_kmem_cache_free(_RET_IP_, objp);
@@ -3849,32 +3738,21 @@ void kfree(const void *objp)
local_irq_save(flags);
kfree_debugcheck(objp);
c = virt_to_cache(objp);
- debug_check_no_locks_freed(objp, obj_size(c));
- debug_check_no_obj_freed(objp, obj_size(c));
- __cache_free(c, (void *)objp);
+ debug_check_no_locks_freed(objp, c->object_size);
+
+ debug_check_no_obj_freed(objp, c->object_size);
+ __cache_free(c, (void *)objp, _RET_IP_);
local_irq_restore(flags);
}
EXPORT_SYMBOL(kfree);
-unsigned int kmem_cache_size(struct kmem_cache *cachep)
-{
- return obj_size(cachep);
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
-const char *kmem_cache_name(struct kmem_cache *cachep)
-{
- return cachep->name;
-}
-EXPORT_SYMBOL_GPL(kmem_cache_name);
-
/*
- * This initializes kmem_list3 or resizes various caches for all nodes.
+ * This initializes kmem_cache_node or resizes various caches for all nodes.
*/
-static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
+static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp)
{
int node;
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
struct array_cache *new_shared;
struct array_cache **new_alien = NULL;
@@ -3897,58 +3775,58 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
}
}
- l3 = cachep->nodelists[node];
- if (l3) {
- struct array_cache *shared = l3->shared;
+ n = cachep->node[node];
+ if (n) {
+ struct array_cache *shared = n->shared;
- spin_lock_irq(&l3->list_lock);
+ spin_lock_irq(&n->list_lock);
if (shared)
free_block(cachep, shared->entry,
shared->avail, node);
- l3->shared = new_shared;
- if (!l3->alien) {
- l3->alien = new_alien;
+ n->shared = new_shared;
+ if (!n->alien) {
+ n->alien = new_alien;
new_alien = NULL;
}
- l3->free_limit = (1 + nr_cpus_node(node)) *
+ n->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- spin_unlock_irq(&l3->list_lock);
+ spin_unlock_irq(&n->list_lock);
kfree(shared);
free_alien_cache(new_alien);
continue;
}
- l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
- if (!l3) {
+ n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
+ if (!n) {
free_alien_cache(new_alien);
kfree(new_shared);
goto fail;
}
- kmem_list3_init(l3);
- l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
- ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
- l3->shared = new_shared;
- l3->alien = new_alien;
- l3->free_limit = (1 + nr_cpus_node(node)) *
+ kmem_cache_node_init(n);
+ n->next_reap = jiffies + REAPTIMEOUT_NODE +
+ ((unsigned long)cachep) % REAPTIMEOUT_NODE;
+ n->shared = new_shared;
+ n->alien = new_alien;
+ n->free_limit = (1 + nr_cpus_node(node)) *
cachep->batchcount + cachep->num;
- cachep->nodelists[node] = l3;
+ cachep->node[node] = n;
}
return 0;
fail:
- if (!cachep->next.next) {
+ if (!cachep->list.next) {
/* Cache is not active yet. Roll back what we did */
node--;
while (node >= 0) {
- if (cachep->nodelists[node]) {
- l3 = cachep->nodelists[node];
+ if (cachep->node[node]) {
+ n = cachep->node[node];
- kfree(l3->shared);
- free_alien_cache(l3->alien);
- kfree(l3);
- cachep->nodelists[node] = NULL;
+ kfree(n->shared);
+ free_alien_cache(n->alien);
+ kfree(n);
+ cachep->node[node] = NULL;
}
node--;
}
@@ -3958,7 +3836,7 @@ fail:
struct ccupdate_struct {
struct kmem_cache *cachep;
- struct array_cache *new[NR_CPUS];
+ struct array_cache *new[0];
};
static void do_ccupdate_local(void *info)
@@ -3973,14 +3851,15 @@ static void do_ccupdate_local(void *info)
new->new[smp_processor_id()] = old;
}
-/* Always called with the cache_chain_mutex held */
-static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+/* Always called with the slab_mutex held */
+static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
int batchcount, int shared, gfp_t gfp)
{
struct ccupdate_struct *new;
int i;
- new = kzalloc(sizeof(*new), gfp);
+ new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
+ gfp);
if (!new)
return -ENOMEM;
@@ -4007,21 +3886,58 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
struct array_cache *ccold = new->new[i];
if (!ccold)
continue;
- spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
- spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock);
+ spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
kfree(ccold);
}
kfree(new);
- return alloc_kmemlist(cachep, gfp);
+ return alloc_kmem_cache_node(cachep, gfp);
+}
+
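
The ccupdate_struct change above (new[NR_CPUS] becoming new[0] plus a sized kzalloc()) follows the usual trailing-array pattern; a generic sketch with invented names:

#include <linux/slab.h>

struct percpu_ptrs {
	int nr;
	void *ptr[0];	/* storage provided by the allocation below */
};

/* Allocate the header plus exactly 'nr' pointer slots in one object. */
static struct percpu_ptrs *alloc_percpu_ptrs(int nr, gfp_t gfp)
{
	struct percpu_ptrs *p;

	p = kzalloc(sizeof(*p) + nr * sizeof(void *), gfp);
	if (p)
		p->nr = nr;
	return p;
}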
+static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+ int batchcount, int shared, gfp_t gfp)
+{
+ int ret;
+ struct kmem_cache *c = NULL;
+ int i = 0;
+
+ ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
+
+ if (slab_state < FULL)
+ return ret;
+
+ if ((ret < 0) || !is_root_cache(cachep))
+ return ret;
+
+ VM_BUG_ON(!mutex_is_locked(&slab_mutex));
+ for_each_memcg_cache_index(i) {
+ c = cache_from_memcg_idx(cachep, i);
+ if (c)
+ /* return value determined by the parent cache only */
+ __do_tune_cpucache(c, limit, batchcount, shared, gfp);
+ }
+
+ return ret;
}
-/* Called with cache_chain_mutex held always */
+/* Called with slab_mutex held always */
static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
{
int err;
- int limit, shared;
+ int limit = 0;
+ int shared = 0;
+ int batchcount = 0;
+
+ if (!is_root_cache(cachep)) {
+ struct kmem_cache *root = memcg_root_cache(cachep);
+ limit = root->limit;
+ shared = root->shared;
+ batchcount = root->batchcount;
+ }
+ if (limit && shared && batchcount)
+ goto skip_setup;
/*
* The head array serves three purposes:
* - create a LIFO ordering, i.e. return objects that are cache-warm
@@ -4031,13 +3947,13 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
* The numbers are guessed; we should auto-tune as described by
* Bonwick.
*/
- if (cachep->buffer_size > 131072)
+ if (cachep->size > 131072)
limit = 1;
- else if (cachep->buffer_size > PAGE_SIZE)
+ else if (cachep->size > PAGE_SIZE)
limit = 8;
- else if (cachep->buffer_size > 1024)
+ else if (cachep->size > 1024)
limit = 24;
- else if (cachep->buffer_size > 256)
+ else if (cachep->size > 256)
limit = 54;
else
limit = 120;
@@ -4052,7 +3968,7 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
* to a larger limit. Thus disabled by default.
*/
shared = 0;
- if (cachep->buffer_size <= PAGE_SIZE && num_possible_cpus() > 1)
+ if (cachep->size <= PAGE_SIZE && num_possible_cpus() > 1)
shared = 8;
#if DEBUG
@@ -4063,7 +3979,9 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
if (limit > 32)
limit = 32;
#endif
- err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
+ batchcount = (limit + 1) / 2;
+skip_setup:
+ err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
if (err)
printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
cachep->name, -err);
@@ -4071,11 +3989,11 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
}
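
To make the heuristic above concrete, a worked restatement (illustrative helper only, it does not exist in the kernel): a 512-byte cache falls in the "> 256" bucket, so limit = 54 and batchcount = (54 + 1) / 2 = 27.

#include <linux/mm.h>	/* PAGE_SIZE */

/* Mirrors the size -> per-CPU array limit table used by enable_cpucache(). */
static int example_ac_limit(size_t size)
{
	if (size > 131072)
		return 1;
	if (size > PAGE_SIZE)
		return 8;
	if (size > 1024)
		return 24;
	if (size > 256)
		return 54;
	return 120;
}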
/*
- * Drain an array if it contains any elements taking the l3 lock only if
- * necessary. Note that the l3 listlock also protects the array_cache
+ * Drain an array if it contains any elements, taking the node lock only if
+ * necessary. Note that the node listlock also protects the array_cache
* if drain_array() is used on the shared array.
*/
-void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
+static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
struct array_cache *ac, int force, int node)
{
int tofree;
@@ -4085,7 +4003,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
if (ac->touched && !force) {
ac->touched = 0;
} else {
- spin_lock_irq(&l3->list_lock);
+ spin_lock_irq(&n->list_lock);
if (ac->avail) {
tofree = force ? ac->avail : (ac->limit + 4) / 5;
if (tofree > ac->avail)
@@ -4095,7 +4013,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
memmove(ac->entry, &(ac->entry[tofree]),
sizeof(void *) * ac->avail);
}
- spin_unlock_irq(&l3->list_lock);
+ spin_unlock_irq(&n->list_lock);
}
}
@@ -4114,45 +4032,45 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
static void cache_reap(struct work_struct *w)
{
struct kmem_cache *searchp;
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
int node = numa_mem_id();
struct delayed_work *work = to_delayed_work(w);
- if (!mutex_trylock(&cache_chain_mutex))
+ if (!mutex_trylock(&slab_mutex))
/* Give up. Set up the next iteration. */
goto out;
- list_for_each_entry(searchp, &cache_chain, next) {
+ list_for_each_entry(searchp, &slab_caches, list) {
check_irq_on();
/*
- * We only take the l3 lock if absolutely necessary and we
+ * We only take the node lock if absolutely necessary and we
* have established with reasonable certainty that
* we can do some work if the lock was obtained.
*/
- l3 = searchp->nodelists[node];
+ n = searchp->node[node];
- reap_alien(searchp, l3);
+ reap_alien(searchp, n);
- drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
+ drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
/*
* These are racy checks but it does not matter
* if we skip one check or scan twice.
*/
- if (time_after(l3->next_reap, jiffies))
+ if (time_after(n->next_reap, jiffies))
goto next;
- l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
+ n->next_reap = jiffies + REAPTIMEOUT_NODE;
- drain_array(searchp, l3, l3->shared, 0, node);
+ drain_array(searchp, n, n->shared, 0, node);
- if (l3->free_touched)
- l3->free_touched = 0;
+ if (n->free_touched)
+ n->free_touched = 0;
else {
int freed;
- freed = drain_freelist(searchp, l3, (l3->free_limit +
+ freed = drain_freelist(searchp, n, (n->free_limit +
5 * searchp->num - 1) / (5 * searchp->num));
STATS_ADD_REAPED(searchp, freed);
}
@@ -4160,63 +4078,17 @@ next:
cond_resched();
}
check_irq_on();
- mutex_unlock(&cache_chain_mutex);
+ mutex_unlock(&slab_mutex);
next_reap_node();
out:
/* Set up the next iteration */
- schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC));
+ schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC));
}
#ifdef CONFIG_SLABINFO
-
-static void print_slabinfo_header(struct seq_file *m)
-{
- /*
- * Output format version, so at least we can change it
- * without _too_ many complaints.
- */
-#if STATS
- seq_puts(m, "slabinfo - version: 2.1 (statistics)\n");
-#else
- seq_puts(m, "slabinfo - version: 2.1\n");
-#endif
- seq_puts(m, "# name <active_objs> <num_objs> <objsize> "
- "<objperslab> <pagesperslab>");
- seq_puts(m, " : tunables <limit> <batchcount> <sharedfactor>");
- seq_puts(m, " : slabdata <active_slabs> <num_slabs> <sharedavail>");
-#if STATS
- seq_puts(m, " : globalstat <listallocs> <maxobjs> <grown> <reaped> "
- "<error> <maxfreeable> <nodeallocs> <remotefrees> <alienoverflow>");
- seq_puts(m, " : cpustat <allochit> <allocmiss> <freehit> <freemiss>");
-#endif
- seq_putc(m, '\n');
-}
-
-static void *s_start(struct seq_file *m, loff_t *pos)
-{
- loff_t n = *pos;
-
- mutex_lock(&cache_chain_mutex);
- if (!n)
- print_slabinfo_header(m);
-
- return seq_list_start(&cache_chain, *pos);
-}
-
-static void *s_next(struct seq_file *m, void *p, loff_t *pos)
-{
- return seq_list_next(p, &cache_chain, pos);
-}
-
-static void s_stop(struct seq_file *m, void *p)
+void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
{
- mutex_unlock(&cache_chain_mutex);
-}
-
-static int s_show(struct seq_file *m, void *p)
-{
- struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
- struct slab *slabp;
+ struct page *page;
unsigned long active_objs;
unsigned long num_objs;
unsigned long active_slabs = 0;
@@ -4224,42 +4096,42 @@ static int s_show(struct seq_file *m, void *p)
const char *name;
char *error = NULL;
int node;
- struct kmem_list3 *l3;
+ struct kmem_cache_node *n;
active_objs = 0;
num_slabs = 0;
for_each_online_node(node) {
- l3 = cachep->nodelists[node];
- if (!l3)
+ n = cachep->node[node];
+ if (!n)
continue;
check_irq_on();
- spin_lock_irq(&l3->list_lock);
+ spin_lock_irq(&n->list_lock);
- list_for_each_entry(slabp, &l3->slabs_full, list) {
- if (slabp->inuse != cachep->num && !error)
+ list_for_each_entry(page, &n->slabs_full, lru) {
+ if (page->active != cachep->num && !error)
error = "slabs_full accounting error";
active_objs += cachep->num;
active_slabs++;
}
- list_for_each_entry(slabp, &l3->slabs_partial, list) {
- if (slabp->inuse == cachep->num && !error)
- error = "slabs_partial inuse accounting error";
- if (!slabp->inuse && !error)
- error = "slabs_partial/inuse accounting error";
- active_objs += slabp->inuse;
+ list_for_each_entry(page, &n->slabs_partial, lru) {
+ if (page->active == cachep->num && !error)
+ error = "slabs_partial accounting error";
+ if (!page->active && !error)
+ error = "slabs_partial accounting error";
+ active_objs += page->active;
active_slabs++;
}
- list_for_each_entry(slabp, &l3->slabs_free, list) {
- if (slabp->inuse && !error)
- error = "slabs_free/inuse accounting error";
+ list_for_each_entry(page, &n->slabs_free, lru) {
+ if (page->active && !error)
+ error = "slabs_free accounting error";
num_slabs++;
}
- free_objects += l3->free_objects;
- if (l3->shared)
- shared_avail += l3->shared->avail;
+ free_objects += n->free_objects;
+ if (n->shared)
+ shared_avail += n->shared->avail;
- spin_unlock_irq(&l3->list_lock);
+ spin_unlock_irq(&n->list_lock);
}
num_slabs += active_slabs;
num_objs = num_slabs * cachep->num;
@@ -4270,15 +4142,22 @@ static int s_show(struct seq_file *m, void *p)
if (error)
printk(KERN_ERR "slab: cache %s error: %s\n", name, error);
- seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d",
- name, active_objs, num_objs, cachep->buffer_size,
- cachep->num, (1 << cachep->gfporder));
- seq_printf(m, " : tunables %4u %4u %4u",
- cachep->limit, cachep->batchcount, cachep->shared);
- seq_printf(m, " : slabdata %6lu %6lu %6lu",
- active_slabs, num_slabs, shared_avail);
+ sinfo->active_objs = active_objs;
+ sinfo->num_objs = num_objs;
+ sinfo->active_slabs = active_slabs;
+ sinfo->num_slabs = num_slabs;
+ sinfo->shared_avail = shared_avail;
+ sinfo->limit = cachep->limit;
+ sinfo->batchcount = cachep->batchcount;
+ sinfo->shared = cachep->shared;
+ sinfo->objects_per_slab = cachep->num;
+ sinfo->cache_order = cachep->gfporder;
+}
+
+void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
+{
#if STATS
- { /* list3 stats */
+ { /* node stats */
unsigned long high = cachep->high_mark;
unsigned long allocs = cachep->num_allocations;
unsigned long grown = cachep->grown;
@@ -4306,31 +4185,8 @@ static int s_show(struct seq_file *m, void *p)
allochit, allocmiss, freehit, freemiss);
}
#endif
- seq_putc(m, '\n');
- return 0;
}
-/*
- * slabinfo_op - iterator that generates /proc/slabinfo
- *
- * Output layout:
- * cache-name
- * num-active-objs
- * total-objs
- * object size
- * num-active-slabs
- * total-slabs
- * num-pages-per-slab
- * + further values on SMP and with statistics enabled
- */
-
-static const struct seq_operations slabinfo_op = {
- .start = s_start,
- .next = s_next,
- .stop = s_stop,
- .show = s_show,
-};
-
#define MAX_SLABINFO_WRITE 128
/**
* slabinfo_write - Tuning for the slab allocator
@@ -4339,7 +4195,7 @@ static const struct seq_operations slabinfo_op = {
* @count: data length
* @ppos: unused
*/
-ssize_t slabinfo_write(struct file *file, const char __user * buffer,
+ssize_t slabinfo_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
@@ -4361,9 +4217,9 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
return -EINVAL;
/* Find the cache in the chain of caches. */
- mutex_lock(&cache_chain_mutex);
+ mutex_lock(&slab_mutex);
res = -EINVAL;
- list_for_each_entry(cachep, &cache_chain, next) {
+ list_for_each_entry(cachep, &slab_caches, list) {
if (!strcmp(cachep->name, kbuf)) {
if (limit < 1 || batchcount < 1 ||
batchcount > limit || shared < 0) {
@@ -4376,31 +4232,18 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
break;
}
}
- mutex_unlock(&cache_chain_mutex);
+ mutex_unlock(&slab_mutex);
if (res >= 0)
res = count;
return res;
}
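
A userspace sketch of the tuning interface handled by slabinfo_write() above (values are only an example and assume root plus a SLAB-configured kernel; the line format is the cache name followed by limit, batchcount and shared):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/slabinfo", "w");

	if (!f)
		return 1;
	/* tune the "dentry" cache: limit 120, batchcount 60, shared 8 */
	fprintf(f, "dentry 120 60 8\n");
	return fclose(f) ? 1 : 0;
}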
-static int slabinfo_open(struct inode *inode, struct file *file)
-{
- return seq_open(file, &slabinfo_op);
-}
-
-static const struct file_operations proc_slabinfo_operations = {
- .open = slabinfo_open,
- .read = seq_read,
- .write = slabinfo_write,
- .llseek = seq_lseek,
- .release = seq_release,
-};
-
#ifdef CONFIG_DEBUG_SLAB_LEAK
static void *leaks_start(struct seq_file *m, loff_t *pos)
{
- mutex_lock(&cache_chain_mutex);
- return seq_list_start(&cache_chain, *pos);
+ mutex_lock(&slab_mutex);
+ return seq_list_start(&slab_caches, *pos);
}
static inline int add_caller(unsigned long *n, unsigned long v)
@@ -4433,15 +4276,18 @@ static inline int add_caller(unsigned long *n, unsigned long v)
return 1;
}
-static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s)
+static void handle_slab(unsigned long *n, struct kmem_cache *c,
+ struct page *page)
{
void *p;
int i;
+
if (n[0] == n[1])
return;
- for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) {
- if (slab_bufctl(s)[i] != BUFCTL_ACTIVE)
+ for (i = 0, p = page->s_mem; i < c->num; i++, p += c->size) {
+ if (get_obj_status(page, i) != OBJECT_ACTIVE)
continue;
+
if (!add_caller(n, (unsigned long)*dbg_userword(c, p)))
return;
}
@@ -4465,11 +4311,11 @@ static void show_symbol(struct seq_file *m, unsigned long address)
static int leaks_show(struct seq_file *m, void *p)
{
- struct kmem_cache *cachep = list_entry(p, struct kmem_cache, next);
- struct slab *slabp;
- struct kmem_list3 *l3;
+ struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
+ struct page *page;
+ struct kmem_cache_node *n;
const char *name;
- unsigned long *n = m->private;
+ unsigned long *x = m->private;
int node;
int i;
@@ -4480,43 +4326,43 @@ static int leaks_show(struct seq_file *m, void *p)
/* OK, we can do it */
- n[1] = 0;
+ x[1] = 0;
for_each_online_node(node) {
- l3 = cachep->nodelists[node];
- if (!l3)
+ n = cachep->node[node];
+ if (!n)
continue;
check_irq_on();
- spin_lock_irq(&l3->list_lock);
+ spin_lock_irq(&n->list_lock);
- list_for_each_entry(slabp, &l3->slabs_full, list)
- handle_slab(n, cachep, slabp);
- list_for_each_entry(slabp, &l3->slabs_partial, list)
- handle_slab(n, cachep, slabp);
- spin_unlock_irq(&l3->list_lock);
+ list_for_each_entry(page, &n->slabs_full, lru)
+ handle_slab(x, cachep, page);
+ list_for_each_entry(page, &n->slabs_partial, lru)
+ handle_slab(x, cachep, page);
+ spin_unlock_irq(&n->list_lock);
}
name = cachep->name;
- if (n[0] == n[1]) {
+ if (x[0] == x[1]) {
/* Increase the buffer size */
- mutex_unlock(&cache_chain_mutex);
- m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
+ mutex_unlock(&slab_mutex);
+ m->private = kzalloc(x[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
if (!m->private) {
/* Too bad, we are really out */
- m->private = n;
- mutex_lock(&cache_chain_mutex);
+ m->private = x;
+ mutex_lock(&slab_mutex);
return -ENOMEM;
}
- *(unsigned long *)m->private = n[0] * 2;
- kfree(n);
- mutex_lock(&cache_chain_mutex);
+ *(unsigned long *)m->private = x[0] * 2;
+ kfree(x);
+ mutex_lock(&slab_mutex);
/* Now make sure this entry will be retried */
m->count = m->size;
return 0;
}
- for (i = 0; i < n[1]; i++) {
- seq_printf(m, "%s: %lu ", name, n[2*i+3]);
- show_symbol(m, n[2*i+2]);
+ for (i = 0; i < x[1]; i++) {
+ seq_printf(m, "%s: %lu ", name, x[2*i+3]);
+ show_symbol(m, x[2*i+2]);
seq_putc(m, '\n');
}
@@ -4525,8 +4371,8 @@ static int leaks_show(struct seq_file *m, void *p)
static const struct seq_operations slabstats_op = {
.start = leaks_start,
- .next = s_next,
- .stop = s_stop,
+ .next = slab_next,
+ .stop = slab_stop,
.show = leaks_show,
};
@@ -4557,7 +4403,6 @@ static const struct file_operations proc_slabstats_operations = {
static int __init slab_proc_init(void)
{
- proc_create("slabinfo",S_IWUSR|S_IRUGO,NULL,&proc_slabinfo_operations);
#ifdef CONFIG_DEBUG_SLAB_LEAK
proc_create("slab_allocators", 0, NULL, &proc_slabstats_operations);
#endif
@@ -4584,6 +4429,6 @@ size_t ksize(const void *objp)
if (unlikely(objp == ZERO_SIZE_PTR))
return 0;
- return obj_size(virt_to_cache(objp));
+ return virt_to_cache(objp)->object_size;
}
EXPORT_SYMBOL(ksize);