diff options
-rw-r--r-- | arch/frv/Kconfig | 4 | ||||
-rw-r--r-- | arch/i386/Kconfig | 4 | ||||
-rw-r--r-- | include/linux/mm_types.h | 17 | ||||
-rw-r--r-- | include/linux/poison.h | 3 | ||||
-rw-r--r-- | include/linux/slab.h | 14 | ||||
-rw-r--r-- | include/linux/slub_def.h | 201 | ||||
-rw-r--r-- | init/Kconfig | 53 | ||||
-rw-r--r-- | mm/Makefile | 1 | ||||
-rw-r--r-- | mm/slub.c | 3144 |
9 files changed, 3422 insertions, 19 deletions
diff --git a/arch/frv/Kconfig b/arch/frv/Kconfig index cea237413aa..eed694312a7 100644 --- a/arch/frv/Kconfig +++ b/arch/frv/Kconfig @@ -53,6 +53,10 @@ config ARCH_HAS_ILOG2_U64 bool default y +config ARCH_USES_SLAB_PAGE_STRUCT + bool + default y + mainmenu "Fujitsu FR-V Kernel Configuration" source "init/Kconfig" diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index a9af760c7e5..64ad10f984a 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -79,6 +79,10 @@ config ARCH_MAY_HAVE_PC_FDC bool default y +config ARCH_USES_SLAB_PAGE_STRUCT + bool + default y + config DMI bool default y diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c3852fd4a1c..e30687bad07 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -19,10 +19,16 @@ struct page { unsigned long flags; /* Atomic flags, some possibly * updated asynchronously */ atomic_t _count; /* Usage count, see below. */ - atomic_t _mapcount; /* Count of ptes mapped in mms, + union { + atomic_t _mapcount; /* Count of ptes mapped in mms, * to show when page is mapped * & limit reverse map searches. */ + struct { /* SLUB uses */ + short unsigned int inuse; + short unsigned int offset; + }; + }; union { struct { unsigned long private; /* Mapping-private opaque data: @@ -43,8 +49,15 @@ struct page { #if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS spinlock_t ptl; #endif + struct { /* SLUB uses */ + struct page *first_page; /* Compound pages */ + struct kmem_cache *slab; /* Pointer to slab */ + }; + }; + union { + pgoff_t index; /* Our offset within mapping. */ + void *freelist; /* SLUB: pointer to free object */ }; - pgoff_t index; /* Our offset within mapping. */ struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! */ diff --git a/include/linux/poison.h b/include/linux/poison.h index 89580b76495..95f518b1768 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -18,6 +18,9 @@ #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ #define RED_ACTIVE 0x170FC2A5UL /* when obj is active */ +#define SLUB_RED_INACTIVE 0xbb +#define SLUB_RED_ACTIVE 0xcc + /* ...and for poisoning */ #define POISON_INUSE 0x5a /* for use-uninitialised poisoning */ #define POISON_FREE 0x6b /* for use-after-free poisoning */ diff --git a/include/linux/slab.h b/include/linux/slab.h index f9ed9346bfd..67425c277e1 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -32,6 +32,7 @@ typedef struct kmem_cache kmem_cache_t __deprecated; #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ #define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ +#define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ /* Flags passed to a constructor functions */ #define SLAB_CTOR_CONSTRUCTOR 0x001UL /* If not set, then deconstructor */ @@ -42,7 +43,7 @@ typedef struct kmem_cache kmem_cache_t __deprecated; * struct kmem_cache related prototypes */ void __init kmem_cache_init(void); -extern int slab_is_available(void); +int slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, @@ -95,9 +96,14 @@ static inline void *kcalloc(size_t n, size_t size, gfp_t flags) * the appropriate general cache at compile time. */ -#ifdef CONFIG_SLAB +#if defined(CONFIG_SLAB) || defined(CONFIG_SLUB) +#ifdef CONFIG_SLUB +#include <linux/slub_def.h> +#else #include <linux/slab_def.h> +#endif /* !CONFIG_SLUB */ #else + /* * Fallback definitions for an allocator not wanting to provide * its own optimized kmalloc definitions (like SLOB). @@ -184,7 +190,7 @@ static inline void *__kmalloc_node(size_t size, gfp_t flags, int node) * allocator where we care about the real place the memory allocation * request comes from. */ -#ifdef CONFIG_DEBUG_SLAB +#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) extern void *__kmalloc_track_caller(size_t, gfp_t, void*); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, __builtin_return_address(0)) @@ -202,7 +208,7 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, void*); * standard allocator where we care about the real place the memory * allocation request comes from. */ -#ifdef CONFIG_DEBUG_SLAB +#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB) extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *); #define kmalloc_node_track_caller(size, flags, node) \ __kmalloc_node_track_caller(size, flags, node, \ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h new file mode 100644 index 00000000000..30b154ce728 --- /dev/null +++ b/include/linux/slub_def.h @@ -0,0 +1,201 @@ +#ifndef _LINUX_SLUB_DEF_H +#define _LINUX_SLUB_DEF_H + +/* + * SLUB : A Slab allocator without object queues. + * + * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com> + */ +#include <linux/types.h> +#include <linux/gfp.h> +#include <linux/workqueue.h> +#include <linux/kobject.h> + +struct kmem_cache_node { + spinlock_t list_lock; /* Protect partial list and nr_partial */ + unsigned long nr_partial; + atomic_long_t nr_slabs; + struct list_head partial; +}; + +/* + * Slab cache management. + */ +struct kmem_cache { + /* Used for retriving partial slabs etc */ + unsigned long flags; + int size; /* The size of an object including meta data */ + int objsize; /* The size of an object without meta data */ + int offset; /* Free pointer offset. */ + unsigned int order; + + /* + * Avoid an extra cache line for UP, SMP and for the node local to + * struct kmem_cache. + */ + struct kmem_cache_node local_node; + + /* Allocation and freeing of slabs */ + int objects; /* Number of objects in slab */ + int refcount; /* Refcount for slab cache destroy */ + void (*ctor)(void *, struct kmem_cache *, unsigned long); + void (*dtor)(void *, struct kmem_cache *, unsigned long); + int inuse; /* Offset to metadata */ + int align; /* Alignment */ + const char *name; /* Name (only for display!) */ + struct list_head list; /* List of slab caches */ + struct kobject kobj; /* For sysfs */ + +#ifdef CONFIG_NUMA + int defrag_ratio; + struct kmem_cache_node *node[MAX_NUMNODES]; +#endif + struct page *cpu_slab[NR_CPUS]; +}; + +/* + * Kmalloc subsystem. + */ +#define KMALLOC_SHIFT_LOW 3 + +#ifdef CONFIG_LARGE_ALLOCS +#define KMALLOC_SHIFT_HIGH 25 +#else +#if !defined(CONFIG_MMU) || NR_CPUS > 512 || MAX_NUMNODES > 256 +#define KMALLOC_SHIFT_HIGH 20 +#else +#define KMALLOC_SHIFT_HIGH 18 +#endif +#endif + +/* + * We keep the general caches in an array of slab caches that are used for + * 2^x bytes of allocations. + */ +extern struct kmem_cache kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; + +/* + * Sorry that the following has to be that ugly but some versions of GCC + * have trouble with constant propagation and loops. + */ +static inline int kmalloc_index(int size) +{ + if (size == 0) + return 0; + if (size > 64 && size <= 96) + return 1; + if (size > 128 && size <= 192) + return 2; + if (size <= 8) return 3; + if (size <= 16) return 4; + if (size <= 32) return 5; + if (size <= 64) return 6; + if (size <= 128) return 7; + if (size <= 256) return 8; + if (size <= 512) return 9; + if (size <= 1024) return 10; + if (size <= 2 * 1024) return 11; + if (size <= 4 * 1024) return 12; + if (size <= 8 * 1024) return 13; + if (size <= 16 * 1024) return 14; + if (size <= 32 * 1024) return 15; + if (size <= 64 * 1024) return 16; + if (size <= 128 * 1024) return 17; + if (size <= 256 * 1024) return 18; +#if KMALLOC_SHIFT_HIGH > 18 + if (size <= 512 * 1024) return 19; + if (size <= 1024 * 1024) return 20; +#endif +#if KMALLOC_SHIFT_HIGH > 20 + if (size <= 2 * 1024 * 1024) return 21; + if (size <= 4 * 1024 * 1024) return 22; + if (size <= 8 * 1024 * 1024) return 23; + if (size <= 16 * 1024 * 1024) return 24; + if (size <= 32 * 1024 * 1024) return 25; +#endif + return -1; + +/* + * What we really wanted to do and cannot do because of compiler issues is: + * int i; + * for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) + * if (size <= (1 << i)) + * return i; + */ +} + +/* + * Find the slab cache for a given combination of allocation flags and size. + * + * This ought to end up with a global pointer to the right cache + * in kmalloc_caches. + */ +static inline struct kmem_cache *kmalloc_slab(size_t size) +{ + int index = kmalloc_index(size); + + if (index == 0) + return NULL; + + if (index < 0) { + /* + * Generate a link failure. Would be great if we could + * do something to stop the compile here. + */ + extern void __kmalloc_size_too_large(void); + __kmalloc_size_too_large(); + } + return &kmalloc_caches[index]; +} + +#ifdef CONFIG_ZONE_DMA +#define SLUB_DMA __GFP_DMA +#else +/* Disable DMA functionality */ +#define SLUB_DMA 0 +#endif + +static inline void *kmalloc(size_t size, gfp_t flags) +{ + if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) { + struct kmem_cache *s = kmalloc_slab(size); + + if (!s) + return NULL; + + return kmem_cache_alloc(s, flags); + } else + return __kmalloc(size, flags); +} + +static inline void *kzalloc(size_t size, gfp_t flags) +{ + if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) { + struct kmem_cache *s = kmalloc_slab(size); + + if (!s) + return NULL; + + return kmem_cache_zalloc(s, flags); + } else + return __kzalloc(size, flags); +} + +#ifdef CONFIG_NUMA +extern void *__kmalloc_node(size_t size, gfp_t flags, int node); + +static inline void *kmalloc_node(size_t size, gfp_t flags, int node) +{ + if (__builtin_constant_p(size) && !(flags & SLUB_DMA)) { + struct kmem_cache *s = kmalloc_slab(size); + + if (!s) + return NULL; + + return kmem_cache_alloc_node(s, flags, node); + } else + return __kmalloc_node(size, flags, node); +} +#endif + +#endif /* _LINUX_SLUB_DEF_H */ diff --git a/init/Kconfig b/init/Kconfig index 29d9e47ee0d..7ce95205294 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -478,15 +478,6 @@ config SHMEM option replaces shmem and tmpfs with the much simpler ramfs code, which may be appropriate on small systems without swap. -config SLAB - default y - bool "Use full SLAB allocator" if (EMBEDDED && !SMP && !SPARSEMEM) - help - Disabling this replaces the advanced SLAB allocator and - kmalloc support with the drastically simpler SLOB allocator. - SLOB is more space efficient but does not scale well and is - more susceptible to fragmentation. - config VM_EVENT_COUNTERS default y bool "Enable VM event counters for /proc/vmstat" if EMBEDDED @@ -496,6 +487,46 @@ config VM_EVENT_COUNTERS on EMBEDDED systems. /proc/vmstat will only show page counts if VM event counters are disabled. +choice + prompt "Choose SLAB allocator" + default SLAB + help + This option allows to select a slab allocator. + +config SLAB + bool "SLAB" + help + The regular slab allocator that is established and known to work + well in all environments. It organizes chache hot objects in + per cpu and per node queues. SLAB is the default choice for + slab allocator. + +config SLUB + depends on EXPERIMENTAL && !ARCH_USES_SLAB_PAGE_STRUCT + bool "SLUB (Unqueued Allocator)" + help + SLUB is a slab allocator that minimizes cache line usage + instead of managing queues of cached objects (SLAB approach). + Per cpu caching is realized using slabs of objects instead + of queues of objects. SLUB can use memory efficiently + way and has enhanced diagnostics. + +config SLOB +# +# SLOB cannot support SMP because SLAB_DESTROY_BY_RCU does not work +# properly. +# + depends on EMBEDDED && !SMP && !SPARSEMEM + bool "SLOB (Simple Allocator)" + help + SLOB replaces the SLAB allocator with a drastically simpler + allocator. SLOB is more space efficient that SLAB but does not + scale well (single lock for all operations) and is more susceptible + to fragmentation. SLOB it is a great choice to reduce + memory usage and code size for embedded systems. + +endchoice + endmenu # General setup config RT_MUTEXES @@ -511,10 +542,6 @@ config BASE_SMALL default 0 if BASE_FULL default 1 if !BASE_FULL -config SLOB - default !SLAB - bool - menu "Loadable module support" config MODULES diff --git a/mm/Makefile b/mm/Makefile index f3c077eb0b8..1887148e44e 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -25,6 +25,7 @@ obj-$(CONFIG_TMPFS_POSIX_ACL) += shmem_acl.o obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o obj-$(CONFIG_SLOB) += slob.o obj-$(CONFIG_SLAB) += slab.o +obj-$(CONFIG_SLUB) += slub.o obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o diff --git a/mm/slub.c b/mm/slub.c new file mode 100644 index 00000000000..0cd56bd74b6 --- /dev/null +++ b/mm/slub.c @@ -0,0 +1,3144 @@ +/* + * SLUB: A slab allocator that limits cache line use instead of queuing + * objects in per cpu and per node lists. + * + * The allocator synchronizes using per slab locks and only + * uses a centralized lock to manage a pool of partial slabs. + * + * (C) 2007 SGI, Christoph Lameter <clameter@sgi.com> + */ + +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/bit_spinlock.h> +#include <linux/interrupt.h> +#include <linux/bitops.h> +#include <linux/slab.h> +#include <linux/seq_file.h> +#include <linux/cpu.h> +#include <linux/cpuset.h> +#include <linux/mempolicy.h> +#include <linux/ctype.h> +#include <linux/kallsyms.h> + +/* + * Lock order: + * 1. slab_lock(page) + * 2. slab->list_lock + * + * The slab_lock protects operations on the object of a particular + * slab and its metadata in the page struct. If the slab lock + * has been taken then no allocations nor frees can be performed + * on the objects in the slab nor can the slab be added or removed + * from the partial or full lists since this would mean modifying + * the page_struct of the slab. + * + * The list_lock protects the partial and full list on each node and + * the partial slab counter. If taken then no new slabs may be added or + * removed from the lists nor make the number of partial slabs be modified. + * (Note that the total number of slabs is an atomic value that may be + * modified without taking the list lock). + * + * The list_lock is a centralized lock and thus we avoid taking it as + * much as possible. As long as SLUB does not have to handle partial + * slabs, operations can continue without any centralized lock. F.e. + * allocating a long series of objects that fill up slabs does not require + * the list lock. + * + * The lock order is sometimes inverted when we are trying to get a slab + * off a list. We take the list_lock and then look for a page on the list + * to use. While we do that objects in the slabs may be freed. We can + * only operate on the slab if we have also taken the slab_lock. So we use + * a slab_trylock() on the slab. If trylock was successful then no frees + * can occur anymore and we can use the slab for allocations etc. If the + * slab_trylock() does not succeed then frees are in progress in the slab and + * we must stay away from it for a while since we may cause a bouncing + * cacheline if we try to acquire the lock. So go onto the next slab. + * If all pages are busy then we may allocate a new slab instead of reusing + * a partial slab. A new slab has noone operating on it and thus there is + * no danger of cacheline contention. + * + * Interrupts are disabled during allocation and deallocation in order to + * make the slab allocator safe to use in the context of an irq. In addition + * interrupts are disabled to ensure that the processor does not change + * while handling per_cpu slabs, due to kernel preemption. + * + * SLUB assigns one slab for allocation to each processor. + * Allocations only occur from these slabs called cpu slabs. + * + * Slabs with free elements are kept on a partial list. + * There is no list for full slabs. If an object in a full slab is + * freed then the slab will show up again on the partial lists. + * Otherwise there is no need to track full slabs unless we have to + * track full slabs for debugging purposes. + * + * Slabs are freed when they become empty. Teardown and setup is + * minimal so we rely on the page allocators per cpu caches for + * fast frees and allocs. + * + * Overloading of page flags that are otherwise used for LRU management. + * + * PageActive The slab is used as a cpu cache. Allocations + * may be performed from the slab. The slab is not + * on any slab list and cannot be moved onto one. + * + * PageError Slab requires special handling due to debug + * options set. This moves slab handling out of + * the fast path. + */ + +/* + * Issues still to be resolved: + * + * - The per cpu array is updated for each new slab and and is a remote + * cacheline for most nodes. This could become a bouncing cacheline given + * enough frequent updates. There are 16 pointers in a cacheline.so at + * max 16 cpus could compete. Likely okay. + * + * - Support PAGE_ALLOC_DEBUG. Should be easy to do. + * + * - Support DEBUG_SLAB_LEAK. Trouble is we do not know where the full + * slabs are in SLUB. + * + * - SLAB_DEBUG_INITIAL is not supported but I have never seen a use of + * it. + * + * - Variable sizing of the per node arrays + */ + +/* Enable to test recovery from slab corruption on boot */ +#undef SLUB_RESILIENCY_TEST + +#if PAGE_SHIFT <= 12 + +/* + * Small page size. Make sure that we do not fragment memory + */ +#define DEFAULT_MAX_ORDER 1 +#define DEFAULT_MIN_OBJECTS 4 + +#else + +/* + * Large page machines are customarily able to handle larger + * page orders. + */ +#define DEFAULT_MAX_ORDER 2 +#define DEFAULT_MIN_OBJECTS 8 + +#endif + +/* + * Flags from the regular SLAB that SLUB does not support: + */ +#define SLUB_UNIMPLEMENTED (SLAB_DEBUG_INITIAL) + +#define DEBUG_DEFAULT_FLAGS (SLAB_DEBUG_FREE | SLAB_RED_ZONE | \ + SLAB_POISON | SLAB_STORE_USER) +/* + * Set of flags that will prevent slab merging + */ +#define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \ + SLAB_TRACE | SLAB_DESTROY_BY_RCU) + +#define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \ + SLAB_CACHE_DMA) + +#ifndef ARCH_KMALLOC_MINALIGN +#define ARCH_KMALLOC_MINALIGN sizeof(void *) +#endif + +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN sizeof(void *) +#endif + +/* Internal SLUB flags */ +#define __OBJECT_POISON 0x80000000 /* Poison object */ + +static int kmem_size = sizeof(struct kmem_cache); + +#ifdef CONFIG_SMP +static struct notifier_block slab_notifier; +#endif + +static enum { + DOWN, /* No slab functionality available */ + PARTIAL, /* kmem_cache_open() works but kmalloc does not */ + UP, /* Everything works */ + SYSFS /* Sysfs up */ +} slab_state = DOWN; + +/* A list of all slab caches on the system */ +static DECLARE_RWSEM(slub_lock); +LIST_HEAD(slab_caches); + +#ifdef CONFIG_SYSFS +static int sysfs_slab_add(struct kmem_cache *); +static int sysfs_slab_alias(struct kmem_cache *, const char *); +static void sysfs_slab_remove(struct kmem_cache *); +#else +static int sysfs_slab_add(struct kmem_cache *s) { return 0; } +static int sysfs_slab_alias(struct kmem_cache *s, const char *p) { return 0; } +static void sysfs_slab_remove(struct kmem_cache *s) {} +#endif + +/******************************************************************** + * Core slab cache functions + *******************************************************************/ + +int slab_is_available(void) +{ + return slab_state >= UP; +} + +static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node) +{ +#ifdef CONFIG_NUMA + return s->node[node]; +#else + return &s->local_node; +#endif +} + +/* + * Object debugging + */ +static void print_section(char *text, u8 *addr, unsigned int length) +{ + int i, offset; + int newline = 1; + char ascii[17]; + + ascii[16] = 0; + + for (i = 0; i < length; i++) { + if (newline) { + printk(KERN_ERR "%10s 0x%p: ", text, addr + i); + newline = 0; + } + printk(" %02x", addr[i]); + offset = i % 16; + ascii[offset] = isgraph(addr[i]) ? addr[i] : '.'; + if (offset == 15) { + printk(" %s\n",ascii); + newline = 1; + } + } + if (!newline) { + i %= 16; + while (i < 16) { + printk(" "); + ascii[i] = ' '; + i++; + } + printk(" %s\n", ascii); + } +} + +/* + * Slow version of get and set free pointer. + * + * This requires touching the cache lines of kmem_cache. + * The offset can also be obtained from the page. In that + * case it is in the cacheline that we already need to touch. + */ +static void *get_freepointer(struct kmem_cache *s, void *object) +{ + return *(void **)(object + s->offset); +} + +static void set_freepointer(struct kmem_cache *s, void *object, void *fp) +{ + *(void **)(object + s->offset) = fp; +} + +/* + * Tracking user of a slab. + */ +struct track { + void *addr; /* Called from address */ + int cpu; /* Was running on cpu */ + int pid; /* Pid context */ + unsigned long when; /* When did the operation occur */ +}; + +enum track_item { TRACK_ALLOC, TRACK_FREE }; + +static struct track *get_track(struct kmem_cache *s, void *object, + enum track_item alloc) +{ + struct track *p; + + if (s->offset) + p = object + s->offset + sizeof(void *); + else + p = object + s->inuse; + + return p + alloc; +} + +static void set_track(struct kmem_cache *s, void *object, + enum track_item alloc, void *addr) +{ + struct track *p; + + if (s->offset) + p = object + s->offset + sizeof(void *); + else + p = object + s->inuse; + + p += alloc; + if (addr) { + p->addr = addr; + p->cpu = smp_processor_id(); + p->pid = current ? current->pid : -1; + p->when = jiffies; + } else + memset(p, 0, sizeof(struct track)); +} + +#define set_tracking(__s, __o, __a) set_track(__s, __o, __a, \ + __builtin_return_address(0)) + +static void init_tracking(struct kmem_cache *s, void *object) +{ + if (s->flags & SLAB_STORE_USER) { + set_track(s, object, TRACK_FREE, NULL); + set_track(s, object, TRACK_ALLOC, NULL); + } +} + +static void print_track(const char *s, struct track *t) +{ + if (!t->addr) + return; + + printk(KERN_ERR "%s: ", s); + __print_symbol("%s", (unsigned long)t->addr); + printk(" jiffies_ago=%lu cpu=%u pid=%d\n", jiffies - t->when, t->cpu, t->pid); +} + +static void print_trailer(struct kmem_cache *s, u8 *p) +{ + unsigned int off; /* Offset of last byte */ + + if (s->flags & SLAB_RED_ZONE) + print_section("Redzone", p + s->objsize, + s->inuse - s->objsize); + + printk(KERN_ERR "FreePointer 0x%p -> 0x%p\n", + p + s->offset, + get_freepointer(s, p)); + + if (s->offset) + off = s->offset + sizeof(void *); + else + off = s->inuse; + + if (s->flags & SLAB_STORE_USER) { + print_track("Last alloc", get_track(s, p, TRACK_ALLOC)); + print_track("Last free ", get_track(s, p, TRACK_FREE)); + off += 2 * sizeof(struct track); + } + + if (off != s->size) + /* Beginning of the filler is the free pointer */ + print_section("Filler", p + off, s->size - off); +} + +static void object_err(struct kmem_cache *s, struct page *page, + u8 *object, char *reason) +{ + u8 *addr = page_address(page); + + printk(KERN_ERR "*** SLUB %s: %s@0x%p slab 0x%p\n", + s->name, reason, object, page); + printk(KERN_ERR " offset=%tu flags=0x%04lx inuse=%u freelist=0x%p\n", + object - addr, page->flags, page->inuse, page->freelist); + if (object > addr + 16) + print_section("Bytes b4", object - 16, 16); + print_section("Object", object, min(s->objsize, 128)); + print_trailer(s, object); + dump_stack(); +} + +static void slab_err(struct kmem_cache *s, struct page *page, char *reason, ...) +{ + va_list args; + char buf[100]; + + va_start(args, reason); + vsnprintf(buf, sizeof(buf), reason, args); + va_end(args); + printk(KERN_ERR "*** SLUB %s: %s in slab @0x%p\n", s->name, buf, + page); + dump_stack(); +} + +static void init_object(struct kmem_cache *s, void *object, int active) +{ + u8 *p = object; + + if (s->flags & __OBJECT_POISON) { + memset(p, POISON_FREE, s->objsize - 1); + p[s->objsize -1] = POISON_END; + } + + if (s->flags & SLAB_RED_ZONE) + memset(p + s->objsize, + active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE, + s->inuse - s->objsize); +} + +static int check_bytes(u8 *start, unsigned int value, unsigned int bytes) +{ + while (bytes) { + if (*start != (u8)value) + return 0; + start++; + bytes--; + } + return 1; +} + + +static int check_valid_pointer(struct kmem_cache *s, struct page *page, + void *object) +{ + void *base; + + if (!object) + return 1; + + base = page_address(page); + if (object < base || object >= base + s->objects * s->size || + (object - base) % s->size) { + return 0; + } + + return 1; +} + +/* + * Object layout: + * + * object address + * Bytes of the object to be managed. + * If the freepointer may overlay the object then the free + * pointer is the first word of the object. + * Poisoning uses 0x6b (POISON_FREE) and the last byte is + * 0xa5 (POISON_END) + * + * object + s->objsize + * Padding to reach word boundary. This is also used for Redzoning. + * Padding is extended to word size if Redzoning is enabled + * and objsize == inuse. + * We fill with 0xbb (RED_INACTIVE) for inactive objects and with + * 0xcc (RED_ACTIVE) for objects in use. + * + * object + s->inuse + * A. Free pointer (if we cannot overwrite object on free) + * B. Tracking data for SLAB_STORE_USER + * C. Padding to reach required alignment boundary + * Padding is done using 0x5a (POISON_INUSE) + * + * object + s->size + * + * If slabcaches are merged then the objsize and inuse boundaries are to + * be ignored. And therefore no slab options that rely on these boundaries + * may be used with merged slabcaches. + */ + +static void restore_bytes(struct kmem_cache *s, char *message, u8 data, + void *from, void *to) +{ + printk(KERN_ERR "@@@ SLUB: %s Restoring %s (0x%x) from 0x%p-0x%p\n", + s->name, message, data, from, to - 1); + memset(from, data, to - from); +} + +static int check_pad_bytes(struct kmem_cache *s, struct page *page, u8 *p) +{ + unsigned long off = s->inuse; /* The end of info */ + + if (s->offset) + /* Freepointer is placed after the object. */ + off += sizeof(void *); + + if (s->flags & SLAB_STORE_USER) + /* We also have user information there */ + off += 2 * sizeof(struct track); + + if (s->size == off) + return 1; + + if (check_bytes(p + off, POISON_INUSE, s->size - off)) + return 1; + + object_err(s, page, p, "Object padding check fails"); + + /* + * Restore padding + */ + restore_bytes(s, "object padding", POISON_INUSE, p + off, p + s->size); + return 0; +} + +static int slab_pad_check(struct kmem_cache *s, struct page *page) +{ + u8 *p; + int length, remainder; + + if (!(s->flags & SLAB_POISON)) + return 1; + + p = page_address(page); + length = s->objects * s->size; + remainder = (PAGE_SIZE << s->order) - length; + if (!remainder) + return 1; + + if (!check_bytes(p + length, POISON_INUSE, remainder)) { + printk(KERN_ERR "SLUB: %s slab 0x%p: Padding fails check\n", + s->name, p); + dump_stack(); + restore_bytes(s, "slab padding", POISON_INUSE, p + length, + p + length + remainder); + return 0; + } + return 1; +} + +static int check_object(struct kmem_cache *s, struct page *page, + void *object, int active) +{ + u8 *p = object; + u8 *endobject = object + s->objsize; + + if (s->flags & SLAB_RED_ZONE) { + unsigned int red = + active ? SLUB_RED_ACTIVE : SLUB_RED_INACTIVE; + + if (!check_bytes(endobject, red, s->inuse - s->objsize)) { + object_err(s, page, object, + active ? "Redzone Active" : "Redzone Inactive"); + restore_bytes(s, "redzone", red, + endobject, object + s->inuse); + return 0; + } + } else { + if ((s->flags & SLAB_POISON) && s->objsize < s->inuse && + !check_bytes(endobject, POISON_INUSE, + s->inuse - s->objsize)) { + object_err(s, page, p, "Alignment padding check fails"); + /* + * Fix it so that there will not be another report. + * + * Hmmm... We may be corrupting an object that now expects + * to be longer than allowed. + */ + restore_bytes(s, "alignment padding", POISON_INUSE, + endobject, object + s->inuse); + } + } + + if (s->flags & SLAB_POISON) { + if (!active && (s->flags & __OBJECT_POISON) && + (!check_bytes(p, POISON_FREE, s->objsize - 1) || + p[s->objsize - 1] != POISON_END)) { + + object_err(s, page, p, "Poison check failed"); + restore_bytes(s, "Poison", POISON_FREE, + p, p + s->objsize -1); + restore_bytes(s, "Poison", POISON_END, + p + s->objsize - 1, p + s->objsize); + return 0; + } + /* + * check_pad_bytes cleans up on its own. + */ + check_pad_bytes(s, page, p); + } + + if (!s->offset && active) + /* + * Object and freepointer overlap. Cannot check + * freepointer while object is allocated. + */ + return 1; + + /* Check free pointer validity */ + if (!check_valid_pointer(s, page, get_freepointer(s, p))) { + object_err(s, page, p, "Freepointer corrupt"); + /* + * No choice but to zap it and thus loose the remainder + * of the free objects in this slab. May cause + * another error because the object count maybe + * wrong now. + */ + set_freepointer(s, p, NULL); + return 0; + } + return 1; +} + +static int check_slab(struct kmem_cache *s, struct page *page) +{ + VM_BUG_ON(!irqs_disabled()); + + if (!PageSlab(page)) { + printk(KERN_ERR "SLUB: %s Not a valid slab page @0x%p " + "flags=%lx mapping=0x%p count=%d \n", + s->name, page, page->flags, page->mapping, + page_count(page)); + return 0; + } + if (page->offset * sizeof(void *) != s->offset) { + printk(KERN_ERR "SLUB: %s Corrupted offset %lu in slab @0x%p" + " flags=0x%lx mapping=0x%p count=%d\n", + s->name, + (unsigned long)(page->offset * sizeof(void *)), + page, + page->flags, + page->mapping, + page_count(page)); + dump_stack(); + return 0; + } + if (page->inuse > s->objects) { + printk(KERN_ERR "SLUB: %s Inuse %u > max %u in slab " + "page @0x%p flags=%lx mapping=0x%p count=%d\n", + s->name, page->inuse, s->objects, page, page->flags, + page->mapping, page_count(page)); + dump_stack(); + return 0; + } + /* Slab_pad_check fixes things up after itself */ + slab_pad_check(s, page); + return 1; +} + +/* + * Determine if a certain object on a page is on the freelist and + * therefore free. Must hold the slab lock for cpu slabs to + * guarantee that the chains are consistent. + */ +static int on_freelist(struct kmem_cache *s, struct page *page, void *search) +{ + int nr = 0; + void *fp = page->freelist; + void *object = NULL; + + while (fp && nr <= s->objects) { + if (fp == search) + return 1; + if (!check_valid_pointer(s, page, fp)) { + if (object) { + object_err(s, page, object, + "Freechain corrupt"); + set_freepointer(s, object, NULL); + break; + } else { + printk(KERN_ERR "SLUB: %s slab 0x%p " + "freepointer 0x%p corrupted.\n", + s->name, page, fp); + dump_stack(); + page->freelist = NULL; + page->inuse = s->objects; + return 0; + } + break; + } + object = fp; + fp = get_freepointer(s, object); + nr++; + } + + if (page->inuse != s->objects - nr) { + printk(KERN_ERR "slab %s: page 0x%p wrong object count." + " counter is %d but counted were %d\n", + s->name, page, page->inuse, + s->o |