aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2008-12-29 15:16:24 +0100
committerIngo Molnar <mingo@elte.hu>2008-12-29 15:16:24 +0100
commit2ff9f9d9629bf9530fe2ab8d803d612761ffc059 (patch)
treeb22e3fddffbc0f58b1e1974f4819896d58b7bdaf
parent0f01f07fad4ee11d98fe6faa442afbeb0328a378 (diff)
parenta4900437f3d76761a1646cd90254ccb01714a9ed (diff)
Merge branch 'topic/kmemtrace' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6 into tracing/kmemtrace
-rw-r--r--Documentation/ABI/testing/debugfs-kmemtrace71
-rw-r--r--Documentation/kernel-parameters.txt10
-rw-r--r--Documentation/vm/kmemtrace.txt126
-rw-r--r--MAINTAINERS6
-rw-r--r--include/linux/kmemtrace.h86
-rw-r--r--include/linux/slab.h8
-rw-r--r--include/linux/slab_def.h68
-rw-r--r--include/linux/slob_def.h9
-rw-r--r--include/linux/slub_def.h53
-rw-r--r--init/main.c2
-rw-r--r--lib/Kconfig.debug20
-rw-r--r--mm/Makefile1
-rw-r--r--mm/kmemtrace.c333
-rw-r--r--mm/slab.c79
-rw-r--r--mm/slob.c37
-rw-r--r--mm/slub.c123
16 files changed, 967 insertions, 65 deletions
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
new file mode 100644
index 00000000000..5e6a92a02d8
--- /dev/null
+++ b/Documentation/ABI/testing/debugfs-kmemtrace
@@ -0,0 +1,71 @@
+What: /sys/kernel/debug/kmemtrace/
+Date: July 2008
+Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
+Description:
+
+In kmemtrace-enabled kernels, the following files are created:
+
+/sys/kernel/debug/kmemtrace/
+ cpu<n> (0400) Per-CPU tracing data, see below. (binary)
+ total_overruns (0400) Total number of bytes which were dropped from
+ cpu<n> files because of full buffer condition,
+ non-binary. (text)
+ abi_version (0400) Kernel's kmemtrace ABI version. (text)
+
+Each per-CPU file should be read according to the relay interface. That is,
+the reader should set affinity to that specific CPU and, as currently done by
+the userspace application (though there are other methods), use poll() with
+an infinite timeout before every read(). Otherwise, erroneous data may be
+read. The binary data has the following _core_ format:
+
+ Event ID (1 byte) Unsigned integer, one of:
+ 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
+ 1 - represents a freeing of previously allocated memory
+ (KMEMTRACE_EVENT_FREE)
+ Type ID (1 byte) Unsigned integer, one of:
+ 0 - this is a kmalloc() / kfree()
+ 1 - this is a kmem_cache_alloc() / kmem_cache_free()
+ 2 - this is a __get_free_pages() et al.
+ Event size (2 bytes) Unsigned integer representing the
+ size of this event. Used to extend
+ kmemtrace. Discard the bytes you
+ don't know about.
+ Sequence number (4 bytes) Signed integer used to reorder data
+ logged on SMP machines. Wraparound
+ must be taken into account, although
+ it is unlikely.
+ Caller address (8 bytes) Return address to the caller.
+ Pointer to mem (8 bytes) Pointer to target memory area. Can be
+ NULL, but not all such calls might be
+ recorded.
+
+In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
+
+ Requested bytes (8 bytes) Total number of requested bytes,
+ unsigned, must not be zero.
+ Allocated bytes (8 bytes) Total number of actually allocated
+ bytes, unsigned, must not be lower
+ than requested bytes.
+ Requested flags (4 bytes) GFP flags supplied by the caller.
+ Target CPU (4 bytes) Signed integer, valid for event id 1.
+ If equal to -1, target CPU is the same
+ as origin CPU, but the reverse might
+ not be true.
+
+The data is made available in the same endianness the machine has.
+
+Other event ids and type ids may be defined and added. Other fields may be
+added by increasing event size, but see below for details.
+Every modification to the ABI, including new id definitions, are followed
+by bumping the ABI version by one.
+
+Adding new data to the packet (features) is done at the end of the mandatory
+data:
+ Feature size (2 byte)
+ Feature ID (1 byte)
+ Feature data (Feature size - 3 bytes)
+
+
+Users:
+ kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
+
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a2d8805c03d..af600c0fe0e 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -49,6 +49,7 @@ parameter is applicable:
ISAPNP ISA PnP code is enabled.
ISDN Appropriate ISDN support is enabled.
JOY Appropriate joystick support is enabled.
+ KMEMTRACE kmemtrace is enabled.
LIBATA Libata driver is enabled
LP Printer support is enabled.
LOOP Loopback device support is enabled.
@@ -1033,6 +1034,15 @@ and is between 256 and 4096 characters. It is defined in the file
use the HighMem zone if it exists, and the Normal
zone if it does not.
+ kmemtrace.enable= [KNL,KMEMTRACE] Format: { yes | no }
+ Controls whether kmemtrace is enabled
+ at boot-time.
+
+ kmemtrace.subbufs=n [KNL,KMEMTRACE] Overrides the number of
+ subbufs kmemtrace's relay channel has. Set this
+ higher than default (KMEMTRACE_N_SUBBUFS in code) if
+ you experience buffer overruns.
+
movablecore=nn[KMG] [KNL,X86-32,IA-64,PPC,X86-64] This parameter
is similar to kernelcore except it specifies the
amount of memory used for migratable allocations.
diff --git a/Documentation/vm/kmemtrace.txt b/Documentation/vm/kmemtrace.txt
new file mode 100644
index 00000000000..a956d9b7f94
--- /dev/null
+++ b/Documentation/vm/kmemtrace.txt
@@ -0,0 +1,126 @@
+ kmemtrace - Kernel Memory Tracer
+
+ by Eduard - Gabriel Munteanu
+ <eduard.munteanu@linux360.ro>
+
+I. Introduction
+===============
+
+kmemtrace helps kernel developers figure out two things:
+1) how different allocators (SLAB, SLUB etc.) perform
+2) how kernel code allocates memory and how much
+
+To do this, we trace every allocation and export information to the userspace
+through the relay interface. We export things such as the number of requested
+bytes, the number of bytes actually allocated (i.e. including internal
+fragmentation), whether this is a slab allocation or a plain kmalloc() and so
+on.
+
+The actual analysis is performed by a userspace tool (see section III for
+details on where to get it from). It logs the data exported by the kernel,
+processes it and (as of writing this) can provide the following information:
+- the total amount of memory allocated and fragmentation per call-site
+- the amount of memory allocated and fragmentation per allocation
+- total memory allocated and fragmentation in the collected dataset
+- number of cross-CPU allocation and frees (makes sense in NUMA environments)
+
+Moreover, it can potentially find inconsistent and erroneous behavior in
+kernel code, such as using slab free functions on kmalloc'ed memory or
+allocating less memory than requested (but not truly failed allocations).
+
+kmemtrace also makes provisions for tracing on some arch and analysing the
+data on another.
+
+II. Design and goals
+====================
+
+kmemtrace was designed to handle rather large amounts of data. Thus, it uses
+the relay interface to export whatever is logged to userspace, which then
+stores it. Analysis and reporting is done asynchronously, that is, after the
+data is collected and stored. By design, it allows one to log and analyse
+on different machines and different arches.
+
+As of writing this, the ABI is not considered stable, though it might not
+change much. However, no guarantees are made about compatibility yet. When
+deemed stable, the ABI should still allow easy extension while maintaining
+backward compatibility. This is described further in Documentation/ABI.
+
+Summary of design goals:
+ - allow logging and analysis to be done across different machines
+ - be fast and anticipate usage in high-load environments (*)
+ - be reasonably extensible
+ - make it possible for GNU/Linux distributions to have kmemtrace
+ included in their repositories
+
+(*) - one of the reasons Pekka Enberg's original userspace data analysis
+ tool's code was rewritten from Perl to C (although this is more than a
+ simple conversion)
+
+
+III. Quick usage guide
+======================
+
+1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
+CONFIG_KMEMTRACE).
+
+2) Get the userspace tool and build it:
+$ git-clone git://repo.or.cz/kmemtrace-user.git # current repository
+$ cd kmemtrace-user/
+$ ./autogen.sh
+$ ./configure
+$ make
+
+3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
+'single' runlevel (so that relay buffers don't fill up easily), and run
+kmemtrace:
+# '$' does not mean user, but root here.
+$ mount -t debugfs none /sys/kernel/debug
+$ mount -t proc none /proc
+$ cd path/to/kmemtrace-user/
+$ ./kmemtraced
+Wait a bit, then stop it with CTRL+C.
+$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
+ # overrun, should
+ # be zero.
+$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
+ check its correctness]
+$ ./kmemtrace-report
+
+Now you should have a nice and short summary of how the allocator performs.
+
+IV. FAQ and known issues
+========================
+
+Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
+this? Should I worry?
+A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
+large the number is. You can fix it by supplying a higher
+'kmemtrace.subbufs=N' kernel parameter.
+---
+
+Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
+A: This is a bug and should be reported. It can occur for a variety of
+reasons:
+ - possible bugs in relay code
+ - possible misuse of relay by kmemtrace
+ - timestamps being collected unorderly
+Or you may fix it yourself and send us a patch.
+---
+
+Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
+A: This is a known issue and I'm working on it. These might be true errors
+in kernel code, which may have inconsistent behavior (e.g. allocating memory
+with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
+out this behavior may work with SLAB, but may fail with other allocators.
+
+It may also be due to lack of tracing in some unusual allocator functions.
+
+We don't want bug reports regarding this issue yet.
+---
+
+V. See also
+===========
+
+Documentation/kernel-parameters.txt
+Documentation/ABI/testing/debugfs-kmemtrace
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 08d0ab7fa16..857c877eee2 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2566,6 +2566,12 @@ M: jason.wessel@windriver.com
L: kgdb-bugreport@lists.sourceforge.net
S: Maintained
+KMEMTRACE
+P: Eduard - Gabriel Munteanu
+M: eduard.munteanu@linux360.ro
+L: linux-kernel@vger.kernel.org
+S: Maintained
+
KPROBES
P: Ananth N Mavinakayanahalli
M: ananth@in.ibm.com
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
new file mode 100644
index 00000000000..5bea8ead6a6
--- /dev/null
+++ b/include/linux/kmemtrace.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2008 Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#ifndef _LINUX_KMEMTRACE_H
+#define _LINUX_KMEMTRACE_H
+
+#ifdef __KERNEL__
+
+#include <linux/types.h>
+#include <linux/marker.h>
+
+enum kmemtrace_type_id {
+ KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
+ KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
+ KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
+};
+
+#ifdef CONFIG_KMEMTRACE
+
+extern void kmemtrace_init(void);
+
+static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node)
+{
+ trace_mark(kmemtrace_alloc, "type_id %d call_site %lu ptr %lu "
+ "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d",
+ type_id, call_site, (unsigned long) ptr,
+ (unsigned long) bytes_req, (unsigned long) bytes_alloc,
+ (unsigned long) gfp_flags, node);
+}
+
+static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr)
+{
+ trace_mark(kmemtrace_free, "type_id %d call_site %lu ptr %lu",
+ type_id, call_site, (unsigned long) ptr);
+}
+
+#else /* CONFIG_KMEMTRACE */
+
+static inline void kmemtrace_init(void)
+{
+}
+
+static inline void kmemtrace_mark_alloc_node(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags,
+ int node)
+{
+}
+
+static inline void kmemtrace_mark_free(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr)
+{
+}
+
+#endif /* CONFIG_KMEMTRACE */
+
+static inline void kmemtrace_mark_alloc(enum kmemtrace_type_id type_id,
+ unsigned long call_site,
+ const void *ptr,
+ size_t bytes_req,
+ size_t bytes_alloc,
+ gfp_t gfp_flags)
+{
+ kmemtrace_mark_alloc_node(type_id, call_site, ptr,
+ bytes_req, bytes_alloc, gfp_flags, -1);
+}
+
+#endif /* __KERNEL__ */
+
+#endif /* _LINUX_KMEMTRACE_H */
+
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 000da12b5cf..c97ed28559e 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -253,9 +253,9 @@ static inline void *kmem_cache_alloc_node(struct kmem_cache *cachep,
* request comes from.
*/
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
-extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
+extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long);
#define kmalloc_track_caller(size, flags) \
- __kmalloc_track_caller(size, flags, __builtin_return_address(0))
+ __kmalloc_track_caller(size, flags, _RET_IP_)
#else
#define kmalloc_track_caller(size, flags) \
__kmalloc(size, flags)
@@ -271,10 +271,10 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, void*);
* allocation request comes from.
*/
#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_SLUB)
-extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, void *);
+extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long);
#define kmalloc_node_track_caller(size, flags, node) \
__kmalloc_node_track_caller(size, flags, node, \
- __builtin_return_address(0))
+ _RET_IP_)
#else
#define kmalloc_node_track_caller(size, flags, node) \
__kmalloc_node(size, flags, node)
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 39c3a5eb8eb..7555ce99f6d 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,6 +14,7 @@
#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
#include <linux/compiler.h>
+#include <linux/kmemtrace.h>
/* Size description struct for general caches. */
struct cache_sizes {
@@ -28,8 +29,26 @@ extern struct cache_sizes malloc_sizes[];
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);
-static inline void *kmalloc(size_t size, gfp_t flags)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags);
+extern size_t slab_buffer_size(struct kmem_cache *cachep);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
{
+ return kmem_cache_alloc(cachep, flags);
+}
+static inline size_t slab_buffer_size(struct kmem_cache *cachep)
+{
+ return 0;
+}
+#endif
+
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
+{
+ struct kmem_cache *cachep;
+ void *ret;
+
if (__builtin_constant_p(size)) {
int i = 0;
@@ -50,10 +69,17 @@ static inline void *kmalloc(size_t size, gfp_t flags)
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
- return kmem_cache_alloc(malloc_sizes[i].cs_dmacachep,
- flags);
+ cachep = malloc_sizes[i].cs_dmacachep;
+ else
#endif
- return kmem_cache_alloc(malloc_sizes[i].cs_cachep, flags);
+ cachep = malloc_sizes[i].cs_cachep;
+
+ ret = kmem_cache_alloc_notrace(cachep, flags);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+ size, slab_buffer_size(cachep), flags);
+
+ return ret;
}
return __kmalloc(size, flags);
}
@@ -62,8 +88,25 @@ found:
extern void *__kmalloc_node(size_t size, gfp_t flags, int node);
extern void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
+ gfp_t flags,
+ int nodeid)
+{
+ return kmem_cache_alloc_node(cachep, flags, nodeid);
+}
+#endif
+
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
+ struct kmem_cache *cachep;
+ void *ret;
+
if (__builtin_constant_p(size)) {
int i = 0;
@@ -84,11 +127,18 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
found:
#ifdef CONFIG_ZONE_DMA
if (flags & GFP_DMA)
- return kmem_cache_alloc_node(malloc_sizes[i].cs_dmacachep,
- flags, node);
+ cachep = malloc_sizes[i].cs_dmacachep;
+ else
#endif
- return kmem_cache_alloc_node(malloc_sizes[i].cs_cachep,
- flags, node);
+ cachep = malloc_sizes[i].cs_cachep;
+
+ ret = kmem_cache_alloc_node_notrace(cachep, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_,
+ ret, size, slab_buffer_size(cachep),
+ flags, node);
+
+ return ret;
}
return __kmalloc_node(size, flags, node);
}
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 59a3fa476ab..0ec00b39d00 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -3,14 +3,15 @@
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
-static inline void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
+static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
+ gfp_t flags)
{
return kmem_cache_alloc_node(cachep, flags, -1);
}
void *__kmalloc_node(size_t size, gfp_t flags, int node);
-static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
+static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
return __kmalloc_node(size, flags, node);
}
@@ -23,12 +24,12 @@ static inline void *kmalloc_node(size_t size, gfp_t flags, int node)
* kmalloc is the normal method of allocating memory
* in the kernel.
*/
-static inline void *kmalloc(size_t size, gfp_t flags)
+static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
return __kmalloc_node(size, flags, -1);
}
-static inline void *__kmalloc(size_t size, gfp_t flags)
+static __always_inline void *__kmalloc(size_t size, gfp_t flags)
{
return kmalloc(size, flags);
}
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 2f5c16b1aac..dc28432b5b9 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,6 +10,7 @@
#include <linux/gfp.h>
#include <linux/workqueue.h>
#include <linux/kobject.h>
+#include <linux/kmemtrace.h>
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
@@ -204,13 +205,31 @@ static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
void *__kmalloc(size_t size, gfp_t flags);
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags);
+#else
+static __always_inline void *
+kmem_cache_alloc_notrace(struct kmem_cache *s, gfp_t gfpflags)
+{
+ return kmem_cache_alloc(s, gfpflags);
+}
+#endif
+
static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
{
- return (void *)__get_free_pages(flags | __GFP_COMP, get_order(size));
+ unsigned int order = get_order(size);
+ void *ret = (void *) __get_free_pages(flags | __GFP_COMP, order);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC, _THIS_IP_, ret,
+ size, PAGE_SIZE << order, flags);
+
+ return ret;
}
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
+ void *ret;
+
if (__builtin_constant_p(size)) {
if (size > PAGE_SIZE)
return kmalloc_large(size, flags);
@@ -221,7 +240,13 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
if (!s)
return ZERO_SIZE_PTR;
- return kmem_cache_alloc(s, flags);
+ ret = kmem_cache_alloc_notrace(s, flags);
+
+ kmemtrace_mark_alloc(KMEMTRACE_TYPE_KMALLOC,
+ _THIS_IP_, ret,
+ size, s->size, flags);
+
+ return ret;
}
}
return __kmalloc(size, flags);
@@ -231,8 +256,24 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
void *__kmalloc_node(size_t size, gfp_t flags, int node);
void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
+#ifdef CONFIG_KMEMTRACE
+extern void *kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node);
+#else
+static __always_inline void *
+kmem_cache_alloc_node_notrace(struct kmem_cache *s,
+ gfp_t gfpflags,
+ int node)
+{
+ return kmem_cache_alloc_node(s, gfpflags, node);
+}
+#endif
+
static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
{
+ void *ret;
+
if (__builtin_constant_p(size) &&
size <= PAGE_SIZE && !(flags & SLUB_DMA)) {
struct kmem_cache *s = kmalloc_slab(size);
@@ -240,7 +281,13 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
if (!s)
return ZERO_SIZE_PTR;
- return kmem_cache_alloc_node(s, flags, node);
+ ret = kmem_cache_alloc_node_notrace(s, flags, node);
+
+ kmemtrace_mark_alloc_node(KMEMTRACE_TYPE_KMALLOC,
+ _THIS_IP_, ret,
+ size, s->size, flags, node);
+
+ return ret;
}
return __kmalloc_node(size, flags, node);
}
diff --git a/init/main.c b/init/main.c
index 17e9757bfde..9711586aa7c 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,7 @@
#include <asm/setup.h>
#include <asm/sections.h>
#include <asm/cacheflush.h>
+#include <linux/kmemtrace.h>
#ifdef CONFIG_X86_LOCAL_APIC
#include <asm/smp.h>
@@ -654,6 +655,7 @@ asmlinkage void __init start_kernel(void)
enable_debug_pagealloc();
cpu_hotplug_init();
kmem_cache_init();
+ kmemtrace_init();
debug_objects_mem_init();
idr_init_cache();
setup_per_cpu_pageset();
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index b0f239e443b..b5417e23ba9 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -803,6 +803,26 @@ config FIREWIRE_OHCI_REMOTE_DMA
If unsure, say N.
+config KMEMTRACE
+ bool "Kernel memory tracer (kmemtrace)"
+ depends on RELAY && DEBUG_FS && MARKERS
+ help
+ kmemtrace provides tracing for slab allocator functions, such as
+ kmalloc, kfree, kmem_cache_alloc, kmem_cache_free etc.. Collected
+ data is then fed to the userspace application in order to analyse
+ allocation hotspots, internal fragmentation and so on, making it
+ possible to see how well an allocator performs, as well as debug
+ and profile kernel code.
+
+ This requires an userspace application to use. See
+ Documentation/vm/kmemtrace.txt for more information.
+
+ Saying Y will make the kernel somewhat larger and slower. However,
+ if you disable kmemtrace at run-time or boot-time, the performance
+ impact is minimal (depending on the arch the kernel is built for).
+
+ If unsure, say N.
+
menuconfig BUILD_DOCSRC
bool "Build targets in Documentation/ tree"
depends on HEADERS_CHECK
diff --git a/mm/Makefile b/mm/Makefile
index c06b45a1ff5..3782eb66d4b 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -34,3 +34,4 @@ obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_SMP) += allocpercpu.o
obj-$(CONFIG_QUICKLIST) += quicklist.o
obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
+obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
diff --git a/mm/kmemtrace.c b/mm/kmemtrace.c
new file mode 100644
index 00000000000..2a70a805027
--- /dev/null
+++ b/mm/kmemtrace.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (C) 2008 Pekka Enberg, Eduard - Gabriel Munteanu
+ *
+ * This file is released under GPL version 2.
+ */
+
+#include <linux/string.h>
+#include <linux/debugfs.h>
+#include <linux/relay.h>
+#include <linux/module.h>
+#include <linux/marker.h>
+#include <linux/gfp.h>
+#include <linux/kmemtrace.h>
+
+#define KMEMTRACE_SUBBUF_SIZE 524288
+#define KMEMTRACE_DEF_N_SUBBUFS 20
+
+static struct rchan *kmemtrace_chan;
+static u32 kmemtrace_buf_overruns;
+
+static unsigned int kmemtrace_n_subbufs;
+
+/* disabled by default */
+static unsigned int kmemtrace_enabled;
+
+/*
+ * The sequence number is used for reordering kmemtrace packets
+ * in userspace, since they are logged as per-CPU data.
+ *
+ * atomic_t should always be a 32-bit signed integer. Wraparound is not
+ * likely to occur, but userspace can deal with it by expecting a certain
+ * sequence number in the next packet that will be read.
+ */
+static atomic_t kmemtrace_seq_num;
+
+#define KMEMTRACE_ABI_VERSION 1
+
+static u32 kmemtrace_abi_version __read_mostly = KMEMTRACE_ABI_VERSION;
+
+enum kmemtrace_event_id {
+ KMEMTRACE_EVENT_ALLOC = 0,
+ KMEMTRACE_EVENT_FREE,
+};
+
+struct kmemtrace_event {
+ u8 event_id;
+ u8 type_id;
+ u16 event_size;
+ s32 seq_num;
+ u64 call_site;
+ u64 ptr;
+} __attribute__ ((__packed__));
+
+struct kmemtrace_stats_alloc {
+ u64 bytes_req;
+ u64 bytes_alloc;
+ u32 gfp_flags;
+ s32 numa_node;
+} __attribute__ ((__packed__));
+
+static void kmemtrace_probe_alloc(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ unsigned long flags;
+ struct kmemtrace_event *ev;
+ struct kmemtrace_stats_alloc *stats;
+ void *buf;
+
+ local_irq_save(flags);
+
+ buf = relay_reserve(kmemtrace_chan,
+ sizeof(struct kmemtrace_event) +
+ sizeof(struct kmemtrace_stats_alloc));
+ if (!buf)
+ goto failed;
+
+ /*
+ * Don't convert this to use structure initializers,
+ * C99 does not guarantee the rvalues evaluation order.
+ */
+
+ ev = buf;
+ ev->event_id = KMEMTRACE_EVENT_ALLOC;
+ ev->type_id = va_arg(*args, int);
+ ev->event_size = sizeof(struct kmemtrace_event) +
+ sizeof(struct kmemtrace_stats_alloc);
+ ev->seq_num = atomic_add_return(1, &kmemtrace_seq_num);
+ ev->call_site = va_arg(*args, unsigned long);
+ ev->ptr = va_arg(*args, unsigned long);
+
+ stats = buf + sizeof(struct kmemtrace_event);
+ stats->bytes_req = va_arg(*args, unsigned long);
+ stats->bytes_alloc = va_arg(*args, unsigned long);
+ stats->gfp_flags = va_arg(*args, unsigned long);
+ stats->numa_node = va_arg(*args, int);
+
+failed:
+ local_irq_restore(flags);
+}
+
+static void kmemtrace_probe_free(void *probe_data, void *call_data,
+ const char *format, va_list *args)
+{
+ unsigned long flags;
+ struct kmemtrace_event *ev;
+
+ local_irq_save(flags);
+
+ ev = relay_reserve(kmemtrace_chan, sizeof(struct kmemtrace_event));
+ if (!ev)
+ goto failed;
+
+ /*
+ * Don't convert this to use structure initializers,
+ * C99 does not guarantee the rvalues evaluation order.
+ */
+ ev->event_id = KMEMTRACE_EVENT_FREE;
+ ev->type_id = va_arg(*args, int);
+ ev->event_size = sizeof(struct kmemtrace_event);
+ ev->seq_num = atomic_add_return(1, &kmemtrace_seq_num);
+ ev->call_site = va_arg(*args, unsigned long);
+ ev->ptr = va_arg(*args, unsigned long);
+
+failed:
+ local_irq_restore(flags);
+}
+
+static struct dentry *
+kmemtrace_create_buf_file(const char *filename, struct dentry *parent,
+ int mode, struct rchan_buf *buf, int *is_global)
+{
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+}
+
+static int kmemtrace_remove_buf_file(struct dentry *dentry)
+{
+ debugfs_remove(dentry);
+
+ return 0;
+}
+
+static int kmemtrace_subbuf_start(struct rchan_buf *buf,
+ void *subbuf,
+ void *prev_subbuf,
+ size_t prev_padding)
+{
+ if (relay_buf_full(buf)) {
+ /*
+ * We know it's not SMP-safe, but neither
+ * debugfs_create_u32() is.
+ */
+ kmemtrace_buf_overruns++;
+ return 0;
+ }
+
+ return 1;
+}
+
+static struct rchan_callbacks relay_callbacks = {
+ .create_buf_file = kmemtrace_create_buf_file,
+ .remove_buf_file = kmemtrace_remove_buf_file,
+ .subbuf_start = kmemtrace_subbuf_start,
+};
+
+static struct dentry *kmemtrace_dir;
+static struct dentry *kmemtrace_overruns_dentry;
+static struct dentry *kmemtrace_abi_version_dentry;
+
+static struct dentry *kmemtrace_enabled_dentry;
+
+static int kmemtrace_start_probes(void)
+{
+ int err;
+
+ err = marker_probe_register("kmemtrace_alloc", "type_id %d "
+ "call_site %lu ptr %lu "
+ "bytes_req %lu bytes_alloc %lu "
+ "gfp_flags %lu node %d",
+ kmemtrace_probe_alloc, NULL);
+ if (err)
+ return err;
+ err = marker_probe_register("kmemtrace_free", "type_id %d "
+ "call_site %lu ptr %lu",
+ kmemtrace_probe_free, NULL);
+
+ return err;
+}
+
+static void kmemtrace_stop_probes(void)
+{
+ marker_probe_unregister("kmemtrace_alloc",
+ kmemtrace_probe_alloc, NULL);
+ marker_probe_unregister("kmemtrace_free",
+ kmemtrace_probe_free, NULL);
+}
+
+static int kmemtrace_enabled_get(void *data, u64 *val)
+{
+ *val = *((int *) data);
+
+ return 0;
+}
+
+static int kmemtrace_enabled_set(void *data, u64 val)
+{
+ u64 old_val = kmemtrace_enabled;
+
+ *((int *) data) = !!val;
+
+ if (old_val == val)
+ return 0;
+ if (val)
+ kmemtrace_start_probes();
+ else
+ kmemtrace_stop_probes();
+
+ return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(kmemtrace_enabled_fops,
+ kmemtrace_enabled_get,
+ kmemtrace_enabled_set, "%llu\n");
+
+static void kmemtrace_cleanup(void)
+{
+ if (kmemtrace_enabled_dentry)
+ debugfs_remove(kmemtrace_enabled_dentry);
+
+ kmemtrace_stop_probes();
+
+ if (kmemtrace_abi_version_dentry)
+ debugfs_remove(kmemtrace_abi_version_dentry);
+ if (kmemtrace_overruns_dentry)
+ debugfs_remove(kmemtrace_overruns_dentry);
+
+ relay_close(kmemtrace_chan);
+ kmemtrace_chan = NULL;
+
+ if (kmemtrace_dir)
+ debugfs_remove(kmemtrace_dir);
+}
+
+static int __init kmemtrace_setup_late(void)
+{
+ if (!kmemtrace_chan)
+ goto failed;
+
+ kmemtrace_dir = debugfs_create_dir("kmemtrace", NULL);
+ if (!kmemtrace_dir)
+ goto cleanup;
+
+ kmemtrace_abi_version_dentry =
+ debugfs_create_u32("abi_version", S_IRUSR,
+ kmemtrace_dir, &kmemtrace_abi_version);
+ kmemtrace_overruns_dentry =
+ debugfs_create_u32("total_overruns", S_IRUSR,
+ kmemtrace_dir, &kmemtrace_buf_overruns);
+ if (!kmemtrace_overruns_dentry || !kmemtrace_abi_version_dentry)
+ goto cleanup;
+
+ kmemtrace_enabled_dentry =
+ debugfs_create_file("enabled", S_IRUSR | S_IWUSR,
+ kmemtrace_dir, &kmemtrace_enabled,
+ &kmemtrace_enabled_fops);
+ if (!kmemtrace_enabled_dentry)
+ goto cleanup;
+
+ if (relay_late_setup_files(kmemtrace_chan, "cpu", kmemtrace_dir))
+ goto cleanup;
+
+ printk(KERN_INFO "kmemtrace: fully up.\n");
+
+ return 0;
+
+cleanup:
+ kmemtrace_cleanup();
+failed:
+ return 1;