aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kernel/cpu/perf_event.c4
-rw-r--r--kernel/perf_event.c4
-rw-r--r--tools/perf/.gitignore1
-rw-r--r--tools/perf/Documentation/perf-kmem.txt44
-rw-r--r--tools/perf/Makefile3
-rw-r--r--tools/perf/bench/bench.h1
-rw-r--r--tools/perf/bench/mem-memcpy.c193
-rw-r--r--tools/perf/builtin-annotate.c80
-rw-r--r--tools/perf/builtin-bench.c15
-rw-r--r--tools/perf/builtin-help.c4
-rw-r--r--tools/perf/builtin-kmem.c354
-rw-r--r--tools/perf/builtin-probe.c4
-rw-r--r--tools/perf/builtin-report.c86
-rw-r--r--tools/perf/builtin-sched.c16
-rw-r--r--tools/perf/builtin-top.c68
-rw-r--r--tools/perf/builtin-trace.c2
-rw-r--r--tools/perf/command-list.txt1
-rw-r--r--tools/perf/util/data_map.c8
-rw-r--r--tools/perf/util/data_map.h2
-rw-r--r--tools/perf/util/event.h7
-rw-r--r--tools/perf/util/header.c8
-rw-r--r--tools/perf/util/include/asm/bug.h22
-rw-r--r--tools/perf/util/include/linux/bitops.h2
-rw-r--r--tools/perf/util/map.c14
-rw-r--r--tools/perf/util/parse-events.c2
-rw-r--r--tools/perf/util/process_event.c53
-rw-r--r--tools/perf/util/process_event.h29
-rw-r--r--tools/perf/util/process_events.c64
-rw-r--r--tools/perf/util/process_events.h35
-rw-r--r--tools/perf/util/symbol.c110
-rw-r--r--tools/perf/util/symbol.h17
-rw-r--r--tools/perf/util/thread.c2
-rw-r--r--tools/perf/util/thread.h4
-rw-r--r--tools/perf/util/util.h16
-rw-r--r--tools/perf/util/wrapper.c61
35 files changed, 916 insertions, 420 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index bd874302420..c1bbed1021d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -2229,10 +2229,10 @@ validate_event(struct cpu_hw_events *cpuc, struct perf_event *event)
{
struct hw_perf_event fake_event = event->hw;
- if (event->pmu != &pmu)
+ if (event->pmu && event->pmu != &pmu)
return 0;
- return x86_schedule_event(cpuc, &fake_event);
+ return x86_schedule_event(cpuc, &fake_event) >= 0;
}
static int validate_group(struct perf_event *event)
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 9425c9600c8..35df94e344f 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -1831,7 +1831,7 @@ static int perf_event_read_group(struct perf_event *event,
size = n * sizeof(u64);
- if (copy_to_user(buf + size, values, size)) {
+ if (copy_to_user(buf + ret, values, size)) {
ret = -EFAULT;
goto unlock;
}
@@ -3914,7 +3914,7 @@ void perf_swevent_put_recursion_context(int rctx)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
barrier();
- cpuctx->recursion[rctx]++;
+ cpuctx->recursion[rctx]--;
put_cpu_var(perf_cpu_context);
}
EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 0854f110bf7..fe08660ce0b 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -12,6 +12,7 @@ perf*.1
perf*.xml
perf*.html
common-cmds.h
+perf.data
tags
TAGS
cscope*
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644
index 00000000000..44b0ce35c28
--- /dev/null
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -0,0 +1,44 @@
+perf-kmem(1)
+==============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory (slab) properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record} [<options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf kmem:
+
+ 'perf kmem record <command>' to record the kmem events
+ of an arbitrary workload.
+
+ 'perf kmem' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+ Select the input file (default: perf.data)
+
+--stat=<caller|alloc>::
+ Select per callsite or per allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+ Sort the output (default: frag,hit,bytes)
+
+-l <num>::
+--line=<num>::
+ Print only <num> lines
+
+--raw-ip::
+ Print raw ip instead of symbol
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 3ef6621bf6c..de37d492e10 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -369,6 +369,7 @@ LIB_H += util/sort.h
LIB_H += util/hist.h
LIB_H += util/thread.h
LIB_H += util/data_map.h
+LIB_H += util/process_events.h
LIB_OBJS += util/abspath.o
LIB_OBJS += util/alias.o
@@ -411,6 +412,7 @@ LIB_OBJS += util/svghelper.o
LIB_OBJS += util/sort.o
LIB_OBJS += util/hist.o
LIB_OBJS += util/data_map.o
+LIB_OBJS += util/process_events.o
BUILTIN_OBJS += builtin-annotate.o
@@ -419,6 +421,7 @@ BUILTIN_OBJS += builtin-bench.o
# Benchmark modules
BUILTIN_OBJS += bench/sched-messaging.o
BUILTIN_OBJS += bench/sched-pipe.o
+BUILTIN_OBJS += bench/mem-memcpy.o
BUILTIN_OBJS += builtin-help.o
BUILTIN_OBJS += builtin-sched.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 9fbd8d745fa..f7781c6267c 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -3,6 +3,7 @@
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
+extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
new file mode 100644
index 00000000000..89773178e89
--- /dev/null
+++ b/tools/perf/bench/mem-memcpy.c
@@ -0,0 +1,193 @@
+/*
+ * mem-memcpy.c
+ *
+ * memcpy: Simple memory copy in various ways
+ *
+ * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
+ */
+#include <ctype.h>
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+#include "../util/string.h"
+#include "../util/header.h"
+#include "bench.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/time.h>
+#include <errno.h>
+
+#define K 1024
+
+static const char *length_str = "1MB";
+static const char *routine = "default";
+static int use_clock = 0;
+static int clock_fd;
+
+static const struct option options[] = { /* command-line options parsed by bench_mem_memcpy() */
+ OPT_STRING('l', "length", &length_str, "1MB",
+ "Specify length of memory to copy. "
+ "available unit: B, MB, GB (upper and lower)"),
+ OPT_STRING('r', "routine", &routine, "default",
+ "Specify routine to copy"),
+ OPT_BOOLEAN('c', "clock", &use_clock,
+ "Use CPU clock for measuring"),
+ OPT_END()
+};
+
+struct routine { /* one selectable copy implementation */
+ const char *name; /* compared against the -r/--routine argument */
+ const char *desc; /* printed when the available routines are listed */
+ void * (*fn)(void *dst, const void *src, size_t len); /* copy function with memcpy()'s signature */
+};
+
+struct routine routines[] = { /* table of copy routines; the entry with a NULL name ends the table */
+ { "default",
+ "Default memcpy() provided by glibc",
+ memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
+static const char * const bench_mem_memcpy_usage[] = { /* usage strings handed to parse_options() */
+ "perf bench mem memcpy <options>",
+ NULL
+};
+
+static struct perf_event_attr clock_attr = { /* event opened by init_clock(): hardware CPU cycles */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES
+};
+
+/*
+ * Open the event described by clock_attr for the current process and
+ * keep its descriptor in clock_fd.
+ *
+ * A failure with ENOSYS is reported through die(); any other failure
+ * trips BUG_ON().
+ */
+static void init_clock(void)
+{
+	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0);
+	if (clock_fd >= 0)
+		return;
+
+	if (errno == ENOSYS)
+		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
+	else
+		BUG_ON(clock_fd < 0);
+}
+
+/*
+ * Read one u64 counter value from clock_fd and return it.
+ * A short or failed read trips BUG_ON().
+ */
+static u64 get_clock(void)
+{
+	u64 cycles;
+	int nread = read(clock_fd, &cycles, sizeof(u64));
+
+	BUG_ON(nread != sizeof(u64));
+
+	return cycles;
+}
+
+/* Convert a struct timeval to seconds, expressed as a double. */
+static double timeval2double(struct timeval *ts)
+{
+	const double usec_per_sec = (double)1000000;
+	double seconds = (double)ts->tv_sec;
+
+	seconds += (double)ts->tv_usec / usec_per_sec;
+	return seconds;
+}
+
+/*
+ * Entry point of 'perf bench mem memcpy'.
+ *
+ * Parses the command line, copies 'length' bytes once with the selected
+ * routine and prints the result in the format chosen by bench_format:
+ * bytes per second measured with gettimeofday(), or, with -c/--clock,
+ * the counter delta read through clock_fd divided by the length.
+ *
+ * Returns 0 on success and 1 when the length is invalid or the routine
+ * name is not found in routines[].
+ */
+int bench_mem_memcpy(int argc, const char **argv,
+		     const char *prefix __used)
+{
+	int i;
+	void *dst, *src;
+	size_t length;
+	double bps = 0.0;
+	struct timeval tv_start, tv_end, tv_diff;
+	u64 clock_start, clock_end, clock_diff;
+
+	clock_start = clock_end = clock_diff = 0ULL;
+	argc = parse_options(argc, argv, options,
+			     bench_mem_memcpy_usage, 0);
+
+	tv_diff.tv_sec = 0;
+	tv_diff.tv_usec = 0;
+	length = (size_t)perf_atoll((char *)length_str);
+
+	/* Rejects zero as well as values that are negative when viewed as s64. */
+	if ((s64)length <= 0) {
+		fprintf(stderr, "Invalid length:%s\n", length_str);
+		return 1;
+	}
+
+	for (i = 0; routines[i].name; i++) {
+		if (!strcmp(routines[i].name, routine))
+			break;
+	}
+	if (!routines[i].name) {
+		printf("Unknown routine:%s\n", routine);
+		printf("Available routines...\n");
+		for (i = 0; routines[i].name; i++) {
+			printf("\t%s ... %s\n",
+			       routines[i].name, routines[i].desc);
+		}
+		return 1;
+	}
+
+	dst = zalloc(length);
+	if (!dst)
+		die("memory allocation failed - maybe length is too large?\n");
+
+	src = zalloc(length);
+	if (!src)
+		die("memory allocation failed - maybe length is too large?\n");
+
+	if (bench_format == BENCH_FORMAT_DEFAULT) {
+		printf("# Copying %s Bytes from %p to %p ...\n\n",
+		       length_str, src, dst);
+	}
+
+	if (use_clock) {
+		init_clock();
+		clock_start = get_clock();
+	} else {
+		BUG_ON(gettimeofday(&tv_start, NULL));
+	}
+
+	routines[i].fn(dst, src, length);
+
+	if (use_clock) {
+		clock_end = get_clock();
+		clock_diff = clock_end - clock_start;
+	} else {
+		BUG_ON(gettimeofday(&tv_end, NULL));
+		timersub(&tv_end, &tv_start, &tv_diff);
+		bps = (double)((double)length / timeval2double(&tv_diff));
+	}
+
+	switch (bench_format) {
+	case BENCH_FORMAT_DEFAULT:
+		if (use_clock) {
+			printf(" %14lf Clock/Byte\n",
+			       (double)clock_diff / (double)length);
+		} else {
+			if (bps < K)
+				printf(" %14lf B/Sec\n", bps);
+			else if (bps < K * K)
+				/* was "%14lfd": the stray 'd' was printed literally */
+				printf(" %14lf KB/Sec\n", bps / 1024);
+			else if (bps < K * K * K)
+				printf(" %14lf MB/Sec\n", bps / 1024 / 1024);
+			else {
+				printf(" %14lf GB/Sec\n",
+				       bps / 1024 / 1024 / 1024);
+			}
+		}
+		break;
+	case BENCH_FORMAT_SIMPLE:
+		if (use_clock) {
+			printf("%14lf\n",
+			       (double)clock_diff / (double)length);
+		} else
+			printf("%lf\n", bps);
+		break;
+	default:
+		/* reaching this means there's some disaster: */
+		die("unknown format: %d\n", bench_format);
+		break;
+	}
+
+	/* The copy buffers are no longer needed once the result is printed. */
+	free(src);
+	free(dst);
+
+	return 0;
+}
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 6b13a1ecf1e..18ac5eaefc3 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -24,6 +24,7 @@
#include "util/thread.h"
#include "util/sort.h"
#include "util/hist.h"
+#include "util/process_events.h"
static char const *input_name = "perf.data";
@@ -33,11 +34,9 @@ static int input;
static int full_paths;
static int print_line;
-static bool use_modules;
static unsigned long page_size;
static unsigned long mmap_window = 32;
-const char *vmlinux_name;
struct sym_hist {
u64 sum;
@@ -55,6 +54,11 @@ struct sym_priv {
struct sym_ext *ext;
};
+static struct symbol_conf symbol_conf = { /* handed to symbol__init() in cmd_annotate(); -k/-m options fill vmlinux_name/use_modules */
+ .priv_size = sizeof(struct sym_priv),
+ .try_vmlinux_path = true,
+};
+
static const char *sym_hist_filter;
static int symbol_filter(struct map *map __used, struct symbol *sym)
@@ -158,7 +162,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
if (event->header.misc & PERF_RECORD_MISC_KERNEL) {
level = 'k';
- sym = kernel_maps__find_symbol(ip, &map, symbol_filter);
+ sym = kernel_maps__find_function(ip, &map, symbol_filter);
dump_printf(" ...... dso: %s\n",
map ? map->dso->long_name : "<not found>");
} else if (event->header.misc & PERF_RECORD_MISC_USER) {
@@ -167,7 +171,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head)
if (map != NULL) {
got_map:
ip = map->map_ip(map, ip);
- sym = map__find_symbol(map, ip, symbol_filter);
+ sym = map__find_function(map, ip, symbol_filter);
} else {
/*
* If this is outside of all known maps,
@@ -202,32 +206,6 @@ got_map:
}
static int
-process_mmap_event(event_t *event, unsigned long offset, unsigned long head)
-{
- struct map *map = map__new(&event->mmap, NULL, 0);
- struct thread *thread = threads__findnew(event->mmap.pid);
-
- dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n",
- (void *)(offset + head),
- (void *)(long)(event->header.size),
- event->mmap.pid,
- (void *)(long)event->mmap.start,
- (void *)(long)event->mmap.len,
- (void *)(long)event->mmap.pgoff,
- event->mmap.filename);
-
- if (thread == NULL || map == NULL) {
- dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
- return 0;
- }
-
- thread__insert_map(thread, map);
- total_mmap++;
-
- return 0;
-}
-
-static int
process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
struct thread *thread = threads__findnew(event->comm.pid);
@@ -248,33 +226,6 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
}
static int
-process_fork_event(event_t *event, unsigned long offset, unsigned long head)
-{
- struct thread *thread = threads__findnew(event->fork.pid);
- struct thread *parent = threads__findnew(event->fork.ppid);
-
- dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n",
- (void *)(offset + head),
- (void *)(long)(event->header.size),
- event->fork.pid, event->fork.ppid);
-
- /*
- * A thread clone will have the same PID for both
- * parent and child.
- */
- if (thread == parent)
- return 0;
-
- if (!thread || !parent || thread__fork(thread, parent)) {
- dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
- return -1;
- }
- total_fork++;
-
- return 0;
-}
-
-static int
process_event(event_t *event, unsigned long offset, unsigned long head)
{
switch (event->header.type) {
@@ -288,7 +239,7 @@ process_event(event_t *event, unsigned long offset, unsigned long head)
return process_comm_event(event, offset, head);
case PERF_RECORD_FORK:
- return process_fork_event(event, offset, head);
+ return process_task_event(event, offset, head);
/*
* We dont process them right now but they are fine:
*/
@@ -638,11 +589,6 @@ static int __cmd_annotate(void)
exit(0);
}
- if (kernel_maps__init(vmlinux_name, true, use_modules) < 0) {
- pr_err("failed to create kernel maps for symbol resolution\b");
- return -1;
- }
-
remap:
buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ,
MAP_SHARED, input, offset);
@@ -743,8 +689,9 @@ static const struct option options[] = {
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
- OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"),
- OPT_BOOLEAN('m', "modules", &use_modules,
+ OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
+ "file", "vmlinux pathname"),
+ OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules,
"load module symbols - WARNING: use only with -k and LIVE kernel"),
OPT_BOOLEAN('l', "print-line", &print_line,
"print matching source lines (may be slow)"),
@@ -770,7 +717,8 @@ static void setup_sorting(void)
int cmd_annotate(int argc, const char **argv, const char *prefix __used)
{
- symbol__init(sizeof(struct sym_priv));
+ if (symbol__init(&symbol_conf) < 0)
+ return -1;
page_size = getpagesize();
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 90c39baae0d..e043eb83092 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
*
* Available subsystem list:
* sched ... scheduler and IPC mechanism
+ * mem ... memory access performance
*
*/
@@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = {
NULL }
};
+static struct bench_suite mem_suites[] = { /* suites listed under the "mem" subsystem; all-NULL entry ends the table */
+ { "memcpy",
+ "Simple memory copy in various ways",
+ bench_mem_memcpy },
+ { NULL,
+ NULL,
+ NULL }
+};
+
struct bench_subsys {
const char *name;
const char *summary;
@@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = {
{ "sched",
"scheduler and IPC mechanism",
sched_suites },
+ { "mem",
+ "memory access performance",
+ mem_suites },
{ NULL,
NULL,
- NULL }
+ NULL }
};
static void dump_suites(int subsys_index)
diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c
index 768f9c82631..9f810b17c25 100644
--- a/tools/perf/builtin-help.c
+++ b/tools/perf/builtin-help.c
@@ -179,7 +179,7 @@ static void add_man_viewer(const char *name)
while (*p)
p = &((*p)->next);
- *p = calloc(1, (sizeof(**p) + len + 1));
+ *p = zalloc(sizeof(**p) + len + 1);
strncpy((*p)->name, name, len);
}
@@ -194,7 +194,7 @@ static void do_add_man_viewer_info(const char *name,
size_t len,
const char *value)
{
- struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1);
+ struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1);
strncpy(new->name, name, len);
new->info = strdup(value);
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 256d18fa047..35722fafc4d 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -26,26 +26,28 @@ static u64 sample_type;
static int alloc_flag;
static int caller_flag;
-sort_fn_t alloc_sort_fn;
-sort_fn_t caller_sort_fn;
-
static int alloc_lines = -1;
static int caller_lines = -1;
+static bool raw_ip;
+
+static char default_sort_order[] = "frag,hit,bytes";
+
static char *cwd;
static int cwdlen;
+static int *cpunode_map;
+static int max_cpu_num;
+
struct alloc_stat {
- union {
- struct {
- char *name;
- u64 call_site;
- };
- u64 ptr;
- };
+ u64 call_site;
+ u64 ptr;
u64 bytes_req;
u64 bytes_alloc;
u32 hit;
+ u32 pingpong;
+
+ short alloc_cpu;
struct rb_node node;
};
@@ -56,12 +58,74 @@ static struct rb_root root_caller_stat;
static struct rb_root root_caller_sorted;
static unsigned long total_requested, total_allocated;
+static unsigned long nr_allocs, nr_cross_allocs;
struct raw_event_sample {
u32 size;
char data[0];
};
+#define PATH_SYS_NODE "/sys/devices/system/node"
+
+/*
+ * Size and allocate cpunode_map, initialising every slot to -1.
+ *
+ * The number of slots is the value read from sysfs 'kernel_max' plus one.
+ * When that file cannot be opened, 4096 slots are used instead.  The map
+ * is allocated in both cases, so later cpunode_map[cpu] accesses never go
+ * through a NULL pointer.
+ *
+ * Dies if 'kernel_max' cannot be parsed or the allocation fails.
+ */
+static void init_cpunode_map(void)
+{
+	FILE *fp;
+	int i;
+
+	fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
+	if (!fp) {
+		max_cpu_num = 4096;
+	} else {
+		if (fscanf(fp, "%d", &max_cpu_num) < 1)
+			die("Failed to read 'kernel_max' from sysfs");
+		fclose(fp);
+		max_cpu_num++;
+	}
+
+	cpunode_map = calloc(max_cpu_num, sizeof(int));
+	if (!cpunode_map)
+		die("calloc");
+	for (i = 0; i < max_cpu_num; i++)
+		cpunode_map[i] = -1;
+}
+
+/*
+ * Fill cpunode_map[cpu] with the node number each CPU is listed under.
+ *
+ * Walks PATH_SYS_NODE: every "node<N>" directory is opened and each
+ * "cpu<M>" entry inside it records N for CPU M.  If PATH_SYS_NODE cannot
+ * be opened the map keeps the values set by init_cpunode_map().
+ */
+static void setup_cpunode_map(void)
+{
+	struct dirent *dent1, *dent2;
+	DIR *dir1, *dir2;
+	unsigned int cpu, mem;
+	char buf[PATH_MAX];
+
+	init_cpunode_map();
+
+	/* Be defensive: nothing to fill in if the map was not allocated. */
+	if (!cpunode_map)
+		return;
+
+	dir1 = opendir(PATH_SYS_NODE);
+	if (!dir1)
+		return;
+
+	while ((dent1 = readdir(dir1)) != NULL) {
+		if (sscanf(dent1->d_name, "node%u", &mem) < 1)
+			continue;
+
+		snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
+		dir2 = opendir(buf);
+		if (!dir2)
+			continue;
+
+		while ((dent2 = readdir(dir2)) != NULL) {
+			if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
+				continue;
+			/* Skip CPU numbers the map has no slot for. */
+			if (max_cpu_num <= 0 || cpu >= (unsigned int)max_cpu_num)
+				continue;
+			cpunode_map[cpu] = mem;
+		}
+		closedir(dir2);
+	}
+	closedir(dir1);
+}
+
static int
process_comm_event(event_t *event, unsigned long offset, unsigned long head)
{
@@ -81,16 +145,13 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head)
return 0;
}
-static void insert_alloc_stat(unsigned long ptr,
- int bytes_req, int bytes_alloc)
+static void insert_alloc_stat(unsigned long call_site, unsigned long ptr,
+ int bytes_req, int bytes_alloc, int cpu)
{
struct rb_node **node = &root_alloc_stat.rb_node;
struct rb_node *parent = NULL;
struct alloc_stat *data = NULL;
- if (!alloc_flag)
- return;
-
while (*node) {
parent = *node;
data = rb_entry(*node, struct alloc_stat, node);
@@ -109,7 +170,10 @@ static void insert_alloc_stat(unsigned long ptr,
data->bytes_alloc += bytes_req;
} else {
data = malloc(sizeof(*data));
+ if (!data)
+ die("malloc");
data->ptr = ptr;
+ data->pingpong = 0;
data->hit = 1;
data->bytes_req = bytes_req;
data->bytes_alloc = bytes_alloc;
@@ -117,6 +181,8 @@ static void insert_alloc_stat(unsigned long ptr,
rb_link_node(&data->node, parent, node);
rb_insert_color(&data->node, &root_alloc_stat);
}
+ data->call_site = call_site;
+ data->alloc_cpu = cpu;
}
static void insert_caller_stat(unsigned long call_site,
@@ -126,9 +192,6 @@ static void insert_caller_stat(unsigned long call_site,
struct rb_node *parent = NULL;
struct alloc_stat *data = NULL;
- if (!caller_flag)
- return;
-
while (*node) {
parent = *node;
data = rb_entry(*node, struct alloc_stat, node);
@@ -147,7 +210,10 @@ static void insert_caller_stat(unsigned long call_site,
data->bytes_alloc += bytes_req;
} else {
data = malloc(sizeof(*data));
+ if (!data)
+ die("malloc");
data->call_site = call_site;
+ data->pingpong = 0;
data->hit = 1;
data->bytes_req = bytes_req;
data->bytes_alloc = bytes_alloc;
@@ -159,34 +225,89 @@ static void insert_caller_stat(unsigned long call_site,
static void process_alloc_event(struct raw_event_sample *raw,
struct event *event,
- int cpu __used,
+ int cpu,
u64 timestamp __used,
struct thread *thread __used,
- int node __used)
+ int node)
{
unsigned long call_site;
unsigned long ptr;
int bytes_req;
int bytes_alloc;
+ int node1, node2;
ptr = raw_field_value(event, "ptr", raw->data);
call_site = raw_field_value(event, "call_site", raw->data);
bytes_req = raw_field_value(event, "bytes_req", raw->data);
bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data);
- insert_alloc_stat(ptr, bytes_req, bytes_alloc);
+ insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu);
insert_caller_stat(call_site, bytes_req, bytes_alloc);
total_requested += bytes_req;
total_allocated += bytes_alloc;
+
+ if (node) {
+ node1 = cpunode_map[cpu];
+ node2 = raw_field_value(event, "node", raw->data);
+ if (node1 != node2)
+ nr_cross_allocs++;
+ }
+ nr_allocs++;
+}
+
+static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
+static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+
+/*
+ * Look up, in the tree rooted at 'root', the alloc_stat that 'sort_fn'
+ * reports as equal to a key built from 'ptr' and 'call_site'.
+ *
+ * Returns the matching entry, or NULL when no node compares equal.
+ */
+static struct alloc_stat *search_alloc_stat(unsigned long ptr,
+					    unsigned long call_site,
+					    struct rb_root *root,
+					    sort_fn_t sort_fn)
+{
+	struct alloc_stat key = { .ptr = ptr, .call_site = call_site };
+	struct rb_node *cur;
+
+	for (cur = root->rb_node; cur != NULL; ) {
+		struct alloc_stat *entry = rb_entry(cur, struct alloc_stat, node);
+		int order = sort_fn(&key, entry);
+
+		if (order == 0)
+			return entry;
+
+		/* Negative: key sorts before this entry, so descend left. */
+		cur = order < 0 ? cur->rb_left : cur->rb_right;
+	}
+
+	return NULL;
+}
-static void process_free_event(struct raw_event_sample *raw __used,
- struct event *event __used,
- int cpu __used,
+static void process_free_event(struct raw_event_sample *raw,
+ struct event *event,
+ int cpu,
u64 timestamp __used,
struct thread *thread __used)
{
+ unsigned long ptr;
+ struct alloc_stat *s_alloc, *s_caller;
+
+ ptr = raw_field_value(event, "ptr", raw->data);
+
+ s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp);
+ if (!s_alloc)
+ return;
+
+ if (cpu != s_alloc->alloc_cpu) {
+ s_alloc->pingpong++;
+
+ s_caller = search_alloc_stat(0, s_alloc->call_site,
+ &root_caller_stat, callsite_cmp);
+ assert(s_caller);
+ s_caller->pingpong++;
+ }
+ s_alloc->alloc_cpu = -1;
}
static void
@@ -291,7 +412,7 @@ static int read_events(void)
register_idle_thread();
register_perf_file_handler(&file_handler);
- return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0,
+ return mmap_dispatch_perf_file(&header, input_name, 0, 0,
&cwdlen, &cwd);
}
@@ -307,10 +428,10 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
{
struct rb_node *next;
- printf("%.78s\n", graph_dotted_line);
- printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr");
- printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n");
- printf("%.78s\n", graph_dotted_line);
+ printf("%.102s\n", graph_dotted_line);
+ printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
+ printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n");
+ printf("%.102s\n", graph_dotted_line);
next = rb_first(root);
@@ -318,36 +439,39 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller)
struct alloc_stat *data = rb_entry(next, struct alloc_stat,
node);
struct symbol *sym = NULL;
- char bf[BUFSIZ];
+ char buf[BUFSIZ];
u64 addr;
if (is_caller) {
addr = data->call_site;
- sym = kernel_maps__find_symbol(addr, NULL, NULL);
+ if (!raw_ip)
+ sym = kernel_maps__find_function(addr, NULL, NULL);
} else
addr = data->ptr;
if (sym != NULL)
- snprintf(bf, sizeof(bf), "%s/%Lx", sym->name,
+ snprintf(buf, sizeof(buf), "%s+%Lx", sym->name,
addr - sym->start);
else
- snprintf(bf, sizeof(bf), "%#Lx", addr);
+ snprintf(buf, sizeof(buf), "%#Lx", addr);
+ printf(" %-34s |", buf);
- printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f%