diff options
35 files changed, 916 insertions, 420 deletions
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bd874302420..c1bbed1021d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2229,10 +2229,10 @@ validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) { struct hw_perf_event fake_event = event->hw; - if (event->pmu != &pmu) + if (event->pmu && event->pmu != &pmu) return 0; - return x86_schedule_event(cpuc, &fake_event); + return x86_schedule_event(cpuc, &fake_event) >= 0; } static int validate_group(struct perf_event *event) diff --git a/kernel/perf_event.c b/kernel/perf_event.c index 9425c9600c8..35df94e344f 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -1831,7 +1831,7 @@ static int perf_event_read_group(struct perf_event *event, size = n * sizeof(u64); - if (copy_to_user(buf + size, values, size)) { + if (copy_to_user(buf + ret, values, size)) { ret = -EFAULT; goto unlock; } @@ -3914,7 +3914,7 @@ void perf_swevent_put_recursion_context(int rctx) { struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); barrier(); - cpuctx->recursion[rctx]++; + cpuctx->recursion[rctx]--; put_cpu_var(perf_cpu_context); } EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context); diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 0854f110bf7..fe08660ce0b 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -12,6 +12,7 @@ perf*.1 perf*.xml perf*.html common-cmds.h +perf.data tags TAGS cscope* diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt new file mode 100644 index 00000000000..44b0ce35c28 --- /dev/null +++ b/tools/perf/Documentation/perf-kmem.txt @@ -0,0 +1,44 @@ +perf-kmem(1) +============== + +NAME +---- +perf-kmem - Tool to trace/measure kernel memory(slab) properties + +SYNOPSIS +-------- +[verse] +'perf kmem' {record} [<options>] + +DESCRIPTION +----------- +There's two variants of perf kmem: + + 'perf kmem record <command>' to record the kmem events + of an arbitrary workload. + + 'perf kmem' to report kernel memory statistics. + +OPTIONS +------- +-i <file>:: +--input=<file>:: + Select the input file (default: perf.data) + +--stat=<caller|alloc>:: + Select per callsite or per allocation statistics + +-s <key[,key2...]>:: +--sort=<key[,key2...]>:: + Sort the output (default: frag,hit,bytes) + +-l <num>:: +--line=<num>:: + Print n lines only + +--raw-ip:: + Print raw ip instead of symbol + +SEE ALSO +-------- +linkperf:perf-record[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 3ef6621bf6c..de37d492e10 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -369,6 +369,7 @@ LIB_H += util/sort.h LIB_H += util/hist.h LIB_H += util/thread.h LIB_H += util/data_map.h +LIB_H += util/process_events.h LIB_OBJS += util/abspath.o LIB_OBJS += util/alias.o @@ -411,6 +412,7 @@ LIB_OBJS += util/svghelper.o LIB_OBJS += util/sort.o LIB_OBJS += util/hist.o LIB_OBJS += util/data_map.o +LIB_OBJS += util/process_events.o BUILTIN_OBJS += builtin-annotate.o @@ -419,6 +421,7 @@ BUILTIN_OBJS += builtin-bench.o # Benchmark modules BUILTIN_OBJS += bench/sched-messaging.o BUILTIN_OBJS += bench/sched-pipe.o +BUILTIN_OBJS += bench/mem-memcpy.o BUILTIN_OBJS += builtin-help.o BUILTIN_OBJS += builtin-sched.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 9fbd8d745fa..f7781c6267c 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -3,6 +3,7 @@ extern int bench_sched_messaging(int argc, const char **argv, const char *prefix); extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); +extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __used); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c new file mode 100644 index 00000000000..89773178e89 --- /dev/null +++ b/tools/perf/bench/mem-memcpy.c @@ -0,0 +1,193 @@ +/* + * mem-memcpy.c + * + * memcpy: Simple memory copy in various ways + * + * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> + */ +#include <ctype.h> + +#include "../perf.h" +#include "../util/util.h" +#include "../util/parse-options.h" +#include "../util/string.h" +#include "../util/header.h" +#include "bench.h" + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/time.h> +#include <errno.h> + +#define K 1024 + +static const char *length_str = "1MB"; +static const char *routine = "default"; +static int use_clock = 0; +static int clock_fd; + +static const struct option options[] = { + OPT_STRING('l', "length", &length_str, "1MB", + "Specify length of memory to copy. " + "available unit: B, MB, GB (upper and lower)"), + OPT_STRING('r', "routine", &routine, "default", + "Specify routine to copy"), + OPT_BOOLEAN('c', "clock", &use_clock, + "Use CPU clock for measuring"), + OPT_END() +}; + +struct routine { + const char *name; + const char *desc; + void * (*fn)(void *dst, const void *src, size_t len); +}; + +struct routine routines[] = { + { "default", + "Default memcpy() provided by glibc", + memcpy }, + { NULL, + NULL, + NULL } +}; + +static const char * const bench_mem_memcpy_usage[] = { + "perf bench mem memcpy <options>", + NULL +}; + +static struct perf_event_attr clock_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES +}; + +static void init_clock(void) +{ + clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + + if (clock_fd < 0 && errno == ENOSYS) + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); + else + BUG_ON(clock_fd < 0); +} + +static u64 get_clock(void) +{ + int ret; + u64 clk; + + ret = read(clock_fd, &clk, sizeof(u64)); + BUG_ON(ret != sizeof(u64)); + + return clk; +} + +static double timeval2double(struct timeval *ts) +{ + return (double)ts->tv_sec + + (double)ts->tv_usec / (double)1000000; +} + +int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __used) +{ + int i; + void *dst, *src; + size_t length; + double bps = 0.0; + struct timeval tv_start, tv_end, tv_diff; + u64 clock_start, clock_end, clock_diff; + + clock_start = clock_end = clock_diff = 0ULL; + argc = parse_options(argc, argv, options, + bench_mem_memcpy_usage, 0); + + tv_diff.tv_sec = 0; + tv_diff.tv_usec = 0; + length = (size_t)perf_atoll((char *)length_str); + + if ((s64)length <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + for (i = 0; routines[i].name; i++) { + if (!strcmp(routines[i].name, routine)) + break; + } + if (!routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; routines[i].name; i++) { + printf("\t%s ... %s\n", + routines[i].name, routines[i].desc); + } + return 1; + } + + dst = zalloc(length); + if (!dst) + die("memory allocation failed - maybe length is too large?\n"); + + src = zalloc(length); + if (!src) + die("memory allocation failed - maybe length is too large?\n"); + + if (bench_format == BENCH_FORMAT_DEFAULT) { + printf("# Copying %s Bytes from %p to %p ...\n\n", + length_str, src, dst); + } + + if (use_clock) { + init_clock(); + clock_start = get_clock(); + } else { + BUG_ON(gettimeofday(&tv_start, NULL)); + } + + routines[i].fn(dst, src, length); + + if (use_clock) { + clock_end = get_clock(); + clock_diff = clock_end - clock_start; + } else { + BUG_ON(gettimeofday(&tv_end, NULL)); + timersub(&tv_end, &tv_start, &tv_diff); + bps = (double)((double)length / timeval2double(&tv_diff)); + } + + switch (bench_format) { + case BENCH_FORMAT_DEFAULT: + if (use_clock) { + printf(" %14lf Clock/Byte\n", + (double)clock_diff / (double)length); + } else { + if (bps < K) + printf(" %14lf B/Sec\n", bps); + else if (bps < K * K) + printf(" %14lfd KB/Sec\n", bps / 1024); + else if (bps < K * K * K) + printf(" %14lf MB/Sec\n", bps / 1024 / 1024); + else { + printf(" %14lf GB/Sec\n", + bps / 1024 / 1024 / 1024); + } + } + break; + case BENCH_FORMAT_SIMPLE: + if (use_clock) { + printf("%14lf\n", + (double)clock_diff / (double)length); + } else + printf("%lf\n", bps); + break; + default: + /* reaching this means there's some disaster: */ + die("unknown format: %d\n", bench_format); + break; + } + + return 0; +} diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 6b13a1ecf1e..18ac5eaefc3 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -24,6 +24,7 @@ #include "util/thread.h" #include "util/sort.h" #include "util/hist.h" +#include "util/process_events.h" static char const *input_name = "perf.data"; @@ -33,11 +34,9 @@ static int input; static int full_paths; static int print_line; -static bool use_modules; static unsigned long page_size; static unsigned long mmap_window = 32; -const char *vmlinux_name; struct sym_hist { u64 sum; @@ -55,6 +54,11 @@ struct sym_priv { struct sym_ext *ext; }; +static struct symbol_conf symbol_conf = { + .priv_size = sizeof(struct sym_priv), + .try_vmlinux_path = true, +}; + static const char *sym_hist_filter; static int symbol_filter(struct map *map __used, struct symbol *sym) @@ -158,7 +162,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (event->header.misc & PERF_RECORD_MISC_KERNEL) { level = 'k'; - sym = kernel_maps__find_symbol(ip, &map, symbol_filter); + sym = kernel_maps__find_function(ip, &map, symbol_filter); dump_printf(" ...... dso: %s\n", map ? map->dso->long_name : "<not found>"); } else if (event->header.misc & PERF_RECORD_MISC_USER) { @@ -167,7 +171,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (map != NULL) { got_map: ip = map->map_ip(map, ip); - sym = map__find_symbol(map, ip, symbol_filter); + sym = map__find_function(map, ip, symbol_filter); } else { /* * If this is outside of all known maps, @@ -202,32 +206,6 @@ got_map: } static int -process_mmap_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct map *map = map__new(&event->mmap, NULL, 0); - struct thread *thread = threads__findnew(event->mmap.pid); - - dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->mmap.pid, - (void *)(long)event->mmap.start, - (void *)(long)event->mmap.len, - (void *)(long)event->mmap.pgoff, - event->mmap.filename); - - if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); - return 0; - } - - thread__insert_map(thread, map); - total_mmap++; - - return 0; -} - -static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { struct thread *thread = threads__findnew(event->comm.pid); @@ -248,33 +226,6 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) } static int -process_fork_event(event_t *event, unsigned long offset, unsigned long head) -{ - struct thread *thread = threads__findnew(event->fork.pid); - struct thread *parent = threads__findnew(event->fork.ppid); - - dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n", - (void *)(offset + head), - (void *)(long)(event->header.size), - event->fork.pid, event->fork.ppid); - - /* - * A thread clone will have the same PID for both - * parent and child. - */ - if (thread == parent) - return 0; - - if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); - return -1; - } - total_fork++; - - return 0; -} - -static int process_event(event_t *event, unsigned long offset, unsigned long head) { switch (event->header.type) { @@ -288,7 +239,7 @@ process_event(event_t *event, unsigned long offset, unsigned long head) return process_comm_event(event, offset, head); case PERF_RECORD_FORK: - return process_fork_event(event, offset, head); + return process_task_event(event, offset, head); /* * We dont process them right now but they are fine: */ @@ -638,11 +589,6 @@ static int __cmd_annotate(void) exit(0); } - if (kernel_maps__init(vmlinux_name, true, use_modules) < 0) { - pr_err("failed to create kernel maps for symbol resolution\b"); - return -1; - } - remap: buf = (char *)mmap(NULL, page_size * mmap_window, PROT_READ, MAP_SHARED, input, offset); @@ -743,8 +689,9 @@ static const struct option options[] = { "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), - OPT_STRING('k', "vmlinux", &vmlinux_name, "file", "vmlinux pathname"), - OPT_BOOLEAN('m', "modules", &use_modules, + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_BOOLEAN('l', "print-line", &print_line, "print matching source lines (may be slow)"), @@ -770,7 +717,8 @@ static void setup_sorting(void) int cmd_annotate(int argc, const char **argv, const char *prefix __used) { - symbol__init(sizeof(struct sym_priv)); + if (symbol__init(&symbol_conf) < 0) + return -1; page_size = getpagesize(); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index 90c39baae0d..e043eb83092 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -12,6 +12,7 @@ * * Available subsystem list: * sched ... scheduler and IPC mechanism + * mem ... memory access performance * */ @@ -43,6 +44,15 @@ static struct bench_suite sched_suites[] = { NULL } }; +static struct bench_suite mem_suites[] = { + { "memcpy", + "Simple memory copy in various ways", + bench_mem_memcpy }, + { NULL, + NULL, + NULL } +}; + struct bench_subsys { const char *name; const char *summary; @@ -53,9 +63,12 @@ static struct bench_subsys subsystems[] = { { "sched", "scheduler and IPC mechanism", sched_suites }, + { "mem", + "memory access performance", + mem_suites }, { NULL, NULL, - NULL } + NULL } }; static void dump_suites(int subsys_index) diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 768f9c82631..9f810b17c25 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -179,7 +179,7 @@ static void add_man_viewer(const char *name) while (*p) p = &((*p)->next); - *p = calloc(1, (sizeof(**p) + len + 1)); + *p = zalloc(sizeof(**p) + len + 1); strncpy((*p)->name, name, len); } @@ -194,7 +194,7 @@ static void do_add_man_viewer_info(const char *name, size_t len, const char *value) { - struct man_viewer_info_list *new = calloc(1, sizeof(*new) + len + 1); + struct man_viewer_info_list *new = zalloc(sizeof(*new) + len + 1); strncpy(new->name, name, len); new->info = strdup(value); diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index 256d18fa047..35722fafc4d 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -26,26 +26,28 @@ static u64 sample_type; static int alloc_flag; static int caller_flag; -sort_fn_t alloc_sort_fn; -sort_fn_t caller_sort_fn; - static int alloc_lines = -1; static int caller_lines = -1; +static bool raw_ip; + +static char default_sort_order[] = "frag,hit,bytes"; + static char *cwd; static int cwdlen; +static int *cpunode_map; +static int max_cpu_num; + struct alloc_stat { - union { - struct { - char *name; - u64 call_site; - }; - u64 ptr; - }; + u64 call_site; + u64 ptr; u64 bytes_req; u64 bytes_alloc; u32 hit; + u32 pingpong; + + short alloc_cpu; struct rb_node node; }; @@ -56,12 +58,74 @@ static struct rb_root root_caller_stat; static struct rb_root root_caller_sorted; static unsigned long total_requested, total_allocated; +static unsigned long nr_allocs, nr_cross_allocs; struct raw_event_sample { u32 size; char data[0]; }; +#define PATH_SYS_NODE "/sys/devices/system/node" + +static void init_cpunode_map(void) +{ + FILE *fp; + int i; + + fp = fopen("/sys/devices/system/cpu/kernel_max", "r"); + if (!fp) { + max_cpu_num = 4096; + return; + } + + if (fscanf(fp, "%d", &max_cpu_num) < 1) + die("Failed to read 'kernel_max' from sysfs"); + max_cpu_num++; + + cpunode_map = calloc(max_cpu_num, sizeof(int)); + if (!cpunode_map) + die("calloc"); + for (i = 0; i < max_cpu_num; i++) + cpunode_map[i] = -1; + fclose(fp); +} + +static void setup_cpunode_map(void) +{ + struct dirent *dent1, *dent2; + DIR *dir1, *dir2; + unsigned int cpu, mem; + char buf[PATH_MAX]; + + init_cpunode_map(); + + dir1 = opendir(PATH_SYS_NODE); + if (!dir1) + return; + + while (true) { + dent1 = readdir(dir1); + if (!dent1) + break; + + if (sscanf(dent1->d_name, "node%u", &mem) < 1) + continue; + + snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name); + dir2 = opendir(buf); + if (!dir2) + continue; + while (true) { + dent2 = readdir(dir2); + if (!dent2) + break; + if (sscanf(dent2->d_name, "cpu%u", &cpu) < 1) + continue; + cpunode_map[cpu] = mem; + } + } +} + static int process_comm_event(event_t *event, unsigned long offset, unsigned long head) { @@ -81,16 +145,13 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) return 0; } -static void insert_alloc_stat(unsigned long ptr, - int bytes_req, int bytes_alloc) +static void insert_alloc_stat(unsigned long call_site, unsigned long ptr, + int bytes_req, int bytes_alloc, int cpu) { struct rb_node **node = &root_alloc_stat.rb_node; struct rb_node *parent = NULL; struct alloc_stat *data = NULL; - if (!alloc_flag) - return; - while (*node) { parent = *node; data = rb_entry(*node, struct alloc_stat, node); @@ -109,7 +170,10 @@ static void insert_alloc_stat(unsigned long ptr, data->bytes_alloc += bytes_req; } else { data = malloc(sizeof(*data)); + if (!data) + die("malloc"); data->ptr = ptr; + data->pingpong = 0; data->hit = 1; data->bytes_req = bytes_req; data->bytes_alloc = bytes_alloc; @@ -117,6 +181,8 @@ static void insert_alloc_stat(unsigned long ptr, rb_link_node(&data->node, parent, node); rb_insert_color(&data->node, &root_alloc_stat); } + data->call_site = call_site; + data->alloc_cpu = cpu; } static void insert_caller_stat(unsigned long call_site, @@ -126,9 +192,6 @@ static void insert_caller_stat(unsigned long call_site, struct rb_node *parent = NULL; struct alloc_stat *data = NULL; - if (!caller_flag) - return; - while (*node) { parent = *node; data = rb_entry(*node, struct alloc_stat, node); @@ -147,7 +210,10 @@ static void insert_caller_stat(unsigned long call_site, data->bytes_alloc += bytes_req; } else { data = malloc(sizeof(*data)); + if (!data) + die("malloc"); data->call_site = call_site; + data->pingpong = 0; data->hit = 1; data->bytes_req = bytes_req; data->bytes_alloc = bytes_alloc; @@ -159,34 +225,89 @@ static void insert_caller_stat(unsigned long call_site, static void process_alloc_event(struct raw_event_sample *raw, struct event *event, - int cpu __used, + int cpu, u64 timestamp __used, struct thread *thread __used, - int node __used) + int node) { unsigned long call_site; unsigned long ptr; int bytes_req; int bytes_alloc; + int node1, node2; ptr = raw_field_value(event, "ptr", raw->data); call_site = raw_field_value(event, "call_site", raw->data); bytes_req = raw_field_value(event, "bytes_req", raw->data); bytes_alloc = raw_field_value(event, "bytes_alloc", raw->data); - insert_alloc_stat(ptr, bytes_req, bytes_alloc); + insert_alloc_stat(call_site, ptr, bytes_req, bytes_alloc, cpu); insert_caller_stat(call_site, bytes_req, bytes_alloc); total_requested += bytes_req; total_allocated += bytes_alloc; + + if (node) { + node1 = cpunode_map[cpu]; + node2 = raw_field_value(event, "node", raw->data); + if (node1 != node2) + nr_cross_allocs++; + } + nr_allocs++; +} + +static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); +static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); + +static struct alloc_stat *search_alloc_stat(unsigned long ptr, + unsigned long call_site, + struct rb_root *root, + sort_fn_t sort_fn) +{ + struct rb_node *node = root->rb_node; + struct alloc_stat key = { .ptr = ptr, .call_site = call_site }; + + while (node) { + struct alloc_stat *data; + int cmp; + + data = rb_entry(node, struct alloc_stat, node); + + cmp = sort_fn(&key, data); + if (cmp < 0) + node = node->rb_left; + else if (cmp > 0) + node = node->rb_right; + else + return data; + } + return NULL; } -static void process_free_event(struct raw_event_sample *raw __used, - struct event *event __used, - int cpu __used, +static void process_free_event(struct raw_event_sample *raw, + struct event *event, + int cpu, u64 timestamp __used, struct thread *thread __used) { + unsigned long ptr; + struct alloc_stat *s_alloc, *s_caller; + + ptr = raw_field_value(event, "ptr", raw->data); + + s_alloc = search_alloc_stat(ptr, 0, &root_alloc_stat, ptr_cmp); + if (!s_alloc) + return; + + if (cpu != s_alloc->alloc_cpu) { + s_alloc->pingpong++; + + s_caller = search_alloc_stat(0, s_alloc->call_site, + &root_caller_stat, callsite_cmp); + assert(s_caller); + s_caller->pingpong++; + } + s_alloc->alloc_cpu = -1; } static void @@ -291,7 +412,7 @@ static int read_events(void) register_idle_thread(); register_perf_file_handler(&file_handler); - return mmap_dispatch_perf_file(&header, input_name, NULL, false, 0, 0, + return mmap_dispatch_perf_file(&header, input_name, 0, 0, &cwdlen, &cwd); } @@ -307,10 +428,10 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) { struct rb_node *next; - printf("%.78s\n", graph_dotted_line); - printf("%-28s|", is_caller ? "Callsite": "Alloc Ptr"); - printf("Total_alloc/Per | Total_req/Per | Hit | Frag\n"); - printf("%.78s\n", graph_dotted_line); + printf("%.102s\n", graph_dotted_line); + printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr"); + printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); + printf("%.102s\n", graph_dotted_line); next = rb_first(root); @@ -318,36 +439,39 @@ static void __print_result(struct rb_root *root, int n_lines, int is_caller) struct alloc_stat *data = rb_entry(next, struct alloc_stat, node); struct symbol *sym = NULL; - char bf[BUFSIZ]; + char buf[BUFSIZ]; u64 addr; if (is_caller) { addr = data->call_site; - sym = kernel_maps__find_symbol(addr, NULL, NULL); + if (!raw_ip) + sym = kernel_maps__find_function(addr, NULL, NULL); } else addr = data->ptr; if (sym != NULL) - snprintf(bf, sizeof(bf), "%s/%Lx", sym->name, + snprintf(buf, sizeof(buf), "%s+%Lx", sym->name, addr - sym->start); else - snprintf(bf, sizeof(bf), "%#Lx", addr); + snprintf(buf, sizeof(buf), "%#Lx", addr); + printf(" %-34s |", buf); - printf("%-28s|%8llu/%-6lu |%8llu/%-6lu|%6lu|%8.3f% |