From 26353a61b977e57b58dd3555bc0422fea46c5ad6 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Apr 2013 20:35:17 +0900 Subject: perf hists: Fix an invalid memory free on he->branch_info The branch info was allocated for the whole stack and passed matching hist entry for each level during processing samples. Thus when a hist entry tries to free its branch info like in hists__collapse_insert_entry it'll face following error. *** glibc detected *** perf: munmap_chunk(): invalid pointer: 0x00000000014e9d20 *** ======= Backtrace: ========= /lib64/libc.so.6[0x387d47ae16] perf[0x4923bd] perf(cmd_report+0xd68)[0x432a08] perf[0x41a663] perf(main+0x58f)[0x419eaf] /lib64/libc.so.6(__libc_start_main+0xf5)[0x387d421735] perf[0x419f95] Fix it by allocating and copying branch info for each new hist entry. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1364816125-12212-2-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index bd0ca81eeac..d9f2de3e81f 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -187,6 +187,9 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, for (i = 0; i < sample->branch_stack->nr; i++) { if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym)) continue; + + err = -ENOMEM; + /* * The report shows the percentage of total branches captured * and not events sampled. Thus we use a pseudo period of 1. @@ -195,7 +198,6 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, &bi[i], 1, 1); if (he) { struct annotation *notes; - err = -ENOMEM; bx = he->branch_info; if (bx->from.sym && use_browser == 1 && sort__has_sym) { notes = symbol__annotation(bx->from.sym); @@ -226,11 +228,12 @@ static int perf_report__add_branch_hist_entry(struct perf_tool *tool, } evsel->hists.stats.total_period += 1; hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE); - err = 0; } else - return -ENOMEM; + goto out; } + err = 0; out: + free(bi); return err; } -- cgit v1.2.3-18-g5258 From 55369fc179b0572d0b4a06a9be1d2779b3ac22e0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 1 Apr 2013 20:35:20 +0900 Subject: perf sort: Introduce sort__mode variable It's used for determining current sort mode which can be one of NORMAL, BRANCH and new MEMORY. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1364816125-12212-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d9f2de3e81f..c877982a64d 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -311,7 +311,7 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - if (sort__branch_mode == 1) { + if (sort__mode == SORT_MODE__BRANCH) { if (perf_report__add_branch_hist_entry(tool, &al, sample, evsel, machine)) { pr_debug("problem adding lbr entry, skipping event\n"); @@ -387,7 +387,7 @@ static int perf_report__setup_sample_type(struct perf_report *rep) } } - if (sort__branch_mode == 1) { + if (sort__mode == SORT_MODE__BRANCH) { if (!self->fd_pipe && !(sample_type & PERF_SAMPLE_BRANCH_STACK)) { ui__error("Selected -b but no branch data. " @@ -694,7 +694,9 @@ static int parse_branch_mode(const struct option *opt __maybe_unused, const char *str __maybe_unused, int unset) { - sort__branch_mode = !unset; + int *branch_mode = opt->value; + + *branch_mode = !unset; return 0; } @@ -703,6 +705,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) struct perf_session *session; struct stat st; bool has_br_stack = false; + int branch_mode = -1; int ret = -1; char callchain_default_opt[] = "fractal,0.5,callee"; const char * const report_usage[] = { @@ -799,7 +802,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Show a column with the sum of periods"), OPT_BOOLEAN(0, "group", &symbol_conf.event_group, "Show event group information together"), - OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "", + OPT_CALLBACK_NOOPT('b', "branch-stack", &branch_mode, "", "use branch records for histogram filling", parse_branch_mode), OPT_STRING(0, "objdump", &objdump_path, "path", "objdump binary to use for disassembly and annotations"), @@ -849,11 +852,11 @@ repeat: has_br_stack = perf_header__has_feat(&session->header, HEADER_BRANCH_STACK); - if (sort__branch_mode == -1 && has_br_stack) - sort__branch_mode = 1; + if (branch_mode == -1 && has_br_stack) + sort__mode = SORT_MODE__BRANCH; - /* sort__branch_mode could be 0 if --no-branch-stack */ - if (sort__branch_mode == 1) { + /* sort__mode could be NORMAL if --no-branch-stack */ + if (sort__mode == SORT_MODE__BRANCH) { /* * if no sort_order is provided, then specify * branch-mode specific order @@ -864,7 +867,7 @@ repeat: } if (report.mem_mode) { - if (sort__branch_mode == 1) { + if (sort__mode == SORT_MODE__BRANCH) { fprintf(stderr, "branch and mem mode incompatible\n"); goto error; } @@ -934,7 +937,7 @@ repeat: sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); - if (sort__branch_mode == 1) { + if (sort__mode == SORT_MODE__BRANCH) { sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); -- cgit v1.2.3-18-g5258 From afab87b91f3f331d55664172dad8e476e6ffca9d Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 3 Apr 2013 21:26:11 +0900 Subject: perf sort: Separate out memory-specific sort keys Since they're used only for perf mem, separate out them to a different dimension so that normal user cannot access them by any chance. For global/local weights, I'm not entirely sure to place them into the memory dimension. But it's the only user at this time. Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1364991979-3008-3-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index c877982a64d..669405c9b8a 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -871,6 +871,8 @@ repeat: fprintf(stderr, "branch and mem mode incompatible\n"); goto error; } + sort__mode = SORT_MODE__MEMORY; + /* * if no sort_order is provided, then specify * branch-mode specific order -- cgit v1.2.3-18-g5258 From 08e71542fd0f4a0e30b4e3794329d63ae891e0c0 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 3 Apr 2013 21:26:19 +0900 Subject: perf sort: Consolidate sort_entry__setup_elide() The same code was duplicate to places, factor them out to common sort__setup_elide(). Signed-off-by: Namhyung Kim Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1364991979-3008-11-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 20 +------------------- 1 file changed, 1 insertion(+), 19 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 669405c9b8a..d45bf9b0361 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -937,25 +937,7 @@ repeat: report.symbol_filter_str = argv[0]; } - sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout); - - if (sort__mode == SORT_MODE__BRANCH) { - sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout); - sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout); - sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout); - sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout); - } else { - if (report.mem_mode) { - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "symbol_daddr", stdout); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso_daddr", stdout); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "mem", stdout); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "local_weight", stdout); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "tlb", stdout); - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "snoop", stdout); - } - sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout); - sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout); - } + sort__setup_elide(stdout); ret = __cmd_report(&report); if (ret == K_SWITCH_INPUT_DATA) { -- cgit v1.2.3-18-g5258 From 27a0dcb7adb52473dd98d285a46b764b9219d303 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 May 2013 11:09:02 +0900 Subject: perf hists: Move locking to its call-sites It's a preparation patch to eliminate unneeded locking in the perf report path. Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1368497347-9628-5-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index d45bf9b0361..63febd24e91 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -297,6 +297,7 @@ static int process_sample_event(struct perf_tool *tool, { struct perf_report *rep = container_of(tool, struct perf_report, tool); struct addr_location al; + int ret; if (perf_event__preprocess_sample(event, machine, &al, sample, rep->annotate_init) < 0) { @@ -311,28 +312,29 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; + pthread_mutex_lock(&evsel->hists.lock); + if (sort__mode == SORT_MODE__BRANCH) { - if (perf_report__add_branch_hist_entry(tool, &al, sample, - evsel, machine)) { + ret = perf_report__add_branch_hist_entry(tool, &al, sample, + evsel, machine); + if (ret < 0) pr_debug("problem adding lbr entry, skipping event\n"); - return -1; - } } else if (rep->mem_mode == 1) { - if (perf_report__add_mem_hist_entry(tool, &al, sample, - evsel, machine, event)) { + ret = perf_report__add_mem_hist_entry(tool, &al, sample, + evsel, machine, event); + if (ret < 0) pr_debug("problem adding mem entry, skipping event\n"); - return -1; - } } else { if (al.map != NULL) al.map->dso->hit = 1; - if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) { + ret = perf_evsel__add_hist_entry(evsel, &al, sample, machine); + if (ret < 0) pr_debug("problem incrementing symbol period, skipping event\n"); - return -1; - } } - return 0; + pthread_mutex_unlock(&evsel->hists.lock); + + return ret; } static int process_read_event(struct perf_tool *tool, -- cgit v1.2.3-18-g5258 From f3dd19817e5bbcae81e96571a3d42aa30a1581fb Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 May 2013 11:09:03 +0900 Subject: perf report: Don't bother locking when adding hist entries The 'perf report'command is single-threaded, so no need to grab a lock. Although the fast path of pthread_mutex_[un]lock() is very fast, there's a ~3% gain by eliminating it when we have huge sample data. $ perf record -a -F 100000 -o perf.data.bench -- perf bench sched all $ perf record -e cycles:upp -o perf.data.before -- \ > perf report -i perf.data.bench --stdio > /dev/null ... apply this patch ... $ perf record -e cycles:upp -o perf.data.after -- \ > perf report -i perf.data.bench --stdio > /dev/null $ perf diff perf.data.{before,after} | grep pthread +0.02% libpthread-2.15.so [.] _pthread_cleanup_push_defer +0.02% libpthread-2.15.so [.] _pthread_cleanup_pop_restore 0.05% -0.05% perf [.] pthread_mutex_unlock@plt 0.05% -0.05% perf [.] pthread_mutex_lock@plt 1.01% -1.01% libpthread-2.15.so [.] pthread_mutex_lock 1.68% -1.68% libpthread-2.15.so [.] __pthread_mutex_unlock_usercnt 0.05% -0.05% libpthread-2.15.so [.] pthread_mutex_unlock Signed-off-by: Namhyung Kim Acked-by: Jiri Olsa Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1368497347-9628-6-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 63febd24e91..0f0cf2472d9 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -312,8 +312,6 @@ static int process_sample_event(struct perf_tool *tool, if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) return 0; - pthread_mutex_lock(&evsel->hists.lock); - if (sort__mode == SORT_MODE__BRANCH) { ret = perf_report__add_branch_hist_entry(tool, &al, sample, evsel, machine); @@ -332,8 +330,6 @@ static int process_sample_event(struct perf_tool *tool, if (ret < 0) pr_debug("problem incrementing symbol period, skipping event\n"); } - pthread_mutex_unlock(&evsel->hists.lock); - return ret; } -- cgit v1.2.3-18-g5258 From 064f19815c4e99e8b22bc3c5f4d7f4e0b96d226a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 May 2013 11:09:04 +0900 Subject: perf report: Add --percent-limit option The --percent-limit option is for not showing small overhead entries in the output. Maybe we want to set a certain default value like 0.1. Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1368497347-9628-7-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0f0cf2472d9..0a4979bdd4c 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -52,6 +52,7 @@ struct perf_report { symbol_filter_t annotate_init; const char *cpu_list; const char *symbol_filter_str; + float min_percent; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); }; @@ -456,7 +457,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, continue; hists__fprintf_nr_sample_events(rep, hists, evname, stdout); - hists__fprintf(hists, true, 0, 0, stdout); + hists__fprintf(hists, true, 0, 0, rep->min_percent, stdout); fprintf(stdout, "\n\n"); } @@ -575,8 +576,8 @@ static int __cmd_report(struct perf_report *rep) if (use_browser > 0) { if (use_browser == 1) { ret = perf_evlist__tui_browse_hists(session->evlist, - help, - NULL, + help, NULL, + rep->min_percent, &session->header.env); /* * Usually "ret" is the last pressed key, and we only @@ -587,7 +588,7 @@ static int __cmd_report(struct perf_report *rep) } else if (use_browser == 2) { perf_evlist__gtk_browse_hists(session->evlist, help, - NULL); + NULL, rep->min_percent); } } else perf_evlist__tty_browse_hists(session->evlist, rep, help); @@ -698,6 +699,16 @@ parse_branch_mode(const struct option *opt __maybe_unused, return 0; } +static int +parse_percent_limit(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + struct perf_report *rep = opt->value; + + rep->min_percent = strtof(str, NULL); + return 0; +} + int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_session *session; @@ -807,6 +818,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_BOOLEAN(0, "demangle", &symbol_conf.demangle, "Disable symbol demangling"), OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"), + OPT_CALLBACK(0, "percent-limit", &report, "percent", + "Don't show entries under that percent", parse_percent_limit), OPT_END() }; -- cgit v1.2.3-18-g5258 From eec574e6bc3ee4558d4a282e0e3e1bd6dd0ad67b Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 May 2013 11:09:06 +0900 Subject: perf report: Add report.percent-limit config variable Now an user can set a default value of --percent-limit option into the perfconfig file. $ cat ~/.perfconfig [report] percent-limit = 0.1 Signed-off-by: Namhyung Kim Acked-by: Pekka Enberg Cc: Andi Kleen Cc: David Ahern Cc: Ingo Molnar Cc: Jiri Olsa Cc: Paul Mackerras Cc: Pekka Enberg Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/r/1368497347-9628-9-git-send-email-namhyung@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 0a4979bdd4c..ca98d34cd58 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -62,6 +62,11 @@ static int perf_report_config(const char *var, const char *value, void *cb) symbol_conf.event_group = perf_config_bool(var, value); return 0; } + if (!strcmp(var, "report.percent-limit")) { + struct perf_report *rep = cb; + rep->min_percent = strtof(value, NULL); + return 0; + } return perf_default_config(var, value, cb); } @@ -823,7 +828,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_END() }; - perf_config(perf_report_config, NULL); + perf_config(perf_report_config, &report); argc = parse_options(argc, argv, options, report_usage, 0); -- cgit v1.2.3-18-g5258 From 0276c22a3f22b7f6696fa07b0a77635726b2c0fd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 10 Jun 2013 08:21:21 +0200 Subject: perf tools: Fix -x/--exclude-other option for report command Currently we have symbol_conf.exclude_other being set as true every time so the -x/--exclude-other has nothing to do. Also we have no way to see the data with symbol_conf.exclude_other being false which is useful sometimes. Fixing it by making symbol_conf.exclude_other false by default. 1) Example without -x option: $ perf report -i perf.data.delete -p perf_session__delete -s parent + 99.91% [other] + 0.08% perf_session__delete + 0.00% perf_session__delete_dead_threads + 0.00% perf_session__delete_threads 2) Example with -x option: $ ./perf report -i perf.data.delete -p perf_session__delete -s parent -x + 96.22% perf_session__delete + 1.89% perf_session__delete_dead_threads + 1.89% perf_session__delete_threads In Example 1) we get the sorted out data together with the rest "[other]". This could help us estimate how much time we spent in the sorted data. In Example 2) the total is just the sorted data. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-sg8fvu0fyqohf9ur9l38lhkw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools/perf/builtin-report.c') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index ca98d34cd58..3662047cc6b 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -939,8 +939,7 @@ repeat: */ if (!strstr(sort_order, "parent")) sort_parent.elide = 1; - } else - symbol_conf.exclude_other = false; + } if (argc) { /* -- cgit v1.2.3-18-g5258