aboutsummaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2012-03-12 20:46:35 +0100
committerIngo Molnar <mingo@elte.hu>2012-03-12 20:47:05 +0100
commitbea95c152dee1791dd02cbc708afbb115bb00f9a (patch)
treeaf9994c42c5fdd81ba3dadd7b812e2fa85273353 /tools
parentf9b4eeb809c6d031cc9561cc34dd691701cb2c2a (diff)
parent24bff2dc0f77b1f186b7bdf30060caf3df191a68 (diff)
Merge branch 'perf/hw-branch-sampling' into perf/core
Merge reason: The 'perf record -b' hardware branch sampling feature is ready for upstream. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'tools')
-rw-r--r--tools/perf/Documentation/perf-record.txt30
-rw-r--r--tools/perf/Documentation/perf-report.txt10
-rw-r--r--tools/perf/builtin-record.c95
-rw-r--r--tools/perf/builtin-report.c178
-rw-r--r--tools/perf/perf.h18
-rw-r--r--tools/perf/util/event.h1
-rw-r--r--tools/perf/util/evsel.c14
-rw-r--r--tools/perf/util/header.c207
-rw-r--r--tools/perf/util/header.h2
-rw-r--r--tools/perf/util/hist.c122
-rw-r--r--tools/perf/util/hist.h11
-rw-r--r--tools/perf/util/session.c77
-rw-r--r--tools/perf/util/session.h4
-rw-r--r--tools/perf/util/sort.c287
-rw-r--r--tools/perf/util/sort.h11
-rw-r--r--tools/perf/util/symbol.h20
-rw-r--r--tools/perf/util/ui/browsers/hists.c102
17 files changed, 1023 insertions, 166 deletions
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index a5766b4b012..a1386b2fff0 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -152,6 +152,36 @@ an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must ha
corresponding events, i.e., they always refer to events defined earlier on the command
line.
+-b::
+--branch-any::
+Enable taken branch stack sampling. Any type of taken branch may be sampled.
+This is a shortcut for --branch-filter any. See --branch-filter for more infos.
+
+-j::
+--branch-filter::
+Enable taken branch stack sampling. Each sample captures a series of consecutive
+taken branches. The number of branches captured with each sample depends on the
+underlying hardware, the type of branches of interest, and the executed code.
+It is possible to select the types of branches captured by enabling filters. The
+following filters are defined:
+
+ - any: any type of branches
+ - any_call: any function call or system call
+ - any_ret: any function return or system call return
+ - any_ind: any indirect branch
+ - u: only when the branch target is at the user level
+ - k: only when the branch target is in the kernel
+ - hv: only when the target is at the hypervisor level
+
++
+The option requires at least one branch type among any, any_call, any_ret, ind_call.
+The privilege levels may be ommitted, in which case, the privilege levels of the associated
+event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+levels are subject to permissions. When sampling on multiple events, branch stack sampling
+is enabled for all the sampling events. The sampled branch type is the same for all events.
+The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+Note that this feature may not be available on all processors.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 9b430e98712..87feeee8b90 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -153,6 +153,16 @@ OPTIONS
information which may be very large and thus may clutter the display.
It currently includes: cpu and numa topology of the host system.
+-b::
+--branch-stack::
+ Use the addresses of sampled taken branches instead of the instruction
+ address to build the histograms. To generate meaningful output, the
+ perf.data file must have been obtained using perf record -b or
+ perf record --branch-filter xxx where xxx is a branch filter option.
+ perf report is able to auto-detect whether a perf.data file contains
+ branch stacks and it will automatically switch to the branch view mode,
+ unless --no-branch-stack is used.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 75d230fef20..be4e1eee782 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -473,6 +473,9 @@ static int __cmd_record(struct perf_record *rec, int argc, const char **argv)
if (!have_tracepoints(&evsel_list->entries))
perf_header__clear_feat(&session->header, HEADER_TRACE_INFO);
+ if (!rec->opts.branch_stack)
+ perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+
if (!rec->file_new) {
err = perf_session__read_header(session, output);
if (err < 0)
@@ -638,6 +641,90 @@ out_delete_session:
return err;
}
+#define BRANCH_OPT(n, m) \
+ { .name = n, .mode = (m) }
+
+#define BRANCH_END { .name = NULL }
+
+struct branch_mode {
+ const char *name;
+ int mode;
+};
+
+static const struct branch_mode branch_modes[] = {
+ BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+ BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+ BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+ BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+ BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+ BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+ BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+ BRANCH_END
+};
+
+static int
+parse_branch_stack(const struct option *opt, const char *str, int unset)
+{
+#define ONLY_PLM \
+ (PERF_SAMPLE_BRANCH_USER |\
+ PERF_SAMPLE_BRANCH_KERNEL |\
+ PERF_SAMPLE_BRANCH_HV)
+
+ uint64_t *mode = (uint64_t *)opt->value;
+ const struct branch_mode *br;
+ char *s, *os = NULL, *p;
+ int ret = -1;
+
+ if (unset)
+ return 0;
+
+ /*
+ * cannot set it twice, -b + --branch-filter for instance
+ */
+ if (*mode)
+ return -1;
+
+ /* str may be NULL in case no arg is passed to -b */
+ if (str) {
+ /* because str is read-only */
+ s = os = strdup(str);
+ if (!s)
+ return -1;
+
+ for (;;) {
+ p = strchr(s, ',');
+ if (p)
+ *p = '\0';
+
+ for (br = branch_modes; br->name; br++) {
+ if (!strcasecmp(s, br->name))
+ break;
+ }
+ if (!br->name) {
+ ui__warning("unknown branch filter %s,"
+ " check man page\n", s);
+ goto error;
+ }
+
+ *mode |= br->mode;
+
+ if (!p)
+ break;
+
+ s = p + 1;
+ }
+ }
+ ret = 0;
+
+ /* default to any branch */
+ if ((*mode & ~ONLY_PLM) == 0) {
+ *mode = PERF_SAMPLE_BRANCH_ANY;
+ }
+error:
+ free(os);
+ return ret;
+}
+
static const char * const record_usage[] = {
"perf record [<options>] [<command>]",
"perf record [<options>] -- <command> [<options>]",
@@ -727,6 +814,14 @@ const struct option record_options[] = {
"monitor event in cgroup name only",
parse_cgroups),
OPT_STRING('u', "uid", &record.uid_str, "user", "user to profile"),
+
+ OPT_CALLBACK_NOOPT('b', "branch-any", &record.opts.branch_stack,
+ "branch any", "sample any taken branches",
+ parse_branch_stack),
+
+ OPT_CALLBACK('j', "branch-filter", &record.opts.branch_stack,
+ "branch filter mask", "branch stack filter modes",
+ parse_branch_stack),
OPT_END()
};
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 25d34d483e4..8e91c6eba18 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -53,6 +53,82 @@ struct perf_report {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
+static int perf_report__add_branch_hist_entry(struct perf_tool *tool,
+ struct addr_location *al,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_report *rep = container_of(tool, struct perf_report, tool);
+ struct symbol *parent = NULL;
+ int err = 0;
+ unsigned i;
+ struct hist_entry *he;
+ struct branch_info *bi, *bx;
+
+ if ((sort__has_parent || symbol_conf.use_callchain)
+ && sample->callchain) {
+ err = machine__resolve_callchain(machine, evsel, al->thread,
+ sample->callchain, &parent);
+ if (err)
+ return err;
+ }
+
+ bi = machine__resolve_bstack(machine, al->thread,
+ sample->branch_stack);
+ if (!bi)
+ return -ENOMEM;
+
+ for (i = 0; i < sample->branch_stack->nr; i++) {
+ if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
+ continue;
+ /*
+ * The report shows the percentage of total branches captured
+ * and not events sampled. Thus we use a pseudo period of 1.
+ */
+ he = __hists__add_branch_entry(&evsel->hists, al, parent,
+ &bi[i], 1);
+ if (he) {
+ struct annotation *notes;
+ err = -ENOMEM;
+ bx = he->branch_info;
+ if (bx->from.sym && use_browser > 0) {
+ notes = symbol__annotation(bx->from.sym);
+ if (!notes->src
+ && symbol__alloc_hist(bx->from.sym) < 0)
+ goto out;
+
+ err = symbol__inc_addr_samples(bx->from.sym,
+ bx->from.map,
+ evsel->idx,
+ bx->from.al_addr);
+ if (err)
+ goto out;
+ }
+
+ if (bx->to.sym && use_browser > 0) {
+ notes = symbol__annotation(bx->to.sym);
+ if (!notes->src
+ && symbol__alloc_hist(bx->to.sym) < 0)
+ goto out;
+
+ err = symbol__inc_addr_samples(bx->to.sym,
+ bx->to.map,
+ evsel->idx,
+ bx->to.al_addr);
+ if (err)
+ goto out;
+ }
+ evsel->hists.stats.total_period += 1;
+ hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ err = 0;
+ } else
+ return -ENOMEM;
+ }
+out:
+ return err;
+}
+
static int perf_evsel__add_hist_entry(struct perf_evsel *evsel,
struct addr_location *al,
struct perf_sample *sample,
@@ -126,14 +202,21 @@ static int process_sample_event(struct perf_tool *tool,
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;
- if (al.map != NULL)
- al.map->dso->hit = 1;
+ if (sort__branch_mode == 1) {
+ if (perf_report__add_branch_hist_entry(tool, &al, sample,
+ evsel, machine)) {
+ pr_debug("problem adding lbr entry, skipping event\n");
+ return -1;
+ }
+ } else {
+ if (al.map != NULL)
+ al.map->dso->hit = 1;
- if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
- pr_debug("problem incrementing symbol period, skipping event\n");
- return -1;
+ if (perf_evsel__add_hist_entry(evsel, &al, sample, machine)) {
+ pr_debug("problem incrementing symbol period, skipping event\n");
+ return -1;
+ }
}
-
return 0;
}
@@ -188,6 +271,15 @@ static int perf_report__setup_sample_type(struct perf_report *rep)
}
}
+ if (sort__branch_mode == 1) {
+ if (!(self->sample_type & PERF_SAMPLE_BRANCH_STACK)) {
+ fprintf(stderr, "selected -b but no branch data."
+ " Did you call perf record without"
+ " -b?\n");
+ return -1;
+ }
+ }
+
return 0;
}
@@ -246,7 +338,7 @@ static int __cmd_report(struct perf_report *rep)
{
int ret = -EINVAL;
u64 nr_samples;
- struct perf_session *session;
+ struct perf_session *session = rep->session;
struct perf_evsel *pos;
struct map *kernel_map;
struct kmap *kernel_kmap;
@@ -254,13 +346,6 @@ static int __cmd_report(struct perf_report *rep)
signal(SIGINT, sig_handler);
- session = perf_session__new(rep->input_name, O_RDONLY,
- rep->force, false, &rep->tool);
- if (session == NULL)
- return -ENOMEM;
-
- rep->session = session;
-
if (rep->cpu_list) {
ret = perf_session__cpu_bitmap(session, rep->cpu_list,
rep->cpu_bitmap);
@@ -427,9 +512,19 @@ setup:
return 0;
}
+static int
+parse_branch_mode(const struct option *opt __used, const char *str __used, int unset)
+{
+ sort__branch_mode = !unset;
+ return 0;
+}
+
int cmd_report(int argc, const char **argv, const char *prefix __used)
{
+ struct perf_session *session;
struct stat st;
+ bool has_br_stack = false;
+ int ret = -1;
char callchain_default_opt[] = "fractal,0.5,callee";
const char * const report_usage[] = {
"perf report [<options>]",
@@ -477,7 +572,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
OPT_BOOLEAN(0, "stdio", &report.use_stdio,
"Use the stdio interface"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent"),
+ "sort by key(s): pid, comm, dso, symbol, parent, dso_to,"
+ " dso_from, symbol_to, symbol_from, mispredict"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -517,6 +613,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
"Show a column with the sum of periods"),
+ OPT_CALLBACK_NOOPT('b', "branch-stack", &sort__branch_mode, "",
+ "use branch records for histogram filling", parse_branch_mode),
OPT_END()
};
@@ -536,11 +634,36 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
else
report.input_name = "perf.data";
}
+ session = perf_session__new(report.input_name, O_RDONLY,
+ report.force, false, &report.tool);
+ if (session == NULL)
+ return -ENOMEM;
- if (strcmp(report.input_name, "-") != 0)
+ report.session = session;
+
+ has_br_stack = perf_header__has_feat(&session->header,
+ HEADER_BRANCH_STACK);
+
+ if (sort__branch_mode == -1 && has_br_stack)
+ sort__branch_mode = 1;
+
+ /* sort__branch_mode could be 0 if --no-branch-stack */
+ if (sort__branch_mode == 1) {
+ /*
+ * if no sort_order is provided, then specify
+ * branch-mode specific order
+ */
+ if (sort_order == default_sort_order)
+ sort_order = "comm,dso_from,symbol_from,"
+ "dso_to,symbol_to";
+
+ }
+
+ if (strcmp(report.input_name, "-") != 0) {
setup_browser(true);
- else
+ } else {
use_browser = 0;
+ }
/*
* Only in the newt browser we are doing integrated annotation,
@@ -568,13 +691,13 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
}
if (symbol__init() < 0)
- return -1;
+ goto error;
setup_sorting(report_usage, options);
if (parent_pattern != default_parent_pattern) {
if (sort_dimension__add("parent") < 0)
- return -1;
+ goto error;
/*
* Only show the parent fields if we explicitly
@@ -592,9 +715,20 @@ int cmd_report(int argc, const char **argv, const char *prefix __used)
if (argc)
usage_with_options(report_usage, options);
- sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", stdout);
- sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
- return __cmd_report(&report);
+ if (sort__branch_mode == 1) {
+ sort_entry__setup_elide(&sort_dso_from, symbol_conf.dso_from_list, "dso_from", stdout);
+ sort_entry__setup_elide(&sort_dso_to, symbol_conf.dso_to_list, "dso_to", stdout);
+ sort_entry__setup_elide(&sort_sym_from, symbol_conf.sym_from_list, "sym_from", stdout);
+ sort_entry__setup_elide(&sort_sym_to, symbol_conf.sym_to_list, "sym_to", stdout);
+ } else {
+ sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", stdout);
+ sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", stdout);
+ }
+
+ ret = __cmd_report(&report);
+error:
+ perf_session__delete(session);
+ return ret;
}
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index f0227e93665..eec392e4806 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -179,6 +179,23 @@ struct ip_callchain {
u64 ips[0];
};
+struct branch_flags {
+ u64 mispred:1;
+ u64 predicted:1;
+ u64 reserved:62;
+};
+
+struct branch_entry {
+ u64 from;
+ u64 to;
+ struct branch_flags flags;
+};
+
+struct branch_stack {
+ u64 nr;
+ struct branch_entry entries[0];
+};
+
extern bool perf_host, perf_guest;
extern const char perf_version_string[];
@@ -205,6 +222,7 @@ struct perf_record_opts {
unsigned int freq;
unsigned int mmap_pages;
unsigned int user_freq;
+ int branch_stack;
u64 default_interval;
u64 user_interval;
const char *cpu_list;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index cbdeaad9c5e..1b197280c62 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -81,6 +81,7 @@ struct perf_sample {
u32 raw_size;
void *raw_data;
struct ip_callchain *callchain;
+ struct branch_stack *branch_stack;
};
#define BUILD_ID_SIZE 20
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 302d49a9f98..f421f7cbc0d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -126,6 +126,10 @@ void perf_evsel__config(struct perf_evsel *evsel, struct perf_record_opts *opts)
attr->watermark = 0;
attr->wakeup_events = 1;
}
+ if (opts->branch_stack) {
+ attr->sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ attr->branch_sample_type = opts->branch_stack;
+ }
attr->mmap = track;
attr->comm = track;
@@ -576,6 +580,16 @@ int perf_event__parse_sample(const union perf_event *event, u64 type,
data->raw_data = (void *) pdata;
}
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ u64 sz;
+
+ data->branch_stack = (struct branch_stack *)array;
+ array++; /* nr */
+
+ sz = data->branch_stack->nr * sizeof(struct branch_entry);
+ sz /= sizeof(u64);
+ array += sz;
+ }
return 0;
}
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 9f867d96c6a..0d9b6da86a3 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1023,6 +1023,12 @@ write_it:
return do_write_string(fd, buffer);
}
+static int write_branch_stack(int fd __used, struct perf_header *h __used,
+ struct perf_evlist *evlist __used)
+{
+ return 0;
+}
+
static void print_hostname(struct perf_header *ph, int fd, FILE *fp)
{
char *str = do_read_string(fd, ph);
@@ -1144,8 +1150,9 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
uint64_t id;
void *buf = NULL;
char *str;
- u32 nre, sz, nr, i, j, msz;
- int ret;
+ u32 nre, sz, nr, i, j;
+ ssize_t ret;
+ size_t msz;
/* number of events */
ret = read(fd, &nre, sizeof(nre));
@@ -1162,25 +1169,23 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp)
if (ph->needs_swap)
sz = bswap_32(sz);
- /*
- * ensure it is at least to our ABI rev
- */
- if (sz < (u32)sizeof(attr))
- goto error;
-
memset(&attr, 0, sizeof(attr));
- /* read entire region to sync up to next field */
+ /* buffer to hold on file attr struct */
buf = malloc(sz);
if (!buf)
goto error;
msz = sizeof(attr);
- if (sz < msz)
+ if (sz < (ssize_t)msz)
msz = sz;
for (i = 0 ; i < nre; i++) {
+ /*
+ * must read entire on-file attr struct to
+ * sync up with layout.
+ */
ret = read(fd, buf, sz);
if (ret != (ssize_t)sz)
goto error;
@@ -1316,6 +1321,12 @@ static void print_cpuid(struct perf_header *ph, int fd, FILE *fp)
free(str);
}
+static void print_branch_stack(struct perf_header *ph __used, int fd __used,
+ FILE *fp)
+{
+ fprintf(fp, "# contains samples with branch stack\n");
+}
+
static int __event_process_build_id(struct build_id_event *bev,
char *filename,
struct perf_session *session)
@@ -1520,6 +1531,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
FEAT_OPA(HEADER_CMDLINE, cmdline),
FEAT_OPF(HEADER_CPU_TOPOLOGY, cpu_topology),
FEAT_OPF(HEADER_NUMA_TOPOLOGY, numa_topology),
+ FEAT_OPA(HEADER_BRANCH_STACK, branch_stack),
};
struct header_print_data {
@@ -1804,35 +1816,101 @@ out_free:
return err;
}
-static int check_magic_endian(u64 *magic, struct perf_file_header *header,
- struct perf_header *ph)
+static const int attr_file_abi_sizes[] = {
+ [0] = PERF_ATTR_SIZE_VER0,
+ [1] = PERF_ATTR_SIZE_VER1,
+ 0,
+};
+
+/*
+ * In the legacy file format, the magic number is not used to encode endianness.
+ * hdr_sz was used to encode endianness. But given that hdr_sz can vary based
+ * on ABI revisions, we need to try all combinations for all endianness to
+ * detect the endianness.
+ */
+static int try_all_file_abis(uint64_t hdr_sz, struct perf_header *ph)
{
- int ret;
+ uint64_t ref_size, attr_size;
+ int i;
- /* check for legacy format */
- ret = memcmp(magic, __perf_magic1, sizeof(*magic));
- if (ret == 0) {
- pr_debug("legacy perf.data format\n");
- if (!header)
- return -1;
+ for (i = 0 ; attr_file_abi_sizes[i]; i++) {
+ ref_size = attr_file_abi_sizes[i]
+ + sizeof(struct perf_file_section);
+ if (hdr_sz != ref_size) {
+ attr_size = bswap_64(hdr_sz);
+ if (attr_size != ref_size)
+ continue;
- if (header->attr_size != sizeof(struct perf_file_attr)) {
- u64 attr_size = bswap_64(header->attr_size);
+ ph->needs_swap = true;
+ }
+ pr_debug("ABI%d perf.data file detected, need_swap=%d\n",
+ i,
+ ph->needs_swap);
+ return 0;
+ }
+ /* could not determine endianness */
+ return -1;
+}
- if (attr_size != sizeof(struct perf_file_attr))
- return -1;
+#define PERF_PIPE_HDR_VER0 16
+
+static const size_t attr_pipe_abi_sizes[] = {
+ [0] = PERF_PIPE_HDR_VER0,
+ 0,
+};
+
+/*
+ * In the legacy pipe format, there is an implicit assumption that endiannesss
+ * between host recording the samples, and host parsing the samples is the
+ * same. This is not always the case given that the pipe output may always be
+ * redirected into a file and analyzed on a different machine with possibly a
+ * different endianness and perf_event ABI revsions in the perf tool itself.
+ */
+static int try_all_pipe_abis(uint64_t hdr_sz, struct perf_header *ph)
+{
+ u64 attr_size;
+ int i;
+
+ for (i = 0 ; attr_pipe_abi_sizes[i]; i++) {
+ if (hdr_sz != attr_pipe_abi_sizes[i]) {
+ attr_size = bswap_64(hdr_sz);
+ if (attr_size != hdr_sz)
+ continue;
ph->needs_swap = true;
}
+ pr_debug("Pipe ABI%d perf.data file detected\n", i);
return 0;
}
+ return -1;
+}
+
+static int check_magic_endian(u64 magic, uint64_t hdr_sz,
+ bool is_pipe, struct perf_header *ph)
+{
+ int ret;
+
+ /* check for legacy format */
+ ret = memcmp(&magic, __perf_magic1, sizeof(magic));
+ if (ret == 0) {
+ pr_debug("legacy perf.data format\n");
+ if (is_pipe)
+ return try_all_pipe_abis(hdr_sz, ph);
+
+ return try_all_file_abis(hdr_sz, ph);
+ }
+ /*
+ * the new magic number serves two purposes:
+ * - unique number to identify actual perf.data files
+ * - encode endianness of file
+ */
- /* check magic number with same endianness */
- if (*magic == __perf_magic2)
+ /* check magic number with one endianness */
+ if (magic == __perf_magic2)
return 0;
- /* check magic number but opposite endianness */
- if (*magic != __perf_magic2_sw)
+ /* check magic number with opposite endianness */
+ if (magic != __perf_magic2_sw)
return -1;
ph->needs_swap = true;
@@ -1851,8 +1929,11 @@ int perf_file_header__read(struct perf_file_header *header,
if (ret <= 0)
return -1;
- if (check_magic_endian(&header->magic, header, ph) < 0)
+ if (check_magic_endian(header->magic,
+ header->attr_size, false, ph) < 0) {
+ pr_debug("magic/endian check failed\n");
return -1;
+ }
if (ph->needs_swap) {
mem_bswap_64(header, offsetof(struct perf_file_header,
@@ -1939,21 +2020,17 @@ static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
if (ret <= 0)
return -1;
- if (check_magic_endian(&header->magic, NULL, ph) < 0)
+ if (check_magic_endian(header->magic, header->size, true, ph) < 0) {
+ pr_debug("endian/magic failed\n");
return -1;
+ }
+
+ if (ph->needs_swap)
+ header->size = bswap_64(header->size);
if (repipe && do_write(STDOUT_FILENO, header, sizeof(*header)) < 0)
return -1;
- if (header->size != sizeof(*header)) {
- u64 size = bswap_64(header->size);
-
- if (size != sizeof(*header))
- return -1;
-
- ph->needs_swap = true;
- }
-
return 0;
}
@@ -1973,6 +2050,52 @@ static int perf_header__read_pipe(struct perf_session *session, int fd)
return 0;
}
+static int read_attr(int fd, struct perf_header *ph,
+ struct perf_file_attr *f_attr)
+{
+ struct perf_event_attr *attr = &f_attr->attr;
+ size_t sz, left;
+ size_t our_sz = sizeof(f_attr->attr);
+ int ret;
+
+ memset(f_attr, 0, sizeof(*f_attr));
+
+ /* read minimal guaranteed structure */
+ ret = readn(fd, attr, PERF_ATTR_SIZE_VER0);
+ if (ret <= 0) {
+ pr_debug("cannot read %d bytes of header attr\n",
+ PERF_ATTR_SIZE_VER0);
+ return -1;
+ }
+
+ /* on file perf_event_attr size */
+ sz = attr->size;
+
+ if (ph->needs_swap)
+ sz = bswap_32(sz);
+
+ if (sz == 0) {
+ /* assume ABI0 */
+ sz = PERF_ATTR_SIZE_VER0;
+ } else if (sz > our_sz) {
+ pr_debug("file uses a more recent and unsupported ABI"
+ " (%zu bytes extra)\n", sz - our_sz);
+ return -1;
+ }
+ /* what we have not yet read and that we know about */
+ left = sz - PERF_ATTR_SIZE_VER0;
+ if (left) {
+ void *ptr = attr;
+ ptr += PERF_ATTR_SIZE_VER0;
+
+ ret = readn(fd, ptr, left);
+ }
+ /* read perf_file_section, ids are read in caller */
+ ret = readn(fd, &f_attr->ids, sizeof(f_attr->ids));
+
+ return ret <= 0 ? -1 : 0;
+}
+
int perf_session__read_header(struct perf_session *session, int fd)
{
struct perf_header *header = &session->header;
@@ -1988,19 +2111,17 @@ int perf_session__read_header(struct perf_session *session, int fd)
if (session->fd_pipe)
return perf_header__read_pipe(session, fd);
- if (perf_file_header__read(&f_header, header, fd) < 0) {
- pr_debug("incompatible file format\n");
+ if (perf_file_header__read(&f_header, header, fd) < 0)
return -EINVAL;
- }
- nr_attrs = f_header.attrs.size / sizeof(f_attr);
+ nr_attrs = f_header.attrs.size / f_header.attr_size;
lseek(fd, f_header.attrs.offset, SEEK_SET);
for (i = 0; i < nr_attrs; i++) {
struct perf_evsel *evsel;
off_t tmp;
- if (readn(fd, &f_attr, sizeof(f_attr)) <= 0)
+ if (read_attr(fd, header, &f_attr) < 0)
goto out_errno;
if (header->needs_swap)
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index e68f617d082..21a6be09c12 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -27,7 +27,7 @@ enum {
HEADER_EVENT_DESC,
HEADER_CPU_TOPOLOGY,
HEADER_NUMA_TOPOLOGY,
-
+ HEADER_BRANCH_STACK,
HEADER_LAST_FEATURE,
HEADER_FEAT_BITS = 256,
};
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 6f505d1abac..8380c3db1c9 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -50,21 +50,25 @@ static void hists__reset_col_len(struct hists *hists)
hists__set_col_len(hists, col, 0);
}
+static void hists__set_unres_dso_col_len(struct hists *hists, int dso)
+{
+ const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
+
+ if (hists__col_len(hists, dso) < unresolved_col_width &&
+ !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
+ !symbol_conf.dso_list)
+ hists__set_col_len(hists, dso, unresolved_col_width);
+}
+
static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
{
+ const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
u16 len;
if (h->ms.sym)
- hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen);
- else {
- const unsigned int unresolved_col_width = BITS_PER_LONG / 4;
-
- if (hists__col_len(hists, HISTC_DSO) < unresolved_col_width &&
- !symbol_conf.col_width_list_str && !symbol_conf.field_sep &&
- !symbol_conf.dso_list)
- hists__set_col_len(hists, HISTC_DSO,
- unresolved_col_width);
- }
+ hists__new_col_len(hists, HISTC_SYMBOL, h->ms.sym->namelen + 4);
+ else
+ hists__set_unres_dso_col_len(hists, HISTC_DSO);
len = thread__comm_len(h->thread);
if (hists__new_col_len(hists, HISTC_COMM, len))
@@ -74,6 +78,37 @@ static void hists__calc_col_len(struct hists *hists, struct hist_entry *h)
len = dso__name_len(h->ms.map->dso);
hists__new_col_len(hists, HISTC_DSO, len);
}
+
+ if (h->branch_info) {
+ int symlen;
+ /*
+ * +4 accounts for '[x] ' priv level info
+ * +2 account of 0x prefix on raw addresses
+ */
+ if (h->branch_info->from.sym) {
+ symlen = (int)h->branch_info->from.sym->namelen + 4;
+ hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+
+ symlen = dso__name_len(h->branch_info->from.map->dso);
+ hists__new_col_len(hists, HISTC_DSO_FROM, symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_SYMBOL_FROM, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_DSO_FROM);
+ }
+
+ if (h->branch_info->to.sym) {
+ symlen = (int)h->branch_info->to.sym->namelen + 4;
+ hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+
+ symlen = dso__name_len(h->branch_info->to.map->dso);
+ hists__new_col_len(hists, HISTC_DSO_TO, symlen);
+ } else {
+ symlen = unresolved_col_width + 4 + 2;
+ hists__new_col_len(hists, HISTC_SYMBOL_TO, symlen);
+ hists__set_unres_dso_col_len(hists, HISTC_DSO_TO);
+ }
+ }
}
static void hist_entry__add_cpumode_period(struct hist_entry *he,
@@ -195,26 +230,14 @@ static u8 symbol__parent_filter(const struct symbol *parent)
return 0;
}
-struct hist_entry *__hists__add_entry(struct hists *hists,
+static struct hist_entry *add_hist_entry(struct hists *hists,
+ struct hist_entry *entry,
struct addr_location *al,
- struct symbol *sym_parent, u64 period)
+ u64 period)
{
struct rb_node **p;
struct rb_node *parent = NULL;
struct hist_entry *he;
- struct hist_entry entry = {
- .thread = al->thread,
- .ms = {
- .map = al->map,