Diffstat (limited to 'net/tipc/bcast.h')
-rw-r--r--net/tipc/bcast.h146
1 files changed, 19 insertions, 127 deletions
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 4c1771e95c9..00330c45df3 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -2,7 +2,7 @@
* net/tipc/bcast.h: Include file for TIPC broadcast code
*
* Copyright (c) 2003-2006, Ericsson AB
- * Copyright (c) 2005, Wind River Systems
+ * Copyright (c) 2005, 2010-2011, Wind River Systems
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -39,31 +39,29 @@
#define MAX_NODES 4096
#define WSIZE 32
+#define TIPC_BCLINK_RESET 1
/**
* struct tipc_node_map - set of node identifiers
* @count: # of nodes in set
* @map: bitmap of node identifiers that are in the set
*/
-
struct tipc_node_map {
u32 count;
u32 map[MAX_NODES / WSIZE];
};
-
#define PLSIZE 32
/**
- * struct port_list - set of node local destination ports
+ * struct tipc_port_list - set of node local destination ports
* @count: # of ports in set (only valid for first entry in list)
* @next: pointer to next entry in list
* @ports: array of port references
*/
-
-struct port_list {
+struct tipc_port_list {
int count;
- struct port_list *next;
+ struct tipc_port_list *next;
u32 ports[PLSIZE];
};
@@ -72,139 +70,33 @@ struct tipc_node;
extern const char tipc_bclink_name[];
-
/**
- * nmap_add - add a node to a node map
+ * tipc_nmap_equal - test for equality of node maps
*/
-
-static inline void tipc_nmap_add(struct tipc_node_map *nm_ptr, u32 node)
-{
- int n = tipc_node(node);
- int w = n / WSIZE;
- u32 mask = (1 << (n % WSIZE));
-
- if ((nm_ptr->map[w] & mask) == 0) {
- nm_ptr->count++;
- nm_ptr->map[w] |= mask;
- }
-}
-
-/**
- * nmap_remove - remove a node from a node map
- */
-
-static inline void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node)
-{
- int n = tipc_node(node);
- int w = n / WSIZE;
- u32 mask = (1 << (n % WSIZE));
-
- if ((nm_ptr->map[w] & mask) != 0) {
- nm_ptr->map[w] &= ~mask;
- nm_ptr->count--;
- }
-}
-
-/**
- * nmap_equal - test for equality of node maps
- */
-
-static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b)
+static inline int tipc_nmap_equal(struct tipc_node_map *nm_a,
+ struct tipc_node_map *nm_b)
{
return !memcmp(nm_a, nm_b, sizeof(*nm_a));
}
-/**
- * nmap_diff - find differences between node maps
- * @nm_a: input node map A
- * @nm_b: input node map B
- * @nm_diff: output node map A-B (i.e. nodes of A that are not in B)
- */
-
-static inline void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b,
- struct tipc_node_map *nm_diff)
-{
- int stop = ARRAY_SIZE(nm_a->map);
- int w;
- int b;
- u32 map;
-
- memset(nm_diff, 0, sizeof(*nm_diff));
- for (w = 0; w < stop; w++) {
- map = nm_a->map[w] ^ (nm_a->map[w] & nm_b->map[w]);
- nm_diff->map[w] = map;
- if (map != 0) {
- for (b = 0 ; b < WSIZE; b++) {
- if (map & (1 << b))
- nm_diff->count++;
- }
- }
- }
-}
-
-/**
- * port_list_add - add a port to a port list, ensuring no duplicates
- */
-
-static inline void tipc_port_list_add(struct port_list *pl_ptr, u32 port)
-{
- struct port_list *item = pl_ptr;
- int i;
- int item_sz = PLSIZE;
- int cnt = pl_ptr->count;
-
- for (; ; cnt -= item_sz, item = item->next) {
- if (cnt < PLSIZE)
- item_sz = cnt;
- for (i = 0; i < item_sz; i++)
- if (item->ports[i] == port)
- return;
- if (i < PLSIZE) {
- item->ports[i] = port;
- pl_ptr->count++;
- return;
- }
- if (!item->next) {
- item->next = kmalloc(sizeof(*item), GFP_ATOMIC);
- if (!item->next) {
- warn("Incomplete multicast delivery, no memory\n");
- return;
- }
- item->next->next = NULL;
- }
- }
-}
-
-/**
- * port_list_free - free dynamically created entries in port_list chain
- *
- * Note: First item is on stack, so it doesn't need to be released
- */
-
-static inline void tipc_port_list_free(struct port_list *pl_ptr)
-{
- struct port_list *item;
- struct port_list *next;
-
- for (item = pl_ptr->next; item; item = next) {
- next = item->next;
- kfree(item);
- }
-}
-
+void tipc_port_list_add(struct tipc_port_list *pl_ptr, u32 port);
+void tipc_port_list_free(struct tipc_port_list *pl_ptr);
-int tipc_bclink_init(void);
+int tipc_bclink_init(void);
void tipc_bclink_stop(void);
+void tipc_bclink_set_flags(unsigned int flags);
+void tipc_bclink_add_node(u32 addr);
+void tipc_bclink_remove_node(u32 addr);
+struct tipc_node *tipc_bclink_retransmit_to(void);
void tipc_bclink_acknowledge(struct tipc_node *n_ptr, u32 acked);
-int tipc_bclink_send_msg(struct sk_buff *buf);
-void tipc_bclink_recv_pkt(struct sk_buff *buf);
+int tipc_bclink_xmit(struct sk_buff *buf);
+void tipc_bclink_rcv(struct sk_buff *buf);
u32 tipc_bclink_get_last_sent(void);
u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr);
-void tipc_bclink_check_gap(struct tipc_node *n_ptr, u32 seqno);
+void tipc_bclink_update_link_state(struct tipc_node *n_ptr, u32 last_sent);
int tipc_bclink_stats(char *stats_buf, const u32 buf_size);
int tipc_bclink_reset_stats(void);
int tipc_bclink_set_queue_limits(u32 limit);
-void tipc_bcbearer_sort(void);
-void tipc_bcbearer_push(void);
+void tipc_bcbearer_sort(struct tipc_node_map *nm_ptr, u32 node, bool action);
#endif
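
For context, the inline helpers deleted above are not dropped from TIPC: tipc_nmap_equal() stays in the header, the port-list helpers survive as out-of-line functions whose prototypes remain in the new version, and the node-map add/remove logic presumably moves into bcast.c. The node map itself is a fixed-size bitmap of MAX_NODES bits split into 32-bit words, with a running count of set bits. Below is a minimal, self-contained user-space sketch of that bitmap scheme, reconstructed from the removed lines; it is not kernel code, and it takes a plain node index directly where the kernel derives one from the TIPC address via tipc_node().

#include <stdio.h>
#include <string.h>
#include <stdint.h>

#define MAX_NODES 4096
#define WSIZE 32

/* Mirror of struct tipc_node_map from the header above. */
struct node_map {
	uint32_t count;
	uint32_t map[MAX_NODES / WSIZE];
};

/* Equivalent of the removed tipc_nmap_add(); 'n' is a plain node index here. */
static void nmap_add(struct node_map *nm, unsigned int n)
{
	unsigned int w = n / WSIZE;
	uint32_t mask = 1u << (n % WSIZE);

	if (!(nm->map[w] & mask)) {
		nm->count++;
		nm->map[w] |= mask;
	}
}

/* Equivalent of the removed tipc_nmap_remove(). */
static void nmap_remove(struct node_map *nm, unsigned int n)
{
	unsigned int w = n / WSIZE;
	uint32_t mask = 1u << (n % WSIZE);

	if (nm->map[w] & mask) {
		nm->map[w] &= ~mask;
		nm->count--;
	}
}

int main(void)
{
	struct node_map nm;

	memset(&nm, 0, sizeof(nm));
	nmap_add(&nm, 5);
	nmap_add(&nm, 5);	/* duplicate add: count stays at 1 */
	nmap_add(&nm, 40);
	nmap_remove(&nm, 5);
	printf("nodes in map: %u\n", nm.count);	/* prints 1 */
	return 0;
}

Adding a node that is already present, or removing one that is absent, is a no-op, which is what lets 'count' be maintained incrementally rather than recounted on every change.
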
-rw-r--r--tools/perf/arch/x86/util/tsc.h2
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c51
-rw-r--r--tools/perf/arch/x86/util/unwind-libunwind.c (renamed from tools/perf/arch/x86/util/unwind.c)4
-rw-r--r--tools/perf/bench/bench.h3
-rw-r--r--tools/perf/bench/futex-hash.c212
-rw-r--r--tools/perf/bench/futex-requeue.c211
-rw-r--r--tools/perf/bench/futex-wake.c201
-rw-r--r--tools/perf/bench/futex.h71
-rw-r--r--tools/perf/bench/numa.c5
-rw-r--r--tools/perf/builtin-annotate.c8
-rw-r--r--tools/perf/builtin-bench.c14
-rw-r--r--tools/perf/builtin-buildid-cache.c33
-rw-r--r--tools/perf/builtin-diff.c59
-rw-r--r--tools/perf/builtin-inject.c5
-rw-r--r--tools/perf/builtin-kmem.c88
-rw-r--r--tools/perf/builtin-kvm.c13
-rw-r--r--tools/perf/builtin-lock.c10
-rw-r--r--tools/perf/builtin-mem.c15
-rw-r--r--tools/perf/builtin-probe.c35
-rw-r--r--tools/perf/builtin-record.c205
-rw-r--r--tools/perf/builtin-report.c376
-rw-r--r--tools/perf/builtin-sched.c92
-rw-r--r--tools/perf/builtin-stat.c11
-rw-r--r--tools/perf/builtin-timechart.c4
-rw-r--r--tools/perf/builtin-top.c130
-rw-r--r--tools/perf/builtin-trace.c32
-rw-r--r--tools/perf/config/Makefile296
-rw-r--r--tools/perf/config/Makefile.arch3
-rw-r--r--tools/perf/config/feature-checks/Makefile12
-rw-r--r--tools/perf/config/feature-checks/test-all.c10
-rw-r--r--tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c13
-rw-r--r--tools/perf/config/feature-checks/test-on-exit.c16
-rw-r--r--tools/perf/design.txt13
-rw-r--r--tools/perf/perf-completion.sh6
-rw-r--r--tools/perf/perf-sys.h190
-rw-r--r--tools/perf/perf.c9
-rw-r--r--tools/perf/perf.h234
-rw-r--r--tools/perf/tests/attr.c7
-rw-r--r--tools/perf/tests/builtin-test.c70
-rw-r--r--tools/perf/tests/code-reading.c6
-rw-r--r--tools/perf/tests/dso-data.c216
-rw-r--r--tools/perf/tests/dwarf-unwind.c144
-rw-r--r--tools/perf/tests/evsel-tp-sched.c3
-rw-r--r--tools/perf/tests/hists_common.c209
-rw-r--r--tools/perf/tests/hists_common.h75
-rw-r--r--tools/perf/tests/hists_cumulate.c726
-rw-r--r--tools/perf/tests/hists_filter.c289
-rw-r--r--tools/perf/tests/hists_link.c208
-rw-r--r--tools/perf/tests/hists_output.c621
-rw-r--r--tools/perf/tests/keep-tracking.c2
-rw-r--r--tools/perf/tests/make34
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c233
-rw-r--r--tools/perf/tests/parse-events.c144
-rw-r--r--tools/perf/tests/parse-no-sample-id-all.c2
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c3
-rw-r--r--tools/perf/tests/rdpmc.c2
-rw-r--r--tools/perf/tests/sample-parsing.c19
-rw-r--r--tools/perf/tests/tests.h16
-rw-r--r--tools/perf/tests/thread-mg-share.c90
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c10
-rw-r--r--tools/perf/ui/browser.c2
-rw-r--r--tools/perf/ui/browser.h4
-rw-r--r--tools/perf/ui/browsers/hists.c353
-rw-r--r--tools/perf/ui/gtk/hists.c153
-rw-r--r--tools/perf/ui/hist.c463
-rw-r--r--tools/perf/ui/progress.h2
-rw-r--r--tools/perf/ui/setup.c2
-rw-r--r--tools/perf/ui/stdio/hist.c100
-rw-r--r--tools/perf/util/annotate.c23
-rw-r--r--tools/perf/util/annotate.h4
-rw-r--r--tools/perf/util/build-id.c2
-rw-r--r--tools/perf/util/build-id.h2
-rw-r--r--tools/perf/util/callchain.c123
-rw-r--r--tools/perf/util/callchain.h19
-rw-r--r--tools/perf/util/config.c4
-rw-r--r--tools/perf/util/cpumap.c162
-rw-r--r--tools/perf/util/cpumap.h35
-rw-r--r--tools/perf/util/data.c9
-rw-r--r--tools/perf/util/dso.c283
-rw-r--r--tools/perf/util/dso.h62
-rw-r--r--tools/perf/util/dwarf-aux.c7
-rw-r--r--tools/perf/util/event.c285
-rw-r--r--tools/perf/util/event.h42
-rw-r--r--tools/perf/util/evsel.c65
-rw-r--r--tools/perf/util/evsel.h27
-rw-r--r--tools/perf/util/fs.c119
-rw-r--r--tools/perf/util/fs.h7
-rw-r--r--tools/perf/util/header.h4
-rw-r--r--tools/perf/util/hist.c690
-rw-r--r--tools/perf/util/hist.h127
-rw-r--r--tools/perf/util/include/asm/hash.h6
-rw-r--r--tools/perf/util/include/linux/bitmap.h3
-rw-r--r--tools/perf/util/include/linux/bitops.h4
-rw-r--r--tools/perf/util/include/linux/export.h6
-rw-r--r--tools/perf/util/include/linux/hash.h5
-rw-r--r--tools/perf/util/include/linux/kernel.h6
-rw-r--r--tools/perf/util/include/linux/list.h2
-rw-r--r--tools/perf/util/include/linux/magic.h16
-rw-r--r--tools/perf/util/include/linux/prefetch.h6
-rw-r--r--tools/perf/util/include/linux/types.h29
-rw-r--r--tools/perf/util/machine.c174
-rw-r--r--tools/perf/util/machine.h15
-rw-r--r--tools/perf/util/map.c127
-rw-r--r--tools/perf/util/map.h29
-rw-r--r--tools/perf/util/pager.c12
-rw-r--r--tools/perf/util/parse-events.c17
-rw-r--r--tools/perf/util/parse-events.h3
-rw-r--r--tools/perf/util/parse-events.y14
-rw-r--r--tools/perf/util/parse-options.c37
-rw-r--r--tools/perf/util/parse-options.h8
-rw-r--r--tools/perf/util/perf_regs.c27
-rw-r--r--tools/perf/util/perf_regs.h15
-rw-r--r--tools/perf/util/pmu.c8
-rw-r--r--tools/perf/util/pmu.h2
-rw-r--r--tools/perf/util/probe-event.c878
-rw-r--r--tools/perf/util/probe-event.h12
-rw-r--r--tools/perf/util/probe-finder.c228
-rw-r--r--tools/perf/util/probe-finder.h5
-rw-r--r--tools/perf/util/python-ext-sources2
-rw-r--r--tools/perf/util/record.c2
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c1
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c2
-rw-r--r--tools/perf/util/session.c18
-rw-r--r--tools/perf/util/sort.c630
-rw-r--r--tools/perf/util/sort.h28
-rw-r--r--tools/perf/util/stat.h2
-rw-r--r--tools/perf/util/svghelper.c2
-rw-r--r--tools/perf/util/svghelper.h2
-rw-r--r--tools/perf/util/symbol-elf.c14
-rw-r--r--tools/perf/util/symbol.c141
-rw-r--r--tools/perf/util/symbol.h19
-rw-r--r--tools/perf/util/thread.c71
-rw-r--r--tools/perf/util/thread.h14
-rw-r--r--tools/perf/util/top.h2
-rw-r--r--tools/perf/util/trace-event-parse.c1
-rw-r--r--tools/perf/util/types.h24
-rw-r--r--tools/perf/util/unwind-libdw.c210
-rw-r--r--tools/perf/util/unwind-libdw.h21
-rw-r--r--tools/perf/util/unwind-libunwind.c (renamed from tools/perf/util/unwind.c)52
-rw-r--r--tools/perf/util/unwind.h13
-rw-r--r--tools/perf/util/util.c5
-rw-r--r--tools/perf/util/util.h3
-rw-r--r--tools/perf/util/values.h2
168 files changed, 9785 insertions, 3086 deletions
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 7065cd6fbdf..4464ad770d5 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -48,6 +48,12 @@ SUBSYSTEM
'mem'::
Memory access performance.
+'numa'::
+ NUMA scheduling and MM benchmarks.
+
+'futex'::
+ Futex stressing benchmarks.
+
'all'::
All benchmark subsystems.
@@ -187,6 +193,22 @@ Show only the result with page faults before memset.
--no-prefault::
Show only the result without page faults before memset.
+SUITES FOR 'numa'
+~~~~~~~~~~~~~~~~~
+*mem*::
+Suite for evaluating NUMA workloads.
+
+SUITES FOR 'futex'
+~~~~~~~~~~~~~~~~~~
+*hash*::
+Suite for evaluating hash tables.
+
+*wake*::
+Suite for evaluating wake calls.
+
+*requeue*::
+Suite for evaluating requeue calls.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index fdfceee0ffd..b3b8abae62b 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -33,21 +33,25 @@ OPTIONS
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-C::
--comms=::
Only consider symbols in these comms. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-s::
--sort=::
- Sort by key(s): pid, comm, dso, symbol.
+ Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
+ Please see description of --sort in the perf-report man page.
-t::
--field-separator=::
@@ -89,6 +93,14 @@ OPTIONS
--order::
Specify compute sorting column number.
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options.
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
COMPARISON
----------
The comparison is governed by the baseline file. The baseline perf.data
@@ -157,6 +169,10 @@ with:
- period_percent being the % of the hist entry period value within
single data file
+ - with filtering by -C, -d and/or -S, period_percent might be changed
+ relative to how entries are filtered. Use --percentage=absolute to
+ prevent such fluctuation.
+
ratio
~~~~~
If specified the 'Ratio' column is displayed with value 'r' computed as:
@@ -187,4 +203,4 @@ If specified the 'Weighted diff' column is displayed with value 'd' computed as:
SEE ALSO
--------
-linkperf:perf-record[1]
+linkperf:perf-record[1], linkperf:perf-report[1]
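
A worked illustration of the --percentage semantics described above, with made-up numbers: suppose the baseline holds three entries, A at 50%, B at 30% and C at 20%, and a -d/--dsos filter keeps only A and B. With --percentage=relative the column is rescaled to the filtered total, so A shows 62.5% (50/80) and B shows 37.5% (30/80), and the visible entries still sum to 100%. With --percentage=absolute A stays at 50% and B at 30%, so values remain comparable before and after filtering, which is why the period_percent note above recommends --percentage=absolute to avoid such fluctuation.
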
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 888d51137fb..1d78a4064da 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -18,6 +18,10 @@ from it, into perf.data. Perf record options are accepted and are passed through
"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
right set of options to display a memory access profile.
+Note that on Intel systems the memory latency reported is the use-latency,
+not the pure load (or store latency). Use latency includes any pipeline
+queueing delays in addition to the memory subsystem latency.
+
OPTIONS
-------
<command>...::
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index b715cb71592..1513935c399 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -136,6 +136,8 @@ Each probe argument follows below syntax.
'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
+
LINE SYNTAX
-----------
Line range is described by following syntax.
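
As a purely hypothetical illustration of the argument syntax above, a register-based probe argument takes the NAME=%REG:TYPE shape, e.g. perf probe 'vfs_read count=%cx:u64'; the function, the variable name and the register pairing here are invented, the point is only the shape of the argument and that on x86 the short register name (%cx rather than %rcx or %ecx) is the accepted form.
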
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index c71b0f36d9e..d460049cae8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -184,9 +184,10 @@ following filters are defined:
- in_tx: only when the target is in a hardware transaction
- no_tx: only when the target is not in a hardware transaction
- abort_tx: only when the target is a hardware transaction abort
+ - cond: conditional branches
+
-The option requires at least one branch type among any, any_call, any_ret, ind_call.
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
The privilege levels may be omitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 8eab8a4bdeb..d2b59af62bc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -25,10 +25,6 @@ OPTIONS
--verbose::
Be more verbose. (show symbol address, etc)
--d::
---dsos=::
- Only consider symbols in these dsos. CSV that understands
- file://filename entries.
-n::
--show-nr-samples::
Show the number of samples for each symbol
@@ -42,11 +38,18 @@ OPTIONS
-c::
--comms=::
Only consider symbols in these comms. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
--symbol-filter=::
Only show symbols that match (partially) with this filter.
@@ -76,6 +79,15 @@ OPTIONS
abort cost. This is the global weight.
- local_weight: Local weight version of the weight above.
- transaction: Transaction abort flags.
+ - overhead: Overhead percentage of sample
+ - overhead_sys: Overhead percentage of sample running in system mode
+ - overhead_us: Overhead percentage of sample running in user mode
+ - overhead_guest_sys: Overhead percentage of sample running in system mode
+ on guest machine
+ - overhead_guest_us: Overhead percentage of sample running in user mode on
+ guest machine
+ - sample: Number of sample
+ - period: Raw number of event count of sample
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
@@ -95,6 +107,32 @@ OPTIONS
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
+-F::
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+ By default, every sort keys not specified in -F will be appended
+ automatically.
+
+ If --mem-mode option is used, following sort keys are also available
+ (incompatible with --branch-stack):
+ symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+ - symbol_daddr: name of data symbol being executed on at the time of sample
+ - dso_daddr: name of library or module containing the data being executed
+ on at the time of sample
+ - locked: whether the bus was locked at the time of sample
+ - tlb: type of tlb access for the data at the time of sample
+ - mem: type of memory access for the data at the time of sample
+ - snoop: type of snoop (if any) for the data at the time of sample
+ - dcacheline: the cacheline the data address is on at the time of sample
+
+ And default sort keys are changed to local_weight, mem, sym, dso,
+ symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+
-p::
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
@@ -141,6 +179,11 @@ OPTIONS
Default: fractal,0.5,callee,function.
+--children::
+ Accumulate callchain of children to parent entry so that then can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires callchains are recorded.
+
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
@@ -233,10 +276,26 @@ OPTIONS
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
+--mem-mode::
+ Use the data addresses of samples in addition to instruction addresses
+ to build the histograms. To generate meaningful output, the perf.data
+ file must have been obtained using perf record -d -W and using a
+ special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
+ 'perf mem' for simpler access.
+
--percent-limit::
Do not show entries which have an overhead under that percent.
(Default: 0).
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
--header::
Show header information in the perf.data file. This includes
various information like hostname, OS and perf version, cpu/mem
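
To make the --children column described above concrete, consider a hypothetical callchain in which main() calls foo() and every sample lands inside foo(): foo() then shows 100% in both the overhead and Children columns (an entry's own samples count toward its cumulative total), while main() shows 0% overhead but 100% Children, because the periods of its callees are accumulated into it. This cumulative value is also what the overhead_children key under -F/--fields refers to.
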
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index bc5990c33dc..5e0f986dff3 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -43,27 +43,6 @@ TIMECHART OPTIONS
--symfs=<directory>::
Look for files with symbols relative to this directory.
-
-EXAMPLES
---------
-
-$ perf timechart record git pull
-
- [ perf record: Woken up 13 times to write data ]
- [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
-
-$ perf timechart
-
- Written 10.2 seconds of trace to output.svg.
-
-Record system-wide timechart:
-
- $ perf timechart record
-
- then generate timechart and highlight 'gcc' tasks:
-
- $ perf timechart --highlight gcc
-
-n::
--proc-num::
Print task info for at least given number of tasks.
@@ -88,6 +67,26 @@ RECORD OPTIONS
--callchain::
Do call-graph (stack chain/backtrace) recording
+EXAMPLES
+--------
+
+$ perf timechart record git pull
+
+ [ perf record: Woken up 13 times to write data ]
+ [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
+
+$ perf timechart
+
+ Written 10.2 seconds of trace to output.svg.
+
+Record system-wide timechart:
+
+ $ perf timechart record
+
+ then generate timechart and highlight 'gcc' tasks:
+
+ $ perf timechart --highlight gcc
+
SEE ALSO
--------
linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index cdd8d4946db..180ae02137a 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -87,7 +87,6 @@ Default is to monitor all CPUS.
--realtime=<priority>::
Collect data with this RT SCHED_FIFO priority.
--s <symbol>::
--sym-annotate=<symbol>::
Annotate this symbol.
@@ -114,7 +113,17 @@ Default is to monitor all CPUS.
-s::
--sort::
Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
- local_weight, abort, in_tx, transaction
+ local_weight, abort, in_tx, transaction, overhead, sample, period.
+ Please see description of --sort in the perf-report man page.
+
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+ By default, every sort keys not specified in --field will be appended
+ automatically.
-n::
--show-nr-samples::
@@ -124,13 +133,16 @@ Default is to monitor all CPUS.
Show a column with the sum of periods.
--dsos::
- Only consider symbols in these dsos.
+ Only consider symbols in these dsos. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
--comms::
- Only consider symbols in these comms.
+ Only consider symbols in these comms. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
--symbols::
- Only consider these symbols.
+ Only consider these symbols. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
-M::
--disassembler-style=:: Set disassembler style for objdump.
@@ -149,6 +161,12 @@ Default is to monitor all CPUS.
Setup and enable call-graph (stack chain/backtrace) recording,
implies -g.
+--children::
+ Accumulate callchain of children to parent entry so that then can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires -g/--call-graph option
+ enabled.
+
--max-stack::
Set the stack depth limit when parsing the callchain, anything
beyond the specified depth will be ignored. This is a trade-off
@@ -166,6 +184,15 @@ Default is to monitor all CPUS.
Do not show entries which have an overhead under that percent.
(Default: 0).
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
INTERACTIVE PROMPTING KEYS
--------------------------
@@ -201,4 +228,4 @@ Pressing any unmapped key displays a menu, and prompts for input.
SEE ALSO
--------
-linkperf:perf-stat[1], linkperf:perf-list[1]
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index f41572d0dd7..45da209b6ed 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -6,6 +6,9 @@ tools/lib/symbol/kallsyms.c
tools/lib/symbol/kallsyms.h
tools/include/asm/bug.h
tools/include/linux/compiler.h
+tools/include/linux/hash.h
+tools/include/linux/export.h
+tools/include/linux/types.h
include/linux/const.h
include/linux/perf_event.h
include/linux/rbtree.h
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 7257e7e9e38..9670a16fa57 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -7,6 +7,8 @@ include config/utilities.mak
# Define V to have a more verbose compile.
#
+# Define VF to have a more verbose feature check output.
+#
# Define O to save output files in a separate directory.
#
# Define ARCH as name of target architecture if you want cross-builds.
@@ -55,6 +57,9 @@ include config/utilities.mak
# Define NO_LIBAUDIT if you do not want libaudit support
#
# Define NO_LIBBIONIC if you do not want bionic support
+#
+# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
+# for dwarf backtrace post unwind.
ifeq ($(srctree),)
srctree := $(patsubst %/,%,$(dir $(shell pwd)))
@@ -187,13 +192,13 @@ endif
export PERL_PATH
$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
- $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
+ $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
$(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
- $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
+ $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
$(OUTPUT)util/pmu-bison.c: util/pmu.y
$(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
@@ -208,7 +213,7 @@ LIB_H += ../../include/uapi/linux/perf_event.h
LIB_H += ../../include/linux/rbtree.h
LIB_H += ../../include/linux/list.h
LIB_H += ../../include/uapi/linux/const.h
-LIB_H += ../../include/linux/hash.h
+LIB_H += ../include/linux/hash.h
LIB_H += ../../include/linux/stringify.h
LIB_H += util/include/linux/bitmap.h
LIB_H += util/include/linux/bitops.h
@@ -217,14 +222,12 @@ LIB_H += util/include/linux/const.h
LIB_H += util/include/linux/ctype.h
LIB_H += util/include/linux/kernel.h
LIB_H += util/include/linux/list.h
-LIB_H += util/include/linux/export.h
-LIB_H += util/include/linux/magic.h
+LIB_H += ../include/linux/export.h
LIB_H += util/include/linux/poison.h
-LIB_H += util/include/linux/prefetch.h
LIB_H += util/include/linux/rbtree.h
LIB_H += util/include/linux/rbtree_augmented.h
LIB_H += util/include/linux/string.h
-LIB_H += util/include/linux/types.h
+LIB_H += ../include/linux/types.h
LIB_H += util/include/linux/linkage.h
LIB_H += util/include/asm/asm-offsets.h
LIB_H += ../include/asm/bug.h
@@ -244,13 +247,11 @@ LIB_H += util/cache.h
LIB_H += util/callchain.h
LIB_H += util/build-id.h
LIB_H += util/debug.h
-LIB_H += util/fs.h
LIB_H += util/pmu.h
LIB_H += util/event.h
LIB_H += util/evsel.h
LIB_H += util/evlist.h
LIB_H += util/exec_cmd.h
-LIB_H += util/types.h
LIB_H += util/levenshtein.h
LIB_H += util/machine.h
LIB_H += util/map.h
@@ -306,7 +307,6 @@ LIB_OBJS += $(OUTPUT)util/annotate.o
LIB_OBJS += $(OUTPUT)util/build-id.o
LIB_OBJS += $(OUTPUT)util/config.o
LIB_OBJS += $(OUTPUT)util/ctype.o
-LIB_OBJS += $(OUTPUT)util/fs.o
LIB_OBJS += $(OUTPUT)util/pmu.o
LIB_OBJS += $(OUTPUT)util/environment.o
LIB_OBJS += $(OUTPUT)util/event.o
@@ -396,7 +396,11 @@ LIB_OBJS += $(OUTPUT)tests/rdpmc.o
LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
LIB_OBJS += $(OUTPUT)tests/pmu.o
+LIB_OBJS += $(OUTPUT)tests/hists_common.o
LIB_OBJS += $(OUTPUT)tests/hists_link.o
+LIB_OBJS += $(OUTPUT)tests/hists_filter.o
+LIB_OBJS += $(OUTPUT)tests/hists_output.o
+LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
LIB_OBJS += $(OUTPUT)tests/python-use.o
LIB_OBJS += $(OUTPUT)tests/bp_signal.o
LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
@@ -408,6 +412,13 @@ endif
LIB_OBJS += $(OUTPUT)tests/code-reading.o
LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
+ifndef NO_DWARF_UNWIND
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
+LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
+endif
+endif
+LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
+LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
@@ -420,6 +431,9 @@ BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
endif
BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
@@ -475,8 +489,13 @@ ifndef NO_DWARF
endif # NO_DWARF
endif # NO_LIBELF
+ifndef NO_LIBDW_DWARF_UNWIND
+ LIB_OBJS += $(OUTPUT)util/unwind-libdw.o
+ LIB_H += util/unwind-libdw.h
+endif
+
ifndef NO_LIBUNWIND
- LIB_OBJS += $(OUTPUT)util/unwind.o
+ LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o
endif
LIB_OBJS += $(OUTPUT)tests/keep-tracking.o
@@ -533,6 +552,7 @@ ifeq ($(NO_PERF_REGS),0)
ifeq ($(ARCH),x86)
LIB_H += arch/x86/include/perf_regs.h
endif
+ LIB_OBJS += $(OUTPUT)util/perf_regs.o
endif
ifndef NO_LIBNUMA
@@ -574,7 +594,7 @@ $(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H)
$(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $<
$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS)
- $(QUIET_LINK)$(CC) -o $@ -shared $(ALL_LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
+ $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
@@ -655,6 +675,9 @@ $(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
-DPYTHON='"$(PYTHON_WORD)"' \
$<
+$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $<
+
$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
$(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
@@ -707,9 +730,15 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
# we depend the various files onto their directories.
DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS)
DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
-$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS)))
+# no need to add flex objects, because they depend on bison ones
+DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c
+DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c
+
+OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS)))
+
+$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES)
# In the second step, we make a rule to actually create these directories
-$(sort $(dir $(DIRECTORY_DEPS))):
+$(OUTPUT_DIRECTORIES):
$(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
$(LIB_FILE): $(LIB_OBJS)
@@ -760,8 +789,8 @@ help:
@echo ''
@echo 'Perf install targets:'
@echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
- @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular'
- @echo ' path like make prefix=/usr/local install install-doc'
+ @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular'
+ @echo ' path like "make prefix=/usr/local install install-doc"'
@echo ' install - install compiled binaries'
@echo ' install-doc - install *all* documentation'
@echo ' install-man - install manpage documentation'
@@ -786,17 +815,20 @@ INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
$(DOC_TARGETS):
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
TAGS:
- $(RM) TAGS
- $(FIND) . -name '*.[hcS]' -print | xargs etags -a
+ $(QUIET_GEN)$(RM) TAGS; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
tags:
- $(RM) tags
- $(FIND) . -name '*.[hcS]' -print | xargs ctags -a
+ $(QUIET_GEN)$(RM) tags; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
cscope:
- $(RM) cscope*
- $(FIND) . -name '*.[hcS]' -print | xargs cscope -b
+ $(QUIET_GEN)$(RM) cscope*; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
### Detect prefix changes
TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
@@ -886,7 +918,7 @@ config-clean:
clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean
$(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS)
$(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf
- $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
+ $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
$(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
$(python-clean)
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index fe9b61e322a..09d62153d38 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -3,5 +3,12 @@ PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
ifndef NO_LIBUNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
+ifndef NO_LIBDW_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
+endif
+ifndef NO_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
endif
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
index 2a1cfde66b6..f619c9c5a4b 100644
--- a/tools/perf/arch/arm/include/perf_regs.h
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -2,10 +2,15 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
-#include "../../util/types.h"
+#include <linux/types.h>
#include <asm/perf_regs.h>
+void perf_regs_load(u64 *regs);
+
#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_ARM_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+
#define PERF_REG_IP PERF_REG_ARM_PC
#define PERF_REG_SP PERF_REG_ARM_SP
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..9f870d27cb3
--- /dev/null
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_ARM_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm/tests/regs_load.S b/tools/perf/arch/arm/tests/regs_load.S
new file mode 100644
index 00000000000..e09e983946f
--- /dev/null
+++ b/tools/perf/arch/arm/tests/regs_load.S
@@ -0,0 +1,58 @@
+#include <linux/linkage.h>
+
+#define R0 0x00
+#define R1 0x08
+#define R2 0x10
+#define R3 0x18
+#define R4 0x20
+#define R5 0x28
+#define R6 0x30
+#define R7 0x38
+#define R8 0x40
+#define R9 0x48
+#define SL 0x50
+#define FP 0x58
+#define IP 0x60
+#define SP 0x68
+#define LR 0x70
+#define PC 0x78
+
+/*
+ * Implementation of void perf_regs_load(u64 *regs);
+ *
+ * This functions fills in the 'regs' buffer from the actual registers values,
+ * in the way the perf built-in unwinding test expects them:
+ * - the PC at the time at the call to this function. Since this function
+ * is called using a bl instruction, the PC value is taken from LR.
+ * The built-in unwinding test then unwinds the call stack from the dwarf
+ * information in unwind__get_entries.
+ *
+ * Notes:
+ * - the 8 bytes stride in the registers offsets comes from the fact
+ * that the registers are stored in an u64 array (u64 *regs),
+ * - the regs buffer needs to be zeroed before the call to this function,
+ * in this case using a calloc in dwarf-unwind.c.
+ */
+
+.text
+.type perf_regs_load,%function
+ENTRY(perf_regs_load)
+ str r0, [r0, #R0]
+ str r1, [r0, #R1]
+ str r2, [r0, #R2]
+ str r3, [r0, #R3]
+ str r4, [r0, #R4]
+ str r5, [r0, #R5]
+ str r6, [r0, #R6]
+ str r7, [r0, #R7]
+ str r8, [r0, #R8]
+ str r9, [r0, #R9]
+ str sl, [r0, #SL]
+ str fp, [r0, #FP]
+ str ip, [r0, #IP]
+ str sp, [r0, #SP]
+ str lr, [r0, #LR]
+ str lr, [r0, #PC] // store pc as lr in order to skip the call
+ // to this function
+ mov pc, lr
+ENDPROC(perf_regs_load)
diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c
new file mode 100644
index 00000000000..b4176c60117
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -0,0 +1,36 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
+ val; \
+})
+
+ dwarf_regs[0] = REG(R0);
+ dwarf_regs[1] = REG(R1);
+ dwarf_regs[2] = REG(R2);
+ dwarf_regs[3] = REG(R3);
+ dwarf_regs[4] = REG(R4);
+ dwarf_regs[5] = REG(R5);
+ dwarf_regs[6] = REG(R6);
+ dwarf_regs[7] = REG(R7);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(FP);
+ dwarf_regs[12] = REG(IP);
+ dwarf_regs[13] = REG(SP);
+ dwarf_regs[14] = REG(LR);
+ dwarf_regs[15] = REG(PC);
+
+ return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
+ dwarf_regs);
+}
diff --git a/tools/perf/arch/arm/util/unwind.c b/tools/perf/arch/arm/util/unwind-libunwind.c
index da3dc950550..729ed69a666 100644
--- a/tools/perf/arch/arm/util/unwind.c
+++ b/tools/perf/arch/arm/util/unwind-libunwind.c
@@ -4,7 +4,7 @@
#include "perf_regs.h"
#include "../../util/unwind.h"
-int unwind__arch_reg_id(int regnum)
+int libunwind__arch_reg_id(int regnum)
{
switch (regnum) {
case UNW_ARM_R0:
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
new file mode 100644
index 00000000000..67e9b3d38e8
--- /dev/null
+++ b/tools/perf/arch/arm64/Makefile
@@ -0,0 +1,7 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
+ifndef NO_LIBUNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
new file mode 100644
index 00000000000..e9441b9e2a3
--- /dev/null
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -0,0 +1,88 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
+#define PERF_REG_IP PERF_REG_ARM64_PC
+#define PERF_REG_SP PERF_REG_ARM64_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c
new file mode 100644
index 00000000000..d49efeb8172
--- /dev/null
+++ b/tools/perf/arch/arm64/util/dwarf-regs.c
@@ -0,0 +1,80 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+};
+
+#define STR(s) #s
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = STR(%x##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+
+/*
+ * Reference:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ GPR_DWARFNUM_NAME(11),
+ GPR_DWARFNUM_NAME(12),
+ GPR_DWARFNUM_NAME(13),
+ GPR_DWARFNUM_NAME(14),
+ GPR_DWARFNUM_NAME(15),
+ GPR_DWARFNUM_NAME(16),
+ GPR_DWARFNUM_NAME(17),
+ GPR_DWARFNUM_NAME(18),
+ GPR_DWARFNUM_NAME(19),
+ GPR_DWARFNUM_NAME(20),
+ GPR_DWARFNUM_NAME(21),
+ GPR_DWARFNUM_NAME(22),
+ GPR_DWARFNUM_NAME(23),
+ GPR_DWARFNUM_NAME(24),
+ GPR_DWARFNUM_NAME(25),
+ GPR_DWARFNUM_NAME(26),
+ GPR_DWARFNUM_NAME(27),
+ GPR_DWARFNUM_NAME(28),
+ GPR_DWARFNUM_NAME(29),
+ REG_DWARFNUM_NAME("%lr", 30),
+ REG_DWARFNUM_NAME("%sp", 31),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - lookup register name from it's DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from it's DWARF register number. If the register is not
+ * found in the table, this returns NULL;
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
new file mode 100644
index 00000000000..436ee43859d
--- /dev/null
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -0,0 +1,82 @@
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_AARCH64_X0:
+ return PERF_REG_ARM64_X0;
+ case UNW_AARCH64_X1:
+ return PERF_REG_ARM64_X1;
+ case UNW_AARCH64_X2:
+ return PERF_REG_ARM64_X2;
+ case UNW_AARCH64_X3:
+ return PERF_REG_ARM64_X3;
+ case UNW_AARCH64_X4:
+ return PERF_REG_ARM64_X4;
+ case UNW_AARCH64_X5:
+ return PERF_REG_ARM64_X5;
+ case UNW_AARCH64_X6:
+ return PERF_REG_ARM64_X6;
+ case UNW_AARCH64_X7:
+ return PERF_REG_ARM64_X7;
+ case UNW_AARCH64_X8:
+ return PERF_REG_ARM64_X8;
+ case UNW_AARCH64_X9:
+ return PERF_REG_ARM64_X9;
+ case UNW_AARCH64_X10:
+ return PERF_REG_ARM64_X10;
+ case UNW_AARCH64_X11:
+ return PERF_REG_ARM64_X11;
+ case UNW_AARCH64_X12:
+ return PERF_REG_ARM64_X12;
+ case UNW_AARCH64_X13:
+ return PERF_REG_ARM64_X13;
+ case UNW_AARCH64_X14:
+ return PERF_REG_ARM64_X14;
+ case UNW_AARCH64_X15:
+ return PERF_REG_ARM64_X15;
+ case UNW_AARCH64_X16:
+ return PERF_REG_ARM64_X16;
+ case UNW_AARCH64_X17:
+ return PERF_REG_ARM64_X17;
+ case UNW_AARCH64_X18:
+ return PERF_REG_ARM64_X18;
+ case UNW_AARCH64_X19:
+ return PERF_REG_ARM64_X19;
+ case UNW_AARCH64_X20:
+ return PERF_REG_ARM64_X20;
+ case UNW_AARCH64_X21:
+ return PERF_REG_ARM64_X21;
+ case UNW_AARCH64_X22:
+ return PERF_REG_ARM64_X22;
+ case UNW_AARCH64_X23:
+ return PERF_REG_ARM64_X23;
+ case UNW_AARCH64_X24:
+ return PERF_REG_ARM64_X24;
+ case UNW_AARCH64_X25:
+ return PERF_REG_ARM64_X25;
+ case UNW_AARCH64_X26:
+ return PERF_REG_ARM64_X26;
+ case UNW_AARCH64_X27:
+ return PERF_REG_ARM64_X27;
+ case UNW_AARCH64_X28:
+ return PERF_REG_ARM64_X28;
+ case UNW_AARCH64_X29:
+ return PERF_REG_ARM64_X29;
+ case UNW_AARCH64_X30:
+ return PERF_REG_ARM64_LR;
+ case UNW_AARCH64_SP:
+ return PERF_REG_ARM64_SP;
+ case UNW_AARCH64_PC:
+ return PERF_REG_ARM64_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 8801fe02f20..1641542e363 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -3,7 +3,14 @@ PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
ifndef NO_LIBUNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
+ifndef NO_LIBDW_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
+endif
+ifndef NO_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
endif
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index e84ca76aae7..7df517acfef 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -2,17 +2,23 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
-#include "../../util/types.h"
+#include <linux/types.h>
#include <asm/perf_regs.h>
+void perf_regs_load(u64 *regs);
+
#ifndef HAVE_ARCH_X86_64_SUPPORT
#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_X86_32_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
#else
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
(1ULL << PERF_REG_X86_ES) | \
(1ULL << PERF_REG_X86_FS) | \
(1ULL << PERF_REG_X86_GS))
#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT)
+#define PERF_REGS_MAX PERF_REG_X86_64_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
#endif
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..9f89f899ccc
--- /dev/null
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_X86_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = malloc(sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/x86/tests/regs_load.S b/tools/perf/arch/x86/tests/regs_load.S
new file mode 100644
index 00000000000..60875d5c556
--- /dev/null
+++ b/tools/perf/arch/x86/tests/regs_load.S
@@ -0,0 +1,98 @@
+#include <linux/linkage.h>
+
+#define AX 0
+#define BX 1 * 8
+#define CX 2 * 8
+#define DX 3 * 8
+#define SI 4 * 8
+#define DI 5 * 8
+#define BP 6 * 8
+#define SP 7 * 8
+#define IP 8 * 8
+#define FLAGS 9 * 8
+#define CS 10 * 8
+#define SS 11 * 8
+#define DS 12 * 8
+#define ES 13 * 8
+#define FS 14 * 8
+#define GS 15 * 8
+#define R8 16 * 8
+#define R9 17 * 8
+#define R10 18 * 8
+#define R11 19 * 8
+#define R12 20 * 8
+#define R13 21 * 8
+#define R14 22 * 8
+#define R15 23 * 8
+
+.text
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ENTRY(perf_regs_load)
+ movq %rax, AX(%rdi)
+ movq %rbx, BX(%rdi)
+ movq %rcx, CX(%rdi)
+ movq %rdx, DX(%rdi)
+ movq %rsi, SI(%rdi)
+ movq %rdi, DI(%rdi)
+ movq %rbp, BP(%rdi)
+
+ leaq 8(%rsp), %rax /* exclude this call. */
+ movq %rax, SP(%rdi)
+
+ movq 0(%rsp), %rax
+ movq %rax, IP(%rdi)
+
+ movq $0, FLAGS(%rdi)
+ movq $0, CS(%rdi)
+ movq $0, SS(%rdi)
+ movq $0, DS(%rdi)
+ movq $0, ES(%rdi)
+ movq $0, FS(%rdi)
+ movq $0, GS(%rdi)
+
+ movq %r8, R8(%rdi)
+ movq %r9, R9(%rdi)
+ movq %r10, R10(%rdi)
+ movq %r11, R11(%rdi)
+ movq %r12, R12(%rdi)
+ movq %r13, R13(%rdi)
+ movq %r14, R14(%rdi)
+ movq %r15, R15(%rdi)
+ ret
+ENDPROC(perf_regs_load)
+#else
+ENTRY(perf_regs_load)
+ push %edi
+ movl 8(%esp), %edi
+ movl %eax, AX(%edi)
+ movl %ebx, BX(%edi)
+ movl %ecx, CX(%edi)
+ movl %edx, DX(%edi)
+ movl %esi, SI(%edi)
+ pop %eax
+ movl %eax, DI(%edi)
+ movl %ebp, BP(%edi)
+
+ leal 4(%esp), %eax /* exclude this call. */
+ movl %eax, SP(%edi)
+
+ movl 0(%esp), %eax
+ movl %eax, IP(%edi)
+
+ movl $0, FLAGS(%edi)
+ movl $0, CS(%edi)
+ movl $0, SS(%edi)
+ movl $0, DS(%edi)
+ movl $0, ES(%edi)
+ movl $0, FS(%edi)
+ movl $0, GS(%edi)
+ ret
+ENDPROC(perf_regs_load)
+#endif
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
index b2519e49424..40021fa3129 100644
--- a/tools/perf/arch/x86/util/tsc.c
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -4,7 +4,7 @@
#include <linux/perf_event.h>
#include "../../perf.h"
-#include "../../util/types.h"
+#include <linux/types.h>
#include "../../util/debug.h"
#include "tsc.h"
diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h
index a24dec81c79..2affe0366b5 100644
--- a/tools/perf/arch/x86/util/tsc.h
+++ b/tools/perf/arch/x86/util/tsc.h
@@ -1,7 +1,7 @@
#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
-#include "../../util/types.h"
+#include <linux/types.h>
struct perf_tsc_conversion {
u16 time_shift;
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
new file mode 100644
index 00000000000..c4b72176ca8
--- /dev/null
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -0,0 +1,51 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[17];
+ unsigned nregs;
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \
+ val; \
+})
+
+ if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(CX);
+ dwarf_regs[2] = REG(DX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SP);
+ dwarf_regs[5] = REG(BP);
+ dwarf_regs[6] = REG(SI);
+ dwarf_regs[7] = REG(DI);
+ dwarf_regs[8] = REG(IP);
+ nregs = 9;
+ } else {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(DX);
+ dwarf_regs[2] = REG(CX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SI);
+ dwarf_regs[5] = REG(DI);
+ dwarf_regs[6] = REG(BP);
+ dwarf_regs[7] = REG(SP);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(R11);
+ dwarf_regs[12] = REG(R12);
+ dwarf_regs[13] = REG(R13);
+ dwarf_regs[14] = REG(R14);
+ dwarf_regs[15] = REG(R15);
+ dwarf_regs[16] = REG(IP);
+ nregs = 17;
+ }
+
+ return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
+}
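Both branches above hand libdw the registers already ordered by DWARF register number, starting at register 0 as passed in the dwfl_thread_state_registers() call. For the 64-bit case that ordering follows the System V x86-64 DWARF numbering; the enum below is only an illustrative reference with made-up names, not something libdw or perf defines.

/* Illustrative reference for the 64-bit ordering filled in above. */
enum sketch_dwarf_x86_64_regnum {
	SKETCH_DW_RAX = 0, SKETCH_DW_RDX = 1, SKETCH_DW_RCX = 2,
	SKETCH_DW_RBX = 3, SKETCH_DW_RSI = 4, SKETCH_DW_RDI = 5,
	SKETCH_DW_RBP = 6, SKETCH_DW_RSP = 7,
	SKETCH_DW_R8  = 8,               /* ...through r15 at number 15 */
	SKETCH_DW_RIP = 16,              /* return address column */
};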
diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
index 456a88cf5b3..3261f68c6a7 100644
--- a/tools/perf/arch/x86/util/unwind.c
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -5,7 +5,7 @@
#include "../../util/unwind.h"
#ifdef HAVE_ARCH_X86_64_SUPPORT
-int unwind__arch_reg_id(int regnum)
+int libunwind__arch_reg_id(int regnum)
{
int id;
@@ -69,7 +69,7 @@ int unwind__arch_reg_id(int regnum)
return id;
}
#else
-int unwind__arch_reg_id(int regnum)
+int libunwind__arch_reg_id(int regnum)
{
int id;
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 0fdc85269c4..eba46709b27 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -31,6 +31,9 @@ extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
extern int bench_mem_memcpy(int argc, const char **argv,
const char *prefix __maybe_unused);
extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
+extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
+extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
+extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
new file mode 100644
index 00000000000..a84206e9c4a
--- /dev/null
+++ b/tools/perf/bench/futex-hash.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
+ *
+ * This program is particularly useful for measuring the kernel's futex hash
+ * table/function implementation. In order for it to make sense, use with as
+ * many threads and futexes as possible.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+static unsigned int nthreads = 0;
+static unsigned int nsecs = 10;
+/* amount of futexes per thread */
+static unsigned int nfutexes = 1024;
+static bool fshared = false, done = false, silent = false;
+
+struct timeval start, end, runtime;
+static pthread_mutex_t thread_lock;
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+static pthread_cond_t thread_parent, thread_worker;
+
+struct worker {
+ int tid;
+ u_int32_t *futex;
+ pthread_t thread;
+ unsigned long ops;
+};
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
+ OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_hash_usage[] = {
+ "perf bench futex hash <options>",
+ NULL
+};
+
+static void *workerfn(void *arg)
+{
+ int ret;
+ unsigned int i;
+ struct worker *w = (struct worker *) arg;
+
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ do {
+ for (i = 0; i < nfutexes; i++, w->ops++) {
+ /*
+ * We want the futex calls to fail in order to stress
+ * the hashing of uaddr and not measure other steps,
+ * such as internal waitqueue handling, which would enlarge
+ * the critical region protected by hb->lock.
+ */
+ ret = futex_wait(&w->futex[i], 1234, NULL,
+ fshared ? 0 : FUTEX_PRIVATE_FLAG);
+ if (!silent &&
+ (!ret || (errno != EAGAIN && errno != EWOULDBLOCK)))
+ warn("Unexpected futex return call");
+ }
+ } while (!done);
+
+ return NULL;
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ /* inform all threads that we're done for the day */
+ done = true;
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+}
+
+static void print_summary(void)
+{
+ unsigned long avg = avg_stats(&throughput_stats);
+ double stddev = stddev_stats(&throughput_stats);
+
+ printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+ !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+ (int) runtime.tv_sec);
+}
+
+int bench_futex_hash(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ cpu_set_t cpu;
+ struct sigaction act;
+ unsigned int i, ncpus;
+ pthread_attr_t thread_attr;
+ struct worker *worker = NULL;
+
+ argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_hash_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads) /* default to the number of CPUs */
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ goto errmem;
+
+ printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
+ getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+
+ init_stats(&throughput_stats);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ threads_starting = nthreads;
+ pthread_attr_init(&thread_attr);
+ gettimeofday(&start, NULL);
+ for (i = 0; i < nthreads; i++) {
+ worker[i].tid = i;
+ worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
+ if (!worker[i].futex)
+ goto errmem;
+
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
+ (void *)(struct worker *) &worker[i]);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_create");
+
+ }
+ pthread_attr_destroy(&thread_attr);
+
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ sleep(nsecs);
+ toggle_done(0, NULL, NULL);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i].thread, NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+
+ for (i = 0; i < nthreads; i++) {
+ unsigned long t = worker[i].ops/runtime.tv_sec;
+ update_stats(&throughput_stats, t);
+ if (!silent) {
+ if (nfutexes == 1)
+ printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
+ worker[i].tid, &worker[i].futex[0], t);
+ else
+ printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
+ worker[i].tid, &worker[i].futex[0],
+ &worker[i].futex[nfutexes-1], t);
+ }
+
+ free(worker[i].futex);
+ }
+
+ print_summary();
+
+ free(worker);
+ return ret;
+errmem:
+ err(EXIT_FAILURE, "calloc");
+}
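With the options defined above, an invocation such as perf bench futex hash -t 4 -f 1024 -r 10 runs 4 threads over 1024 private futexes for 10 seconds, and -t defaults to the number of online CPUs when omitted. All three futex benchmarks use the same start barrier seen in workerfn(): each worker decrements threads_starting under thread_lock, the last one signals the parent, and everyone then sleeps on thread_worker until the parent broadcasts. Below is a distilled, self-contained sketch of that handshake (names shortened, not the benchmark's own code); like the benchmark, it trusts the broadcast and does not re-check a predicate after the wait.

#include <pthread.h>
#include <stdio.h>

#define NTHREADS 4

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t parent = PTHREAD_COND_INITIALIZER;
static pthread_cond_t worker = PTHREAD_COND_INITIALIZER;
static unsigned int starting = NTHREADS;

static void *workerfn(void *arg)
{
	(void)arg;

	pthread_mutex_lock(&lock);
	if (--starting == 0)
		pthread_cond_signal(&parent);   /* last one in: wake the parent */
	pthread_cond_wait(&worker, &lock);      /* ...then wait for the go signal */
	pthread_mutex_unlock(&lock);

	/* the timed or blocking work would start here */
	return NULL;
}

int main(void)
{
	pthread_t t[NTHREADS];
	int i;

	for (i = 0; i < NTHREADS; i++)
		pthread_create(&t[i], NULL, workerfn, NULL);

	pthread_mutex_lock(&lock);
	while (starting)                        /* wait until every worker checked in */
		pthread_cond_wait(&parent, &lock);
	pthread_cond_broadcast(&worker);        /* release them all at once */
	pthread_mutex_unlock(&lock);

	for (i = 0; i < NTHREADS; i++)
		pthread_join(t[i], NULL);

	puts("all workers released");
	return 0;
}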
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
new file mode 100644
index 00000000000..a16255876f1
--- /dev/null
+++ b/tools/perf/bench/futex-requeue.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-requeue: Block a bunch of threads on futex1 and requeue them
+ * on futex2, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread
+ * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+static u_int32_t futex1 = 0, futex2 = 0;
+
+/*
+ * How many tasks to requeue at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nrequeue = 1;
+
+/*
+ * There can be significant variance from run to run;
+ * the more repeats, the more exact the overall avg and
+ * the better the estimate of the futex latency.
+ */
+static unsigned int repeat = 10;
+
+static pthread_t *worker;
+static bool done = 0, silent = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats requeuetime_stats, requeued_stats;
+static unsigned int ncpus, threads_starting, nthreads = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
+ OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_END()
+};
+
+static const char * const bench_futex_requeue_usage[] = {
+ "perf bench futex requeue <options>",
+ NULL
+};
+
+static void print_summary(void)
+{
+ double requeuetime_avg = avg_stats(&requeuetime_stats);
+ double requeuetime_stddev = stddev_stats(&requeuetime_stats);
+ unsigned int requeued_avg = avg_stats(&requeued_stats);
+
+ printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ requeued_avg,
+ nthreads,
+ requeuetime_avg/1e3,
+ rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+}
+
+static void *workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
+ return NULL;
+}
+
+static void block_threads(pthread_t *w,
+ pthread_attr_t thread_attr)
+{
+ cpu_set_t cpu;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ /* create and block all threads */
+ for (i = 0; i < nthreads; i++) {
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_requeue(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+
+ argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
+ if (argc)
+ goto err;
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
+ "%d at a time.\n\n",
+ getpid(), nthreads, &futex1, &futex2, nrequeue);
+
+ init_stats(&requeued_stats);
+ init_stats(&requeuetime_stats);
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < repeat && !done; j++) {
+ unsigned int nrequeued = 0;
+ struct timeval start, end, runtime;
+
+ /* create, launch & block all threads */
+ block_threads(worker, thread_attr);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start requeueing */
+ gettimeofday(&start, NULL);
+ for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue)
+ /*
+ * Do not wakeup any tasks blocked on futex1, allowing
+ * us to really measure futex_wait functionality.
+ */
+ futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue,
+ FUTEX_PRIVATE_FLAG);
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+
+ update_stats(&requeued_stats, nrequeued);
+ update_stats(&requeuetime_stats, runtime.tv_usec);
+
+ if (!silent) {
+ printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
+ j + 1, nrequeued, nthreads, runtime.tv_usec/1e3);
+ }
+
+ /* everybody should be blocked on futex2, wake'em up */
+ nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG);
+ if (nthreads != nrequeued)
+ warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(worker);
+ return ret;
+err:
+ usage_with_options(bench_futex_requeue_usage, options);
+ exit(EXIT_FAILURE);
+}
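The requeue loop above moves waiters from futex1 to futex2 without waking any of them (nr_wake is 0), nrequeue at a time, so only the requeue path is timed. Here is a reduced sketch of that loop built on the futex.h wrappers added by this patch; the helper name is invented for illustration.

#include <errno.h>
#include "futex.h"              /* the wrappers introduced by this patch */

static u_int32_t f1, f2;

/* Move every waiter blocked on f1 over to f2, nrequeue per call, waking none. */
static int requeue_all(unsigned int nthreads, unsigned int nrequeue)
{
	unsigned int moved;

	for (moved = 0; moved < nthreads; moved += nrequeue) {
		/* requeue only happens while f1 still reads 0, the expected value */
		if (futex_cmp_requeue(&f1, 0, &f2, 0, nrequeue,
				      FUTEX_PRIVATE_FLAG) < 0)
			return -errno;
	}
	return 0;
}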
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
new file mode 100644
index 00000000000..d096169b161
--- /dev/null
+++ b/tools/perf/bench/futex-wake.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread wakeups
+ * in non-error situations: all waiters are queued and all wake calls wake up
+ * one or more tasks, and thus the waitqueue is never empty.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+/* all threads will block on the same futex */
+static u_int32_t futex1 = 0;
+
+/*
+ * How many wakeups to do at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nwakes = 1;
+
+/*
+ * There can be significant variance from run to run;
+ * the more repeats, the more exact the overall avg and
+ * the better the estimate of the futex latency.
+ */
+static unsigned int repeat = 10;
+
+pthread_t *worker;
+static bool done = 0, silent = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int ncpus, threads_starting, nthreads = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
+ OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_END()
+};
+
+static const char * const bench_futex_wake_usage[] = {
+ "perf bench futex wake <options>",
+ NULL
+};
+
+static void *workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
+ return NULL;
+}
+
+static void print_summary(void)
+{
+ double waketime_avg = avg_stats(&waketime_stats);
+ double waketime_stddev = stddev_stats(&waketime_stats);
+ unsigned int wakeup_avg = avg_stats(&wakeup_stats);
+
+ printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ wakeup_avg,
+ nthreads,
+ waketime_avg/1e3,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void block_threads(pthread_t *w,
+ pthread_attr_t thread_attr)
+{
+ cpu_set_t cpu;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ /* create and block all threads */
+ for (i = 0; i < nthreads; i++) {
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_wake(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+
+ argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_wake_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
+ "waking up %d at a time.\n\n",
+ getpid(), nthreads, &futex1, nwakes);
+
+ init_stats(&wakeup_stats);
+ init_stats(&waketime_stats);
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < repeat && !done; j++) {
+ unsigned int nwoken = 0;
+ struct timeval start, end, runtime;
+
+ /* create, launch & block all threads */
+ block_threads(worker, thread_attr);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start waking folks up */
+ gettimeofday(&start, NULL);
+ while (nwoken != nthreads)
+ nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG);
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+
+ update_stats(&wakeup_stats, nwoken);
+ update_stats(&waketime_stats, runtime.tv_usec);
+
+ if (!silent) {
+ printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
+ j + 1, nwoken, nthreads, runtime.tv_usec/1e3);
+ }
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(worker);
+ return ret;
+}
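The wake loop above relies on FUTEX_WAKE returning the number of waiters actually woken, so accumulating the return values until the count reaches nthreads releases every blocked worker even when nwakes does not divide nthreads evenly. A small sketch of that accumulation, with the helper name invented for illustration:

#include "futex.h"

/* Wake everyone blocked on the word, nwakes per syscall; returns total woken. */
static unsigned int wake_everyone(u_int32_t *word, unsigned int nthreads,
				  unsigned int nwakes)
{
	unsigned int nwoken = 0;

	while (nwoken != nthreads)
		nwoken += futex_wake(word, nwakes, FUTEX_PRIVATE_FLAG);

	return nwoken;
}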
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
new file mode 100644
index 00000000000..71f2844cf97
--- /dev/null
+++ b/tools/perf/bench/futex.h
@@ -0,0 +1,71 @@
+/*
+ * Glibc independent futex library for testing kernel functionality.
+ * Shamelessly stolen from Darren Hart <dvhltc@us.ibm.com>
+ * http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/
+ */
+
+#ifndef _FUTEX_H
+#define _FUTEX_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: typically an absolute struct timespec (except where noted
+ * otherwise). Overloaded by some ops
+ * @uaddr2: address of second futex for some ops
+ * @val3: varies by op
+ * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of a static inline function as
+ * some of the types are overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+ syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout: relative timeout
+ */
+static inline int
+futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake: wake up to this many tasks
+ */
+static inline int
+futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @nr_wake: wake up to this many tasks
+ * @nr_requeue: requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+#endif /* _FUTEX_H */
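The wrappers above are enough to build a complete wait/wake pairing. A minimal sketch follows (illustrative only: a real user would publish the futex word with atomic operations and handle EINTR explicitly).

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>
#include "futex.h"

static u_int32_t word;                            /* the futex word, initially 0 */

static void *waiter(void *arg)
{
	(void)arg;
	while (!word)                             /* block while the word reads 0 */
		futex_wait(&word, 0, NULL, FUTEX_PRIVATE_FLAG);
	return NULL;
}

int main(void)
{
	pthread_t t;

	pthread_create(&t, NULL, waiter, NULL);
	sleep(1);                                 /* give the waiter time to block */
	word = 1;                                 /* publish the new value... */
	futex_wake(&word, 1, FUTEX_PRIVATE_FLAG); /* ...and wake one waiter */
	pthread_join(t, NULL);
	puts("waiter woken");
	return 0;
}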
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index d4c83c60b9b..ebfa163b80b 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -1593,6 +1593,11 @@ static void init_params(struct params *p, const char *name, int argc, const char
p->data_rand_walk = true;
p->nr_loops = -1;
p->init_random = true;
+ p->mb_global_str = "1";
+ p->nr_proc = 1;
+ p->nr_threads = 1;
+ p->nr_secs = 5;
+ p->run_all = argc == 1;
}
static int run_bench_numa(const char *name, const char **argv)
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 0da603b79b6..1ec429fef2b 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -46,7 +46,7 @@ struct perf_annotate {
};
static int perf_evsel__add_sample(struct perf_evsel *evsel,
- struct perf_sample *sample,
+ struct perf_sample *sample __maybe_unused,
struct addr_location *al,
struct perf_annotate *ann)
{
@@ -65,13 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
- he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0);
+ he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
+ true);
if (he == NULL)
return -ENOMEM;
ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
- evsel->hists.stats.total_period += sample->period;
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ hists__inc_nr_samples(&evsel->hists, true);
return ret;
}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index e47f90cc7b9..1e6e7771054 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -12,6 +12,7 @@
* sched ... scheduler and IPC performance
* mem ... memory access performance
* numa ... NUMA scheduling and MM performance
+ * futex ... Futex performance
*/
#include "perf.h"
#include "util/util.h"
@@ -54,6 +55,14 @@ static struct bench mem_benchmarks[] = {
{ NULL, NULL, NULL }
};
+static struct bench futex_benchmarks[] = {
+ { "hash", "Benchmark for futex hash table", bench_futex_hash },
+ { "wake", "Benchmark for futex wake calls", bench_futex_wake },
+ { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
+ { "all", "Test all futex benchmarks", NULL },
+ { NULL, NULL, NULL }
+};
+
struct collection {
const char *name;
const char *summary;
@@ -61,11 +70,12 @@ struct collection {
};
static struct collection collections[] = {
- { "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
+ { "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
{ "mem", "Memory access benchmarks", mem_benchmarks },
#ifdef HAVE_LIBNUMA_SUPPORT
{ "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
#endif
+ {"futex", "Futex stressing benchmarks", futex_benchmarks },
{ "all", "All benchmarks", NULL },
{ NULL, NULL, NULL }
};
@@ -76,7 +86,7 @@ static struct collection collections[] = {
/* Iterate over all benchmarks within a collection: */
#define for_each_bench(coll, bench) \
- for (bench = coll->benchmarks; bench->name; bench++)
+ for (bench = coll->benchmarks; bench && bench->name; bench++)
static void dump_benchmarks(struct collection *coll)
{
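With the table above, the new benchmarks become reachable as perf bench futex hash, perf bench futex wake, perf bench futex requeue, or perf bench futex all. The added "bench &&" guard in for_each_bench matters because the "all" rows, both the futex entry and the top-level collection, deliberately carry a NULL benchmark list, so iterating such a collection with the old macro would dereference NULL. A tiny sketch of the failure mode, using reduced, hypothetical types:

#include <stddef.h>
#include <stdio.h>

struct bench      { const char *name; };
struct collection { const char *name; struct bench *benchmarks; };

#define for_each_bench(coll, bench) \
	for (bench = (coll)->benchmarks; bench && bench->name; bench++)

int main(void)
{
	struct collection all = { "all", NULL };   /* no per-collection list */
	struct bench *b;

	for_each_bench(&all, b)                    /* safe: the body never runs */
		puts(b->name);
	return 0;
}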
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index cfede86161d..b22dbb16f87 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -63,11 +63,35 @@ static int build_id_cache__kcore_dir(char *dir, size_t sz)
return 0;
}
+static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
+{
+ char from[PATH_MAX];
+ char to[PATH_MAX];
+ const char *name;
+ u64 addr1 = 0, addr2 = 0;
+ int i;
+
+ scnprintf(from, sizeof(from), "%s/kallsyms", from_dir);
+ scnprintf(to, sizeof(to), "%s/kallsyms", to_dir);
+
+ for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
+ addr1 = kallsyms__get_function_start(from, name);
+ if (addr1)
+ break;
+ }
+
+ if (name)
+ addr2 = kallsyms__get_function_start(to, name);
+
+ return addr1 == addr2;
+}
+
static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
size_t to_dir_sz)
{
char from[PATH_MAX];
char to[PATH_MAX];
+ char to_subdir[PATH_MAX];
struct dirent *dent;
int ret = -1;
DIR *d;
@@ -86,10 +110,11 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
continue;
scnprintf(to, sizeof(to), "%s/%s/modules", to_dir,
dent->d_name);
- if (!compare_proc_modules(from, to)) {
- scnprintf(to, sizeof(to), "%s/%s", to_dir,
- dent->d_name);
- strlcpy(to_dir, to, to_dir_sz);
+ scnprintf(to_subdir, sizeof(to_subdir), "%s/%s",
+ to_dir, dent->d_name);
+ if (!compare_proc_modules(from, to) &&
+ same_kallsyms_reloc(from_dir, to_subdir)) {
+ strlcpy(to_dir, to_subdir, to_dir_sz);
ret = 0;
break;
}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index a77e31246c0..9a5a035cb42 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -60,7 +60,6 @@ static int data__files_cnt;
#define data__for_each_file(i, d) data__for_each_file_start(i, d, 0)
#define data__for_each_file_new(i, d) data__for_each_file_start(i, d, 1)
-static char diff__default_sort_order[] = "dso,symbol";
static bool force;
static bool show_period;
static bool show_formula;
@@ -220,7 +219,8 @@ static int setup_compute(const struct option *opt, const char *str,
static double period_percent(struct hist_entry *he, u64 period)
{
- u64 total = he->hists->stats.total_period;
+ u64 total = hists__total_period(he->hists);
+
return (period * 100.0) / total;
}
@@ -259,11 +259,18 @@ static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
char *buf, size_t size)
{
+ u64 he_total = he->hists->stats.total_period;
+ u64 pair_total = pair->hists->stats.total_period;
+
+ if (symbol_conf.filter_relative) {
+ he_total = he->hists->stats.total_non_filtered_period;
+ pair_total = pair->hists->stats.total_non_filtered_period;
+ }
return scnprintf(buf, size,
"(%" PRIu64 " * 100 / %" PRIu64 ") - "
"(%" PRIu64 " * 100 / %" PRIu64 ")",
- pair->stat.period, pair->hists->stats.total_period,
- he->stat.period, he->hists->stats.total_period);
+ pair->stat.period, pair_total,
+ he->stat.period, he_total);
}
static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
@@ -308,7 +315,7 @@ static int hists__add_entry(struct hists *hists,
u64 weight, u64 transaction)
{
if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
- transaction) != NULL)
+ transaction, true) != NULL)
return 0;
return -ENOMEM;
}
@@ -327,16 +334,22 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
- if (al.filtered)
- return 0;
-
if (hists__add_entry(&evsel->hists, &al, sample->period,
sample->weight, sample->transaction)) {
pr_warning("problem incrementing symbol period, skipping event\n");
return -1;
}
+ /*
+ * The total_period is updated here before going to the output
+ * tree since normally only the baseline hists will call
+ * hists__output_resort() and precompute needs the total
+ * period in order to sort entries by percentage delta.
+ */
evsel->hists.stats.total_period += sample->period;
+ if (!al.filtered)
+ evsel->hists.stats.total_non_filtered_period += sample->period;
+
return 0;
}
@@ -564,8 +577,7 @@ static void hists__compute_resort(struct hists *hists)
hists->entries = RB_ROOT;
next = rb_first(root);
- hists->nr_entries = 0;
- hists->stats.total_period = 0;
+ hists__reset_stats(hists);
hists__reset_col_len(hists);
while (next != NULL) {
@@ -575,7 +587,10 @@ static void hists__compute_resort(struct hists *hists)
next = rb_next(&he->rb_node_in);
insert_hist_entry_by_compute(&hists->entries, he, compute);
- hists__inc_nr_entries(hists, he);
+ hists__inc_stats(hists, he);
+
+ if (!he->filtered)
+ hists__calc_col_len(hists, he);
}
}
@@ -725,20 +740,24 @@ static const struct option options[] = {
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent"),
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ " Please refer the man page for the complete list."),
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
static double baseline_percent(struct hist_entry *he)
{
- struct hists *hists = he->hists;
- return 100.0 * he->stat.period / hists->stats.total_period;
+ u64 total = hists__total_period(he->hists);
+
+ return 100.0 * he->stat.period / total;
}
static int hpp__color_baseline(struct perf_hpp_fmt *fmt,
@@ -952,8 +971,8 @@ static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp,
dfmt->header_width, buf);
}
-static int hpp__header(struct perf_hpp_fmt *fmt,
- struct perf_hpp *hpp)
+static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct perf_evsel *evsel __maybe_unused)
{
struct diff_hpp_fmt *dfmt =
container_of(fmt, struct diff_hpp_fmt, fmt);
@@ -963,7 +982,8 @@ static int hpp__header(struct perf_hpp_fmt *fmt,
}
static int hpp__width(struct perf_hpp_fmt *fmt,
- struct perf_hpp *hpp __maybe_unused)
+ struct perf_hpp *hpp __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused)
{
struct diff_hpp_fmt *dfmt =
container_of(fmt, struct diff_hpp_fmt, fmt);
@@ -1119,7 +1139,8 @@ static int data_init(int argc, const char **argv)
int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
{
- sort_order = diff__default_sort_order;
+ perf_config(perf_default_config, NULL);
+
argc = parse_options(argc, argv, options, diff_usage, 0);
if (symbol__init() < 0)
@@ -1130,6 +1151,8 @@ int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
ui_init();
+ sort__mode = SORT_MODE__DIFF;
+
if (setup_sorting() < 0)
usage_with_options(diff_usage, options);
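The new --percentage option chooses which denominator a hist entry is scaled by: the full total_period, or only total_non_filtered_period when percentages should be relative to the entries that survive filtering. A small sketch of that selection; the field names come from the hunks above, while the function itself is hypothetical.

#include <stdbool.h>
#include <stdint.h>

/* --percentage absolute: divide by everything sampled;
 * --percentage relative: divide by what is left after filtering. */
static double entry_percent(uint64_t period, uint64_t total_period,
			    uint64_t total_non_filtered_period,
			    bool filter_relative)
{
	uint64_t total = filter_relative ? total_non_filtered_period
					 : total_period;

	return total ? 100.0 * (double)period / (double)total : 0.0;
}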
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index b3466018bbd..16c7c11ad06 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -72,7 +72,7 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
if (ret)
return ret;
- if (&inject->output.is_pipe)
+ if (!inject->output.is_pipe)
return 0;
return perf_event__repipe_synth(tool, event);
@@ -209,7 +209,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- thread = machine__findnew_thread(machine, sample->pid, sample->pid);
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL) {
pr_err("problem processing %d event, skipping it.\n",
event->header.type);
@@ -312,7 +312,6 @@ found:
sample_sw.period = sample->period;
sample_sw.time = sample->time;
perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
- evsel->attr.sample_regs_user,
evsel->attr.read_format, &sample_sw,
false);
build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
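The one-character fix above is easy to miss: the old condition tested the address of output.is_pipe rather than its value, and the address of a struct member is never NULL, so the early return always fired. A stripped-down illustration with hypothetical reduced types:

#include <stdio.h>

struct perf_data_file { int is_pipe; };
struct perf_inject    { struct perf_data_file output; };

int main(void)
{
	struct perf_inject inject = { { 0 } };

	if (&inject.output.is_pipe)     /* old test: an address, always non-NULL */
		puts("always taken");
	if (!inject.output.is_pipe)     /* fixed test: the flag's actual value */
		puts("taken only when the output is not a pipe");
	return 0;
}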
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 929462aa494..bef3376bfaf 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -14,6 +14,7 @@
#include "util/parse-options.h"
#include "util/trace-event.h"
#include "util/data.h"
+#include "util/cpumap.h"
#include "util/debug.h"
@@ -31,9 +32,6 @@ static int caller_lines = -1;
static bool raw_ip;
-static int *cpunode_map;
-static int max_cpu_num;
-
struct alloc_stat {
u64 call_site;
u64 ptr;
@@ -55,76 +53,6 @@ static struct rb_root root_caller_sorted;
static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;
-#define PATH_SYS_NODE "/sys/devices/system/node"
-
-static int init_cpunode_map(void)
-{
- FILE *fp;
- int i, err = -1;
-
- fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
- if (!fp) {
- max_cpu_num = 4096;
- return 0;
- }
-
- if (fscanf(fp, "%d", &max_cpu_num) < 1) {
- pr_err("Failed to read 'kernel_max' from sysfs");
- goto out_close;
- }
-
- max_cpu_num++;
-
- cpunode_map = calloc(max_cpu_num, sizeof(int));
- if (!cpunode_map) {
- pr_err("%s: calloc failed\n", __func__);
- goto out_close;
- }
-
- for (i = 0; i < max_cpu_num; i++)
- cpunode_map[i] = -1;
-
- err = 0;
-out_close:
- fclose(fp);
- return err;
-}
-
-static int setup_cpunode_map(void)
-{
- struct dirent *dent1, *dent2;
- DIR *dir1, *dir2;
- unsigned int cpu, mem;
- char buf[PATH_MAX];
-
- if (init_cpunode_map())
- return -1;
-
- dir1 = opendir(PATH_SYS_NODE);
- if (!dir1)
- return 0;
-
- while ((dent1 = readdir(dir1)) != NULL) {
- if (dent1->d_type != DT_DIR ||
- sscanf(dent1->d_name, "node%u", &mem) < 1)
- continue;
-
- snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
- dir2 = opendir(buf);
- if (!dir2)
- continue;
- while ((dent2 = readdir(dir2)) != NULL) {
- if (dent2->d_type != DT_LNK ||
- sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
- continue;
- cpunode_map[cpu] = mem;
- }
- closedir(dir2);
- }
- closedir(dir1);
- return 0;
-}
-
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
int bytes_req, int bytes_alloc, int cpu)
{
@@ -235,7 +163,7 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
int ret = perf_evsel__process_alloc_event(evsel, sample);
if (!ret) {
- int node1 = cpunode_map[sample->cpu],
+ int node1 = cpu__get_node(sample->cpu),
node2 = perf_evsel__intval(evsel, sample, "node");
if (node1 != node2)
@@ -307,7 +235,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct machine *machine)
{
struct thread *thread = machine__findnew_thread(machine, sample->pid,
- sample->pid);
+ sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
@@ -756,11 +684,13 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_END()
};
- const char * const kmem_usage[] = {
- "perf kmem [<options>] {record|stat}",
+ const char *const kmem_subcommands[] = { "record", "stat", NULL };
+ const char *kmem_usage[] = {
+ NULL,
NULL
};
- argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
+ argc = parse_options_subcommand(argc, argv, kmem_options,
+ kmem_subcommands, kmem_usage, 0);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
@@ -770,7 +700,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
if (!strncmp(argv[0], "rec", 3)) {
return __cmd_record(argc, argv);
} else if (!strcmp(argv[0], "stat")) {
- if (setup_cpunode_map())
+ if (cpu__setup_cpunode_map())
return -1;
if (list_empty(&caller_sort))
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index a7350519c63..0f1e5a2f6ad 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -404,6 +404,7 @@ static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
}
event->key = *key;
+ init_stats(&event->total.stats);
return event;
}
@@ -1691,17 +1692,15 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_END()
};
-
- const char * const kvm_usage[] = {
- "perf kvm [<options>] {top|record|report|diff|buildid-list|stat}",
- NULL
- };
+ const char *const kvm_subcommands[] = { "top", "record", "report", "diff",
+ "buildid-list", "stat", NULL };
+ const char *kvm_usage[] = { NULL, NULL };
perf_host = 0;
perf_guest = 1;
- argc = parse_options(argc, argv, kvm_options, kvm_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, kvm_options, kvm_subcommands, kvm_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(kvm_usage, kvm_options);
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index c852c7a85d3..6148afc995c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -961,8 +961,10 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
"perf lock info [<options>]",
NULL
};
- const char * const lock_usage[] = {
- "perf lock [<options>] {record|report|script|info}",
+ const char *const lock_subcommands[] = { "record", "report", "script",
+ "info", NULL };
+ const char *lock_usage[] = {
+ NULL,
NULL
};
const char * const report_usage[] = {
@@ -976,8 +978,8 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < LOCKHASH_SIZE; i++)
INIT_LIST_HEAD(lockhash_table + i);
- argc = parse_options(argc, argv, lock_options, lock_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
+ lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(lock_usage, lock_options);
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 2e3ade69a58..4a1a6c94a5e 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -21,11 +21,6 @@ struct perf_mem {
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
-static const char * const mem_usage[] = {
- "perf mem [<options>] {record <command> |report}",
- NULL
-};
-
static int __cmd_record(int argc, const char **argv)
{
int rec_argc, i = 0, j;
@@ -220,9 +215,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
" between columns '.' is reserved."),
OPT_END()
};
+ const char *const mem_subcommands[] = { "record", "report", NULL };
+ const char *mem_usage[] = {
+ NULL,
+ NULL
+ };
+
- argc = parse_options(argc, argv, mem_options, mem_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
+ mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
usage_with_options(mem_usage, mem_options);
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 78948882e3d..c63fa292507 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -268,9 +268,9 @@ static int opt_set_filter(const struct option *opt __maybe_unused,
return 0;
}
-static void init_params(void)
+static int init_params(void)
{
- line_range__init(&params.line_range);
+ return line_range__init(&params.line_range);
}
static void cleanup_params(void)
@@ -288,6 +288,13 @@ static void cleanup_params(void)
memset(&params, 0, sizeof(params));
}
+static void pr_err_with_code(const char *msg, int err)
+{
+ pr_err("%s", msg);
+ pr_debug(" Reason: %s (Code: %d)", strerror(-err), err);
+ pr_err("\n");
+}
+
static int
__cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
{
@@ -379,7 +386,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
}
ret = parse_probe_event_argv(argc, argv);
if (ret < 0) {
- pr_err(" Error: Parse Error. (%d)\n", ret);
+ pr_err_with_code(" Error: Command Parse Error.", ret);
return ret;
}
}
@@ -419,8 +426,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
}
ret = show_perf_probe_events();
if (ret < 0)
- pr_err(" Error: Failed to show event list. (%d)\n",
- ret);
+ pr_err_with_code(" Error: Failed to show event list.", ret);
return ret;
}
if (params.show_funcs) {
@@ -445,8 +451,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
strfilter__delete(params.filter);
params.filter = NULL;
if (ret < 0)
- pr_err(" Error: Failed to show functions."
- " (%d)\n", ret);
+ pr_err_with_code(" Error: Failed to show functions.", ret);
return ret;
}
@@ -464,7 +469,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
ret = show_line_range(&params.line_range, params.target);
if (ret < 0)
- pr_err(" Error: Failed to show lines. (%d)\n", ret);
+ pr_err_with_code(" Error: Failed to show lines.", ret);
return ret;
}
if (params.show_vars) {
@@ -485,7 +490,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
strfilter__delete(params.filter);
params.filter = NULL;
if (ret < 0)
- pr_err(" Error: Failed to show vars. (%d)\n", ret);
+ pr_err_with_code(" Error: Failed to show vars.", ret);
return ret;
}
#endif
@@ -493,7 +498,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
if (params.dellist) {
ret = del_perf_probe_events(params.dellist);
if (ret < 0) {
- pr_err(" Error: Failed to delete events. (%d)\n", ret);
+ pr_err_with_code(" Error: Failed to delete events.", ret);
return ret;
}
}
@@ -504,7 +509,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
params.target,
params.force_add);
if (ret < 0) {
- pr_err(" Error: Failed to add events. (%d)\n", ret);
+ pr_err_with_code(" Error: Failed to add events.", ret);
return ret;
}
}
@@ -515,9 +520,11 @@ int cmd_probe(int argc, const char **argv, const char *prefix)
{
int ret;
- init_params();
- ret = __cmd_probe(argc, argv, prefix);
- cleanup_params();
+ ret = init_params();
+ if (!ret) {
+ ret = __cmd_probe(argc, argv, prefix);
+ cleanup_params();
+ }
return ret;
}
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 3c394bf16fa..378b85b731a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -30,37 +30,6 @@
#include <sched.h>
#include <sys/mman.h>
-#ifndef HAVE_ON_EXIT_SUPPORT
-#ifndef ATEXIT_MAX
-#define ATEXIT_MAX 32
-#endif
-static int __on_exit_count = 0;
-typedef void (*on_exit_func_t) (int, void *);
-static on_exit_func_t __on_exit_funcs[ATEXIT_MAX];
-static void *__on_exit_args[ATEXIT_MAX];
-static int __exitcode = 0;
-static void __handle_on_exit_funcs(void);
-static int on_exit(on_exit_func_t function, void *arg);
-#define exit(x) (exit)(__exitcode = (x))
-
-static int on_exit(on_exit_func_t function, void *arg)
-{
- if (__on_exit_count == ATEXIT_MAX)
- return -ENOMEM;
- else if (__on_exit_count == 0)
- atexit(__handle_on_exit_funcs);
- __on_exit_funcs[__on_exit_count] = function;
- __on_exit_args[__on_exit_count++] = arg;
- return 0;
-}
-
-static void __handle_on_exit_funcs(void)
-{
- int i;
- for (i = 0; i < __on_exit_count; i++)
- __on_exit_funcs[i] (__exitcode, __on_exit_args[i]);
-}
-#endif
struct record {
struct perf_tool tool;
@@ -147,29 +116,19 @@ static void sig_handler(int sig)
{
if (sig == SIGCHLD)
child_finished = 1;
+ else
+ signr = sig;
done = 1;
- signr = sig;
}
-static void record__sig_exit(int exit_status __maybe_unused, void *arg)
+static void record__sig_exit(void)
{
- struct record *rec = arg;
- int status;
-
- if (rec->evlist->workload.pid > 0) {
- if (!child_finished)
- kill(rec->evlist->workload.pid, SIGTERM);
-
- wait(&status);
- if (WIFSIGNALED(status))
- psignal(WTERMSIG(status), rec->progname);
- }
-
- if (signr == -1 || signr == SIGUSR1)
+ if (signr == -1)
return;
signal(signr, SIG_DFL);
+ raise(signr);
}
static int record__open(struct record *rec)
@@ -243,27 +202,6 @@ static int process_buildids(struct record *rec)
size, &build_id__mark_dso_hit_ops);
}
-static void record__exit(int status, void *arg)
-{
- struct record *rec = arg;
- struct perf_data_file *file = &rec->file;
-
- if (status != 0)
- return;
-
- if (!file->is_pipe) {
- rec->session->header.data_size += rec->bytes_written;
-
- if (!rec->no_buildid)
- process_buildids(rec);
- perf_session__write_header(rec->session, rec->evlist,
- file->fd, true);
- perf_session__delete(rec->session);
- perf_evlist__delete(rec->evlist);
- symbol__exit();
- }
-}
-
static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
{
int err;
@@ -287,10 +225,7 @@ static void perf_event__synthesize_guest_os(struct machine *machine, void *data)
* have no _text sometimes.
*/
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
- machine, "_text");
- if (err < 0)
- err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
- machine, "_stext");
+ machine);
if (err < 0)
pr_err("Couldn't record guest kernel [%d]'s reference"
" relocation symbol.\n", machine->pid);
@@ -347,18 +282,19 @@ static volatile int workload_exec_errno;
* if the fork fails, since we asked by setting its
* want_signal to true.
*/
-static void workload_exec_failed_signal(int signo, siginfo_t *info,
+static void workload_exec_failed_signal(int signo __maybe_unused,
+ siginfo_t *info,
void *ucontext __maybe_unused)
{
workload_exec_errno = info->si_value.sival_int;
done = 1;
- signr = signo;
child_finished = 1;
}
static int __cmd_record(struct record *rec, int argc, const char **argv)
{
int err;
+ int status = 0;
unsigned long waking = 0;
const bool forks = argc > 0;
struct machine *machine;
@@ -370,14 +306,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
rec->progname = argv[0];
- on_exit(record__sig_exit, rec);
+ atexit(record__sig_exit);
signal(SIGCHLD, sig_handler);
signal(SIGINT, sig_handler);
signal(SIGTERM, sig_handler);
session = perf_session__new(file, false, NULL);
if (session == NULL) {
- pr_err("Not enough memory for reading perf file header\n");
+ pr_err("Perf session creation failed.\n");
return -1;
}
@@ -391,32 +327,28 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
workload_exec_failed_signal);
if (err < 0) {
pr_err("Couldn't run the workload!\n");
+ status = err;
goto out_delete_session;
}
}
if (record__open(rec) != 0) {
err = -1;
- goto out_delete_session;
+ goto out_child;
}
if (!rec->evlist->nr_groups)
perf_header__clear_feat(&session->header, HEADER_GROUP_DESC);
- /*
- * perf_session__delete(session) will be called at record__exit()
- */
- on_exit(record__exit, rec);
-
if (file->is_pipe) {
err = perf_header__write_pipe(file->fd);
if (err < 0)
- goto out_delete_session;
+ goto out_child;
} else {
err = perf_session__write_header(session, rec->evlist,
file->fd, false);
if (err < 0)
- goto out_delete_session;
+ goto out_child;
}
if (!rec->no_buildid
@@ -424,7 +356,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
pr_err("Couldn't generate buildids. "
"Use --no-buildid to profile anyway.\n");
err = -1;
- goto out_delete_session;
+ goto out_child;
}
machine = &session->machines.host;
@@ -434,7 +366,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
process_synthesized_event);
if (err < 0) {
pr_err("Couldn't synthesize attrs.\n");
- goto out_delete_session;
+ goto out_child;
}
if (have_tracepoints(&rec->evlist->entries)) {
@@ -450,17 +382,14 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
process_synthesized_event);
if (err <= 0) {
pr_err("Couldn't record tracing data.\n");
- goto out_delete_session;
+ goto out_child;
}
rec->bytes_written += err;
}
}
err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
- machine, "_text");
- if (err < 0)
- err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
- machine, "_stext");
+ machine);
if (err < 0)
pr_err("Couldn't record kernel reference relocation symbol\n"
"Symbol resolution may be skewed if relocation was used (e.g. kexec).\n"
@@ -481,7 +410,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
process_synthesized_event, opts->sample_address);
if (err != 0)
- goto out_delete_session;
+ goto out_child;
if (rec->realtime_prio) {
struct sched_param param;
@@ -490,7 +419,7 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (sched_setscheduler(0, SCHED_FIFO, &param)) {
pr_err("Could not set realtime priority.\n");
err = -1;
- goto out_delete_session;
+ goto out_child;
}
}
@@ -518,13 +447,19 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
if (record__mmap_read_all(rec) < 0) {
err = -1;
- goto out_delete_session;
+ goto out_child;
}
if (hits == rec->samples) {
if (done)
break;
err = poll(rec->evlist->pollfd, rec->evlist->nr_fds, -1);
+ /*
+ * Propagate error, only if there's any. Ignore positive
+ * number of returned events and interrupt error.
+ */
+ if (err > 0 || (err < 0 && errno == EINTR))
+ err = 0;
waking++;
}
@@ -544,28 +479,52 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
const char *emsg = strerror_r(workload_exec_errno, msg, sizeof(msg));
pr_err("Workload failed: %s\n", emsg);
err = -1;
- goto out_delete_session;
+ goto out_child;
}
- if (quiet || signr == SIGUSR1)
- return 0;
+ if (!quiet) {
+ fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
+
+ /*
+ * Approximate RIP event size: 24 bytes.
+ */
+ fprintf(stderr,
+ "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
+ (double)rec->bytes_written / 1024.0 / 1024.0,
+ file->path,
+ rec->bytes_written / 24);
+ }
- fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking);
+out_child:
+ if (forks) {
+ int exit_status;
- /*
- * Approximate RIP event size: 24 bytes.
- */
- fprintf(stderr,
- "[ perf record: Captured and wrote %.3f MB %s (~%" PRIu64 " samples) ]\n",
- (double)rec->bytes_written / 1024.0 / 1024.0,
- file->path,
- rec->bytes_written / 24);
+ if (!child_finished)
+ kill(rec->evlist->workload.pid, SIGTERM);
- return 0;
+ wait(&exit_status);
+
+ if (err < 0)
+ status = err;
+ else if (WIFEXITED(exit_status))
+ status = WEXITSTATUS(exit_status);
+ else if (WIFSIGNALED(exit_status))
+ signr = WTERMSIG(exit_status);
+ } else
+ status = err;
+
+ if (!err && !file->is_pipe) {
+ rec->session->header.data_size += rec->bytes_written;
+
+ if (!rec->no_buildid)
+ process_buildids(rec);
+ perf_session__write_header(rec->session, rec->evlist,
+ file->fd, true);
+ }
out_delete_session:
perf_session__delete(session);
- return err;
+ return status;
}
#define BRANCH_OPT(n, m) \
@@ -589,6 +548,7 @@ static const struct branch_mode branch_modes[] = {
BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+ BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
BRANCH_END
};
@@ -655,7 +615,7 @@ error:
return ret;
}
-#ifdef HAVE_LIBUNWIND_SUPPORT
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
static int get_stack_size(char *str, unsigned long *_size)
{
char *endptr;
@@ -681,7 +641,7 @@ static int get_stack_size(char *str, unsigned long *_size)
max_size, str);
return -1;
}
-#endif /* HAVE_LIBUNWIND_SUPPORT */
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
int record_parse_callchain(const char *arg, struct record_opts *opts)
{
@@ -710,7 +670,7 @@ int record_parse_callchain(const char *arg, struct record_opts *opts)
"needed for -g fp\n");
break;
-#ifdef HAVE_LIBUNWIND_SUPPORT
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
/* Dwarf style */
} else if (!strncmp(name, "dwarf", sizeof("dwarf"))) {
const unsigned long default_stack_dump_size = 8192;
@@ -726,7 +686,7 @@ int record_parse_callchain(const char *arg, struct record_opts *opts)
ret = get_stack_size(tok, &size);
opts->stack_dump_size = size;
}
-#endif /* HAVE_LIBUNWIND_SUPPORT */
+#endif /* HAVE_DWARF_UNWIND_SUPPORT */
} else {
pr_err("callchain: Unknown --call-graph option "
"value: %s\n", arg);
@@ -741,7 +701,9 @@ int record_parse_callchain(const char *arg, struct record_opts *opts)
static void callchain_debug(struct record_opts *opts)
{
- pr_debug("callchain: type %d\n", opts->call_graph);
+ static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" };
+
+ pr_debug("callchain: type %s\n", str[opts->call_graph]);
if (opts->call_graph == CALLCHAIN_DWARF)
pr_debug("callchain: stack dump size %d\n",
@@ -755,6 +717,8 @@ int record_parse_callchain_opt(const struct option *opt,
struct record_opts *opts = opt->value;
int ret;
+ opts->call_graph_enabled = !unset;
+
/* --no-call-graph */
if (unset) {
opts->call_graph = CALLCHAIN_NONE;
@@ -775,6 +739,8 @@ int record_callchain_opt(const struct option *opt,
{
struct record_opts *opts = opt->value;
+ opts->call_graph_enabled = !unset;
+
if (opts->call_graph == CALLCHAIN_NONE)
opts->call_graph = CALLCHAIN_FP;
@@ -782,6 +748,16 @@ int record_callchain_opt(const struct option *opt,
return 0;
}
+static int perf_record_config(const char *var, const char *value, void *cb)
+{
+ struct record *rec = cb;
+
+ if (!strcmp(var, "record.call-graph"))
+ return record_parse_callchain(value, &rec->opts);
+
+ return perf_default_config(var, value, cb);
+}
+
static const char * const record_usage[] = {
"perf record [<options>] [<command>]",
"perf record [<options>] -- <command> [<options>]",
@@ -813,7 +789,7 @@ static struct record record = {
#define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: "
-#ifdef HAVE_LIBUNWIND_SUPPORT
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf";
#else
const char record_callchain_help[] = CALLCHAIN_HELP "fp";
@@ -913,6 +889,8 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
if (rec->evlist == NULL)
return -ENOMEM;
+ perf_config(perf_record_config, rec);
+
argc = parse_options(argc, argv, record_options, record_usage,
PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc && target__none(&rec->opts.target))
@@ -976,6 +954,7 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
err = __cmd_record(&record, argc, argv);
out_symbol_exit:
+ perf_evlist__delete(rec->evlist);
symbol__exit();
return err;
}
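The signal rework above follows a defer-and-re-raise pattern: the handler only records the signal and sets done, the record loop drains and writes its data, and record__sig_exit() restores the default action and re-raises at exit so the process still terminates with the original signal's status. A self-contained sketch of that pattern (not perf code):

#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

static volatile sig_atomic_t done;
static volatile sig_atomic_t signr = -1;

static void sig_handler(int sig)
{
	signr = sig;            /* remember which signal stopped us */
	done = 1;               /* let the main loop finish its writes */
}

static void sig_exit(void)
{
	if (signr == -1)
		return;
	signal(signr, SIG_DFL); /* restore the default action... */
	raise(signr);           /* ...and die with the original signal */
}

int main(void)
{
	atexit(sig_exit);
	signal(SIGINT, sig_handler);
	signal(SIGTERM, sig_handler);

	while (!done)
		pause();        /* stand-in for the mmap/poll record loop */

	puts("flushed data, exiting cleanly");
	return 0;
}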
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 3c53ec268fb..21d830bafff 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -57,6 +57,7 @@ struct report {
const char *cpu_list;
const char *symbol_filter_str;
float min_percent;
+ u64 nr_entries;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
@@ -71,148 +72,69 @@ static int report__config(const char *var, const char *value, void *cb)
rep->min_percent = strtof(value, NULL);
return 0;
}
+ if (!strcmp(var, "report.children")) {
+ symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+ return 0;
+ }
return perf_default_config(var, value, cb);
}
-static int report__add_mem_hist_entry(struct perf_tool *tool, struct addr_location *al,
- struct perf_sample *sample, struct perf_evsel *evsel,
- union perf_event *event)
+static void report__inc_stats(struct report *rep, struct hist_entry *he)
{
- struct report *rep = container_of(tool, struct report, tool);
- struct symbol *parent = NULL;
- u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- struct hist_entry *he;
- struct mem_info *mi, *mx;
- uint64_t cost;
- int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
-
- if (err)
- return err;
-
- mi = machine__resolve_mem(al->machine, al->thread, sample, cpumode);
- if (!mi)
- return -ENOMEM;
-
- if (rep->hide_unresolved && !al->sym)
- return 0;
-
- cost = sample->weight;
- if (!cost)
- cost = 1;
-
/*
- * must pass period=weight in order to get the correct
- * sorting from hists__collapse_resort() which is solely
- * based on periods. We want sorting be done on nr_events * weight
- * and this is indirectly achieved by passing period=weight here
- * and the he_stat__add_period() function.
+ * The @he is either a newly created entry or an existing one that
+ * is merging the current sample. We only want to count new ones,
+ * so check that ->nr_events is 1.
*/
- he = __hists__add_entry(&evsel->hists, al, parent, NULL, mi,
- cost, cost, 0);
- if (!he)
- return -ENOMEM;
-
- err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
- if (err)
- goto out;
-
- mx = he->mem_info;
- err = addr_map_symbol__inc_samples(&mx->daddr, evsel->idx);
- if (err)
- goto out;
-
- evsel->hists.stats.total_period += cost;
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
- err = hist_entry__append_callchain(he, sample);
-out:
- return err;
+ if (he->stat.nr_events == 1)
+ rep->nr_entries++;
}
-static int report__add_branch_hist_entry(struct perf_tool *tool, struct addr_location *al,
- struct perf_sample *sample, struct perf_evsel *evsel)
+static int hist_iter__report_callback(struct hist_entry_iter *iter,
+ struct addr_location *al, bool single,
+ void *arg)
{
- struct report *rep = container_of(tool, struct report, tool);
- struct symbol *parent = NULL;
- unsigned i;
- struct hist_entry *he;
- struct branch_info *bi, *bx;
- int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
-
- if (err)
- return err;
-
- bi = machine__resolve_bstack(al->machine, al->thread,
- sample->branch_stack);
- if (!bi)
- return -ENOMEM;
+ int err = 0;
+ struct report *rep = arg;
+ struct hist_entry *he = iter->he;
+ struct perf_evsel *evsel = iter->evsel;
+ struct mem_info *mi;
+ struct branch_info *bi;
- for (i = 0; i < sample->branch_stack->nr; i++) {
- if (rep->hide_unresolved && !(bi[i].from.sym && bi[i].to.sym))
- continue;
+ report__inc_stats(rep, he);
- err = -ENOMEM;
+ if (!ui__has_annotation())
+ return 0;
- /* overwrite the 'al' to branch-to info */
- al->map = bi[i].to.map;
- al->sym = bi[i].to.sym;
- al->addr = bi[i].to.addr;
- /*
- * The report shows the percentage of total branches captured
- * and not events sampled. Thus we use a pseudo period of 1.
- */
- he = __hists__add_entry(&evsel->hists, al, parent, &bi[i], NULL,
- 1, 1, 0);
- if (he) {
- bx = he->branch_info;
- err = addr_map_symbol__inc_samples(&bx->from, evsel->idx);
- if (err)
- goto out;
-
- err = addr_map_symbol__inc_samples(&bx->to, evsel->idx);
- if (err)
- goto out;
-
- evsel->hists.stats.total_period += 1;
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
- } else
+ if (sort__mode == SORT_MODE__BRANCH) {
+ bi = he->branch_info;
+ err = addr_map_symbol__inc_samples(&bi->from, evsel->idx);
+ if (err)
goto out;
- }
- err = 0;
-out:
- free(bi);
- return err;
-}
-static int report__add_hist_entry(struct perf_tool *tool, struct perf_evsel *evsel,
- struct addr_location *al, struct perf_sample *sample)
-{
- struct report *rep = container_of(tool, struct report, tool);
- struct symbol *parent = NULL;
- struct hist_entry *he;
- int err = sample__resolve_callchain(sample, &parent, evsel, al, rep->max_stack);
+ err = addr_map_symbol__inc_samples(&bi->to, evsel->idx);
- if (err)
- return err;
+ } else if (rep->mem_mode) {
+ mi = he->mem_info;
+ err = addr_map_symbol__inc_samples(&mi->daddr, evsel->idx);
+ if (err)
+ goto out;
- he = __hists__add_entry(&evsel->hists, al, parent, NULL, NULL,
- sample->period, sample->weight,
- sample->transaction);
- if (he == NULL)
- return -ENOMEM;
+ err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
- err = hist_entry__append_callchain(he, sample);
- if (err)
- goto out;
+ } else if (symbol_conf.cumulate_callchain) {
+ if (single)
+ err = hist_entry__inc_addr_samples(he, evsel->idx,
+ al->addr);
+ } else {
+ err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+ }
- err = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
- evsel->hists.stats.total_period += sample->period;
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
out:
return err;
}
-
static int process_sample_event(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
@@ -221,6 +143,10 @@ static int process_sample_event(struct perf_tool *tool,
{
struct report *rep = container_of(tool, struct report, tool);
struct addr_location al;
+ struct hist_entry_iter iter = {
+ .hide_unresolved = rep->hide_unresolved,
+ .add_entry_cb = hist_iter__report_callback,
+ };
int ret;
if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
@@ -229,28 +155,29 @@ static int process_sample_event(struct perf_tool *tool,
return -1;
}
- if (al.filtered || (rep->hide_unresolved && al.sym == NULL))
+ if (rep->hide_unresolved && al.sym == NULL)
return 0;
if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
return 0;
- if (sort__mode == SORT_MODE__BRANCH) {
- ret = report__add_branch_hist_entry(tool, &al, sample, evsel);
- if (ret < 0)
- pr_debug("problem adding lbr entry, skipping event\n");
- } else if (rep->mem_mode == 1) {
- ret = report__add_mem_hist_entry(tool, &al, sample, evsel, event);
- if (ret < 0)
- pr_debug("problem adding mem entry, skipping event\n");
- } else {
- if (al.map != NULL)
- al.map->dso->hit = 1;
+ if (sort__mode == SORT_MODE__BRANCH)
+ iter.ops = &hist_iter_branch;
+ else if (rep->mem_mode)
+ iter.ops = &hist_iter_mem;
+ else if (symbol_conf.cumulate_callchain)
+ iter.ops = &hist_iter_cumulative;
+ else
+ iter.ops = &hist_iter_normal;
+
+ if (al.map != NULL)
+ al.map->dso->hit = 1;
+
+ ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
+ rep);
+ if (ret < 0)
+ pr_debug("problem adding hist entry, skipping event\n");
- ret = report__add_hist_entry(tool, evsel, &al, sample);
- if (ret < 0)
- pr_debug("problem incrementing symbol period, skipping event\n");
- }
return ret;
}
@@ -307,6 +234,14 @@ static int report__setup_sample_type(struct report *rep)
}
}
+ if (symbol_conf.cumulate_callchain) {
+ /* Silently ignore if callchain is missing */
+ if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) {
+ symbol_conf.cumulate_callchain = false;
+ perf_hpp__cancel_cumulate();
+ }
+ }
+
if (sort__mode == SORT_MODE__BRANCH) {
if (!is_pipe &&
!(sample_type & PERF_SAMPLE_BRANCH_STACK)) {
@@ -335,6 +270,11 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
char buf[512];
size_t size = sizeof(buf);
+ if (symbol_conf.filter_relative) {
+ nr_samples = hists->stats.nr_non_filtered_samples;
+ nr_events = hists->stats.total_non_filtered_period;
+ }
+
if (perf_evsel__is_group_event(evsel)) {
struct perf_evsel *pos;
@@ -342,8 +282,13 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report
evname = buf;
for_each_group_member(pos, evsel) {
- nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
- nr_events += pos->hists.stats.total_period;
+ if (symbol_conf.filter_relative) {
+ nr_samples += pos->hists.stats.nr_non_filtered_samples;
+ nr_events += pos->hists.stats.total_non_filtered_period;
+ } else {
+ nr_samples += pos->hists.stats.nr_events[PERF_RECORD_SAMPLE];
+ nr_events += pos->hists.stats.total_period;
+ }
}
}
@@ -468,24 +413,12 @@ static int report__browse_hists(struct report *rep)
return ret;
}
-static u64 report__collapse_hists(struct report *rep)
+static void report__collapse_hists(struct report *rep)
{
struct ui_progress prog;
struct perf_evsel *pos;
- u64 nr_samples = 0;
- /*
- * Count number of histogram entries to use when showing progress,
- * reusing nr_samples variable.
- */
- evlist__for_each(rep->session->evlist, pos)
- nr_samples += pos->hists.nr_entries;
- ui_progress__init(&prog, nr_samples, "Merging related events...");
- /*
- * Count total number of samples, will be used to check if this
- * session had any.
- */
- nr_samples = 0;
+ ui_progress__init(&prog, rep->nr_entries, "Merging related events...");
evlist__for_each(rep->session->evlist, pos) {
struct hists *hists = &pos->hists;
@@ -494,7 +427,6 @@ static u64 report__collapse_hists(struct report *rep)
hists->symbol_filter_str = rep->symbol_filter_str;
hists__collapse_resort(hists, &prog);
- nr_samples += hists->stats.nr_events[PERF_RECORD_SAMPLE];
/* Non-group events are considered as leader */
if (symbol_conf.event_group &&
@@ -507,14 +439,11 @@ static u64 report__collapse_hists(struct report *rep)
}
ui_progress__finish();
-
- return nr_samples;
}
static int __cmd_report(struct report *rep)
{
int ret;
- u64 nr_samples;
struct perf_session *session = rep->session;
struct perf_evsel *pos;
struct perf_data_file *file = session->file;
@@ -554,12 +483,12 @@ static int __cmd_report(struct report *rep)
}
}
- nr_samples = report__collapse_hists(rep);
+ report__collapse_hists(rep);
if (session_done())
return 0;
- if (nr_samples == 0) {
+ if (rep->nr_entries == 0) {
ui__error("The %s file has no samples!\n", file->path);
return 0;
}
@@ -571,11 +500,9 @@ static int __cmd_report(struct report *rep)
}
static int
-parse_callchain_opt(const struct option *opt, const char *arg, int unset)
+report_parse_callchain_opt(const struct option *opt, const char *arg, int unset)
{
struct report *rep = (struct report *)opt->value;
- char *tok, *tok2;
- char *endptr;
/*
* --no-call-graph
@@ -585,80 +512,7 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
return 0;
}
- symbol_conf.use_callchain = true;
-
- if (!arg)
- return 0;
-
- tok = strtok((char *)arg, ",");
- if (!tok)
- return -1;
-
- /* get the output mode */
- if (!strncmp(tok, "graph", strlen(arg)))
- callchain_param.mode = CHAIN_GRAPH_ABS;
-
- else if (!strncmp(tok, "flat", strlen(arg)))
- callchain_param.mode = CHAIN_FLAT;
-
- else if (!strncmp(tok, "fractal", strlen(arg)))
- callchain_param.mode = CHAIN_GRAPH_REL;
-
- else if (!strncmp(tok, "none", strlen(arg))) {
- callchain_param.mode = CHAIN_NONE;
- symbol_conf.use_callchain = false;
-
- return 0;
- }
-
- else
- return -1;
-
- /* get the min percentage */
- tok = strtok(NULL, ",");
- if (!tok)
- goto setup;
-
- callchain_param.min_percent = strtod(tok, &endptr);
- if (tok == endptr)
- return -1;
-
- /* get the print limit */
- tok2 = strtok(NULL, ",");
- if (!tok2)
- goto setup;
-
- if (tok2[0] != 'c') {
- callchain_param.print_limit = strtoul(tok2, &endptr, 0);
- tok2 = strtok(NULL, ",");
- if (!tok2)
- goto setup;
- }
-
- /* get the call chain order */
- if (!strncmp(tok2, "caller", strlen("caller")))
- callchain_param.order = ORDER_CALLER;
- else if (!strncmp(tok2, "callee", strlen("callee")))
- callchain_param.order = ORDER_CALLEE;
- else
- return -1;
-
- /* Get the sort key */
- tok2 = strtok(NULL, ",");
- if (!tok2)
- goto setup;
- if (!strncmp(tok2, "function", strlen("function")))
- callchain_param.key = CCKEY_FUNCTION;
- else if (!strncmp(tok2, "address", strlen("address")))
- callchain_param.key = CCKEY_ADDRESS;
- else
- return -1;
-setup:
- if (callchain_register_param(&callchain_param) < 0) {
- pr_err("Can't register callchain params\n");
- return -1;
- }
- return 0;
+ return parse_callchain_report_opt(arg);
}
int
@@ -758,10 +612,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "header-only", &report.header_only,
"Show only data header."),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline,"
- " dso_to, dso_from, symbol_to, symbol_from, mispredict,"
- " weight, local_weight, mem, symbol_daddr, dso_daddr, tlb, "
- "snoop, locked, abort, in_tx, transaction"),
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ " Please refer the man page for the complete list."),
+ OPT_STRING('F', "fields", &field_order, "key[,keys...]",
+ "output field(s): overhead, period, sample plus all of sort keys"),
OPT_BOOLEAN(0, "showcpuutilization", &symbol_conf.show_cpu_utilization,
"Show sample percentage for different cpu modes"),
OPT_STRING('p', "parent", &parent_pattern, "regex",
@@ -770,7 +624,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
"Only display entries with parent-match"),
OPT_CALLBACK_DEFAULT('g', "call-graph", &report, "output_type,min_percent[,print_limit],call_order",
"Display callchains using output_type (graph, flat, fractal, or none) , min percent threshold, optional print limit, callchain order, key (function or address). "
- "Default: fractal,0.5,callee,function", &parse_callchain_opt, callchain_default_opt),
+ "Default: fractal,0.5,callee,function", &report_parse_callchain_opt, callchain_default_opt),
+ OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+ "Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &report.max_stack,
"Set the maximum stack depth when parsing the callchain, "
"anything beyond the specified depth will be ignored. "
@@ -821,6 +677,8 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "mem-mode", &report.mem_mode, "mem access profile"),
OPT_CALLBACK(0, "percent-limit", &report, "percent",
"Don't show entries under that percent", parse_percent_limit),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "how to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
struct perf_data_file file = {
@@ -861,55 +719,37 @@ repeat:
has_br_stack = perf_header__has_feat(&session->header,
HEADER_BRANCH_STACK);
- if (branch_mode == -1 && has_br_stack)
+ if (branch_mode == -1 && has_br_stack) {
sort__mode = SORT_MODE__BRANCH;
-
- /* sort__mode could be NORMAL if --no-branch-stack */
- if (sort__mode == SORT_MODE__BRANCH) {
- /*
- * if no sort_order is provided, then specify
- * branch-mode specific order
- */
- if (sort_order == default_sort_order)
- sort_order = "comm,dso_from,symbol_from,"
- "dso_to,symbol_to";
-
+ symbol_conf.cumulate_callchain = false;
}
+
if (report.mem_mode) {
if (sort__mode == SORT_MODE__BRANCH) {
pr_err("branch and mem mode incompatible\n");
goto error;
}
sort__mode = SORT_MODE__MEMORY;
-
- /*
- * if no sort_order is provided, then specify
- * branch-mode specific order
- */
- if (sort_order == default_sort_order)
- sort_order = "local_weight,mem,sym,dso,symbol_daddr,dso_daddr,snoop,tlb,locked";
+ symbol_conf.cumulate_callchain = false;
}
if (setup_sorting() < 0) {
- parse_options_usage(report_usage, options, "s", 1);
+ if (sort_order)
+ parse_options_usage(report_usage, options, "s", 1);
+ if (field_order)
+ parse_options_usage(sort_order ? NULL : report_usage,
+ options, "F", 1);
goto error;
}
- if (parent_pattern != default_parent_pattern) {
- if (sort_dimension__add("parent") < 0)
- goto error;
- }
-
/* Force tty output for header output. */
if (report.header || report.header_only)
use_browser = 0;
if (strcmp(input_name, "-") != 0)
setup_browser(true);
- else {
+ else
use_browser = 0;
- perf_hpp__init();
- }
if (report.header || report.header_only) {
perf_session__fprintf_info(session, stdout,
@@ -926,7 +766,7 @@ repeat:
* so don't allocate extra space that won't be used in the stdio
* implementation.
*/
- if (use_browser == 1 && sort__has_sym) {
+ if (ui__has_annotation()) {
symbol_conf.priv_size = sizeof(struct annotation);
machines__set_symbol_filter(&session->machines,
symbol__annotate_init);
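
Taken together, the report changes above make cumulative (children) overhead and the filtered-percentage display controllable from both the command line and the config file. Illustrative invocations, using only the options introduced in this hunk:

    perf report --children
    perf report --percentage absolute

or persistently, via the report.children key handled in report__config():

    [report]
            children = true
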
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 6a76a07b678..c38d06c0477 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -66,7 +66,7 @@ struct sched_atom {
struct task_desc *wakee;
};
-#define TASK_STATE_TO_CHAR_STR "RSDTtZX"
+#define TASK_STATE_TO_CHAR_STR "RSDTtZXxKWP"
enum thread_state {
THREAD_SLEEPING = 0,
@@ -149,7 +149,6 @@ struct perf_sched {
unsigned long nr_runs;
unsigned long nr_timestamps;
unsigned long nr_unordered_timestamps;
- unsigned long nr_state_machine_bugs;
unsigned long nr_context_switch_bugs;
unsigned long nr_events;
unsigned long nr_lost_chunks;
@@ -1007,17 +1006,12 @@ static int latency_wakeup_event(struct perf_sched *sched,
struct perf_sample *sample,
struct machine *machine)
{
- const u32 pid = perf_evsel__intval(evsel, sample, "pid"),
- success = perf_evsel__intval(evsel, sample, "success");
+ const u32 pid = perf_evsel__intval(evsel, sample, "pid");
struct work_atoms *atoms;
struct work_atom *atom;
struct thread *wakee;
u64 timestamp = sample->time;
- /* Note for later, it may be interesting to observe the failing cases */
- if (!success)
- return 0;
-
wakee = machine__findnew_thread(machine, 0, pid);
atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
if (!atoms) {
@@ -1037,12 +1031,18 @@ static int latency_wakeup_event(struct perf_sched *sched,
atom = list_entry(atoms->work_list.prev, struct work_atom, list);
/*
+ * A wakeup event is not guaranteed to fire only while the task
+ * is off the run queue; it can also arrive while the task is
+ * still on the run queue and the wakeup merely sets ->state to
+ * TASK_RUNNING. In that case we must not record a ->wake_up_time
+ * for the task.
+ *
* You WILL be missing events if you've recorded only
* one CPU, or are only looking at only one, so don't
- * make useless noise.
+ * skip in this case.
*/
if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
- sched->nr_state_machine_bugs++;
+ return 0;
sched->nr_timestamps++;
if (atom->sched_out_time > timestamp) {
@@ -1124,7 +1124,7 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
avg = work_list->total_lat / work_list->nb_atoms;
- printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %9.6f s\n",
+ printf("|%11.3f ms |%9" PRIu64 " | avg:%9.3f ms | max:%9.3f ms | max at: %13.6f s\n",
(double)work_list->total_runtime / 1e6,
work_list->nb_atoms, (double)avg / 1e6,
(double)work_list->max_lat / 1e6,
@@ -1266,9 +1266,8 @@ static int process_sched_wakeup_event(struct perf_tool *tool,
static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
struct perf_sample *sample, struct machine *machine)
{
- const u32 prev_pid = perf_evsel__intval(evsel, sample, "prev_pid"),
- next_pid = perf_evsel__intval(evsel, sample, "next_pid");
- struct thread *sched_out __maybe_unused, *sched_in;
+ const u32 next_pid = perf_evsel__intval(evsel, sample, "next_pid");
+ struct thread *sched_in;
int new_shortname;
u64 timestamp0, timestamp = sample->time;
s64 delta;
@@ -1291,7 +1290,6 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
return -1;
}
- sched_out = machine__findnew_thread(machine, 0, prev_pid);
sched_in = machine__findnew_thread(machine, 0, next_pid);
sched->curr_thread[this_cpu] = sched_in;
@@ -1300,17 +1298,25 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
new_shortname = 0;
if (!sched_in->shortname[0]) {
- sched_in->shortname[0] = sched->next_shortname1;
- sched_in->shortname[1] = sched->next_shortname2;
-
- if (sched->next_shortname1 < 'Z') {
- sched->next_shortname1++;
+ if (!strcmp(thread__comm_str(sched_in), "swapper")) {
+ /*
+ * Don't allocate a letter-number for swapper:0
+ * as a shortname. Instead, we use '.' for it.
+ */
+ sched_in->shortname[0] = '.';
+ sched_in->shortname[1] = ' ';
} else {
- sched->next_shortname1='A';
- if (sched->next_shortname2 < '9') {
- sched->next_shortname2++;
+ sched_in->shortname[0] = sched->next_shortname1;
+ sched_in->shortname[1] = sched->next_shortname2;
+
+ if (sched->next_shortname1 < 'Z') {
+ sched->next_shortname1++;
} else {
- sched->next_shortname2='0';
+ sched->next_shortname1 = 'A';
+ if (sched->next_shortname2 < '9')
+ sched->next_shortname2++;
+ else
+ sched->next_shortname2 = '0';
}
}
new_shortname = 1;
@@ -1322,12 +1328,9 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
else
printf("*");
- if (sched->curr_thread[cpu]) {
- if (sched->curr_thread[cpu]->tid)
- printf("%2s ", sched->curr_thread[cpu]->shortname);
- else
- printf(". ");
- } else
+ if (sched->curr_thread[cpu])
+ printf("%2s ", sched->curr_thread[cpu]->shortname);
+ else
printf(" ");
}
@@ -1425,7 +1428,7 @@ static int perf_sched__process_tracepoint_sample(struct perf_tool *tool __maybe_
int err = 0;
evsel->hists.stats.total_period += sample->period;
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ hists__inc_nr_samples(&evsel->hists, true);
if (evsel->handler != NULL) {
tracepoint_handler f = evsel->handler;
@@ -1496,14 +1499,6 @@ static void print_bad_events(struct perf_sched *sched)
(double)sched->nr_lost_events/(double)sched->nr_events * 100.0,
sched->nr_lost_events, sched->nr_events, sched->nr_lost_chunks);
}
- if (sched->nr_state_machine_bugs && sched->nr_timestamps) {
- printf(" INFO: %.3f%% state machine bugs (%ld out of %ld)",
- (double)sched->nr_state_machine_bugs/(double)sched->nr_timestamps*100.0,
- sched->nr_state_machine_bugs, sched->nr_timestamps);
- if (sched->nr_lost_events)
- printf(" (due to lost events?)");
- printf("\n");
- }
if (sched->nr_context_switch_bugs && sched->nr_timestamps) {
printf(" INFO: %.3f%% context switch bugs (%ld out of %ld)",
(double)sched->nr_context_switch_bugs/(double)sched->nr_timestamps*100.0,
@@ -1527,9 +1522,9 @@ static int perf_sched__lat(struct perf_sched *sched)
perf_sched__sort_lat(sched);
- printf("\n ---------------------------------------------------------------------------------------------------------------\n");
- printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
- printf(" ---------------------------------------------------------------------------------------------------------------\n");
+ printf("\n -----------------------------------------------------------------------------------------------------------------\n");
+ printf(" Task | Runtime ms | Switches | Average delay ms | Maximum delay ms | Maximum delay at |\n");
+ printf(" -----------------------------------------------------------------------------------------------------------------\n");
next = rb_first(&sched->sorted_atom_root);
@@ -1541,7 +1536,7 @@ static int perf_sched__lat(struct perf_sched *sched)
next = rb_next(next);
}
- printf(" -----------------------------------------------------------------------------------------\n");
+ printf(" -----------------------------------------------------------------------------------------------------------------\n");
printf(" TOTAL: |%11.3f ms |%9" PRIu64 " |\n",
(double)sched->all_runtime / 1e6, sched->all_count);
@@ -1635,6 +1630,7 @@ static int __cmd_record(int argc, const char **argv)
"-e", "sched:sched_stat_runtime",
"-e", "sched:sched_process_fork",
"-e", "sched:sched_wakeup",
+ "-e", "sched:sched_wakeup_new",
"-e", "sched:sched_migrate_task",
};
@@ -1713,8 +1709,10 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
"perf sched replay [<options>]",
NULL
};
- const char * const sched_usage[] = {
- "perf sched [<options>] {record|latency|map|replay|script}",
+ const char *const sched_subcommands[] = { "record", "latency", "map",
+ "replay", "script", NULL };
+ const char *sched_usage[] = {
+ NULL,
NULL
};
struct trace_sched_handler lat_ops = {
@@ -1736,8 +1734,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < ARRAY_SIZE(sched.curr_pid); i++)
sched.curr_pid[i] = -1;
- argc = parse_options(argc, argv, sched_options, sched_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, sched_options, sched_subcommands,
+ sched_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(sched_usage, sched_options);
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 8b0e1c9234d..65a151e3606 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -174,13 +174,20 @@ static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel)
static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
{
- memset(evsel->priv, 0, sizeof(struct perf_stat));
+ int i;
+ struct perf_stat *ps = evsel->priv;
+
+ for (i = 0; i < 3; i++)
+ init_stats(&ps->res_stats[i]);
}
static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
{
evsel->priv = zalloc(sizeof(struct perf_stat));
- return evsel->priv == NULL ? -ENOMEM : 0;
+ if (evsel->priv == NULL)
+ return -ENOMEM;
+ perf_evsel__reset_stat_priv(evsel);
+ return 0;
}
static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 25526d6eae5..74db2568b86 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -494,7 +494,7 @@ static const char *cat_backtrace(union perf_event *event,
continue;
}
- tal.filtered = false;
+ tal.filtered = 0;
thread__find_addr_location(al.thread, machine, cpumode,
MAP__FUNCTION, ip, &tal);
@@ -1238,7 +1238,7 @@ static int timechart__record(struct timechart *tchart, int argc, const char **ar
for (i = 0; i < old_power_args_nr; i++)
*p++ = strdup(old_power_args[i]);
- for (j = 1; j < (unsigned int)argc; j++)
+ for (j = 0; j < (unsigned int)argc; j++)
*p++ = argv[j];
return cmd_record(rec_argc, rec_argv, NULL);
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 76cd510d34d..377971dc89a 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -176,7 +176,7 @@ static void perf_top__record_precise_ip(struct perf_top *top,
{
struct annotation *notes;
struct symbol *sym;
- int err;
+ int err = 0;
if (he == NULL || he->ms.sym == NULL ||
((top->sym_filter_entry == NULL ||
@@ -190,10 +190,18 @@ static void perf_top__record_precise_ip(struct perf_top *top,
return;
ip = he->ms.map->map_ip(he->ms.map, ip);
- err = hist_entry__inc_addr_samples(he, counter, ip);
+
+ if (ui__has_annotation())
+ err = hist_entry__inc_addr_samples(he, counter, ip);
pthread_mutex_unlock(&notes->lock);
+ /*
+ * This function is now called with he->hists->lock held.
+ * Release it before going to sleep.
+ */
+ pthread_mutex_unlock(&he->hists->lock);
+
if (err == -ERANGE && !he->ms.map->erange_warned)
ui__warn_map_erange(he->ms.map, sym, ip);
else if (err == -ENOMEM) {
@@ -201,6 +209,8 @@ static void perf_top__record_precise_ip(struct perf_top *top,
sym->name);
sleep(1);
}
+
+ pthread_mutex_lock(&he->hists->lock);
}
static void perf_top__show_details(struct perf_top *top)
@@ -236,24 +246,6 @@ out_unlock:
pthread_mutex_unlock(&notes->lock);
}
-static struct hist_entry *perf_evsel__add_hist_entry(struct perf_evsel *evsel,
- struct addr_location *al,
- struct perf_sample *sample)
-{
- struct hist_entry *he;
-
- pthread_mutex_lock(&evsel->hists.lock);
- he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL,
- sample->period, sample->weight,
- sample->transaction);
- pthread_mutex_unlock(&evsel->hists.lock);
- if (he == NULL)
- return NULL;
-
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
- return he;
-}
-
static void perf_top__print_sym_table(struct perf_top *top)
{
char bf[160];
@@ -657,6 +649,26 @@ static int symbol_filter(struct map *map __maybe_unused, struct symbol *sym)
return 0;
}
+static int hist_iter__top_callback(struct hist_entry_iter *iter,
+ struct addr_location *al, bool single,
+ void *arg)
+{
+ struct perf_top *top = arg;
+ struct hist_entry *he = iter->he;
+ struct perf_evsel *evsel = iter->evsel;
+
+ if (sort__has_sym && single) {
+ u64 ip = al->addr;
+
+ if (al->map)
+ ip = al->map->unmap_ip(al->map, ip);
+
+ perf_top__record_precise_ip(top, he, evsel->idx, ip);
+ }
+
+ return 0;
+}
+
static void perf_event__process_sample(struct perf_tool *tool,
const union perf_event *event,
struct perf_evsel *evsel,
@@ -664,8 +676,6 @@ static void perf_event__process_sample(struct perf_tool *tool,
struct machine *machine)
{
struct perf_top *top = container_of(tool, struct perf_top, tool);
- struct symbol *parent = NULL;
- u64 ip = sample->ip;
struct addr_location al;
int err;
@@ -692,8 +702,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
if (event->header.misc & PERF_RECORD_MISC_EXACT_IP)
top->exact_samples++;
- if (perf_event__preprocess_sample(event, machine, &al, sample) < 0 ||
- al.filtered)
+ if (perf_event__preprocess_sample(event, machine, &al, sample) < 0)
return;
if (!top->kptr_restrict_warned &&
@@ -741,25 +750,23 @@ static void perf_event__process_sample(struct perf_tool *tool,
}
if (al.sym == NULL || !al.sym->ignore) {
- struct hist_entry *he;
+ struct hist_entry_iter iter = {
+ .add_entry_cb = hist_iter__top_callback,
+ };
- err = sample__resolve_callchain(sample, &parent, evsel, &al,
- top->max_stack);
- if (err)
- return;
+ if (symbol_conf.cumulate_callchain)
+ iter.ops = &hist_iter_cumulative;
+ else
+ iter.ops = &hist_iter_normal;
- he = perf_evsel__add_hist_entry(evsel, &al, sample);
- if (he == NULL) {
- pr_err("Problem incrementing symbol period, skipping event\n");
- return;
- }
+ pthread_mutex_lock(&evsel->hists.lock);
- err = hist_entry__append_callchain(he, sample);
- if (err)
- return;
+ err = hist_entry_iter__add(&iter, &al, evsel, sample,
+ top->max_stack, top);
+ if (err < 0)
+ pr_err("Problem incrementing symbol period, skipping event\n");
- if (sort__has_sym)
- perf_top__record_precise_ip(top, he, evsel->idx, ip);
+ pthread_mutex_unlock(&evsel->hists.lock);
}
return;
@@ -991,6 +998,20 @@ parse_callchain_opt(const struct option *opt, const char *arg, int unset)
return record_parse_callchain_opt(opt, arg, unset);
}
+static int perf_top_config(const char *var, const char *value, void *cb)
+{
+ struct perf_top *top = cb;
+
+ if (!strcmp(var, "top.call-graph"))
+ return record_parse_callchain(value, &top->record_opts);
+ if (!strcmp(var, "top.children")) {
+ symbol_conf.cumulate_callchain = perf_config_bool(var, value);
+ return 0;
+ }
+
+ return perf_default_config(var, value, cb);
+}
+
static int
parse_percent_limit(const struct option *opt, const char *arg,
int unset __maybe_unused)
@@ -1069,8 +1090,10 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show counter open errors, etc)"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent, weight, local_weight,"
- " abort, in_tx, transaction"),
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ " Please refer the man page for the complete list."),
+ OPT_STRING(0, "fields", &field_order, "key[,keys...]",
+ "output field(s): overhead, period, sample plus all of sort keys"),
OPT_BOOLEAN('n', "show-nr-samples", &symbol_conf.show_nr_samples,
"Show a column with the number of samples"),
OPT_CALLBACK_NOOPT('g', NULL, &top.record_opts,
@@ -1079,6 +1102,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_CALLBACK(0, "call-graph", &top.record_opts,
"mode[,dump_size]", record_callchain_help,
&parse_callchain_opt),
+ OPT_BOOLEAN(0, "children", &symbol_conf.cumulate_callchain,
+ "Accumulate callchains of children and show total overhead as well"),
OPT_INTEGER(0, "max-stack", &top.max_stack,
"Set the maximum stack depth when parsing the callchain. "
"Default: " __stringify(PERF_MAX_STACK_DEPTH)),
@@ -1104,6 +1129,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_STRING('u', "uid", &target->uid_str, "user", "user to profile"),
OPT_CALLBACK(0, "percent-limit", &top, "percent",
"Don't show entries under that percent", parse_percent_limit),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
const char * const top_usage[] = {
@@ -1115,21 +1142,25 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
if (top.evlist == NULL)
return -ENOMEM;
+ perf_config(perf_top_config, &top);
+
argc = parse_options(argc, argv, options, top_usage, 0);
if (argc)
usage_with_options(top_usage, options);
- if (sort_order == default_sort_order)
- sort_order = "dso,symbol";
+ sort__mode = SORT_MODE__TOP;
+ /* display thread wants entries to be collapsed in a different tree */
+ sort__need_collapse = 1;
if (setup_sorting() < 0) {
- parse_options_usage(top_usage, options, "s", 1);
+ if (sort_order)
+ parse_options_usage(top_usage, options, "s", 1);
+ if (field_order)
+ parse_options_usage(sort_order ? NULL : top_usage,
+ options, "fields", 0);
goto out_delete_evlist;
}
- /* display thread wants entries to be collapsed in a different tree */
- sort__need_collapse = 1;
-
if (top.use_stdio)
use_browser = 0;
else if (top.use_tui)
@@ -1178,6 +1209,11 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
top.sym_evsel = perf_evlist__first(top.evlist);
+ if (!symbol_conf.use_callchain) {
+ symbol_conf.cumulate_callchain = false;
+ perf_hpp__cancel_cumulate();
+ }
+
symbol_conf.priv_size = sizeof(struct annotation);
symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 896f27047ed..f954c26de23 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -37,6 +37,10 @@
# define MADV_UNMERGEABLE 13
#endif
+#ifndef EFD_SEMAPHORE
+# define EFD_SEMAPHORE 1
+#endif
+
struct tp_field {
int offset;
union {
@@ -279,6 +283,11 @@ static size_t syscall_arg__scnprintf_strarray(char *bf, size_t size,
#define SCA_STRARRAY syscall_arg__scnprintf_strarray
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * FIXME: Make this available to all arches as soon as the ioctl beautifier
+ * gets rewritten to support all arches.
+ */
static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
struct syscall_arg *arg)
{
@@ -286,6 +295,7 @@ static size_t syscall_arg__scnprintf_strhexarray(char *bf, size_t size,
}
#define SCA_STRHEXARRAY syscall_arg__scnprintf_strhexarray
+#endif /* defined(__i386__) || defined(__x86_64__) */
static size_t syscall_arg__scnprintf_fd(char *bf, size_t size,
struct syscall_arg *arg);
@@ -815,7 +825,6 @@ static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscal
P_SIGNUM(PIPE);
P_SIGNUM(ALRM);
P_SIGNUM(TERM);
- P_SIGNUM(STKFLT);
P_SIGNUM(CHLD);
P_SIGNUM(CONT);
P_SIGNUM(STOP);
@@ -831,6 +840,15 @@ static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscal
P_SIGNUM(IO);
P_SIGNUM(PWR);
P_SIGNUM(SYS);
+#ifdef SIGEMT
+ P_SIGNUM(EMT);
+#endif
+#ifdef SIGSTKFLT
+ P_SIGNUM(STKFLT);
+#endif
+#ifdef SIGSWI
+ P_SIGNUM(SWI);
+#endif
default: break;
}
@@ -839,6 +857,10 @@ static size_t syscall_arg__scnprintf_signum(char *bf, size_t size, struct syscal
#define SCA_SIGNUM syscall_arg__scnprintf_signum
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * FIXME: Make this available to all arches.
+ */
#define TCGETS 0x5401
static const char *tioctls[] = {
@@ -860,6 +882,7 @@ static const char *tioctls[] = {
};
static DEFINE_STRARRAY_OFFSET(tioctls, 0x5401);
+#endif /* defined(__i386__) || defined(__x86_64__) */
#define STRARRAY(arg, name, array) \
.arg_scnprintf = { [arg] = SCA_STRARRAY, }, \
@@ -941,9 +964,16 @@ static struct syscall_fmt {
{ .name = "getrlimit", .errmsg = true, STRARRAY(0, resource, rlimit_resources), },
{ .name = "ioctl", .errmsg = true,
.arg_scnprintf = { [0] = SCA_FD, /* fd */
+#if defined(__i386__) || defined(__x86_64__)
+/*
+ * FIXME: Make this available to all arches.
+ */
[1] = SCA_STRHEXARRAY, /* cmd */
[2] = SCA_HEX, /* arg */ },
.arg_parm = { [1] = &strarray__tioctls, /* cmd */ }, },
+#else
+ [2] = SCA_HEX, /* arg */ }, },
+#endif
{ .name = "kill", .errmsg = true,
.arg_scnprintf = { [1] = SCA_SIGNUM, /* sig */ }, },
{ .name = "linkat", .errmsg = true,
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index c48d4495817..f30ac5e5d27 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -29,11 +29,25 @@ ifeq ($(ARCH),x86)
endif
NO_PERF_REGS := 0
endif
+
ifeq ($(ARCH),arm)
NO_PERF_REGS := 0
LIBUNWIND_LIBS = -lunwind -lunwind-arm
endif
+ifeq ($(ARCH),arm64)
+ NO_PERF_REGS := 0
+ LIBUNWIND_LIBS = -lunwind -lunwind-aarch64
+endif
+
+# So far only x86 and arm libdw unwind support is merged in perf.
+# Disable it on all other architectures in case libdw unwind
+# support is detected on the system. Add new architectures to
+# this check as their support is merged.
+ifneq ($(ARCH),$(filter $(ARCH),x86 arm))
+ NO_LIBDW_DWARF_UNWIND := 1
+endif
+
ifeq ($(LIBUNWIND_LIBS),)
NO_LIBUNWIND := 1
else
@@ -59,6 +73,17 @@ ifeq ($(NO_PERF_REGS),0)
CFLAGS += -DHAVE_PERF_REGS_SUPPORT
endif
+ifndef NO_LIBELF
+ # for linking with debug library, run like:
+ # make DEBUG=1 LIBDW_DIR=/opt/libdw/
+ ifdef LIBDW_DIR
+ LIBDW_CFLAGS := -I$(LIBDW_DIR)/include
+ LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
+ endif
+ FEATURE_CHECK_CFLAGS-libdw-dwarf-unwind := $(LIBDW_CFLAGS)
+ FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw
+endif
+
# include ARCH specific config
-include $(src-perf)/arch/$(ARCH)/Makefile
@@ -98,6 +123,10 @@ CFLAGS += -Wall
CFLAGS += -Wextra
CFLAGS += -std=gnu99
+# Enforce a non-executable stack, as we may regress (again) in the future by
+# adding assembler files missing the .GNU-stack linker note.
+LDFLAGS += -Wl,-z,noexecstack
+
EXTLIBS = -lelf -lpthread -lrt -lm -ldl
ifneq ($(OUTPUT),)
@@ -145,9 +174,38 @@ CORE_FEATURE_TESTS = \
libpython-version \
libslang \
libunwind \
- on-exit \
stackprotector-all \
- timerfd
+ timerfd \
+ libdw-dwarf-unwind
+
+LIB_FEATURE_TESTS = \
+ dwarf \
+ glibc \
+ gtk2 \
+ libaudit \
+ libbfd \
+ libelf \
+ libnuma \
+ libperl \
+ libpython \
+ libslang \
+ libunwind \
+ libdw-dwarf-unwind
+
+VF_FEATURE_TESTS = \
+ backtrace \
+ fortify-source \
+ gtk2-infobar \
+ libelf-getphdrnum \
+ libelf-mmap \
+ libpython-version \
+ stackprotector-all \
+ timerfd \
+ libunwind-debug-frame \
+ bionic \
+ liberty \
+ liberty-z \
+ cplus-demangle
# Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features.
# If in the future we need per-feature checks/flags for features not
@@ -161,17 +219,6 @@ endef
$(foreach feat,$(CORE_FEATURE_TESTS),$(call set_test_all_flags,$(feat)))
#
-# So here we detect whether test-all was rebuilt, to be able
-# to skip the print-out of the long features list if the file
-# existed before and after it was built:
-#
-ifeq ($(wildcard $(OUTPUT)config/feature-checks/test-all.bin),)
- test-all-failed := 1
-else
- test-all-failed := 0
-endif
-
-#
# Special fast-path for the 'all features are available' case:
#
$(call feature_check,all,$(MSG))
@@ -180,15 +227,6 @@ $(call feature_check,all,$(MSG))
# Just in case the build freshly failed, make sure we print the
# feature matrix:
#
-ifeq ($(feature-all), 0)
- test-all-failed := 1
-endif
-
-ifeq ($(test-all-failed),1)
- $(info )
- $(info Auto-detecting system features:)
-endif
-
ifeq ($(feature-all), 1)
#
# test-all.c passed - just set all the core feature flags to 1:
@@ -199,27 +237,6 @@ else
$(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat)))
endif
-#
-# Print the result of the feature test:
-#
-feature_print = $(eval $(feature_print_code)) $(info $(MSG))
-
-define feature_print_code
- ifeq ($(feature-$(1)), 1)
- MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1))
- else
- MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1))
- endif
-endef
-
-#
-# Only print out our features if we rebuilt the testcases or if a test failed:
-#
-ifeq ($(test-all-failed), 1)
- $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_print,$(feat)))
- $(info )
-endif
-
ifeq ($(feature-stackprotector-all), 1)
CFLAGS += -fstack-protector-all
endif
@@ -264,6 +281,7 @@ ifdef NO_LIBELF
NO_DWARF := 1
NO_DEMANGLE := 1
NO_LIBUNWIND := 1
+ NO_LIBDW_DWARF_UNWIND := 1
else
ifeq ($(feature-libelf), 0)
ifeq ($(feature-glibc), 1)
@@ -278,17 +296,22 @@ else
NO_LIBELF := 1
NO_DWARF := 1
NO_DEMANGLE := 1
+ NO_LIBUNWIND := 1
+ NO_LIBDW_DWARF_UNWIND := 1
else
- msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
+ ifneq ($(filter s% -static%,$(LDFLAGS),),)
+ msg := $(error No static glibc found, please install glibc-static);
+ else
+ msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]);
+ endif
endif
else
- # for linking with debug library, run like:
- # make DEBUG=1 LIBDW_DIR=/opt/libdw/
- ifdef LIBDW_DIR
- LIBDW_CFLAGS := -I$(LIBDW_DIR)/include
- LIBDW_LDFLAGS := -L$(LIBDW_DIR)/lib
+ ifndef NO_LIBDW_DWARF_UNWIND
+ ifneq ($(feature-libdw-dwarf-unwind),1)
+ NO_LIBDW_DWARF_UNWIND := 1
+ msg := $(warning No libdw DWARF unwind found, Please install elfutils-devel/libdw-dev >= 0.158 and/or set LIBDW_DIR);
+ endif
endif
-
ifneq ($(feature-dwarf), 1)
msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
NO_DWARF := 1
@@ -324,25 +347,51 @@ endif # NO_LIBELF
ifndef NO_LIBUNWIND
ifneq ($(feature-libunwind), 1)
- msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 1.1);
+ msg := $(warning No libunwind found. Please install libunwind-dev[el] >= 1.1 and/or set LIBUNWIND_DIR);
NO_LIBUNWIND := 1
+ endif
+endif
+
+dwarf-post-unwind := 1
+dwarf-post-unwind-text := BUG
+
+# setup DWARF post unwinder
+ifdef NO_LIBUNWIND
+ ifdef NO_LIBDW_DWARF_UNWIND
+ msg := $(warning Disabling post unwind, no support found.);
+ dwarf-post-unwind := 0
else
- ifeq ($(ARCH),arm)
- $(call feature_check,libunwind-debug-frame)
- ifneq ($(feature-libunwind-debug-frame), 1)
- msg := $(warning No debug_frame support found in libunwind);
- CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
- endif
- else
- # non-ARM has no dwarf_find_debug_frame() function:
+ dwarf-post-unwind-text := libdw
+ endif
+else
+ dwarf-post-unwind-text := libunwind
+ # Enable libunwind support by default.
+ ifndef NO_LIBDW_DWARF_UNWIND
+ NO_LIBDW_DWARF_UNWIND := 1
+ endif
+endif
+
+ifeq ($(dwarf-post-unwind),1)
+ CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT
+else
+ NO_DWARF_UNWIND := 1
+endif
+
+ifndef NO_LIBUNWIND
+ ifeq ($(ARCH),$(filter $(ARCH),arm arm64))
+ $(call feature_check,libunwind-debug-frame)
+ ifneq ($(feature-libunwind-debug-frame), 1)
+ msg := $(warning No debug_frame support found in libunwind);
CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
endif
-
- CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
- EXTLIBS += $(LIBUNWIND_LIBS)
- CFLAGS += $(LIBUNWIND_CFLAGS)
- LDFLAGS += $(LIBUNWIND_LDFLAGS)
- endif # ifneq ($(feature-libunwind), 1)
+ else
+ # non-ARM has no dwarf_find_debug_frame() function:
+ CFLAGS += -DNO_LIBUNWIND_DEBUG_FRAME
+ endif
+ CFLAGS += -DHAVE_LIBUNWIND_SUPPORT
+ EXTLIBS += $(LIBUNWIND_LIBS)
+ CFLAGS += $(LIBUNWIND_CFLAGS)
+ LDFLAGS += $(LIBUNWIND_LDFLAGS)
endif
ifndef NO_LIBAUDIT
@@ -402,6 +451,7 @@ else
ifneq ($(feature-libperl), 1)
CFLAGS += -DNO_LIBPERL
NO_LIBPERL := 1
+ msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed);
else
LDFLAGS += $(PERL_EMBED_LDFLAGS)
EXTLIBS += $(PERL_EMBED_LIBADD)
@@ -479,6 +529,20 @@ endif
ifeq ($(feature-libbfd), 1)
EXTLIBS += -lbfd
+
+ # call all detections now so we get the correct
+ # status in the VF output
+ $(call feature_check,liberty)
+ $(call feature_check,liberty-z)
+ $(call feature_check,cplus-demangle)
+
+ ifeq ($(feature-liberty), 1)
+ EXTLIBS += -liberty
+ else
+ ifeq ($(feature-liberty-z), 1)
+ EXTLIBS += -liberty -lz
+ endif
+ endif
endif
ifdef NO_DEMANGLE
@@ -489,15 +553,10 @@ else
CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
else
ifneq ($(feature-libbfd), 1)
- $(call feature_check,liberty)
- ifeq ($(feature-liberty), 1)
- EXTLIBS += -lbfd -liberty
- else
- $(call feature_check,liberty-z)
- ifeq ($(feature-liberty-z), 1)
- EXTLIBS += -lbfd -liberty -lz
- else
- $(call feature_check,cplus-demangle)
+ ifneq ($(feature-liberty), 1)
+ ifneq ($(feature-liberty-z), 1)
+ # we have neither HAVE_CPLUS_DEMANGLE_SUPPORT
+ # nor any of the 'bfd iberty z' trinity
ifeq ($(feature-cplus-demangle), 1)
EXTLIBS += -liberty
CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT
@@ -515,12 +574,6 @@ ifneq ($(filter -lbfd,$(EXTLIBS)),)
CFLAGS += -DHAVE_LIBBFD_SUPPORT
endif
-ifndef NO_ON_EXIT
- ifeq ($(feature-on-exit), 1)
- CFLAGS += -DHAVE_ON_EXIT_SUPPORT
- endif
-endif
-
ifndef NO_BACKTRACE
ifeq ($(feature-backtrace), 1)
CFLAGS += -DHAVE_BACKTRACE_SUPPORT
@@ -551,7 +604,7 @@ endif
# Make the path relative to DESTDIR, not to prefix
ifndef DESTDIR
-prefix = $(HOME)
+prefix ?= $(HOME)
endif
bindir_relative = bin
bindir = $(prefix)/$(bindir_relative)
@@ -602,3 +655,84 @@ ifdef DESTDIR
plugindir=$(libdir)/traceevent/plugins
plugindir_SQ= $(subst ','\'',$(plugindir))
endif
+
+#
+# Print the result of the feature test:
+#
+feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG))
+
+define feature_print_status_code
+ ifeq ($(feature-$(1)), 1)
+ MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1))
+ else
+ MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1))
+ endif
+endef
+
+feature_print_var = $(eval $(feature_print_var_code)) $(info $(MSG))
+define feature_print_var_code
+ MSG = $(shell printf '...%30s: %s' $(1) $($(1)))
+endef
+
+feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG))
+define feature_print_text_code
+ MSG = $(shell printf '...%30s: %s' $(1) $(2))
+endef
+
+PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat))))
+PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES)
+
+ifeq ($(dwarf-post-unwind),1)
+ PERF_FEATURES += dwarf-post-unwind($(dwarf-post-unwind-text))
+endif
+
+# The $(display_lib) variable controls the default detection message
+# output. It's set if:
+# - the detected features differ from the features stored from the
+# last build (in the PERF-FEATURES file)
+# - one of the $(LIB_FEATURE_TESTS) is not detected
+# - VF is enabled
+
+ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)")
+ $(shell echo "$(PERF_FEATURES)" > $(OUTPUT)PERF-FEATURES)
+ display_lib := 1
+endif
+
+feature_check = $(eval $(feature_check_code))
+define feature_check_code
+ ifneq ($(feature-$(1)), 1)
+ display_lib := 1
+ endif
+endef
+
+$(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_check,$(feat)))
+
+ifeq ($(VF),1)
+ display_lib := 1
+ display_vf := 1
+endif
+
+ifeq ($(display_lib),1)
+ $(info )
+ $(info Auto-detecting system features:)
+ $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print_status,$(feat),))
+
+ ifeq ($(dwarf-post-unwind),1)
+ $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text))
+ endif
+endif
+
+ifeq ($(display_vf),1)
+ $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print_status,$(feat),))
+ $(info )
+ $(call feature_print_var,prefix)
+ $(call feature_print_var,bindir)
+ $(call feature_print_var,libdir)
+ $(call feature_print_var,sysconfdir)
+ $(call feature_print_var,LIBUNWIND_DIR)
+ $(call feature_print_var,LIBDW_DIR)
+endif
+
+ifeq ($(display_lib),1)
+ $(info )
+endif
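
The new VF knob and the PERF-FEATURES cache change when the feature matrix is shown: a plain make stays quiet as long as the detected features match the cached list, while an illustrative verbose build such as

    make -C tools/perf VF=1

prints the full feature status plus the prefix/bindir/libdir and LIBUNWIND_DIR/LIBDW_DIR variables listed above.
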
diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch
index fef8ae92280..4b06719ee98 100644
--- a/tools/perf/config/Makefile.arch
+++ b/tools/perf/config/Makefile.arch
@@ -5,7 +5,8 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
-e s/arm.*/arm/ -e s/sa110/arm/ \
-e s/s390x/s390/ -e s/parisc64/parisc/ \
-e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
- -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ )
+ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \
+ -e s/tile.*/tile/ )
# Additional ARCH settings for x86
ifeq ($(ARCH),i386)
diff --git a/tools/perf/config/feature-checks/Makefile b/tools/perf/config/feature-checks/Makefile
index 12e551346fa..64c84e5f051 100644
--- a/tools/perf/config/feature-checks/Makefile
+++ b/tools/perf/config/feature-checks/Makefile
@@ -24,9 +24,9 @@ FILES= \
test-libslang.bin \
test-libunwind.bin \
test-libunwind-debug-frame.bin \
- test-on-exit.bin \
test-stackprotector-all.bin \
- test-timerfd.bin
+ test-timerfd.bin \
+ test-libdw-dwarf-unwind.bin
CC := $(CROSS_COMPILE)gcc -MD
PKG_CONFIG := $(CROSS_COMPILE)pkg-config
@@ -121,7 +121,7 @@ test-libpython-version.bin:
$(BUILD) $(FLAGS_PYTHON_EMBED)
test-libbfd.bin:
- $(BUILD) -DPACKAGE='"perf"' -lbfd -ldl
+ $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl
test-liberty.bin:
$(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty
@@ -132,15 +132,15 @@ test-liberty-z.bin:
test-cplus-demangle.bin:
$(BUILD) -liberty
-test-on-exit.bin:
- $(BUILD)
-
test-backtrace.bin:
$(BUILD)
test-timerfd.bin:
$(BUILD)
+test-libdw-dwarf-unwind.bin:
+ $(BUILD)
+
-include *.d
###############################
diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/perf/config/feature-checks/test-all.c
index 9b8a544155b..fe5c1e5c952 100644
--- a/tools/perf/config/feature-checks/test-all.c
+++ b/tools/perf/config/feature-checks/test-all.c
@@ -69,10 +69,6 @@
# include "test-libbfd.c"
#undef main
-#define main main_test_on_exit
-# include "test-on-exit.c"
-#undef main
-
#define main main_test_backtrace
# include "test-backtrace.c"
#undef main
@@ -89,6 +85,10 @@
# include "test-stackprotector-all.c"
#undef main
+#define main main_test_libdw_dwarf_unwind
+# include "test-libdw-dwarf-unwind.c"
+#undef main
+
int main(int argc, char *argv[])
{
main_test_libpython();
@@ -106,11 +106,11 @@ int main(int argc, char *argv[])
main_test_gtk2(argc, argv);
main_test_gtk2_infobar(argc, argv);
main_test_libbfd();
- main_test_on_exit();
main_test_backtrace();
main_test_libnuma();
main_test_timerfd();
main_test_stackprotector_all();
+ main_test_libdw_dwarf_unwind();
return 0;
}
diff --git a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c b/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c
new file mode 100644
index 00000000000..f676a3ff442
--- /dev/null
+++ b/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c
@@ -0,0 +1,13 @@
+
+#include <elfutils/libdwfl.h>
+
+int main(void)
+{
+ /*
+ * This function is guarded via __nonnull_attribute__ (1, 2),
+ * so '1' is passed as the argument values. This code is never executed,
+ * only compiled.
+ */
+ dwfl_thread_getframes((void *) 1, (void *) 1, NULL);
+ return 0;
+}
diff --git a/tools/perf/config/feature-checks/test-on-exit.c b/tools/perf/config/feature-checks/test-on-exit.c
deleted file mode 100644
index 8e88b16e6de..00000000000
--- a/tools/perf/config/feature-checks/test-on-exit.c
+++ /dev/null
@@ -1,16 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-static void exit_fn(int status, void *__data)
-{
- printf("exit status: %d, data: %d\n", status, *(int *)__data);
-}
-
-static int data = 123;
-
-int main(void)
-{
- on_exit(exit_fn, &data);
-
- return 321;
-}
diff --git a/tools/perf/design.txt b/tools/perf/design.txt
index 67e5d0cace8..a28dca2582a 100644
--- a/tools/perf/design.txt
+++ b/tools/perf/design.txt
@@ -18,7 +18,7 @@ underlying hardware counters.
Performance counters are accessed via special file descriptors.
There's one file descriptor per virtual counter used.
-The special file descriptor is opened via the perf_event_open()
+The special file descriptor is opened via the sys_perf_event_open()
system call:
int sys_perf_event_open(struct perf_event_attr *hw_event_uptr,
@@ -82,7 +82,7 @@ machine-specific.
If 'raw_type' is 0, then the 'type' field says what kind of counter
this is, with the following encoding:
-enum perf_event_types {
+enum perf_type_id {
PERF_TYPE_HARDWARE = 0,
PERF_TYPE_SOFTWARE = 1,
PERF_TYPE_TRACEPOINT = 2,
@@ -95,7 +95,7 @@ specified by 'event_id':
* Generalized performance counter event types, used by the hw_event.event_id
* parameter of the sys_perf_event_open() syscall:
*/
-enum hw_event_ids {
+enum perf_hw_id {
/*
* Common hardware events, generalized by the kernel:
*/
@@ -129,7 +129,7 @@ software events, selected by 'event_id':
* physical and sw events of the kernel (and allow the profiling of them as
* well):
*/
-enum sw_event_ids {
+enum perf_sw_ids {
PERF_COUNT_SW_CPU_CLOCK = 0,
PERF_COUNT_SW_TASK_CLOCK = 1,
PERF_COUNT_SW_PAGE_FAULTS = 2,
@@ -230,7 +230,7 @@ these events are recorded in the ring-buffer (see below).
The 'comm' bit allows tracking of process comm data on process creation.
This too is recorded in the ring-buffer (see below).
-The 'pid' parameter to the perf_event_open() system call allows the
+The 'pid' parameter to the sys_perf_event_open() system call allows the
counter to be specific to a task:
pid == 0: if the pid parameter is zero, the counter is attached to the
@@ -260,7 +260,7 @@ The 'flags' parameter is currently unused and must be zero.
The 'group_fd' parameter allows counter "groups" to be set up. A
counter group has one counter which is the group "leader". The leader
-is created first, with group_fd = -1 in the perf_event_open call
+is created first, with group_fd = -1 in the sys_perf_event_open call
that creates it. The rest of the group members are created
subsequently, with group_fd giving the fd of the group leader.
(A single counter on its own is created with group_fd = -1 and is
@@ -454,7 +454,6 @@ So to start with, in order to add HAVE_PERF_EVENTS to your Kconfig, you
will need at least this:
- asm/perf_event.h - a basic stub will suffice at first
- support for atomic64 types (and associated helper functions)
- - set_perf_event_pending() implemented
If your architecture does have hardware capabilities, you can override the
weak stub hw_perf_event_init() to register hardware counters.
diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh
index 496e2abb548..33569847fdc 100644
--- a/tools/perf/perf-completion.sh
+++ b/tools/perf/perf-completion.sh
@@ -121,9 +121,9 @@ __perf_main ()
elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then
evts=$($cmd list --raw-dump)
__perfcomp_colon "$evts" "$cur"
- # List subcommands for 'perf kvm'
- elif [[ $prev == "kvm" ]]; then
- subcmds="top record report diff buildid-list stat"
+ # List subcommands for perf commands
+ elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then
+ subcmds=$($cmd $prev --list-cmds)
__perfcomp_colon "$subcmds" "$cur"
# List long option names
elif [[ $cur == --* ]]; then
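
Since the script now asks each command for its subcommands, completion follows whatever --list-cmds reports; assuming perf sched exposes the subcommand list added earlier in this series, a session would look roughly like:

    $ perf sched <TAB>
    latency  map  record  replay  script
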
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
new file mode 100644
index 00000000000..5268a1481d2
--- /dev/null
+++ b/tools/perf/perf-sys.h
@@ -0,0 +1,190 @@
+#ifndef _PERF_SYS_H
+#define _PERF_SYS_H
+
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/syscall.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+
+#if defined(__i386__)
+#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define cpu_relax() asm volatile("rep; nop" ::: "memory");
+#define CPUINFO_PROC "model name"
+#ifndef __NR_perf_event_open
+# define __NR_perf_event_open 336
+#endif
+#ifndef __NR_futex
+# define __NR_futex 240
+#endif
+#ifndef __NR_gettid
+# define __NR_gettid 224
+#endif
+#endif
+
+#if defined(__x86_64__)
+#define mb() asm volatile("mfence" ::: "memory")
+#define wmb() asm volatile("sfence" ::: "memory")
+#define rmb() asm volatile("lfence" ::: "memory")
+#define cpu_relax() asm volatile("rep; nop" ::: "memory");
+#define CPUINFO_PROC "model name"
+#ifndef __NR_perf_event_open
+# define __NR_perf_event_open 298
+#endif
+#ifndef __NR_futex
+# define __NR_futex 202
+#endif
+#ifndef __NR_gettid
+# define __NR_gettid 186
+#endif
+#endif
+
+#ifdef __powerpc__
+#include "../../arch/powerpc/include/uapi/asm/unistd.h"
+#define mb() asm volatile ("sync" ::: "memory")
+#define wmb() asm volatile ("sync" ::: "memory")
+#define rmb() asm volatile ("sync" ::: "memory")
+#define CPUINFO_PROC "cpu"
+#endif
+
+#ifdef __s390__
+#define mb() asm volatile("bcr 15,0" ::: "memory")
+#define wmb() asm volatile("bcr 15,0" ::: "memory")
+#define rmb() asm volatile("bcr 15,0" ::: "memory")
+#endif
+
+#ifdef __sh__
+#if defined(__SH4A__) || defined(__SH5__)
+# define mb() asm volatile("synco" ::: "memory")
+# define wmb() asm volatile("synco" ::: "memory")
+# define rmb() asm volatile("synco" ::: "memory")
+#else
+# define mb() asm volatile("" ::: "memory")
+# define wmb() asm volatile("" ::: "memory")
+# define rmb() asm volatile("" ::: "memory")
+#endif
+#define CPUINFO_PROC "cpu type"
+#endif
+
+#ifdef __hppa__
+#define mb() asm volatile("" ::: "memory")
+#define wmb() asm volatile("" ::: "memory")
+#define rmb() asm volatile("" ::: "memory")
+#define CPUINFO_PROC "cpu"
+#endif
+
+#ifdef __sparc__
+#ifdef __LP64__
+#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
+ "membar #StoreLoad\n" \
+ "1:\n":::"memory")
+#else
+#define mb() asm volatile("":::"memory")
+#endif
+#define wmb() asm volatile("":::"memory")
+#define rmb() asm volatile("":::"memory")
+#define CPUINFO_PROC "cpu"
+#endif
+
+#ifdef __alpha__
+#define mb() asm volatile("mb" ::: "memory")
+#define wmb() asm volatile("wmb" ::: "memory")
+#define rmb() asm volatile("mb" ::: "memory")
+#define CPUINFO_PROC "cpu model"
+#endif
+
+#ifdef __ia64__
+#define mb() asm volatile ("mf" ::: "memory")
+#define wmb() asm volatile ("mf" ::: "memory")
+#define rmb() asm volatile ("mf" ::: "memory")
+#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
+#define CPUINFO_PROC "model name"
+#endif
+
+#ifdef __arm__
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define mb() ((void(*)(void))0xffff0fa0)()
+#define wmb() ((void(*)(void))0xffff0fa0)()
+#define rmb() ((void(*)(void))0xffff0fa0)()
+#define CPUINFO_PROC "Processor"
+#endif
+
+#ifdef __aarch64__
+#define mb() asm volatile("dmb ish" ::: "memory")
+#define wmb() asm volatile("dmb ishst" ::: "memory")
+#define rmb() asm volatile("dmb ishld" ::: "memory")
+#define cpu_relax() asm volatile("yield" ::: "memory")
+#endif
+
+#ifdef __mips__
+#define mb() asm volatile( \
+ ".set mips2\n\t" \
+ "sync\n\t" \
+ ".set mips0" \
+ : /* no output */ \
+ : /* no input */ \
+ : "memory")
+#define wmb() mb()
+#define rmb() mb()
+#define CPUINFO_PROC "cpu model"
+#endif
+
+#ifdef __arc__
+#define mb() asm volatile("" ::: "memory")
+#define wmb() asm volatile("" ::: "memory")
+#define rmb() asm volatile("" ::: "memory")
+#define CPUINFO_PROC "Processor"
+#endif
+
+#ifdef __metag__
+#define mb() asm volatile("" ::: "memory")
+#define wmb() asm volatile("" ::: "memory")
+#define rmb() asm volatile("" ::: "memory")
+#define CPUINFO_PROC "CPU"
+#endif
+
+#ifdef __xtensa__
+#define mb() asm volatile("memw" ::: "memory")
+#define wmb() asm volatile("memw" ::: "memory")
+#define rmb() asm volatile("" ::: "memory")
+#define CPUINFO_PROC "core ID"
+#endif
+
+#ifdef __tile__
+#define mb() asm volatile ("mf" ::: "memory")
+#define wmb() asm volatile ("mf" ::: "memory")
+#define rmb() asm volatile ("mf" ::: "memory")
+#define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory")
+#define CPUINFO_PROC "model name"
+#endif
+
+#define barrier() asm volatile ("" ::: "memory")
+
+#ifndef cpu_relax
+#define cpu_relax() barrier()
+#endif
+
+static inline int
+sys_perf_event_open(struct perf_event_attr *attr,
+ pid_t pid, int cpu, int group_fd,
+ unsigned long flags)
+{
+ int fd;
+
+ fd = syscall(__NR_perf_event_open, attr, pid, cpu,
+ group_fd, flags);
+
+#ifdef HAVE_ATTR_TEST
+ if (unlikely(test_attr__enabled))
+ test_attr__open(attr, pid, cpu, fd, group_fd, flags);
+#endif
+ return fd;
+}
+
+#endif /* _PERF_SYS_H */
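A minimal usage sketch of the wrapper defined above (hypothetical code, not part of this patch): the caller fills a zeroed perf_event_attr, opens a cycle counter bound to the current task, and reads the 64-bit count back.

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include "perf-sys.h"

static int count_cycles_sketch(void)
{
	struct perf_event_attr attr;
	unsigned long long count;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_CPU_CYCLES;
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	/* pid 0 = current task, cpu -1 = any CPU, no group leader, no flags */
	fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0)
		return -1;

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... run the code to be measured ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("cycles: %llu\n", count);

	close(fd);
	return 0;
}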
diff --git a/tools/perf/perf.c b/tools/perf/perf.c
index 431798a4110..95c58fc1528 100644
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@@ -458,6 +458,7 @@ int main(int argc, const char **argv)
/* The page_size is placed in util object. */
page_size = sysconf(_SC_PAGE_SIZE);
+ cacheline_size = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
cmd = perf_extract_argv0_path(argv[0]);
if (!cmd)
@@ -481,14 +482,18 @@ int main(int argc, const char **argv)
fprintf(stderr, "cannot handle %s internally", cmd);
goto out;
}
-#ifdef HAVE_LIBAUDIT_SUPPORT
if (!prefixcmp(cmd, "trace")) {
+#ifdef HAVE_LIBAUDIT_SUPPORT
set_buildid_dir();
setup_path();
argv[0] = "trace";
return cmd_trace(argc, argv, NULL);
- }
+#else
+ fprintf(stderr,
+ "trace command not available: missing audit-libs devel package at build time.\n");
+ goto out;
#endif
+ }
/* Look for flags.. */
argv++;
argc--;
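One caveat with the sysconf() call added above: _SC_LEVEL1_DCACHE_LINESIZE is a glibc extension and may report 0 or -1 on systems that do not expose the cache line size. A defensive caller might fall back to a default; the 64-byte value below is an assumption for illustration, not something this patch does.

	long linesize = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);

	if (linesize <= 0)
		linesize = 64;	/* assumed fallback when the value is unavailable */
	cacheline_size = linesize;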
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 7daa806d905..510c65f7285 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -1,168 +1,18 @@
#ifndef _PERF_PERF_H
#define _PERF_PERF_H
-#include <asm/unistd.h>
-
-#if defined(__i386__)
-#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define cpu_relax() asm volatile("rep; nop" ::: "memory");
-#define CPUINFO_PROC "model name"
-#ifndef __NR_perf_event_open
-# define __NR_perf_event_open 336
-#endif
-#endif
-
-#if defined(__x86_64__)
-#define mb() asm volatile("mfence" ::: "memory")
-#define wmb() asm volatile("sfence" ::: "memory")
-#define rmb() asm volatile("lfence" ::: "memory")
-#define cpu_relax() asm volatile("rep; nop" ::: "memory");
-#define CPUINFO_PROC "model name"
-#ifndef __NR_perf_event_open
-# define __NR_perf_event_open 298
-#endif
-#endif
-
-#ifdef __powerpc__
-#include "../../arch/powerpc/include/uapi/asm/unistd.h"
-#define mb() asm volatile ("sync" ::: "memory")
-#define wmb() asm volatile ("sync" ::: "memory")
-#define rmb() asm volatile ("sync" ::: "memory")
-#define CPUINFO_PROC "cpu"
-#endif
-
-#ifdef __s390__
-#define mb() asm volatile("bcr 15,0" ::: "memory")
-#define wmb() asm volatile("bcr 15,0" ::: "memory")
-#define rmb() asm volatile("bcr 15,0" ::: "memory")
-#endif
-
-#ifdef __sh__
-#if defined(__SH4A__) || defined(__SH5__)
-# define mb() asm volatile("synco" ::: "memory")
-# define wmb() asm volatile("synco" ::: "memory")
-# define rmb() asm volatile("synco" ::: "memory")
-#else
-# define mb() asm volatile("" ::: "memory")
-# define wmb() asm volatile("" ::: "memory")
-# define rmb() asm volatile("" ::: "memory")
-#endif
-#define CPUINFO_PROC "cpu type"
-#endif
-
-#ifdef __hppa__
-#define mb() asm volatile("" ::: "memory")
-#define wmb() asm volatile("" ::: "memory")
-#define rmb() asm volatile("" ::: "memory")
-#define CPUINFO_PROC "cpu"
-#endif
-
-#ifdef __sparc__
-#ifdef __LP64__
-#define mb() asm volatile("ba,pt %%xcc, 1f\n" \
- "membar #StoreLoad\n" \
- "1:\n":::"memory")
-#else
-#define mb() asm volatile("":::"memory")
-#endif
-#define wmb() asm volatile("":::"memory")
-#define rmb() asm volatile("":::"memory")
-#define CPUINFO_PROC "cpu"
-#endif
-
-#ifdef __alpha__
-#define mb() asm volatile("mb" ::: "memory")
-#define wmb() asm volatile("wmb" ::: "memory")
-#define rmb() asm volatile("mb" ::: "memory")
-#define CPUINFO_PROC "cpu model"
-#endif
-
-#ifdef __ia64__
-#define mb() asm volatile ("mf" ::: "memory")
-#define wmb() asm volatile ("mf" ::: "memory")
-#define rmb() asm volatile ("mf" ::: "memory")
-#define cpu_relax() asm volatile ("hint @pause" ::: "memory")
-#define CPUINFO_PROC "model name"
-#endif
-
-#ifdef __arm__
-/*
- * Use the __kuser_memory_barrier helper in the CPU helper page. See
- * arch/arm/kernel/entry-armv.S in the kernel source for details.
- */
-#define mb() ((void(*)(void))0xffff0fa0)()
-#define wmb() ((void(*)(void))0xffff0fa0)()
-#define rmb() ((void(*)(void))0xffff0fa0)()
-#define CPUINFO_PROC "Processor"
-#endif
-
-#ifdef __aarch64__
-#define mb() asm volatile("dmb ish" ::: "memory")
-#define wmb() asm volatile("dmb ishld" ::: "memory")
-#define rmb() asm volatile("dmb ishst" ::: "memory")
-#define cpu_relax() asm volatile("yield" ::: "memory")
-#endif
-
-#ifdef __mips__
-#define mb() asm volatile( \
- ".set mips2\n\t" \
- "sync\n\t" \
- ".set mips0" \
- : /* no output */ \
- : /* no input */ \
- : "memory")
-#define wmb() mb()
-#define rmb() mb()
-#define CPUINFO_PROC "cpu model"
-#endif
-
-#ifdef __arc__
-#define mb() asm volatile("" ::: "memory")
-#define wmb() asm volatile("" ::: "memory")
-#define rmb() asm volatile("" ::: "memory")
-#define CPUINFO_PROC "Processor"
-#endif
-
-#ifdef __metag__
-#define mb() asm volatile("" ::: "memory")
-#define wmb() asm volatile("" ::: "memory")
-#define rmb() asm volatile("" ::: "memory")
-#define CPUINFO_PROC "CPU"
-#endif
-
-#ifdef __xtensa__
-#define mb() asm volatile("memw" ::: "memory")
-#define wmb() asm volatile("memw" ::: "memory")
-#define rmb() asm volatile("" ::: "memory")
-#define CPUINFO_PROC "core ID"
-#endif
-
-#define barrier() asm volatile ("" ::: "memory")
-
-#ifndef cpu_relax
-#define cpu_relax() barrier()
-#endif
-
-#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
-
-
#include <time.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/syscall.h>
-
-#include <linux/perf_event.h>
-#include "util/types.h"
#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/perf_event.h>
+
+extern bool test_attr__enabled;
+void test_attr__init(void);
+void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int fd, int group_fd, unsigned long flags);
-/*
- * prctl(PR_TASK_PERF_EVENTS_DISABLE) will (cheaply) disable all
- * counters in the current task.
- */
-#define PR_TASK_PERF_EVENTS_DISABLE 31
-#define PR_TASK_PERF_EVENTS_ENABLE 32
+#define HAVE_ATTR_TEST
+#include "perf-sys.h"
#ifndef NSEC_PER_SEC
# define NSEC_PER_SEC 1000000000ULL
@@ -179,67 +29,8 @@ static inline unsigned long long rdclock(void)
return ts.tv_sec * 1000000000ULL + ts.tv_nsec;
}
-/*
- * Pick up some kernel type conventions:
- */
-#define __user
-#define asmlinkage
-
-#define unlikely(x) __builtin_expect(!!(x), 0)
-#define min(x, y) ({ \
- typeof(x) _min1 = (x); \
- typeof(y) _min2 = (y); \
- (void) (&_min1 == &_min2); \
- _min1 < _min2 ? _min1 : _min2; })
-
-extern bool test_attr__enabled;
-void test_attr__init(void);
-void test_attr__open(struct perf_event_attr *attr, pid_t pid, int cpu,
- int fd, int group_fd, unsigned long flags);
-
-static inline int
-sys_perf_event_open(struct perf_event_attr *attr,
- pid_t pid, int cpu, int group_fd,
- unsigned long flags)
-{
- int fd;
-
- fd = syscall(__NR_perf_event_open, attr, pid, cpu,
- group_fd, flags);
-
- if (unlikely(test_attr__enabled))
- test_attr__open(attr, pid, cpu, fd, group_fd, flags);
-
- return fd;
-}
-
-#define MAX_COUNTERS 256
#define MAX_NR_CPUS 256
-struct ip_callchain {
- u64 nr;
- u64 ips[0];
-};
-
-struct branch_flags {
- u64 mispred:1;
- u64 predicted:1;
- u64 in_tx:1;
- u64 abort:1;
- u64 reserved:60;
-};
-
-struct branch_entry {
- u64 from;
- u64 to;
- struct branch_flags flags;
-};
-
-struct branch_stack {
- u64 nr;
- struct branch_entry entries[0];
-};
-
extern const char *input_name;
extern bool perf_host, perf_guest;
extern const char perf_version_string[];
@@ -248,15 +39,10 @@ void pthread__unblock_sigwinch(void);
#include "util/target.h"
-enum perf_call_graph_mode {
- CALLCHAIN_NONE,
- CALLCHAIN_FP,
- CALLCHAIN_DWARF
-};
-
struct record_opts {
struct target target;
int call_graph;
+ bool call_graph_enabled;
bool group;
bool inherit_stat;
bool no_buffering;
diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c
index 00218f503b2..2dfc9ad0e6f 100644
--- a/tools/perf/tests/attr.c
+++ b/tools/perf/tests/attr.c
@@ -1,4 +1,3 @@
-
/*
* The struct perf_event_attr test support.
*
@@ -19,14 +18,8 @@
* permissions. All the event text files are stored there.
*/
-/*
- * Powerpc needs __SANE_USERSPACE_TYPES__ before <linux/types.h> to select
- * 'int-ll64.h' and avoid compile warnings when printing __u64 with %llu.
- */
-#define __SANE_USERSPACE_TYPES__
#include <stdlib.h>
#include <stdio.h>
-#include <inttypes.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include "../perf.h"
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 1e67437fb4c..6f8b01bc603 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -3,6 +3,8 @@
*
* Builtin regression testing command: ever growing number of sanity tests
*/
+#include <unistd.h>
+#include <string.h>
#include "builtin.h"
#include "intlist.h"
#include "tests.h"
@@ -50,10 +52,18 @@ static struct test {
.func = test__pmu,
},
{
- .desc = "Test dso data interface",
+ .desc = "Test dso data read",
.func = test__dso_data,
},
{
+ .desc = "Test dso data cache",
+ .func = test__dso_data_cache,
+ },
+ {
+ .desc = "Test dso data reopen",
+ .func = test__dso_data_reopen,
+ },
+ {
.desc = "roundtrip evsel->name check",
.func = test__perf_evsel__roundtrip_name_test,
},
@@ -115,6 +125,34 @@ static struct test {
.desc = "Test parsing with no sample_id_all bit set",
.func = test__parse_no_sample_id_all,
},
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#ifdef HAVE_DWARF_UNWIND_SUPPORT
+ {
+ .desc = "Test dwarf unwind",
+ .func = test__dwarf_unwind,
+ },
+#endif
+#endif
+ {
+ .desc = "Test filtering hist entries",
+ .func = test__hists_filter,
+ },
+ {
+ .desc = "Test mmap thread lookup",
+ .func = test__mmap_thread_lookup,
+ },
+ {
+ .desc = "Test thread mg sharing",
+ .func = test__thread_mg_share,
+ },
+ {
+ .desc = "Test output sorting of hist entries",
+ .func = test__hists_output,
+ },
+ {
+ .desc = "Test cumulation of child hist entries",
+ .func = test__hists_cumulate,
+ },
{
.func = NULL,
},
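New test cases are registered by adding another entry ahead of the NULL-terminated sentinel above; a hypothetical addition (both the description and the function name are invented here) would follow the same shape:

	{
		.desc = "Test my new feature",
		.func = test__my_new_feature,	/* hypothetical, would be declared in tests.h */
	},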
@@ -144,6 +182,34 @@ static bool perf_test__matches(int curr, int argc, const char *argv[])
return false;
}
+static int run_test(struct test *test)
+{
+ int status, err = -1, child = fork();
+
+ if (child < 0) {
+ pr_err("failed to fork test: %s\n", strerror(errno));
+ return -1;
+ }
+
+ if (!child) {
+ pr_debug("test child forked, pid %d\n", getpid());
+ err = test->func();
+ exit(err);
+ }
+
+ wait(&status);
+
+ if (WIFEXITED(status)) {
+ err = WEXITSTATUS(status);
+ pr_debug("test child finished with %d\n", err);
+ } else if (WIFSIGNALED(status)) {
+ err = -1;
+ pr_debug("test child interrupted\n");
+ }
+
+ return err;
+}
+
static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
{
int i = 0;
@@ -172,7 +238,7 @@ static int __cmd_test(int argc, const char *argv[], struct intlist *skiplist)
}
pr_debug("\n--- start ---\n");
- err = tests[curr].func();
+ err = run_test(&tests[curr]);
pr_debug("---- end ----\n%s:", tests[curr].desc);
switch (err) {
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index 653a8fe2db9..67f2d632355 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -1,8 +1,7 @@
-#include <sys/types.h>
+#include <linux/types.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdio.h>
-#include <inttypes.h>
#include <ctype.h>
#include <string.h>
@@ -257,7 +256,7 @@ static int process_sample_event(struct machine *machine,
return -1;
}
- thread = machine__findnew_thread(machine, sample.pid, sample.pid);
+ thread = machine__findnew_thread(machine, sample.pid, sample.tid);
if (!thread) {
pr_debug("machine__findnew_thread failed\n");
return -1;
@@ -504,6 +503,7 @@ static int do_test_code_reading(bool try_kcore)
if (ret < 0) {
if (!excl_kernel) {
excl_kernel = true;
+ perf_evlist__set_maps(evlist, NULL, NULL);
perf_evlist__delete(evlist);
evlist = NULL;
continue;
diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 9cc81a3eb9b..630808cd7cc 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -1,22 +1,27 @@
-#include "util.h"
-
#include <stdlib.h>
-#include <sys/types.h>
+#include <linux/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <string.h>
-
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <api/fs/fs.h>
+#include "util.h"
#include "machine.h"
#include "symbol.h"
#include "tests.h"
static char *test_file(int size)
{
- static char buf_templ[] = "/tmp/test-XXXXXX";
+#define TEMPL "/tmp/perf-test-XXXXXX"
+ static char buf_templ[sizeof(TEMPL)];
char *templ = buf_templ;
int fd, i;
unsigned char *buf;
+ strcpy(buf_templ, TEMPL);
+#undef TEMPL
+
fd = mkstemp(templ);
if (fd < 0) {
perror("mkstemp failed");
@@ -150,3 +155,204 @@ int test__dso_data(void)
unlink(file);
return 0;
}
+
+static long open_files_cnt(void)
+{
+ char path[PATH_MAX];
+ struct dirent *dent;
+ DIR *dir;
+ long nr = 0;
+
+ scnprintf(path, PATH_MAX, "%s/self/fd", procfs__mountpoint());
+ pr_debug("fd path: %s\n", path);
+
+ dir = opendir(path);
+ TEST_ASSERT_VAL("failed to open fd directory", dir);
+
+ while ((dent = readdir(dir)) != NULL) {
+ if (!strcmp(dent->d_name, ".") ||
+ !strcmp(dent->d_name, ".."))
+ continue;
+
+ nr++;
+ }
+
+ closedir(dir);
+ return nr - 1;
+}
+
+static struct dso **dsos;
+
+static int dsos__create(int cnt, int size)
+{
+ int i;
+
+ dsos = malloc(sizeof(dsos) * cnt);
+ TEST_ASSERT_VAL("failed to alloc dsos array", dsos);
+
+ for (i = 0; i < cnt; i++) {
+ char *file;
+
+ file = test_file(size);
+ TEST_ASSERT_VAL("failed to get dso file", file);
+
+ dsos[i] = dso__new(file);
+ TEST_ASSERT_VAL("failed to get dso", dsos[i]);
+ }
+
+ return 0;
+}
+
+static void dsos__delete(int cnt)
+{
+ int i;
+
+ for (i = 0; i < cnt; i++) {
+ struct dso *dso = dsos[i];
+
+ unlink(dso->name);
+ dso__delete(dso);
+ }
+
+ free(dsos);
+}
+
+static int set_fd_limit(int n)
+{
+ struct rlimit rlim;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim))
+ return -1;
+
+ pr_debug("file limit %ld, new %d\n", (long) rlim.rlim_cur, n);
+
+ rlim.rlim_cur = n;
+ return setrlimit(RLIMIT_NOFILE, &rlim);
+}
+
+int test__dso_data_cache(void)
+{
+ struct machine machine;
+ long nr_end, nr = open_files_cnt();
+ int dso_cnt, limit, i, fd;
+
+ memset(&machine, 0, sizeof(machine));
+
+ /* set as system limit */
+ limit = nr * 4;
+ TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit));
+
+ /* and this is now our dso open FDs limit + 1 extra */
+ dso_cnt = limit / 2 + 1;
+ TEST_ASSERT_VAL("failed to create dsos\n",
+ !dsos__create(dso_cnt, TEST_FILE_SIZE));
+
+ for (i = 0; i < (dso_cnt - 1); i++) {
+ struct dso *dso = dsos[i];
+
+ /*
+ * Open dsos via dso__data_fd or dso__data_read_offset.
+		 * Both open the data file and keep it open.
+ */
+ if (i % 2) {
+ fd = dso__data_fd(dso, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+ } else {
+ #define BUFSIZE 10
+ u8 buf[BUFSIZE];
+ ssize_t n;
+
+ n = dso__data_read_offset(dso, &machine, 0, buf, BUFSIZE);
+ TEST_ASSERT_VAL("failed to read dso", n == BUFSIZE);
+ }
+ }
+
+ /* open +1 dso over the allowed limit */
+ fd = dso__data_fd(dsos[i], &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /* should force the first one to be closed */
+ TEST_ASSERT_VAL("failed to close dsos[0]", dsos[0]->data.fd == -1);
+
+ /* cleanup everything */
+ dsos__delete(dso_cnt);
+
+ /* Make sure we did not leak any file descriptor. */
+ nr_end = open_files_cnt();
+ pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end);
+	TEST_ASSERT_VAL("failed leaking files", nr == nr_end);
+ return 0;
+}
+
+int test__dso_data_reopen(void)
+{
+ struct machine machine;
+ long nr_end, nr = open_files_cnt();
+ int fd, fd_extra;
+
+#define dso_0 (dsos[0])
+#define dso_1 (dsos[1])
+#define dso_2 (dsos[2])
+
+ memset(&machine, 0, sizeof(machine));
+
+ /*
+ * Test scenario:
+ * - create 3 dso objects
+ * - set process file descriptor limit to current
+ * files count + 3
+ * - test that the first dso gets closed when we
+ * reach the files count limit
+ */
+
+ /* Make sure we are able to open 3 fds anyway */
+ TEST_ASSERT_VAL("failed to set file limit",
+ !set_fd_limit((nr + 3)));
+
+ TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(3, TEST_FILE_SIZE));
+
+ /* open dso_0 */
+ fd = dso__data_fd(dso_0, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /* open dso_1 */
+ fd = dso__data_fd(dso_1, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /*
+	 * open an extra file descriptor so that we just
+	 * reach the file count limit
+ */
+ fd_extra = open("/dev/null", O_RDONLY);
+ TEST_ASSERT_VAL("failed to open extra fd", fd_extra > 0);
+
+ /* open dso_2 */
+ fd = dso__data_fd(dso_2, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /*
+ * dso_0 should get closed, because we reached
+ * the file descriptor limit
+ */
+ TEST_ASSERT_VAL("failed to close dso_0", dso_0->data.fd == -1);
+
+ /* open dso_0 */
+ fd = dso__data_fd(dso_0, &machine);
+ TEST_ASSERT_VAL("failed to get fd", fd > 0);
+
+ /*
+ * dso_1 should get closed, because we reached
+ * the file descriptor limit
+ */
+ TEST_ASSERT_VAL("failed to close dso_1", dso_1->data.fd == -1);
+
+ /* cleanup everything */
+ close(fd_extra);
+ dsos__delete(3);
+
+ /* Make sure we did not leak any file descriptor. */
+ nr_end = open_files_cnt();
+ pr_debug("nr start %ld, nr stop %ld\n", nr, nr_end);
+	TEST_ASSERT_VAL("failed leaking files", nr == nr_end);
+ return 0;
+}
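Both tests above lower RLIMIT_NOFILE and never restore it; that is safe only because each test now runs in a forked child (see the run_test() change in builtin-test.c), so the parent's limit is untouched. Outside of that setup, a sketch of the save/lower/restore pattern would be:

#include <sys/resource.h>

/* sketch: run fn() with a temporarily lowered soft fd limit */
static int with_lowered_fd_limit(rlim_t lowered, int (*fn)(void))
{
	struct rlimit old, tmp;
	int err;

	if (getrlimit(RLIMIT_NOFILE, &old))
		return -1;

	tmp = old;
	tmp.rlim_cur = lowered;		/* must stay <= old.rlim_max */
	if (setrlimit(RLIMIT_NOFILE, &tmp))
		return -1;

	err = fn();

	setrlimit(RLIMIT_NOFILE, &old);	/* restore the original limit */
	return err;
}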
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..96adb730b74
--- /dev/null
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -0,0 +1,144 @@
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <unistd.h>
+#include "tests.h"
+#include "debug.h"
+#include "machine.h"
+#include "event.h"
+#include "unwind.h"
+#include "perf_regs.h"
+#include "map.h"
+#include "thread.h"
+
+static int mmap_handler(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine)
+{
+ return machine__process_mmap2_event(machine, event, NULL);
+}
+
+static int init_live_machine(struct machine *machine)
+{
+ union perf_event event;
+ pid_t pid = getpid();
+
+ return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
+ mmap_handler, machine, true);
+}
+
+#define MAX_STACK 6
+
+static int unwind_entry(struct unwind_entry *entry, void *arg)
+{
+ unsigned long *cnt = (unsigned long *) arg;
+ char *symbol = entry->sym ? entry->sym->name : NULL;
+ static const char *funcs[MAX_STACK] = {
+ "test__arch_unwind_sample",
+ "unwind_thread",
+ "krava_3",
+ "krava_2",
+ "krava_1",
+ "test__dwarf_unwind"
+ };
+
+ if (*cnt >= MAX_STACK) {
+ pr_debug("failed: crossed the max stack value %d\n", MAX_STACK);
+ return -1;
+ }
+
+ if (!symbol) {
+ pr_debug("failed: got unresolved address 0x%" PRIx64 "\n",
+ entry->ip);
+ return -1;
+ }
+
+ pr_debug("got: %s 0x%" PRIx64 "\n", symbol, entry->ip);
+ return strcmp((const char *) symbol, funcs[(*cnt)++]);
+}
+
+__attribute__ ((noinline))
+static int unwind_thread(struct thread *thread, struct machine *machine)
+{
+ struct perf_sample sample;
+ unsigned long cnt = 0;
+ int err = -1;
+
+ memset(&sample, 0, sizeof(sample));
+
+ if (test__arch_unwind_sample(&sample, thread)) {
+ pr_debug("failed to get unwind sample\n");
+ goto out;
+ }
+
+ err = unwind__get_entries(unwind_entry, &cnt, machine, thread,
+ &sample, MAX_STACK);
+ if (err)
+ pr_debug("unwind failed\n");
+ else if (cnt != MAX_STACK) {
+ pr_debug("got wrong number of stack entries %lu != %d\n",
+ cnt, MAX_STACK);
+ err = -1;
+ }
+
+ out:
+ free(sample.user_stack.data);
+ free(sample.user_regs.regs);
+ return err;
+}
+
+__attribute__ ((noinline))
+static int krava_3(struct thread *thread, struct machine *machine)
+{
+ return unwind_thread(thread, machine);
+}
+
+__attribute__ ((noinline))
+static int krava_2(struct thread *thread, struct machine *machine)
+{
+ return krava_3(thread, machine);
+}
+
+__attribute__ ((noinline))
+static int krava_1(struct thread *thread, struct machine *machine)
+{
+ return krava_2(thread, machine);
+}
+
+int test__dwarf_unwind(void)
+{
+ struct machines machines;
+ struct machine *machine;
+ struct thread *thread;
+ int err = -1;
+
+ machines__init(&machines);
+
+ machine = machines__find(&machines, HOST_KERNEL_ID);
+ if (!machine) {
+ pr_err("Could not get machine\n");
+ return -1;
+ }
+
+ if (init_live_machine(machine)) {
+ pr_err("Could not init machine\n");
+ goto out;
+ }
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ thread = machine__find_thread(machine, getpid(), getpid());
+ if (!thread) {
+ pr_err("Could not get thread\n");
+ goto out;
+ }
+
+ err = krava_1(thread, machine);
+
+ out:
+ machine__delete_threads(machine);
+ machine__exit(machine);
+ machines__exit(&machines);
+ return err;
+}
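The unwind_entry() callback above both prints and validates each frame against the expected funcs[] order. A stripped-down callback with the same signature that only reports resolved frames (a sketch, not part of the test) could look like:

static int print_entry(struct unwind_entry *entry, void *arg __maybe_unused)
{
	/* entry->sym may be NULL when the address did not resolve */
	pr_debug("ip 0x%" PRIx64 " %s\n", entry->ip,
		 entry->sym ? entry->sym->name : "[unknown]");
	return 0;	/* a non-zero return is treated as an error and stops the walk */
}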
diff --git a/tools/perf/tests/evsel-tp-sched.c b/tools/perf/tests/evsel-tp-sched.c
index 4774f7fbb75..35d7fdb2328 100644
--- a/tools/perf/tests/evsel-tp-sched.c
+++ b/tools/perf/tests/evsel-tp-sched.c
@@ -74,9 +74,6 @@ int test__perf_evsel__tp_sched_test(void)
if (perf_evsel__test_field(evsel, "prio", 4, true))
ret = -1;
- if (perf_evsel__test_field(evsel, "success", 4, true))
- ret = -1;
-
if (perf_evsel__test_field(evsel, "target_cpu", 4, true))
ret = -1;
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
new file mode 100644
index 00000000000..a62c0913451
--- /dev/null
+++ b/tools/perf/tests/hists_common.c
@@ -0,0 +1,209 @@
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "tests/hists_common.h"
+
+static struct {
+ u32 pid;
+ const char *comm;
+} fake_threads[] = {
+ { FAKE_PID_PERF1, "perf" },
+ { FAKE_PID_PERF2, "perf" },
+ { FAKE_PID_BASH, "bash" },
+};
+
+static struct {
+ u32 pid;
+ u64 start;
+ const char *filename;
+} fake_mmap_info[] = {
+ { FAKE_PID_PERF1, FAKE_MAP_PERF, "perf" },
+ { FAKE_PID_PERF1, FAKE_MAP_LIBC, "libc" },
+ { FAKE_PID_PERF1, FAKE_MAP_KERNEL, "[kernel]" },
+ { FAKE_PID_PERF2, FAKE_MAP_PERF, "perf" },
+ { FAKE_PID_PERF2, FAKE_MAP_LIBC, "libc" },
+ { FAKE_PID_PERF2, FAKE_MAP_KERNEL, "[kernel]" },
+ { FAKE_PID_BASH, FAKE_MAP_BASH, "bash" },
+ { FAKE_PID_BASH, FAKE_MAP_LIBC, "libc" },
+ { FAKE_PID_BASH, FAKE_MAP_KERNEL, "[kernel]" },
+};
+
+struct fake_sym {
+ u64 start;
+ u64 length;
+ const char *name;
+};
+
+static struct fake_sym perf_syms[] = {
+ { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+ { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "run_command" },
+ { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "cmd_record" },
+};
+
+static struct fake_sym bash_syms[] = {
+ { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "main" },
+ { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "xmalloc" },
+ { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "xfree" },
+};
+
+static struct fake_sym libc_syms[] = {
+ { 700, 100, "malloc" },
+ { 800, 100, "free" },
+ { 900, 100, "realloc" },
+ { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "malloc" },
+ { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "free" },
+ { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "realloc" },
+};
+
+static struct fake_sym kernel_syms[] = {
+ { FAKE_SYM_OFFSET1, FAKE_SYM_LENGTH, "schedule" },
+ { FAKE_SYM_OFFSET2, FAKE_SYM_LENGTH, "page_fault" },
+ { FAKE_SYM_OFFSET3, FAKE_SYM_LENGTH, "sys_perf_event_open" },
+};
+
+static struct {
+ const char *dso_name;
+ struct fake_sym *syms;
+ size_t nr_syms;
+} fake_symbols[] = {
+ { "perf", perf_syms, ARRAY_SIZE(perf_syms) },
+ { "bash", bash_syms, ARRAY_SIZE(bash_syms) },
+ { "libc", libc_syms, ARRAY_SIZE(libc_syms) },
+ { "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
+};
+
+struct machine *setup_fake_machine(struct machines *machines)
+{
+ struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
+ size_t i;
+
+ if (machine == NULL) {
+ pr_debug("Not enough memory for machine setup\n");
+ return NULL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
+ struct thread *thread;
+
+ thread = machine__findnew_thread(machine, fake_threads[i].pid,
+ fake_threads[i].pid);
+ if (thread == NULL)
+ goto out;
+
+ thread__set_comm(thread, fake_threads[i].comm, 0);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
+ union perf_event fake_mmap_event = {
+ .mmap = {
+ .header = { .misc = PERF_RECORD_MISC_USER, },
+ .pid = fake_mmap_info[i].pid,
+ .tid = fake_mmap_info[i].pid,
+ .start = fake_mmap_info[i].start,
+ .len = FAKE_MAP_LENGTH,
+ .pgoff = 0ULL,
+ },
+ };
+
+ strcpy(fake_mmap_event.mmap.filename,
+ fake_mmap_info[i].filename);
+
+ machine__process_mmap_event(machine, &fake_mmap_event, NULL);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
+ size_t k;
+ struct dso *dso;
+
+ dso = __dsos__findnew(&machine->user_dsos,
+ fake_symbols[i].dso_name);
+ if (dso == NULL)
+ goto out;
+
+ /* emulate dso__load() */
+ dso__set_loaded(dso, MAP__FUNCTION);
+
+ for (k = 0; k < fake_symbols[i].nr_syms; k++) {
+ struct symbol *sym;
+ struct fake_sym *fsym = &fake_symbols[i].syms[k];
+
+ sym = symbol__new(fsym->start, fsym->length,
+ STB_GLOBAL, fsym->name);
+ if (sym == NULL)
+ goto out;
+
+ symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
+ }
+ }
+
+ return machine;
+
+out:
+ pr_debug("Not enough memory for machine setup\n");
+ machine__delete_threads(machine);
+ machine__delete(machine);
+ return NULL;
+}
+
+void print_hists_in(struct hists *hists)
+{
+ int i = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ pr_info("----- %s --------\n", __func__);
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node_in);
+
+ if (!he->filtered) {
+ pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
+ i, thread__comm_str(he->thread),
+ he->ms.map->dso->short_name,
+ he->ms.sym->name, he->stat.period);
+ }
+
+ i++;
+ node = rb_next(node);
+ }
+}
+
+void print_hists_out(struct hists *hists)
+{
+ int i = 0;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ root = &hists->entries;
+
+ pr_info("----- %s --------\n", __func__);
+ node = rb_first(root);
+ while (node) {
+ struct hist_entry *he;
+
+ he = rb_entry(node, struct hist_entry, rb_node);
+
+ if (!he->filtered) {
+ pr_info("%2d: entry: %8s:%5d [%-8s] %20s: period = %"PRIu64"/%"PRIu64"\n",
+ i, thread__comm_str(he->thread), he->thread->tid,
+ he->ms.map->dso->short_name,
+ he->ms.sym->name, he->stat.period,
+ he->stat_acc ? he->stat_acc->period : 0);
+ }
+
+ i++;
+ node = rb_next(node);
+ }
+}
diff --git a/tools/perf/tests/hists_common.h b/tools/perf/tests/hists_common.h
new file mode 100644
index 00000000000..888254e8665
--- /dev/null
+++ b/tools/perf/tests/hists_common.h
@@ -0,0 +1,75 @@
+#ifndef __PERF_TESTS__HISTS_COMMON_H__
+#define __PERF_TESTS__HISTS_COMMON_H__
+
+struct machine;
+struct machines;
+
+#define FAKE_PID_PERF1 100
+#define FAKE_PID_PERF2 200
+#define FAKE_PID_BASH 300
+
+#define FAKE_MAP_PERF 0x400000
+#define FAKE_MAP_BASH 0x400000
+#define FAKE_MAP_LIBC 0x500000
+#define FAKE_MAP_KERNEL 0xf00000
+#define FAKE_MAP_LENGTH 0x100000
+
+#define FAKE_SYM_OFFSET1 700
+#define FAKE_SYM_OFFSET2 800
+#define FAKE_SYM_OFFSET3 900
+#define FAKE_SYM_LENGTH 100
+
+#define FAKE_IP_PERF_MAIN FAKE_MAP_PERF + FAKE_SYM_OFFSET1
+#define FAKE_IP_PERF_RUN_COMMAND FAKE_MAP_PERF + FAKE_SYM_OFFSET2
+#define FAKE_IP_PERF_CMD_RECORD FAKE_MAP_PERF + FAKE_SYM_OFFSET3
+#define FAKE_IP_BASH_MAIN FAKE_MAP_BASH + FAKE_SYM_OFFSET1
+#define FAKE_IP_BASH_XMALLOC FAKE_MAP_BASH + FAKE_SYM_OFFSET2
+#define FAKE_IP_BASH_XFREE FAKE_MAP_BASH + FAKE_SYM_OFFSET3
+#define FAKE_IP_LIBC_MALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET1
+#define FAKE_IP_LIBC_FREE FAKE_MAP_LIBC + FAKE_SYM_OFFSET2
+#define FAKE_IP_LIBC_REALLOC FAKE_MAP_LIBC + FAKE_SYM_OFFSET3
+#define FAKE_IP_KERNEL_SCHEDULE FAKE_MAP_KERNEL + FAKE_SYM_OFFSET1
+#define FAKE_IP_KERNEL_PAGE_FAULT FAKE_MAP_KERNEL + FAKE_SYM_OFFSET2
+#define FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN FAKE_MAP_KERNEL + FAKE_SYM_OFFSET3
+
+/*
+ * setup_fake_machine() provides a test environment consisting of 3
+ * processes, each with 3 mappings, and each mapping in turn with 3
+ * symbols. See the table below:
+ *
+ * Command: Pid Shared Object Symbol
+ * ............. ............. ...................
+ * perf: 100 perf main
+ * perf: 100 perf run_command
+ * perf: 100 perf cmd_record
+ * perf: 100 libc malloc
+ * perf: 100 libc free
+ * perf: 100 libc realloc
+ * perf: 100 [kernel] schedule
+ * perf: 100 [kernel] page_fault
+ * perf: 100 [kernel] sys_perf_event_open
+ * perf: 200 perf main
+ * perf: 200 perf run_command
+ * perf: 200 perf cmd_record
+ * perf: 200 libc malloc
+ * perf: 200 libc free
+ * perf: 200 libc realloc
+ * perf: 200 [kernel] schedule
+ * perf: 200 [kernel] page_fault
+ * perf: 200 [kernel] sys_perf_event_open
+ * bash: 300 bash main
+ * bash: 300 bash xmalloc
+ * bash: 300 bash xfree
+ * bash: 300 libc malloc
+ * bash: 300 libc free
+ * bash: 300 libc realloc
+ * bash: 300 [kernel] schedule
+ * bash: 300 [kernel] page_fault
+ * bash: 300 [kernel] sys_perf_event_open
+ */
+struct machine *setup_fake_machine(struct machines *machines);
+
+void print_hists_in(struct hists *hists);
+void print_hists_out(struct hists *hists);
+
+#endif /* __PERF_TESTS__HISTS_COMMON_H__ */
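For concreteness, one of the fake addresses works out as follows (simple arithmetic from the macros above):

/*
 * e.g. FAKE_IP_PERF_MAIN expands to FAKE_MAP_PERF + FAKE_SYM_OFFSET1
 *      = 0x400000 + 700 = 0x4002bc,
 * i.e. an address 700 bytes into the fake "perf" mapping, which the
 * fake symbol tables in hists_common.c resolve to "main".
 */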
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
new file mode 100644
index 00000000000..0ac240db2e2
--- /dev/null
+++ b/tools/perf/tests/hists_cumulate.c
@@ -0,0 +1,726 @@
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+
+struct sample {
+ u32 pid;
+ u64 ip;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+ /* perf [kernel] schedule() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [perf] cmd_record() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+ /* perf [libc] malloc() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+ /* perf [libc] free() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [kernel] page_fault() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+ /* bash [bash] main() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
+ /* bash [bash] xmalloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
+ /* bash [kernel] page_fault() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+/*
+ * Will be cast to struct ip_callchain, whose nr and ips[] entries are
+ * all 64 bit.
+ */
+static u64 fake_callchains[][10] = {
+ /* schedule => run_command => main */
+ { 3, FAKE_IP_KERNEL_SCHEDULE, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+ /* main */
+ { 1, FAKE_IP_PERF_MAIN, },
+ /* cmd_record => run_command => main */
+ { 3, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+ /* malloc => cmd_record => run_command => main */
+ { 4, FAKE_IP_LIBC_MALLOC, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+ FAKE_IP_PERF_MAIN, },
+ /* free => cmd_record => run_command => main */
+ { 4, FAKE_IP_LIBC_FREE, FAKE_IP_PERF_CMD_RECORD, FAKE_IP_PERF_RUN_COMMAND,
+ FAKE_IP_PERF_MAIN, },
+ /* main */
+ { 1, FAKE_IP_PERF_MAIN, },
+ /* page_fault => sys_perf_event_open => run_command => main */
+ { 4, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN,
+ FAKE_IP_PERF_RUN_COMMAND, FAKE_IP_PERF_MAIN, },
+ /* main */
+ { 1, FAKE_IP_BASH_MAIN, },
+ /* xmalloc => malloc => xmalloc => malloc => xmalloc => main */
+ { 6, FAKE_IP_BASH_XMALLOC, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC,
+ FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_XMALLOC, FAKE_IP_BASH_MAIN, },
+ /* page_fault => malloc => main */
+ { 3, FAKE_IP_KERNEL_PAGE_FAULT, FAKE_IP_LIBC_MALLOC, FAKE_IP_BASH_MAIN, },
+};
+
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+ struct addr_location al;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ struct perf_sample sample = { .period = 1000, };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+ const union perf_event event = {
+ .header = {
+ .misc = PERF_RECORD_MISC_USER,
+ },
+ };
+ struct hist_entry_iter iter = {
+ .hide_unresolved = false,
+ };
+
+ if (symbol_conf.cumulate_callchain)
+ iter.ops = &hist_iter_cumulative;
+ else
+ iter.ops = &hist_iter_normal;
+
+ sample.pid = fake_samples[i].pid;
+ sample.tid = fake_samples[i].pid;
+ sample.ip = fake_samples[i].ip;
+ sample.callchain = (struct ip_callchain *)fake_callchains[i];
+
+ if (perf_event__preprocess_sample(&event, machine, &al,
+ &sample) < 0)
+ goto out;
+
+ if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+ PERF_MAX_STACK_DEPTH, NULL) < 0)
+ goto out;
+
+ fake_samples[i].thread = al.thread;
+ fake_samples[i].map = al.map;
+ fake_samples[i].sym = al.sym;
+ }
+
+ return TEST_OK;
+
+out:
+ pr_debug("Not enough memory for adding a hist entry\n");
+ return TEST_FAIL;
+}
+
+static void del_hist_entries(struct hists *hists)
+{
+ struct hist_entry *he;
+ struct rb_root *root_in;
+ struct rb_root *root_out;
+ struct rb_node *node;
+
+ if (sort__need_collapse)
+ root_in = &hists->entries_collapsed;
+ else
+ root_in = hists->entries_in;
+
+ root_out = &hists->entries;
+
+ while (!RB_EMPTY_ROOT(root_out)) {
+ node = rb_first(root_out);
+
+ he = rb_entry(node, struct hist_entry, rb_node);
+ rb_erase(node, root_out);
+ rb_erase(&he->rb_node_in, root_in);
+ hist_entry__free(he);
+ }
+}
+
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
+
+#define COMM(he) (thread__comm_str(he->thread))
+#define DSO(he) (he->ms.map->dso->short_name)
+#define SYM(he) (he->ms.sym->name)
+#define CPU(he) (he->cpu)
+#define PID(he) (he->thread->tid)
+#define DEPTH(he) (he->callchain->max_depth)
+#define CDSO(cl) (cl->ms.map->dso->short_name)
+#define CSYM(cl) (cl->ms.sym->name)
+
+struct result {
+ u64 children;
+ u64 self;
+ const char *comm;
+ const char *dso;
+ const char *sym;
+};
+
+struct callchain_result {
+ u64 nr;
+ struct {
+ const char *dso;
+ const char *sym;
+ } node[10];
+};
+
+static int do_test(struct hists *hists, struct result *expected, size_t nr_expected,
+ struct callchain_result *expected_callchain, size_t nr_callchain)
+{
+ char buf[32];
+ size_t i, c;
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+ struct callchain_node *cnode;
+ struct callchain_list *clist;
+
+ /*
+ * adding and deleting hist entries must be done outside of this
+ * function since TEST_ASSERT_VAL() returns in case of failure.
+ */
+ hists__collapse_resort(hists, NULL);
+ hists__output_resort(hists);
+
+ if (verbose > 2) {
+ pr_info("use callchain: %d, cumulate callchain: %d\n",
+ symbol_conf.use_callchain,
+ symbol_conf.cumulate_callchain);
+ print_hists_out(hists);
+ }
+
+ root = &hists->entries;
+ for (node = rb_first(root), i = 0;
+ node && (he = rb_entry(node, struct hist_entry, rb_node));
+ node = rb_next(node), i++) {
+ scnprintf(buf, sizeof(buf), "Invalid hist entry #%zd", i);
+
+ TEST_ASSERT_VAL("Incorrect number of hist entry",
+ i < nr_expected);
+ TEST_ASSERT_VAL(buf, he->stat.period == expected[i].self &&
+ !strcmp(COMM(he), expected[i].comm) &&
+ !strcmp(DSO(he), expected[i].dso) &&
+ !strcmp(SYM(he), expected[i].sym));
+
+ if (symbol_conf.cumulate_callchain)
+ TEST_ASSERT_VAL(buf, he->stat_acc->period == expected[i].children);
+
+ if (!symbol_conf.use_callchain)
+ continue;
+
+ /* check callchain entries */
+ root = &he->callchain->node.rb_root;
+ cnode = rb_entry(rb_first(root), struct callchain_node, rb_node);
+
+ c = 0;
+ list_for_each_entry(clist, &cnode->val, list) {
+ scnprintf(buf, sizeof(buf), "Invalid callchain entry #%zd/%zd", i, c);
+
+ TEST_ASSERT_VAL("Incorrect number of callchain entry",
+ c < expected_callchain[i].nr);
+ TEST_ASSERT_VAL(buf,
+ !strcmp(CDSO(clist), expected_callchain[i].node[c].dso) &&
+ !strcmp(CSYM(clist), expected_callchain[i].node[c].sym));
+ c++;
+ }
+ /* TODO: handle multiple child nodes properly */
+ TEST_ASSERT_VAL("Incorrect number of callchain entry",
+ c <= expected_callchain[i].nr);
+ }
+ TEST_ASSERT_VAL("Incorrect number of hist entry",
+ i == nr_expected);
+ TEST_ASSERT_VAL("Incorrect number of callchain entry",
+ !symbol_conf.use_callchain || nr_expected == nr_callchain);
+ return 0;
+}
+
+/* NO callchain + NO children */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ /*
+ * expected output:
+ *
+ * Overhead Command Shared Object Symbol
+ * ======== ======= ============= ==============
+ * 20.00% perf perf [.] main
+ * 10.00% bash [kernel] [k] page_fault
+ * 10.00% bash bash [.] main
+ * 10.00% bash bash [.] xmalloc
+ * 10.00% perf [kernel] [k] page_fault
+ * 10.00% perf [kernel] [k] schedule
+ * 10.00% perf libc [.] free
+ * 10.00% perf libc [.] malloc
+ * 10.00% perf perf [.] cmd_record
+ */
+ struct result expected[] = {
+ { 0, 2000, "perf", "perf", "main" },
+ { 0, 1000, "bash", "[kernel]", "page_fault" },
+ { 0, 1000, "bash", "bash", "main" },
+ { 0, 1000, "bash", "bash", "xmalloc" },
+ { 0, 1000, "perf", "[kernel]", "page_fault" },
+ { 0, 1000, "perf", "[kernel]", "schedule" },
+ { 0, 1000, "perf", "libc", "free" },
+ { 0, 1000, "perf", "libc", "malloc" },
+ { 0, 1000, "perf", "perf", "cmd_record" },
+ };
+
+ symbol_conf.use_callchain = false;
+ symbol_conf.cumulate_callchain = false;
+
+ setup_sorting();
+ callchain_register_param(&callchain_param);
+
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* callchain + NO children */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ /*
+ * expected output:
+ *
+ * Overhead Command Shared Object Symbol
+ * ======== ======= ============= ==============
+ * 20.00% perf perf [.] main
+ * |
+ * --- main
+ *
+ * 10.00% bash [kernel] [k] page_fault
+ * |
+ * --- page_fault
+ * malloc
+ * main
+ *
+ * 10.00% bash bash [.] main
+ * |
+ * --- main
+ *
+ * 10.00% bash bash [.] xmalloc
+ * |
+ * --- xmalloc
+ * malloc
+ * xmalloc <--- NOTE: there's a cycle
+ * malloc
+ * xmalloc
+ * main
+ *
+ * 10.00% perf [kernel] [k] page_fault
+ * |
+ * --- page_fault
+ * sys_perf_event_open
+ * run_command
+ * main
+ *
+ * 10.00% perf [kernel] [k] schedule
+ * |
+ * --- schedule
+ * run_command
+ * main
+ *
+ * 10.00% perf libc [.] free
+ * |
+ * --- free
+ * cmd_record
+ * run_command
+ * main
+ *
+ * 10.00% perf libc [.] malloc
+ * |
+ * --- malloc
+ * cmd_record
+ * run_command
+ * main
+ *
+ * 10.00% perf perf [.] cmd_record
+ * |
+ * --- cmd_record
+ * run_command
+ * main
+ *
+ */
+ struct result expected[] = {
+ { 0, 2000, "perf", "perf", "main" },
+ { 0, 1000, "bash", "[kernel]", "page_fault" },
+ { 0, 1000, "bash", "bash", "main" },
+ { 0, 1000, "bash", "bash", "xmalloc" },
+ { 0, 1000, "perf", "[kernel]", "page_fault" },
+ { 0, 1000, "perf", "[kernel]", "schedule" },
+ { 0, 1000, "perf", "libc", "free" },
+ { 0, 1000, "perf", "libc", "malloc" },
+ { 0, 1000, "perf", "perf", "cmd_record" },
+ };
+ struct callchain_result expected_callchain[] = {
+ {
+ 1, { { "perf", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "page_fault" },
+ { "libc", "malloc" },
+ { "bash", "main" }, },
+ },
+ {
+ 1, { { "bash", "main" }, },
+ },
+ {
+ 6, { { "bash", "xmalloc" },
+ { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "bash", "main" }, },
+ },
+ {
+ 4, { { "[kernel]", "page_fault" },
+ { "[kernel]", "sys_perf_event_open" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "schedule" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "free" },
+ { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "malloc" },
+ { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 3, { { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ };
+
+ symbol_conf.use_callchain = true;
+ symbol_conf.cumulate_callchain = false;
+
+ setup_sorting();
+ callchain_register_param(&callchain_param);
+
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ err = do_test(hists, expected, ARRAY_SIZE(expected),
+ expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* NO callchain + children */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ /*
+ * expected output:
+ *
+ * Children Self Command Shared Object Symbol
+ * ======== ======== ======= ============= =======================
+ * 70.00% 20.00% perf perf [.] main
+ * 50.00% 0.00% perf perf [.] run_command
+ * 30.00% 10.00% bash bash [.] main
+ * 30.00% 10.00% perf perf [.] cmd_record
+ * 20.00% 0.00% bash libc [.] malloc
+ * 10.00% 10.00% bash [kernel] [k] page_fault
+ * 10.00% 10.00% perf [kernel] [k] schedule
+ * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
+ * 10.00% 10.00% perf [kernel] [k] page_fault
+ * 10.00% 10.00% perf libc [.] free
+ * 10.00% 10.00% perf libc [.] malloc
+ * 10.00% 10.00% bash bash [.] xmalloc
+ */
+ struct result expected[] = {
+ { 7000, 2000, "perf", "perf", "main" },
+ { 5000, 0, "perf", "perf", "run_command" },
+ { 3000, 1000, "bash", "bash", "main" },
+ { 3000, 1000, "perf", "perf", "cmd_record" },
+ { 2000, 0, "bash", "libc", "malloc" },
+ { 1000, 1000, "bash", "[kernel]", "page_fault" },
+ { 1000, 1000, "perf", "[kernel]", "schedule" },
+ { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
+ { 1000, 1000, "perf", "[kernel]", "page_fault" },
+ { 1000, 1000, "perf", "libc", "free" },
+ { 1000, 1000, "perf", "libc", "malloc" },
+ { 1000, 1000, "bash", "bash", "xmalloc" },
+ };
+
+ symbol_conf.use_callchain = false;
+ symbol_conf.cumulate_callchain = true;
+
+ setup_sorting();
+ callchain_register_param(&callchain_param);
+
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ err = do_test(hists, expected, ARRAY_SIZE(expected), NULL, 0);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* callchain + children */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ /*
+ * expected output:
+ *
+ * Children Self Command Shared Object Symbol
+ * ======== ======== ======= ============= =======================
+ * 70.00% 20.00% perf perf [.] main
+ * |
+ * --- main
+ *
+ * 50.00% 0.00% perf perf [.] run_command
+ * |
+ * --- run_command
+ * main
+ *
+ * 30.00% 10.00% bash bash [.] main
+ * |
+ * --- main
+ *
+ * 30.00% 10.00% perf perf [.] cmd_record
+ * |
+ * --- cmd_record
+ * run_command
+ * main
+ *
+ * 20.00% 0.00% bash libc [.] malloc
+ * |
+ * --- malloc
+ * |
+ * |--50.00%-- xmalloc
+ * | main
+ * --50.00%-- main
+ *
+ * 10.00% 10.00% bash [kernel] [k] page_fault
+ * |
+ * --- page_fault
+ * malloc
+ * main
+ *
+ * 10.00% 10.00% perf [kernel] [k] schedule
+ * |
+ * --- schedule
+ * run_command
+ * main
+ *
+ * 10.00% 0.00% perf [kernel] [k] sys_perf_event_open
+ * |
+ * --- sys_perf_event_open
+ * run_command
+ * main
+ *
+ * 10.00% 10.00% perf [kernel] [k] page_fault
+ * |
+ * --- page_fault
+ * sys_perf_event_open
+ * run_command
+ * main
+ *
+ * 10.00% 10.00% perf libc [.] free
+ * |
+ * --- free
+ * cmd_record
+ * run_command
+ * main
+ *
+ * 10.00% 10.00% perf libc [.] malloc
+ * |
+ * --- malloc
+ * cmd_record
+ * run_command
+ * main
+ *
+ * 10.00% 10.00% bash bash [.] xmalloc
+ * |
+ * --- xmalloc
+ * malloc
+ * xmalloc <--- NOTE: there's a cycle
+ * malloc
+ * xmalloc
+ * main
+ *
+ */
+ struct result expected[] = {
+ { 7000, 2000, "perf", "perf", "main" },
+ { 5000, 0, "perf", "perf", "run_command" },
+ { 3000, 1000, "bash", "bash", "main" },
+ { 3000, 1000, "perf", "perf", "cmd_record" },
+ { 2000, 0, "bash", "libc", "malloc" },
+ { 1000, 1000, "bash", "[kernel]", "page_fault" },
+ { 1000, 1000, "perf", "[kernel]", "schedule" },
+ { 1000, 0, "perf", "[kernel]", "sys_perf_event_open" },
+ { 1000, 1000, "perf", "[kernel]", "page_fault" },
+ { 1000, 1000, "perf", "libc", "free" },
+ { 1000, 1000, "perf", "libc", "malloc" },
+ { 1000, 1000, "bash", "bash", "xmalloc" },
+ };
+ struct callchain_result expected_callchain[] = {
+ {
+ 1, { { "perf", "main" }, },
+ },
+ {
+ 2, { { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 1, { { "bash", "main" }, },
+ },
+ {
+ 3, { { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "bash", "main" },
+ { "bash", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "page_fault" },
+ { "libc", "malloc" },
+ { "bash", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "schedule" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 3, { { "[kernel]", "sys_perf_event_open" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "[kernel]", "page_fault" },
+ { "[kernel]", "sys_perf_event_open" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "free" },
+ { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 4, { { "libc", "malloc" },
+ { "perf", "cmd_record" },
+ { "perf", "run_command" },
+ { "perf", "main" }, },
+ },
+ {
+ 6, { { "bash", "xmalloc" },
+ { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "libc", "malloc" },
+ { "bash", "xmalloc" },
+ { "bash", "main" }, },
+ },
+ };
+
+ symbol_conf.use_callchain = true;
+ symbol_conf.cumulate_callchain = true;
+
+ setup_sorting();
+ callchain_register_param(&callchain_param);
+
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ err = do_test(hists, expected, ARRAY_SIZE(expected),
+ expected_callchain, ARRAY_SIZE(expected_callchain));
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+int test__hists_cumulate(void)
+{
+ int err = TEST_FAIL;
+ struct machines machines;
+ struct machine *machine;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = perf_evlist__new();
+ size_t i;
+ test_fn_t testcases[] = {
+ test1,
+ test2,
+ test3,
+ test4,
+ };
+
+ TEST_ASSERT_VAL("No memory", evlist);
+
+ err = parse_events(evlist, "cpu-clock");
+ if (err)
+ goto out;
+
+ machines__init(&machines);
+
+ /* setup threads/dso/map/symbols also */
+ machine = setup_fake_machine(&machines);
+ if (!machine)
+ goto out;
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ evsel = perf_evlist__first(evlist);
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ err = testcases[i](evsel, machine);
+ if (err < 0)
+ break;
+ }
+
+out:
+ /* tear down everything */
+ perf_evlist__delete(evlist);
+ machines__exit(&machines);
+
+ return err;
+}
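As a sanity check on the expected numbers in test3/test4 above (derived from the fake samples, each with period 1000):

/*
 * Total period: 10 samples * 1000 = 10000.
 * "perf perf main" is the sampled leaf in 2 samples (self = 2000,
 * i.e. 20.00%) and appears as a callchain ancestor in 5 more of the
 * perf samples, so its cumulative ("children") period is
 * 7 * 1000 = 7000, i.e. 70.00% -- matching the first expected row.
 */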
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
new file mode 100644
index 00000000000..821f581fd93
--- /dev/null
+++ b/tools/perf/tests/hists_filter.c
@@ -0,0 +1,289 @@
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+
+struct sample {
+ u32 pid;
+ u64 ip;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+ /* perf [kernel] schedule() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [libc] malloc() */
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+ /* perf [perf] main() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, }, /* will be merged */
+ /* perf [perf] cmd_record() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
+ /* perf [kernel] page_fault() */
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+ /* bash [bash] main() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
+ /* bash [bash] xmalloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
+ /* bash [libc] malloc() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
+ /* bash [kernel] page_fault() */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
+static int add_hist_entries(struct perf_evlist *evlist,
+ struct machine *machine __maybe_unused)
+{
+ struct perf_evsel *evsel;
+ struct addr_location al;
+ struct perf_sample sample = { .period = 100, };
+ size_t i;
+
+ /*
+	 * Each evsel will have 10 samples, but the 4th sample
+	 * (perf [perf] main) will be collapsed into an existing entry,
+	 * so 9 entries in total will be in the tree.
+ */
+ evlist__for_each(evlist, evsel) {
+ for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+ const union perf_event event = {
+ .header = {
+ .misc = PERF_RECORD_MISC_USER,
+ },
+ };
+ struct hist_entry_iter iter = {
+ .ops = &hist_iter_normal,
+ .hide_unresolved = false,
+ };
+
+ /* make sure it has no filter at first */
+ evsel->hists.thread_filter = NULL;
+ evsel->hists.dso_filter = NULL;
+ evsel->hists.symbol_filter_str = NULL;
+
+ sample.pid = fake_samples[i].pid;
+ sample.tid = fake_samples[i].pid;
+ sample.ip = fake_samples[i].ip;
+
+ if (perf_event__preprocess_sample(&event, machine, &al,
+ &sample) < 0)
+ goto out;
+
+ if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+ PERF_MAX_STACK_DEPTH, NULL) < 0)
+ goto out;
+
+ fake_samples[i].thread = al.thread;
+ fake_samples[i].map = al.map;
+ fake_samples[i].sym = al.sym;
+ }
+ }
+
+ return 0;
+
+out:
+ pr_debug("Not enough memory for adding a hist entry\n");
+ return TEST_FAIL;
+}
+
+int test__hists_filter(void)
+{
+ int err = TEST_FAIL;
+ struct machines machines;
+ struct machine *machine;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = perf_evlist__new();
+
+ TEST_ASSERT_VAL("No memory", evlist);
+
+ err = parse_events(evlist, "cpu-clock");
+ if (err)
+ goto out;
+ err = parse_events(evlist, "task-clock");
+ if (err)
+ goto out;
+
+ /* default sort order (comm,dso,sym) will be used */
+ if (setup_sorting() < 0)
+ goto out;
+
+ machines__init(&machines);
+
+ /* setup threads/dso/map/symbols also */
+ machine = setup_fake_machine(&machines);
+ if (!machine)
+ goto out;
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ /* process sample events */
+ err = add_hist_entries(evlist, machine);
+ if (err < 0)
+ goto out;
+
+ evlist__for_each(evlist, evsel) {
+ struct hists *hists = &evsel->hists;
+
+ hists__collapse_resort(hists, NULL);
+ hists__output_resort(hists);
+
+ if (verbose > 2) {
+ pr_info("Normal histogram\n");
+ print_hists_out(hists);
+ }
+
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+ TEST_ASSERT_VAL("Unmatched nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] ==
+ hists->stats.nr_non_filtered_samples);
+ TEST_ASSERT_VAL("Unmatched nr hist entries",
+ hists->nr_entries == hists->nr_non_filtered_entries);
+ TEST_ASSERT_VAL("Unmatched total period",
+ hists->stats.total_period ==
+ hists->stats.total_non_filtered_period);
+
+ /* now applying thread filter for 'bash' */
+ evsel->hists.thread_filter = fake_samples[9].thread;
+ hists__filter_by_thread(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for thread filter\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for thread filter",
+ hists->stats.nr_non_filtered_samples == 4);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for thread filter",
+ hists->nr_non_filtered_entries == 4);
+ TEST_ASSERT_VAL("Unmatched total period for thread filter",
+ hists->stats.total_non_filtered_period == 400);
+
+ /* remove thread filter first */
+ evsel->hists.thread_filter = NULL;
+ hists__filter_by_thread(hists);
+
+ /* now applying dso filter for 'kernel' */
+ evsel->hists.dso_filter = fake_samples[0].map->dso;
+ hists__filter_by_dso(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for dso filter\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for dso filter",
+ hists->stats.nr_non_filtered_samples == 3);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for dso filter",
+ hists->nr_non_filtered_entries == 3);
+ TEST_ASSERT_VAL("Unmatched total period for dso filter",
+ hists->stats.total_non_filtered_period == 300);
+
+ /* remove dso filter first */
+ evsel->hists.dso_filter = NULL;
+ hists__filter_by_dso(hists);
+
+ /*
+ * now applying symbol filter for 'main'. Also note that
+		 * there are 3 samples that have the 'main' symbol, but the
+		 * 4th entry of fake_samples was already collapsed so it is
+		 * not counted as a separate entry; its sample count and
+		 * period still contribute to the totals.
+ */
+ evsel->hists.symbol_filter_str = "main";
+ hists__filter_by_symbol(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for symbol filter\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for symbol filter",
+ hists->stats.nr_non_filtered_samples == 3);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for symbol filter",
+ hists->nr_non_filtered_entries == 2);
+ TEST_ASSERT_VAL("Unmatched total period for symbol filter",
+ hists->stats.total_non_filtered_period == 300);
+
+ /* now applying all filters at once. */
+ evsel->hists.thread_filter = fake_samples[1].thread;
+ evsel->hists.dso_filter = fake_samples[1].map->dso;
+ hists__filter_by_thread(hists);
+ hists__filter_by_dso(hists);
+
+ if (verbose > 2) {
+ pr_info("Histogram for all filters\n");
+ print_hists_out(hists);
+ }
+
+ /* normal stats should be invariant */
+ TEST_ASSERT_VAL("Invalid nr samples",
+ hists->stats.nr_events[PERF_RECORD_SAMPLE] == 10);
+ TEST_ASSERT_VAL("Invalid nr hist entries",
+ hists->nr_entries == 9);
+ TEST_ASSERT_VAL("Invalid total period",
+ hists->stats.total_period == 1000);
+
+ /* but filter stats are changed */
+ TEST_ASSERT_VAL("Unmatched nr samples for all filter",
+ hists->stats.nr_non_filtered_samples == 2);
+ TEST_ASSERT_VAL("Unmatched nr hist entries for all filter",
+ hists->nr_non_filtered_entries == 1);
+ TEST_ASSERT_VAL("Unmatched total period for all filter",
+ hists->stats.total_non_filtered_period == 200);
+ }
+
+ err = TEST_OK;
+
+out:
+ /* tear down everything */
+ perf_evlist__delete(evlist);
+ reset_output_field();
+ machines__exit(&machines);
+
+ return err;
+}
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 2b6519e0e36..d4b34b0f50a 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -8,144 +8,7 @@
#include "machine.h"
#include "thread.h"
#include "parse-events.h"
-
-static struct {
- u32 pid;
- const char *comm;
-} fake_threads[] = {
- { 100, "perf" },
- { 200, "perf" },
- { 300, "bash" },
-};
-
-static struct {
- u32 pid;
- u64 start;
- const char *filename;
-} fake_mmap_info[] = {
- { 100, 0x40000, "perf" },
- { 100, 0x50000, "libc" },
- { 100, 0xf0000, "[kernel]" },
- { 200, 0x40000, "perf" },
- { 200, 0x50000, "libc" },
- { 200, 0xf0000, "[kernel]" },
- { 300, 0x40000, "bash" },
- { 300, 0x50000, "libc" },
- { 300, 0xf0000, "[kernel]" },
-};
-
-struct fake_sym {
- u64 start;
- u64 length;
- const char *name;
-};
-
-static struct fake_sym perf_syms[] = {
- { 700, 100, "main" },
- { 800, 100, "run_command" },
- { 900, 100, "cmd_record" },
-};
-
-static struct fake_sym bash_syms[] = {
- { 700, 100, "main" },
- { 800, 100, "xmalloc" },
- { 900, 100, "xfree" },
-};
-
-static struct fake_sym libc_syms[] = {
- { 700, 100, "malloc" },
- { 800, 100, "free" },
- { 900, 100, "realloc" },
-};
-
-static struct fake_sym kernel_syms[] = {
- { 700, 100, "schedule" },
- { 800, 100, "page_fault" },
- { 900, 100, "sys_perf_event_open" },
-};
-
-static struct {
- const char *dso_name;
- struct fake_sym *syms;
- size_t nr_syms;
-} fake_symbols[] = {
- { "perf", perf_syms, ARRAY_SIZE(perf_syms) },
- { "bash", bash_syms, ARRAY_SIZE(bash_syms) },
- { "libc", libc_syms, ARRAY_SIZE(libc_syms) },
- { "[kernel]", kernel_syms, ARRAY_SIZE(kernel_syms) },
-};
-
-static struct machine *setup_fake_machine(struct machines *machines)
-{
- struct machine *machine = machines__find(machines, HOST_KERNEL_ID);
- size_t i;
-
- if (machine == NULL) {
- pr_debug("Not enough memory for machine setup\n");
- return NULL;
- }
-
- for (i = 0; i < ARRAY_SIZE(fake_threads); i++) {
- struct thread *thread;
-
- thread = machine__findnew_thread(machine, fake_threads[i].pid,
- fake_threads[i].pid);
- if (thread == NULL)
- goto out;
-
- thread__set_comm(thread, fake_threads[i].comm, 0);
- }
-
- for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
- union perf_event fake_mmap_event = {
- .mmap = {
- .header = { .misc = PERF_RECORD_MISC_USER, },
- .pid = fake_mmap_info[i].pid,
- .start = fake_mmap_info[i].start,
- .len = 0x1000ULL,
- .pgoff = 0ULL,
- },
- };
-
- strcpy(fake_mmap_event.mmap.filename,
- fake_mmap_info[i].filename);
-
- machine__process_mmap_event(machine, &fake_mmap_event, NULL);
- }
-
- for (i = 0; i < ARRAY_SIZE(fake_symbols); i++) {
- size_t k;
- struct dso *dso;
-
- dso = __dsos__findnew(&machine->user_dsos,
- fake_symbols[i].dso_name);
- if (dso == NULL)
- goto out;
-
- /* emulate dso__load() */
- dso__set_loaded(dso, MAP__FUNCTION);
-
- for (k = 0; k < fake_symbols[i].nr_syms; k++) {
- struct symbol *sym;
- struct fake_sym *fsym = &fake_symbols[i].syms[k];
-
- sym = symbol__new(fsym->start, fsym->length,
- STB_GLOBAL, fsym->name);
- if (sym == NULL)
- goto out;
-
- symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
- }
- }
-
- return machine;
-
-out:
- pr_debug("Not enough memory for machine setup\n");
- machine__delete_threads(machine);
- machine__delete(machine);
- return NULL;
-}
+#include "hists_common.h"
struct sample {
u32 pid;
@@ -155,43 +18,44 @@ struct sample {
struct symbol *sym;
};
+/* For the numbers, see hists_common.c */
static struct sample fake_common_samples[] = {
/* perf [kernel] schedule() */
- { .pid = 100, .ip = 0xf0000 + 700, },
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
/* perf [perf] main() */
- { .pid = 200, .ip = 0x40000 + 700, },
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
/* perf [perf] cmd_record() */
- { .pid = 200, .ip = 0x40000 + 900, },
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_CMD_RECORD, },
/* bash [bash] xmalloc() */
- { .pid = 300, .ip = 0x40000 + 800, },
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
/* bash [libc] malloc() */
- { .pid = 300, .ip = 0x50000 + 700, },
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, },
};
static struct sample fake_samples[][5] = {
{
/* perf [perf] run_command() */
- { .pid = 100, .ip = 0x40000 + 800, },
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_RUN_COMMAND, },
/* perf [libc] malloc() */
- { .pid = 100, .ip = 0x50000 + 700, },
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
/* perf [kernel] page_fault() */
- { .pid = 100, .ip = 0xf0000 + 800, },
+ { .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
/* perf [kernel] sys_perf_event_open() */
- { .pid = 200, .ip = 0xf0000 + 900, },
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_SYS_PERF_EVENT_OPEN, },
/* bash [libc] free() */
- { .pid = 300, .ip = 0x50000 + 800, },
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_FREE, },
},
{
/* perf [libc] free() */
- { .pid = 200, .ip = 0x50000 + 800, },
+ { .pid = FAKE_PID_PERF2, .ip = FAKE_IP_LIBC_FREE, },
/* bash [libc] malloc() */
- { .pid = 300, .ip = 0x50000 + 700, }, /* will be merged */
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_MALLOC, }, /* will be merged */
	/* bash [bash] xfree() */
- { .pid = 300, .ip = 0x40000 + 900, },
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XFREE, },
/* bash [libc] realloc() */
- { .pid = 300, .ip = 0x50000 + 900, },
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_LIBC_REALLOC, },
/* bash [kernel] page_fault() */
- { .pid = 300, .ip = 0xf0000 + 800, },
+ { .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
},
};
@@ -200,7 +64,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
struct perf_evsel *evsel;
struct addr_location al;
struct hist_entry *he;
- struct perf_sample sample = { .cpu = 0, };
+ struct perf_sample sample = { .period = 1, };
size_t i = 0, k;
/*
@@ -217,13 +81,14 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
};
sample.pid = fake_common_samples[k].pid;
+ sample.tid = fake_common_samples[k].pid;
sample.ip = fake_common_samples[k].ip;
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
- NULL, NULL, 1, 1, 0);
+ NULL, NULL, 1, 1, 0, true);
if (he == NULL)
goto out;
@@ -240,13 +105,14 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
};
sample.pid = fake_samples[i][k].pid;
+ sample.tid = fake_samples[i][k].pid;
sample.ip = fake_samples[i][k].ip;
if (perf_event__preprocess_sample(&event, machine, &al,
&sample) < 0)
goto out;
he = __hists__add_entry(&evsel->hists, &al, NULL,
- NULL, NULL, 1, 1, 0);
+ NULL, NULL, 1, 1, 0, true);
if (he == NULL)
goto out;
@@ -402,33 +268,6 @@ static int validate_link(struct hists *leader, struct hists *other)
return __validate_link(leader, 0) || __validate_link(other, 1);
}
-static void print_hists(struct hists *hists)
-{
- int i = 0;
- struct rb_root *root;
- struct rb_node *node;
-
- if (sort__need_collapse)
- root = &hists->entries_collapsed;
- else
- root = hists->entries_in;
-
- pr_info("----- %s --------\n", __func__);
- node = rb_first(root);
- while (node) {
- struct hist_entry *he;
-
- he = rb_entry(node, struct hist_entry, rb_node_in);
-
- pr_info("%2d: entry: %-8s [%-8s] %20s: period = %"PRIu64"\n",
- i, thread__comm_str(he->thread), he->ms.map->dso->short_name,
- he->ms.sym->name, he->stat.period);
-
- i++;
- node = rb_next(node);
- }
-}
-
int test__hists_link(void)
{
int err = -1;
@@ -470,7 +309,7 @@ int test__hists_link(void)
hists__collapse_resort(&evsel->hists, NULL);
if (verbose > 2)
- print_hists(&evsel->hists);
+ print_hists_in(&evsel->hists);
}
first = perf_evlist__first(evlist);
@@ -493,6 +332,7 @@ int test__hists_link(void)
out:
/* tear down everything */
perf_evlist__delete(evlist);
+ reset_output_field();
machines__exit(&machines);
return err;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
new file mode 100644
index 00000000000..e3bbd6c54c1
--- /dev/null
+++ b/tools/perf/tests/hists_output.c
@@ -0,0 +1,621 @@
+#include "perf.h"
+#include "util/debug.h"
+#include "util/symbol.h"
+#include "util/sort.h"
+#include "util/evsel.h"
+#include "util/evlist.h"
+#include "util/machine.h"
+#include "util/thread.h"
+#include "util/parse-events.h"
+#include "tests/tests.h"
+#include "tests/hists_common.h"
+
+struct sample {
+ u32 cpu;
+ u32 pid;
+ u64 ip;
+ struct thread *thread;
+ struct map *map;
+ struct symbol *sym;
+};
+
+/* For the numbers, see hists_common.c */
+static struct sample fake_samples[] = {
+ /* perf [kernel] schedule() */
+ { .cpu = 0, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_KERNEL_SCHEDULE, },
+ /* perf [perf] main() */
+ { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [perf] cmd_record() */
+ { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_PERF_CMD_RECORD, },
+ /* perf [libc] malloc() */
+ { .cpu = 1, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_MALLOC, },
+ /* perf [libc] free() */
+ { .cpu = 2, .pid = FAKE_PID_PERF1, .ip = FAKE_IP_LIBC_FREE, },
+ /* perf [perf] main() */
+ { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_PERF_MAIN, },
+ /* perf [kernel] page_fault() */
+ { .cpu = 2, .pid = FAKE_PID_PERF2, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+ /* bash [bash] main() */
+ { .cpu = 3, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_MAIN, },
+ /* bash [bash] xmalloc() */
+ { .cpu = 0, .pid = FAKE_PID_BASH, .ip = FAKE_IP_BASH_XMALLOC, },
+ /* bash [kernel] page_fault() */
+ { .cpu = 1, .pid = FAKE_PID_BASH, .ip = FAKE_IP_KERNEL_PAGE_FAULT, },
+};
+
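+/* resolve each fake sample and add it to the given hists via the normal iterator path */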
+static int add_hist_entries(struct hists *hists, struct machine *machine)
+{
+ struct addr_location al;
+ struct perf_evsel *evsel = hists_to_evsel(hists);
+ struct perf_sample sample = { .period = 100, };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(fake_samples); i++) {
+ const union perf_event event = {
+ .header = {
+ .misc = PERF_RECORD_MISC_USER,
+ },
+ };
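+		/* add the sample using the normal (non-cumulative) hist entry iterator */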
+ struct hist_entry_iter iter = {
+ .ops = &hist_iter_normal,
+ .hide_unresolved = false,
+ };
+
+ sample.cpu = fake_samples[i].cpu;
+ sample.pid = fake_samples[i].pid;
+ sample.tid = fake_samples[i].pid;
+ sample.ip = fake_samples[i].ip;
+
+ if (perf_event__preprocess_sample(&event, machine, &al,
+ &sample) < 0)
+ goto out;
+
+ if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+ PERF_MAX_STACK_DEPTH, NULL) < 0)
+ goto out;
+
+ fake_samples[i].thread = al.thread;
+ fake_samples[i].map = al.map;
+ fake_samples[i].sym = al.sym;
+ }
+
+ return TEST_OK;
+
+out:
+ pr_debug("Not enough memory for adding a hist entry\n");
+ return TEST_FAIL;
+}
+
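+/* remove and free every hist entry, unlinking it from both the output and input trees */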
+static void del_hist_entries(struct hists *hists)
+{
+ struct hist_entry *he;
+ struct rb_root *root_in;
+ struct rb_root *root_out;
+ struct rb_node *node;
+
+ if (sort__need_collapse)
+ root_in = &hists->entries_collapsed;
+ else
+ root_in = hists->entries_in;
+
+ root_out = &hists->entries;
+
+ while (!RB_EMPTY_ROOT(root_out)) {
+ node = rb_first(root_out);
+
+ he = rb_entry(node, struct hist_entry, rb_node);
+ rb_erase(node, root_out);
+ rb_erase(&he->rb_node_in, root_in);
+ hist_entry__free(he);
+ }
+}
+
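+/* a test case runs against the shared evsel and fake machine; TEST_OK on success, negative on failure */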
+typedef int (*test_fn_t)(struct perf_evsel *, struct machine *);
+
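+/* shorthand accessors for the hist_entry fields checked in the assertions below */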
+#define COMM(he) (thread__comm_str(he->thread))
+#define DSO(he) (he->ms.map->dso->short_name)
+#define SYM(he) (he->ms.sym->name)
+#define CPU(he) (he->cpu)
+#define PID(he) (he->thread->tid)
+
+/* default sort keys (no field) */
+static int test1(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = NULL;
+ sort_order = NULL; /* equivalent to sort_order = "comm,dso,sym" */
+
+ setup_sorting();
+
+ /*
+ * expected output:
+ *
+ * Overhead Command Shared Object Symbol
+ * ======== ======= ============= ==============
+ * 20.00% perf perf [.] main
+ * 10.00% bash [kernel] [k] page_fault
+ * 10.00% bash bash [.] main
+ * 10.00% bash bash [.] xmalloc
+ * 10.00% perf [kernel] [k] page_fault
+ * 10.00% perf [kernel] [k] schedule
+ * 10.00% perf libc [.] free
+ * 10.00% perf libc [.] malloc
+ * 10.00% perf perf [.] cmd_record
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ hists__output_resort(hists);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &evsel->hists.entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 200);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ !strcmp(SYM(he), "xmalloc") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "schedule") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ !strcmp(SYM(he), "free") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ !strcmp(SYM(he), "malloc") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "cmd_record") && he->stat.period == 100);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* mixed fields and sort keys */
+static int test2(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = "overhead,cpu";
+ sort_order = "pid";
+
+ setup_sorting();
+
+ /*
+ * expected output:
+ *
+ * Overhead CPU Command: Pid
+ * ======== === =============
+ * 30.00% 1 perf : 100
+ * 10.00% 0 perf : 100
+ * 10.00% 2 perf : 100
+ * 20.00% 2 perf : 200
+ * 10.00% 0 bash : 300
+ * 10.00% 1 bash : 300
+ * 10.00% 3 bash : 300
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ hists__output_resort(hists);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &evsel->hists.entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 100 && he->stat.period == 300);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 0 && PID(he) == 100 && he->stat.period == 100);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* fields only (no sort key) */
+static int test3(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = "comm,overhead,dso";
+ sort_order = NULL;
+
+ setup_sorting();
+
+ /*
+ * expected output:
+ *
+ * Command Overhead Shared Object
+ * ======= ======== =============
+ * bash 20.00% bash
+ * bash 10.00% [kernel]
+ * perf 30.00% perf
+ * perf 20.00% [kernel]
+ * perf 20.00% libc
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ hists__output_resort(hists);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &evsel->hists.entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ he->stat.period == 200);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+ he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ he->stat.period == 300);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ he->stat.period == 200);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ he->stat.period == 200);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* handle duplicate 'dso' field */
+static int test4(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = "dso,sym,comm,overhead,dso";
+ sort_order = "sym";
+
+ setup_sorting();
+
+ /*
+ * expected output:
+ *
+ * Shared Object Symbol Command Overhead
+ * ============= ============== ======= ========
+ * perf [.] cmd_record perf 10.00%
+ * libc [.] free perf 10.00%
+ * bash [.] main bash 10.00%
+ * perf [.] main perf 20.00%
+ * libc [.] malloc perf 10.00%
+ * [kernel] [k] page_fault bash 10.00%
+ * [kernel] [k] page_fault perf 10.00%
+ * [kernel] [k] schedule perf 10.00%
+ * bash [.] xmalloc bash 10.00%
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ hists__output_resort(hists);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &evsel->hists.entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "cmd_record") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "libc") && !strcmp(SYM(he), "free") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "bash") && !strcmp(SYM(he), "main") &&
+ !strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "perf") && !strcmp(SYM(he), "main") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 200);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "libc") && !strcmp(SYM(he), "malloc") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "page_fault") &&
+ !strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "page_fault") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "[kernel]") && !strcmp(SYM(he), "schedule") &&
+ !strcmp(COMM(he), "perf") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ !strcmp(DSO(he), "bash") && !strcmp(SYM(he), "xmalloc") &&
+ !strcmp(COMM(he), "bash") && he->stat.period == 100);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
+/* full sort keys w/o overhead field */
+static int test5(struct perf_evsel *evsel, struct machine *machine)
+{
+ int err;
+ struct hists *hists = &evsel->hists;
+ struct hist_entry *he;
+ struct rb_root *root;
+ struct rb_node *node;
+
+ field_order = "cpu,pid,comm,dso,sym";
+ sort_order = "dso,pid";
+
+ setup_sorting();
+
+ /*
+ * expected output:
+ *
+ * CPU Command: Pid Command Shared Object Symbol
+ * === ============= ======= ============= ==============
+ * 0 perf: 100 perf [kernel] [k] schedule
+ * 2 perf: 200 perf [kernel] [k] page_fault
+ * 1 bash: 300 bash [kernel] [k] page_fault
+ * 0 bash: 300 bash bash [.] xmalloc
+ * 3 bash: 300 bash bash [.] main
+ * 1 perf: 100 perf libc [.] malloc
+ * 2 perf: 100 perf libc [.] free
+ * 1 perf: 100 perf perf [.] cmd_record
+ * 1 perf: 100 perf perf [.] main
+ * 2 perf: 200 perf perf [.] main
+ */
+ err = add_hist_entries(hists, machine);
+ if (err < 0)
+ goto out;
+
+ hists__collapse_resort(hists, NULL);
+ hists__output_resort(hists);
+
+ if (verbose > 2) {
+ pr_info("[fields = %s, sort = %s]\n", field_order, sort_order);
+ print_hists_out(hists);
+ }
+
+ root = &evsel->hists.entries;
+ node = rb_first(root);
+ he = rb_entry(node, struct hist_entry, rb_node);
+
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 0 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "schedule") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 2 && PID(he) == 200 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 300 &&
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "[kernel]") &&
+ !strcmp(SYM(he), "page_fault") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 0 && PID(he) == 300 &&
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ !strcmp(SYM(he), "xmalloc") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 3 && PID(he) == 300 &&
+ !strcmp(COMM(he), "bash") && !strcmp(DSO(he), "bash") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ !strcmp(SYM(he), "malloc") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 2 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "libc") &&
+ !strcmp(SYM(he), "free") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "cmd_record") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 1 && PID(he) == 100 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 100);
+
+ node = rb_next(node);
+ he = rb_entry(node, struct hist_entry, rb_node);
+ TEST_ASSERT_VAL("Invalid hist entry",
+ CPU(he) == 2 && PID(he) == 200 &&
+ !strcmp(COMM(he), "perf") && !strcmp(DSO(he), "perf") &&
+ !strcmp(SYM(he), "main") && he->stat.period == 100);
+
+out:
+ del_hist_entries(hists);
+ reset_output_field();
+ return err;
+}
+
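+/* run each sort/field-order scenario against the same fake machine and cpu-clock evsel */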
+int test__hists_output(void)
+{
+ int err = TEST_FAIL;
+ struct machines machines;
+ struct machine *machine;
+ struct perf_evsel *evsel;
+ struct perf_evlist *evlist = perf_evlist__new();
+ size_t i;
+ test_fn_t testcases[] = {
+ test1,
+ test2,
+ test3,
+ test4,
+ test5,
+ };
+
+ TEST_ASSERT_VAL("No memory", evlist);
+
+ err = parse_events(evlist, "cpu-clock");
+ if (err)
+ goto out;
+
+ machines__init(&machines);
+
+ /* setup threads/dso/map/symbols also */
+ machine = setup_fake_machine(&machines);
+ if (!machine)
+ goto out;
+
+ if (verbose > 1)
+ machine__fprintf(machine, stderr);
+
+ evsel = perf_evlist__first(evlist);
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ err = testcases[i](evsel, machine);
+ if (err < 0)
+ break;
+ }
+
+out:
+ /* tear down everything */
+ perf_evlist__delete(evlist);
+ machines__exit(&machines);
+
+ return err;
+}
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 497957f269d..7a5ab7b0b8f 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -1,4 +1,4 @@
-#include <sys/types.h>
+#include <linux/types.h>
#include <unistd.h>
#include <sys/prctl.h>
diff --git a/tools/perf/tests/make b/tools/perf/tests/make