aboutsummaryrefslogtreecommitdiff
path: root/arch/mips/kernel/smp-mt.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/mips/kernel/smp-mt.c')
-rw-r--r--arch/mips/kernel/smp-mt.c43
1 files changed, 33 insertions, 10 deletions
diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c
index bfede063d96..3babf6e4f89 100644
--- a/arch/mips/kernel/smp-mt.c
+++ b/arch/mips/kernel/smp-mt.c
@@ -34,6 +34,7 @@
#include <asm/mipsregs.h>
#include <asm/mipsmtregs.h>
#include <asm/mips_mt.h>
+#include <asm/gic.h>
static void __init smvp_copy_vpe_config(void)
{
@@ -70,6 +71,7 @@ static unsigned int __init smvp_vpe_init(unsigned int tc, unsigned int mvpconf0,
/* Record this as available CPU */
set_cpu_possible(tc, true);
+ set_cpu_present(tc, true);
__cpu_number_map[tc] = ++ncpu;
__cpu_logical_map[ncpu] = tc;
}
@@ -117,6 +119,12 @@ static void vsmp_send_ipi_single(int cpu, unsigned int action)
unsigned long flags;
int vpflags;
+#ifdef CONFIG_IRQ_GIC
+ if (gic_present) {
+ gic_send_ipi_single(cpu, action);
+ return;
+ }
+#endif
local_irq_save(flags);
vpflags = dvpe(); /* can't access the other CPU's registers whilst MVPE enabled */
@@ -148,11 +156,9 @@ static void vsmp_send_ipi_mask(const struct cpumask *mask, unsigned int action)
vsmp_send_ipi_single(i, action);
}
-static void __cpuinit vsmp_init_secondary(void)
+static void vsmp_init_secondary(void)
{
#ifdef CONFIG_IRQ_GIC
- extern int gic_present;
-
/* This is Malta specific: IPI,performance and timer interrupts */
if (gic_present)
change_c0_status(ST0_IM, STATUSF_IP3 | STATUSF_IP4 |
@@ -163,7 +169,7 @@ static void __cpuinit vsmp_init_secondary(void)
STATUSF_IP6 | STATUSF_IP7);
}
-static void __cpuinit vsmp_smp_finish(void)
+static void vsmp_smp_finish(void)
{
/* CDFIXME: remove this? */
write_c0_compare(read_c0_count() + (8* mips_hpt_frequency/HZ));
@@ -177,10 +183,6 @@ static void __cpuinit vsmp_smp_finish(void)
local_irq_enable();
}
-static void vsmp_cpus_done(void)
-{
-}
-
/*
* Setup the PC, SP, and GP of a secondary processor and start it
* running!
@@ -189,7 +191,7 @@ static void vsmp_cpus_done(void)
* (unsigned long)idle->thread_info the gp
* assumes a 1:1 mapping of TC => VPE
*/
-static void __cpuinit vsmp_boot_secondary(int cpu, struct task_struct *idle)
+static void vsmp_boot_secondary(int cpu, struct task_struct *idle)
{
struct thread_info *gp = task_thread_info(idle);
dvpe();
@@ -281,8 +283,29 @@ struct plat_smp_ops vsmp_smp_ops = {
.send_ipi_mask = vsmp_send_ipi_mask,
.init_secondary = vsmp_init_secondary,
.smp_finish = vsmp_smp_finish,
- .cpus_done = vsmp_cpus_done,
.boot_secondary = vsmp_boot_secondary,
.smp_setup = vsmp_smp_setup,
.prepare_cpus = vsmp_prepare_cpus,
};
+
+static int proc_cpuinfo_chain_call(struct notifier_block *nfb,
+ unsigned long action_unused, void *data)
+{
+ struct proc_cpuinfo_notifier_args *pcn = data;
+ struct seq_file *m = pcn->m;
+ unsigned long n = pcn->n;
+
+ if (!cpu_has_mipsmt)
+ return NOTIFY_OK;
+
+ seq_printf(m, "VPE\t\t\t: %d\n", cpu_data[n].vpe_id);
+
+ return NOTIFY_OK;
+}
+
+static int __init proc_cpuinfo_notifier_init(void)
+{
+ return proc_cpuinfo_notifier(proc_cpuinfo_chain_call, 0);
+}
+
+subsys_initcall(proc_cpuinfo_notifier_init);
/a>2
-rw-r--r--tools/perf/Documentation/perf-record.txt58
-rw-r--r--tools/perf/Documentation/perf-report.txt156
-rw-r--r--tools/perf/Documentation/perf-script-python.txt2
-rw-r--r--tools/perf/Documentation/perf-script.txt14
-rw-r--r--tools/perf/Documentation/perf-stat.txt34
-rw-r--r--tools/perf/Documentation/perf-test.txt4
-rw-r--r--tools/perf/Documentation/perf-timechart.txt52
-rw-r--r--tools/perf/Documentation/perf-top.txt78
-rw-r--r--tools/perf/Documentation/perf-trace.txt67
-rw-r--r--tools/perf/MANIFEST18
-rw-r--r--tools/perf/Makefile1109
-rw-r--r--tools/perf/Makefile.perf938
-rw-r--r--tools/perf/arch/arm/Makefile10
-rw-r--r--tools/perf/arch/arm/include/perf_regs.h59
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c60
-rw-r--r--tools/perf/arch/arm/tests/regs_load.S58
-rw-r--r--tools/perf/arch/arm/util/dwarf-regs.c5
-rw-r--r--tools/perf/arch/arm/util/unwind-libdw.c36
-rw-r--r--tools/perf/arch/arm/util/unwind-libunwind.c48
-rw-r--r--tools/perf/arch/arm64/Makefile7
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h88
-rw-r--r--tools/perf/arch/arm64/util/dwarf-regs.c80
-rw-r--r--tools/perf/arch/arm64/util/unwind-libunwind.c82
-rw-r--r--tools/perf/arch/common.c211
-rw-r--r--tools/perf/arch/common.h10
-rw-r--r--tools/perf/arch/powerpc/util/dwarf-regs.c5
-rw-r--r--tools/perf/arch/s390/util/dwarf-regs.c2
-rw-r--r--tools/perf/arch/sh/util/dwarf-regs.c2
-rw-r--r--tools/perf/arch/sparc/util/dwarf-regs.c2
-rw-r--r--tools/perf/arch/x86/Makefile11
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h16
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c60
-rw-r--r--tools/perf/arch/x86/tests/regs_load.S98
-rw-r--r--tools/perf/arch/x86/util/dwarf-regs.c2
-rw-r--r--tools/perf/arch/x86/util/tsc.c59
-rw-r--r--tools/perf/arch/x86/util/tsc.h20
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c51
-rw-r--r--tools/perf/arch/x86/util/unwind-libunwind.c (renamed from tools/perf/arch/x86/util/unwind.c)8
-rw-r--r--tools/perf/bash_completion62
-rw-r--r--tools/perf/bench/bench.h28
-rw-r--r--tools/perf/bench/futex-hash.c212
-rw-r--r--tools/perf/bench/futex-requeue.c211
-rw-r--r--tools/perf/bench/futex-wake.c201
-rw-r--r--tools/perf/bench/futex.h71
-rw-r--r--tools/perf/bench/mem-memcpy-arch.h2
-rw-r--r--tools/perf/bench/mem-memcpy.c8
-rw-r--r--tools/perf/bench/mem-memset-arch.h2
-rw-r--r--tools/perf/bench/mem-memset.c4
-rw-r--r--tools/perf/bench/numa.c1744
-rw-r--r--tools/perf/bench/sched-pipe.c115
-rw-r--r--tools/perf/builtin-annotate.c130
-rw-r--r--tools/perf/builtin-bench.c250
-rw-r--r--tools/perf/builtin-buildid-cache.c272
-rw-r--r--tools/perf/builtin-buildid-list.c34
-rw-r--r--tools/perf/builtin-diff.c1138
-rw-r--r--tools/perf/builtin-evlist.c98
-rw-r--r--tools/perf/builtin-inject.c309
-rw-r--r--tools/perf/builtin-kmem.c125
-rw-r--r--tools/perf/builtin-kvm.c949
-rw-r--r--tools/perf/builtin-list.c87
-rw-r--r--tools/perf/builtin-lock.c154
-rw-r--r--tools/perf/builtin-mem.c246
-rw-r--r--tools/perf/builtin-probe.c108
-rw-r--r--tools/perf/builtin-record.c789
-rw-r--r--tools/perf/builtin-report.c720
-rw-r--r--tools/perf/builtin-sched.c239
-rw-r--r--tools/perf/builtin-script.c484
-rw-r--r--tools/perf/builtin-stat.c986
-rw-r--r--tools/perf/builtin-test.c1547
-rw-r--r--tools/perf/builtin-timechart.c866
-rw-r--r--tools/perf/builtin-top.c678
-rw-r--r--tools/perf/builtin-trace.c2241
-rw-r--r--tools/perf/builtin.h1
-rw-r--r--tools/perf/command-list.txt15
-rw-r--r--tools/perf/config/Makefile738
-rw-r--r--tools/perf/config/Makefile.arch23
-rw-r--r--tools/perf/config/feature-checks/.gitignore2
-rw-r--r--tools/perf/config/feature-checks/Makefile149
-rw-r--r--tools/perf/config/feature-checks/test-all.c116
-rw-r--r--tools/perf/config/feature-checks/test-backtrace.c13
-rw-r--r--tools/perf/config/feature-checks/test-bionic.c6
-rw-r--r--tools/perf/config/feature-checks/test-cplus-demangle.c14
-rw-r--r--tools/perf/config/feature-checks/test-dwarf.c10
-rw-r--r--tools/perf/config/feature-checks/test-fortify-source.c6
-rw-r--r--tools/perf/config/feature-checks/test-glibc.c8
-rw-r--r--tools/perf/config/feature-checks/test-gtk2-infobar.c11
-rw-r--r--tools/perf/config/feature-checks/test-gtk2.c10
-rw-r--r--tools/perf/config/feature-checks/test-hello.c6
-rw-r--r--tools/perf/config/feature-checks/test-libaudit.c10
-rw-r--r--tools/perf/config/feature-checks/test-libbfd.c15
-rw-r--r--tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c13
-rw-r--r--tools/perf/config/feature-checks/test-libelf-getphdrnum.c8
-rw-r--r--tools/perf/config/feature-checks/test-libelf-mmap.c8
-rw-r--r--tools/perf/config/feature-checks/test-libelf.c8
-rw-r--r--tools/perf/config/feature-checks/test-libnuma.c9
-rw-r--r--tools/perf/config/feature-checks/test-libperl.c9
-rw-r--r--tools/perf/config/feature-checks/test-libpython-version.c10
-rw-r--r--tools/perf/config/feature-checks/test-libpython.c8
-rw-r--r--tools/perf/config/feature-checks/test-libslang.c6
-rw-r--r--tools/perf/config/feature-checks/test-libunwind-debug-frame.c16
-rw-r--r--tools/perf/config/feature-checks/test-libunwind.c27
-rw-r--r--tools/perf/config/feature-checks/test-stackprotector-all.c6
-rw-r--r--tools/perf/config/feature-checks/test-timerfd.c18
-rw-r--r--tools/perf/config/feature-tests.mak206
-rw-r--r--tools/perf/config/utilities.mak12
-rw-r--r--tools/perf/design.txt13
-rw-r--r--tools/perf/perf-completion.sh206
-rw-r--r--tools/perf/perf-sys.h190
-rw-r--r--tools/perf/perf.c83
-rw-r--r--tools/perf/perf.h220
-rwxr-xr-xtools/perf/python/twatch.py2
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Context.xs2
-rw-r--r--tools/perf/scripts/perl/bin/workqueue-stats-record2
-rw-r--r--tools/perf/scripts/perl/bin/workqueue-stats-report3
-rw-r--r--tools/perf/scripts/perl/rwtop.pl6
-rw-r--r--tools/perf/scripts/perl/workqueue-stats.pl129
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/Context.c6
-rwxr-xr-xtools/perf/scripts/python/net_dropmonitor.py39
-rw-r--r--tools/perf/tests/attr.c176
-rw-r--r--tools/perf/tests/attr.py332
-rw-r--r--tools/perf/tests/attr/README64
-rw-r--r--tools/perf/tests/attr/base-record40
-rw-r--r--tools/perf/tests/attr/base-stat40
-rw-r--r--tools/perf/tests/attr/test-record-C013
-rw-r--r--tools/perf/tests/attr/test-record-basic5
-rw-r--r--tools/perf/tests/attr/test-record-branch-any8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-any8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-any_call8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-any_ret8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-hv8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-ind_call8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-k8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-u8
-rw-r--r--tools/perf/tests/attr/test-record-count8
-rw-r--r--tools/perf/tests/attr/test-record-data11
-rw-r--r--tools/perf/tests/attr/test-record-freq6
-rw-r--r--tools/perf/tests/attr/test-record-graph-default6
-rw-r--r--tools/perf/tests/attr/test-record-graph-dwarf10
-rw-r--r--tools/perf/tests/attr/test-record-graph-fp6
-rw-r--r--tools/perf/tests/attr/test-record-group20
-rw-r--r--tools/perf/tests/attr/test-record-group-sampling36
-rw-r--r--tools/perf/tests/attr/test-record-group121
-rw-r--r--tools/perf/tests/attr/test-record-no-delay9
-rw-r--r--tools/perf/tests/attr/test-record-no-inherit7
-rw-r--r--tools/perf/tests/attr/test-record-no-samples6
-rw-r--r--tools/perf/tests/attr/test-record-period7
-rw-r--r--tools/perf/tests/attr/test-record-raw7
-rw-r--r--tools/perf/tests/attr/test-stat-C09
-rw-r--r--tools/perf/tests/attr/test-stat-basic6
-rw-r--r--tools/perf/tests/attr/test-stat-default64
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-1101
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-2155
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-3173
-rw-r--r--tools/perf/tests/attr/test-stat-group15
-rw-r--r--tools/perf/tests/attr/test-stat-group115
-rw-r--r--tools/perf/tests/attr/test-stat-no-inherit7
-rw-r--r--tools/perf/tests/bp_signal.c192
-rw-r--r--tools/perf/tests/bp_signal_overflow.c132
-rw-r--r--tools/perf/tests/builtin-test.c307
-rw-r--r--tools/perf/tests/code-reading.c581
-rw-r--r--tools/perf/tests/dso-data.c358
-rw-r--r--tools/perf/tests/dwarf-unwind.c144
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c114
-rw-r--r--tools/perf/tests/evsel-tp-sched.c81
-rw-r--r--tools/perf/tests/hists_common.c209
-rw-r--r--tools/perf/tests/hists_common.h75
-rw-r--r--tools/perf/tests/hists_cumulate.c726
-rw-r--r--tools/perf/tests/hists_filter.c289
-rw-r--r--tools/perf/tests/hists_link.c339
-rw-r--r--tools/perf/tests/hists_output.c621
-rw-r--r--tools/perf/tests/keep-tracking.c152
-rw-r--r--tools/perf/tests/make233
-rw-r--r--tools/perf/tests/mmap-basic.c148
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c233
-rw-r--r--tools/perf/tests/open-syscall-all-cpus.c109
-rw-r--r--tools/perf/tests/open-syscall-tp-fields.c117
-rw-r--r--tools/perf/tests/open-syscall.c55
-rw-r--r--tools/perf/tests/parse-events.c (renamed from tools/perf/util/parse-events-test.c)724
-rw-r--r--tools/perf/tests/parse-no-sample-id-all.c108
-rw-r--r--tools/perf/tests/perf-record.c309
-rwxr-xr-xtools/perf/tests/perf-targz-src-pkg21
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c172
-rw-r--r--tools/perf/tests/pmu.c173
-rw-r--r--tools/perf/tests/python-use.c23
-rw-r--r--tools/perf/tests/rdpmc.c173
-rw-r--r--tools/perf/tests/sample-parsing.c320
-rw-r--r--tools/perf/tests/sw-clock.c122
-rw-r--r--tools/perf/tests/task-exit.c118
-rw-r--r--tools/perf/tests/tests.h60
-rw-r--r--tools/perf/tests/thread-mg-share.c90
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c242
-rw-r--r--tools/perf/ui/browser.c25
-rw-r--r--tools/perf/ui/browser.h39
-rw-r--r--tools/perf/ui/browsers/annotate.c264
-rw-r--r--tools/perf/ui/browsers/header.c127
-rw-r--r--tools/perf/ui/browsers/hists.c707
-rw-r--r--tools/perf/ui/browsers/map.c98
-rw-r--r--tools/perf/ui/browsers/map.h2
-rw-r--r--tools/perf/ui/browsers/scripts.c187
-rw-r--r--tools/perf/ui/gtk/annotate.c252
-rw-r--r--tools/perf/ui/gtk/browser.c239
-rw-r--r--tools/perf/ui/gtk/gtk.h29
-rw-r--r--tools/perf/ui/gtk/helpline.c23
-rw-r--r--tools/perf/ui/gtk/hists.c365
-rw-r--r--tools/perf/ui/gtk/progress.c59
-rw-r--r--tools/perf/ui/gtk/setup.c2
-rw-r--r--tools/perf/ui/gtk/util.c18
-rw-r--r--tools/perf/ui/helpline.c12
-rw-r--r--tools/perf/ui/helpline.h22
-rw-r--r--tools/perf/ui/hist.c801
-rw-r--r--tools/perf/ui/keysyms.h1
-rw-r--r--tools/perf/ui/progress.c60
-rw-r--r--tools/perf/ui/progress.h19
-rw-r--r--tools/perf/ui/setup.c65
-rw-r--r--tools/perf/ui/stdio/hist.c183
-rw-r--r--tools/perf/ui/tui/helpline.c29
-rw-r--r--tools/perf/ui/tui/progress.c44
-rw-r--r--tools/perf/ui/tui/setup.c23
-rw-r--r--tools/perf/ui/tui/tui.h6
-rw-r--r--tools/perf/ui/tui/util.c19
-rw-r--r--tools/perf/ui/ui.h18
-rw-r--r--tools/perf/ui/util.c1
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN40
-rw-r--r--tools/perf/util/alias.c6
-rw-r--r--tools/perf/util/annotate.c490
-rw-r--r--tools/perf/util/annotate.h56
-rw-r--r--tools/perf/util/build-id.c43
-rw-r--r--tools/perf/util/build-id.h12
-rw-r--r--tools/perf/util/cache.h42
-rw-r--r--tools/perf/util/callchain.c301
-rw-r--r--tools/perf/util/callchain.h55
-rw-r--r--tools/perf/util/cgroup.c6
-rw-r--r--tools/perf/util/color.c22
-rw-r--r--tools/perf/util/color.h3
-rw-r--r--tools/perf/util/comm.c122
-rw-r--r--tools/perf/util/comm.h21
-rw-r--r--tools/perf/util/config.c4
-rw-r--r--tools/perf/util/cpumap.c276
-rw-r--r--tools/perf/util/cpumap.h58
-rw-r--r--tools/perf/util/data.c133
-rw-r--r--tools/perf/util/data.h50
-rw-r--r--tools/perf/util/debug.c55
-rw-r--r--tools/perf/util/debug.h35
-rw-r--r--tools/perf/util/debugfs.c114
-rw-r--r--tools/perf/util/debugfs.h12
-rw-r--r--tools/perf/util/dso-test-data.c153
-rw-r--r--tools/perf/util/dso.c900
-rw-r--r--tools/perf/util/dso.h232
-rw-r--r--tools/perf/util/dwarf-aux.c51
-rw-r--r--tools/perf/util/dwarf-aux.h9
-rw-r--r--tools/perf/util/event.c683
-rw-r--r--tools/perf/util/event.h133
-rw-r--r--tools/perf/util/evlist.c782
-rw-r--r--tools/perf/util/evlist.h155
-rw-r--r--tools/perf/util/evsel.c1167
-rw-r--r--tools/perf/util/evsel.h161
-rwxr-xr-xtools/perf/util/generate-cmdlist.sh4
-rw-r--r--tools/perf/util/header.c605
-rw-r--r--tools/perf/util/header.h58
-rw-r--r--tools/perf/util/help.c7
-rw-r--r--tools/perf/util/hist.c1005
-rw-r--r--tools/perf/util/hist.h274
-rw-r--r--tools/perf/util/include/asm/bug.h22
-rw-r--r--tools/perf/util/include/asm/hash.h6
-rw-r--r--tools/perf/util/include/dwarf-regs.h2
-rw-r--r--tools/perf/util/include/linux/bitmap.h3
-rw-r--r--tools/perf/util/include/linux/bitops.h5
-rw-r--r--tools/perf/util/include/linux/compiler.h21
-rw-r--r--tools/perf/util/include/linux/export.h6
-rw-r--r--tools/perf/util/include/linux/hash.h5
-rw-r--r--tools/perf/util/include/linux/kernel.h6
-rw-r--r--tools/perf/util/include/linux/list.h2
-rw-r--r--tools/perf/util/include/linux/magic.h12
-rw-r--r--tools/perf/util/include/linux/prefetch.h6
-rw-r--r--tools/perf/util/include/linux/string.h1
-rw-r--r--tools/perf/util/include/linux/types.h29
-rw-r--r--tools/perf/util/intlist.c51
-rw-r--r--tools/perf/util/intlist.h4
-rw-r--r--tools/perf/util/machine.c1422
-rw-r--r--tools/perf/util/machine.h194
-rw-r--r--tools/perf/util/map.c544
-rw-r--r--tools/perf/util/map.h162
-rw-r--r--tools/perf/util/pager.c12
-rw-r--r--tools/perf/util/parse-events.c374
-rw-r--r--tools/perf/util/parse-events.h39
-rw-r--r--tools/perf/util/parse-events.l67
-rw-r--r--tools/perf/util/parse-events.y161
-rw-r--r--tools/perf/util/parse-options.c276
-rw-r--r--tools/perf/util/parse-options.h20
-rw-r--r--tools/perf/util/path.c10
-rw-r--r--tools/perf/util/perf_regs.c27
-rw-r--r--tools/perf/util/perf_regs.h19
-rw-r--r--tools/perf/util/pmu.c488
-rw-r--r--tools/perf/util/pmu.h32
-rw-r--r--tools/perf/util/pmu.y1
-rw-r--r--tools/perf/util/probe-event.c1007
-rw-r--r--tools/perf/util/probe-event.h17
-rw-r--r--tools/perf/util/probe-finder.c595
-rw-r--r--tools/perf/util/probe-finder.h23
-rw-r--r--tools/perf/util/pstack.c46
-rw-r--r--tools/perf/util/pstack.h10
-rw-r--r--tools/perf/util/python-ext-sources3
-rw-r--r--tools/perf/util/python.c45
-rw-r--r--tools/perf/util/rblist.c31
-rw-r--r--tools/perf/util/rblist.h1
-rw-r--r--tools/perf/util/record.c215
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c28
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c76
-rw-r--r--tools/perf/util/session.c1006
-rw-r--r--tools/perf/util/session.h110
-rw-r--r--tools/perf/util/setup.py8
-rw-r--r--tools/perf/util/sort.c1532
-rw-r--r--tools/perf/util/sort.h121
-rw-r--r--tools/perf/util/srcline.c299
-rw-r--r--tools/perf/util/stat.c8
-rw-r--r--tools/perf/util/stat.h11
-rw-r--r--tools/perf/util/strbuf.c10
-rw-r--r--tools/perf/util/strfilter.c72
-rw-r--r--tools/perf/util/strfilter.h12
-rw-r--r--tools/perf/util/string.c62
-rw-r--r--tools/perf/util/strlist.c55
-rw-r--r--tools/perf/util/strlist.h42
-rw-r--r--tools/perf/util/svghelper.c235
-rw-r--r--tools/perf/util/svghelper.h19
-rw-r--r--tools/perf/util/symbol-elf.c840
-rw-r--r--tools/perf/util/symbol-minimal.c27
-rw-r--r--tools/perf/util/symbol.c2050
-rw-r--r--tools/perf/util/symbol.h243
-rw-r--r--tools/perf/util/sysfs.c60
-rw-r--r--tools/perf/util/sysfs.h6
-rw-r--r--tools/perf/util/target.c63
-rw-r--r--tools/perf/util/target.h60
-rw-r--r--tools/perf/util/thread.c226
-rw-r--r--tools/perf/util/thread.h65
-rw-r--r--tools/perf/util/thread_map.c20
-rw-r--r--tools/perf/util/thread_map.h5
-rw-r--r--tools/perf/util/tool.h11
-rw-r--r--tools/perf/util/top.c45
-rw-r--r--tools/perf/util/top.h17
-rw-r--r--tools/perf/util/trace-event-info.c434
-rw-r--r--tools/perf/util/trace-event-parse.c95
-rw-r--r--tools/perf/util/trace-event-read.c517
-rw-r--r--tools/perf/util/trace-event-scripting.c2
-rw-r--r--tools/perf/util/trace-event.c82
-rw-r--r--tools/perf/util/trace-event.h46
-rw-r--r--tools/perf/util/types.h24
-rw-r--r--tools/perf/util/unwind-libdw.c210
-rw-r--r--tools/perf/util/unwind-libdw.h21
-rw-r--r--tools/perf/util/unwind-libunwind.c (renamed from tools/perf/util/unwind.c)150
-rw-r--r--tools/perf/util/unwind.h18
-rw-r--r--tools/perf/util/util.c383
-rw-r--r--tools/perf/util/util.h81
-rw-r--r--tools/perf/util/values.c14
-rw-r--r--tools/perf/util/values.h2
-rw-r--r--tools/perf/util/vdso.c4
371 files changed, 44067 insertions, 14576 deletions
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 8f8fbc227a4..782d86e961b 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -13,6 +13,7 @@ perf*.html
common-cmds.h
perf.data
perf.data.old
+output.svg
perf-archive
tags
TAGS
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index 9f2e44f2b17..3ba1c0b0990 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -1,9 +1,5 @@
-OUTPUT := ./
-ifeq ("$(origin O)", "command line")
- ifneq ($(O),)
- OUTPUT := $(O)/
- endif
-endif
+include ../../scripts/Makefile.include
+include ../config/utilities.mak
MAN1_TXT= \
$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
@@ -64,6 +60,7 @@ MAKEINFO=makeinfo
INSTALL_INFO=install-info
DOCBOOK2X_TEXI=docbook2x-texi
DBLATEX=dblatex
+XMLTO=xmlto
ifndef PERL_PATH
PERL_PATH = /usr/bin/perl
endif
@@ -71,6 +68,16 @@ endif
-include ../config.mak.autogen
-include ../config.mak
+_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
+ifeq ($(_tmp_tool_path),)
+ missing_tools = $(ASCIIDOC)
+endif
+
+_tmp_tool_path := $(call get-executable,$(XMLTO))
+ifeq ($(_tmp_tool_path),)
+ missing_tools += $(XMLTO)
+endif
+
#
# For asciidoc ...
# -7.1.2, no extra settings are needed.
@@ -137,17 +144,18 @@ NO_SUBDIR = :
endif
ifneq ($(findstring $(MAKEFLAGS),s),s)
-ifndef V
- QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@;
- QUIET_XMLTO = @echo ' ' XMLTO $@;
- QUIET_DB2TEXI = @echo ' ' DB2TEXI $@;
- QUIET_MAKEINFO = @echo ' ' MAKEINFO $@;
- QUIET_DBLATEX = @echo ' ' DBLATEX $@;
- QUIET_XSLTPROC = @echo ' ' XSLTPROC $@;
- QUIET_GEN = @echo ' ' GEN $@;
+ifneq ($(V),1)
+ QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
+ QUIET_XMLTO = @echo ' XMLTO '$@;
+ QUIET_DB2TEXI = @echo ' DB2TEXI '$@;
+ QUIET_MAKEINFO = @echo ' MAKEINFO '$@;
+ QUIET_DBLATEX = @echo ' DBLATEX '$@;
+ QUIET_XSLTPROC = @echo ' XSLTPROC '$@;
+ QUIET_GEN = @echo ' GEN '$@;
QUIET_STDERR = 2> /dev/null
QUIET_SUBDIR0 = +@subdir=
- QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
+ QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
+ echo ' SUBDIR ' $$subdir; \
$(MAKE) $(PRINT_DIR) -C $$subdir
export V
endif
@@ -170,35 +178,49 @@ pdf: $(OUTPUT)user-manual.pdf
install: install-man
-install-man: man
- $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
-# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
-# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
- $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
-# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
-# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+check-man-tools:
+ifdef missing_tools
+ $(error "You need to install $(missing_tools) for man pages")
+endif
+
+do-install-man: man
+ $(call QUIET_INSTALL, Documentation-man) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \
+# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \
+# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
+ $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \
+# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \
+# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+install-man: check-man-tools man
+
+ifdef missing_tools
+ DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
+else
+ DO_INSTALL_MAN = do-install-man
+endif
+
+try-install-man: $(DO_INSTALL_MAN)
install-info: info
- $(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
- $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
+ $(call QUIET_INSTALL, Documentation-info) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \
+ $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \
if test -r $(DESTDIR)$(infodir)/dir; then \
- $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
- $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
else \
echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
fi
install-pdf: pdf
- $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
- $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
+ $(call QUIET_INSTALL, Documentation-pdf) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \
+ $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
#install-html: html
# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
-$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
- $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE
-
--include $(OUTPUT)PERF-VERSION-FILE
#
# Determine "include::" file references in asciidoc files.
@@ -228,15 +250,17 @@ $(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
date >$@
+CLEAN_FILES = \
+ $(MAN_XML) $(addsuffix +,$(MAN_XML)) \
+ $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
+ $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) \
+ $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ \
+ $(OUTPUT)perf.info $(OUTPUT)perfman.info \
+ $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep \
+ $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt \
+ $(cmds_txt) $(OUTPUT)*.made
clean:
- $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML))
- $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML))
- $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)
- $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++
- $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info
- $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep
- $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt
- $(RM) $(cmds_txt) $(OUTPUT)*.made
+ $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
$(MAN_HTML): $(OUTPUT)%.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
@@ -246,7 +270,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(QUIET_XMLTO)$(RM) $@ && \
- xmlto -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
$(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
@@ -317,5 +341,3 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
#quick-install-html:
# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
-
-.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/android.txt b/tools/perf/Documentation/android.txt
new file mode 100644
index 00000000000..8484c3a04a6
--- /dev/null
+++ b/tools/perf/Documentation/android.txt
@@ -0,0 +1,78 @@
+How to compile perf for Android
+=========================================
+
+I. Set the Android NDK environment
+------------------------------------------------
+
+(a). Use the Android NDK
+------------------------------------------------
+1. You need to download and install the Android Native Development Kit (NDK).
+Set the NDK variable to point to the path where you installed the NDK:
+ export NDK=/path/to/android-ndk
+
+2. Set cross-compiling environment variables for NDK toolchain and sysroot.
+For arm:
+ export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.6/prebuilt/linux-x86/bin/arm-linux-androideabi-
+ export NDK_SYSROOT=${NDK}/platforms/android-9/arch-arm
+For x86:
+ export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.6/prebuilt/linux-x86/bin/i686-linux-android-
+ export NDK_SYSROOT=${NDK}/platforms/android-9/arch-x86
+
+This method is not working for Android NDK versions up to Revision 8b.
+perf uses some bionic enhancements that are not included in these NDK versions.
+You can use method (b) described below instead.
+
+(b). Use the Android source tree
+-----------------------------------------------
+1. Download the master branch of the Android source tree.
+Set the environment for the target you want using:
+ source build/envsetup.sh
+ lunch
+
+2. Build your own NDK sysroot to contain latest bionic changes and set the
+NDK sysroot environment variable.
+ cd ${ANDROID_BUILD_TOP}/ndk
+For arm:
+ ./build/tools/build-ndk-sysroot.sh --abi=arm
+ export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm
+For x86:
+ ./build/tools/build-ndk-sysroot.sh --abi=x86
+ export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86
+
+3. Set the NDK toolchain environment variable.
+For arm:
+ export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi-
+For x86:
+ export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android-
+
+II. Compile perf for Android
+------------------------------------------------
+You need to run make with the NDK toolchain and sysroot defined above:
+For arm:
+ make ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}"
+For x86:
+ make ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}"
+
+III. Install perf
+-----------------------------------------------
+You need to connect to your Android device/emulator using adb.
+Install perf using:
+ adb push perf /data/perf
+
+If you also want to use perf-archive you need busybox tools for Android.
+For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh:
+ sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive >> /tmp/perf-archive
+ chmod +x /tmp/perf-archive
+ adb push /tmp/perf-archive /data/perf-archive
+
+IV. Environment settings for running perf
+------------------------------------------------
+Some perf features need environment variables to run properly.
+You need to set these before running perf on the target:
+ adb shell
+ # PERF_PAGER=cat
+
+IV. Run perf
+------------------------------------------------
+Run perf on your device/emulator to which you previously connected using adb:
+ # ./data/perf
diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt
index 77f95276242..a4e39215648 100644
--- a/tools/perf/Documentation/examples.txt
+++ b/tools/perf/Documentation/examples.txt
@@ -66,7 +66,7 @@ Furthermore, these tracepoints can be used to sample the workload as
well. For example the page allocations done by a 'git gc' can be
captured the following way:
- titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
+ titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
@@ -120,7 +120,7 @@ Furthermore, call-graph sampling can be done too, of page
allocations - to see precisely what kind of page allocations there
are:
- titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
+ titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
index c8ffd9fd5c6..e9cd39a92dc 100644
--- a/tools/perf/Documentation/perf-annotate.txt
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -61,11 +61,13 @@ OPTIONS
--stdio:: Use the stdio interface.
---tui:: Use the TUI interface Use of --tui requires a tty, if one is not
+--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
present, as when piping to other commands, the stdio interface is
used. This interfaces starts by centering on the line with more
samples, TAB/UNTAB cycles through the lines with more samples.
+--gtk:: Use the GTK interface.
+
-C::
--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
be provided as a comma-separated list with no space: 0,1. Ranges of
@@ -88,6 +90,12 @@ OPTIONS
--objdump=<path>::
Path to objdump binary.
+--skip-missing::
+ Skip symbols that cannot be annotated.
+
+--group::
+ Show event group information together
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt
index fae174dc7d0..ac6ecbb3e66 100644
--- a/tools/perf/Documentation/perf-archive.txt
+++ b/tools/perf/Documentation/perf-archive.txt
@@ -12,9 +12,9 @@ SYNOPSIS
DESCRIPTION
-----------
-This command runs runs perf-buildid-list --with-hits, and collects the files
-with the buildids found so that analisys of perf.data contents can be possible
-on another machine.
+This command runs perf-buildid-list --with-hits, and collects the files with the
+buildids found so that analysis of perf.data contents can be possible on another
+machine.
SEE ALSO
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 7065cd6fbdf..4464ad770d5 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -48,6 +48,12 @@ SUBSYSTEM
'mem'::
Memory access performance.
+'numa'::
+ NUMA scheduling and MM benchmarks.
+
+'futex'::
+ Futex stressing benchmarks.
+
'all'::
All benchmark subsystems.
@@ -187,6 +193,22 @@ Show only the result with page faults before memset.
--no-prefault::
Show only the result without page faults before memset.
+SUITES FOR 'numa'
+~~~~~~~~~~~~~~~~~
+*mem*::
+Suite for evaluating NUMA workloads.
+
+SUITES FOR 'futex'
+~~~~~~~~~~~~~~~~~~
+*hash*::
+Suite for evaluating hash tables.
+
+*wake*::
+Suite for evaluating wake calls.
+
+*requeue*::
+Suite for evaluating requeue calls.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index c1057701a7d..fd77d81ea74 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -21,9 +21,29 @@ OPTIONS
-a::
--add=::
Add specified file to the cache.
+-k::
+--kcore::
+ Add specified kcore file to the cache. For the current host that is
+ /proc/kcore which requires root permissions to read. Be aware that
+ running 'perf buildid-cache' as root may update root's build-id cache
+ not the user's. Use the -v option to see where the file is created.
+ Note that the copied file contains only code sections not the whole core
+ image. Note also that files "kallsyms" and "modules" must also be in the
+ same directory and are also copied. All 3 files are created with read
+ permissions for root only. kcore will not be added if there is already a
+ kcore in the cache (with the same build-id) that has the same modules at
+ the same addresses. Use the -v option to see if a copy of kcore is
+ actually made.
-r::
--remove=::
Remove specified file from the cache.
+-M::
+--missing=::
+ List missing build ids in the cache for the specified file.
+-u::
+--update::
+ Update specified file of the cache. It can be used to update kallsyms
+ kernel dso to vmlinux in order to support annotation.
-v::
--verbose::
Be more verbose.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index ab7f667de1b..b3b8abae62b 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -3,17 +3,17 @@ perf-diff(1)
NAME
----
-perf-diff - Read two perf.data files and display the differential profile
+perf-diff - Read perf.data files and display the differential profile
SYNOPSIS
--------
[verse]
-'perf diff' [oldfile] [newfile]
+'perf diff' [baseline file] [data file1] [[data file2] ... ]
DESCRIPTION
-----------
-This command displays the performance difference amongst two perf.data files
-captured via perf record.
+This command displays the performance difference amongst two or more perf.data
+files captured via perf record.
If no parameters are passed it will assume perf.data.old and perf.data.
@@ -22,10 +22,6 @@ specified perf.data files.
OPTIONS
-------
--M::
---displacement::
- Show position displacement relative to baseline.
-
-D::
--dump-raw-trace::
Dump raw trace in ASCII.
@@ -37,21 +33,25 @@ OPTIONS
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-C::
--comms=::
Only consider symbols in these comms. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-s::
--sort=::
- Sort by key(s): pid, comm, dso, symbol.
+ Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
+ Please see description of --sort in the perf-report man page.
-t::
--field-separator=::
@@ -72,6 +72,135 @@ OPTIONS
--symfs=<directory>::
Look for files with symbols relative to this directory.
+-b::
+--baseline-only::
+ Show only items with match in baseline.
+
+-c::
+--compute::
+ Differential computation selection - delta,ratio,wdiff (default is delta).
+ See COMPARISON METHODS section for more info.
+
+-p::
+--period::
+ Show period values for both compared hist entries.
+
+-F::
+--formula::
+ Show formula for given computation.
+
+-o::
+--order::
+ Specify compute sorting column number.
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options.
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+COMPARISON
+----------
+The comparison is governed by the baseline file. The baseline perf.data
+file is iterated for samples. All other perf.data files specified on
+the command line are searched for the baseline sample pair. If the pair
+is found, specified computation is made and result is displayed.
+
+All samples from non-baseline perf.data files, that do not match any
+baseline entry, are displayed with empty space within baseline column
+and possible computation results (delta) in their related column.
+
+Example files samples:
+- file A with samples f1, f2, f3, f4, f6
+- file B with samples f2, f4, f5
+- file C with samples f1, f2, f5
+
+Example output:
+ x - computation takes place for pair
+ b - baseline sample percentage
+
+- perf diff A B C
+
+ baseline/A compute/B compute/C samples
+ ---------------------------------------
+ b x f1
+ b x x f2
+ b f3
+ b x f4
+ b f6
+ x x f5
+
+- perf diff B A C
+
+ baseline/B compute/A compute/C samples
+ ---------------------------------------
+ b x x f2
+ b x f4
+ b x f5
+ x x f1
+ x f3
+ x f6
+
+- perf diff C B A
+
+ baseline/C compute/B compute/A samples
+ ---------------------------------------
+ b x f1
+ b x x f2
+ b x f5
+ x f3
+ x x f4
+ x f6
+
+COMPARISON METHODS
+------------------
+delta
+~~~~~
+If specified the 'Delta' column is displayed with value 'd' computed as:
+
+ d = A->period_percent - B->period_percent
+
+with:
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period_percent being the % of the hist entry period value within
+ single data file
+
+ - with filtering by -C, -d and/or -S, period_percent might be changed
+ relative to how entries are filtered. Use --percentage=absolute to
+ prevent such fluctuation.
+
+ratio
+~~~~~
+If specified the 'Ratio' column is displayed with value 'r' computed as:
+
+ r = A->period / B->period
+
+with:
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period being the hist entry period value
+
+wdiff:WEIGHT-B,WEIGHT-A
+~~~~~~~~~~~~~~~~~~~~~~~
+If specified the 'Weighted diff' column is displayed with value 'd' computed as:
+
+ d = B->period * WEIGHT-A - A->period * WEIGHT-B
+
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period being the hist entry period value
+
+ - WEIGHT-A/WEIGHT-B being user suplied weights in the the '-c' option
+ behind ':' separator like '-c wdiff:1,2'.
+ - WIEGHT-A being the weight of the data file
+ - WIEGHT-B being the weight of the baseline data file
+
SEE ALSO
--------
-linkperf:perf-record[1]
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
index 15217345c2f..1ceb3700ffb 100644
--- a/tools/perf/Documentation/perf-evlist.txt
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -28,6 +28,10 @@ OPTIONS
--verbose=::
Show all fields.
+-g::
+--group::
+ Show event group information.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-list[1],
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 025630d43cd..a00a34276c5 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -29,6 +29,17 @@ OPTIONS
-v::
--verbose::
Be more verbose.
+-i::
+--input=::
+ Input file name. (default: stdin)
+-o::
+--output=::
+ Output file name. (default: stdout)
+-s::
+--sched-stat::
+ Merge sched_stat and sched_switch for getting events where and how long
+ tasks slept. sched_switch contains a callchain where a task slept and
+ sched_stat contains a timeslice how long a task slept.
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index 326f2cb333c..52276a6d2b7 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -10,9 +10,10 @@ SYNOPSIS
[verse]
'perf kvm' [--host] [--guest] [--guestmount=<path>
[--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]]
- {top|record|report|diff|buildid-list}
+ {top|record|report|diff|buildid-list} [<options>]
'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
- | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat}
+ | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} [<options>]
+'perf kvm stat [record|report|live] [<options>]
DESCRIPTION
-----------
@@ -23,10 +24,17 @@ There are a couple of variants of perf kvm:
of an arbitrary workload.
'perf kvm record <command>' to record the performance counter profile
- of an arbitrary workload and save it into a perf data file. If both
- --host and --guest are input, the perf data file name is perf.data.kvm.
- If there is no --host but --guest, the file name is perf.data.guest.
- If there is no --guest but --host, the file name is perf.data.host.
+ of an arbitrary workload and save it into a perf data file. We set the
+ default behavior of perf kvm as --guest, so if neither --host nor --guest
+ is input, the perf data file name is perf.data.guest. If --host is input,
+ the perf data file name is perf.data.kvm. If you want to record data into
+ perf.data.host, please input --host --no-guest. The behaviors are shown as
+ following:
+ Default('') -> perf.data.guest
+ --host -> perf.data.kvm
+ --guest -> perf.data.guest
+ --host --guest -> perf.data.kvm
+ --host --no-guest -> perf.data.host
'perf kvm report' to display the performance counter profile information
recorded via perf kvm record.
@@ -36,7 +44,9 @@ There are a couple of variants of perf kvm:
'perf kvm buildid-list' to display the buildids found in a perf data file,
so that other tools can be used to fetch packages with matching symbol tables
- for use by perf report.
+ for use by perf report. As buildid is read from /sys/kernel/notes in os, then
+ if you want to list the buildid for guest, please make sure your perf data file
+ was captured with --guestmount in perf kvm record.
'perf kvm stat <command>' to run a command and gather performance counter
statistics.
@@ -50,17 +60,21 @@ There are a couple of variants of perf kvm:
'perf kvm stat report' reports statistical data which includes events
handled time, samples, and so on.
+ 'perf kvm stat live' reports statistical data in a live mode (similar to
+ record + report but with statistical data updated live at a given display
+ rate).
+
OPTIONS
-------
-i::
---input=::
+--input=<path>::
Input file name.
-o::
---output::
+--output=<path>::
Output file name.
---host=::
+--host::
Collect host side performance profile.
---guest=::
+--guest::
Collect guest side performance profile.
--guestmount=<path>::
Guest os root file system mount directory. Users mounts guest os
@@ -79,19 +93,61 @@ OPTIONS
kernel module information. Users copy it out from guest os.
--guestvmlinux=<path>::
Guest os kernel vmlinux.
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
STAT REPORT OPTIONS
-------------------
--vcpu=<value>::
analyze events which occures on this vcpu. (default: all vcpus)
---events=<value>::
- events to be analyzed. Possible values: vmexit, mmio, ioport.
+--event=<value>::
+ event to be analyzed. Possible values: vmexit, mmio, ioport.
(default: vmexit)
-k::
--key=<value>::
Sorting key. Possible values: sample (default, sort by samples
number), time (sort by average time).
+-p::
+--pid=::
+ Analyze events only for given process ID(s) (comma separated list).
+
+STAT LIVE OPTIONS
+-----------------
+-d::
+--display::
+ Time in seconds between display updates
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs.
+
+-p::
+--pid=::
+ Analyze events only for given process ID(s) (comma separated list).
+
+--vcpu=<value>::
+ analyze events which occures on this vcpu. (default: all vcpus)
+
+
+--event=<value>::
+ event to be analyzed. Possible values: vmexit, mmio, ioport.
+ (default: vmexit)
+
+-k::
+--key=<value>::
+ Sorting key. Possible values: sample (default, sort by samples
+ number), time (sort by average time).
+
+--duration=<value>::
+ Show events other than HLT that take longer than duration usecs.
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index d1e39dc8c81..6fce6a62220 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
-'perf list' [hw|sw|cache|tracepoint|event_glob]
+'perf list' [hw|sw|cache|tracepoint|pmu|event_glob]
DESCRIPTION
-----------
@@ -29,6 +29,8 @@ counted. The following modifiers exist:
G - guest counting (in KVM guests)
H - host counting (not in KVM guests)
p - precise level
+ S - read sample value (PERF_SAMPLE_READ)
+ D - pin the event to the PMU
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
@@ -104,6 +106,8 @@ To limit the list use:
'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
block, etc.
+. 'pmu' to print the kernel supplied PMU events.
+
. If none of the above is matched, it will apply the supplied glob to all
events, printing the ones that match.
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index c7f5f55634a..ab25be28c9d 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -48,7 +48,7 @@ REPORT OPTIONS
-k::
--key=<value>::
Sorting key. Possible values: acquired (default), contended,
- wait_total, wait_max, wait_min.
+ avg_wait, wait_total, wait_max, wait_min.
INFO OPTIONS
------------
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
new file mode 100644
index 00000000000..1d78a4064da
--- /dev/null
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -0,0 +1,52 @@
+perf-mem(1)
+===========
+
+NAME
+----
+perf-mem - Profile memory accesses
+
+SYNOPSIS
+--------
+[verse]
+'perf mem' [<options>] (record [<command>] | report)
+
+DESCRIPTION
+-----------
+"perf mem -t <TYPE> record" runs a command and gathers memory operation data
+from it, into perf.data. Perf record options are accepted and are passed through.
+
+"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
+right set of options to display a memory access profile.
+
+Note that on Intel systems the memory latency reported is the use-latency,
+not the pure load (or store latency). Use latency includes any pipeline
+queueing delays in addition to the memory subsystem latency.
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+-t::
+--type=::
+ Select the memory operation type: load or store (default: load)
+
+-D::
+--dump-raw-samples=::
+ Dump the raw decoded samples on the screen in a format that is easy to parse with
+ one sample per line.
+
+-x::
+--field-separator::
+ Specify the field separator used when dump raw samples (-D option). By default,
+ The separator is the space character.
+
+-C::
+--cpu-list::
+ Restrict dump of raw samples to those provided via this option. Note that the same
+ option can be passed in record mode. It will be interpreted the same way as perf
+ record.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index b715cb71592..1513935c399 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -136,6 +136,8 @@ Each probe argument follows below syntax.
'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
+
LINE SYNTAX
-----------
Line range is described by following syntax.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index b38a1f9ad46..d460049cae8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -57,6 +57,8 @@ OPTIONS
-t::
--tid=::
Record events on existing thread ID (comma separated list).
+ This option also disables inheritance by default. Enable it by adding
+ --inherit.
-u::
--uid=::
@@ -65,16 +67,9 @@ OPTIONS
-r::
--realtime=::
Collect data with this RT SCHED_FIFO priority.
--D::
---no-delay::
- Collect data without buffering.
--A::
---append::
- Append to the output file to do incremental profiling.
--f::
---force::
- Overwrite existing data file. (deprecated)
+--no-buffering::
+ Collect data without buffering.
-c::
--count=::
@@ -93,11 +88,25 @@ OPTIONS
-m::
--mmap-pages=::
- Number of mmap data pages. Must be a power of two.
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
-g::
+ Enables call-graph (stack chain/backtrace) recording.
+
--call-graph::
- Do call-graph (stack chain/backtrace) recording.
+ Setup and enable call-graph (stack chain/backtrace) recording,
+ implies -g.
+
+ Allows specifying "fp" (frame pointer) or "dwarf"
+ (DWARF's CFI - Call Frame Information) as the method to collect
+ the information used to show the call graphs.
+
+ In some systems, where binaries are build with gcc
+ --fomit-frame-pointer, using the "fp" method will produce bogus
+ call graphs, using "dwarf", if available (perf tools linked to
+ the libunwind library) should be used instead.
-q::
--quiet::
@@ -172,16 +181,39 @@ following filters are defined:
- u: only when the branch target is at the user level
- k: only when the branch target is in the kernel
- hv: only when the target is at the hypervisor level
+ - in_tx: only when the target is in a hardware transaction
+ - no_tx: only when the target is not in a hardware transaction
+ - abort_tx: only when the target is a hardware transaction abort
+ - cond: conditional branches
+
-The option requires at least one branch type among any, any_call, any_ret, ind_call.
-The privilege levels may be ommitted, in which case, the privilege levels of the associated
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+The privilege levels may be omitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
is enabled for all the sampling events. The sampled branch type is the same for all events.
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
+--weight::
+Enable weightened sampling. An additional weight is recorded per sample and can be
+displayed with the weight and local_weight sort keys. This currently works for TSX
+abort events and some memory events in precise mode on modern Intel CPUs.
+
+--transaction::
+Record transaction flags for transaction related events.
+
+--per-thread::
+Use per-thread mmaps. By default per-cpu mmaps are created. This option
+overrides that and uses per-thread mmaps. A side-effect of that is that
+inheritance is automatically disabled. --per-thread is ignored with a warning
+if combined with -a or -C options.
+
+-D::
+--delay=::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index f4d91bebd59..d2b59af62bc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -25,10 +25,6 @@ OPTIONS
--verbose::
Be more verbose. (show symbol address, etc)
--d::
---dsos=::
- Only consider symbols in these dsos. CSV that understands
- file://filename entries.
-n::
--show-nr-samples::
Show the number of samples for each symbol
@@ -42,11 +38,18 @@ OPTIONS
-c::
--comms=::
Only consider symbols in these comms. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
--symbol-filter=::
Only show symbols that match (partially) with this filter.
@@ -57,11 +60,85 @@ OPTIONS
-s::
--sort=::
- Sort by key(s): pid, comm, dso, symbol, parent, srcline.
+ Sort histogram entries by given key(s) - multiple keys can be specified
+ in CSV format. Following sort keys are available:
+ pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight.
+
+ Each key has following meaning:
+
+ - comm: command (name) of the task which can be read via /proc/<pid>/comm
+ - pid: command and tid of the task
+ - dso: name of library or module executed at the time of sample
+ - symbol: name of function executed at the time of sample
+ - parent: name of function matched to the parent regex filter. Unmatched
+ entries are displayed as "[other]".
+ - cpu: cpu number the task ran at the time of sample
+ - srcline: filename and line number executed at the time of sample. The
+ DWARF debugging info must be provided.
+ - weight: Event specific weight, e.g. memory latency or transaction
+ abort cost. This is the global weight.
+ - local_weight: Local weight version of the weight above.
+ - transaction: Transaction abort flags.
+ - overhead: Overhead percentage of sample
+ - overhead_sys: Overhead percentage of sample running in system mode
+ - overhead_us: Overhead percentage of sample running in user mode
+ - overhead_guest_sys: Overhead percentage of sample running in system mode
+ on guest machine
+ - overhead_guest_us: Overhead percentage of sample running in user mode on
+ guest machine
+ - sample: Number of sample
+ - period: Raw number of event count of sample
+
+ By default, comm, dso and symbol keys are used.
+ (i.e. --sort comm,dso,symbol)
+
+ If --branch-stack option is used, following sort keys are also
+ available:
+ dso_from, dso_to, symbol_from, symbol_to, mispredict.
+
+ - dso_from: name of library or module branched from
+ - dso_to: name of library or module branched to
+ - symbol_from: name of function branched from
+ - symbol_to: name of function branched to
+ - mispredict: "N" for predicted branch, "Y" for mispredicted branch
+ - in_tx: branch in TSX transaction
+ - abort: TSX transaction abort.
+
+ And default sort keys are changed to comm, dso_from, symbol_from, dso_to
+ and symbol_to, see '--branch-stack'.
+
+-F::
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+ By default, every sort keys not specified in -F will be appended
+ automatically.
+
+ If --mem-mode option is used, following sort keys are also available
+ (incompatible with --branch-stack):
+ symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+ - symbol_daddr: name of data symbol being executed on at the time of sample
+ - dso_daddr: name of library or module containing the data being executed
+ on at the time of sample
+ - locked: whether the bus was locked at the time of sample
+ - tlb: type of tlb access for the data at the time of sample
+ - mem: type of memory access for the data at the time of sample
+ - snoop: type of snoop (if any) for the data at the time of sample
+ - dcacheline: the cacheline the data address is on at the time of sample
+
+ And default sort keys are changed to local_weight, mem, sym, dso,
+ symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
-p::
--parent=<regex>::
- regex filter to identify parent, see: '--sort parent'
+ A regex filter to identify parent. The parent is a caller of this
+ function and searched through the callchain, thus it requires callchain
+ information recorded. The pattern is in the exteneded regex format and
+ defaults to "\^sys_|^do_page_fault", see '--sort parent'.
-x::
--exclude-other::
@@ -74,7 +151,6 @@ OPTIONS
-t::
--field-separator=::
-
Use a special separator character and don't pad with spaces, replacing
all occurrences of this separator in symbol names (and other output)
with a '.' character, that thus it's the only non valid separator.
@@ -83,7 +159,7 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.
--g [type,min[,limit],order]::
+-g [type,min[,limit],order[,key]]::
--call-graph::
Display call chains using type, min percent threshold, optional print
limit and order.
@@ -97,12 +173,34 @@ OPTIONS
- callee: callee based call graph.
- caller: inverted caller based call graph.
- Default: fractal,0.5,callee.
+ key can be:
+ - function: compare on functions
+ - address: compare on individual code addresses
+
+ Default: fractal,0.5,callee,function.
+
+--children::
+ Accumulate callchain of children to parent entry so that then can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires callchains are recorded.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+
+ Default: 127
-G::
--inverted::
alias for inverted caller based call graph.
+--ignore-callees=<regex>::
+ Ignore callees of the function(s) matching the given regex.
+ This has the effect of collecting the callers of each such
+ function into one place in the call-graph tree.
+
--pretty=<key>::
Pretty printing style. key: normal, raw
@@ -171,6 +269,42 @@ OPTIONS
--objdump=<path>::
Path to objdump binary.
+--group::
+ Show event group information together.
+
+--demangle::
+ Demangle symbol names to human readable form. It's enabled by default,
+ disable with --no-demangle.
+
+--mem-mode::
+ Use the data addresses of samples in addition to instruction addresses
+ to build the histograms. To generate meaningful output, the perf.data
+ file must have been obtained using perf record -d -W and using a
+ special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
+ 'perf mem' for simpler access.
+
+--percent-limit::
+ Do not show entries which have an overhead under that percent.
+ (Default: 0).
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+--header::
+ Show header information in the perf.data file. This includes
+ various information like hostname, OS and perf version, cpu/mem
+ info, perf command line, event list and so on. Currently only
+ --stdio output supports this feature.
+
+--header-only::
+ Show only perf.data header (forces --stdio).
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
index a4027f221a5..9f1f054b843 100644
--- a/tools/perf/Documentation/perf-script-python.txt
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -336,7 +336,6 @@ scripts listed by the 'perf script -l' command e.g.:
----
root@tropicana:~# perf script -l
List of available trace scripts:
- workqueue-stats workqueue stats (ins/exe/create/destroy)
wakeup-latency system-wide min/max/avg wakeup latency
rw-by-file <comm> r/w activity for a program, by file
rw-by-pid system-wide r/w activity
@@ -402,7 +401,6 @@ should show a new entry for your script:
----
root@tropicana:~# perf script -l
List of available trace scripts:
- workqueue-stats workqueue stats (ins/exe/create/destroy)
wakeup-latency system-wide min/max/avg wakeup latency
rw-by-file <comm> r/w activity for a program, by file
rw-by-pid system-wide r/w activity
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index e9cbfcddfa3..05f9a0a6784 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,7 @@ OPTIONS
-f::
--fields::
Comma separated list of fields to print. Options are:
- comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff.
+ comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -203,6 +203,18 @@ OPTIONS
--show-kernel-path::
Try to resolve the path of [kernel.kallsyms]
+--show-task-events
+ Display task related events (e.g. FORK, COMM, EXIT).
+
+--show-mmap-events
+ Display mmap related events (e.g. MMAP, MMAP2).
+
+--header
+ Show perf.data header.
+
+--header-only
+ Show only perf.data header.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2fa173b5197..29ee857c09c 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -52,7 +52,7 @@ OPTIONS
-r::
--repeat=<n>::
- repeat command and print average + stddev (max: 100)
+ repeat command and print average + stddev (max: 100). 0 means forever.
-B::
--big-num::
@@ -108,7 +108,39 @@ with it. --append may be used here. Examples:
3>results perf stat --log-fd 3 -- $cmd
3>>results perf stat --log-fd 3 --append -- $cmd
+--pre::
+--post::
+ Pre and post measurement hooks, e.g.:
+perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
+
+-I msecs::
+--interval-print msecs::
+ Print count deltas every N milliseconds (minimum: 100ms)
+ example: perf stat -I 1000 -e cycles -a sleep 5
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements. This
+is a useful mode to detect imbalance between sockets. To enable this mode,
+use --per-socket in addition to -a. (system-wide). The output includes the
+socket number and the number of online processors on that socket. This is
+useful to gauge the amount of aggregation.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements. This
+is a useful mode to detect imbalance between physical cores. To enable this mode,
+use --per-core in addition to -a. (system-wide). The output includes the
+core number and the number of online logical processors on that physical processor.
+
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
index b24ac40fcd5..d1d3e5121f8 100644
--- a/tools/perf/Documentation/perf-test.txt
+++ b/tools/perf/Documentation/perf-test.txt
@@ -23,6 +23,10 @@ from 'perf test list'.
OPTIONS
-------
+-s::
+--skip::
+ Tests to skip (comma separater numeric list).
+
-v::
--verbose::
Be more verbose.
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index 1632b0efc75..5e0f986dff3 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -8,7 +8,7 @@ perf-timechart - Tool to visualize total system behavior during a workload
SYNOPSIS
--------
[verse]
-'perf timechart' {record}
+'perf timechart' [<timechart options>] {record} [<record options>]
DESCRIPTION
-----------
@@ -20,8 +20,8 @@ There are two variants of perf timechart:
'perf timechart' to turn a trace into a Scalable Vector Graphics file,
that can be viewed with popular SVG viewers such as 'Inkscape'.
-OPTIONS
--------
+TIMECHART OPTIONS
+-----------------
-o::
--output=::
Select the output file (default: output.svg)
@@ -34,12 +34,58 @@ OPTIONS
-P::
--power-only::
Only output the CPU power section of the diagram
+-T::
+--tasks-only::
+ Don't output processor state transitions
-p::
--process::
Select the processes to display, by name or PID
--symfs=<directory>::
Look for files with symbols relative to this directory.
+-n::
+--proc-num::
+ Print task info for at least given number of tasks.
+-t::
+--topology::
+ Sort CPUs according to topology.
+--highlight=<duration_nsecs|task_name>::
+ Highlight tasks (using different color) that run more than given
+ duration or tasks with given name. If number is given it's interpreted
+ as number of nanoseconds. If non-numeric string is given it's
+ interpreted as task name.
+
+RECORD OPTIONS
+--------------
+-P::
+--power-only::
+ Record only power-related events
+-T::
+--tasks-only::
+ Record only tasks-related events
+-g::
+--callchain::
+ Do call-graph (stack chain/backtrace) recording
+
+EXAMPLES
+--------
+
+$ perf timechart record git pull
+
+ [ perf record: Woken up 13 times to write data ]
+ [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
+
+$ perf timechart
+
+ Written 10.2 seconds of trace to output.svg.
+
+Record system-wide timechart:
+
+ $ perf timechart record
+
+ then generate timechart and highlight 'gcc' tasks:
+
+ $ perf timechart --highlight gcc
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 5b80d84d6b4..180ae02137a 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -50,7 +50,6 @@ Default is to monitor all CPUS.
--count-filter=<count>::
Only display functions with more events than this.
--g::
--group::
Put the counters into a counter group.
@@ -60,7 +59,7 @@ Default is to monitor all CPUS.
-i::
--inherit::
- Child tasks inherit counters, only makes sens with -p option.
+ Child tasks do not inherit counters.
-k <path>::
--vmlinux=<path>::
@@ -68,7 +67,9 @@ Default is to monitor all CPUS.
-m <pages>::
--mmap-pages=<pages>::
- Number of mmapped data pages.
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
-p <pid>::
--pid=<pid>::
@@ -86,7 +87,6 @@ Default is to monitor all CPUS.
--realtime=<priority>::
Collect data with this RT SCHED_FIFO priority.
--s <symbol>::
--sym-annotate=<symbol>::
Annotate this symbol.
@@ -112,7 +112,18 @@ Default is to monitor all CPUS.
-s::
--sort::
- Sort by key(s): pid, comm, dso, symbol, parent, srcline.
+ Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
+ local_weight, abort, in_tx, transaction, overhead, sample, period.
+ Please see description of --sort in the perf-report man page.
+
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+ By default, every sort keys not specified in --field will be appended
+ automatically.
-n::
--show-nr-samples::
@@ -122,13 +133,16 @@ Default is to monitor all CPUS.
Show a column with the sum of periods.
--dsos::
- Only consider symbols in these dsos.
+ Only consider symbols in these dsos. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
--comms::
- Only consider symbols in these comms.
+ Only consider symbols in these comms. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
--symbols::
- Only consider these symbols.
+ Only consider these symbols. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
-M::
--disassembler-style=:: Set disassembler style for objdump.
@@ -140,20 +154,44 @@ Default is to monitor all CPUS.
--asm-raw::
Show raw instruction encoding of assembly instructions.
--G [type,min,order]::
+-g::
+ Enables call-graph (stack chain/backtrace) recording.
+
--call-graph::
- Display call chains using type, min percent threshold and order.
- type can be either:
- - flat: single column, linear exposure of call chains.
- - graph: use a graph tree, displaying absolute overhead rates.
- - fractal: like graph, but displays relative rates. Each branch of
- the tree is considered as a new profiled object.
+ Setup and enable call-graph (stack chain/backtrace) recording,
+ implies -g.
+
+--children::
+ Accumulate callchain of children to parent entry so that then can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires -g/--call-graph option
+ enabled.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+
+ Default: 127
+
+--ignore-callees=<regex>::
+ Ignore callees of the function(s) matching the given regex.
+ This has the effect of collecting the callers of each such
+ function into one place in the call-graph tree.
+
+--percent-limit::
+ Do not show entries which have an overhead under that percent.
+ (Default: 0).
- order can be either:
- - callee: callee based call graph.
- - caller: inverted caller based call graph.
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
- Default: fractal,0.5,callee.
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
INTERACTIVE PROMPTING KEYS
--------------------------
@@ -190,4 +228,4 @@ Pressing any unmapped key displays a menu, and prompts for input.
SEE ALSO
--------
-linkperf:perf-stat[1], linkperf:perf-list[1]
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 3a2ae37310a..fae38d9a44a 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -9,6 +9,7 @@ SYNOPSIS
--------
[verse]
'perf trace'
+'perf trace record'
DESCRIPTION
-----------
@@ -16,38 +17,96 @@ This command will show the events associated with the target, initially
syscalls, but other system events like pagefaults, task lifetime events,
scheduling events, etc.
-Initially this is a live mode only tool, but eventually will work with
-perf.data files like the other tools, allowing a detached 'record' from
-analysis phases.
+This is a live mode tool in addition to working with perf.data files like
+the other perf tools. Files can be generated using the 'perf record' command
+but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*').
+Alernatively, the 'perf trace record' can be used as a shortcut to
+automatically include the raw_syscalls events when writing events to a file.
+
+The following options apply to perf trace; options to perf trace record are
+found in the perf record man page.
OPTIONS
-------
+-a::
--all-cpus::
System-wide collection from all CPUs.
+-e::
+--expr::
+ List of events to show, currently only syscall names.
+ Prefixing with ! shows all syscalls but the ones specified. You may
+ need to escape it.
+
+-o::
+--output=::
+ Output file name.
+
-p::
--pid=::
Record events on existing process ID (comma separated list).
+-t::
--tid=::
Record events on existing thread ID (comma separated list).
+-u::
--uid=::
Record events in threads owned by uid. Name or number.
+-v::
+--verbose=::
+ Verbosity level.
+
+-i::
--no-inherit::
Child tasks do not inherit counters.
+-m::
--mmap-pages=::
- Number of mmap data pages. Must be a power of two.
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
+-C::
--cpu::
Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
In per-thread mode with inheritance mode on (default), Events are captured only when
the thread executes on the designated CPUs. Default is to monitor all CPUs.
+--duration:
+ Show only events that had a duration greater than N.M ms.
+
+--sched:
+ Accrue thread runtime and provide a summary at the end of the session.
+
+-i
+--input
+ Process events from a given perf data file.
+
+-T
+--time
+ Print full timestamp rather time relative to first sample.
+
+--comm::
+ Show process COMM right beside its ID, on by default, disable with --no-comm.
+
+-s::
+--summary::
+ Show only a summary of syscalls by thread with min, max, and average times
+ (in msec) and relative stddev.
+
+-S::
+--with-summary::
+ Show all syscalls followed by a summary by thread with min, max, and
+ average times (in msec) and relative stddev.
+
+--tool_stats::
+ Show tool stats such as number of times fd->pathname was discovered thru
+ hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 80db3f4bcf7..45da209b6ed 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,6 +1,14 @@
tools/perf
tools/scripts
tools/lib/traceevent
+tools/lib/api
+tools/lib/symbol/kallsyms.c
+tools/lib/symbol/kallsyms.h
+tools/include/asm/bug.h
+tools/include/linux/compiler.h
+tools/include/linux/hash.h
+tools/include/linux/export.h
+tools/include/linux/types.h
include/linux/const.h
include/linux/perf_event.h
include/linux/rbtree.h
@@ -11,11 +19,21 @@ lib/rbtree.c
include/linux/swab.h
arch/*/include/asm/unistd*.h
arch/*/include/asm/perf_regs.h
+arch/*/include/uapi/asm/unistd*.h
+arch/*/include/uapi/asm/perf_regs.h
arch/*/lib/memcpy*.S
arch/*/lib/memset*.S
include/linux/poison.h
include/linux/magic.h
include/linux/hw_breakpoint.h
+include/linux/rbtree_augmented.h
+include/uapi/linux/perf_event.h
+include/uapi/linux/const.h
+include/uapi/linux/swab.h
+include/uapi/linux/hw_breakpoint.h
arch/x86/include/asm/svm.h
arch/x86/include/asm/vmx.h
arch/x86/include/asm/kvm_host.h
+arch/x86/include/uapi/asm/svm.h
+arch/x86/include/uapi/asm/vmx.h
+arch/x86/include/uapi/asm/kvm.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 00deed4d615..cb2e5868c8e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -1,1079 +1,90 @@
-include ../scripts/Makefile.include
-
-# The default target of this Makefile is...
-all:
-
-include config/utilities.mak
-
-# Define V to have a more verbose compile.
-#
-# Define O to save output files in a separate directory.
-#
-# Define ARCH as name of target architecture if you want cross-builds.
-#
-# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
-#
-# Define NO_LIBPERL to disable perl script extension.
#
-# Define NO_LIBPYTHON to disable python script extension.
+# This is a simple wrapper Makefile that calls the main Makefile.perf
+# with a -j option to do parallel builds
#
-# Define PYTHON to point to the python binary if the default
-# `python' is not correct; for example: PYTHON=python2
+# If you want to invoke the perf build in some non-standard way then
+# you can use the 'make -f Makefile.perf' method to invoke it.
#
-# Define PYTHON_CONFIG to point to the python-config binary if
-# the default `$(PYTHON)-config' is not correct.
-#
-# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
-#
-# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
-#
-# Define LDFLAGS=-static to build a static binary.
-#
-# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
-#
-# Define NO_DWARF if you do not want debug-info analysis feature at all.
+
#
-# Define WERROR=0 to disable treating any warnings as errors.
+# Clear out the built-in rules GNU make defines by default (such as .o targets),
+# so that we pass through all targets to Makefile.perf:
#
-# Define NO_NEWT if you do not want TUI support.
+.SUFFIXES:
+
#
-# Define NO_GTK2 if you do not want GTK+ GUI support.
+# We don't want to pass along options like -j:
#
-# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+unexport MAKEFLAGS
+
#
-# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+# Do a parallel build with multiple jobs, based on the number of CPUs online
+# in this system: 'make -j8' on a 8-CPU system, etc.
#
-# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
-# backtrace post unwind.
+# (To override it, run 'make JOBS=1' and similar.)
#
-# Define NO_BACKTRACE if you do not want stack backtrace debug feature
-
-$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
- @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
--include $(OUTPUT)PERF-VERSION-FILE
-
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \
- -e s/arm.*/arm/ -e s/sa110/arm/ \
- -e s/s390x/s390/ -e s/parisc64/parisc/ \
- -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \
- -e s/sh[234].*/sh/ )
-NO_PERF_REGS := 1
-
-CC = $(CROSS_COMPILE)gcc
-AR = $(CROSS_COMPILE)ar
-
-# Additional ARCH settings for x86
-ifeq ($(ARCH),i386)
- override ARCH := x86
- NO_PERF_REGS := 0
- LIBUNWIND_LIBS = -lunwind -lunwind-x86
-endif
-ifeq ($(ARCH),x86_64)
- override ARCH := x86
- IS_X86_64 := 0
- ifeq (, $(findstring m32,$(EXTRA_CFLAGS)))
- IS_X86_64 := $(shell echo __x86_64__ | ${CC} -E -x c - | tail -n 1)
- endif
- ifeq (${IS_X86_64}, 1)
- RAW_ARCH := x86_64
- ARCH_CFLAGS := -DARCH_X86_64
- ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S
- endif
- NO_PERF_REGS := 0
- LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
+ifeq ($(JOBS),)
+ JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null)
+ ifeq ($(JOBS),)
+ JOBS := 1
+ endif
endif
-# Treat warnings as errors unless directed not to
-ifneq ($(WERROR),0)
- CFLAGS_WERROR := -Werror
+#
+# Only pass canonical directory names as the output directory:
+#
+ifneq ($(O),)
+ FULL_O := $(shell readlink -f $(O) || echo $(O))
endif
+#
+# Only accept the 'DEBUG' variable from the command line:
+#
ifeq ("$(origin DEBUG)", "command line")
- PERF_DEBUG = $(DEBUG)
-endif
-ifndef PERF_DEBUG
- CFLAGS_OPTIMIZE = -O6 -D_FORTIFY_SOURCE=2
-endif
-
-ifdef PARSER_DEBUG
- PARSER_DEBUG_BISON := -t
- PARSER_DEBUG_FLEX := -d
- PARSER_DEBUG_CFLAGS := -DPARSER_DEBUG
-endif
-
-CFLAGS = -fno-omit-frame-pointer -ggdb3 -funwind-tables -Wall -Wextra -std=gnu99 $(CFLAGS_WERROR) $(CFLAGS_OPTIMIZE) $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) $(PARSER_DEBUG_CFLAGS)
-EXTLIBS = -lpthread -lrt -lelf -lm
-ALL_CFLAGS = $(CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
-ALL_LDFLAGS = $(LDFLAGS)
-STRIP ?= strip
-
-# Among the variables below, these:
-# perfexecdir
-# template_dir
-# mandir
-# infodir
-# htmldir
-# ETC_PERFCONFIG (but not sysconfdir)
-# can be specified as a relative path some/where/else;
-# this is interpreted as relative to $(prefix) and "perf" at
-# runtime figures out where they are based on the path to the executable.
-# This can help installing the suite in a relocatable way.
-
-# Make the path relative to DESTDIR, not to prefix
-ifndef DESTDIR
-prefix = $(HOME)
-endif
-bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
-mandir = share/man
-infodir = share/info
-perfexecdir = libexec/perf-core
-sharedir = $(prefix)/share
-template_dir = share/perf-core/templates
-htmldir = share/doc/perf-doc
-ifeq ($(prefix),/usr)
-sysconfdir = /etc
-ETC_PERFCONFIG = $(sysconfdir)/perfconfig
-else
-sysconfdir = $(prefix)/etc
-ETC_PERFCONFIG = etc/perfconfig
-endif
-lib = lib
-
-export prefix bindir sharedir sysconfdir
-
-RM = rm -f
-MKDIR = mkdir
-FIND = find
-INSTALL = install
-
-# sparse is architecture-neutral, which means that we need to tell it
-# explicitly what architecture to check for. Fix this up for yours..
-SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
-
--include config/feature-tests.mak
-
-ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -fstack-protector-all),y)
- CFLAGS := $(CFLAGS) -fstack-protector-all
-endif
-
-ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -Wstack-protector),y)
- CFLAGS := $(CFLAGS) -Wstack-protector
-endif
-
-ifeq ($(call try-cc,$(SOURCE_HELLO),-Werror -Wvolatile-register-var),y)
- CFLAGS := $(CFLAGS) -Wvolatile-register-var
-endif
-
-### --- END CONFIGURATION SECTION ---
-
-BASIC_CFLAGS = -Iutil/include -Iarch/$(ARCH)/include -I$(OUTPUT)util -I$(TRACE_EVENT_DIR) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE
-BASIC_LDFLAGS =
-
-# Guard against environment variables
-BUILTIN_OBJS =
-LIB_H =
-LIB_OBJS =
-PYRF_OBJS =
-SCRIPT_SH =
-
-SCRIPT_SH += perf-archive.sh
-
-grep-libs = $(filter -l%,$(1))
-strip-libs = $(filter-out -l%,$(1))
-
-TRACE_EVENT_DIR = ../lib/traceevent/
-
-ifneq ($(OUTPUT),)
- TE_PATH=$(OUTPUT)
+ ifeq ($(DEBUG),)
+ override DEBUG = 0
+ else
+ SET_DEBUG = "DEBUG=$(DEBUG)"
+ endif
else
- TE_PATH=$(TRACE_EVENT_DIR)
+ override DEBUG = 0
endif
-LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
-TE_LIB := -L$(TE_PATH) -ltraceevent
-
-PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
-PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py
+define print_msg
+ @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n'
+endef
-export LIBTRACEEVENT
+define make
+ @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $(SET_DEBUG) $@
+endef
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
- $(QUIET_GEN)CFLAGS='$(BASIC_CFLAGS)' $(PYTHON_WORD) util/setup.py \
- --quiet build_ext; \
- mkdir -p $(OUTPUT)python && \
- cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
#
-# No Perl scripts right now:
+# Needed if no target specified:
+# (Except for tags and TAGS targets. The reason is that the
+# Makefile does not treat tags/TAGS as targets but as files
+# and thus won't rebuilt them once they are in place.)
#
-
-SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+all tags TAGS:
+ $(print_msg)
+ $(make)
#
-# Single 'perf' binary right now:
+# The clean target is not really parallel, don't print the jobs info:
#
-PROGRAMS += $(OUTPUT)perf
-
-LANG_BINDINGS =
-
-# what 'all' will build and 'install' will install, in perfexecdir
-ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
-
-# what 'all' will build but not install in perfexecdir
-OTHER_PROGRAMS = $(OUTPUT)perf
-
-# Set paths to tools early so that they can be used for version tests.
-ifndef SHELL_PATH
- SHELL_PATH = /bin/sh
-endif
-ifndef PERL_PATH
- PERL_PATH = /usr/bin/perl
-endif
-
-export PERL_PATH
-
-FLEX = flex
-BISON= bison
-
-$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
- $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
-
-$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
- $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c
-
-$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
- $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
-
-$(OUTPUT)util/pmu-bison.c: util/pmu.y
- $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c
-
-$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
-$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
-
-LIB_FILE=$(OUTPUT)libperf.a
-
-LIB_H += ../../include/uapi/linux/perf_event.h
-LIB_H += ../../include/linux/rbtree.h
-LIB_H += ../../include/linux/list.h
-LIB_H += ../../include/uapi/linux/const.h
-LIB_H += ../../include/linux/hash.h
-LIB_H += ../../include/linux/stringify.h
-LIB_H += util/include/linux/bitmap.h
-LIB_H += util/include/linux/bitops.h
-LIB_H += util/include/linux/compiler.h
-LIB_H += util/include/linux/const.h
-LIB_H += util/include/linux/ctype.h
-LIB_H += util/include/linux/kernel.h
-LIB_H += util/include/linux/list.h
-LIB_H += util/include/linux/export.h
-LIB_H += util/include/linux/magic.h
-LIB_H += util/include/linux/poison.h
-LIB_H += util/include/linux/prefetch.h
-LIB_H += util/include/linux/rbtree.h
-LIB_H += util/include/linux/rbtree_augmented.h
-LIB_H += util/include/linux/string.h
-LIB_H += util/include/linux/types.h
-LIB_H += util/include/linux/linkage.h
-LIB_H += util/include/asm/asm-offsets.h
-LIB_H += util/include/asm/bug.h
-LIB_H += util/include/asm/byteorder.h
-LIB_H += util/include/asm/hweight.h
-LIB_H += util/include/asm/swab.h
-LIB_H += util/include/asm/system.h
-LIB_H += util/include/asm/uaccess.h
-LIB_H += util/include/dwarf-regs.h
-LIB_H += util/include/asm/dwarf2.h
-LIB_H += util/include/asm/cpufeature.h
-LIB_H += util/include/asm/unistd_32.h
-LIB_H += util/include/asm/unistd_64.h
-LIB_H += perf.h
-LIB_H += util/annotate.h
-LIB_H += util/cache.h
-LIB_H += util/callchain.h
-LIB_H += util/build-id.h
-LIB_H += util/debug.h
-LIB_H += util/debugfs.h
-LIB_H += util/sysfs.h
-LIB_H += util/pmu.h
-LIB_H += util/event.h
-LIB_H += util/evsel.h
-LIB_H += util/evlist.h
-LIB_H += util/exec_cmd.h
-LIB_H += util/types.h
-LIB_H += util/levenshtein.h
-LIB_H += util/map.h
-LIB_H += util/parse-options.h
-LIB_H += util/parse-events.h
-LIB_H += util/quote.h
-LIB_H += util/util.h
-LIB_H += util/xyarray.h
-LIB_H += util/header.h
-LIB_H += util/help.h
-LIB_H += util/session.h
-LIB_H += util/strbuf.h
-LIB_H += util/strlist.h
-LIB_H += util/strfilter.h
-LIB_H += util/svghelper.h
-LIB_H += util/tool.h
-LIB_H += util/run-command.h
-LIB_H += util/sigchain.h
-LIB_H += util/symbol.h
-LIB_H += util/color.h
-LIB_H += util/values.h
-LIB_H += util/sort.h
-LIB_H += util/hist.h
-LIB_H += util/thread.h
-LIB_H += util/thread_map.h
-LIB_H += util/trace-event.h
-LIB_H += util/probe-finder.h
-LIB_H += util/dwarf-aux.h
-LIB_H += util/probe-event.h
-LIB_H += util/pstack.h
-LIB_H += util/cpumap.h
-LIB_H += util/top.h
-LIB_H += $(ARCH_INCLUDE)
-LIB_H += util/cgroup.h
-LIB_H += $(TRACE_EVENT_DIR)event-parse.h
-LIB_H += util/target.h
-LIB_H += util/rblist.h
-LIB_H += util/intlist.h
-LIB_H += util/perf_regs.h
-LIB_H += util/unwind.h
-LIB_H += ui/helpline.h
-LIB_H += util/vdso.h
-
-LIB_OBJS += $(OUTPUT)util/abspath.o
-LIB_OBJS += $(OUTPUT)util/alias.o
-LIB_OBJS += $(OUTPUT)util/annotate.o
-LIB_OBJS += $(OUTPUT)util/build-id.o
-LIB_OBJS += $(OUTPUT)util/config.o
-LIB_OBJS += $(OUTPUT)util/ctype.o
-LIB_OBJS += $(OUTPUT)util/debugfs.o
-LIB_OBJS += $(OUTPUT)util/sysfs.o
-LIB_OBJS += $(OUTPUT)util/pmu.o
-LIB_OBJS += $(OUTPUT)util/environment.o
-LIB_OBJS += $(OUTPUT)util/event.o
-LIB_OBJS += $(OUTPUT)util/evlist.o
-LIB_OBJS += $(OUTPUT)util/evsel.o
-LIB_OBJS += $(OUTPUT)util/exec_cmd.o
-LIB_OBJS += $(OUTPUT)util/help.o
-LIB_OBJS += $(OUTPUT)util/levenshtein.o
-LIB_OBJS += $(OUTPUT)util/parse-options.o
-LIB_OBJS += $(OUTPUT)util/parse-events.o
-LIB_OBJS += $(OUTPUT)util/parse-events-test.o
-LIB_OBJS += $(OUTPUT)util/path.o
-LIB_OBJS += $(OUTPUT)util/rbtree.o
-LIB_OBJS += $(OUTPUT)util/bitmap.o
-LIB_OBJS += $(OUTPUT)util/hweight.o
-LIB_OBJS += $(OUTPUT)util/run-command.o
-LIB_OBJS += $(OUTPUT)util/quote.o
-LIB_OBJS += $(OUTPUT)util/strbuf.o
-LIB_OBJS += $(OUTPUT)util/string.o
-LIB_OBJS += $(OUTPUT)util/strlist.o
-LIB_OBJS += $(OUTPUT)util/strfilter.o
-LIB_OBJS += $(OUTPUT)util/top.o
-LIB_OBJS += $(OUTPUT)util/usage.o
-LIB_OBJS += $(OUTPUT)util/wrapper.o
-LIB_OBJS += $(OUTPUT)util/sigchain.o
-LIB_OBJS += $(OUTPUT)util/symbol.o
-LIB_OBJS += $(OUTPUT)util/symbol-elf.o
-LIB_OBJS += $(OUTPUT)util/dso-test-data.o
-LIB_OBJS += $(OUTPUT)util/color.o
-LIB_OBJS += $(OUTPUT)util/pager.o
-LIB_OBJS += $(OUTPUT)util/header.o
-LIB_OBJS += $(OUTPUT)util/callchain.o
-LIB_OBJS += $(OUTPUT)util/values.o
-LIB_OBJS += $(OUTPUT)util/debug.o
-LIB_OBJS += $(OUTPUT)util/map.o
-LIB_OBJS += $(OUTPUT)util/pstack.o
-LIB_OBJS += $(OUTPUT)util/session.o
-LIB_OBJS += $(OUTPUT)util/thread.o
-LIB_OBJS += $(OUTPUT)util/thread_map.o
-LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
-LIB_OBJS += $(OUTPUT)util/parse-events-flex.o
-LIB_OBJS += $(OUTPUT)util/parse-events-bison.o
-LIB_OBJS += $(OUTPUT)util/pmu-flex.o
-LIB_OBJS += $(OUTPUT)util/pmu-bison.o
-LIB_OBJS += $(OUTPUT)util/trace-event-read.o
-LIB_OBJS += $(OUTPUT)util/trace-event-info.o
-LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
-LIB_OBJS += $(OUTPUT)util/svghelper.o
-LIB_OBJS += $(OUTPUT)util/sort.o
-LIB_OBJS += $(OUTPUT)util/hist.o
-LIB_OBJS += $(OUTPUT)util/probe-event.o
-LIB_OBJS += $(OUTPUT)util/util.o
-LIB_OBJS += $(OUTPUT)util/xyarray.o
-LIB_OBJS += $(OUTPUT)util/cpumap.o
-LIB_OBJS += $(OUTPUT)util/cgroup.o
-LIB_OBJS += $(OUTPUT)util/target.o
-LIB_OBJS += $(OUTPUT)util/rblist.o
-LIB_OBJS += $(OUTPUT)util/intlist.o
-LIB_OBJS += $(OUTPUT)util/vdso.o
-LIB_OBJS += $(OUTPUT)util/stat.o
-
-LIB_OBJS += $(OUTPUT)ui/helpline.o
-LIB_OBJS += $(OUTPUT)ui/hist.o
-LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
-
-BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
-BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
-# Benchmark modules
-BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
-BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
-ifeq ($(RAW_ARCH),x86_64)
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
-endif
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
-
-BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
-BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
-BUILTIN_OBJS += $(OUTPUT)builtin-help.o
-BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
-BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
-BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
-BUILTIN_OBJS += $(OUTPUT)builtin-list.o
-BUILTIN_OBJS += $(OUTPUT)builtin-record.o
-BUILTIN_OBJS += $(OUTPUT)builtin-report.o
-BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
-BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
-BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-script.o
-BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
-BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
-BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
-BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
-BUILTIN_OBJS += $(OUTPUT)builtin-test.o
-BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
-
-PERFLIBS = $(LIB_FILE) $(LIBTRACEEVENT)
+clean:
+ $(make)
#
-# Platform specific tweaks
+# The build-test target is not really parallel, don't print the jobs info:
#
+build-test:
+ @$(MAKE) -f tests/make --no-print-directory
-# We choose to avoid "if .. else if .. else .. endif endif"
-# because maintaining the nesting to match is a pain. If
-# we had "elif" things would have been much nicer...
-
--include config.mak.autogen
--include config.mak
-
-ifdef NO_LIBELF
- NO_DWARF := 1
- NO_DEMANGLE := 1
- NO_LIBUNWIND := 1
-else
-FLAGS_LIBELF=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS)
-ifneq ($(call try-cc,$(SOURCE_LIBELF),$(FLAGS_LIBELF)),y)
- FLAGS_GLIBC=$(ALL_CFLAGS) $(ALL_LDFLAGS)
- ifneq ($(call try-cc,$(SOURCE_GLIBC),$(FLAGS_GLIBC)),y)
- msg := $(error No gnu/libc-version.h found, please install glibc-dev[el]/glibc-static);
- else
- NO_LIBELF := 1
- NO_DWARF := 1
- NO_DEMANGLE := 1
- endif
-else
- FLAGS_DWARF=$(ALL_CFLAGS) -ldw -lelf $(ALL_LDFLAGS) $(EXTLIBS)
- ifneq ($(call try-cc,$(SOURCE_DWARF),$(FLAGS_DWARF)),y)
- msg := $(warning No libdw.h found or old libdw.h found or elfutils is older than 0.138, disables dwarf support. Please install new elfutils-devel/libdw-dev);
- NO_DWARF := 1
- endif # Dwarf support
-endif # SOURCE_LIBELF
-endif # NO_LIBELF
-
-ifndef NO_LIBUNWIND
-# for linking with debug library, run like:
-# make DEBUG=1 LIBUNWIND_DIR=/opt/libunwind/
-ifdef LIBUNWIND_DIR
- LIBUNWIND_CFLAGS := -I$(LIBUNWIND_DIR)/include
- LIBUNWIND_LDFLAGS := -L$(LIBUNWIND_DIR)/lib
-endif
-
-FLAGS_UNWIND=$(LIBUNWIND_CFLAGS) $(ALL_CFLAGS) $(LIBUNWIND_LDFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(LIBUNWIND_LIBS)
-ifneq ($(call try-cc,$(SOURCE_LIBUNWIND),$(FLAGS_UNWIND)),y)
- msg := $(warning No libunwind found, disabling post unwind support. Please install libunwind-dev[el] >= 0.99);
- NO_LIBUNWIND := 1
-endif # Libunwind support
-endif # NO_LIBUNWIND
-
--include arch/$(ARCH)/Makefile
-
-ifneq ($(OUTPUT),)
- BASIC_CFLAGS += -I$(OUTPUT)
-endif
-
-ifdef NO_LIBELF
-EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
-
-# Remove ELF/DWARF dependent codes
-LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS))
-
-BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
-
-# Use minimal symbol handling
-LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
-
-else # NO_LIBELF
-BASIC_CFLAGS += -DLIBELF_SUPPORT
-
-ifeq ($(call try-cc,$(SOURCE_ELF_MMAP),$(FLAGS_COMMON)),y)
- BASIC_CFLAGS += -DLIBELF_MMAP
-endif
-
-ifndef NO_DWARF
-ifeq ($(origin PERF_HAVE_DWARF_REGS), undefined)
- msg := $(warning DWARF register mappings have not been defined for architecture $(ARCH), DWARF support disabled);
-else
- BASIC_CFLAGS += -DDWARF_SUPPORT
- EXTLIBS += -lelf -ldw
- LIB_OBJS += $(OUTPUT)util/probe-finder.o
- LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
-endif # PERF_HAVE_DWARF_REGS
-endif # NO_DWARF
-endif # NO_LIBELF
-
-ifndef NO_LIBUNWIND
- BASIC_CFLAGS += -DLIBUNWIND_SUPPORT
- EXTLIBS += $(LIBUNWIND_LIBS)
- BASIC_CFLAGS := $(LIBUNWIND_CFLAGS) $(BASIC_CFLAGS)
- BASIC_LDFLAGS := $(LIBUNWIND_LDFLAGS) $(BASIC_LDFLAGS)
- LIB_OBJS += $(OUTPUT)util/unwind.o
-endif
-
-ifndef NO_LIBAUDIT
- FLAGS_LIBAUDIT = $(ALL_CFLAGS) $(ALL_LDFLAGS) -laudit
- ifneq ($(call try-cc,$(SOURCE_LIBAUDIT),$(FLAGS_LIBAUDIT)),y)
- msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev);
- else
- BASIC_CFLAGS += -DLIBAUDIT_SUPPORT
- BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
- EXTLIBS += -laudit
- endif
-endif
-
-ifndef NO_NEWT
- FLAGS_NEWT=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -lnewt
- ifneq ($(call try-cc,$(SOURCE_NEWT),$(FLAGS_NEWT)),y)
- msg := $(warning newt not found, disables TUI support. Please install newt-devel or libnewt-dev);
- else
- # Fedora has /usr/include/slang/slang.h, but ubuntu /usr/include/slang.h
- BASIC_CFLAGS += -I/usr/include/slang
- BASIC_CFLAGS += -DNEWT_SUPPORT
- EXTLIBS += -lnewt -lslang
- LIB_OBJS += $(OUTPUT)ui/setup.o
- LIB_OBJS += $(OUTPUT)ui/browser.o
- LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
- LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
- LIB_OBJS += $(OUTPUT)ui/browsers/map.o
- LIB_OBJS += $(OUTPUT)ui/progress.o
- LIB_OBJS += $(OUTPUT)ui/util.o
- LIB_OBJS += $(OUTPUT)ui/tui/setup.o
- LIB_OBJS += $(OUTPUT)ui/tui/util.o
- LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
- LIB_H += ui/browser.h
- LIB_H += ui/browsers/map.h
- LIB_H += ui/keysyms.h
- LIB_H += ui/libslang.h
- LIB_H += ui/progress.h
- LIB_H += ui/util.h
- LIB_H += ui/ui.h
- endif
-endif
-
-ifndef NO_GTK2
- FLAGS_GTK2=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) $(shell pkg-config --libs --cflags gtk+-2.0 2>/dev/null)
- ifneq ($(call try-cc,$(SOURCE_GTK2),$(FLAGS_GTK2)),y)
- msg := $(warning GTK2 not found, disables GTK2 support. Please install gtk2-devel or libgtk2.0-dev);
- else
- ifeq ($(call try-cc,$(SOURCE_GTK2_INFOBAR),$(FLAGS_GTK2)),y)
- BASIC_CFLAGS += -DHAVE_GTK_INFO_BAR
- endif
- BASIC_CFLAGS += -DGTK2_SUPPORT
- BASIC_CFLAGS += $(shell pkg-config --cflags gtk+-2.0 2>/dev/null)
- EXTLIBS += $(shell pkg-config --libs gtk+-2.0 2>/dev/null)
- LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
- LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
- LIB_OBJS += $(OUTPUT)ui/gtk/util.o
- LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
- # Make sure that it'd be included only once.
- ifeq ($(findstring -DNEWT_SUPPORT,$(BASIC_CFLAGS)),)
- LIB_OBJS += $(OUTPUT)ui/setup.o
- LIB_OBJS += $(OUTPUT)ui/util.o
- endif
- endif
-endif
-
-ifdef NO_LIBPERL
- BASIC_CFLAGS += -DNO_LIBPERL
-else
- PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null)
- PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS))
- PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS))
- PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null`
- FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS)
-
- ifneq ($(call try-cc,$(SOURCE_PERL_EMBED),$(FLAGS_PERL_EMBED)),y)
- BASIC_CFLAGS += -DNO_LIBPERL
- else
- ALL_LDFLAGS += $(PERL_EMBED_LDFLAGS)
- EXTLIBS += $(PERL_EMBED_LIBADD)
- LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
- LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
- endif
-endif
-
-disable-python = $(eval $(disable-python_code))
-define disable-python_code
- BASIC_CFLAGS += -DNO_LIBPYTHON
- $(if $(1),$(warning No $(1) was found))
- $(warning Python support won't be built)
-endef
-
-override PYTHON := \
- $(call get-executable-or-default,PYTHON,python)
-
-ifndef PYTHON
- $(call disable-python,python interpreter)
- python-clean :=
-else
-
- PYTHON_WORD := $(call shell-wordify,$(PYTHON))
-
- # python extension build directories
- PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
- PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
- PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
- export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
-
- python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
-
- ifdef NO_LIBPYTHON
- $(call disable-python)
- else
-
- override PYTHON_CONFIG := \
- $(call get-executable-or-default,PYTHON_CONFIG,$(PYTHON)-config)
-
- ifndef PYTHON_CONFIG
- $(call disable-python,python-config tool)
- else
-
- PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG))
-
- PYTHON_EMBED_LDOPTS := $(shell $(PYTHON_CONFIG_SQ) --ldflags 2>/dev/null)
- PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS))
- PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS))
- PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null)
- FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS)
-
- ifneq ($(call try-cc,$(SOURCE_PYTHON_EMBED),$(FLAGS_PYTHON_EMBED)),y)
- $(call disable-python,Python.h (for Python 2.x))
- else
-
- ifneq ($(call try-cc,$(SOURCE_PYTHON_VERSION),$(FLAGS_PYTHON_EMBED)),y)
- $(warning Python 3 is not yet supported; please set)
- $(warning PYTHON and/or PYTHON_CONFIG appropriately.)
- $(warning If you also have Python 2 installed, then)
- $(warning try something like:)
- $(warning $(and ,))
- $(warning $(and ,) make PYTHON=python2)
- $(warning $(and ,))
- $(warning Otherwise, disable Python support entirely:)
- $(warning $(and ,))
- $(warning $(and ,) make NO_LIBPYTHON=1)
- $(warning $(and ,))
- $(error $(and ,))
- else
- ALL_LDFLAGS += $(PYTHON_EMBED_LDFLAGS)
- EXTLIBS += $(PYTHON_EMBED_LIBADD)
- LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
- LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
- LANG_BINDINGS += $(OUTPUT)python/perf.so
- endif
-
- endif
- endif
- endif
-endif
-
-ifdef NO_DEMANGLE
- BASIC_CFLAGS += -DNO_DEMANGLE
-else
- ifdef HAVE_CPLUS_DEMANGLE
- EXTLIBS += -liberty
- BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
- else
- FLAGS_BFD=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -DPACKAGE='perf' -lbfd
- has_bfd := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD))
- ifeq ($(has_bfd),y)
- EXTLIBS += -lbfd
- else
- FLAGS_BFD_IBERTY=$(FLAGS_BFD) -liberty
- has_bfd_iberty := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY))
- ifeq ($(has_bfd_iberty),y)
- EXTLIBS += -lbfd -liberty
- else
- FLAGS_BFD_IBERTY_Z=$(FLAGS_BFD_IBERTY) -lz
- has_bfd_iberty_z := $(call try-cc,$(SOURCE_BFD),$(FLAGS_BFD_IBERTY_Z))
- ifeq ($(has_bfd_iberty_z),y)
- EXTLIBS += -lbfd -liberty -lz
- else
- FLAGS_CPLUS_DEMANGLE=$(ALL_CFLAGS) $(ALL_LDFLAGS) $(EXTLIBS) -liberty
- has_cplus_demangle := $(call try-cc,$(SOURCE_CPLUS_DEMANGLE),$(FLAGS_CPLUS_DEMANGLE))
- ifeq ($(has_cplus_demangle),y)
- EXTLIBS += -liberty
- BASIC_CFLAGS += -DHAVE_CPLUS_DEMANGLE
- else
- msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling)
- BASIC_CFLAGS += -DNO_DEMANGLE
- endif
- endif
- endif
- endif
- endif
-endif
-
-ifeq ($(NO_PERF_REGS),0)
- ifeq ($(ARCH),x86)
- LIB_H += arch/x86/include/perf_regs.h
- endif
- BASIC_CFLAGS += -DHAVE_PERF_REGS
-endif
-
-ifndef NO_STRLCPY
- ifeq ($(call try-cc,$(SOURCE_STRLCPY),),y)
- BASIC_CFLAGS += -DHAVE_STRLCPY
- endif
-endif
-
-ifndef NO_BACKTRACE
- ifeq ($(call try-cc,$(SOURCE_BACKTRACE),),y)
- BASIC_CFLAGS += -DBACKTRACE_SUPPORT
- endif
-endif
-
-ifdef ASCIIDOC8
- export ASCIIDOC8
-endif
-
-# Shell quote (do not use $(call) to accommodate ancient setups);
-
-ETC_PERFCONFIG_SQ = $(subst ','\'',$(ETC_PERFCONFIG))
-
-DESTDIR_SQ = $(subst ','\'',$(DESTDIR))
-bindir_SQ = $(subst ','\'',$(bindir))
-bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
-mandir_SQ = $(subst ','\'',$(mandir))
-infodir_SQ = $(subst ','\'',$(infodir))
-perfexecdir_SQ = $(subst ','\'',$(perfexecdir))
-template_dir_SQ = $(subst ','\'',$(template_dir))
-htmldir_SQ = $(subst ','\'',$(htmldir))
-prefix_SQ = $(subst ','\'',$(prefix))
-sysconfdir_SQ = $(subst ','\'',$(sysconfdir))
-
-SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
-
-LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
-
-ALL_CFLAGS += $(BASIC_CFLAGS)
-ALL_CFLAGS += $(ARCH_CFLAGS)
-ALL_LDFLAGS += $(BASIC_LDFLAGS)
-
-export INSTALL SHELL_PATH
-
-
-### Build rules
-
-SHELL = $(SHELL_PATH)
-
-all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
-
-please_set_SHELL_PATH_to_a_more_modern_shell:
- @$$(:)
-
-shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
-
-strip: $(PROGRAMS) $(OUTPUT)perf
- $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
-
-$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -DPERF_VERSION='"$(PERF_VERSION)"' \
- '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
- $(ALL_CFLAGS) -c $(filter %.c,$^) -o $@
-
-$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
- $(QUIET_LINK)$(CC) $(ALL_CFLAGS) $(ALL_LDFLAGS) $(OUTPUT)perf.o \
- $(BUILTIN_OBJS) $(LIBS) -o $@
-
-$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
- '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
- '-DPERF_MAN_PATH="$(mandir_SQ)"' \
- '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
-$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
- '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
- '-DPERF_MAN_PATH="$(mandir_SQ)"' \
- '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
-$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
-
-$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
- $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
-
-$(SCRIPTS) : % : %.sh
- $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
-
-# These can record PERF_VERSION
-$(OUTPUT)perf.o perf.spec \
- $(SCRIPTS) \
- : $(OUTPUT)PERF-VERSION-FILE
-
-.SUFFIXES:
-.SUFFIXES: .o .c .S .s
-
-# These two need to be here so that when O= is not used they take precedence
-# over the general rule for .o
-
-$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -w $<
-
-$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(ALL_CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
-
-$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
-$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -E $(ALL_CFLAGS) $<
-$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -S $(ALL_CFLAGS) $<
-$(OUTPUT)%.o: %.S
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
-$(OUTPUT)%.s: %.S
- $(QUIET_CC)$(CC) -o $@ -E $(ALL_CFLAGS) $<
-
-$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) \
- '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
- '-DBINDIR="$(bindir_relative_SQ)"' \
- '-DPREFIX="$(prefix_SQ)"' \
- $<
-
-$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) -Wno-redundant-decls $<
-
-$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
-
-$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
-
-$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
-
-$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
-
-$(OUTPUT)perf-%: %.o $(PERFLIBS)
- $(QUIET_LINK)$(CC) $(ALL_CFLAGS) -o $@ $(ALL_LDFLAGS) $(filter %.o,$^) $(LIBS)
-
-$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
-$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
-
-# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
-# we depend the various files onto their directories.
-DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
-$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS)))
-# In the second step, we make a rule to actually create these directories
-$(sort $(dir $(DIRECTORY_DEPS))):
- $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
-
-$(LIB_FILE): $(LIB_OBJS)
- $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
-
-# libtraceevent.a
-$(LIBTRACEEVENT):
- $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a
-
-$(LIBTRACEEVENT)-clean:
- $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean
-
-help:
- @echo 'Perf make targets:'
- @echo ' doc - make *all* documentation (see below)'
- @echo ' man - make manpage documentation (access with man <foo>)'
- @echo ' html - make html documentation'
- @echo ' info - make GNU info documentation (access with info <foo>)'
- @echo ' pdf - make pdf documentation'
- @echo ' TAGS - use etags to make tag information for source browsing'
- @echo ' tags - use ctags to make tag information for source browsing'
- @echo ' cscope - use cscope to make interactive browsing database'
- @echo ''
- @echo 'Perf install targets:'
- @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
- @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular'
- @echo ' path like make prefix=/usr/local install install-doc'
- @echo ' install - install compiled binaries'
- @echo ' install-doc - install *all* documentation'
- @echo ' install-man - install manpage documentation'
- @echo ' install-html - install html documentation'
- @echo ' install-info - install GNU info documentation'
- @echo ' install-pdf - install pdf documentation'
- @echo ''
- @echo ' quick-install-doc - alias for quick-install-man'
- @echo ' quick-install-man - install the documentation quickly'
- @echo ' quick-install-html - install the html documentation quickly'
- @echo ''
- @echo 'Perf maintainer targets:'
- @echo ' clean - clean all binary objects and build output'
-
-doc:
- $(MAKE) -C Documentation all
-
-man:
- $(MAKE) -C Documentation man
-
-html:
- $(MAKE) -C Documentation html
-
-info:
- $(MAKE) -C Documentation info
-
-pdf:
- $(MAKE) -C Documentation pdf
-
-TAGS:
- $(RM) TAGS
- $(FIND) . -name '*.[hcS]' -print | xargs etags -a
-
-tags:
- $(RM) tags
- $(FIND) . -name '*.[hcS]' -print | xargs ctags -a
-
-cscope:
- $(RM) cscope*
- $(FIND) . -name '*.[hcS]' -print | xargs cscope -b
-
-### Detect prefix changes
-TRACK_CFLAGS = $(subst ','\'',$(ALL_CFLAGS)):\
- $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
-
-$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
- @FLAGS='$(TRACK_CFLAGS)'; \
- if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
- echo 1>&2 " * new build flags or prefix"; \
- echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
- fi
-
-### Testing rules
-
-# GNU make supports exporting all variables by "export" without parameters.
-# However, the environment gets quite big, and some programs have problems
-# with that.
-
-check: $(OUTPUT)common-cmds.h
- if sparse; \
- then \
- for i in *.c */*.c; \
- do \
- sparse $(ALL_CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
- done; \
- else \
- exit 1; \
- fi
-
-### Installation rules
-
-ifneq ($(filter /%,$(firstword $(perfexecdir))),)
-perfexec_instdir = $(perfexecdir)
-else
-perfexec_instdir = $(prefix)/$(perfexecdir)
-endif
-perfexec_instdir_SQ = $(subst ','\'',$(perfexec_instdir))
-
-install: all
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
- $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
- $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
- $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
- $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
- $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
- $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
- $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
- $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'
- $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
-
-install-python_ext:
- $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
-
-install-doc:
- $(MAKE) -C Documentation install
-
-install-man:
- $(MAKE) -C Documentation install-man
-
-install-html:
- $(MAKE) -C Documentation install-html
-
-install-info:
- $(MAKE) -C Documentation install-info
-
-install-pdf:
- $(MAKE) -C Documentation install-pdf
-
-quick-install-doc:
- $(MAKE) -C Documentation quick-install
-
-quick-install-man:
- $(MAKE) -C Documentation quick-install-man
-
-quick-install-html:
- $(MAKE) -C Documentation quick-install-html
-
-### Cleaning rules
-
-clean: $(LIBTRACEEVENT)-clean
- $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS)
- $(RM) $(ALL_PROGRAMS) perf
- $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
- $(MAKE) -C Documentation/ clean
- $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS
- $(RM) $(OUTPUT)util/*-bison*
- $(RM) $(OUTPUT)util/*-flex*
- $(python-clean)
+#
+# All other targets get passed through:
+#
+%:
+ $(print_msg)
+ $(make)
-.PHONY: all install clean strip $(LIBTRACEEVENT)
-.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
-.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+.PHONY: tags TAGS
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
new file mode 100644
index 00000000000..9670a16fa57
--- /dev/null
+++ b/tools/perf/Makefile.perf
@@ -0,0 +1,938 @@
+include ../scripts/Makefile.include
+
+# The default target of this Makefile is...
+all:
+
+include config/utilities.mak
+
+# Define V to have a more verbose compile.
+#
+# Define VF to have a more verbose feature check output.
+#
+# Define O to save output files in a separate directory.
+#
+# Define ARCH as name of target architecture if you want cross-builds.
+#
+# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
+#
+# Define NO_LIBPERL to disable perl script extension.
+#
+# Define NO_LIBPYTHON to disable python script extension.
+#
+# Define PYTHON to point to the python binary if the default
+# `python' is not correct; for example: PYTHON=python2
+#
+# Define PYTHON_CONFIG to point to the python-config binary if
+# the default `$(PYTHON)-config' is not correct.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
+#
+# Define NO_DWARF if you do not want debug-info analysis feature at all.
+#
+# Define WERROR=0 to disable treating any warnings as errors.
+#
+# Define NO_NEWT if you do not want TUI support. (deprecated)
+#
+# Define NO_SLANG if you do not want TUI support.
+#
+# Define NO_GTK2 if you do not want GTK+ GUI support.
+#
+# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+#
+# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+#
+# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
+# backtrace post unwind.
+#
+# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+#
+# Define NO_LIBNUMA if you do not want numa perf benchmark
+#
+# Define NO_LIBAUDIT if you do not want libaudit support
+#
+# Define NO_LIBBIONIC if you do not want bionic support
+#
+# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
+# for dwarf backtrace post unwind.
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+ifneq ($(objtree),)
+#$(info Determined 'objtree' to be $(objtree))
+endif
+
+ifneq ($(OUTPUT),)
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+endif
+
+$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
+ @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+ @touch $(OUTPUT)PERF-VERSION-FILE
+
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+
+RM = rm -f
+LN = ln -f
+MKDIR = mkdir
+FIND = find
+INSTALL = install
+FLEX = flex
+BISON = bison
+STRIP = strip
+
+LIB_DIR = $(srctree)/tools/lib/api/
+TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+
+# include config/Makefile by default and rule out
+# non-config cases
+config := 1
+
+NON_CONFIG_TARGETS := clean TAGS tags cscope help
+
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
+ config := 0
+endif
+endif
+
+ifeq ($(config),1)
+include config/Makefile
+endif
+
+export prefix bindir sharedir sysconfdir DESTDIR
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+# Guard against environment variables
+BUILTIN_OBJS =
+LIB_H =
+LIB_OBJS =
+GTK_OBJS =
+PYRF_OBJS =
+SCRIPT_SH =
+
+SCRIPT_SH += perf-archive.sh
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
+
+ifneq ($(OUTPUT),)
+ TE_PATH=$(OUTPUT)
+ifneq ($(subdir),)
+ LIB_PATH=$(OUTPUT)/../lib/api/
+else
+ LIB_PATH=$(OUTPUT)
+endif
+else
+ TE_PATH=$(TRACE_EVENT_DIR)
+ LIB_PATH=$(LIB_DIR)
+endif
+
+LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
+export LIBTRACEEVENT
+
+LIBAPIKFS = $(LIB_PATH)libapikfs.a
+export LIBAPIKFS
+
+# python extension build directories
+PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
+PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
+
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS)
+
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
+ $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
+ --quiet build_ext; \
+ mkdir -p $(OUTPUT)python && \
+ cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
+#
+# No Perl scripts right now:
+#
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+
+#
+# Single 'perf' binary right now:
+#
+PROGRAMS += $(OUTPUT)perf
+
+# what 'all' will build and 'install' will install, in perfexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = $(OUTPUT)perf
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+ SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+ PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+ $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+
+$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+ $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
+
+$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+ $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+
+$(OUTPUT)util/pmu-bison.c: util/pmu.y
+ $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+
+LIB_FILE=$(OUTPUT)libperf.a
+
+LIB_H += ../lib/symbol/kallsyms.h
+LIB_H += ../../include/uapi/linux/perf_event.h
+LIB_H += ../../include/linux/rbtree.h
+LIB_H += ../../include/linux/list.h
+LIB_H += ../../include/uapi/linux/const.h
+LIB_H += ../include/linux/hash.h
+LIB_H += ../../include/linux/stringify.h
+LIB_H += util/include/linux/bitmap.h
+LIB_H += util/include/linux/bitops.h
+LIB_H += ../include/linux/compiler.h
+LIB_H += util/include/linux/const.h
+LIB_H += util/include/linux/ctype.h
+LIB_H += util/include/linux/kernel.h
+LIB_H += util/include/linux/list.h
+LIB_H += ../include/linux/export.h
+LIB_H += util/include/linux/poison.h
+LIB_H += util/include/linux/rbtree.h
+LIB_H += util/include/linux/rbtree_augmented.h
+LIB_H += util/include/linux/string.h
+LIB_H += ../include/linux/types.h
+LIB_H += util/include/linux/linkage.h
+LIB_H += util/include/asm/asm-offsets.h
+LIB_H += ../include/asm/bug.h
+LIB_H += util/include/asm/byteorder.h
+LIB_H += util/include/asm/hweight.h
+LIB_H += util/include/asm/swab.h
+LIB_H += util/include/asm/system.h
+LIB_H += util/include/asm/uaccess.h
+LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
+LIB_H += util/include/asm/unistd_32.h
+LIB_H += util/include/asm/unistd_64.h
+LIB_H += perf.h
+LIB_H += util/annotate.h
+LIB_H += util/cache.h
+LIB_H += util/callchain.h
+LIB_H += util/build-id.h
+LIB_H += util/debug.h
+LIB_H += util/pmu.h
+LIB_H += util/event.h
+LIB_H += util/evsel.h
+LIB_H += util/evlist.h
+LIB_H += util/exec_cmd.h
+LIB_H += util/levenshtein.h
+LIB_H += util/machine.h
+LIB_H += util/map.h
+LIB_H += util/parse-options.h
+LIB_H += util/parse-events.h
+LIB_H += util/quote.h
+LIB_H += util/util.h
+LIB_H += util/xyarray.h
+LIB_H += util/header.h
+LIB_H += util/help.h
+LIB_H += util/session.h
+LIB_H += util/strbuf.h
+LIB_H += util/strlist.h
+LIB_H += util/strfilter.h
+LIB_H += util/svghelper.h
+LIB_H += util/tool.h
+LIB_H += util/run-command.h
+LIB_H += util/sigchain.h
+LIB_H += util/dso.h
+LIB_H += util/symbol.h
+LIB_H += util/color.h
+LIB_H += util/values.h
+LIB_H += util/sort.h
+LIB_H += util/hist.h
+LIB_H += util/comm.h
+LIB_H += util/thread.h
+LIB_H += util/thread_map.h
+LIB_H += util/trace-event.h
+LIB_H += util/probe-finder.h
+LIB_H += util/dwarf-aux.h
+LIB_H += util/probe-event.h
+LIB_H += util/pstack.h
+LIB_H += util/cpumap.h
+LIB_H += util/top.h
+LIB_H += $(ARCH_INCLUDE)
+LIB_H += util/cgroup.h
+LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h
+LIB_H += util/target.h
+LIB_H += util/rblist.h
+LIB_H += util/intlist.h
+LIB_H += util/perf_regs.h
+LIB_H += util/unwind.h
+LIB_H += util/vdso.h
+LIB_H += ui/helpline.h
+LIB_H += ui/progress.h
+LIB_H += ui/util.h
+LIB_H += ui/ui.h
+LIB_H += util/data.h
+
+LIB_OBJS += $(OUTPUT)util/abspath.o
+LIB_OBJS += $(OUTPUT)util/alias.o
+LIB_OBJS += $(OUTPUT)util/annotate.o
+LIB_OBJS += $(OUTPUT)util/build-id.o
+LIB_OBJS += $(OUTPUT)util/config.o
+LIB_OBJS += $(OUTPUT)util/ctype.o
+LIB_OBJS += $(OUTPUT)util/pmu.o
+LIB_OBJS += $(OUTPUT)util/environment.o
+LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evlist.o
+LIB_OBJS += $(OUTPUT)util/evsel.o
+LIB_OBJS += $(OUTPUT)util/exec_cmd.o
+LIB_OBJS += $(OUTPUT)util/help.o
+LIB_OBJS += $(OUTPUT)util/kallsyms.o
+LIB_OBJS += $(OUTPUT)util/levenshtein.o
+LIB_OBJS += $(OUTPUT)util/parse-options.o
+LIB_OBJS += $(OUTPUT)util/parse-events.o
+LIB_OBJS += $(OUTPUT)util/path.o
+LIB_OBJS += $(OUTPUT)util/rbtree.o
+LIB_OBJS += $(OUTPUT)util/bitmap.o
+LIB_OBJS += $(OUTPUT)util/hweight.o
+LIB_OBJS += $(OUTPUT)util/run-command.o
+LIB_OBJS += $(OUTPUT)util/quote.o
+LIB_OBJS += $(OUTPUT)util/strbuf.o
+LIB_OBJS += $(OUTPUT)util/string.o
+LIB_OBJS += $(OUTPUT)util/strlist.o
+LIB_OBJS += $(OUTPUT)util/strfilter.o
+LIB_OBJS += $(OUTPUT)util/top.o
+LIB_OBJS += $(OUTPUT)util/usage.o
+LIB_OBJS += $(OUTPUT)util/wrapper.o
+LIB_OBJS += $(OUTPUT)util/sigchain.o
+LIB_OBJS += $(OUTPUT)util/dso.o
+LIB_OBJS += $(OUTPUT)util/symbol.o
+LIB_OBJS += $(OUTPUT)util/symbol-elf.o
+LIB_OBJS += $(OUTPUT)util/color.o
+LIB_OBJS += $(OUTPUT)util/pager.o
+LIB_OBJS += $(OUTPUT)util/header.o
+LIB_OBJS += $(OUTPUT)util/callchain.o
+LIB_OBJS += $(OUTPUT)util/values.o
+LIB_OBJS += $(OUTPUT)util/debug.o
+LIB_OBJS += $(OUTPUT)util/machine.o
+LIB_OBJS += $(OUTPUT)util/map.o
+LIB_OBJS += $(OUTPUT)util/pstack.o
+LIB_OBJS += $(OUTPUT)util/session.o
+LIB_OBJS += $(OUTPUT)util/comm.o
+LIB_OBJS += $(OUTPUT)util/thread.o
+LIB_OBJS += $(OUTPUT)util/thread_map.o
+LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
+LIB_OBJS += $(OUTPUT)util/parse-events-flex.o
+LIB_OBJS += $(OUTPUT)util/parse-events-bison.o
+LIB_OBJS += $(OUTPUT)util/pmu-flex.o
+LIB_OBJS += $(OUTPUT)util/pmu-bison.o
+LIB_OBJS += $(OUTPUT)util/trace-event-read.o
+LIB_OBJS += $(OUTPUT)util/trace-event-info.o
+LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
+LIB_OBJS += $(OUTPUT)util/trace-event.o
+LIB_OBJS += $(OUTPUT)util/svghelper.o
+LIB_OBJS += $(OUTPUT)util/sort.o
+LIB_OBJS += $(OUTPUT)util/hist.o
+LIB_OBJS += $(OUTPUT)util/probe-event.o
+LIB_OBJS += $(OUTPUT)util/util.o
+LIB_OBJS += $(OUTPUT)util/xyarray.o
+LIB_OBJS += $(OUTPUT)util/cpumap.o
+LIB_OBJS += $(OUTPUT)util/cgroup.o
+LIB_OBJS += $(OUTPUT)util/target.o
+LIB_OBJS += $(OUTPUT)util/rblist.o
+LIB_OBJS += $(OUTPUT)util/intlist.o
+LIB_OBJS += $(OUTPUT)util/vdso.o
+LIB_OBJS += $(OUTPUT)util/stat.o
+LIB_OBJS += $(OUTPUT)util/record.o
+LIB_OBJS += $(OUTPUT)util/srcline.o
+LIB_OBJS += $(OUTPUT)util/data.o
+
+LIB_OBJS += $(OUTPUT)ui/setup.o
+LIB_OBJS += $(OUTPUT)ui/helpline.o
+LIB_OBJS += $(OUTPUT)ui/progress.o
+LIB_OBJS += $(OUTPUT)ui/util.o
+LIB_OBJS += $(OUTPUT)ui/hist.o
+LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
+
+LIB_OBJS += $(OUTPUT)arch/common.o
+
+LIB_OBJS += $(OUTPUT)tests/parse-events.o
+LIB_OBJS += $(OUTPUT)tests/dso-data.o
+LIB_OBJS += $(OUTPUT)tests/attr.o
+LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o
+LIB_OBJS += $(OUTPUT)tests/mmap-basic.o
+LIB_OBJS += $(OUTPUT)tests/perf-record.o
+LIB_OBJS += $(OUTPUT)tests/rdpmc.o
+LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
+LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
+LIB_OBJS += $(OUTPUT)tests/pmu.o
+LIB_OBJS += $(OUTPUT)tests/hists_common.o
+LIB_OBJS += $(OUTPUT)tests/hists_link.o
+LIB_OBJS += $(OUTPUT)tests/hists_filter.o
+LIB_OBJS += $(OUTPUT)tests/hists_output.o
+LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
+LIB_OBJS += $(OUTPUT)tests/python-use.o
+LIB_OBJS += $(OUTPUT)tests/bp_signal.o
+LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
+LIB_OBJS += $(OUTPUT)tests/task-exit.o
+LIB_OBJS += $(OUTPUT)tests/sw-clock.o
+ifeq ($(ARCH),x86)
+LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
+endif
+LIB_OBJS += $(OUTPUT)tests/code-reading.o
+LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
+LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
+ifndef NO_DWARF_UNWIND
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
+LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
+endif
+endif
+LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
+LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
+
+BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
+BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
+# Benchmark modules
+BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
+BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
+endif
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
+
+BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
+BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
+BUILTIN_OBJS += $(OUTPUT)builtin-help.o
+BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
+BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
+BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
+BUILTIN_OBJS += $(OUTPUT)builtin-list.o
+BUILTIN_OBJS += $(OUTPUT)builtin-record.o
+BUILTIN_OBJS += $(OUTPUT)builtin-report.o
+BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
+BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
+BUILTIN_OBJS += $(OUTPUT)builtin-top.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
+BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
+BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
+BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
+BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
+BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
+
+PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT)
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain. If
+# we had "elif" things would have been much nicer...
+
+-include arch/$(ARCH)/Makefile
+
+ifneq ($(OUTPUT),)
+ CFLAGS += -I$(OUTPUT)
+endif
+
+ifdef NO_LIBELF
+EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
+
+# Remove ELF/DWARF dependent codes
+LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS))
+
+BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
+
+# Use minimal symbol handling
+LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
+
+else # NO_LIBELF
+ifndef NO_DWARF
+ LIB_OBJS += $(OUTPUT)util/probe-finder.o
+ LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
+endif # NO_DWARF
+endif # NO_LIBELF
+
+ifndef NO_LIBDW_DWARF_UNWIND
+ LIB_OBJS += $(OUTPUT)util/unwind-libdw.o
+ LIB_H += util/unwind-libdw.h
+endif
+
+ifndef NO_LIBUNWIND
+ LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o
+endif
+LIB_OBJS += $(OUTPUT)tests/keep-tracking.o
+
+ifndef NO_LIBAUDIT
+ BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+endif
+
+ifndef NO_SLANG
+ LIB_OBJS += $(OUTPUT)ui/browser.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/map.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/header.o
+ LIB_OBJS += $(OUTPUT)ui/tui/setup.o
+ LIB_OBJS += $(OUTPUT)ui/tui/util.o
+ LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
+ LIB_OBJS += $(OUTPUT)ui/tui/progress.o
+ LIB_H += ui/tui/tui.h
+ LIB_H += ui/browser.h
+ LIB_H += ui/browsers/map.h
+ LIB_H += ui/keysyms.h
+ LIB_H += ui/libslang.h
+endif
+
+ifndef NO_GTK2
+ ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
+
+ GTK_OBJS += $(OUTPUT)ui/gtk/browser.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/hists.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/setup.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/util.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/progress.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o
+
+install-gtk: $(OUTPUT)libperf-gtk.so
+ $(call QUIET_INSTALL, 'GTK UI') \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
+ $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)'
+endif
+
+ifndef NO_LIBPERL
+ LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
+ LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
+endif
+
+ifndef NO_LIBPYTHON
+ LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
+ LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
+endif
+
+ifeq ($(NO_PERF_REGS),0)
+ ifeq ($(ARCH),x86)
+ LIB_H += arch/x86/include/perf_regs.h
+ endif
+ LIB_OBJS += $(OUTPUT)util/perf_regs.o
+endif
+
+ifndef NO_LIBNUMA
+ BUILTIN_OBJS += $(OUTPUT)bench/numa.o
+endif
+
+ifdef ASCIIDOC8
+ export ASCIIDOC8
+endif
+
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
+
+export INSTALL SHELL_PATH
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+ @$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) $(OUTPUT)perf
+ $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
+
+$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ $(CFLAGS) -c $(filter %.c,$^) -o $@
+
+$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \
+ $(BUILTIN_OBJS) $(LIBS) -o $@
+
+$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H)
+ $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $<
+
+$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS)
+ $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
+
+$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ '-DPERF_MAN_PATH="$(mandir_SQ)"' \
+ '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ '-DPERF_MAN_PATH="$(mandir_SQ)"' \
+ '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
+
+$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
+ $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(SCRIPTS) : % : %.sh
+ $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
+
+# These can record PERF_VERSION
+$(OUTPUT)perf.o perf.spec \
+ $(SCRIPTS) \
+ : $(OUTPUT)PERF-VERSION-FILE
+
+.SUFFIXES:
+
+#
+# If a target does not match any of the later rules then prefix it by $(OUTPUT)
+# This makes targets like 'make O=/tmp/perf perf.o' work in a natural way.
+#
+ifneq ($(OUTPUT),)
+%.o: $(OUTPUT)%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+util/%.o: $(OUTPUT)util/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+bench/%.o: $(OUTPUT)bench/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+tests/%.o: $(OUTPUT)tests/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+endif
+
+# These two need to be here so that when O= is not used they take precedence
+# over the general rule for .o
+
+$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $<
+
+$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
+
+$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $<
+$(OUTPUT)%.o: %.S
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+$(OUTPUT)%.s: %.S
+ $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+
+$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
+ '-DPREFIX="$(prefix_SQ)"' \
+ $<
+
+$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
+ $<
+
+$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ -DPYTHONPATH='"$(OUTPUT)python"' \
+ -DPYTHON='"$(PYTHON_WORD)"' \
+ $<
+
+$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $<
+
+$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $<
+
+$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+
+$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
+
+$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<
+
+$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<
+
+$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+
+$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+
+$(OUTPUT)perf-%: %.o $(PERFLIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
+$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+
+# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
+# we depend the various files onto their directories.
+DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS)
+DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
+# no need to add flex objects, because they depend on bison ones
+DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c
+DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c
+
+OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS)))
+
+$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES)
+# In the second step, we make a rule to actually create these directories
+$(OUTPUT_DIRECTORIES):
+ $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
+
+$(LIB_FILE): $(LIB_OBJS)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
+
+# libtraceevent.a
+TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch])
+
+LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT)
+LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)"
+LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
+
+$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS
+ $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins
+
+$(LIBTRACEEVENT)-clean:
+ $(call QUIET_CLEAN, libtraceevent)
+ @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+
+install-traceevent-plugins: $(LIBTRACEEVENT)
+ $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins
+
+LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch])
+
+# if subdir is set, we've been called from above so target has been built
+# already
+$(LIBAPIKFS): $(LIBAPIKFS_SOURCES)
+ifeq ($(subdir),)
+ $(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a
+endif
+
+$(LIBAPIKFS)-clean:
+ifeq ($(subdir),)
+ $(call QUIET_CLEAN, libapikfs)
+ @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+endif
+
+help:
+ @echo 'Perf make targets:'
+ @echo ' doc - make *all* documentation (see below)'
+ @echo ' man - make manpage documentation (access with man <foo>)'
+ @echo ' html - make html documentation'
+ @echo ' info - make GNU info documentation (access with info <foo>)'
+ @echo ' pdf - make pdf documentation'
+ @echo ' TAGS - use etags to make tag information for source browsing'
+ @echo ' tags - use ctags to make tag information for source browsing'
+ @echo ' cscope - use cscope to make interactive browsing database'
+ @echo ''
+ @echo 'Perf install targets:'
+ @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
+ @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular'
+ @echo ' path like "make prefix=/usr/local install install-doc"'
+ @echo ' install - install compiled binaries'
+ @echo ' install-doc - install *all* documentation'
+ @echo ' install-man - install manpage documentation'
+ @echo ' install-html - install html documentation'
+ @echo ' install-info - install GNU info documentation'
+ @echo ' install-pdf - install pdf documentation'
+ @echo ''
+ @echo ' quick-install-doc - alias for quick-install-man'
+ @echo ' quick-install-man - install the documentation quickly'
+ @echo ' quick-install-html - install the html documentation quickly'
+ @echo ''
+ @echo 'Perf maintainer targets:'
+ @echo ' clean - clean all binary objects and build output'
+
+
+DOC_TARGETS := doc man html info pdf
+
+INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
+INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
+
+# 'make doc' should call 'make -C Documentation all'
+$(DOC_TARGETS):
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+
+TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
+TAGS:
+ $(QUIET_GEN)$(RM) TAGS; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
+
+tags:
+ $(QUIET_GEN)$(RM) tags; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
+
+cscope:
+ $(QUIET_GEN)$(RM) cscope*; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
+
+### Detect prefix changes
+TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
+ $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ)
+
+$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
+ @FLAGS='$(TRACK_CFLAGS)'; \
+ if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
+ echo 1>&2 " FLAGS: * new build flags or prefix"; \
+ echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
+ fi
+
+### Testing rules
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+check: $(OUTPUT)common-cmds.h
+ if sparse; \
+ then \
+ for i in *.c */*.c; \
+ do \
+ sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+ done; \
+ else \
+ exit 1; \
+ fi
+
+### Installation rules
+
+install-gtk:
+
+install-bin: all install-gtk
+ $(call QUIET_INSTALL, binaries) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
+ $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
+ $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
+ $(call QUIET_INSTALL, libexec) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ $(call QUIET_INSTALL, perf-archive) \
+ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBPERL
+ $(call QUIET_INSTALL, perl-scripts) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
+ $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+endif
+ifndef NO_LIBPYTHON
+ $(call QUIET_INSTALL, python-scripts) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
+ $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
+ $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+endif
+ $(call QUIET_INSTALL, perf_completion-script) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
+ $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+ $(call QUIET_INSTALL, tests) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
+
+install: install-bin try-install-man install-traceevent-plugins
+
+install-python_ext:
+ $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
+# 'make install-doc' should call 'make -C Documentation install'
+$(INSTALL_DOC_TARGETS):
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+
+### Cleaning rules
+
+#
+# This is here, not in config/Makefile, because config/Makefile does
+# not get included for the clean target:
+#
+config-clean:
+ $(call QUIET_CLEAN, config)
+ @$(MAKE) -C config/feature-checks clean >/dev/null
+
+clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean
+ $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS)
+ $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf
+ $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
+ $(python-clean)
+
+#
+# Trick: if ../../.git does not exist - we are building out of tree for example,
+# then force version regeneration:
+#
+ifeq ($(wildcard ../../.git/HEAD),)
+ GIT-HEAD-PHONY = ../../.git/HEAD
+else
+ GIT-HEAD-PHONY =
+endif
+
+.PHONY: all install clean config-clean strip install-gtk
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
+
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index 15130b50dfe..09d62153d38 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -2,3 +2,13 @@ ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
+ifndef NO_LIBUNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
+ifndef NO_LIBDW_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
+endif
+ifndef NO_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
+endif
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
new file mode 100644
index 00000000000..f619c9c5a4b
--- /dev/null
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -0,0 +1,59 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_ARM_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+
+#define PERF_REG_IP PERF_REG_ARM_PC
+#define PERF_REG_SP PERF_REG_ARM_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..9f870d27cb3
--- /dev/null
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_ARM_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm/tests/regs_load.S b/tools/perf/arch/arm/tests/regs_load.S
new file mode 100644
index 00000000000..e09e983946f
--- /dev/null
+++ b/tools/perf/arch/arm/tests/regs_load.S
@@ -0,0 +1,58 @@
+#include <linux/linkage.h>
+
+#define R0 0x00
+#define R1 0x08
+#define R2 0x10
+#define R3 0x18
+#define R4 0x20
+#define R5 0x28
+#define R6 0x30
+#define R7 0x38
+#define R8 0x40
+#define R9 0x48
+#define SL 0x50
+#define FP 0x58
+#define IP 0x60
+#define SP 0x68
+#define LR 0x70
+#define PC 0x78
+
+/*
+ * Implementation of void perf_regs_load(u64 *regs);
+ *
+ * This functions fills in the 'regs' buffer from the actual registers values,
+ * in the way the perf built-in unwinding test expects them:
+ * - the PC at the time at the call to this function. Since this function
+ * is called using a bl instruction, the PC value is taken from LR.
+ * The built-in unwinding test then unwinds the call stack from the dwarf
+ * information in unwind__get_entries.
+ *
+ * Notes:
+ * - the 8 bytes stride in the registers offsets comes from the fact
+ * that the registers are stored in an u64 array (u64 *regs),
+ * - the regs buffer needs to be zeroed before the call to this function,
+ * in this case using a calloc in dwarf-unwind.c.
+ */
+
+.text
+.type perf_regs_load,%function
+ENTRY(perf_regs_load)
+ str r0, [r0, #R0]
+ str r1, [r0, #R1]
+ str r2, [r0, #R2]
+ str r3, [r0, #R3]
+ str r4, [r0, #R4]
+ str r5, [r0, #R5]
+ str r6, [r0, #R6]
+ str r7, [r0, #R7]
+ str r8, [r0, #R8]
+ str r9, [r0, #R9]
+ str sl, [r0, #SL]
+ str fp, [r0, #FP]
+ str ip, [r0, #IP]
+ str sp, [r0, #SP]
+ str lr, [r0, #LR]
+ str lr, [r0, #PC] // store pc as lr in order to skip the call
+ // to this function
+ mov pc, lr
+ENDPROC(perf_regs_load)
diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c
index e8d5c551c69..33ec5b339da 100644
--- a/tools/perf/arch/arm/util/dwarf-regs.c
+++ b/tools/perf/arch/arm/util/dwarf-regs.c
@@ -8,10 +8,7 @@
* published by the Free Software Foundation.
*/
-#include <stdlib.h>
-#ifndef __UCLIBC__
-#include <libio.h>
-#endif
+#include <stddef.h>
#include <dwarf-regs.h>
struct pt_regs_dwarfnum {
diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c
new file mode 100644
index 00000000000..b4176c60117
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -0,0 +1,36 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
+ val; \
+})
+
+ dwarf_regs[0] = REG(R0);
+ dwarf_regs[1] = REG(R1);
+ dwarf_regs[2] = REG(R2);
+ dwarf_regs[3] = REG(R3);
+ dwarf_regs[4] = REG(R4);
+ dwarf_regs[5] = REG(R5);
+ dwarf_regs[6] = REG(R6);
+ dwarf_regs[7] = REG(R7);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(FP);
+ dwarf_regs[12] = REG(IP);
+ dwarf_regs[13] = REG(SP);
+ dwarf_regs[14] = REG(LR);
+ dwarf_regs[15] = REG(PC);
+
+ return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
+ dwarf_regs);
+}
diff --git a/tools/perf/arch/arm/util/unwind-libunwind.c b/tools/perf/arch/arm/util/unwind-libunwind.c
new file mode 100644
index 00000000000..729ed69a666
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libunwind.c
@@ -0,0 +1,48 @@
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_ARM_R0:
+ return PERF_REG_ARM_R0;
+ case UNW_ARM_R1:
+ return PERF_REG_ARM_R1;
+ case UNW_ARM_R2:
+ return PERF_REG_ARM_R2;
+ case UNW_ARM_R3:
+ return PERF_REG_ARM_R3;
+ case UNW_ARM_R4:
+ return PERF_REG_ARM_R4;
+ case UNW_ARM_R5:
+ return PERF_REG_ARM_R5;
+ case UNW_ARM_R6:
+ return PERF_REG_ARM_R6;
+ case UNW_ARM_R7:
+ return PERF_REG_ARM_R7;
+ case UNW_ARM_R8:
+ return PERF_REG_ARM_R8;
+ case UNW_ARM_R9:
+ return PERF_REG_ARM_R9;
+ case UNW_ARM_R10:
+ return PERF_REG_ARM_R10;
+ case UNW_ARM_R11:
+ return PERF_REG_ARM_FP;
+ case UNW_ARM_R12:
+ return PERF_REG_ARM_IP;
+ case UNW_ARM_R13:
+ return PERF_REG_ARM_SP;
+ case UNW_ARM_R14:
+ return PERF_REG_ARM_LR;
+ case UNW_ARM_R15:
+ return PERF_REG_ARM_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
new file mode 100644
index 00000000000..67e9b3d38e8
--- /dev/null
+++ b/tools/perf/arch/arm64/Makefile
@@ -0,0 +1,7 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
+ifndef NO_LIBUNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
new file mode 100644
index 00000000000..e9441b9e2a3
--- /dev/null
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -0,0 +1,88 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
+#define PERF_REG_IP PERF_REG_ARM64_PC
+#define PERF_REG_SP PERF_REG_ARM64_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c
new file mode 100644
index 00000000000..d49efeb8172
--- /dev/null
+++ b/tools/perf/arch/arm64/util/dwarf-regs.c
@@ -0,0 +1,80 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+};
+
+#define STR(s) #s
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = STR(%x##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+
+/*
+ * Reference:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ GPR_DWARFNUM_NAME(11),
+ GPR_DWARFNUM_NAME(12),
+ GPR_DWARFNUM_NAME(13),
+ GPR_DWARFNUM_NAME(14),
+ GPR_DWARFNUM_NAME(15),
+ GPR_DWARFNUM_NAME(16),
+ GPR_DWARFNUM_NAME(17),
+ GPR_DWARFNUM_NAME(18),
+ GPR_DWARFNUM_NAME(19),
+ GPR_DWARFNUM_NAME(20),
+ GPR_DWARFNUM_NAME(21),
+ GPR_DWARFNUM_NAME(22),
+ GPR_DWARFNUM_NAME(23),
+ GPR_DWARFNUM_NAME(24),
+ GPR_DWARFNUM_NAME(25),
+ GPR_DWARFNUM_NAME(26),
+ GPR_DWARFNUM_NAME(27),
+ GPR_DWARFNUM_NAME(28),
+ GPR_DWARFNUM_NAME(29),
+ REG_DWARFNUM_NAME("%lr", 30),
+ REG_DWARFNUM_NAME("%sp", 31),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - lookup register name from it's DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from it's DWARF register number. If the register is not
+ * found in the table, this returns NULL;
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
new file mode 100644
index 00000000000..436ee43859d
--- /dev/null
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -0,0 +1,82 @@
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_AARCH64_X0:
+ return PERF_REG_ARM64_X0;
+ case UNW_AARCH64_X1:
+ return PERF_REG_ARM64_X1;
+ case UNW_AARCH64_X2:
+ return PERF_REG_ARM64_X2;
+ case UNW_AARCH64_X3:
+ return PERF_REG_ARM64_X3;
+ case UNW_AARCH64_X4:
+ return PERF_REG_ARM64_X4;
+ case UNW_AARCH64_X5:
+ return PERF_REG_ARM64_X5;
+ case UNW_AARCH64_X6:
+ return PERF_REG_ARM64_X6;
+ case UNW_AARCH64_X7:
+ return PERF_REG_ARM64_X7;
+ case UNW_AARCH64_X8:
+ return PERF_REG_ARM64_X8;
+ case UNW_AARCH64_X9:
+ return PERF_REG_ARM64_X9;
+ case UNW_AARCH64_X10:
+ return PERF_REG_ARM64_X10;
+ case UNW_AARCH64_X11:
+ return PERF_REG_ARM64_X11;
+ case UNW_AARCH64_X12:
+ return PERF_REG_ARM64_X12;
+ case UNW_AARCH64_X13:
+ return PERF_REG_ARM64_X13;
+ case UNW_AARCH64_X14:
+ return PERF_REG_ARM64_X14;
+ case UNW_AARCH64_X15:
+ return PERF_REG_ARM64_X15;
+ case UNW_AARCH64_X16:
+ return PERF_REG_ARM64_X16;
+ case UNW_AARCH64_X17:
+ return PERF_REG_ARM64_X17;
+ case UNW_AARCH64_X18:
+ return PERF_REG_ARM64_X18;
+ case UNW_AARCH64_X19:
+ return PERF_REG_ARM64_X19;
+ case UNW_AARCH64_X20:
+ return PERF_REG_ARM64_X20;
+ case UNW_AARCH64_X21:
+ return PERF_REG_ARM64_X21;
+ case UNW_AARCH64_X22:
+ return PERF_REG_ARM64_X22;
+ case UNW_AARCH64_X23:
+ return PERF_REG_ARM64_X23;
+ case UNW_AARCH64_X24:
+ return PERF_REG_ARM64_X24;
+ case UNW_AARCH64_X25:
+ return PERF_REG_ARM64_X25;
+ case UNW_AARCH64_X26:
+ return PERF_REG_ARM64_X26;
+ case UNW_AARCH64_X27:
+ return PERF_REG_ARM64_X27;
+ case UNW_AARCH64_X28:
+ return PERF_REG_ARM64_X28;
+ case UNW_AARCH64_X29:
+ return PERF_REG_ARM64_X29;
+ case UNW_AARCH64_X30:
+ return PERF_REG_ARM64_LR;
+ case UNW_AARCH64_SP:
+ return PERF_REG_ARM64_SP;
+ case UNW_AARCH64_PC:
+ return PERF_REG_ARM64_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
new file mode 100644
index 00000000000..42faf369211
--- /dev/null
+++ b/tools/perf/arch/common.c
@@ -0,0 +1,211 @@
+#include <stdio.h>
+#include <sys/utsname.h>
+#include "common.h"
+#include "../util/debug.h"
+
+const char *const arm_triplets[] = {
+ "arm-eabi-",
+ "arm-linux-androideabi-",
+ "arm-unknown-linux-",
+ "arm-unknown-linux-gnu-",
+ "arm-unknown-linux-gnueabi-",
+ NULL
+};
+
+const char *const powerpc_triplets[] = {
+ "powerpc-unknown-linux-gnu-",
+ "powerpc64-unknown-linux-gnu-",
+ NULL
+};
+
+const char *const s390_triplets[] = {
+ "s390-ibm-linux-",
+ NULL
+};
+
+const char *const sh_triplets[] = {
+ "sh-unknown-linux-gnu-",
+ "sh64-unknown-linux-gnu-",
+ NULL
+};
+
+const char *const sparc_triplets[] = {
+ "sparc-unknown-linux-gnu-",
+ "sparc64-unknown-linux-gnu-",
+ NULL
+};
+
+const char *const x86_triplets[] = {
+ "x86_64-pc-linux-gnu-",
+ "x86_64-unknown-linux-gnu-",
+ "i686-pc-linux-gnu-",
+ "i586-pc-linux-gnu-",
+ "i486-pc-linux-gnu-",
+ "i386-pc-linux-gnu-",
+ "i686-linux-android-",
+ "i686-android-linux-",
+ NULL
+};
+
+const char *const mips_triplets[] = {
+ "mips-unknown-linux-gnu-",
+ "mipsel-linux-android-",
+ NULL
+};
+
+static bool lookup_path(char *name)
+{
+ bool found = false;
+ char *path, *tmp;
+ char buf[PATH_MAX];
+ char *env = getenv("PATH");
+
+ if (!env)
+ return false;
+
+ env = strdup(env);
+ if (!env)
+ return false;
+
+ path = strtok_r(env, ":", &tmp);
+ while (path) {
+ scnprintf(buf, sizeof(buf), "%s/%s", path, name);
+ if (access(buf, F_OK) == 0) {
+ found = true;
+ break;
+ }
+ path = strtok_r(NULL, ":", &tmp);
+ }
+ free(env);
+ return found;
+}
+
+static int lookup_triplets(const char *const *triplets, const char *name)
+{
+ int i;
+ char buf[PATH_MAX];
+
+ for (i = 0; triplets[i] != NULL; i++) {
+ scnprintf(buf, sizeof(buf), "%s%s", triplets[i], name);
+ if (lookup_path(buf))
+ return i;
+ }
+ return -1;
+}
+
+/*
+ * Return architecture name in a normalized form.
+ * The conversion logic comes from the Makefile.
+ */
+static const char *normalize_arch(char *arch)
+{
+ if (!strcmp(arch, "x86_64"))
+ return "x86";
+ if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
+ return "x86";
+ if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
+ return "sparc";
+ if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
+ return "arm";
+ if (!strncmp(arch, "s390", 4))
+ return "s390";
+ if (!strncmp(arch, "parisc", 6))
+ return "parisc";
+ if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
+ return "powerpc";
+ if (!strncmp(arch, "mips", 4))
+ return "mips";
+ if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
+ return "sh";
+
+ return arch;
+}
+
+static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
+ const char *name,
+ const char **path)
+{
+ int idx;
+ const char *arch, *cross_env;
+ struct utsname uts;
+ const char *const *path_list;
+ char *buf = NULL;
+
+ arch = normalize_arch(env->arch);
+
+ if (uname(&uts) < 0)
+ goto out;
+
+ /*
+ * We don't need to try to find objdump path for native system.
+ * Just use default binutils path (e.g.: "objdump").
+ */
+ if (!strcmp(normalize_arch(uts.machine), arch))
+ goto out;
+
+ cross_env = getenv("CROSS_COMPILE");
+ if (cross_env) {
+ if (asprintf(&buf, "%s%s", cross_env, name) < 0)
+ goto out_error;
+ if (buf[0] == '/') {
+ if (access(buf, F_OK) == 0)
+ goto out;
+ goto out_error;
+ }
+ if (lookup_path(buf))
+ goto out;
+ zfree(&buf);
+ }
+
+ if (!strcmp(arch, "arm"))
+ path_list = arm_triplets;
+ else if (!strcmp(arch, "powerpc"))
+ path_list = powerpc_triplets;
+ else if (!strcmp(arch, "sh"))
+ path_list = sh_triplets;
+ else if (!strcmp(arch, "s390"))
+ path_list = s390_triplets;
+ else if (!strcmp(arch, "sparc"))
+ path_list = sparc_triplets;
+ else if (!strcmp(arch, "x86"))
+ path_list = x86_triplets;
+ else if (!strcmp(arch, "mips"))
+ path_list = mips_triplets;
+ else {
+ ui__error("binutils for %s not supported.\n", arch);
+ goto out_error;
+ }
+
+ idx = lookup_triplets(path_list, name);
+ if (idx < 0) {
+ ui__error("Please install %s for %s.\n"
+ "You can add it to PATH, set CROSS_COMPILE or "
+ "override the default using --%s.\n",
+ name, arch, name);
+ goto out_error;
+ }
+
+ if (asprintf(&buf, "%s%s", path_list[idx], name) < 0)
+ goto out_error;
+
+out:
+ *path = buf;
+ return 0;
+out_error:
+ free(buf);
+ *path = NULL;
+ return -1;
+}
+
+int perf_session_env__lookup_objdump(struct perf_session_env *env)
+{
+ /*
+ * For live mode, env->arch will be NULL and we can use
+ * the native objdump tool.
+ */
+ if (env->arch == NULL)
+ return 0;
+
+ return perf_session_env__lookup_binutils_path(env, "objdump",
+ &objdump_path);
+}
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
new file mode 100644
index 00000000000..ede246eda9b
--- /dev/null
+++ b/tools/perf/arch/common.h
@@ -0,0 +1,10 @@
+#ifndef ARCH_PERF_COMMON_H
+#define ARCH_PERF_COMMON_H
+
+#include "../util/session.h"
+
+extern const char *objdump_path;
+
+int perf_session_env__lookup_objdump(struct perf_session_env *env);
+
+#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
index 7cdd61d0e27..733151cdf46 100644
--- a/tools/perf/arch/powerpc/util/dwarf-regs.c
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -9,10 +9,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <stdlib.h>
-#ifndef __UCLIBC__
-#include <libio.h>
-#endif
+#include <stddef.h>
#include <dwarf-regs.h>
diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c
index e19653e025f..0469df02ee6 100644
--- a/tools/perf/arch/s390/util/dwarf-regs.c
+++ b/tools/perf/arch/s390/util/dwarf-regs.c
@@ -6,7 +6,7 @@
*
*/
-#include <libio.h>
+#include <stddef.h>
#include <dwarf-regs.h>
#define NUM_GPRS 16
diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c
index a11edb007a6..0d0897f57a1 100644
--- a/tools/perf/arch/sh/util/dwarf-regs.c
+++ b/tools/perf/arch/sh/util/dwarf-regs.c
@@ -19,7 +19,7 @@
*
*/
-#include <libio.h>
+#include <stddef.h>
#include <dwarf-regs.h>
/*
diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c
index 0ab88483720..92eda412fed 100644
--- a/tools/perf/arch/sparc/util/dwarf-regs.c
+++ b/tools/perf/arch/sparc/util/dwarf-regs.c
@@ -9,7 +9,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#include <libio.h>
+#include <stddef.h>
#include <dwarf-regs.h>
#define SPARC_MAX_REGS 96
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 815841c04eb..1641542e363 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -3,6 +3,15 @@ PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
ifndef NO_LIBUNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
+ifndef NO_LIBDW_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
+endif
+ifndef NO_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
endif
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o
+LIB_H += arch/$(ARCH)/util/tsc.h
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index 46fc9f15c6b..7df517acfef 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -2,17 +2,23 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
-#include "../../util/types.h"
-#include "../../../../../arch/x86/include/asm/perf_regs.h"
+#include <linux/types.h>
+#include <asm/perf_regs.h>
-#ifndef ARCH_X86_64
+void perf_regs_load(u64 *regs);
+
+#ifndef HAVE_ARCH_X86_64_SUPPORT
#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_X86_32_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
#else
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
(1ULL << PERF_REG_X86_ES) | \
(1ULL << PERF_REG_X86_FS) | \
(1ULL << PERF_REG_X86_GS))
#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT)
+#define PERF_REGS_MAX PERF_REG_X86_64_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
#endif
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
@@ -52,7 +58,7 @@ static inline const char *perf_reg_name(int id)
return "FS";
case PERF_REG_X86_GS:
return "GS";
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
case PERF_REG_X86_R8:
return "R8";
case PERF_REG_X86_R9:
@@ -69,7 +75,7 @@ static inline const char *perf_reg_name(int id)
return "R14";
case PERF_REG_X86_R15:
return "R15";
-#endif /* ARCH_X86_64 */
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
default:
return NULL;
}
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..9f89f899ccc
--- /dev/null
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_X86_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = malloc(sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/x86/tests/regs_load.S b/tools/perf/arch/x86/tests/regs_load.S
new file mode 100644
index 00000000000..60875d5c556
--- /dev/null
+++ b/tools/perf/arch/x86/tests/regs_load.S
@@ -0,0 +1,98 @@
+#include <linux/linkage.h>
+
+#define AX 0
+#define BX 1 * 8
+#define CX 2 * 8
+#define DX 3 * 8
+#define SI 4 * 8
+#define DI 5 * 8
+#define BP 6 * 8
+#define SP 7 * 8
+#define IP 8 * 8
+#define FLAGS 9 * 8
+#define CS 10 * 8
+#define SS 11 * 8
+#define DS 12 * 8
+#define ES 13 * 8
+#define FS 14 * 8
+#define GS 15 * 8
+#define R8 16 * 8
+#define R9 17 * 8
+#define R10 18 * 8
+#define R11 19 * 8
+#define R12 20 * 8
+#define R13 21 * 8
+#define R14 22 * 8
+#define R15 23 * 8
+
+.text
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ENTRY(perf_regs_load)
+ movq %rax, AX(%rdi)
+ movq %rbx, BX(%rdi)
+ movq %rcx, CX(%rdi)
+ movq %rdx, DX(%rdi)
+ movq %rsi, SI(%rdi)
+ movq %rdi, DI(%rdi)
+ movq %rbp, BP(%rdi)
+
+ leaq 8(%rsp), %rax /* exclude this call. */
+ movq %rax, SP(%rdi)
+
+ movq 0(%rsp), %rax
+ movq %rax, IP(%rdi)
+
+ movq $0, FLAGS(%rdi)
+ movq $0, CS(%rdi)
+ movq $0, SS(%rdi)
+ movq $0, DS(%rdi)
+ movq $0, ES(%rdi)
+ movq $0, FS(%rdi)
+ movq $0, GS(%rdi)
+
+ movq %r8, R8(%rdi)
+ movq %r9, R9(%rdi)
+ movq %r10, R10(%rdi)
+ movq %r11, R11(%rdi)
+ movq %r12, R12(%rdi)
+ movq %r13, R13(%rdi)
+ movq %r14, R14(%rdi)
+ movq %r15, R15(%rdi)
+ ret
+ENDPROC(perf_regs_load)
+#else
+ENTRY(perf_regs_load)
+ push %edi
+ movl 8(%esp), %edi
+ movl %eax, AX(%edi)
+ movl %ebx, BX(%edi)
+ movl %ecx, CX(%edi)
+ movl %edx, DX(%edi)
+ movl %esi, SI(%edi)
+ pop %eax
+ movl %eax, DI(%edi)
+ movl %ebp, BP(%edi)
+
+ leal 4(%esp), %eax /* exclude this call. */
+ movl %eax, SP(%edi)
+
+ movl 0(%esp), %eax
+ movl %eax, IP(%edi)
+
+ movl $0, FLAGS(%edi)
+ movl $0, CS(%edi)
+ movl $0, SS(%edi)
+ movl $0, DS(%edi)
+ movl $0, ES(%edi)
+ movl $0, FS(%edi)
+ movl $0, GS(%edi)
+ ret
+ENDPROC(perf_regs_load)
+#endif
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c
index a794d308192..be22dd46323 100644
--- a/tools/perf/arch/x86/util/dwarf-regs.c
+++ b/tools/perf/arch/x86/util/dwarf-regs.c
@@ -20,7 +20,7 @@
*
*/
-#include <libio.h>
+#include <stddef.h>
#include <dwarf-regs.h>
/*
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
new file mode 100644
index 00000000000..40021fa3129
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -0,0 +1,59 @@
+#include <stdbool.h>
+#include <errno.h>
+
+#include <linux/perf_event.h>
+
+#include "../../perf.h"
+#include <linux/types.h>
+#include "../../util/debug.h"
+#include "tsc.h"
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
+{
+ u64 t, quot, rem;
+
+ t = ns - tc->time_zero;
+ quot = t / tc->time_mult;
+ rem = t % tc->time_mult;
+ return (quot << tc->time_shift) +
+ (rem << tc->time_shift) / tc->time_mult;
+}
+
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
+{
+ u64 quot, rem;
+
+ quot = cyc >> tc->time_shift;
+ rem = cyc & ((1 << tc->time_shift) - 1);
+ return tc->time_zero + quot * tc->time_mult +
+ ((rem * tc->time_mult) >> tc->time_shift);
+}
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc)
+{
+ bool cap_user_time_zero;
+ u32 seq;
+ int i = 0;
+
+ while (1) {
+ seq = pc->lock;
+ rmb();
+ tc->time_mult = pc->time_mult;
+ tc->time_shift = pc->time_shift;
+ tc->time_zero = pc->time_zero;
+ cap_user_time_zero = pc->cap_user_time_zero;
+ rmb();
+ if (pc->lock == seq && !(seq & 1))
+ break;
+ if (++i > 10000) {
+ pr_debug("failed to get perf_event_mmap_page lock\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!cap_user_time_zero)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h
new file mode 100644
index 00000000000..2affe0366b5
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.h
@@ -0,0 +1,20 @@
+#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
+#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
+
+#include <linux/types.h>
+
+struct perf_tsc_conversion {
+ u16 time_shift;
+ u32 time_mult;
+ u64 time_zero;
+};
+
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc);
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
+
+#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
new file mode 100644
index 00000000000..c4b72176ca8
--- /dev/null
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -0,0 +1,51 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[17];
+ unsigned nregs;
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \
+ val; \
+})
+
+ if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(CX);
+ dwarf_regs[2] = REG(DX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SP);
+ dwarf_regs[5] = REG(BP);
+ dwarf_regs[6] = REG(SI);
+ dwarf_regs[7] = REG(DI);
+ dwarf_regs[8] = REG(IP);
+ nregs = 9;
+ } else {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(DX);
+ dwarf_regs[2] = REG(CX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SI);
+ dwarf_regs[5] = REG(DI);
+ dwarf_regs[6] = REG(BP);
+ dwarf_regs[7] = REG(SP);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(R11);
+ dwarf_regs[12] = REG(R12);
+ dwarf_regs[13] = REG(R13);
+ dwarf_regs[14] = REG(R14);
+ dwarf_regs[15] = REG(R15);
+ dwarf_regs[16] = REG(IP);
+ nregs = 17;
+ }
+
+ return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
+}
diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
index 78d956eff96..3261f68c6a7 100644
--- a/tools/perf/arch/x86/util/unwind.c
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -4,8 +4,8 @@
#include "perf_regs.h"
#include "../../util/unwind.h"
-#ifdef ARCH_X86_64
-int unwind__arch_reg_id(int regnum)
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+int libunwind__arch_reg_id(int regnum)
{
int id;
@@ -69,7 +69,7 @@ int unwind__arch_reg_id(int regnum)
return id;
}
#else
-int unwind__arch_reg_id(int regnum)
+int libunwind__arch_reg_id(int regnum)
{
int id;
@@ -108,4 +108,4 @@ int unwind__arch_reg_id(int regnum)
return id;
}
-#endif /* ARCH_X86_64 */
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion
deleted file mode 100644
index 56e6a12aab5..00000000000
--- a/tools/perf/bash_completion
+++ /dev/null
@@ -1,62 +0,0 @@
-# perf completion
-
-function_exists()
-{
- declare -F $1 > /dev/null
- return $?
-}
-
-function_exists __ltrim_colon_completions ||
-__ltrim_colon_completions()
-{
- if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then
- # Remove colon-word prefix from COMPREPLY items
- local colon_word=${1%${1##*:}}
- local i=${#COMPREPLY[*]}
- while [[ $((--i)) -ge 0 ]]; do
- COMPREPLY[$i]=${COMPREPLY[$i]#"$colon_word"}
- done
- fi
-}
-
-have perf &&
-_perf()
-{
- local cur prev cmd
-
- COMPREPLY=()
- if function_exists _get_comp_words_by_ref; then
- _get_comp_words_by_ref -n : cur prev
- else
- cur=$(_get_cword :)
- prev=${COMP_WORDS[COMP_CWORD-1]}
- fi
-
- cmd=${COMP_WORDS[0]}
-
- # List perf subcommands or long options
- if [ $COMP_CWORD -eq 1 ]; then
- if [[ $cur == --* ]]; then
- COMPREPLY=( $( compgen -W '--help --version \
- --exec-path --html-path --paginate --no-pager \
- --perf-dir --work-tree --debugfs-dir' -- "$cur" ) )
- else
- cmds=$($cmd --list-cmds)
- COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) )
- fi
- # List possible events for -e option
- elif [[ $prev == "-e" && "${COMP_WORDS[1]}" == @(record|stat|top) ]]; then
- evts=$($cmd list --raw-dump)
- COMPREPLY=( $( compgen -W '$evts' -- "$cur" ) )
- __ltrim_colon_completions $cur
- # List long option names
- elif [[ $cur == --* ]]; then
- subcmd=${COMP_WORDS[1]}
- opts=$($cmd $subcmd --list-opts)
- COMPREPLY=( $( compgen -W '$opts' -- "$cur" ) )
- # Fall down to list regular files
- else
- _filedir
- fi
-} &&
-complete -F _perf perf
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 8f89998eeaf..eba46709b27 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -1,11 +1,39 @@
#ifndef BENCH_H
#define BENCH_H
+/*
+ * The madvise transparent hugepage constants were added in glibc
+ * 2.13. For compatibility with older versions of glibc, define these
+ * tokens if they are not already defined.
+ *
+ * PA-RISC uses different madvise values from other architectures and
+ * needs to be special-cased.
+ */
+#ifdef __hppa__
+# ifndef MADV_HUGEPAGE
+# define MADV_HUGEPAGE 67
+# endif
+# ifndef MADV_NOHUGEPAGE
+# define MADV_NOHUGEPAGE 68
+# endif
+#else
+# ifndef MADV_HUGEPAGE
+# define MADV_HUGEPAGE 14
+# endif
+# ifndef MADV_NOHUGEPAGE
+# define MADV_NOHUGEPAGE 15
+# endif
+#endif
+
+extern int bench_numa(int argc, const char **argv, const char *prefix);
extern int bench_sched_messaging(int argc, const char **argv, const char *prefix);
extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
extern int bench_mem_memcpy(int argc, const char **argv,
const char *prefix __maybe_unused);
extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
+extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
+extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
+extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
new file mode 100644
index 00000000000..a84206e9c4a
--- /dev/null
+++ b/tools/perf/bench/futex-hash.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
+ *
+ * This program is particularly useful for measuring the kernel's futex hash
+ * table/function implementation. In order for it to make sense, use with as
+ * many threads and futexes as possible.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+static unsigned int nthreads = 0;
+static unsigned int nsecs = 10;
+/* amount of futexes per thread */
+static unsigned int nfutexes = 1024;
+static bool fshared = false, done = false, silent = false;
+
+struct timeval start, end, runtime;
+static pthread_mutex_t thread_lock;
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+static pthread_cond_t thread_parent, thread_worker;
+
+struct worker {
+ int tid;
+ u_int32_t *futex;
+ pthread_t thread;
+ unsigned long ops;
+};
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
+ OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_hash_usage[] = {
+ "perf bench futex hash <options>",
+ NULL
+};
+
+static void *workerfn(void *arg)
+{
+ int ret;
+ unsigned int i;
+ struct worker *w = (struct worker *) arg;
+
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ do {
+ for (i = 0; i < nfutexes; i++, w->ops++) {
+ /*
+ * We want the futex calls to fail in order to stress
+ * the hashing of uaddr and not measure other steps,
+ * such as internal waitqueue handling, thus enlarging
+ * the critical region protected by hb->lock.
+ */
+ ret = futex_wait(&w->futex[i], 1234, NULL,
+ fshared ? 0 : FUTEX_PRIVATE_FLAG);
+ if (!silent &&
+ (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
+ warn("Non-expected futex return call");
+ }
+ } while (!done);
+
+ return NULL;
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ /* inform all threads that we're done for the day */
+ done = true;
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+}
+
+static void print_summary(void)
+{
+ unsigned long avg = avg_stats(&throughput_stats);
+ double stddev = stddev_stats(&throughput_stats);
+
+ printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+ !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+ (int) runtime.tv_sec);
+}
+
+int bench_futex_hash(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ cpu_set_t cpu;
+ struct sigaction act;
+ unsigned int i, ncpus;
+ pthread_attr_t thread_attr;
+ struct worker *worker = NULL;
+
+ argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_hash_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads) /* default to the number of CPUs */
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ goto errmem;
+
+ printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
+ getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+
+ init_stats(&throughput_stats);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ threads_starting = nthreads;
+ pthread_attr_init(&thread_attr);
+ gettimeofday(&start, NULL);
+ for (i = 0; i < nthreads; i++) {
+ worker[i].tid = i;
+ worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
+ if (!worker[i].futex)
+ goto errmem;
+
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
+ (void *)(struct worker *) &worker[i]);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_create");
+
+ }
+ pthread_attr_destroy(&thread_attr);
+
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ sleep(nsecs);
+ toggle_done(0, NULL, NULL);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i].thread, NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+
+ for (i = 0; i < nthreads; i++) {
+ unsigned long t = worker[i].ops/runtime.tv_sec;
+ update_stats(&throughput_stats, t);
+ if (!silent) {
+ if (nfutexes == 1)
+ printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
+ worker[i].tid, &worker[i].futex[0], t);
+ else
+ printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
+ worker[i].tid, &worker[i].futex[0],
+ &worker[i].futex[nfutexes-1], t);
+ }
+
+ free(worker[i].futex);
+ }
+
+ print_summary();
+
+ free(worker);
+ return ret;
+errmem:
+ err(EXIT_FAILURE, "calloc");
+}
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
new file mode 100644
index 00000000000..a16255876f1
--- /dev/null
+++ b/tools/perf/bench/futex-requeue.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-requeue: Block a bunch of threads on futex1 and requeue them
+ * on futex2, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread
+ * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+static u_int32_t futex1 = 0, futex2 = 0;
+
+/*
+ * How many tasks to requeue at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nrequeue = 1;
+
+/*
+ * There can be significant variance from run to run,
+ * the more repeats, the more exact the overall avg and
+ * the better idea of the futex latency.
+ */
+static unsigned int repeat = 10;
+
+static pthread_t *worker;
+static bool done = 0, silent = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats requeuetime_stats, requeued_stats;
+static unsigned int ncpus, threads_starting, nthreads = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
+ OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_END()
+};
+
+static const char * const bench_futex_requeue_usage[] = {
+ "perf bench futex requeue <options>",
+ NULL
+};
+
+static void print_summary(void)
+{
+ double requeuetime_avg = avg_stats(&requeuetime_stats);
+ double requeuetime_stddev = stddev_stats(&requeuetime_stats);
+ unsigned int requeued_avg = avg_stats(&requeued_stats);
+
+ printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ requeued_avg,
+ nthreads,
+ requeuetime_avg/1e3,
+ rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+}
+
+static void *workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
+ return NULL;
+}
+
+static void block_threads(pthread_t *w,
+ pthread_attr_t thread_attr)
+{
+ cpu_set_t cpu;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ /* create and block all threads */
+ for (i = 0; i < nthreads; i++) {
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_requeue(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+
+ argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
+ if (argc)
+ goto err;
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
+ "%d at a time.\n\n",
+ getpid(), nthreads, &futex1, &futex2, nrequeue);
+
+ init_stats(&requeued_stats);
+ init_stats(&requeuetime_stats);
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < repeat && !done; j++) {
+ unsigned int nrequeued = 0;
+ struct timeval start, end, runtime;
+
+ /* create, launch & block all threads */
+ block_threads(worker, thread_attr);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start requeueing */
+ gettimeofday(&start, NULL);
+ for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue)
+ /*
+ * Do not wakeup any tasks blocked on futex1, allowing
+ * us to really measure futex_wait functionality.
+ */
+ futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue,
+ FUTEX_PRIVATE_FLAG);
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+
+ update_stats(&requeued_stats, nrequeued);
+ update_stats(&requeuetime_stats, runtime.tv_usec);
+
+ if (!silent) {
+ printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
+ j + 1, nrequeued, nthreads, runtime.tv_usec/1e3);
+ }
+
+ /* everybody should be blocked on futex2, wake'em up */
+ nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG);
+ if (nthreads != nrequeued)
+ warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(worker);
+ return ret;
+err:
+ usage_with_options(bench_futex_requeue_usage, options);
+ exit(EXIT_FAILURE);
+}
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
new file mode 100644
index 00000000000..d096169b161
--- /dev/null
+++ b/tools/perf/bench/futex-wake.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread wakeups
+ * in non-error situations: all waiters are queued and all wake calls wakeup
+ * one or more tasks, and thus the waitqueue is never empty.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+/* all threads will block on the same futex */
+static u_int32_t futex1 = 0;
+
+/*
+ * How many wakeups to do at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nwakes = 1;
+
+/*
+ * There can be significant variance from run to run,
+ * the more repeats, the more exact the overall avg and
+ * the better idea of the futex latency.
+ */
+static unsigned int repeat = 10;
+
+pthread_t *worker;
+static bool done = 0, silent = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int ncpus, threads_starting, nthreads = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
+ OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_END()
+};
+
+static const char * const bench_futex_wake_usage[] = {
+ "perf bench futex wake <options>",
+ NULL
+};
+
+static void *workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
+ return NULL;
+}
+
+static void print_summary(void)
+{
+ double waketime_avg = avg_stats(&waketime_stats);
+ double waketime_stddev = stddev_stats(&waketime_stats);
+ unsigned int wakeup_avg = avg_stats(&wakeup_stats);
+
+ printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ wakeup_avg,
+ nthreads,
+ waketime_avg/1e3,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void block_threads(pthread_t *w,
+ pthread_attr_t thread_attr)
+{
+ cpu_set_t cpu;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ /* create and block all threads */
+ for (i = 0; i < nthreads; i++) {
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_wake(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+
+ argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_wake_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
+ "waking up %d at a time.\n\n",
+ getpid(), nthreads, &futex1, nwakes);
+
+ init_stats(&wakeup_stats);
+ init_stats(&waketime_stats);
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < repeat && !done; j++) {
+ unsigned int nwoken = 0;
+ struct timeval start, end, runtime;
+
+ /* create, launch & block all threads */
+ block_threads(worker, thread_attr);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start waking folks up */
+ gettimeofday(&start, NULL);
+ while (nwoken != nthreads)
+ nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG);
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+
+ update_stats(&wakeup_stats, nwoken);
+ update_stats(&waketime_stats, runtime.tv_usec);
+
+ if (!silent) {
+ printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
+ j + 1, nwoken, nthreads, runtime.tv_usec/1e3);
+ }
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(worker);
+ return ret;
+}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
new file mode 100644
index 00000000000..71f2844cf97
--- /dev/null
+++ b/tools/perf/bench/futex.h
@@ -0,0 +1,71 @@
+/*
+ * Glibc independent futex library for testing kernel functionality.
+ * Shamelessly stolen from Darren Hart <dvhltc@us.ibm.com>
+ * http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/
+ */
+
+#ifndef _FUTEX_H
+#define _FUTEX_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: typically an absolute struct timespec (except where noted
+ * otherwise). Overloaded by some ops
+ * @uaddr2: address of second futex for some ops\
+ * @val3: varies by op
+ * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of an static inline function as
+ * some of the types over overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+ syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout: relative timeout
+ */
+static inline int
+futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake: wake up to this many tasks
+ */
+static inline int
+futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+* @nr_wake: wake up to this many tasks
+* @nr_requeue: requeue up to this many tasks
+*/
+static inline int
+futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+#endif /* _FUTEX_H */
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index a72e36cb539..57b4ed87145 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -1,5 +1,5 @@
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMCPY_FN(fn, name, desc) \
extern void *fn(void *, const void *, size_t);
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 93c83e3cb4a..5ce71d3b72c 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -58,7 +58,7 @@ struct routine routines[] = {
{ "default",
"Default memcpy() provided by glibc",
memcpy },
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
#include "mem-memcpy-x86-64-asm-def.h"
@@ -111,12 +111,14 @@ static double timeval2double(struct timeval *ts)
static void alloc_mem(void **dst, void **src, size_t length)
{
*dst = zalloc(length);
- if (!dst)
+ if (!*dst)
die("memory allocation failed - maybe length is too large?\n");
*src = zalloc(length);
- if (!src)
+ if (!*src)
die("memory allocation failed - maybe length is too large?\n");
+ /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
+ memset(*src, 0, length);
}
static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index a040fa77665..633800cb0dc 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -1,5 +1,5 @@
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(fn, name, desc) \
extern void *fn(void *, int, size_t);
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index c6e4bc52349..9af79d2b18e 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -58,7 +58,7 @@ static const struct routine routines[] = {
{ "default",
"Default memset() provided by glibc",
memset },
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(fn, name, desc) { name, desc, fn },
#include "mem-memset-x86-64-asm-def.h"
@@ -111,7 +111,7 @@ static double timeval2double(struct timeval *ts)
static void alloc_mem(void **dst, size_t length)
{
*dst = zalloc(length);
- if (!dst)
+ if (!*dst)
die("memory allocation failed - maybe length is too large?\n");
}
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
new file mode 100644
index 00000000000..ebfa163b80b
--- /dev/null
+++ b/tools/perf/bench/numa.c
@@ -0,0 +1,1744 @@
+/*
+ * numa.c
+ *
+ * numa: Simulate NUMA-sensitive workload and measure their NUMA performance
+ */
+
+#include "../perf.h"
+#include "../builtin.h"
+#include "../util/util.h"
+#include "../util/parse-options.h"
+
+#include "bench.h"
+
+#include <errno.h>
+#include <sched.h>
+#include <stdio.h>
+#include <assert.h>
+#include <malloc.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+
+#include <numa.h>
+#include <numaif.h>
+
+/*
+ * Regular printout to the terminal, supressed if -q is specified:
+ */
+#define tprintf(x...) do { if (g && g->p.show_details >= 0) printf(x); } while (0)
+
+/*
+ * Debug printf:
+ */
+#define dprintf(x...) do { if (g && g->p.show_details >= 1) printf(x); } while (0)
+
+struct thread_data {
+ int curr_cpu;
+ cpu_set_t bind_cpumask;
+ int bind_node;
+ u8 *process_data;
+ int process_nr;
+ int thread_nr;
+ int task_nr;
+ unsigned int loops_done;
+ u64 val;
+ u64 runtime_ns;
+ pthread_mutex_t *process_lock;
+};
+
+/* Parameters set by options: */
+
+struct params {
+ /* Startup synchronization: */
+ bool serialize_startup;
+
+ /* Task hierarchy: */
+ int nr_proc;
+ int nr_threads;
+
+ /* Working set sizes: */
+ const char *mb_global_str;
+ const char *mb_proc_str;
+ const char *mb_proc_locked_str;
+ const char *mb_thread_str;
+
+ double mb_global;
+ double mb_proc;
+ double mb_proc_locked;
+ double mb_thread;
+
+ /* Access patterns to the working set: */
+ bool data_reads;
+ bool data_writes;
+ bool data_backwards;
+ bool data_zero_memset;
+ bool data_rand_walk;
+ u32 nr_loops;
+ u32 nr_secs;
+ u32 sleep_usecs;
+
+ /* Working set initialization: */
+ bool init_zero;
+ bool init_random;
+ bool init_cpu0;
+
+ /* Misc options: */
+ int show_details;
+ int run_all;
+ int thp;
+
+ long bytes_global;
+ long bytes_process;
+ long bytes_process_locked;
+ long bytes_thread;
+
+ int nr_tasks;
+ bool show_quiet;
+
+ bool show_convergence;
+ bool measure_convergence;
+
+ int perturb_secs;
+ int nr_cpus;
+ int nr_nodes;
+
+ /* Affinity options -C and -N: */
+ char *cpu_list_str;
+ char *node_list_str;
+};
+
+
+/* Global, read-writable area, accessible to all processes and threads: */
+
+struct global_info {
+ u8 *data;
+
+ pthread_mutex_t startup_mutex;
+ int nr_tasks_started;
+
+ pthread_mutex_t startup_done_mutex;
+
+ pthread_mutex_t start_work_mutex;
+ int nr_tasks_working;
+
+ pthread_mutex_t stop_work_mutex;
+ u64 bytes_done;
+
+ struct thread_data *threads;
+
+ /* Convergence latency measurement: */
+ bool all_converged;
+ bool stop_work;
+
+ int print_once;
+
+ struct params p;
+};
+
+static struct global_info *g = NULL;
+
+static int parse_cpus_opt(const struct option *opt, const char *arg, int unset);
+static int parse_nodes_opt(const struct option *opt, const char *arg, int unset);
+
+struct params p0;
+
+static const struct option options[] = {
+ OPT_INTEGER('p', "nr_proc" , &p0.nr_proc, "number of processes"),
+ OPT_INTEGER('t', "nr_threads" , &p0.nr_threads, "number of threads per process"),
+
+ OPT_STRING('G', "mb_global" , &p0.mb_global_str, "MB", "global memory (MBs)"),
+ OPT_STRING('P', "mb_proc" , &p0.mb_proc_str, "MB", "process memory (MBs)"),
+ OPT_STRING('L', "mb_proc_locked", &p0.mb_proc_locked_str,"MB", "process serialized/locked memory access (MBs), <= process_memory"),
+ OPT_STRING('T', "mb_thread" , &p0.mb_thread_str, "MB", "thread memory (MBs)"),
+
+ OPT_UINTEGER('l', "nr_loops" , &p0.nr_loops, "max number of loops to run"),
+ OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run"),
+ OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"),
+
+ OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"),
+ OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"),
+ OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"),
+ OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"),
+ OPT_BOOLEAN('r', "data_rand_walk", &p0.data_rand_walk, "access the data with random (32bit LFSR) walk"),
+
+
+ OPT_BOOLEAN('z', "init_zero" , &p0.init_zero, "bzero the initial allocations"),
+ OPT_BOOLEAN('I', "init_random" , &p0.init_random, "randomize the contents of the initial allocations"),
+ OPT_BOOLEAN('0', "init_cpu0" , &p0.init_cpu0, "do the initial allocations on CPU#0"),
+ OPT_INTEGER('x', "perturb_secs", &p0.perturb_secs, "perturb thread 0/0 every X secs, to test convergence stability"),
+
+ OPT_INCR ('d', "show_details" , &p0.show_details, "Show details"),
+ OPT_INCR ('a', "all" , &p0.run_all, "Run all tests in the suite"),
+ OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"),
+ OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"),
+ OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"),
+ OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"),
+ OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"),
+
+ /* Special option string parsing callbacks: */
+ OPT_CALLBACK('C', "cpus", NULL, "cpu[,cpu2,...cpuN]",
+ "bind the first N tasks to these specific cpus (the rest is unbound)",
+ parse_cpus_opt),
+ OPT_CALLBACK('M', "memnodes", NULL, "node[,node2,...nodeN]",
+ "bind the first N tasks to these specific memory nodes (the rest is unbound)",
+ parse_nodes_opt),
+ OPT_END()
+};
+
+static const char * const bench_numa_usage[] = {
+ "perf bench numa <options>",
+ NULL
+};
+
+static const char * const numa_usage[] = {
+ "perf bench numa mem [<options>]",
+ NULL
+};
+
+static cpu_set_t bind_to_cpu(int target_cpu)
+{
+ cpu_set_t orig_mask, mask;
+ int ret;
+
+ ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+ BUG_ON(ret);
+
+ CPU_ZERO(&mask);
+
+ if (target_cpu == -1) {
+ int cpu;
+
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &mask);
+ } else {
+ BUG_ON(target_cpu < 0 || target_cpu >= g->p.nr_cpus);
+ CPU_SET(target_cpu, &mask);
+ }
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+
+ return orig_mask;
+}
+
+static cpu_set_t bind_to_node(int target_node)
+{
+ int cpus_per_node = g->p.nr_cpus/g->p.nr_nodes;
+ cpu_set_t orig_mask, mask;
+ int cpu;
+ int ret;
+
+ BUG_ON(cpus_per_node*g->p.nr_nodes != g->p.nr_cpus);
+ BUG_ON(!cpus_per_node);
+
+ ret = sched_getaffinity(0, sizeof(orig_mask), &orig_mask);
+ BUG_ON(ret);
+
+ CPU_ZERO(&mask);
+
+ if (target_node == -1) {
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &mask);
+ } else {
+ int cpu_start = (target_node + 0) * cpus_per_node;
+ int cpu_stop = (target_node + 1) * cpus_per_node;
+
+ BUG_ON(cpu_stop > g->p.nr_cpus);
+
+ for (cpu = cpu_start; cpu < cpu_stop; cpu++)
+ CPU_SET(cpu, &mask);
+ }
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+
+ return orig_mask;
+}
+
+static void bind_to_cpumask(cpu_set_t mask)
+{
+ int ret;
+
+ ret = sched_setaffinity(0, sizeof(mask), &mask);
+ BUG_ON(ret);
+}
+
+static void mempol_restore(void)
+{
+ int ret;
+
+ ret = set_mempolicy(MPOL_DEFAULT, NULL, g->p.nr_nodes-1);
+
+ BUG_ON(ret);
+}
+
+static void bind_to_memnode(int node)
+{
+ unsigned long nodemask;
+ int ret;
+
+ if (node == -1)
+ return;
+
+ BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask));
+ nodemask = 1L << node;
+
+ ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
+ dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+
+ BUG_ON(ret);
+}
+
+#define HPSIZE (2*1024*1024)
+
+#define set_taskname(fmt...) \
+do { \
+ char name[20]; \
+ \
+ snprintf(name, 20, fmt); \
+ prctl(PR_SET_NAME, name); \
+} while (0)
+
+static u8 *alloc_data(ssize_t bytes0, int map_flags,
+ int init_zero, int init_cpu0, int thp, int init_random)
+{
+ cpu_set_t orig_mask;
+ ssize_t bytes;
+ u8 *buf;
+ int ret;
+
+ if (!bytes0)
+ return NULL;
+
+ /* Allocate and initialize all memory on CPU#0: */
+ if (init_cpu0) {
+ orig_mask = bind_to_node(0);
+ bind_to_memnode(0);
+ }
+
+ bytes = bytes0 + HPSIZE;
+
+ buf = (void *)mmap(0, bytes, PROT_READ|PROT_WRITE, MAP_ANON|map_flags, -1, 0);
+ BUG_ON(buf == (void *)-1);
+
+ if (map_flags == MAP_PRIVATE) {
+ if (thp > 0) {
+ ret = madvise(buf, bytes, MADV_HUGEPAGE);
+ if (ret && !g->print_once) {
+ g->print_once = 1;
+ printf("WARNING: Could not enable THP - do: 'echo madvise > /sys/kernel/mm/transparent_hugepage/enabled'\n");
+ }
+ }
+ if (thp < 0) {
+ ret = madvise(buf, bytes, MADV_NOHUGEPAGE);
+ if (ret && !g->print_once) {
+ g->print_once = 1;
+ printf("WARNING: Could not disable THP: run a CONFIG_TRANSPARENT_HUGEPAGE kernel?\n");
+ }
+ }
+ }
+
+ if (init_zero) {
+ bzero(buf, bytes);
+ } else {
+ /* Initialize random contents, different in each word: */
+ if (init_random) {
+ u64 *wbuf = (void *)buf;
+ long off = rand();
+ long i;
+
+ for (i = 0; i < bytes/8; i++)
+ wbuf[i] = i + off;
+ }
+ }
+
+ /* Align to 2MB boundary: */
+ buf = (void *)(((unsigned long)buf + HPSIZE-1) & ~(HPSIZE-1));
+
+ /* Restore affinity: */
+ if (init_cpu0) {
+ bind_to_cpumask(orig_mask);
+ mempol_restore();
+ }
+
+ return buf;
+}
+
+static void free_data(void *data, ssize_t bytes)
+{
+ int ret;
+
+ if (!data)
+ return;
+
+ ret = munmap(data, bytes);
+ BUG_ON(ret);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes, zeroed:
+ */
+static void * zalloc_shared_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_SHARED, 1, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Create a shared memory buffer that can be shared between processes:
+ */
+static void * setup_shared_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_SHARED, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Allocate process-local memory - this will either be shared between
+ * threads of this process, or only be accessed by this thread:
+ */
+static void * setup_private_data(ssize_t bytes)
+{
+ return alloc_data(bytes, MAP_PRIVATE, 0, g->p.init_cpu0, g->p.thp, g->p.init_random);
+}
+
+/*
+ * Return a process-shared (global) mutex:
+ */
+static void init_global_mutex(pthread_mutex_t *mutex)
+{
+ pthread_mutexattr_t attr;
+
+ pthread_mutexattr_init(&attr);
+ pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED);
+ pthread_mutex_init(mutex, &attr);
+}
+
+static int parse_cpu_list(const char *arg)
+{
+ p0.cpu_list_str = strdup(arg);
+
+ dprintf("got CPU list: {%s}\n", p0.cpu_list_str);
+
+ return 0;
+}
+
+static int parse_setup_cpu_list(void)
+{
+ struct thread_data *td;
+ char *str0, *str;
+ int t;
+
+ if (!g->p.cpu_list_str)
+ return 0;
+
+ dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+ str0 = str = strdup(g->p.cpu_list_str);
+ t = 0;
+
+ BUG_ON(!str);
+
+ tprintf("# binding tasks to CPUs:\n");
+ tprintf("# ");
+
+ while (true) {
+ int bind_cpu, bind_cpu_0, bind_cpu_1;
+ char *tok, *tok_end, *tok_step, *tok_len, *tok_mul;
+ int bind_len;
+ int step;
+ int mul;
+
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+
+ tok_end = strstr(tok, "-");
+
+ dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+ if (!tok_end) {
+ /* Single CPU specified: */
+ bind_cpu_0 = bind_cpu_1 = atol(tok);
+ } else {
+ /* CPU range specified (for example: "5-11"): */
+ bind_cpu_0 = atol(tok);
+ bind_cpu_1 = atol(tok_end + 1);
+ }
+
+ step = 1;
+ tok_step = strstr(tok, "#");
+ if (tok_step) {
+ step = atol(tok_step + 1);
+ BUG_ON(step <= 0 || step >= g->p.nr_cpus);
+ }
+
+ /*
+ * Mask length.
+ * Eg: "--cpus 8_4-16#4" means: '--cpus 8_4,12_4,16_4',
+ * where the _4 means the next 4 CPUs are allowed.
+ */
+ bind_len = 1;
+ tok_len = strstr(tok, "_");
+ if (tok_len) {
+ bind_len = atol(tok_len + 1);
+ BUG_ON(bind_len <= 0 || bind_len > g->p.nr_cpus);
+ }
+
+ /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+ mul = 1;
+ tok_mul = strstr(tok, "x");
+ if (tok_mul) {
+ mul = atol(tok_mul + 1);
+ BUG_ON(mul <= 0);
+ }
+
+ dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
+
+ if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
+ printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
+ return -1;
+ }
+
+ BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
+ BUG_ON(bind_cpu_0 > bind_cpu_1);
+
+ for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
+ int i;
+
+ for (i = 0; i < mul; i++) {
+ int cpu;
+
+ if (t >= g->p.nr_tasks) {
+ printf("\n# NOTE: ignoring bind CPUs starting at CPU#%d\n #", bind_cpu);
+ goto out;
+ }
+ td = g->threads + t;
+
+ if (t)
+ tprintf(",");
+ if (bind_len > 1) {
+ tprintf("%2d/%d", bind_cpu, bind_len);
+ } else {
+ tprintf("%2d", bind_cpu);
+ }
+
+ CPU_ZERO(&td->bind_cpumask);
+ for (cpu = bind_cpu; cpu < bind_cpu+bind_len; cpu++) {
+ BUG_ON(cpu < 0 || cpu >= g->p.nr_cpus);
+ CPU_SET(cpu, &td->bind_cpumask);
+ }
+ t++;
+ }
+ }
+ }
+out:
+
+ tprintf("\n");
+
+ if (t < g->p.nr_tasks)
+ printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+ free(str0);
+ return 0;
+}
+
+static int parse_cpus_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ return parse_cpu_list(arg);
+}
+
+static int parse_node_list(const char *arg)
+{
+ p0.node_list_str = strdup(arg);
+
+ dprintf("got NODE list: {%s}\n", p0.node_list_str);
+
+ return 0;
+}
+
+static int parse_setup_node_list(void)
+{
+ struct thread_data *td;
+ char *str0, *str;
+ int t;
+
+ if (!g->p.node_list_str)
+ return 0;
+
+ dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
+
+ str0 = str = strdup(g->p.node_list_str);
+ t = 0;
+
+ BUG_ON(!str);
+
+ tprintf("# binding tasks to NODEs:\n");
+ tprintf("# ");
+
+ while (true) {
+ int bind_node, bind_node_0, bind_node_1;
+ char *tok, *tok_end, *tok_step, *tok_mul;
+ int step;
+ int mul;
+
+ tok = strsep(&str, ",");
+ if (!tok)
+ break;
+
+ tok_end = strstr(tok, "-");
+
+ dprintf("\ntoken: {%s}, end: {%s}\n", tok, tok_end);
+ if (!tok_end) {
+ /* Single NODE specified: */
+ bind_node_0 = bind_node_1 = atol(tok);
+ } else {
+ /* NODE range specified (for example: "5-11"): */
+ bind_node_0 = atol(tok);
+ bind_node_1 = atol(tok_end + 1);
+ }
+
+ step = 1;
+ tok_step = strstr(tok, "#");
+ if (tok_step) {
+ step = atol(tok_step + 1);
+ BUG_ON(step <= 0 || step >= g->p.nr_nodes);
+ }
+
+ /* Multiplicator shortcut, "0x8" is a shortcut for: "0,0,0,0,0,0,0,0" */
+ mul = 1;
+ tok_mul = strstr(tok, "x");
+ if (tok_mul) {
+ mul = atol(tok_mul + 1);
+ BUG_ON(mul <= 0);
+ }
+
+ dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
+
+ if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
+ printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes);
+ return -1;
+ }
+
+ BUG_ON(bind_node_0 < 0 || bind_node_1 < 0);
+ BUG_ON(bind_node_0 > bind_node_1);
+
+ for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
+ int i;
+
+ for (i = 0; i < mul; i++) {
+ if (t >= g->p.nr_tasks) {
+ printf("\n# NOTE: ignoring bind NODEs starting at NODE#%d\n", bind_node);
+ goto out;
+ }
+ td = g->threads + t;
+
+ if (!t)
+ tprintf(" %2d", bind_node);
+ else
+ tprintf(",%2d", bind_node);
+
+ td->bind_node = bind_node;
+ t++;
+ }
+ }
+ }
+out:
+
+ tprintf("\n");
+
+ if (t < g->p.nr_tasks)
+ printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
+
+ free(str0);
+ return 0;
+}
+
+static int parse_nodes_opt(const struct option *opt __maybe_unused,
+ const char *arg, int unset __maybe_unused)
+{
+ if (!arg)
+ return -1;
+
+ return parse_node_list(arg);
+
+ return 0;
+}
+
+#define BIT(x) (1ul << x)
+
+static inline uint32_t lfsr_32(uint32_t lfsr)
+{
+ const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
+ return (lfsr>>1) ^ ((0x0u - (lfsr & 0x1u)) & taps);
+}
+
+/*
+ * Make sure there's real data dependency to RAM (when read
+ * accesses are enabled), so the compiler, the CPU and the
+ * kernel (KSM, zero page, etc.) cannot optimize away RAM
+ * accesses:
+ */
+static inline u64 access_data(u64 *data __attribute__((unused)), u64 val)
+{
+ if (g->p.data_reads)
+ val += *data;
+ if (g->p.data_writes)
+ *data = val + 1;
+ return val;
+}
+
+/*
+ * The worker process does two types of work, a forwards going
+ * loop and a backwards going loop.
+ *
+ * We do this so that on multiprocessor systems we do not create
+ * a 'train' of processing, with highly synchronized processes,
+ * skewing the whole benchmark.
+ */
+static u64 do_work(u8 *__data, long bytes, int nr, int nr_max, int loop, u64 val)
+{
+ long words = bytes/sizeof(u64);
+ u64 *data = (void *)__data;
+ long chunk_0, chunk_1;
+ u64 *d0, *d, *d1;
+ long off;
+ long i;
+
+ BUG_ON(!data && words);
+ BUG_ON(data && !words);
+
+ if (!data)
+ return val;
+
+ /* Very simple memset() work variant: */
+ if (g->p.data_zero_memset && !g->p.data_rand_walk) {
+ bzero(data, bytes);
+ return val;
+ }
+
+ /* Spread out by PID/TID nr and by loop nr: */
+ chunk_0 = words/nr_max;
+ chunk_1 = words/g->p.nr_loops;
+ off = nr*chunk_0 + loop*chunk_1;
+
+ while (off >= words)
+ off -= words;
+
+ if (g->p.data_rand_walk) {
+ u32 lfsr = nr + loop + val;
+ int j;
+
+ for (i = 0; i < words/1024; i++) {
+ long start, end;
+
+ lfsr = lfsr_32(lfsr);
+
+ start = lfsr % words;
+ end = min(start + 1024, words-1);
+
+ if (g->p.data_zero_memset) {
+ bzero(data + start, (end-start) * sizeof(u64));
+ } else {
+ for (j = start; j < end; j++)
+ val = access_data(data + j, val);
+ }
+ }
+ } else if (!g->p.data_backwards || (nr + loop) & 1) {
+
+ d0 = data + off;
+ d = data + off + 1;
+ d1 = data + words;
+
+ /* Process data forwards: */
+ for (;;) {
+ if (unlikely(d >= d1))
+ d = data;
+ if (unlikely(d == d0))
+ break;
+
+ val = access_data(d, val);
+
+ d++;
+ }
+ } else {
+ /* Process data backwards: */
+
+ d0 = data + off;
+ d = data + off - 1;
+ d1 = data + words;
+
+ /* Process data forwards: */
+ for (;;) {
+ if (unlikely(d < data))
+ d = data + words-1;
+ if (unlikely(d == d0))
+ break;
+
+ val = access_data(d, val);
+
+ d--;
+ }
+ }
+
+ return val;
+}
+
+static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
+{
+ unsigned int cpu;
+
+ cpu = sched_getcpu();
+
+ g->threads[task_nr].curr_cpu = cpu;
+ prctl(0, bytes_worked);
+}
+
+#define MAX_NR_NODES 64
+
+/*
+ * Count the number of nodes a process's threads
+ * are spread out on.
+ *
+ * A count of 1 means that the process is compressed
+ * to a single node. A count of g->p.nr_nodes means it's
+ * spread out on the whole system.
+ */
+static int count_process_nodes(int process_nr)
+{
+ char node_present[MAX_NR_NODES] = { 0, };
+ int nodes;
+ int n, t;
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ struct thread_data *td;
+ int task_nr;
+ int node;
+
+ task_nr = process_nr*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ node = numa_node_of_cpu(td->curr_cpu);
+ node_present[node] = 1;
+ }
+
+ nodes = 0;
+
+ for (n = 0; n < MAX_NR_NODES; n++)
+ nodes += node_present[n];
+
+ return nodes;
+}
+
+/*
+ * Count the number of distinct process-threads a node contains.
+ *
+ * A count of 1 means that the node contains only a single
+ * process. If all nodes on the system contain at most one
+ * process then we are well-converged.
+ */
+static int count_node_processes(int node)
+{
+ int processes = 0;
+ int t, p;
+
+ for (p = 0; p < g->p.nr_proc; p++) {
+ for (t = 0; t < g->p.nr_threads; t++) {
+ struct thread_data *td;
+ int task_nr;
+ int n;
+
+ task_nr = p*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ n = numa_node_of_cpu(td->curr_cpu);
+ if (n == node) {
+ processes++;
+ break;
+ }
+ }
+ }
+
+ return processes;
+}
+
+static void calc_convergence_compression(int *strong)
+{
+ unsigned int nodes_min, nodes_max;
+ int p;
+
+ nodes_min = -1;
+ nodes_max = 0;
+
+ for (p = 0; p < g->p.nr_proc; p++) {
+ unsigned int nodes = count_process_nodes(p);
+
+ nodes_min = min(nodes, nodes_min);
+ nodes_max = max(nodes, nodes_max);
+ }
+
+ /* Strong convergence: all threads compress on a single node: */
+ if (nodes_min == 1 && nodes_max == 1) {
+ *strong = 1;
+ } else {
+ *strong = 0;
+ tprintf(" {%d-%d}", nodes_min, nodes_max);
+ }
+}
+
+static void calc_convergence(double runtime_ns_max, double *convergence)
+{
+ unsigned int loops_done_min, loops_done_max;
+ int process_groups;
+ int nodes[MAX_NR_NODES];
+ int distance;
+ int nr_min;
+ int nr_max;
+ int strong;
+ int sum;
+ int nr;
+ int node;
+ int cpu;
+ int t;
+
+ if (!g->p.show_convergence && !g->p.measure_convergence)
+ return;
+
+ for (node = 0; node < g->p.nr_nodes; node++)
+ nodes[node] = 0;
+
+ loops_done_min = -1;
+ loops_done_max = 0;
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ unsigned int loops_done;
+
+ cpu = td->curr_cpu;
+
+ /* Not all threads have written it yet: */
+ if (cpu < 0)
+ continue;
+
+ node = numa_node_of_cpu(cpu);
+
+ nodes[node]++;
+
+ loops_done = td->loops_done;
+ loops_done_min = min(loops_done, loops_done_min);
+ loops_done_max = max(loops_done, loops_done_max);
+ }
+
+ nr_max = 0;
+ nr_min = g->p.nr_tasks;
+ sum = 0;
+
+ for (node = 0; node < g->p.nr_nodes; node++) {
+ nr = nodes[node];
+ nr_min = min(nr, nr_min);
+ nr_max = max(nr, nr_max);
+ sum += nr;
+ }
+ BUG_ON(nr_min > nr_max);
+
+ BUG_ON(sum > g->p.nr_tasks);
+
+ if (0 && (sum < g->p.nr_tasks))
+ return;
+
+ /*
+ * Count the number of distinct process groups present
+ * on nodes - when we are converged this will decrease
+ * to g->p.nr_proc:
+ */
+ process_groups = 0;
+
+ for (node = 0; node < g->p.nr_nodes; node++) {
+ int processes = count_node_processes(node);
+
+ nr = nodes[node];
+ tprintf(" %2d/%-2d", nr, processes);
+
+ process_groups += processes;
+ }
+
+ distance = nr_max - nr_min;
+
+ tprintf(" [%2d/%-2d]", distance, process_groups);
+
+ tprintf(" l:%3d-%-3d (%3d)",
+ loops_done_min, loops_done_max, loops_done_max-loops_done_min);
+
+ if (loops_done_min && loops_done_max) {
+ double skew = 1.0 - (double)loops_done_min/loops_done_max;
+
+ tprintf(" [%4.1f%%]", skew * 100.0);
+ }
+
+ calc_convergence_compression(&strong);
+
+ if (strong && process_groups == g->p.nr_proc) {
+ if (!*convergence) {
+ *convergence = runtime_ns_max;
+ tprintf(" (%6.1fs converged)\n", *convergence/1e9);
+ if (g->p.measure_convergence) {
+ g->all_converged = true;
+ g->stop_work = true;
+ }
+ }
+ } else {
+ if (*convergence) {
+ tprintf(" (%6.1fs de-converged)", runtime_ns_max/1e9);
+ *convergence = 0;
+ }
+ tprintf("\n");
+ }
+}
+
+static void show_summary(double runtime_ns_max, int l, double *convergence)
+{
+ tprintf("\r # %5.1f%% [%.1f mins]",
+ (double)(l+1)/g->p.nr_loops*100.0, runtime_ns_max/1e9 / 60.0);
+
+ calc_convergence(runtime_ns_max, convergence);
+
+ if (g->p.show_details >= 0)
+ fflush(stdout);
+}
+
+static void *worker_thread(void *__tdata)
+{
+ struct thread_data *td = __tdata;
+ struct timeval start0, start, stop, diff;
+ int process_nr = td->process_nr;
+ int thread_nr = td->thread_nr;
+ unsigned long last_perturbance;
+ int task_nr = td->task_nr;
+ int details = g->p.show_details;
+ int first_task, last_task;
+ double convergence = 0;
+ u64 val = td->val;
+ double runtime_ns_max;
+ u8 *global_data;
+ u8 *process_data;
+ u8 *thread_data;
+ u64 bytes_done;
+ long work_done;
+ u32 l;
+
+ bind_to_cpumask(td->bind_cpumask);
+ bind_to_memnode(td->bind_node);
+
+ set_taskname("thread %d/%d", process_nr, thread_nr);
+
+ global_data = g->data;
+ process_data = td->process_data;
+ thread_data = setup_private_data(g->p.bytes_thread);
+
+ bytes_done = 0;
+
+ last_task = 0;
+ if (process_nr == g->p.nr_proc-1 && thread_nr == g->p.nr_threads-1)
+ last_task = 1;
+
+ first_task = 0;
+ if (process_nr == 0 && thread_nr == 0)
+ first_task = 1;
+
+ if (details >= 2) {
+ printf("# thread %2d / %2d global mem: %p, process mem: %p, thread mem: %p\n",
+ process_nr, thread_nr, global_data, process_data, thread_data);
+ }
+
+ if (g->p.serialize_startup) {
+ pthread_mutex_lock(&g->startup_mutex);
+ g->nr_tasks_started++;
+ pthread_mutex_unlock(&g->startup_mutex);
+
+ /* Here we will wait for the main process to start us all at once: */
+ pthread_mutex_lock(&g->start_work_mutex);
+ g->nr_tasks_working++;
+
+ /* Last one wake the main process: */
+ if (g->nr_tasks_working == g->p.nr_tasks)
+ pthread_mutex_unlock(&g->startup_done_mutex);
+
+ pthread_mutex_unlock(&g->start_work_mutex);
+ }
+
+ gettimeofday(&start0, NULL);
+
+ start = stop = start0;
+ last_perturbance = start.tv_sec;
+
+ for (l = 0; l < g->p.nr_loops; l++) {
+ start = stop;
+
+ if (g->stop_work)
+ break;
+
+ val += do_work(global_data, g->p.bytes_global, process_nr, g->p.nr_proc, l, val);
+ val += do_work(process_data, g->p.bytes_process, thread_nr, g->p.nr_threads, l, val);
+ val += do_work(thread_data, g->p.bytes_thread, 0, 1, l, val);
+
+ if (g->p.sleep_usecs) {
+ pthread_mutex_lock(td->process_lock);
+ usleep(g->p.sleep_usecs);
+ pthread_mutex_unlock(td->process_lock);
+ }
+ /*
+ * Amount of work to be done under a process-global lock:
+ */
+ if (g->p.bytes_process_locked) {
+ pthread_mutex_lock(td->process_lock);
+ val += do_work(process_data, g->p.bytes_process_locked, thread_nr, g->p.nr_threads, l, val);
+ pthread_mutex_unlock(td->process_lock);
+ }
+
+ work_done = g->p.bytes_global + g->p.bytes_process +
+ g->p.bytes_process_locked + g->p.bytes_thread;
+
+ update_curr_cpu(task_nr, work_done);
+ bytes_done += work_done;
+
+ if (details < 0 && !g->p.perturb_secs && !g->p.measure_convergence && !g->p.nr_secs)
+ continue;
+
+ td->loops_done = l;
+
+ gettimeofday(&stop, NULL);
+
+ /* Check whether our max runtime timed out: */
+ if (g->p.nr_secs) {
+ timersub(&stop, &start0, &diff);
+ if ((u32)diff.tv_sec >= g->p.nr_secs) {
+ g->stop_work = true;
+ break;
+ }
+ }
+
+ /* Update the summary at most once per second: */
+ if (start.tv_sec == stop.tv_sec)
+ continue;
+
+ /*
+ * Perturb the first task's equilibrium every g->p.perturb_secs seconds,
+ * by migrating to CPU#0:
+ */
+ if (first_task && g->p.perturb_secs && (int)(stop.tv_sec - last_perturbance) >= g->p.perturb_secs) {
+ cpu_set_t orig_mask;
+ int target_cpu;
+ int this_cpu;
+
+ last_perturbance = stop.tv_sec;
+
+ /*
+ * Depending on where we are running, move into
+ * the other half of the system, to create some
+ * real disturbance:
+ */
+ this_cpu = g->threads[task_nr].curr_cpu;
+ if (this_cpu < g->p.nr_cpus/2)
+ target_cpu = g->p.nr_cpus-1;
+ else
+ target_cpu = 0;
+
+ orig_mask = bind_to_cpu(target_cpu);
+
+ /* Here we are running on the target CPU already */
+ if (details >= 1)
+ printf(" (injecting perturbalance, moved to CPU#%d)\n", target_cpu);
+
+ bind_to_cpumask(orig_mask);
+ }
+
+ if (details >= 3) {
+ timersub(&stop, &start, &diff);
+ runtime_ns_max = diff.tv_sec * 1000000000;
+ runtime_ns_max += diff.tv_usec * 1000;
+
+ if (details >= 0) {
+ printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
+ process_nr, thread_nr, runtime_ns_max / bytes_done, val);
+ }
+ fflush(stdout);
+ }
+ if (!last_task)
+ continue;
+
+ timersub(&stop, &start0, &diff);
+ runtime_ns_max = diff.tv_sec * 1000000000ULL;
+ runtime_ns_max += diff.tv_usec * 1000ULL;
+
+ show_summary(runtime_ns_max, l, &convergence);
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start0, &diff);
+ td->runtime_ns = diff.tv_sec * 1000000000ULL;
+ td->runtime_ns += diff.tv_usec * 1000ULL;
+
+ free_data(thread_data, g->p.bytes_thread);
+
+ pthread_mutex_lock(&g->stop_work_mutex);
+ g->bytes_done += bytes_done;
+ pthread_mutex_unlock(&g->stop_work_mutex);
+
+ return NULL;
+}
+
+/*
+ * A worker process starts a couple of threads:
+ */
+static void worker_process(int process_nr)
+{
+ pthread_mutex_t process_lock;
+ struct thread_data *td;
+ pthread_t *pthreads;
+ u8 *process_data;
+ int task_nr;
+ int ret;
+ int t;
+
+ pthread_mutex_init(&process_lock, NULL);
+ set_taskname("process %d", process_nr);
+
+ /*
+ * Pick up the memory policy and the CPU binding of our first thread,
+ * so that we initialize memory accordingly:
+ */
+ task_nr = process_nr*g->p.nr_threads;
+ td = g->threads + task_nr;
+
+ bind_to_memnode(td->bind_node);
+ bind_to_cpumask(td->bind_cpumask);
+
+ pthreads = zalloc(g->p.nr_threads * sizeof(pthread_t));
+ process_data = setup_private_data(g->p.bytes_process);
+
+ if (g->p.show_details >= 3) {
+ printf(" # process %2d global mem: %p, process mem: %p\n",
+ process_nr, g->data, process_data);
+ }
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ task_nr = process_nr*g->p.nr_threads + t;
+ td = g->threads + task_nr;
+
+ td->process_data = process_data;
+ td->process_nr = process_nr;
+ td->thread_nr = t;
+ td->task_nr = task_nr;
+ td->val = rand();
+ td->curr_cpu = -1;
+ td->process_lock = &process_lock;
+
+ ret = pthread_create(pthreads + t, NULL, worker_thread, td);
+ BUG_ON(ret);
+ }
+
+ for (t = 0; t < g->p.nr_threads; t++) {
+ ret = pthread_join(pthreads[t], NULL);
+ BUG_ON(ret);
+ }
+
+ free_data(process_data, g->p.bytes_process);
+ free(pthreads);
+}
+
+static void print_summary(void)
+{
+ if (g->p.show_details < 0)
+ return;
+
+ printf("\n ###\n");
+ printf(" # %d %s will execute (on %d nodes, %d CPUs):\n",
+ g->p.nr_tasks, g->p.nr_tasks == 1 ? "task" : "tasks", g->p.nr_nodes, g->p.nr_cpus);
+ printf(" # %5dx %5ldMB global shared mem operations\n",
+ g->p.nr_loops, g->p.bytes_global/1024/1024);
+ printf(" # %5dx %5ldMB process shared mem operations\n",
+ g->p.nr_loops, g->p.bytes_process/1024/1024);
+ printf(" # %5dx %5ldMB thread local mem operations\n",
+ g->p.nr_loops, g->p.bytes_thread/1024/1024);
+
+ printf(" ###\n");
+
+ printf("\n ###\n"); fflush(stdout);
+}
+
+static void init_thread_data(void)
+{
+ ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+ int t;
+
+ g->threads = zalloc_shared_data(size);
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ struct thread_data *td = g->threads + t;
+ int cpu;
+
+ /* Allow all nodes by default: */
+ td->bind_node = -1;
+
+ /* Allow all CPUs by default: */
+ CPU_ZERO(&td->bind_cpumask);
+ for (cpu = 0; cpu < g->p.nr_cpus; cpu++)
+ CPU_SET(cpu, &td->bind_cpumask);
+ }
+}
+
+static void deinit_thread_data(void)
+{
+ ssize_t size = sizeof(*g->threads)*g->p.nr_tasks;
+
+ free_data(g->threads, size);
+}
+
+static int init(void)
+{
+ g = (void *)alloc_data(sizeof(*g), MAP_SHARED, 1, 0, 0 /* THP */, 0);
+
+ /* Copy over options: */
+ g->p = p0;
+
+ g->p.nr_cpus = numa_num_configured_cpus();
+
+ g->p.nr_nodes = numa_max_node() + 1;
+
+ /* char array in count_process_nodes(): */
+ BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+
+ if (g->p.show_quiet && !g->p.show_details)
+ g->p.show_details = -1;
+
+ /* Some memory should be specified: */
+ if (!g->p.mb_global_str && !g->p.mb_proc_str && !g->p.mb_thread_str)
+ return -1;
+
+ if (g->p.mb_global_str) {
+ g->p.mb_global = atof(g->p.mb_global_str);
+ BUG_ON(g->p.mb_global < 0);
+ }
+
+ if (g->p.mb_proc_str) {
+ g->p.mb_proc = atof(g->p.mb_proc_str);
+ BUG_ON(g->p.mb_proc < 0);
+ }
+
+ if (g->p.mb_proc_locked_str) {
+ g->p.mb_proc_locked = atof(g->p.mb_proc_locked_str);
+ BUG_ON(g->p.mb_proc_locked < 0);
+ BUG_ON(g->p.mb_proc_locked > g->p.mb_proc);
+ }
+
+ if (g->p.mb_thread_str) {
+ g->p.mb_thread = atof(g->p.mb_thread_str);
+ BUG_ON(g->p.mb_thread < 0);
+ }
+
+ BUG_ON(g->p.nr_threads <= 0);
+ BUG_ON(g->p.nr_proc <= 0);
+
+ g->p.nr_tasks = g->p.nr_proc*g->p.nr_threads;
+
+ g->p.bytes_global = g->p.mb_global *1024L*1024L;
+ g->p.bytes_process = g->p.mb_proc *1024L*1024L;
+ g->p.bytes_process_locked = g->p.mb_proc_locked *1024L*1024L;
+ g->p.bytes_thread = g->p.mb_thread *1024L*1024L;
+
+ g->data = setup_shared_data(g->p.bytes_global);
+
+ /* Startup serialization: */
+ init_global_mutex(&g->start_work_mutex);
+ init_global_mutex(&g->startup_mutex);
+ init_global_mutex(&g->startup_done_mutex);
+ init_global_mutex(&g->stop_work_mutex);
+
+ init_thread_data();
+
+ tprintf("#\n");
+ if (parse_setup_cpu_list() || parse_setup_node_list())
+ return -1;
+ tprintf("#\n");
+
+ print_summary();
+
+ return 0;
+}
+
+static void deinit(void)
+{
+ free_data(g->data, g->p.bytes_global);
+ g->data = NULL;
+
+ deinit_thread_data();
+
+ free_data(g, sizeof(*g));
+ g = NULL;
+}
+
+/*
+ * Print a short or long result, depending on the verbosity setting:
+ */
+static void print_res(const char *name, double val,
+ const char *txt_unit, const char *txt_short, const char *txt_long)
+{
+ if (!name)
+ name = "main,";
+
+ if (g->p.show_quiet)
+ printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short);
+ else
+ printf(" %14.3f %s\n", val, txt_long);
+}
+
+static int __bench_numa(const char *name)
+{
+ struct timeval start, stop, diff;
+ u64 runtime_ns_min, runtime_ns_sum;
+ pid_t *pids, pid, wpid;
+ double delta_runtime;
+ double runtime_avg;
+ double runtime_sec_max;
+ double runtime_sec_min;
+ int wait_stat;
+ double bytes;
+ int i, t;
+
+ if (init())
+ return -1;
+
+ pids = zalloc(g->p.nr_proc * sizeof(*pids));
+ pid = -1;
+
+ /* All threads try to acquire it, this way we can wait for them to start up: */
+ pthread_mutex_lock(&g->start_work_mutex);
+
+ if (g->p.serialize_startup) {
+ tprintf(" #\n");
+ tprintf(" # Startup synchronization: ..."); fflush(stdout);
+ }
+
+ gettimeofday(&start, NULL);
+
+ for (i = 0; i < g->p.nr_proc; i++) {
+ pid = fork();
+ dprintf(" # process %2d: PID %d\n", i, pid);
+
+ BUG_ON(pid < 0);
+ if (!pid) {
+ /* Child process: */
+ worker_process(i);
+
+ exit(0);
+ }
+ pids[i] = pid;
+
+ }
+ /* Wait for all the threads to start up: */
+ while (g->nr_tasks_started != g->p.nr_tasks)
+ usleep(1000);
+
+ BUG_ON(g->nr_tasks_started != g->p.nr_tasks);
+
+ if (g->p.serialize_startup) {
+ double startup_sec;
+
+ pthread_mutex_lock(&g->startup_done_mutex);
+
+ /* This will start all threads: */
+ pthread_mutex_unlock(&g->start_work_mutex);
+
+ /* This mutex is locked - the last started thread will wake us: */
+ pthread_mutex_lock(&g->startup_done_mutex);
+
+ gettimeofday(&stop, NULL);
+
+ timersub(&stop, &start, &diff);
+
+ startup_sec = diff.tv_sec * 1000000000.0;
+ startup_sec += diff.tv_usec * 1000.0;
+ startup_sec /= 1e9;
+
+ tprintf(" threads initialized in %.6f seconds.\n", startup_sec);
+ tprintf(" #\n");
+
+ start = stop;
+ pthread_mutex_unlock(&g->startup_done_mutex);
+ } else {
+ gettimeofday(&start, NULL);
+ }
+
+ /* Parent process: */
+
+
+ for (i = 0; i < g->p.nr_proc; i++) {
+ wpid = waitpid(pids[i], &wait_stat, 0);
+ BUG_ON(wpid < 0);
+ BUG_ON(!WIFEXITED(wait_stat));
+
+ }
+
+ runtime_ns_sum = 0;
+ runtime_ns_min = -1LL;
+
+ for (t = 0; t < g->p.nr_tasks; t++) {
+ u64 thread_runtime_ns = g->threads[t].runtime_ns;
+
+ runtime_ns_sum += thread_runtime_ns;
+ runtime_ns_min = min(thread_runtime_ns, runtime_ns_min);
+ }
+
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
+ BUG_ON(bench_format != BENCH_FORMAT_DEFAULT);
+
+ tprintf("\n ###\n");
+ tprintf("\n");
+
+ runtime_sec_max = diff.tv_sec * 1000000000.0;
+ runtime_sec_max += diff.tv_usec * 1000.0;
+ runtime_sec_max /= 1e9;
+
+ runtime_sec_min = runtime_ns_min/1e9;
+
+ bytes = g->bytes_done;
+ runtime_avg = (double)runtime_ns_sum / g->p.nr_tasks / 1e9;
+
+ if (g->p.measure_convergence) {
+ print_res(name, runtime_sec_max,
+ "secs,", "NUMA-convergence-latency", "secs latency to NUMA-converge");
+ }
+
+ print_res(name, runtime_sec_max,
+ "secs,", "runtime-max/thread", "secs slowest (max) thread-runtime");
+
+ print_res(name, runtime_sec_min,
+ "secs,", "runtime-min/thread", "secs fastest (min) thread-runtime");
+
+ print_res(name, runtime_avg,
+ "secs,", "runtime-avg/thread", "secs average thread-runtime");
+
+ delta_runtime = (runtime_sec_max - runtime_sec_min)/2.0;
+ print_res(name, delta_runtime / runtime_sec_max * 100.0,
+ "%,", "spread-runtime/thread", "% difference between max/avg runtime");
+
+ print_res(name, bytes / g->p.nr_tasks / 1e9,
+ "GB,", "data/thread", "GB data processed, per thread");
+
+ print_res(name, bytes / 1e9,
+ "GB,", "data-total", "GB data processed, total");
+
+ print_res(name, runtime_sec_max * 1e9 / (bytes / g->p.nr_tasks),
+ "nsecs,", "runtime/byte/thread","nsecs/byte/thread runtime");
+
+ print_res(name, bytes / g->p.nr_tasks / 1e9 / runtime_sec_max,
+ "GB/sec,", "thread-speed", "GB/sec/thread speed");
+
+ print_res(name, bytes / runtime_sec_max / 1e9,
+ "GB/sec,", "total-speed", "GB/sec total speed");
+
+ free(pids);
+
+ deinit();
+
+ return 0;
+}
+
+#define MAX_ARGS 50
+
+static int command_size(const char **argv)
+{
+ int size = 0;
+
+ while (*argv) {
+ size++;
+ argv++;
+ }
+
+ BUG_ON(size >= MAX_ARGS);
+
+ return size;
+}
+
+static void init_params(struct params *p, const char *name, int argc, const char **argv)
+{
+ int i;
+
+ printf("\n # Running %s \"perf bench numa", name);
+
+ for (i = 0; i < argc; i++)
+ printf(" %s", argv[i]);
+
+ printf("\"\n");
+
+ memset(p, 0, sizeof(*p));
+
+ /* Initialize nonzero defaults: */
+
+ p->serialize_startup = 1;
+ p->data_reads = true;
+ p->data_writes = true;
+ p->data_backwards = true;
+ p->data_rand_walk = true;
+ p->nr_loops = -1;
+ p->init_random = true;
+ p->mb_global_str = "1";
+ p->nr_proc = 1;
+ p->nr_threads = 1;
+ p->nr_secs = 5;
+ p->run_all = argc == 1;
+}
+
+static int run_bench_numa(const char *name, const char **argv)
+{
+ int argc = command_size(argv);
+
+ init_params(&p0, name, argc, argv);
+ argc = parse_options(argc, argv, options, bench_numa_usage, 0);
+ if (argc)
+ goto err;
+
+ if (__bench_numa(name))
+ goto err;
+
+ return 0;
+
+err:
+ return -1;
+}
+
+#define OPT_BW_RAM "-s", "20", "-zZq", "--thp", " 1", "--no-data_rand_walk"
+#define OPT_BW_RAM_NOTHP OPT_BW_RAM, "--thp", "-1"
+
+#define OPT_CONV "-s", "100", "-zZ0qcm", "--thp", " 1"
+#define OPT_CONV_NOTHP OPT_CONV, "--thp", "-1"
+
+#define OPT_BW "-s", "20", "-zZ0q", "--thp", " 1"
+#define OPT_BW_NOTHP OPT_BW, "--thp", "-1"
+
+/*
+ * The built-in test-suite executed by "perf bench numa -a".
+ *
+ * (A minimum of 4 nodes and 16 GB of RAM is recommended.)
+ */
+static const char *tests[][MAX_ARGS] = {
+ /* Basic single-stream NUMA bandwidth measurements: */
+ { "RAM-bw-local,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "0", OPT_BW_RAM },
+ { "RAM-bw-local-NOTHP,",
+ "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "0", OPT_BW_RAM_NOTHP },
+ { "RAM-bw-remote,", "mem", "-p", "1", "-t", "1", "-P", "1024",
+ "-C" , "0", "-M", "1", OPT_BW_RAM },
+
+ /* 2-stream NUMA bandwidth measurements: */
+ { "RAM-bw-local-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,2", "-M", "0x2", OPT_BW_RAM },
+ { "RAM-bw-remote-2x,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,2", "-M", "1x2", OPT_BW_RAM },
+
+ /* Cross-stream NUMA bandwidth measurement: */
+ { "RAM-bw-cross,", "mem", "-p", "2", "-t", "1", "-P", "1024",
+ "-C", "0,8", "-M", "1,0", OPT_BW_RAM },
+
+ /* Convergence latency measurements: */
+ { " 1x3-convergence,", "mem", "-p", "1", "-t", "3", "-P", "512", OPT_CONV },
+ { " 1x4-convergence,", "mem", "-p", "1", "-t", "4", "-P", "512", OPT_CONV },
+ { " 1x6-convergence,", "mem", "-p", "1", "-t", "6", "-P", "1020", OPT_CONV },
+ { " 2x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 3x3-convergence,", "mem", "-p", "3", "-t", "3", "-P", "1020", OPT_CONV },
+ { " 4x4-convergence,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV },
+ { " 4x4-convergence-NOTHP,",
+ "mem", "-p", "4", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
+ { " 4x6-convergence,", "mem", "-p", "4", "-t", "6", "-P", "1020", OPT_CONV },
+ { " 4x8-convergence,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_CONV },
+ { " 8x4-convergence,", "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV },
+ { " 8x4-convergence-NOTHP,",
+ "mem", "-p", "8", "-t", "4", "-P", "512", OPT_CONV_NOTHP },
+ { " 3x1-convergence,", "mem", "-p", "3", "-t", "1", "-P", "512", OPT_CONV },
+ { " 4x1-convergence,", "mem", "-p", "4", "-t", "1", "-P", "512", OPT_CONV },
+ { " 8x1-convergence,", "mem", "-p", "8", "-t", "1", "-P", "512", OPT_CONV },
+ { "16x1-convergence,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_CONV },
+ { "32x1-convergence,", "mem", "-p", "32", "-t", "1", "-P", "128", OPT_CONV },
+
+ /* Various NUMA process/thread layout bandwidth measurements: */
+ { " 2x1-bw-process,", "mem", "-p", "2", "-t", "1", "-P", "1024", OPT_BW },
+ { " 3x1-bw-process,", "mem", "-p", "3", "-t", "1", "-P", "1024", OPT_BW },
+ { " 4x1-bw-process,", "mem", "-p", "4", "-t", "1", "-P", "1024", OPT_BW },
+ { " 8x1-bw-process,", "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW },
+ { " 8x1-bw-process-NOTHP,",
+ "mem", "-p", "8", "-t", "1", "-P", " 512", OPT_BW_NOTHP },
+ { "16x1-bw-process,", "mem", "-p", "16", "-t", "1", "-P", "256", OPT_BW },
+
+ { " 4x1-bw-thread,", "mem", "-p", "1", "-t", "4", "-T", "256", OPT_BW },
+ { " 8x1-bw-thread,", "mem", "-p", "1", "-t", "8", "-T", "256", OPT_BW },
+ { "16x1-bw-thread,", "mem", "-p", "1", "-t", "16", "-T", "128", OPT_BW },
+ { "32x1-bw-thread,", "mem", "-p", "1", "-t", "32", "-T", "64", OPT_BW },
+
+ { " 2x3-bw-thread,", "mem", "-p", "2", "-t", "3", "-P", "512", OPT_BW },
+ { " 4x4-bw-thread,", "mem", "-p", "4", "-t", "4", "-P", "512", OPT_BW },
+ { " 4x6-bw-thread,", "mem", "-p", "4", "-t", "6", "-P", "512", OPT_BW },
+ { " 4x8-bw-thread,", "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW },
+ { " 4x8-bw-thread-NOTHP,",
+ "mem", "-p", "4", "-t", "8", "-P", "512", OPT_BW_NOTHP },
+ { " 3x3-bw-thread,", "mem", "-p", "3", "-t", "3", "-P", "512", OPT_BW },
+ { " 5x5-bw-thread,", "mem", "-p", "5", "-t", "5", "-P", "512", OPT_BW },
+
+ { "2x16-bw-thread,", "mem", "-p", "2", "-t", "16", "-P", "512", OPT_BW },
+ { "1x32-bw-thread,", "mem", "-p", "1", "-t", "32", "-P", "2048", OPT_BW },
+
+ { "numa02-bw,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW },
+ { "numa02-bw-NOTHP,", "mem", "-p", "1", "-t", "32", "-T", "32", OPT_BW_NOTHP },
+ { "numa01-bw-thread,", "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW },
+ { "numa01-bw-thread-NOTHP,",
+ "mem", "-p", "2", "-t", "16", "-T", "192", OPT_BW_NOTHP },
+};
+
+static int bench_all(void)
+{
+ int nr = ARRAY_SIZE(tests);
+ int ret;
+ int i;
+
+ ret = system("echo ' #'; echo ' # Running test on: '$(uname -a); echo ' #'");
+ BUG_ON(ret < 0);
+
+ for (i = 0; i < nr; i++) {
+ run_bench_numa(tests[i][0], tests[i] + 1);
+ }
+
+ printf("\n");
+
+ return 0;
+}
+
+int bench_numa(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ init_params(&p0, "main,", argc, argv);
+ argc = parse_options(argc, argv, options, bench_numa_usage, 0);
+ if (argc)
+ goto err;
+
+ if (p0.run_all)
+ return bench_all();
+
+ if (__bench_numa(NULL))
+ goto err;
+
+ return 0;
+
+err:
+ usage_with_options(numa_usage, options);
+ return -1;
+}
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 69cfba8d4c6..07a8d7646a1 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -7,9 +7,7 @@
* Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
* http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
- *
*/
-
#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
@@ -28,12 +26,24 @@
#include <sys/time.h>
#include <sys/types.h>
+#include <pthread.h>
+
+struct thread_data {
+ int nr;
+ int pipe_read;
+ int pipe_write;
+ pthread_t pthread;
+};
+
#define LOOPS_DEFAULT 1000000
-static int loops = LOOPS_DEFAULT;
+static int loops = LOOPS_DEFAULT;
+
+/* Use processes by default: */
+static bool threaded;
static const struct option options[] = {
- OPT_INTEGER('l', "loop", &loops,
- "Specify number of loops"),
+ OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
+ OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"),
OPT_END()
};
@@ -42,13 +52,37 @@ static const char * const bench_sched_pipe_usage[] = {
NULL
};
-int bench_sched_pipe(int argc, const char **argv,
- const char *prefix __maybe_unused)
+static void *worker_thread(void *__tdata)
{
- int pipe_1[2], pipe_2[2];
+ struct thread_data *td = __tdata;
int m = 0, i;
+ int ret;
+
+ for (i = 0; i < loops; i++) {
+ if (!td->nr) {
+ ret = read(td->pipe_read, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ ret = write(td->pipe_write, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ } else {
+ ret = write(td->pipe_write, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ ret = read(td->pipe_read, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ }
+ }
+
+ return NULL;
+}
+
+int bench_sched_pipe(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ struct thread_data threads[2], *td;
+ int pipe_1[2], pipe_2[2];
struct timeval start, stop, diff;
unsigned long long result_usec = 0;
+ int nr_threads = 2;
+ int t;
/*
* why does "ret" exist?
@@ -58,43 +92,66 @@ int bench_sched_pipe(int argc, const char **argv,
int __maybe_unused ret, wait_stat;
pid_t pid, retpid __maybe_unused;
- argc = parse_options(argc, argv, options,
- bench_sched_pipe_usage, 0);
+ argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
BUG_ON(pipe(pipe_1));
BUG_ON(pipe(pipe_2));
- pid = fork();
- assert(pid >= 0);
-
gettimeofday(&start, NULL);
- if (!pid) {
- for (i = 0; i < loops; i++) {
- ret = read(pipe_1[0], &m, sizeof(int));
- ret = write(pipe_2[1], &m, sizeof(int));
- }
- } else {
- for (i = 0; i < loops; i++) {
- ret = write(pipe_1[1], &m, sizeof(int));
- ret = read(pipe_2[0], &m, sizeof(int));
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ td->nr = t;
+
+ if (t == 0) {
+ td->pipe_read = pipe_1[0];
+ td->pipe_write = pipe_2[1];
+ } else {
+ td->pipe_write = pipe_1[1];
+ td->pipe_read = pipe_2[0];
}
}
- gettimeofday(&stop, NULL);
- timersub(&stop, &start, &diff);
- if (pid) {
+ if (threaded) {
+
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ ret = pthread_create(&td->pthread, NULL, worker_thread, td);
+ BUG_ON(ret);
+ }
+
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ ret = pthread_join(td->pthread, NULL);
+ BUG_ON(ret);
+ }
+
+ } else {
+ pid = fork();
+ assert(pid >= 0);
+
+ if (!pid) {
+ worker_thread(threads + 0);
+ exit(0);
+ } else {
+ worker_thread(threads + 1);
+ }
+
retpid = waitpid(pid, &wait_stat, 0);
assert((retpid == pid) && WIFEXITED(wait_stat));
- } else {
- exit(0);
}
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
- printf("# Executed %d pipe operations between two tasks\n\n",
- loops);
+ printf("# Executed %d pipe operations between two %s\n\n",
+ loops, threaded ? "threads" : "processes");
result_usec = diff.tv_sec * 1000000;
result_usec += diff.tv_usec;
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 9ea38540b87..1ec429fef2b 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,22 +28,25 @@
#include "util/hist.h"
#include "util/session.h"
#include "util/tool.h"
+#include "util/data.h"
+#include "arch/common.h"
+#include <dlfcn.h>
#include <linux/bitmap.h>
struct perf_annotate {
struct perf_tool tool;
- char const *input_name;
- bool force, use_tui, use_stdio;
+ bool force, use_tui, use_stdio, use_gtk;
bool full_paths;
bool print_line;
+ bool skip_missing;
const char *sym_hist_filter;
const char *cpu_list;
DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
};
static int perf_evsel__add_sample(struct perf_evsel *evsel,
- struct perf_sample *sample,
+ struct perf_sample *sample __maybe_unused,
struct addr_location *al,
struct perf_annotate *ann)
{
@@ -62,21 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
- he = __hists__add_entry(&evsel->hists, al, NULL, 1);
+ he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
+ true);
if (he == NULL)
return -ENOMEM;
- ret = 0;
- if (he->ms.sym != NULL) {
- struct annotation *notes = symbol__annotation(he->ms.sym);
- if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
- return -ENOMEM;
-
- ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
- }
-
- evsel->hists.stats.total_period += sample->period;
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+ hists__inc_nr_samples(&evsel->hists, true);
return ret;
}
@@ -89,8 +84,7 @@ static int process_sample_event(struct perf_tool *tool,
struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
struct addr_location al;
- if (perf_event__preprocess_sample(event, machine, &al, sample,
- symbol__annotate_init) < 0) {
+ if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -108,17 +102,19 @@ static int process_sample_event(struct perf_tool *tool,
return 0;
}
-static int hist_entry__tty_annotate(struct hist_entry *he, int evidx,
+static int hist_entry__tty_annotate(struct hist_entry *he,
+ struct perf_evsel *evsel,
struct perf_annotate *ann)
{
- return symbol__tty_annotate(he->ms.sym, he->ms.map, evidx,
+ return symbol__tty_annotate(he->ms.sym, he->ms.map, evsel,
ann->print_line, ann->full_paths, 0, 0);
}
-static void hists__find_annotations(struct hists *self, int evidx,
+static void hists__find_annotations(struct hists *hists,
+ struct perf_evsel *evsel,
struct perf_annotate *ann)
{
- struct rb_node *nd = rb_first(&self->entries), *next;
+ struct rb_node *nd = rb_first(&hists->entries), *next;
int key = K_RIGHT;
while (nd) {
@@ -138,9 +134,32 @@ find_next:
continue;
}
- if (use_browser > 0) {
- key = hist_entry__tui_annotate(he, evidx, NULL, NULL, 0);
+ if (use_browser == 2) {
+ int ret;
+ int (*annotate)(struct hist_entry *he,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt);
+
+ annotate = dlsym(perf_gtk_handle,
+ "hist_entry__gtk_annotate");
+ if (annotate == NULL) {
+ ui__error("GTK browser not found!\n");
+ return;
+ }
+
+ ret = annotate(he, evsel, NULL);
+ if (!ret || !ann->skip_missing)
+ return;
+
+ /* skip missing symbols */
+ nd = rb_next(nd);
+ } else if (use_browser == 1) {
+ key = hist_entry__tui_annotate(he, evsel, NULL);
switch (key) {
+ case -1:
+ if (!ann->skip_missing)
+ return;
+ /* fall through */
case K_RIGHT:
next = rb_next(nd);
break;
@@ -154,15 +173,14 @@ find_next:
if (next != NULL)
nd = next;
} else {
- hist_entry__tty_annotate(he, evidx, ann);
+ hist_entry__tty_annotate(he, evsel, ann);
nd = rb_next(nd);
/*
* Since we have a hist_entry per IP for the same
* symbol, free he->ms.sym->src to signal we already
* processed this symbol.
*/
- free(notes->src);
- notes->src = NULL;
+ zfree(&notes->src);
}
}
}
@@ -173,12 +191,18 @@ static int __cmd_annotate(struct perf_annotate *ann)
struct perf_session *session;
struct perf_evsel *pos;
u64 total_nr_samples;
+ struct perf_data_file file = {
+ .path = input_name,
+ .mode = PERF_DATA_MODE_READ,
+ .force = ann->force,
+ };
- session = perf_session__new(ann->input_name, O_RDONLY,
- ann->force, false, &ann->tool);
+ session = perf_session__new(&file, false, &ann->tool);
if (session == NULL)
return -ENOMEM;
+ machines__set_symbol_filter(&session->machines, symbol__annotate_init);
+
if (ann->cpu_list) {
ret = perf_session__cpu_bitmap(session, ann->cpu_list,
ann->cpu_bitmap);
@@ -186,6 +210,12 @@ static int __cmd_annotate(struct perf_annotate *ann)
goto out_delete;
}
+ if (!objdump_path) {
+ ret = perf_session_env__lookup_objdump(&session->header.env);
+ if (ret)
+ goto out_delete;
+ }
+
ret = perf_session__process_events(session, &ann->tool);
if (ret)
goto out_delete;
@@ -202,22 +232,40 @@ static int __cmd_annotate(struct perf_annotate *ann)
perf_session__fprintf_dsos(session, stdout);
total_nr_samples = 0;
- list_for_each_entry(pos, &session->evlist->entries, node) {
+ evlist__for_each(session->evlist, pos) {
struct hists *hists = &pos->hists;
u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
if (nr_samples > 0) {
total_nr_samples += nr_samples;
- hists__collapse_resort(hists);
+ hists__collapse_resort(hists, NULL);
hists__output_resort(hists);
- hists__find_annotations(hists, pos->idx, ann);
+
+ if (symbol_conf.event_group &&
+ !perf_evsel__is_group_leader(pos))
+ continue;
+
+ hists__find_annotations(hists, pos, ann);
}
}
if (total_nr_samples == 0) {
- ui__error("The %s file has no samples!\n", session->filename);
+ ui__error("The %s file has no samples!\n", file.path);
goto out_delete;
}
+
+ if (use_browser == 2) {
+ void (*show_annotations)(void);
+
+ show_annotations = dlsym(perf_gtk_handle,
+ "perf_gtk__show_annotations");
+ if (show_annotations == NULL) {
+ ui__error("GTK browser not found!\n");
+ goto out_delete;
+ }
+ show_annotations();
+ }
+
out_delete:
/*
* Speed up the exit process, for large files this can
@@ -245,14 +293,16 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
- .fork = perf_event__process_task,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
.ordered_samples = true,
.ordering_requires_timestamps = true,
},
};
const struct option options[] = {
- OPT_STRING('i', "input", &annotate.input_name, "file",
+ OPT_STRING('i', "input", &input_name, "file",
"input file name"),
OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]",
"only consider symbols in these dsos"),
@@ -263,6 +313,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
"be more verbose (show symbol address, etc)"),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
+ OPT_BOOLEAN(0, "gtk", &annotate.use_gtk, "Use the GTK interface"),
OPT_BOOLEAN(0, "tui", &annotate.use_tui, "Use the TUI interface"),
OPT_BOOLEAN(0, "stdio", &annotate.use_stdio, "Use the stdio interface"),
OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
@@ -273,6 +324,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
"print matching source lines (may be slow)"),
OPT_BOOLEAN('P', "full-paths", &annotate.full_paths,
"Don't shorten the displayed pathnames"),
+ OPT_BOOLEAN(0, "skip-missing", &annotate.skip_missing,
+ "Skip symbols that cannot be annotated"),
OPT_STRING('C', "cpu", &annotate.cpu_list, "cpu", "list of cpus to profile"),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
@@ -284,6 +337,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
"Specify disassembler style (e.g. -M intel for intel syntax)"),
OPT_STRING(0, "objdump", &objdump_path, "path",
"objdump binary to use for disassembly and annotations"),
+ OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
+ "Show event group information together"),
OPT_END()
};
@@ -293,6 +348,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
use_browser = 0;
else if (annotate.use_tui)
use_browser = 1;
+ else if (annotate.use_gtk)
+ use_browser = 2;
setup_browser(true);
@@ -302,11 +359,12 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
if (symbol__init() < 0)
return -1;
- setup_sorting(annotate_usage, options);
+ if (setup_sorting() < 0)
+ usage_with_options(annotate_usage, options);
if (argc) {
/*
- * Special case: if there's an argument left then assume tha
+ * Special case: if there's an argument left then assume that
* it's a symbol filter:
*/
if (argc > 1)
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index cae9a5fd2ec..1e6e7771054 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -1,21 +1,19 @@
/*
- *
* builtin-bench.c
*
- * General benchmarking subsystem provided by perf
+ * General benchmarking collections provided by perf
*
* Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
- *
*/
/*
+ * Available benchmark collection list:
*
- * Available subsystem list:
- * sched ... scheduler and IPC mechanism
+ * sched ... scheduler and IPC performance
* mem ... memory access performance
- *
+ * numa ... NUMA scheduling and MM performance
+ * futex ... Futex performance
*/
-
#include "perf.h"
#include "util/util.h"
#include "util/parse-options.h"
@@ -25,95 +23,101 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/prctl.h>
+
+typedef int (*bench_fn_t)(int argc, const char **argv, const char *prefix);
+
+struct bench {
+ const char *name;
+ const char *summary;
+ bench_fn_t fn;
+};
+
+#ifdef HAVE_LIBNUMA_SUPPORT
+static struct bench numa_benchmarks[] = {
+ { "mem", "Benchmark for NUMA workloads", bench_numa },
+ { "all", "Test all NUMA benchmarks", NULL },
+ { NULL, NULL, NULL }
+};
+#endif
-struct bench_suite {
- const char *name;
- const char *summary;
- int (*fn)(int, const char **, const char *);
+static struct bench sched_benchmarks[] = {
+ { "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging },
+ { "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe },
+ { "all", "Test all scheduler benchmarks", NULL },
+ { NULL, NULL, NULL }
};
- \
-/* sentinel: easy for help */
-#define suite_all { "all", "Test all benchmark suites", NULL }
-
-static struct bench_suite sched_suites[] = {
- { "messaging",
- "Benchmark for scheduler and IPC mechanisms",
- bench_sched_messaging },
- { "pipe",
- "Flood of communication over pipe() between two processes",
- bench_sched_pipe },
- suite_all,
- { NULL,
- NULL,
- NULL }
+
+static struct bench mem_benchmarks[] = {
+ { "memcpy", "Benchmark for memcpy()", bench_mem_memcpy },
+ { "memset", "Benchmark for memset() tests", bench_mem_memset },
+ { "all", "Test all memory benchmarks", NULL },
+ { NULL, NULL, NULL }
};
-static struct bench_suite mem_suites[] = {
- { "memcpy",
- "Simple memory copy in various ways",
- bench_mem_memcpy },
- { "memset",
- "Simple memory set in various ways",
- bench_mem_memset },
- suite_all,
- { NULL,
- NULL,
- NULL }
+static struct bench futex_benchmarks[] = {
+ { "hash", "Benchmark for futex hash table", bench_futex_hash },
+ { "wake", "Benchmark for futex wake calls", bench_futex_wake },
+ { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
+ { "all", "Test all futex benchmarks", NULL },
+ { NULL, NULL, NULL }
};
-struct bench_subsys {
- const char *name;
- const char *summary;
- struct bench_suite *suites;
+struct collection {
+ const char *name;
+ const char *summary;
+ struct bench *benchmarks;
};
-static struct bench_subsys subsystems[] = {
- { "sched",
- "scheduler and IPC mechanism",
- sched_suites },
- { "mem",
- "memory access performance",
- mem_suites },
- { "all", /* sentinel: easy for help */
- "all benchmark subsystem",
- NULL },
- { NULL,
- NULL,
- NULL }
+static struct collection collections[] = {
+ { "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
+ { "mem", "Memory access benchmarks", mem_benchmarks },
+#ifdef HAVE_LIBNUMA_SUPPORT
+ { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
+#endif
+ {"futex", "Futex stressing benchmarks", futex_benchmarks },
+ { "all", "All benchmarks", NULL },
+ { NULL, NULL, NULL }
};
-static void dump_suites(int subsys_index)
+/* Iterate over all benchmark collections: */
+#define for_each_collection(coll) \
+ for (coll = collections; coll->name; coll++)
+
+/* Iterate over all benchmarks within a collection: */
+#define for_each_bench(coll, bench) \
+ for (bench = coll->benchmarks; bench && bench->name; bench++)
+
+static void dump_benchmarks(struct collection *coll)
{
- int i;
+ struct bench *bench;
- printf("# List of available suites for %s...\n\n",
- subsystems[subsys_index].name);
+ printf("\n # List of available benchmarks for collection '%s':\n\n", coll->name);
- for (i = 0; subsystems[subsys_index].suites[i].name; i++)
- printf("%14s: %s\n",
- subsystems[subsys_index].suites[i].name,
- subsystems[subsys_index].suites[i].summary);
+ for_each_bench(coll, bench)
+ printf("%14s: %s\n", bench->name, bench->summary);
printf("\n");
- return;
}
static const char *bench_format_str;
+
+/* Output/formatting style, exported to benchmark modules: */
int bench_format = BENCH_FORMAT_DEFAULT;
static const struct option bench_options[] = {
- OPT_STRING('f', "format", &bench_format_str, "default",
- "Specify format style"),
+ OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"),
OPT_END()
};
static const char * const bench_usage[] = {
- "perf bench [<common options>] <subsystem> <suite> [<options>]",
+ "perf bench [<common options>] <collection> <benchmark> [<options>]",
NULL
};
static void print_usage(void)
{
+ struct collection *coll;
int i;
printf("Usage: \n");
@@ -121,11 +125,10 @@ static void print_usage(void)
printf("\t%s\n", bench_usage[i]);
printf("\n");
- printf("# List of available subsystems...\n\n");
+ printf(" # List of all available benchmark collections:\n\n");
- for (i = 0; subsystems[i].name; i++)
- printf("%14s: %s\n",
- subsystems[i].name, subsystems[i].summary);
+ for_each_collection(coll)
+ printf("%14s: %s\n", coll->name, coll->summary);
printf("\n");
}
@@ -142,43 +145,74 @@ static int bench_str2int(const char *str)
return BENCH_FORMAT_UNKNOWN;
}
-static void all_suite(struct bench_subsys *subsys) /* FROM HERE */
+/*
+ * Run a specific benchmark but first rename the running task's ->comm[]
+ * to something meaningful:
+ */
+static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn,
+ int argc, const char **argv, const char *prefix)
{
- int i;
+ int size;
+ char *name;
+ int ret;
+
+ size = strlen(coll_name) + 1 + strlen(bench_name) + 1;
+
+ name = zalloc(size);
+ BUG_ON(!name);
+
+ scnprintf(name, size, "%s-%s", coll_name, bench_name);
+
+ prctl(PR_SET_NAME, name);
+ argv[0] = name;
+
+ ret = fn(argc, argv, prefix);
+
+ free(name);
+
+ return ret;
+}
+
+static void run_collection(struct collection *coll)
+{
+ struct bench *bench;
const char *argv[2];
- struct bench_suite *suites = subsys->suites;
argv[1] = NULL;
/*
* TODO:
- * preparing preset parameters for
+ *
+ * Preparing preset parameters for
* embedded, ordinary PC, HPC, etc...
- * will be helpful
+ * would be helpful.
*/
- for (i = 0; suites[i].fn; i++) {
- printf("# Running %s/%s benchmark...\n",
- subsys->name,
- suites[i].name);
-
- argv[1] = suites[i].name;
- suites[i].fn(1, argv, NULL);
+ for_each_bench(coll, bench) {
+ if (!bench->fn)
+ break;
+ printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
+ fflush(stdout);
+
+ argv[1] = bench->name;
+ run_bench(coll->name, bench->name, bench->fn, 1, argv, NULL);
printf("\n");
}
}
-static void all_subsystem(void)
+static void run_all_collections(void)
{
- int i;
- for (i = 0; subsystems[i].suites; i++)
- all_suite(&subsystems[i]);
+ struct collection *coll;
+
+ for_each_collection(coll)
+ run_collection(coll);
}
int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
{
- int i, j, status = 0;
+ struct collection *coll;
+ int ret = 0;
if (argc < 2) {
- /* No subsystem specified. */
+ /* No collection specified. */
print_usage();
goto end;
}
@@ -188,7 +222,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
bench_format = bench_str2int(bench_format_str);
if (bench_format == BENCH_FORMAT_UNKNOWN) {
- printf("Unknown format descriptor:%s\n", bench_format_str);
+ printf("Unknown format descriptor: '%s'\n", bench_format_str);
goto end;
}
@@ -198,51 +232,51 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
}
if (!strcmp(argv[0], "all")) {
- all_subsystem();
+ run_all_collections();
goto end;
}
- for (i = 0; subsystems[i].name; i++) {
- if (strcmp(subsystems[i].name, argv[0]))
+ for_each_collection(coll) {
+ struct bench *bench;
+
+ if (strcmp(coll->name, argv[0]))
continue;
if (argc < 2) {
- /* No suite specified. */
- dump_suites(i);
+ /* No bench specified. */
+ dump_benchmarks(coll);
goto end;
}
if (!strcmp(argv[1], "all")) {
- all_suite(&subsystems[i]);
+ run_collection(coll);
goto end;
}
- for (j = 0; subsystems[i].suites[j].name; j++) {
- if (strcmp(subsystems[i].suites[j].name, argv[1]))
+ for_each_bench(coll, bench) {
+ if (strcmp(bench->name, argv[1]))
continue;
if (bench_format == BENCH_FORMAT_DEFAULT)
- printf("# Running %s/%s benchmark...\n",
- subsystems[i].name,
- subsystems[i].suites[j].name);
- status = subsystems[i].suites[j].fn(argc - 1,
- argv + 1, prefix);
+ printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
+ fflush(stdout);
+ ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1, prefix);
goto end;
}
if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
- dump_suites(i);
+ dump_benchmarks(coll);
goto end;
}
- printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
- status = 1;
+ printf("Unknown benchmark: '%s' for collection '%s'\n", argv[1], argv[0]);
+ ret = 1;
goto end;
}
- printf("Unknown subsystem:%s\n", argv[0]);
- status = 1;
+ printf("Unknown collection: '%s'\n", argv[0]);
+ ret = 1;
end:
- return status;
+ return ret;
}
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index d37e077f4b1..b22dbb16f87 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -6,6 +6,11 @@
* Copyright (C) 2010, Red Hat Inc.
* Copyright (C) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
*/
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+#include <dirent.h>
+#include <unistd.h>
#include "builtin.h"
#include "perf.h"
#include "util/cache.h"
@@ -13,8 +18,169 @@
#include "util/header.h"
#include "util/parse-options.h"
#include "util/strlist.h"
+#include "util/build-id.h"
+#include "util/session.h"
#include "util/symbol.h"
+static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
+{
+ char root_dir[PATH_MAX];
+ char notes[PATH_MAX];
+ u8 build_id[BUILD_ID_SIZE];
+ char *p;
+
+ strlcpy(root_dir, proc_dir, sizeof(root_dir));
+
+ p = strrchr(root_dir, '/');
+ if (!p)
+ return -1;
+ *p = '\0';
+
+ scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+ if (sysfs__read_build_id(notes, build_id, sizeof(build_id)))
+ return -1;
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuildid);
+
+ return 0;
+}
+
+static int build_id_cache__kcore_dir(char *dir, size_t sz)
+{
+ struct timeval tv;
+ struct tm tm;
+ char dt[32];
+
+ if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+ return -1;
+
+ if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+ return -1;
+
+ scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+ return 0;
+}
+
+static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
+{
+ char from[PATH_MAX];
+ char to[PATH_MAX];
+ const char *name;
+ u64 addr1 = 0, addr2 = 0;
+ int i;
+
+ scnprintf(from, sizeof(from), "%s/kallsyms", from_dir);
+ scnprintf(to, sizeof(to), "%s/kallsyms", to_dir);
+
+ for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
+ addr1 = kallsyms__get_function_start(from, name);
+ if (addr1)
+ break;
+ }
+
+ if (name)
+ addr2 = kallsyms__get_function_start(to, name);
+
+ return addr1 == addr2;
+}
+
+static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
+ size_t to_dir_sz)
+{
+ char from[PATH_MAX];
+ char to[PATH_MAX];
+ char to_subdir[PATH_MAX];
+ struct dirent *dent;
+ int ret = -1;
+ DIR *d;
+
+ d = opendir(to_dir);
+ if (!d)
+ return -1;
+
+ scnprintf(from, sizeof(from), "%s/modules", from_dir);
+
+ while (1) {
+ dent = readdir(d);
+ if (!dent)
+ break;
+ if (dent->d_type != DT_DIR)
+ continue;
+ scnprintf(to, sizeof(to), "%s/%s/modules", to_dir,
+ dent->d_name);
+ scnprintf(to_subdir, sizeof(to_subdir), "%s/%s",
+ to_dir, dent->d_name);
+ if (!compare_proc_modules(from, to) &&
+ same_kallsyms_reloc(from_dir, to_subdir)) {
+ strlcpy(to_dir, to_subdir, to_dir_sz);
+ ret = 0;
+ break;
+ }
+ }
+
+ closedir(d);
+
+ return ret;
+}
+
+static int build_id_cache__add_kcore(const char *filename, const char *debugdir)
+{
+ char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1];
+ char from_dir[PATH_MAX], to_dir[PATH_MAX];
+ char *p;
+
+ strlcpy(from_dir, filename, sizeof(from_dir));
+
+ p = strrchr(from_dir, '/');
+ if (!p || strcmp(p + 1, "kcore"))
+ return -1;
+ *p = '\0';
+
+ if (build_id_cache__kcore_buildid(from_dir, sbuildid))
+ return -1;
+
+ scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s",
+ debugdir, sbuildid);
+
+ if (!build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) {
+ pr_debug("same kcore found in %s\n", to_dir);
+ return 0;
+ }
+
+ if (build_id_cache__kcore_dir(dir, sizeof(dir)))
+ return -1;
+
+ scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s",
+ debugdir, sbuildid, dir);
+
+ if (mkdir_p(to_dir, 0755))
+ return -1;
+
+ if (kcore_copy(from_dir, to_dir)) {
+ /* Remove YYYYmmddHHMMSShh directory */
+ if (!rmdir(to_dir)) {
+ p = strrchr(to_dir, '/');
+ if (p)
+ *p = '\0';
+ /* Try to remove buildid directory */
+ if (!rmdir(to_dir)) {
+ p = strrchr(to_dir, '/');
+ if (p)
+ *p = '\0';
+ /* Try to remove [kernel.kcore] directory */
+ rmdir(to_dir);
+ }
+ }
+ return -1;
+ }
+
+ pr_debug("kcore added to build-id cache directory %s\n", to_dir);
+
+ return 0;
+}
+
static int build_id_cache__add_file(const char *filename, const char *debugdir)
{
char sbuild_id[BUILD_ID_SIZE * 2 + 1];
@@ -57,19 +223,96 @@ static int build_id_cache__remove_file(const char *filename,
return err;
}
+static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
+{
+ char filename[PATH_MAX];
+ u8 build_id[BUILD_ID_SIZE];
+
+ if (dso__build_id_filename(dso, filename, sizeof(filename)) &&
+ filename__read_build_id(filename, build_id,
+ sizeof(build_id)) != sizeof(build_id)) {
+ if (errno == ENOENT)
+ return false;
+
+ pr_warning("Problems with %s file, consider removing it from the cache\n",
+ filename);
+ } else if (memcmp(dso->build_id, build_id, sizeof(dso->build_id))) {
+ pr_warning("Problems with %s file, consider removing it from the cache\n",
+ filename);
+ }
+
+ return true;
+}
+
+static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp)
+{
+ struct perf_data_file file = {
+ .path = filename,
+ .mode = PERF_DATA_MODE_READ,
+ .force = force,
+ };
+ struct perf_session *session = perf_session__new(&file, false, NULL);
+ if (session == NULL)
+ return -1;
+
+ perf_session__fprintf_dsos_buildid(session, fp, dso__missing_buildid_cache, 0);
+ perf_session__delete(session);
+
+ return 0;
+}
+
+static int build_id_cache__update_file(const char *filename,
+ const char *debugdir)
+{
+ u8 build_id[BUILD_ID_SIZE];
+ char sbuild_id[BUILD_ID_SIZE * 2 + 1];
+
+ int err;
+
+ if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) {
+ pr_debug("Couldn't read a build-id in %s\n", filename);
+ return -1;
+ }
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuild_id);
+ err = build_id_cache__remove_s(sbuild_id, debugdir);
+ if (!err) {
+ err = build_id_cache__add_s(sbuild_id, debugdir, filename,
+ false, false);
+ }
+ if (verbose)
+ pr_info("Updating %s %s: %s\n", sbuild_id, filename,
+ err ? "FAIL" : "Ok");
+
+ return err;
+}
+
int cmd_buildid_cache(int argc, const char **argv,
const char *prefix __maybe_unused)
{
struct strlist *list;
struct str_node *pos;
+ int ret = 0;
+ bool force = false;
char debugdir[PATH_MAX];
char const *add_name_list_str = NULL,
- *remove_name_list_str = NULL;
+ *remove_name_list_str = NULL,
+ *missing_filename = NULL,
+ *update_name_list_str = NULL,
+ *kcore_filename;
+
const struct option buildid_cache_options[] = {
OPT_STRING('a', "add", &add_name_list_str,
"file list", "file(s) to add"),
+ OPT_STRING('k', "kcore", &kcore_filename,
+ "file", "kcore file to add"),
OPT_STRING('r', "remove", &remove_name_list_str, "file list",
"file(s) to remove"),
+ OPT_STRING('M', "missing", &missing_filename, "file",
+ "to find missing build ids in the cache"),
+ OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
+ OPT_STRING('u', "update", &update_name_list_str, "file list",
+ "file(s) to update"),
OPT_INCR('v', "verbose", &verbose, "be more verbose"),
OPT_END()
};
@@ -124,5 +367,30 @@ int cmd_buildid_cache(int argc, const char **argv,
}
}
- return 0;
+ if (missing_filename)
+ ret = build_id_cache__fprintf_missing(missing_filename, force, stdout);
+
+ if (update_name_list_str) {
+ list = strlist__new(true, update_name_list_str);
+ if (list) {
+ strlist__for_each(pos, list)
+ if (build_id_cache__update_file(pos->s, debugdir)) {
+ if (errno == ENOENT) {
+ pr_debug("%s wasn't in the cache\n",
+ pos->s);
+ continue;
+ }
+ pr_warning("Couldn't update %s: %s\n",
+ pos->s, strerror(errno));
+ }
+
+ strlist__delete(list);
+ }
+ }
+
+ if (kcore_filename &&
+ build_id_cache__add_kcore(kcore_filename, debugdir))
+ pr_warning("Couldn't add %s\n", kcore_filename);
+
+ return ret;
}
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index a0e94fffa03..ed3873b3e23 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -15,6 +15,7 @@
#include "util/parse-options.h"
#include "util/session.h"
#include "util/symbol.h"
+#include "util/data.h"
static int sysfs__fprintf_build_id(FILE *fp)
{
@@ -44,34 +45,40 @@ static int filename__fprintf_build_id(const char *name, FILE *fp)
return fprintf(fp, "%s\n", sbuild_id);
}
-static int perf_session__list_build_ids(const char *input_name,
- bool force, bool with_hits)
+static bool dso__skip_buildid(struct dso *dso, int with_hits)
+{
+ return with_hits && !dso->hit;
+}
+
+static int perf_session__list_build_ids(bool force, bool with_hits)
{
struct perf_session *session;
+ struct perf_data_file file = {
+ .path = input_name,
+ .mode = PERF_DATA_MODE_READ,
+ .force = force,
+ };
symbol__elf_init();
-
- session = perf_session__new(input_name, O_RDONLY, force, false,
- &build_id__mark_dso_hit_ops);
- if (session == NULL)
- return -1;
-
/*
* See if this is an ELF file first:
*/
- if (filename__fprintf_build_id(session->filename, stdout))
+ if (filename__fprintf_build_id(input_name, stdout))
goto out;
+ session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops);
+ if (session == NULL)
+ return -1;
/*
* in pipe-mode, the only way to get the buildids is to parse
* the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
*/
- if (with_hits || session->fd_pipe)
+ if (with_hits || perf_data_file__is_pipe(&file))
perf_session__process_events(session, &build_id__mark_dso_hit_ops);
- perf_session__fprintf_dsos_buildid(session, stdout, with_hits);
-out:
+ perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits);
perf_session__delete(session);
+out:
return 0;
}
@@ -81,7 +88,6 @@ int cmd_buildid_list(int argc, const char **argv,
bool show_kernel = false;
bool with_hits = false;
bool force = false;
- const char *input_name = NULL;
const struct option options[] = {
OPT_BOOLEAN('H', "with-hits", &with_hits, "Show only DSOs with hits"),
OPT_STRING('i', "input", &input_name, "file", "input file name"),
@@ -101,5 +107,5 @@ int cmd_buildid_list(int argc, const char **argv,
if (show_kernel)
return sysfs__fprintf_build_id(stdout);
- return perf_session__list_build_ids(input_name, force, with_hits);
+ return perf_session__list_build_ids(force, with_hits);
}
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index a0b531c14b9..9a5a035cb42 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -16,19 +16,306 @@
#include "util/sort.h"
#include "util/symbol.h"
#include "util/util.h"
+#include "util/data.h"
#include <stdlib.h>
+#include <math.h>
-static char const *input_old = "perf.data.old",
- *input_new = "perf.data";
-static char diff__default_sort_order[] = "dso,symbol";
-static bool force;
-static bool show_displacement;
+/* Diff command specific HPP columns. */
+enum {
+ PERF_HPP_DIFF__BASELINE,
+ PERF_HPP_DIFF__PERIOD,
+ PERF_HPP_DIFF__PERIOD_BASELINE,
+ PERF_HPP_DIFF__DELTA,
+ PERF_HPP_DIFF__RATIO,
+ PERF_HPP_DIFF__WEIGHTED_DIFF,
+ PERF_HPP_DIFF__FORMULA,
-static int hists__add_entry(struct hists *self,
- struct addr_location *al, u64 period)
+ PERF_HPP_DIFF__MAX_INDEX
+};
+
+struct diff_hpp_fmt {
+ struct perf_hpp_fmt fmt;
+ int idx;
+ char *header;
+ int header_width;
+};
+
+struct data__file {
+ struct perf_session *session;
+ struct perf_data_file file;
+ int idx;
+ struct hists *hists;
+ struct diff_hpp_fmt fmt[PERF_HPP_DIFF__MAX_INDEX];
+};
+
+static struct data__file *data__files;
+static int data__files_cnt;
+
+#define data__for_each_file_start(i, d, s) \
+ for (i = s, d = &data__files[s]; \
+ i < data__files_cnt; \
+ i++, d = &data__files[i])
+
+#define data__for_each_file(i, d) data__for_each_file_start(i, d, 0)
+#define data__for_each_file_new(i, d) data__for_each_file_start(i, d, 1)
+
+static bool force;
+static bool show_period;
+static bool show_formula;
+static bool show_baseline_only;
+static unsigned int sort_compute;
+
+static s64 compute_wdiff_w1;
+static s64 compute_wdiff_w2;
+
+enum {
+ COMPUTE_DELTA,
+ COMPUTE_RATIO,
+ COMPUTE_WEIGHTED_DIFF,
+ COMPUTE_MAX,
+};
+
+const char *compute_names[COMPUTE_MAX] = {
+ [COMPUTE_DELTA] = "delta",
+ [COMPUTE_RATIO] = "ratio",
+ [COMPUTE_WEIGHTED_DIFF] = "wdiff",
+};
+
+static int compute;
+
+static int compute_2_hpp[COMPUTE_MAX] = {
+ [COMPUTE_DELTA] = PERF_HPP_DIFF__DELTA,
+ [COMPUTE_RATIO] = PERF_HPP_DIFF__RATIO,
+ [COMPUTE_WEIGHTED_DIFF] = PERF_HPP_DIFF__WEIGHTED_DIFF,
+};
+
+#define MAX_COL_WIDTH 70
+
+static struct header_column {
+ const char *name;
+ int width;
+} columns[PERF_HPP_DIFF__MAX_INDEX] = {
+ [PERF_HPP_DIFF__BASELINE] = {
+ .name = "Baseline",
+ },
+ [PERF_HPP_DIFF__PERIOD] = {
+ .name = "Period",
+ .width = 14,
+ },
+ [PERF_HPP_DIFF__PERIOD_BASELINE] = {
+ .name = "Base period",
+ .width = 14,
+ },
+ [PERF_HPP_DIFF__DELTA] = {
+ .name = "Delta",
+ .width = 7,
+ },
+ [PERF_HPP_DIFF__RATIO] = {
+ .name = "Ratio",
+ .width = 14,
+ },
+ [PERF_HPP_DIFF__WEIGHTED_DIFF] = {
+ .name = "Weighted diff",
+ .width = 14,
+ },
+ [PERF_HPP_DIFF__FORMULA] = {
+ .name = "Formula",
+ .width = MAX_COL_WIDTH,
+ }
+};
+
+static int setup_compute_opt_wdiff(char *opt)
+{
+ char *w1_str = opt;
+ char *w2_str;
+
+ int ret = -EINVAL;
+
+ if (!opt)
+ goto out;
+
+ w2_str = strchr(opt, ',');
+ if (!w2_str)
+ goto out;
+
+ *w2_str++ = 0x0;
+ if (!*w2_str)
+ goto out;
+
+ compute_wdiff_w1 = strtol(w1_str, NULL, 10);
+ compute_wdiff_w2 = strtol(w2_str, NULL, 10);
+
+ if (!compute_wdiff_w1 || !compute_wdiff_w2)
+ goto out;
+
+ pr_debug("compute wdiff w1(%" PRId64 ") w2(%" PRId64 ")\n",
+ compute_wdiff_w1, compute_wdiff_w2);
+
+ ret = 0;
+
+ out:
+ if (ret)
+ pr_err("Failed: wrong weight data, use 'wdiff:w1,w2'\n");
+
+ return ret;
+}
+
+static int setup_compute_opt(char *opt)
+{
+ if (compute == COMPUTE_WEIGHTED_DIFF)
+ return setup_compute_opt_wdiff(opt);
+
+ if (opt) {
+ pr_err("Failed: extra option specified '%s'", opt);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int setup_compute(const struct option *opt, const char *str,
+ int unset __maybe_unused)
+{
+ int *cp = (int *) opt->value;
+ char *cstr = (char *) str;
+ char buf[50];
+ unsigned i;
+ char *option;
+
+ if (!str) {
+ *cp = COMPUTE_DELTA;
+ return 0;
+ }
+
+ option = strchr(str, ':');
+ if (option) {
+ unsigned len = option++ - str;
+
+ /*
+ * The str data are not writeable, so we need
+ * to use another buffer.
+ */
+
+ /* No option value is longer. */
+ if (len >= sizeof(buf))
+ return -EINVAL;
+
+ strncpy(buf, str, len);
+ buf[len] = 0x0;
+ cstr = buf;
+ }
+
+ for (i = 0; i < COMPUTE_MAX; i++)
+ if (!strcmp(cstr, compute_names[i])) {
+ *cp = i;
+ return setup_compute_opt(option);
+ }
+
+ pr_err("Failed: '%s' is not computation method "
+ "(use 'delta','ratio' or 'wdiff')\n", str);
+ return -EINVAL;
+}
+
+static double period_percent(struct hist_entry *he, u64 period)
+{
+ u64 total = hists__total_period(he->hists);
+
+ return (period * 100.0) / total;
+}
+
+static double compute_delta(struct hist_entry *he, struct hist_entry *pair)
+{
+ double old_percent = period_percent(he, he->stat.period);
+ double new_percent = period_percent(pair, pair->stat.period);
+
+ pair->diff.period_ratio_delta = new_percent - old_percent;
+ pair->diff.computed = true;
+ return pair->diff.period_ratio_delta;
+}
+
+static double compute_ratio(struct hist_entry *he, struct hist_entry *pair)
+{
+ double old_period = he->stat.period ?: 1;
+ double new_period = pair->stat.period;
+
+ pair->diff.computed = true;
+ pair->diff.period_ratio = new_period / old_period;
+ return pair->diff.period_ratio;
+}
+
+static s64 compute_wdiff(struct hist_entry *he, struct hist_entry *pair)
+{
+ u64 old_period = he->stat.period;
+ u64 new_period = pair->stat.period;
+
+ pair->diff.computed = true;
+ pair->diff.wdiff = new_period * compute_wdiff_w2 -
+ old_period * compute_wdiff_w1;
+
+ return pair->diff.wdiff;
+}
+
+static int formula_delta(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
+{
+ u64 he_total = he->hists->stats.total_period;
+ u64 pair_total = pair->hists->stats.total_period;
+
+ if (symbol_conf.filter_relative) {
+ he_total = he->hists->stats.total_non_filtered_period;
+ pair_total = pair->hists->stats.total_non_filtered_period;
+ }
+ return scnprintf(buf, size,
+ "(%" PRIu64 " * 100 / %" PRIu64 ") - "
+ "(%" PRIu64 " * 100 / %" PRIu64 ")",
+ pair->stat.period, pair_total,
+ he->stat.period, he_total);
+}
+
+static int formula_ratio(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
+{
+ double old_period = he->stat.period;
+ double new_period = pair->stat.period;
+
+ return scnprintf(buf, size, "%.0F / %.0F", new_period, old_period);
+}
+
+static int formula_wdiff(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
+{
+ u64 old_period = he->stat.period;
+ u64 new_period = pair->stat.period;
+
+ return scnprintf(buf, size,
+ "(%" PRIu64 " * " "%" PRId64 ") - (%" PRIu64 " * " "%" PRId64 ")",
+ new_period, compute_wdiff_w2, old_period, compute_wdiff_w1);
+}
+
+static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
+ char *buf, size_t size)
+{
+ switch (compute) {
+ case COMPUTE_DELTA:
+ return formula_delta(he, pair, buf, size);
+ case COMPUTE_RATIO:
+ return formula_ratio(he, pair, buf, size);
+ case COMPUTE_WEIGHTED_DIFF:
+ return formula_wdiff(he, pair, buf, size);
+ default:
+ BUG_ON(1);
+ }
+
+ return -1;
+}
+
+static int hists__add_entry(struct hists *hists,
+ struct addr_location *al, u64 period,
+ u64 weight, u64 transaction)
{
- if (__hists__add_entry(self, al, NULL, period) != NULL)
+ if (__hists__add_entry(hists, al, NULL, NULL, NULL, period, weight,
+ transaction, true) != NULL)
return 0;
return -ENOMEM;
}
@@ -41,21 +328,28 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
{
struct addr_location al;
- if (perf_event__preprocess_sample(event, machine, &al, sample, NULL) < 0) {
+ if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
}
- if (al.filtered || al.sym == NULL)
- return 0;
-
- if (hists__add_entry(&evsel->hists, &al, sample->period)) {
+ if (hists__add_entry(&evsel->hists, &al, sample->period,
+ sample->weight, sample->transaction)) {
pr_warning("problem incrementing symbol period, skipping event\n");
return -1;
}
+ /*
+ * The total_period is updated here before going to the output
+ * tree since normally only the baseline hists will call
+ * hists__output_resort() and precompute needs the total
+ * period in order to sort entries by percentage delta.
+ */
evsel->hists.stats.total_period += sample->period;
+ if (!al.filtered)
+ evsel->hists.stats.total_non_filtered_period += sample->period;
+
return 0;
}
@@ -63,15 +357,195 @@ static struct perf_tool tool = {
.sample = diff__process_sample_event,
.mmap = perf_event__process_mmap,
.comm = perf_event__process_comm,
- .exit = perf_event__process_task,
- .fork = perf_event__process_task,
+ .exit = perf_event__process_exit,
+ .fork = perf_event__process_fork,
.lost = perf_event__process_lost,
.ordered_samples = true,
.ordering_requires_timestamps = true,
};
-static void insert_hist_entry_by_name(struct rb_root *root,
- struct hist_entry *he)
+static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
+ struct perf_evlist *evlist)
+{
+ struct perf_evsel *e;
+
+ evlist__for_each(evlist, e) {
+ if (perf_evsel__match2(evsel, e))
+ return e;
+ }
+
+ return NULL;
+}
+
+static void perf_evlist__collapse_resort(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ struct hists *hists = &evsel->hists;
+
+ hists__collapse_resort(hists, NULL);
+ }
+}
+
+static struct hist_entry*
+get_pair_data(struct hist_entry *he, struct data__file *d)
+{
+ if (hist_entry__has_pairs(he)) {
+ struct hist_entry *pair;
+
+ list_for_each_entry(pair, &he->pairs.head, pairs.node)
+ if (pair->hists == d->hists)
+ return pair;
+ }
+
+ return NULL;
+}
+
+static struct hist_entry*
+get_pair_fmt(struct hist_entry *he, struct diff_hpp_fmt *dfmt)
+{
+ void *ptr = dfmt - dfmt->idx;
+ struct data__file *d = container_of(ptr, struct data__file, fmt);
+
+ return get_pair_data(he, d);
+}
+
+static void hists__baseline_only(struct hists *hists)
+{
+ struct rb_root *root;
+ struct rb_node *next;
+
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ next = rb_first(root);
+ while (next != NULL) {
+ struct hist_entry *he = rb_entry(next, struct hist_entry, rb_node_in);
+
+ next = rb_next(&he->rb_node_in);
+ if (!hist_entry__next_pair(he)) {
+ rb_erase(&he->rb_node_in, root);
+ hist_entry__free(he);
+ }
+ }
+}
+
+static void hists__precompute(struct hists *hists)
+{
+ struct rb_root *root;
+ struct rb_node *next;
+
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
+
+ next = rb_first(root);
+ while (next != NULL) {
+ struct hist_entry *he, *pair;
+
+ he = rb_entry(next, struct hist_entry, rb_node_in);
+ next = rb_next(&he->rb_node_in);
+
+ pair = get_pair_data(he, &data__files[sort_compute]);
+ if (!pair)
+ continue;
+
+ switch (compute) {
+ case COMPUTE_DELTA:
+ compute_delta(he, pair);
+ break;
+ case COMPUTE_RATIO:
+ compute_ratio(he, pair);
+ break;
+ case COMPUTE_WEIGHTED_DIFF:
+ compute_wdiff(he, pair);
+ break;
+ default:
+ BUG_ON(1);
+ }
+ }
+}
+
+static int64_t cmp_doubles(double l, double r)
+{
+ if (l > r)
+ return -1;
+ else if (l < r)
+ return 1;
+ else
+ return 0;
+}
+
+static int64_t
+__hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+ int c)
+{
+ switch (c) {
+ case COMPUTE_DELTA:
+ {
+ double l = left->diff.period_ratio_delta;
+ double r = right->diff.period_ratio_delta;
+
+ return cmp_doubles(l, r);
+ }
+ case COMPUTE_RATIO:
+ {
+ double l = left->diff.period_ratio;
+ double r = right->diff.period_ratio;
+
+ return cmp_doubles(l, r);
+ }
+ case COMPUTE_WEIGHTED_DIFF:
+ {
+ s64 l = left->diff.wdiff;
+ s64 r = right->diff.wdiff;
+
+ return r - l;
+ }
+ default:
+ BUG_ON(1);
+ }
+
+ return 0;
+}
+
+static int64_t
+hist_entry__cmp_compute(struct hist_entry *left, struct hist_entry *right,
+ int c)
+{
+ bool pairs_left = hist_entry__has_pairs(left);
+ bool pairs_right = hist_entry__has_pairs(right);
+ struct hist_entry *p_right, *p_left;
+
+ if (!pairs_left && !pairs_right)
+ return 0;
+
+ if (!pairs_left || !pairs_right)
+ return pairs_left ? -1 : 1;
+
+ p_left = get_pair_data(left, &data__files[sort_compute]);
+ p_right = get_pair_data(right, &data__files[sort_compute]);
+
+ if (!p_left && !p_right)
+ return 0;
+
+ if (!p_left || !p_right)
+ return p_left ? -1 : 1;
+
+ /*
+ * We have 2 entries of same kind, let's
+ * make the data comparison.
+ */
+ return __hist_entry__cmp_compute(p_left, p_right, c);
+}
+
+static void insert_hist_entry_by_compute(struct rb_root *root,
+ struct hist_entry *he,
+ int c)
{
struct rb_node **p = &root->rb_node;
struct rb_node *parent = NULL;
@@ -80,7 +554,7 @@ static void insert_hist_entry_by_name(struct rb_root *root,
while (*p != NULL) {
parent = *p;
iter = rb_entry(parent, struct hist_entry, rb_node);
- if (hist_entry__cmp(he, iter) < 0)
+ if (hist_entry__cmp_compute(he, iter, c) < 0)
p = &(*p)->rb_left;
else
p = &(*p)->rb_right;
@@ -90,139 +564,150 @@ static void insert_hist_entry_by_name(struct rb_root *root,
rb_insert_color(&he->rb_node, root);
}
-static void hists__name_resort(struct hists *self, bool sort)
+static void hists__compute_resort(struct hists *hists)
{
- unsigned long position = 1;
- struct rb_root tmp = RB_ROOT;
- struct rb_node *next = rb_first(&self->entries);
+ struct rb_root *root;
+ struct rb_node *next;
- while (next != NULL) {
- struct hist_entry *n = rb_entry(next, struct hist_entry, rb_node);
+ if (sort__need_collapse)
+ root = &hists->entries_collapsed;
+ else
+ root = hists->entries_in;
- next = rb_next(&n->rb_node);
- n->position = position++;
+ hists->entries = RB_ROOT;
+ next = rb_first(root);
- if (sort) {
- rb_erase(&n->rb_node, &self->entries);
- insert_hist_entry_by_name(&tmp, n);
- }
- }
+ hists__reset_stats(hists);
+ hists__reset_col_len(hists);
- if (sort)
- self->entries = tmp;
-}
+ while (next != NULL) {
+ struct hist_entry *he;
-static struct hist_entry *hists__find_entry(struct hists *self,
- struct hist_entry *he)
-{
- struct rb_node *n = self->entries.rb_node;
+ he = rb_entry(next, struct hist_entry, rb_node_in);
+ next = rb_next(&he->rb_node_in);
- while (n) {
- struct hist_entry *iter = rb_entry(n, struct hist_entry, rb_node);
- int64_t cmp = hist_entry__cmp(he, iter);
+ insert_hist_entry_by_compute(&hists->entries, he, compute);
+ hists__inc_stats(hists, he);
- if (cmp < 0)
- n = n->rb_left;
- else if (cmp > 0)
- n = n->rb_right;
- else
- return iter;
+ if (!he->filtered)
+ hists__calc_col_len(hists, he);
}
-
- return NULL;
}
-static void hists__match(struct hists *older, struct hists *newer)
+static void hists__process(struct hists *hists)
{
- struct rb_node *nd;
+ if (show_baseline_only)
+ hists__baseline_only(hists);
- for (nd = rb_first(&newer->entries); nd; nd = rb_next(nd)) {
- struct hist_entry *pos = rb_entry(nd, struct hist_entry, rb_node);
- pos->pair = hists__find_entry(older, pos);
+ if (sort_compute) {
+ hists__precompute(hists);
+ hists__compute_resort(hists);
+ } else {
+ hists__output_resort(hists);
}
+
+ hists__fprintf(hists, true, 0, 0, 0, stdout);
}
-static struct perf_evsel *evsel_match(struct perf_evsel *evsel,
- struct perf_evlist *evlist)
+static void data__fprintf(void)
{
- struct perf_evsel *e;
+ struct data__file *d;
+ int i;
- list_for_each_entry(e, &evlist->entries, node)
- if (perf_evsel__match2(evsel, e))
- return e;
+ fprintf(stdout, "# Data files:\n");
- return NULL;
+ data__for_each_file(i, d)
+ fprintf(stdout, "# [%d] %s %s\n",
+ d->idx, d->file.path,
+ !d->idx ? "(Baseline)" : "");
+
+ fprintf(stdout, "#\n");
}
-static void perf_evlist__resort_hists(struct perf_evlist *evlist, bool name)
+static void data_process(void)
{
- struct perf_evsel *evsel;
+ struct perf_evlist *evlist_base = data__files[0].session->evlist;
+ struct perf_evsel *evsel_base;
+ bool first = true;
- list_for_each_entry(evsel, &evlist->entries, node) {
- struct hists *hists = &evsel->hists;
+ evlist__for_each(evlist_base, evsel_base) {
+ struct data__file *d;
+ int i;
- hists__output_resort(hists);
+ data__for_each_file_new(i, d) {
+ struct perf_evlist *evlist = d->session->evlist;
+ struct perf_evsel *evsel;
- /*
- * The hists__name_resort only sets possition
- * if name is false.
- */
- if (name || ((!name) && show_displacement))
- hists__name_resort(hists, name);
+ evsel = evsel_match(evsel_base, evlist);
+ if (!evsel)
+ continue;
+
+ d->hists = &evsel->hists;
+
+ hists__match(&evsel_base->hists, &evsel->hists);
+
+ if (!show_baseline_only)
+ hists__link(&evsel_base->hists,
+ &evsel->hists);
+ }
+
+ fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
+ perf_evsel__name(evsel_base));
+
+ first = false;
+
+ if (verbose || data__files_cnt > 2)
+ data__fprintf();
+
+ hists__process(&evsel_base->hists);
}
}
-static int __cmd_diff(void)
+static void data__free(struct data__file *d)
{
- int ret, i;
-#define older (session[0])
-#define newer (session[1])
- struct perf_session *session[2];
- struct perf_evlist *evlist_new, *evlist_old;
- struct perf_evsel *evsel;
- bool first = true;
+ int col;
- older = perf_session__new(input_old, O_RDONLY, force, false,
- &tool);
- newer = perf_session__new(input_new, O_RDONLY, force, false,
- &tool);
- if (session[0] == NULL || session[1] == NULL)
- return -ENOMEM;
+ for (col = 0; col < PERF_HPP_DIFF__MAX_INDEX; col++) {
+ struct diff_hpp_fmt *fmt = &d->fmt[col];
- for (i = 0; i < 2; ++i) {
- ret = perf_session__process_events(session[i], &tool);
- if (ret)
- goto out_delete;
+ zfree(&fmt->header);
}
+}
- evlist_old = older->evlist;
- evlist_new = newer->evlist;
+static int __cmd_diff(void)
+{
+ struct data__file *d;
+ int ret = -EINVAL, i;
- perf_evlist__resort_hists(evlist_old, true);
- perf_evlist__resort_hists(evlist_new, false);
+ data__for_each_file(i, d) {
+ d->session = perf_session__new(&d->file, false, &tool);
+ if (!d->session) {
+ pr_err("Failed to open %s\n", d->file.path);
+ ret = -ENOMEM;
+ goto out_delete;
+ }
- list_for_each_entry(evsel, &evlist_new->entries, node) {
- struct perf_evsel *evsel_old;
+ ret = perf_session__process_events(d->session, &tool);
+ if (ret) {
+ pr_err("Failed to process %s\n", d->file.path);
+ goto out_delete;
+ }
- evsel_old = evsel_match(evsel, evlist_old);
- if (!evsel_old)
- continue;
+ perf_evlist__collapse_resort(d->session->evlist);
+ }
- fprintf(stdout, "%s# Event '%s'\n#\n", first ? "" : "\n",
- perf_evsel__name(evsel));
+ data_process();
- first = false;
+ out_delete:
+ data__for_each_file(i, d) {
+ if (d->session)
+ perf_session__delete(d->session);
- hists__match(&evsel_old->hists, &evsel->hists);
- hists__fprintf(&evsel->hists, true, 0, 0, stdout);
+ data__free(d);
}
-out_delete:
- for (i = 0; i < 2; ++i)
- perf_session__delete(session[i]);
+ free(data__files);
return ret;
-#undef older
-#undef newer
}
static const char * const diff_usage[] = {
@@ -233,8 +718,16 @@ static const char * const diff_usage[] = {
static const struct option options[] = {
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show symbol address, etc)"),
- OPT_BOOLEAN('M', "displacement", &show_displacement,
- "Show position displacement relative to baseline"),
+ OPT_BOOLEAN('b', "baseline-only", &show_baseline_only,
+ "Show only items with match in baseline"),
+ OPT_CALLBACK('c', "compute", &compute,
+ "delta,ratio,wdiff:w1,w2 (default delta)",
+ "Entries differential computation selection",
+ setup_compute),
+ OPT_BOOLEAN('p', "period", &show_period,
+ "Show period values."),
+ OPT_BOOLEAN('F', "formula", &show_formula,
+ "Show formula."),
OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
"dump raw trace in ASCII"),
OPT_BOOLEAN('f', "force", &force, "don't complain, do it"),
@@ -247,60 +740,425 @@ static const struct option options[] = {
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
"only consider these symbols"),
OPT_STRING('s', "sort", &sort_order, "key[,key2...]",
- "sort by key(s): pid, comm, dso, symbol, parent"),
+ "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..."
+ " Please refer the man page for the complete list."),
OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator",
"separator for columns, no spaces will be added between "
"columns '.' is reserved."),
OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory",
"Look for files with symbols relative to this directory"),
+ OPT_UINTEGER('o', "order", &sort_compute, "Specify compute sorting."),
+ OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
+ "How to display percentage of filtered entries", parse_filter_percentage),
OPT_END()
};
+static double baseline_percent(struct hist_entry *he)
+{
+ u64 total = hists__total_period(he->hists);
+
+ return 100.0 * he->stat.period / total;
+}
+
+static int hpp__color_baseline(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+ double percent = baseline_percent(he);
+ char pfmt[20] = " ";
+
+ if (!he->dummy) {
+ scnprintf(pfmt, 20, "%%%d.2f%%%%", dfmt->header_width - 1);
+ return percent_color_snprintf(hpp->buf, hpp->size,
+ pfmt, percent);
+ } else
+ return scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, pfmt);
+}
+
+static int hpp__entry_baseline(struct hist_entry *he, char *buf, size_t size)
+{
+ double percent = baseline_percent(he);
+ const char *fmt = symbol_conf.field_sep ? "%.2f" : "%6.2f%%";
+ int ret = 0;
+
+ if (!he->dummy)
+ ret = scnprintf(buf, size, fmt, percent);
+
+ return ret;
+}
+
+static int __hpp__color_compare(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he,
+ int comparison_method)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+ struct hist_entry *pair = get_pair_fmt(he, dfmt);
+ double diff;
+ s64 wdiff;
+ char pfmt[20] = " ";
+
+ if (!pair)
+ goto dummy_print;
+
+ switch (comparison_method) {
+ case COMPUTE_DELTA:
+ if (pair->diff.computed)
+ diff = pair->diff.period_ratio_delta;
+ else
+ diff = compute_delta(he, pair);
+
+ if (fabs(diff) < 0.01)
+ goto dummy_print;
+ scnprintf(pfmt, 20, "%%%+d.2f%%%%", dfmt->header_width - 1);
+ return percent_color_snprintf(hpp->buf, hpp->size,
+ pfmt, diff);
+ case COMPUTE_RATIO:
+ if (he->dummy)
+ goto dummy_print;
+ if (pair->diff.computed)
+ diff = pair->diff.period_ratio;
+ else
+ diff = compute_ratio(he, pair);
+
+ scnprintf(pfmt, 20, "%%%d.6f", dfmt->header_width);
+ return value_color_snprintf(hpp->buf, hpp->size,
+ pfmt, diff);
+ case COMPUTE_WEIGHTED_DIFF:
+ if (he->dummy)
+ goto dummy_print;
+ if (pair->diff.computed)
+ wdiff = pair->diff.wdiff;
+ else
+ wdiff = compute_wdiff(he, pair);
+
+ scnprintf(pfmt, 20, "%%14ld", dfmt->header_width);
+ return color_snprintf(hpp->buf, hpp->size,
+ get_percent_color(wdiff),
+ pfmt, wdiff);
+ default:
+ BUG_ON(1);
+ }
+dummy_print:
+ return scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, pfmt);
+}
+
+static int hpp__color_delta(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ return __hpp__color_compare(fmt, hpp, he, COMPUTE_DELTA);
+}
+
+static int hpp__color_ratio(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ return __hpp__color_compare(fmt, hpp, he, COMPUTE_RATIO);
+}
+
+static int hpp__color_wdiff(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp, struct hist_entry *he)
+{
+ return __hpp__color_compare(fmt, hpp, he, COMPUTE_WEIGHTED_DIFF);
+}
+
+static void
+hpp__entry_unpair(struct hist_entry *he, int idx, char *buf, size_t size)
+{
+ switch (idx) {
+ case PERF_HPP_DIFF__PERIOD_BASELINE:
+ scnprintf(buf, size, "%" PRIu64, he->stat.period);
+ break;
+
+ default:
+ break;
+ }
+}
+
+static void
+hpp__entry_pair(struct hist_entry *he, struct hist_entry *pair,
+ int idx, char *buf, size_t size)
+{
+ double diff;
+ double ratio;
+ s64 wdiff;
+
+ switch (idx) {
+ case PERF_HPP_DIFF__DELTA:
+ if (pair->diff.computed)
+ diff = pair->diff.period_ratio_delta;
+ else
+ diff = compute_delta(he, pair);
+
+ if (fabs(diff) >= 0.01)
+ scnprintf(buf, size, "%+4.2F%%", diff);
+ break;
+
+ case PERF_HPP_DIFF__RATIO:
+ /* No point for ratio number if we are dummy.. */
+ if (he->dummy)
+ break;
+
+ if (pair->diff.computed)
+ ratio = pair->diff.period_ratio;
+ else
+ ratio = compute_ratio(he, pair);
+
+ if (ratio > 0.0)
+ scnprintf(buf, size, "%14.6F", ratio);
+ break;
+
+ case PERF_HPP_DIFF__WEIGHTED_DIFF:
+ /* No point for wdiff number if we are dummy.. */
+ if (he->dummy)
+ break;
+
+ if (pair->diff.computed)
+ wdiff = pair->diff.wdiff;
+ else
+ wdiff = compute_wdiff(he, pair);
+
+ if (wdiff != 0)
+ scnprintf(buf, size, "%14ld", wdiff);
+ break;
+
+ case PERF_HPP_DIFF__FORMULA:
+ formula_fprintf(he, pair, buf, size);
+ break;
+
+ case PERF_HPP_DIFF__PERIOD:
+ scnprintf(buf, size, "%" PRIu64, pair->stat.period);
+ break;
+
+ default:
+ BUG_ON(1);
+ };
+}
+
+static void
+__hpp__entry_global(struct hist_entry *he, struct diff_hpp_fmt *dfmt,
+ char *buf, size_t size)
+{
+ struct hist_entry *pair = get_pair_fmt(he, dfmt);
+ int idx = dfmt->idx;
+
+ /* baseline is special */
+ if (idx == PERF_HPP_DIFF__BASELINE)
+ hpp__entry_baseline(he, buf, size);
+ else {
+ if (pair)
+ hpp__entry_pair(he, pair, idx, buf, size);
+ else
+ hpp__entry_unpair(he, idx, buf, size);
+ }
+}
+
+static int hpp__entry_global(struct perf_hpp_fmt *_fmt, struct perf_hpp *hpp,
+ struct hist_entry *he)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(_fmt, struct diff_hpp_fmt, fmt);
+ char buf[MAX_COL_WIDTH] = " ";
+
+ __hpp__entry_global(he, dfmt, buf, MAX_COL_WIDTH);
+
+ if (symbol_conf.field_sep)
+ return scnprintf(hpp->buf, hpp->size, "%s", buf);
+ else
+ return scnprintf(hpp->buf, hpp->size, "%*s",
+ dfmt->header_width, buf);
+}
+
+static int hpp__header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp,
+ struct perf_evsel *evsel __maybe_unused)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+
+ BUG_ON(!dfmt->header);
+ return scnprintf(hpp->buf, hpp->size, dfmt->header);
+}
+
+static int hpp__width(struct perf_hpp_fmt *fmt,
+ struct perf_hpp *hpp __maybe_unused,
+ struct perf_evsel *evsel __maybe_unused)
+{
+ struct diff_hpp_fmt *dfmt =
+ container_of(fmt, struct diff_hpp_fmt, fmt);
+
+ BUG_ON(dfmt->header_width <= 0);
+ return dfmt->header_width;
+}
+
+static void init_header(struct data__file *d, struct diff_hpp_fmt *dfmt)
+{
+#define MAX_HEADER_NAME 100
+ char buf_indent[MAX_HEADER_NAME];
+ char buf[MAX_HEADER_NAME];
+ const char *header = NULL;
+ int width = 0;
+
+ BUG_ON(dfmt->idx >= PERF_HPP_DIFF__MAX_INDEX);
+ header = columns[dfmt->idx].name;
+ width = columns[dfmt->idx].width;
+
+ /* Only our defined HPP fmts should appear here. */
+ BUG_ON(!header);
+
+ if (data__files_cnt > 2)
+ scnprintf(buf, MAX_HEADER_NAME, "%s/%d", header, d->idx);
+
+#define NAME (data__files_cnt > 2 ? buf : header)
+ dfmt->header_width = width;
+ width = (int) strlen(NAME);
+ if (dfmt->header_width < width)
+ dfmt->header_width = width;
+
+ scnprintf(buf_indent, MAX_HEADER_NAME, "%*s",
+ dfmt->header_width, NAME);
+
+ dfmt->header = strdup(buf_indent);
+#undef MAX_HEADER_NAME
+#undef NAME
+}
+
+static void data__hpp_register(struct data__file *d, int idx)
+{
+ struct diff_hpp_fmt *dfmt = &d->fmt[idx];
+ struct perf_hpp_fmt *fmt = &dfmt->fmt;
+
+ dfmt->idx = idx;
+
+ fmt->header = hpp__header;
+ fmt->width = hpp__width;
+ fmt->entry = hpp__entry_global;
+
+ /* TODO more colors */
+ switch (idx) {
+ case PERF_HPP_DIFF__BASELINE:
+ fmt->color = hpp__color_baseline;
+ break;
+ case PERF_HPP_DIFF__DELTA:
+ fmt->color = hpp__color_delta;
+ break;
+ case PERF_HPP_DIFF__RATIO:
+ fmt->color = hpp__color_ratio;
+ break;
+ case PERF_HPP_DIFF__WEIGHTED_DIFF:
+ fmt->color = hpp__color_wdiff;
+ break;
+ default:
+ break;
+ }
+
+ init_header(d, dfmt);
+ perf_hpp__column_register(fmt);
+}
+
static void ui_init(void)
{
- perf_hpp__init();
+ struct data__file *d;
+ int i;
+
+ data__for_each_file(i, d) {
- /* No overhead column. */
- perf_hpp__column_enable(PERF_HPP__OVERHEAD, false);
+ /*
+ * Baseline or compute realted columns:
+ *
+ * PERF_HPP_DIFF__BASELINE
+ * PERF_HPP_DIFF__DELTA
+ * PERF_HPP_DIFF__RATIO
+ * PERF_HPP_DIFF__WEIGHTED_DIFF
+ */
+ data__hpp_register(d, i ? compute_2_hpp[compute] :
+ PERF_HPP_DIFF__BASELINE);
- /* Display baseline/delta/displacement columns. */
- perf_hpp__column_enable(PERF_HPP__BASELINE, true);
- perf_hpp__column_enable(PERF_HPP__DELTA, true);
+ /*
+ * And the rest:
+ *
+ * PERF_HPP_DIFF__FORMULA
+ * PERF_HPP_DIFF__PERIOD
+ * PERF_HPP_DIFF__PERIOD_BASELINE
+ */
+ if (show_formula && i)
+ data__hpp_register(d, PERF_HPP_DIFF__FORMULA);
- if (show_displacement)
- perf_hpp__column_enable(PERF_HPP__DISPL, true);
+ if (show_period)
+ data__hpp_register(d, i ? PERF_HPP_DIFF__PERIOD :
+ PERF_HPP_DIFF__PERIOD_BASELINE);
+ }
+}
+
+static int data_init(int argc, const char **argv)
+{
+ struct data__file *d;
+ static const char *defaults[] = {
+ "perf.data.old",
+ "perf.data",
+ };
+ bool use_default = true;
+ int i;
+
+ data__files_cnt = 2;
+
+ if (argc) {
+ if (argc == 1)
+ defaults[1] = argv[0];
+ else {
+ data__files_cnt = argc;
+ use_default = false;
+ }
+ } else if (perf_guest) {
+ defaults[0] = "perf.data.host";
+ defaults[1] = "perf.data.guest";
+ }
+
+ if (sort_compute >= (unsigned int) data__files_cnt) {
+ pr_err("Order option out of limit.\n");
+ return -EINVAL;
+ }
+
+ data__files = zalloc(sizeof(*data__files) * data__files_cnt);
+ if (!data__files)
+ return -ENOMEM;
+
+ data__for_each_file(i, d) {
+ struct perf_data_file *file = &d->file;
+
+ file->path = use_default ? defaults[i] : argv[i];
+ file->mode = PERF_DATA_MODE_READ,
+ file->force = force,
+
+ d->idx = i;
+ }
+
+ return 0;
}
int cmd_diff(int argc, const char **argv, const char *prefix __maybe_unused)
{
- sort_order = diff__default_sort_order;
+ perf_config(perf_default_config, NULL);
+
argc = parse_options(argc, argv, options, diff_usage, 0);
- if (argc) {
- if (argc > 2)
- usage_with_options(diff_usage, options);
- if (argc == 2) {
- input_old = argv[0];
- input_new = argv[1];
- } else
- input_new = argv[0];
- } else if (symbol_conf.default_guest_vmlinux_name ||
- symbol_conf.default_guest_kallsyms) {
- input_old = "perf.data.host";
- input_new = "perf.data.guest";
- }
-
- symbol_conf.exclude_other = false;
+
if (symbol__init() < 0)
return -1;
+ if (data_init(argc, argv) < 0)
+ return -1;
+
ui_init();
- setup_sorting(diff_usage, options);
+ sort__mode = SORT_MODE__DIFF;
+
+ if (setup_sorting() < 0)
+ usage_with_options(diff_usage, options);
+
setup_pager();
- sort_entry__setup_elide(&sort_dso, symbol_conf.dso_list, "dso", NULL);
- sort_entry__setup_elide(&sort_comm, symbol_conf.comm_list, "comm", NULL);
- sort_entry__setup_elide(&sort_sym, symbol_conf.sym_list, "symbol", NULL);
+ sort__setup_elide(NULL);
return __cmd_diff();
}
diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c
index 997afb82691..c99e0de7e54 100644
--- a/tools/perf/builtin-evlist.c
+++ b/tools/perf/builtin-evlist.c
@@ -14,95 +14,23 @@
#include "util/parse-events.h"
#include "util/parse-options.h"
#include "util/session.h"
+#include "util/data.h"
-struct perf_attr_details {
- bool freq;
- bool verbose;
-};
-
-static int comma_printf(bool *first, const char *fmt, ...)
-{
- va_list args;
- int ret = 0;
-
- if (!*first) {
- ret += printf(",");
- } else {
- ret += printf(":");
- *first = false;
- }
-
- va_start(args, fmt);
- ret += vprintf(fmt, args);
- va_end(args);
- return ret;
-}
-
-static int __if_print(bool *first, const char *field, u64 value)
-{
- if (value == 0)
- return 0;
-
- return comma_printf(first, " %s: %" PRIu64, field, value);
-}
-
-#define if_print(field) __if_print(&first, #field, pos->attr.field)
-
-static int __cmd_evlist(const char *input_name, struct perf_attr_details *details)
+static int __cmd_evlist(const char *file_name, struct perf_attr_details *details)
{
struct perf_session *session;
struct perf_evsel *pos;
+ struct perf_data_file file = {
+ .path = file_name,
+ .mode = PERF_DATA_MODE_READ,
+ };
- session = perf_session__new(input_name, O_RDONLY, 0, false, NULL);
+ session = perf_session__new(&file, 0, NULL);
if (session == NULL)
return -ENOMEM;
- list_for_each_entry(pos, &session->evlist->entries, node) {
- bool first = true;
-
- printf("%s", perf_evsel__name(pos));
-
- if (details->verbose || details->freq) {
- comma_printf(&first, " sample_freq=%" PRIu64,
- (u64)pos->attr.sample_freq);
- }
-
- if (details->verbose) {
- if_print(type);
- if_print(config);
- if_print(config1);
- if_print(config2);
- if_print(size);
- if_print(sample_type);
- if_print(read_format);
- if_print(disabled);
- if_print(inherit);
- if_print(pinned);
- if_print(exclusive);
- if_print(exclude_user);
- if_print(exclude_kernel);
- if_print(exclude_hv);
- if_print(exclude_idle);
- if_print(mmap);
- if_print(comm);
- if_print(freq);
- if_print(inherit_stat);
- if_print(enable_on_exec);
- if_print(task);
- if_print(watermark);
- if_print(precise_ip);
- if_print(mmap_data);
- if_print(sample_id_all);
- if_print(exclude_host);
- if_print(exclude_guest);
- if_print(__reserved_1);
- if_print(wakeup_events);
- if_print(bp_type);
- if_print(branch_sample_type);
- }
-
- putchar('\n');
- }
+ evlist__for_each(session->evlist, pos)
+ perf_evsel__fprintf(pos, details, stdout);
perf_session__delete(session);
return 0;
@@ -111,12 +39,13 @@ static int __cmd_evlist(const char *input_name, struct perf_attr_details *detail
int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
{
struct perf_attr_details details = { .verbose = false, };
- const char *input_name = NULL;
const struct option options[] = {
OPT_STRING('i', "input", &input_name, "file", "Input file name"),
OPT_BOOLEAN('F', "freq", &details.freq, "Show the sample frequency"),
OPT_BOOLEAN('v', "verbose", &details.verbose,
"Show all event attr details"),
+ OPT_BOOLEAN('g', "group", &details.event_group,
+ "Show event group information"),
OPT_END()
};
const char * const evlist_usage[] = {
@@ -128,5 +57,10 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(evlist_usage, options);
+ if (details.event_group && (details.verbose || details.freq)) {
+ pr_err("--group option is not compatible with other options\n");
+ usage_with_options(evlist_usage, options);
+ }
+
return __cmd_evlist(input_name, &details);
}
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 4688bea95c1..16c7c11ad06 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -8,35 +8,47 @@
#include "builtin.h"
#include "perf.h"
+#include "util/color.h"
+#include "util/evlist.h"
+#include "util/evsel.h"
#include "util/session.h"
#include "util/tool.h"
#include "util/debug.h"
+#include "util/build-id.h"
+#include "util/data.h"
#include "util/parse-options.h"
+#include <linux/list.h>
+
struct perf_inject {
- struct perf_tool tool;
- bool build_ids;
+ struct perf_tool tool;
+ bool build_ids;
+ bool sched_stat;
+ const char *input_name;
+ struct perf_data_file output;
+ u64 bytes_written;
+ struct list_head samples;
};
-static int perf_event__repipe_synth(struct perf_tool *tool __maybe_unused,
- union perf_event *event,
- struct machine *machine __maybe_unused)
-{
- uint32_t size;
- void *buf = event;
-
- size = event->header.size;
+struct event_entry {
+ struct list_head node;
+ u32 tid;
+ union perf_event event[0];
+};
- while (size) {
- int ret = write(STDOUT_FILENO, buf, size);
- if (ret < 0)
- return -errno;
+static int perf_event__repipe_synth(struct perf_tool *tool,
+ union perf_event *event)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ ssize_t size;
- size -= ret;
- buf += ret;
- }
+ size = perf_data_file__write(&inject->output, event,
+ event->header.size);
+ if (size < 0)
+ return -errno;
+ inject->bytes_written += size;
return 0;
}
@@ -45,48 +57,55 @@ static int perf_event__repipe_op2_synth(struct perf_tool *tool,
struct perf_session *session
__maybe_unused)
{
- return perf_event__repipe_synth(tool, event, NULL);
+ return perf_event__repipe_synth(tool, event);
}
-static int perf_event__repipe_event_type_synth(struct perf_tool *tool,
- union perf_event *event)
-{
- return perf_event__repipe_synth(tool, event, NULL);
-}
-
-static int perf_event__repipe_tracing_data_synth(union perf_event *event,
- struct perf_session *session
- __maybe_unused)
-{
- return perf_event__repipe_synth(NULL, event, NULL);
-}
-
-static int perf_event__repipe_attr(union perf_event *event,
- struct perf_evlist **pevlist __maybe_unused)
+static int perf_event__repipe_attr(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_evlist **pevlist)
{
+ struct perf_inject *inject = container_of(tool, struct perf_inject,
+ tool);
int ret;
- ret = perf_event__process_attr(event, pevlist);
+
+ ret = perf_event__process_attr(tool, event, pevlist);
if (ret)
return ret;
- return perf_event__repipe_synth(NULL, event, NULL);
+ if (!inject->output.is_pipe)
+ return 0;
+
+ return perf_event__repipe_synth(tool, event);
}
static int perf_event__repipe(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample __maybe_unused,
- struct machine *machine)
+ struct machine *machine __maybe_unused)
{
- return perf_event__repipe_synth(tool, event, machine);
+ return perf_event__repipe_synth(tool, event);
}
+typedef int (*inject_handler)(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine);
+
static int perf_event__repipe_sample(struct perf_tool *tool,
union perf_event *event,
- struct perf_sample *sample __maybe_unused,
- struct perf_evsel *evsel __maybe_unused,
- struct machine *machine)
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
{
- return perf_event__repipe_synth(tool, event, machine);
+ if (evsel->handler) {
+ inject_handler f = evsel->handler;
+ return f(tool, event, sample, evsel, machine);
+ }
+
+ build_id__mark_dso_hit(tool, event, sample, evsel, machine);
+
+ return perf_event__repipe_synth(tool, event);
}
static int perf_event__repipe_mmap(struct perf_tool *tool,
@@ -102,62 +121,76 @@ static int perf_event__repipe_mmap(struct perf_tool *tool,
return err;
}
-static int perf_event__repipe_task(struct perf_tool *tool,
+static int perf_event__repipe_mmap2(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ int err;
+
+ err = perf_event__process_mmap2(tool, event, sample, machine);
+ perf_event__repipe(tool, event, sample, machine);
+
+ return err;
+}
+
+static int perf_event__repipe_fork(struct perf_tool *tool,
union perf_event *event,
struct perf_sample *sample,
struct machine *machine)
{
int err;
- err = perf_event__process_task(tool, event, sample, machine);
+ err = perf_event__process_fork(tool, event, sample, machine);
perf_event__repipe(tool, event, sample, machine);
return err;
}
-static int perf_event__repipe_tracing_data(union perf_event *event,
+static int perf_event__repipe_tracing_data(struct perf_tool *tool,
+ union perf_event *event,
struct perf_session *session)
{
int err;
- perf_event__repipe_synth(NULL, event, NULL);
- err = perf_event__process_tracing_data(event, session);
+ perf_event__repipe_synth(tool, event);
+ err = perf_event__process_tracing_data(tool, event, session);
return err;
}
-static int dso__read_build_id(struct dso *self)
+static int dso__read_build_id(struct dso *dso)
{
- if (self->has_build_id)
+ if (dso->has_build_id)
return 0;
- if (filename__read_build_id(self->long_name, self->build_id,
- sizeof(self->build_id)) > 0) {
- self->has_build_id = true;
+ if (filename__read_build_id(dso->long_name, dso->build_id,
+ sizeof(dso->build_id)) > 0) {
+ dso->has_build_id = true;
return 0;
}
return -1;
}
-static int dso__inject_build_id(struct dso *self, struct perf_tool *tool,
+static int dso__inject_build_id(struct dso *dso, struct perf_tool *tool,
struct machine *machine)
{
u16 misc = PERF_RECORD_MISC_USER;
int err;
- if (dso__read_build_id(self) < 0) {
- pr_debug("no build_id found for %s\n", self->long_name);
+ if (dso__read_build_id(dso) < 0) {
+ pr_debug("no build_id found for %s\n", dso->long_name);
return -1;
}
- if (self->kernel)
+ if (dso->kernel)
misc = PERF_RECORD_MISC_KERNEL;
- err = perf_event__synthesize_build_id(tool, self, misc, perf_event__repipe,
+ err = perf_event__synthesize_build_id(tool, dso, misc, perf_event__repipe,
machine);
if (err) {
- pr_err("Can't synthesize build_id event for %s\n", self->long_name);
+ pr_err("Can't synthesize build_id event for %s\n", dso->long_name);
return -1;
}
@@ -176,7 +209,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
- thread = machine__findnew_thread(machine, event->ip.pid);
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL) {
pr_err("problem processing %d event, skipping it.\n",
event->header.type);
@@ -184,7 +217,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
}
thread__find_addr_map(thread, machine, cpumode, MAP__FUNCTION,
- event->ip.ip, &al);
+ sample->ip, &al);
if (al.map != NULL) {
if (!al.map->dso->hit) {
@@ -196,7 +229,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
* account this as unresolved.
*/
} else {
-#ifdef LIBELF_SUPPORT
+#ifdef HAVE_LIBELF_SUPPORT
pr_warning("no symbols found in %s, maybe "
"install a debug package?\n",
al.map->dso->long_name);
@@ -210,33 +243,156 @@ repipe:
return 0;
}
-extern volatile int session_done;
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ struct event_entry *ent;
+
+ list_for_each_entry(ent, &inject->samples, node) {
+ if (sample->tid == ent->tid) {
+ list_del_init(&ent->node);
+ free(ent);
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ struct event_entry *ent;
+
+ perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
+
+ ent = malloc(event->header.size + sizeof(struct event_entry));
+ if (ent == NULL) {
+ color_fprintf(stderr, PERF_COLOR_RED,
+ "Not enough memory to process sched switch event!");
+ return -1;
+ }
+
+ ent->tid = sample->tid;
+ memcpy(&ent->event, event, event->header.size);
+ list_add(&ent->node, &inject->samples);
+ return 0;
+}
+
+static int perf_inject__sched_stat(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ struct event_entry *ent;
+ union perf_event *event_sw;
+ struct perf_sample sample_sw;
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+ u32 pid = perf_evsel__intval(evsel, sample, "pid");
+
+ list_for_each_entry(ent, &inject->samples, node) {
+ if (pid == ent->tid)
+ goto found;
+ }
+
+ return 0;
+found:
+ event_sw = &ent->event[0];
+ perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+ sample_sw.period = sample->period;
+ sample_sw.time = sample->time;
+ perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+ evsel->attr.read_format, &sample_sw,
+ false);
+ build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+ return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
static void sig_handler(int sig __maybe_unused)
{
session_done = 1;
}
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+ u64 sample_type, const char *sample_msg)
+{
+ struct perf_event_attr *attr = &evsel->attr;
+ const char *name = perf_evsel__name(evsel);
+
+ if (!(attr->sample_type & sample_type)) {
+ pr_err("Samples for %s event do not have %s attribute set.",
+ name, sample_msg);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int __cmd_inject(struct perf_inject *inject)
{
struct perf_session *session;
int ret = -EINVAL;
+ struct perf_data_file file = {
+ .path = inject->input_name,
+ .mode = PERF_DATA_MODE_READ,
+ };
+ struct perf_data_file *file_out = &inject->output;
signal(SIGINT, sig_handler);
- if (inject->build_ids) {
- inject->tool.sample = perf_event__inject_buildid;
+ if (inject->build_ids || inject->sched_stat) {
inject->tool.mmap = perf_event__repipe_mmap;
- inject->tool.fork = perf_event__repipe_task;
+ inject->tool.mmap2 = perf_event__repipe_mmap2;
+ inject->tool.fork = perf_event__repipe_fork;
inject->tool.tracing_data = perf_event__repipe_tracing_data;
}
- session = perf_session__new("-", O_RDONLY, false, true, &inject->tool);
+ session = perf_session__new(&file, true, &inject->tool);
if (session == NULL)
return -ENOMEM;
+ if (inject->build_ids) {
+ inject->tool.sample = perf_event__inject_buildid;
+ } else if (inject->sched_stat) {
+ struct perf_evsel *evsel;
+
+ inject->tool.ordered_samples = true;
+
+ evlist__for_each(session->evlist, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "sched:sched_switch")) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ return -EINVAL;
+
+ evsel->handler = perf_inject__sched_switch;
+ } else if (!strcmp(name, "sched:sched_process_exit"))
+ evsel->handler = perf_inject__sched_process_exit;
+ else if (!strncmp(name, "sched:sched_stat_", 17))
+ evsel->handler = perf_inject__sched_stat;
+ }
+ }
+
+ if (!file_out->is_pipe)
+ lseek(file_out->fd, session->header.data_offset, SEEK_SET);
+
ret = perf_session__process_events(session, &inject->tool);
+ if (!file_out->is_pipe) {
+ session->header.data_size = inject->bytes_written;
+ perf_session__write_header(session, session->evlist, file_out->fd, true);
+ }
+
perf_session__delete(session);
return ret;
@@ -248,6 +404,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = perf_event__repipe_sample,
.mmap = perf_event__repipe,
+ .mmap2 = perf_event__repipe,
.comm = perf_event__repipe,
.fork = perf_event__repipe,
.exit = perf_event__repipe,
@@ -256,14 +413,27 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
.throttle = perf_event__repipe,
.unthrottle = perf_event__repipe,
.attr = perf_event__repipe_attr,
- .event_type = perf_event__repipe_event_type_synth,
- .tracing_data = perf_event__repipe_tracing_data_synth,
+ .tracing_data = perf_event__repipe_op2_synth,
+ .finished_round = perf_event__repipe_op2_synth,
.build_id = perf_event__repipe_op2_synth,
},
+ .input_name = "-",
+ .samples = LIST_HEAD_INIT(inject.samples),
+ .output = {
+ .path = "-",
+ .mode = PERF_DATA_MODE_WRITE,
+ },
};
const struct option options[] = {
OPT_BOOLEAN('b', "build-ids", &inject.build_ids,
"Inject build-ids into the output stream"),
+ OPT_STRING('i', "input", &inject.input_name, "file",
+ "input file name"),
+ OPT_STRING('o', "output", &inject.output.path, "file",
+ "output file name"),
+ OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
+ "Merge sched-stat and sched-switch for getting events "
+ "where and how long tasks slept"),
OPT_INCR('v', "verbose", &verbose,
"be more verbose (show build ids, etc)"),
OPT_END()
@@ -281,6 +451,11 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(inject_usage, options);
+ if (perf_data_file__open(&inject.output)) {
+ perror("failed to create output file");
+ return -1;
+ }
+
if (symbol__init() < 0)
return -1;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 14bf82f6365..bef3376bfaf 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -13,10 +13,13 @@
#include "util/parse-options.h"
#include "util/trace-event.h"
+#include "util/data.h"
+#include "util/cpumap.h"
#include "util/debug.h"
#include <linux/rbtree.h>
+#include <linux/string.h>
struct alloc_stat;
typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
@@ -29,9 +32,6 @@ static int caller_lines = -1;
static bool raw_ip;
-static int *cpunode_map;
-static int max_cpu_num;
-
struct alloc_stat {
u64 call_site;
u64 ptr;
@@ -53,76 +53,6 @@ static struct rb_root root_caller_sorted;
static unsigned long total_requested, total_allocated;
static unsigned long nr_allocs, nr_cross_allocs;
-#define PATH_SYS_NODE "/sys/devices/system/node"
-
-static int init_cpunode_map(void)
-{
- FILE *fp;
- int i, err = -1;
-
- fp = fopen("/sys/devices/system/cpu/kernel_max", "r");
- if (!fp) {
- max_cpu_num = 4096;
- return 0;
- }
-
- if (fscanf(fp, "%d", &max_cpu_num) < 1) {
- pr_err("Failed to read 'kernel_max' from sysfs");
- goto out_close;
- }
-
- max_cpu_num++;
-
- cpunode_map = calloc(max_cpu_num, sizeof(int));
- if (!cpunode_map) {
- pr_err("%s: calloc failed\n", __func__);
- goto out_close;
- }
-
- for (i = 0; i < max_cpu_num; i++)
- cpunode_map[i] = -1;
-
- err = 0;
-out_close:
- fclose(fp);
- return err;
-}
-
-static int setup_cpunode_map(void)
-{
- struct dirent *dent1, *dent2;
- DIR *dir1, *dir2;
- unsigned int cpu, mem;
- char buf[PATH_MAX];
-
- if (init_cpunode_map())
- return -1;
-
- dir1 = opendir(PATH_SYS_NODE);
- if (!dir1)
- return -1;
-
- while ((dent1 = readdir(dir1)) != NULL) {
- if (dent1->d_type != DT_DIR ||
- sscanf(dent1->d_name, "node%u", &mem) < 1)
- continue;
-
- snprintf(buf, PATH_MAX, "%s/%s", PATH_SYS_NODE, dent1->d_name);
- dir2 = opendir(buf);
- if (!dir2)
- continue;
- while ((dent2 = readdir(dir2)) != NULL) {
- if (dent2->d_type != DT_LNK ||
- sscanf(dent2->d_name, "cpu%u", &cpu) < 1)
- continue;
- cpunode_map[cpu] = mem;
- }
- closedir(dir2);
- }
- closedir(dir1);
- return 0;
-}
-
static int insert_alloc_stat(unsigned long call_site, unsigned long ptr,
int bytes_req, int bytes_alloc, int cpu)
{
@@ -233,7 +163,7 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
int ret = perf_evsel__process_alloc_event(evsel, sample);
if (!ret) {
- int node1 = cpunode_map[sample->cpu],
+ int node1 = cpu__get_node(sample->cpu),
node2 = perf_evsel__intval(evsel, sample, "node");
if (node1 != node2)
@@ -304,7 +234,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct perf_evsel *evsel,
struct machine *machine)
{
- struct thread *thread = machine__findnew_thread(machine, event->ip.pid);
+ struct thread *thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
@@ -312,10 +243,10 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
- dump_printf(" ... thread: %s:%d\n", thread->comm, thread->pid);
+ dump_printf(" ... thread: %s:%d\n", thread__comm_str(thread), thread->tid);
- if (evsel->handler.func != NULL) {
- tracepoint_handler f = evsel->handler.func;
+ if (evsel->handler != NULL) {
+ tracepoint_handler f = evsel->handler;
return f(evsel, sample);
}
@@ -340,7 +271,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
int n_lines, int is_caller)
{
struct rb_node *next;
- struct machine *machine;
+ struct machine *machine = &session->machines.host;
printf("%.102s\n", graph_dotted_line);
printf(" %-34s |", is_caller ? "Callsite": "Alloc Ptr");
@@ -349,11 +280,6 @@ static void __print_result(struct rb_root *root, struct perf_session *session,
next = rb_first(root);
- machine = perf_session__find_host_machine(session);
- if (!machine) {
- pr_err("__print_result: couldn't find kernel information\n");
- return;
- }
while (next && n_lines--) {
struct alloc_stat *data = rb_entry(next, struct alloc_stat,
node);
@@ -477,7 +403,7 @@ static void sort_result(void)
__sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort);
}
-static int __cmd_kmem(const char *input_name)
+static int __cmd_kmem(void)
{
int err = -EINVAL;
struct perf_session *session;
@@ -489,8 +415,12 @@ static int __cmd_kmem(const char *input_name)
{ "kmem:kfree", perf_evsel__process_free_event, },
{ "kmem:kmem_cache_free", perf_evsel__process_free_event, },
};
+ struct perf_data_file file = {
+ .path = input_name,
+ .mode = PERF_DATA_MODE_READ,
+ };
- session = perf_session__new(input_name, O_RDONLY, 0, false, &perf_kmem);
+ session = perf_session__new(&file, false, &perf_kmem);
if (session == NULL)
return -ENOMEM;
@@ -614,8 +544,7 @@ static struct sort_dimension *avail_sorts[] = {
&pingpong_sort_dimension,
};
-#define NUM_AVAIL_SORTS \
- (int)(sizeof(avail_sorts) / sizeof(struct sort_dimension *))
+#define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts))
static int sort_dimension__add(const char *tok, struct list_head *list)
{
@@ -624,12 +553,11 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
for (i = 0; i < NUM_AVAIL_SORTS; i++) {
if (!strcmp(avail_sorts[i]->name, tok)) {
- sort = malloc(sizeof(*sort));
+ sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
if (!sort) {
- pr_err("%s: malloc failed\n", __func__);
+ pr_err("%s: memdup failed\n", __func__);
return -1;
}
- memcpy(sort, avail_sorts[i], sizeof(*sort));
list_add_tail(&sort->list, list);
return 0;
}
@@ -714,7 +642,7 @@ static int parse_line_opt(const struct option *opt __maybe_unused,
static int __cmd_record(int argc, const char **argv)
{
const char * const record_args[] = {
- "record", "-a", "-R", "-f", "-c", "1",
+ "record", "-a", "-R", "-c", "1",
"-e", "kmem:kmalloc",
"-e", "kmem:kmalloc_node",
"-e", "kmem:kfree",
@@ -743,7 +671,6 @@ static int __cmd_record(int argc, const char **argv)
int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
{
const char * const default_sort_order = "frag,hit,bytes";
- const char *input_name = NULL;
const struct option kmem_options[] = {
OPT_STRING('i', "input", &input_name, "file", "input file name"),
OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL,
@@ -757,11 +684,13 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
OPT_END()
};
- const char * const kmem_usage[] = {
- "perf kmem [<options>] {record|stat}",
+ const char *const kmem_subcommands[] = { "record", "stat", NULL };
+ const char *kmem_usage[] = {
+ NULL,
NULL
};
- argc = parse_options(argc, argv, kmem_options, kmem_usage, 0);
+ argc = parse_options_subcommand(argc, argv, kmem_options,
+ kmem_subcommands, kmem_usage, 0);
if (!argc)
usage_with_options(kmem_usage, kmem_options);
@@ -771,7 +700,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
if (!strncmp(argv[0], "rec", 3)) {
return __cmd_record(argc, argv);
} else if (!strcmp(argv[0], "stat")) {
- if (setup_cpunode_map())
+ if (cpu__setup_cpunode_map())
return -1;
if (list_empty(&caller_sort))
@@ -779,7 +708,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
if (list_empty(&alloc_sort))
setup_sorting(&alloc_sort, default_sort_order);
- return __cmd_kmem(input_name);
+ return __cmd_kmem();
} else
usage_with_options(kmem_usage, kmem_options);
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 260abc535b5..0f1e5a2f6ad 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -2,29 +2,37 @@
#include "perf.h"
#include "util/evsel.h"
+#include "util/evlist.h"
#include "util/util.h"
#include "util/cache.h"
#include "util/symbol.h"
#include "util/thread.h"
#include "util/header.h"
#include "util/session.h"
-
+#include "util/intlist.h"
#include "util/parse-options.h"
#include "util/trace-event.h"
#include "util/debug.h"
-#include "util/debugfs.h"
+#include <api/fs/debugfs.h>
#include "util/tool.h"
#include "util/stat.h"
+#include "util/top.h"
+#include "util/data.h"
#include <sys/prctl.h>
+#ifdef HAVE_TIMERFD_SUPPORT
+#include <sys/timerfd.h>
+#endif
+#include <termios.h>
#include <semaphore.h>
#include <pthread.h>
#include <math.h>
-#include "../../arch/x86/include/asm/svm.h"
-#include "../../arch/x86/include/asm/vmx.h"
-#include "../../arch/x86/include/asm/kvm.h"
+#if defined(__i386__) || defined(__x86_64__)
+#include <asm/svm.h>
+#include <asm/vmx.h>
+#include <asm/kvm.h>
struct event_key {
#define INVALID_KEY (~0ULL)
@@ -58,7 +66,7 @@ struct kvm_event_key {
};
-struct perf_kvm;
+struct perf_kvm_stat;
struct kvm_events_ops {
bool (*is_begin_event)(struct perf_evsel *evsel,
@@ -66,7 +74,7 @@ struct kvm_events_ops {
struct event_key *key);
bool (*is_end_event)(struct perf_evsel *evsel,
struct perf_sample *sample, struct event_key *key);
- void (*decode_key)(struct perf_kvm *kvm, struct event_key *key,
+ void (*decode_key)(struct perf_kvm_stat *kvm, struct event_key *key,
char decode[20]);
const char *name;
};
@@ -79,8 +87,10 @@ struct exit_reasons_table {
#define EVENTS_BITS 12
#define EVENTS_CACHE_SIZE (1UL << EVENTS_BITS)
-struct perf_kvm {
+struct perf_kvm_stat {
struct perf_tool tool;
+ struct record_opts opts;
+ struct perf_evlist *evlist;
struct perf_session *session;
const char *file_name;
@@ -95,10 +105,20 @@ struct perf_kvm {
struct kvm_events_ops *events_ops;
key_cmp_fun compare;
struct list_head kvm_events_cache[EVENTS_CACHE_SIZE];
+
u64 total_time;
u64 total_count;
+ u64 lost_events;
+ u64 duration;
+
+ const char *pid_str;
+ struct intlist *pid_list;
struct rb_root result;
+
+ int timerfd;
+ unsigned int display_time;
+ bool live;
};
@@ -146,7 +166,7 @@ static struct exit_reasons_table svm_exit_reasons[] = {
SVM_EXIT_REASONS
};
-static const char *get_exit_reason(struct perf_kvm *kvm, u64 exit_code)
+static const char *get_exit_reason(struct perf_kvm_stat *kvm, u64 exit_code)
{
int i = kvm->exit_reasons_size;
struct exit_reasons_table *tbl = kvm->exit_reasons;
@@ -162,7 +182,7 @@ static const char *get_exit_reason(struct perf_kvm *kvm, u64 exit_code)
return "UNKNOWN";
}
-static void exit_event_decode_key(struct perf_kvm *kvm,
+static void exit_event_decode_key(struct perf_kvm_stat *kvm,
struct event_key *key,
char decode[20])
{
@@ -228,7 +248,7 @@ static bool mmio_event_end(struct perf_evsel *evsel, struct perf_sample *sample,
return false;
}
-static void mmio_event_decode_key(struct perf_kvm *kvm __maybe_unused,
+static void mmio_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
struct event_key *key,
char decode[20])
{
@@ -271,7 +291,7 @@ static bool ioport_event_end(struct perf_evsel *evsel,
return kvm_entry_event(evsel);
}
-static void ioport_event_decode_key(struct perf_kvm *kvm __maybe_unused,
+static void ioport_event_decode_key(struct perf_kvm_stat *kvm __maybe_unused,
struct event_key *key,
char decode[20])
{
@@ -286,7 +306,7 @@ static struct kvm_events_ops ioport_events = {
.name = "IO Port Access"
};
-static bool register_kvm_events_ops(struct perf_kvm *kvm)
+static bool register_kvm_events_ops(struct perf_kvm_stat *kvm)
{
bool ret = true;
@@ -311,14 +331,38 @@ struct vcpu_event_record {
};
-static void init_kvm_event_record(struct perf_kvm *kvm)
+static void init_kvm_event_record(struct perf_kvm_stat *kvm)
{
- int i;
+ unsigned int i;
- for (i = 0; i < (int)EVENTS_CACHE_SIZE; i++)
+ for (i = 0; i < EVENTS_CACHE_SIZE; i++)
INIT_LIST_HEAD(&kvm->kvm_events_cache[i]);
}
+#ifdef HAVE_TIMERFD_SUPPORT
+static void clear_events_cache_stats(struct list_head *kvm_events_cache)
+{
+ struct list_head *head;
+ struct kvm_event *event;
+ unsigned int i;
+ int j;
+
+ for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
+ head = &kvm_events_cache[i];
+ list_for_each_entry(event, head, hash_entry) {
+ /* reset stats for event */
+ event->total.time = 0;
+ init_stats(&event->total.stats);
+
+ for (j = 0; j < event->max_vcpu; ++j) {
+ event->vcpu[j].time = 0;
+ init_stats(&event->vcpu[j].stats);
+ }
+ }
+ }
+}
+#endif
+
static int kvm_events_hash_fn(u64 key)
{
return key & (EVENTS_CACHE_SIZE - 1);
@@ -327,6 +371,7 @@ static int kvm_events_hash_fn(u64 key)
static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
{
int old_max_vcpu = event->max_vcpu;
+ void *prev;
if (vcpu_id < event->max_vcpu)
return true;
@@ -334,9 +379,11 @@ static bool kvm_event_expand(struct kvm_event *event, int vcpu_id)
while (event->max_vcpu <= vcpu_id)
event->max_vcpu += DEFAULT_VCPU_NUM;
+ prev = event->vcpu;
event->vcpu = realloc(event->vcpu,
event->max_vcpu * sizeof(*event->vcpu));
if (!event->vcpu) {
+ free(prev);
pr_err("Not enough memory\n");
return false;
}
@@ -357,10 +404,11 @@ static struct kvm_event *kvm_alloc_init_event(struct event_key *key)
}
event->key = *key;
+ init_stats(&event->total.stats);
return event;
}
-static struct kvm_event *find_create_kvm_event(struct perf_kvm *kvm,
+static struct kvm_event *find_create_kvm_event(struct perf_kvm_stat *kvm,
struct event_key *key)
{
struct kvm_event *event;
@@ -369,9 +417,10 @@ static struct kvm_event *find_create_kvm_event(struct perf_kvm *kvm,
BUG_ON(key->key == INVALID_KEY);
head = &kvm->kvm_events_cache[kvm_events_hash_fn(key->key)];
- list_for_each_entry(event, head, hash_entry)
+ list_for_each_entry(event, head, hash_entry) {
if (event->key.key == key->key && event->key.info == key->info)
return event;
+ }
event = kvm_alloc_init_event(key);
if (!event)
@@ -381,7 +430,7 @@ static struct kvm_event *find_create_kvm_event(struct perf_kvm *kvm,
return event;
}
-static bool handle_begin_event(struct perf_kvm *kvm,
+static bool handle_begin_event(struct perf_kvm_stat *kvm,
struct vcpu_event_record *vcpu_record,
struct event_key *key, u64 timestamp)
{
@@ -416,7 +465,10 @@ static double kvm_event_rel_stddev(int vcpu_id, struct kvm_event *event)
static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
u64 time_diff)
{
- kvm_update_event_stats(&event->total, time_diff);
+ if (vcpu_id == -1) {
+ kvm_update_event_stats(&event->total, time_diff);
+ return true;
+ }
if (!kvm_event_expand(event, vcpu_id))
return false;
@@ -425,13 +477,19 @@ static bool update_kvm_event(struct kvm_event *event, int vcpu_id,
return true;
}
-static bool handle_end_event(struct perf_kvm *kvm,
+static bool handle_end_event(struct perf_kvm_stat *kvm,
struct vcpu_event_record *vcpu_record,
struct event_key *key,
- u64 timestamp)
+ struct perf_sample *sample)
{
struct kvm_event *event;
u64 time_begin, time_diff;
+ int vcpu;
+
+ if (kvm->trace_vcpu == -1)
+ vcpu = -1;
+ else
+ vcpu = vcpu_record->vcpu_id;
event = vcpu_record->last_event;
time_begin = vcpu_record->start_time;
@@ -458,10 +516,26 @@ static bool handle_end_event(struct perf_kvm *kvm,
vcpu_record->last_event = NULL;
vcpu_record->start_time = 0;
- BUG_ON(timestamp < time_begin);
+ /* seems to happen once in a while during live mode */
+ if (sample->time < time_begin) {
+ pr_debug("End time before begin time; skipping event.\n");
+ return true;
+ }
+
+ time_diff = sample->time - time_begin;
+
+ if (kvm->duration && time_diff > kvm->duration) {
+ char decode[32];
+
+ kvm->events_ops->decode_key(kvm, &event->key, decode);
+ if (strcmp(decode, "HLT")) {
+ pr_info("%" PRIu64 " VM %d, vcpu %d: %s event took %" PRIu64 "usec\n",
+ sample->time, sample->pid, vcpu_record->vcpu_id,
+ decode, time_diff/1000);
+ }
+ }
- time_diff = timestamp - time_begin;
- return update_kvm_event(event, vcpu_record->vcpu_id, time_diff);
+ return update_kvm_event(event, vcpu, time_diff);
}
static
@@ -486,7 +560,7 @@ struct vcpu_event_record *per_vcpu_record(struct thread *thread,
return thread->priv;
}
-static bool handle_kvm_event(struct perf_kvm *kvm,
+static bool handle_kvm_event(struct perf_kvm_stat *kvm,
struct thread *thread,
struct perf_evsel *evsel,
struct perf_sample *sample)
@@ -498,11 +572,16 @@ static bool handle_kvm_event(struct perf_kvm *kvm,
if (!vcpu_record)
return true;
+ /* only process events for vcpus user cares about */
+ if ((kvm->trace_vcpu != -1) &&
+ (kvm->trace_vcpu != vcpu_record->vcpu_id))
+ return true;
+
if (kvm->events_ops->is_begin_event(evsel, sample, &key))
return handle_begin_event(kvm, vcpu_record, &key, sample->time);
if (kvm->events_ops->is_end_event(evsel, sample, &key))
- return handle_end_event(kvm, vcpu_record, &key, sample->time);
+ return handle_end_event(kvm, vcpu_record, &key, sample);
return true;
}
@@ -531,6 +610,8 @@ static int compare_kvm_event_ ## func(struct kvm_event *one, \
GET_EVENT_KEY(time, time);
COMPARE_EVENT_KEY(count, stats.n);
COMPARE_EVENT_KEY(mean, stats.mean);
+GET_EVENT_KEY(max, stats.max);
+GET_EVENT_KEY(min, stats.min);
#define DEF_SORT_NAME_KEY(name, compare_key) \
{ #name, compare_kvm_event_ ## compare_key }
@@ -541,7 +622,7 @@ static struct kvm_event_key keys[] = {
{ NULL, NULL }
};
-static bool select_key(struct perf_kvm *kvm)
+static bool select_key(struct perf_kvm_stat *kvm)
{
int i;
@@ -577,7 +658,8 @@ static void insert_to_result(struct rb_root *result, struct kvm_event *event,
rb_insert_color(&event->rb, result);
}
-static void update_total_count(struct perf_kvm *kvm, struct kvm_event *event)
+static void
+update_total_count(struct perf_kvm_stat *kvm, struct kvm_event *event)
{
int vcpu = kvm->trace_vcpu;
@@ -590,19 +672,21 @@ static bool event_is_valid(struct kvm_event *event, int vcpu)
return !!get_event_count(event, vcpu);
}
-static void sort_result(struct perf_kvm *kvm)
+static void sort_result(struct perf_kvm_stat *kvm)
{
unsigned int i;
int vcpu = kvm->trace_vcpu;
struct kvm_event *event;
- for (i = 0; i < EVENTS_CACHE_SIZE; i++)
- list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry)
+ for (i = 0; i < EVENTS_CACHE_SIZE; i++) {
+ list_for_each_entry(event, &kvm->kvm_events_cache[i], hash_entry) {
if (event_is_valid(event, vcpu)) {
update_total_count(kvm, event);
insert_to_result(&kvm->result, event,
kvm->compare, vcpu);
}
+ }
+ }
}
/* returns left most element of result, and erase it */
@@ -617,50 +701,113 @@ static struct kvm_event *pop_from_result(struct rb_root *result)
return container_of(node, struct kvm_event, rb);
}
-static void print_vcpu_info(int vcpu)
+static void print_vcpu_info(struct perf_kvm_stat *kvm)
{
+ int vcpu = kvm->trace_vcpu;
+
pr_info("Analyze events for ");
+ if (kvm->live) {
+ if (kvm->opts.target.system_wide)
+ pr_info("all VMs, ");
+ else if (kvm->opts.target.pid)
+ pr_info("pid(s) %s, ", kvm->opts.target.pid);
+ else
+ pr_info("dazed and confused on what is monitored, ");
+ }
+
if (vcpu == -1)
pr_info("all VCPUs:\n\n");
else
pr_info("VCPU %d:\n\n", vcpu);
}
-static void print_result(struct perf_kvm *kvm)
+static void show_timeofday(void)
+{
+ char date[64];
+ struct timeval tv;
+ struct tm ltime;
+
+ gettimeofday(&tv, NULL);
+ if (localtime_r(&tv.tv_sec, &ltime)) {
+ strftime(date, sizeof(date), "%H:%M:%S", &ltime);
+ pr_info("%s.%06ld", date, tv.tv_usec);
+ } else
+ pr_info("00:00:00.000000");
+
+ return;
+}
+
+static void print_result(struct perf_kvm_stat *kvm)
{
char decode[20];
struct kvm_event *event;
int vcpu = kvm->trace_vcpu;
+ if (kvm->live) {
+ puts(CONSOLE_CLEAR);
+ show_timeofday();
+ }
+
pr_info("\n\n");
- print_vcpu_info(vcpu);
+ print_vcpu_info(kvm);
pr_info("%20s ", kvm->events_ops->name);
pr_info("%10s ", "Samples");
pr_info("%9s ", "Samples%");
pr_info("%9s ", "Time%");
+ pr_info("%10s ", "Min Time");
+ pr_info("%10s ", "Max Time");
pr_info("%16s ", "Avg time");
pr_info("\n\n");
while ((event = pop_from_result(&kvm->result))) {
- u64 ecount, etime;
+ u64 ecount, etime, max, min;
ecount = get_event_count(event, vcpu);
etime = get_event_time(event, vcpu);
+ max = get_event_max(event, vcpu);
+ min = get_event_min(event, vcpu);
kvm->events_ops->decode_key(kvm, &event->key, decode);
pr_info("%20s ", decode);
pr_info("%10llu ", (unsigned long long)ecount);
pr_info("%8.2f%% ", (double)ecount / kvm->total_count * 100);
pr_info("%8.2f%% ", (double)etime / kvm->total_time * 100);
+ pr_info("%8" PRIu64 "us ", min / 1000);
+ pr_info("%8" PRIu64 "us ", max / 1000);
pr_info("%9.2fus ( +-%7.2f%% )", (double)etime / ecount/1e3,
kvm_event_rel_stddev(vcpu, event));
pr_info("\n");
}
- pr_info("\nTotal Samples:%lld, Total events handled time:%.2fus.\n\n",
- (unsigned long long)kvm->total_count, kvm->total_time / 1e3);
+ pr_info("\nTotal Samples:%" PRIu64 ", Total events handled time:%.2fus.\n\n",
+ kvm->total_count, kvm->total_time / 1e3);
+
+ if (kvm->lost_events)
+ pr_info("\nLost events: %" PRIu64 "\n\n", kvm->lost_events);
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+static int process_lost_event(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool);
+
+ kvm->lost_events++;
+ return 0;
+}
+#endif
+
+static bool skip_sample(struct perf_kvm_stat *kvm,
+ struct perf_sample *sample)
+{
+ if (kvm->pid_list && intlist__find(kvm->pid_list, sample->pid) == NULL)
+ return true;
+
+ return false;
}
static int process_sample_event(struct perf_tool *tool,
@@ -669,9 +816,14 @@ static int process_sample_event(struct perf_tool *tool,
struct perf_evsel *evsel,
struct machine *machine)
{
- struct thread *thread = machine__findnew_thread(machine, sample->tid);
- struct perf_kvm *kvm = container_of(tool, struct perf_kvm, tool);
+ struct thread *thread;
+ struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
+ tool);
+
+ if (skip_sample(kvm, sample))
+ return 0;
+ thread = machine__findnew_thread(machine, sample->pid, sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
event->header.type);
@@ -684,10 +836,20 @@ static int process_sample_event(struct perf_tool *tool,
return 0;
}
-static int get_cpu_isa(struct perf_session *session)
+static int cpu_isa_config(struct perf_kvm_stat *kvm)
{
- char *cpuid = session->header.env.cpuid;
- int isa;
+ char buf[64], *cpuid;
+ int err, isa;
+
+ if (kvm->live) {
+ err = get_cpuid(buf, sizeof(buf));
+ if (err != 0) {
+ pr_err("Failed to look up CPU type (Intel or AMD)\n");
+ return err;
+ }
+ cpuid = buf;
+ } else
+ cpuid = kvm->session->header.env.cpuid;
if (strstr(cpuid, "Intel"))
isa = 1;
@@ -695,13 +857,371 @@ static int get_cpu_isa(struct perf_session *session)
isa = 0;
else {
pr_err("CPU %s is not supported.\n", cpuid);
- isa = -ENOTSUP;
+ return -ENOTSUP;
+ }
+
+ if (isa == 1) {
+ kvm->exit_reasons = vmx_exit_reasons;
+ kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
+ kvm->exit_reasons_isa = "VMX";
+ }
+
+ return 0;
+}
+
+static bool verify_vcpu(int vcpu)
+{
+ if (vcpu != -1 && vcpu < 0) {
+ pr_err("Invalid vcpu:%d.\n", vcpu);
+ return false;
+ }
+
+ return true;
+}
+
+#ifdef HAVE_TIMERFD_SUPPORT
+/* keeping the max events to a modest level to keep
+ * the processing of samples per mmap smooth.
+ */
+#define PERF_KVM__MAX_EVENTS_PER_MMAP 25
+
+static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx,
+ u64 *mmap_time)
+{
+ union perf_event *event;
+ struct perf_sample sample;
+ s64 n = 0;
+ int err;
+
+ *mmap_time = ULLONG_MAX;
+ while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) {
+ err = perf_evlist__parse_sample(kvm->evlist, event, &sample);
+ if (err) {
+ perf_evlist__mmap_consume(kvm->evlist, idx);
+ pr_err("Failed to parse sample\n");
+ return -1;
+ }
+
+ err = perf_session_queue_event(kvm->session, event, &sample, 0);
+ /*
+ * FIXME: Here we can't consume the event, as perf_session_queue_event will
+ * point to it, and it'll get possibly overwritten by the kernel.
+ */
+ perf_evlist__mmap_consume(kvm->evlist, idx);
+
+ if (err) {
+ pr_err("Failed to enqueue sample: %d\n", err);
+ return -1;
+ }
+
+ /* save time stamp of our first sample for this mmap */
+ if (n == 0)
+ *mmap_time = sample.time;
+
+ /* limit events per mmap handled all at once */
+ n++;
+ if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+ break;
+ }
+
+ return n;
+}
+
+static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm)
+{
+ int i, err, throttled = 0;
+ s64 n, ntotal = 0;
+ u64 flush_time = ULLONG_MAX, mmap_time;
+
+ for (i = 0; i < kvm->evlist->nr_mmaps; i++) {
+ n = perf_kvm__mmap_read_idx(kvm, i, &mmap_time);
+ if (n < 0)
+ return -1;
+
+ /* flush time is going to be the minimum of all the individual
+ * mmap times. Essentially, we flush all the samples queued up
+ * from the last pass under our minimal start time -- that leaves
+ * a very small race for samples to come in with a lower timestamp.
+ * The ioctl to return the perf_clock timestamp should close the
+ * race entirely.
+ */
+ if (mmap_time < flush_time)
+ flush_time = mmap_time;
+
+ ntotal += n;
+ if (n == PERF_KVM__MAX_EVENTS_PER_MMAP)
+ throttled = 1;
+ }
+
+ /* flush queue after each round in which we processed events */
+ if (ntotal) {
+ kvm->session->ordered_samples.next_flush = flush_time;
+ err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session);
+ if (err) {
+ if (kvm->lost_events)
+ pr_info("\nLost events: %" PRIu64 "\n\n",
+ kvm->lost_events);
+ return err;
+ }
+ }
+
+ return throttled;
+}
+
+static volatile int done;
+
+static void sig_handler(int sig __maybe_unused)
+{
+ done = 1;
+}
+
+static int perf_kvm__timerfd_create(struct perf_kvm_stat *kvm)
+{
+ struct itimerspec new_value;
+ int rc = -1;
+
+ kvm->timerfd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK);
+ if (kvm->timerfd < 0) {
+ pr_err("timerfd_create failed\n");
+ goto out;
+ }
+
+ new_value.it_value.tv_sec = kvm->display_time;
+ new_value.it_value.tv_nsec = 0;
+ new_value.it_interval.tv_sec = kvm->display_time;
+ new_value.it_interval.tv_nsec = 0;
+
+ if (timerfd_settime(kvm->timerfd, 0, &new_value, NULL) != 0) {
+ pr_err("timerfd_settime failed: %d\n", errno);
+ close(kvm->timerfd);
+ goto out;
+ }
+
+ rc = 0;
+out:
+ return rc;
+}
+
+static int perf_kvm__handle_timerfd(struct perf_kvm_stat *kvm)
+{
+ uint64_t c;
+ int rc;
+
+ rc = read(kvm->timerfd, &c, sizeof(uint64_t));
+ if (rc < 0) {
+ if (errno == EAGAIN)
+ return 0;
+
+ pr_err("Failed to read timer fd: %d\n", errno);
+ return -1;
+ }
+
+ if (rc != sizeof(uint64_t)) {
+ pr_err("Error reading timer fd - invalid size returned\n");
+ return -1;
+ }
+
+ if (c != 1)
+ pr_debug("Missed timer beats: %" PRIu64 "\n", c-1);
+
+ /* update display */
+ sort_result(kvm);
+ print_result(kvm);
+
+ /* reset counts */
+ clear_events_cache_stats(kvm->kvm_events_cache);
+ kvm->total_count = 0;
+ kvm->total_time = 0;
+ kvm->lost_events = 0;
+
+ return 0;
+}
+
+static int fd_set_nonblock(int fd)
+{
+ long arg = 0;
+
+ arg = fcntl(fd, F_GETFL);
+ if (arg < 0) {
+ pr_err("Failed to get current flags for fd %d\n", fd);
+ return -1;
+ }
+
+ if (fcntl(fd, F_SETFL, arg | O_NONBLOCK) < 0) {
+ pr_err("Failed to set non-block option on fd %d\n", fd);
+ return -1;
}
- return isa;
+ return 0;
}
-static int read_events(struct perf_kvm *kvm)
+static
+int perf_kvm__handle_stdin(struct termios *tc_now, struct termios *tc_save)
+{
+ int c;
+
+ tcsetattr(0, TCSANOW, tc_now);
+ c = getc(stdin);
+ tcsetattr(0, TCSAFLUSH, tc_save);
+
+ if (c == 'q')
+ return 1;
+
+ return 0;
+}
+
+static int kvm_events_live_report(struct perf_kvm_stat *kvm)
+{
+ struct pollfd *pollfds = NULL;
+ int nr_fds, nr_stdin, ret, err = -EINVAL;
+ struct termios tc, save;
+
+ /* live flag must be set first */
+ kvm->live = true;
+
+ ret = cpu_isa_config(kvm);
+ if (ret < 0)
+ return ret;
+
+ if (!verify_vcpu(kvm->trace_vcpu) ||
+ !select_key(kvm) ||
+ !register_kvm_events_ops(kvm)) {
+ goto out;
+ }
+
+ init_kvm_event_record(kvm);
+
+ tcgetattr(0, &save);
+ tc = save;
+ tc.c_lflag &= ~(ICANON | ECHO);
+ tc.c_cc[VMIN] = 0;
+ tc.c_cc[VTIME] = 0;
+
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+
+ /* copy pollfds -- need to add timerfd and stdin */
+ nr_fds = kvm->evlist->nr_fds;
+ pollfds = zalloc(sizeof(struct pollfd) * (nr_fds + 2));
+ if (!pollfds) {
+ err = -ENOMEM;
+ goto out;
+ }
+ memcpy(pollfds, kvm->evlist->pollfd,
+ sizeof(struct pollfd) * kvm->evlist->nr_fds);
+
+ /* add timer fd */
+ if (perf_kvm__timerfd_create(kvm) < 0) {
+ err = -1;
+ goto out;
+ }
+
+ pollfds[nr_fds].fd = kvm->timerfd;
+ pollfds[nr_fds].events = POLLIN;
+ nr_fds++;
+
+ pollfds[nr_fds].fd = fileno(stdin);
+ pollfds[nr_fds].events = POLLIN;
+ nr_stdin = nr_fds;
+ nr_fds++;
+ if (fd_set_nonblock(fileno(stdin)) != 0)
+ goto out;
+
+ /* everything is good - enable the events and process */
+ perf_evlist__enable(kvm->evlist);
+
+ while (!done) {
+ int rc;
+
+ rc = perf_kvm__mmap_read(kvm);
+ if (rc < 0)
+ break;
+
+ err = perf_kvm__handle_timerfd(kvm);
+ if (err)
+ goto out;
+
+ if (pollfds[nr_stdin].revents & POLLIN)
+ done = perf_kvm__handle_stdin(&tc, &save);
+
+ if (!rc && !done)
+ err = poll(pollfds, nr_fds, 100);
+ }
+
+ perf_evlist__disable(kvm->evlist);
+
+ if (err == 0) {
+ sort_result(kvm);
+ print_result(kvm);
+ }
+
+out:
+ if (kvm->timerfd >= 0)
+ close(kvm->timerfd);
+
+ free(pollfds);
+ return err;
+}
+
+static int kvm_live_open_events(struct perf_kvm_stat *kvm)
+{
+ int err, rc = -1;
+ struct perf_evsel *pos;
+ struct perf_evlist *evlist = kvm->evlist;
+
+ perf_evlist__config(evlist, &kvm->opts);
+
+ /*
+ * Note: exclude_{guest,host} do not apply here.
+ * This command processes KVM tracepoints from host only
+ */
+ evlist__for_each(evlist, pos) {
+ struct perf_event_attr *attr = &pos->attr;
+
+ /* make sure these *are* set */
+ perf_evsel__set_sample_bit(pos, TID);
+ perf_evsel__set_sample_bit(pos, TIME);
+ perf_evsel__set_sample_bit(pos, CPU);
+ perf_evsel__set_sample_bit(pos, RAW);
+ /* make sure these are *not*; want as small a sample as possible */
+ perf_evsel__reset_sample_bit(pos, PERIOD);
+ perf_evsel__reset_sample_bit(pos, IP);
+ perf_evsel__reset_sample_bit(pos, CALLCHAIN);
+ perf_evsel__reset_sample_bit(pos, ADDR);
+ perf_evsel__reset_sample_bit(pos, READ);
+ attr->mmap = 0;
+ attr->comm = 0;
+ attr->task = 0;
+
+ attr->sample_period = 1;
+
+ attr->watermark = 0;
+ attr->wakeup_events = 1000;
+
+ /* will enable all once we are ready */
+ attr->disabled = 1;
+ }
+
+ err = perf_evlist__open(evlist);
+ if (err < 0) {
+ printf("Couldn't create the events: %s\n", strerror(errno));
+ goto out;
+ }
+
+ if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) {
+ ui__error("Failed to mmap the events: %s\n", strerror(errno));
+ perf_evlist__close(evlist);
+ goto out;
+ }
+
+ rc = 0;
+
+out:
+ return rc;
+}
+#endif
+
+static int read_events(struct perf_kvm_stat *kvm)
{
int ret;
@@ -710,10 +1230,13 @@ static int read_events(struct perf_kvm *kvm)
.comm = perf_event__process_comm,
.ordered_samples = true,
};
+ struct perf_data_file file = {
+ .path = kvm->file_name,
+ .mode = PERF_DATA_MODE_READ,
+ };
kvm->tool = eops;
- kvm->session = perf_session__new(kvm->file_name, O_RDONLY, 0, false,
- &kvm->tool);
+ kvm->session = perf_session__new(&file, false, &kvm->tool);
if (!kvm->session) {
pr_err("Initializing perf session failed\n");
return -EINVAL;
@@ -726,35 +1249,34 @@ static int read_events(struct perf_kvm *kvm)
* Do not use 'isa' recorded in kvm_exit tracepoint since it is not
* traced in the old kernel.
*/
- ret = get_cpu_isa(kvm->session);
-
+ ret = cpu_isa_config(kvm);
if (ret < 0)
return ret;
- if (ret == 1) {
- kvm->exit_reasons = vmx_exit_reasons;
- kvm->exit_reasons_size = ARRAY_SIZE(vmx_exit_reasons);
- kvm->exit_reasons_isa = "VMX";
- }
-
return perf_session__process_events(kvm->session, &kvm->tool);
}
-static bool verify_vcpu(int vcpu)
+static int parse_target_str(struct perf_kvm_stat *kvm)
{
- if (vcpu != -1 && vcpu < 0) {
- pr_err("Invalid vcpu:%d.\n", vcpu);
- return false;
+ if (kvm->pid_str) {
+ kvm->pid_list = intlist__new(kvm->pid_str);
+ if (kvm->pid_list == NULL) {
+ pr_err("Error parsing process id string\n");
+ return -EINVAL;
+ }
}
- return true;
+ return 0;
}
-static int kvm_events_report_vcpu(struct perf_kvm *kvm)
+static int kvm_events_report_vcpu(struct perf_kvm_stat *kvm)
{
int ret = -EINVAL;
int vcpu = kvm->trace_vcpu;
+ if (parse_target_str(kvm) != 0)
+ goto exit;
+
if (!verify_vcpu(vcpu))
goto exit;
@@ -778,16 +1300,11 @@ exit:
return ret;
}
-static const char * const record_args[] = {
- "record",
- "-R",
- "-f",
- "-m", "1024",
- "-c", "1",
- "-e", "kvm:kvm_entry",
- "-e", "kvm:kvm_exit",
- "-e", "kvm:kvm_mmio",
- "-e", "kvm:kvm_pio",
+static const char * const kvm_events_tp[] = {
+ "kvm:kvm_entry",
+ "kvm:kvm_exit",
+ "kvm:kvm_mmio",
+ "kvm:kvm_pio",
};
#define STRDUP_FAIL_EXIT(s) \
@@ -798,12 +1315,20 @@ static const char * const record_args[] = {
_p; \
})
-static int kvm_events_record(struct perf_kvm *kvm, int argc, const char **argv)
+static int
+kvm_events_record(struct perf_kvm_stat *kvm, int argc, const char **argv)
{
unsigned int rec_argc, i, j;
const char **rec_argv;
+ const char * const record_args[] = {
+ "record",
+ "-R",
+ "-m", "1024",
+ "-c", "1",
+ };
- rec_argc = ARRAY_SIZE(record_args) + argc + 2;
+ rec_argc = ARRAY_SIZE(record_args) + argc + 2 +
+ 2 * ARRAY_SIZE(kvm_events_tp);
rec_argv = calloc(rec_argc + 1, sizeof(char *));
if (rec_argv == NULL)
@@ -812,6 +1337,11 @@ static int kvm_events_record(struct perf_kvm *kvm, int argc, const char **argv)
for (i = 0; i < ARRAY_SIZE(record_args); i++)
rec_argv[i] = STRDUP_FAIL_EXIT(record_args[i]);
+ for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
+ rec_argv[i++] = "-e";
+ rec_argv[i++] = STRDUP_FAIL_EXIT(kvm_events_tp[j]);
+ }
+
rec_argv[i++] = STRDUP_FAIL_EXIT("-o");
rec_argv[i++] = STRDUP_FAIL_EXIT(kvm->file_name);
@@ -821,7 +1351,8 @@ static int kvm_events_record(struct perf_kvm *kvm, int argc, const char **argv)
return cmd_record(i, rec_argv, NULL);
}
-static int kvm_events_report(struct perf_kvm *kvm, int argc, const char **argv)
+static int
+kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv)
{
const struct option kvm_events_report_options[] = {
OPT_STRING(0, "event", &kvm->report_event, "report event",
@@ -831,6 +1362,8 @@ static int kvm_events_report(struct perf_kvm *kvm, int argc, const char **argv)
OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
"key for sorting: sample(sort by samples number)"
" time (sort by avg time)"),
+ OPT_STRING('p', "pid", &kvm->pid_str, "pid",
+ "analyze events only for given process id(s)"),
OPT_END()
};
@@ -853,6 +1386,184 @@ static int kvm_events_report(struct perf_kvm *kvm, int argc, const char **argv)
return kvm_events_report_vcpu(kvm);
}
+#ifdef HAVE_TIMERFD_SUPPORT
+static struct perf_evlist *kvm_live_event_list(void)
+{
+ struct perf_evlist *evlist;
+ char *tp, *name, *sys;
+ unsigned int j;
+ int err = -1;
+
+ evlist = perf_evlist__new();
+ if (evlist == NULL)
+ return NULL;
+
+ for (j = 0; j < ARRAY_SIZE(kvm_events_tp); j++) {
+
+ tp = strdup(kvm_events_tp[j]);
+ if (tp == NULL)
+ goto out;
+
+ /* split tracepoint into subsystem and name */
+ sys = tp;
+ name = strchr(tp, ':');
+ if (name == NULL) {
+ pr_err("Error parsing %s tracepoint: subsystem delimiter not found\n",
+ kvm_events_tp[j]);
+ free(tp);
+ goto out;
+ }
+ *name = '\0';
+ name++;
+
+ if (perf_evlist__add_newtp(evlist, sys, name, NULL)) {
+ pr_err("Failed to add %s tracepoint to the list\n", kvm_events_tp[j]);
+ free(tp);
+ goto out;
+ }
+
+ free(tp);
+ }
+
+ err = 0;
+
+out:
+ if (err) {
+ perf_evlist__delete(evlist);
+ evlist = NULL;
+ }
+
+ return evlist;
+}
+
+static int kvm_events_live(struct perf_kvm_stat *kvm,
+ int argc, const char **argv)
+{
+ char errbuf[BUFSIZ];
+ int err;
+
+ const struct option live_options[] = {
+ OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid",
+ "record events on existing process id"),
+ OPT_CALLBACK('m', "mmap-pages", &kvm->opts.mmap_pages, "pages",
+ "number of mmap data pages",
+ perf_evlist__parse_mmap_pages),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
+ OPT_BOOLEAN('a', "all-cpus", &kvm->opts.target.system_wide,
+ "system-wide collection from all CPUs"),
+ OPT_UINTEGER('d', "display", &kvm->display_time,
+ "time in seconds between display updates"),
+ OPT_STRING(0, "event", &kvm->report_event, "report event",
+ "event for reporting: vmexit, mmio, ioport"),
+ OPT_INTEGER(0, "vcpu", &kvm->trace_vcpu,
+ "vcpu id to report"),
+ OPT_STRING('k', "key", &kvm->sort_key, "sort-key",
+ "key for sorting: sample(sort by samples number)"
+ " time (sort by avg time)"),
+ OPT_U64(0, "duration", &kvm->duration,
+ "show events other than HALT that take longer than duration usecs"),
+ OPT_END()
+ };
+ const char * const live_usage[] = {
+ "perf kvm stat live [<options>]",
+ NULL
+ };
+ struct perf_data_file file = {
+ .mode = PERF_DATA_MODE_WRITE,
+ };
+
+
+ /* event handling */
+ kvm->tool.sample = process_sample_event;
+ kvm->tool.comm = perf_event__process_comm;
+ kvm->tool.exit = perf_event__process_exit;
+ kvm->tool.fork = perf_event__process_fork;
+ kvm->tool.lost = process_lost_event;
+ kvm->tool.ordered_samples = true;
+ perf_tool__fill_defaults(&kvm->tool);
+
+ /* set defaults */
+ kvm->display_time = 1;
+ kvm->opts.user_interval = 1;
+ kvm->opts.mmap_pages = 512;
+ kvm->opts.target.uses_mmap = false;
+ kvm->opts.target.uid_str = NULL;
+ kvm->opts.target.uid = UINT_MAX;
+
+ symbol__init();
+ disable_buildid_cache();
+
+ use_browser = 0;
+ setup_browser(false);
+
+ if (argc) {
+ argc = parse_options(argc, argv, live_options,
+ live_usage, 0);
+ if (argc)
+ usage_with_options(live_usage, live_options);
+ }
+
+ kvm->duration *= NSEC_PER_USEC; /* convert usec to nsec */
+
+ /*
+ * target related setups
+ */
+ err = target__validate(&kvm->opts.target);
+ if (err) {
+ target__strerror(&kvm->opts.target, err, errbuf, BUFSIZ);
+ ui__warning("%s", errbuf);
+ }
+
+ if (target__none(&kvm->opts.target))
+ kvm->opts.target.system_wide = true;
+
+
+ /*
+ * generate the event list
+ */
+ kvm->evlist = kvm_live_event_list();
+ if (kvm->evlist == NULL) {
+ err = -1;
+ goto out;
+ }
+
+ symbol_conf.nr_events = kvm->evlist->nr_entries;
+
+ if (perf_evlist__create_maps(kvm->evlist, &kvm->opts.target) < 0)
+ usage_with_options(live_usage, live_options);
+
+ /*
+ * perf session
+ */
+ kvm->session = perf_session__new(&file, false, &kvm->tool);
+ if (kvm->session == NULL) {
+ err = -ENOMEM;
+ goto out;
+ }
+ kvm->session->evlist = kvm->evlist;
+ perf_session__set_id_hdr_size(kvm->session);
+ machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
+ kvm->evlist->threads, false);
+ err = kvm_live_open_events(kvm);
+ if (err)
+ goto out;
+
+ err = kvm_events_live_report(kvm);
+
+out:
+ exit_browser(0);
+
+ if (kvm->session)
+ perf_session__delete(kvm->session);
+ kvm->session = NULL;
+ if (kvm->evlist)
+ perf_evlist__delete(kvm->evlist);
+
+ return err;
+}
+#endif
+
static void print_kvm_stat_usage(void)
{
printf("Usage: perf kvm stat <command>\n\n");
@@ -860,28 +1571,47 @@ static void print_kvm_stat_usage(void)
printf("# Available commands:\n");
printf("\trecord: record kvm events\n");
printf("\treport: report statistical data of kvm events\n");
+ printf("\tlive: live reporting of statistical data of kvm events\n");
printf("\nOtherwise, it is the alias of 'perf stat':\n");
}
-static int kvm_cmd_stat(struct perf_kvm *kvm, int argc, const char **argv)
+static int kvm_cmd_stat(const char *file_name, int argc, const char **argv)
{
+ struct perf_kvm_stat kvm = {
+ .file_name = file_name,
+
+ .trace_vcpu = -1,
+ .report_event = "vmexit",
+ .sort_key = "sample",
+
+ .exit_reasons = svm_exit_reasons,
+ .exit_reasons_size = ARRAY_SIZE(svm_exit_reasons),
+ .exit_reasons_isa = "SVM",
+ };
+
if (argc == 1) {
print_kvm_stat_usage();
goto perf_stat;
}
if (!strncmp(argv[1], "rec", 3))
- return kvm_events_record(kvm, argc - 1, argv + 1);
+ return kvm_events_record(&kvm, argc - 1, argv + 1);
if (!strncmp(argv[1], "rep", 3))
- return kvm_events_report(kvm, argc - 1 , argv + 1);
+ return kvm_events_report(&kvm, argc - 1 , argv + 1);
+
+#ifdef HAVE_TIMERFD_SUPPORT
+ if (!strncmp(argv[1], "live", 4))
+ return kvm_events_live(&kvm, argc - 1 , argv + 1);
+#endif
perf_stat:
return cmd_stat(argc, argv, NULL);
}
+#endif
-static int __cmd_record(struct perf_kvm *kvm, int argc, const char **argv)
+static int __cmd_record(const char *file_name, int argc, const char **argv)
{
int rec_argc, i = 0, j;
const char **rec_argv;
@@ -890,7 +1620,7 @@ static int __cmd_record(struct perf_kvm *kvm, int argc, const char **argv)
rec_argv = calloc(rec_argc + 1, sizeof(char *));
rec_argv[i++] = strdup("record");
rec_argv[i++] = strdup("-o");
- rec_argv[i++] = strdup(kvm->file_name);
+ rec_argv[i++] = strdup(file_name);
for (j = 1; j < argc; j++, i++)
rec_argv[i] = argv[j];
@@ -899,7 +1629,7 @@ static int __cmd_record(struct perf_kvm *kvm, int argc, const char **argv)
return cmd_record(i, rec_argv, NULL);
}
-static int __cmd_report(struct perf_kvm *kvm, int argc, const char **argv)
+static int __cmd_report(const char *file_name, int argc, const char **argv)
{
int rec_argc, i = 0, j;
const char **rec_argv;
@@ -908,7 +1638,7 @@ static int __cmd_report(struct perf_kvm *kvm, int argc, const char **argv)
rec_argv = calloc(rec_argc + 1, sizeof(char *));
rec_argv[i++] = strdup("report");
rec_argv[i++] = strdup("-i");
- rec_argv[i++] = strdup(kvm->file_name);
+ rec_argv[i++] = strdup(file_name);
for (j = 1; j < argc; j++, i++)
rec_argv[i] = argv[j];
@@ -917,7 +1647,8 @@ static int __cmd_report(struct perf_kvm *kvm, int argc, const char **argv)
return cmd_report(i, rec_argv, NULL);
}
-static int __cmd_buildid_list(struct perf_kvm *kvm, int argc, const char **argv)
+static int
+__cmd_buildid_list(const char *file_name, int argc, const char **argv)
{
int rec_argc, i = 0, j;
const char **rec_argv;
@@ -926,7 +1657,7 @@ static int __cmd_buildid_list(struct perf_kvm *kvm, int argc, const char **argv)
rec_argv = calloc(rec_argc + 1, sizeof(char *));
rec_argv[i++] = strdup("buildid-list");
rec_argv[i++] = strdup("-i");
- rec_argv[i++] = strdup(kvm->file_name);
+ rec_argv[i++] = strdup(file_name);
for (j = 1; j < argc; j++, i++)
rec_argv[i] = argv[j];
@@ -937,20 +1668,11 @@ static int __cmd_buildid_list(struct perf_kvm *kvm, int argc, const char **argv)
int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
{
- struct perf_kvm kvm = {
- .trace_vcpu = -1,
- .report_event = "vmexit",
- .sort_key = "sample",
-
- .exit_reasons = svm_exit_reasons,
- .exit_reasons_size = ARRAY_SIZE(svm_exit_reasons),
- .exit_reasons_isa = "SVM",
- };
-
+ const char *file_name = NULL;
const struct option kvm_options[] = {
- OPT_STRING('i', "input", &kvm.file_name, "file",
+ OPT_STRING('i', "input", &file_name, "file",
"Input file name"),
- OPT_STRING('o', "output", &kvm.file_name, "file",
+ OPT_STRING('o', "output", &file_name, "file",
"Output file name"),
OPT_BOOLEAN(0, "guest", &perf_guest,
"Collect guest os data"),
@@ -965,52 +1687,49 @@ int cmd_kvm(int argc, const char **argv, const char *prefix __maybe_unused)
"file", "file saving guest os /proc/kallsyms"),
OPT_STRING(0, "guestmodules", &symbol_conf.default_guest_modules,
"file", "file saving guest os /proc/modules"),
+ OPT_INCR('v', "verbose", &verbose,
+ "be more verbose (show counter open errors, etc)"),
OPT_END()
};
-
- const char * const kvm_usage[] = {
- "perf kvm [<options>] {top|record|report|diff|buildid-list|stat}",
- NULL
- };
+ const char *const kvm_subcommands[] = { "top", "record", "report", "diff",
+ "buildid-list", "stat", NULL };
+ const char *kvm_usage[] = { NULL, NULL };
perf_host = 0;
perf_guest = 1;
- argc = parse_options(argc, argv, kvm_options, kvm_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, kvm_options, kvm_subcommands, kvm_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(kvm_usage, kvm_options);
if (!perf_host)
perf_guest = 1;
- if (!kvm.file_name) {
- if (perf_host && !perf_guest)
- kvm.file_name = strdup("perf.data.host");
- else if (!perf_host && perf_guest)
- kvm.file_name = strdup("perf.data.guest");
- else
- kvm.file_name = strdup("perf.data.kvm");
+ if (!file_name) {
+ file_name = get_filename_for_perf_kvm();
- if (!kvm.file_name) {
+ if (!file_name) {
pr_err("Failed to allocate memory for filename\n");
return -ENOMEM;
}
}
if (!strncmp(argv[0], "rec", 3))
- return __cmd_record(&kvm, argc, argv);
+ return __cmd_record(file_name, argc, argv);
else if (!strncmp(argv[0], "rep", 3))
- return __cmd_report(&kvm, argc, argv);
+ return __cmd_report(file_name, argc, argv);
else if (!strncmp(argv[0], "diff", 4))
return cmd_diff(argc, argv, NULL);
else if (!strncmp(argv[0], "top", 3))
return cmd_top(argc, argv, NULL);
else if (!strncmp(argv[0], "buildid-list", 12))
- return __cmd_buildid_list(&kvm, argc, argv);
+ return __cmd_buildid_list(file_name, argc, argv);
+#if defined(__i386__) || defined(__x86_64__)
else if (!strncmp(argv[0], "stat", 4))
- return kvm_cmd_stat(&kvm, argc, argv);
+ return kvm_cmd_stat(file_name, argc, argv);
+#endif
else
usage_with_options(kvm_usage, kvm_options);
diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c
index 1948eceb517..011195e38f2 100644
--- a/tools/perf/builtin-list.c
+++ b/tools/perf/builtin-list.c
@@ -13,49 +13,64 @@
#include "util/parse-events.h"
#include "util/cache.h"
+#include "util/pmu.h"
+#include "util/parse-options.h"
int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused)
{
+ int i;
+ const struct option list_options[] = {
+ OPT_END()
+ };
+ const char * const list_usage[] = {
+ "perf list [hw|sw|cache|tracepoint|pmu|event_glob]",
+ NULL
+ };
+
+ argc = parse_options(argc, argv, list_options, list_usage,
+ PARSE_OPT_STOP_AT_NON_OPTION);
+
setup_pager();
- if (argc == 1)
+ if (argc == 0) {
print_events(NULL, false);
- else {
- int i;
-
- for (i = 1; i < argc; ++i) {
- if (i > 2)
- putchar('\n');
- if (strncmp(argv[i], "tracepoint", 10) == 0)
- print_tracepoint_events(NULL, NULL, false);
- else if (strcmp(argv[i], "hw") == 0 ||
- strcmp(argv[i], "hardware") == 0)
- print_events_type(PERF_TYPE_HARDWARE);
- else if (strcmp(argv[i], "sw") == 0 ||
- strcmp(argv[i], "software") == 0)
- print_events_type(PERF_TYPE_SOFTWARE);
- else if (strcmp(argv[i], "cache") == 0 ||
- strcmp(argv[i], "hwcache") == 0)
- print_hwcache_events(NULL, false);
- else if (strcmp(argv[i], "--raw-dump") == 0)
- print_events(NULL, true);
- else {
- char *sep = strchr(argv[i], ':'), *s;
- int sep_idx;
-
- if (sep == NULL) {
- print_events(argv[i], false);
- continue;
- }
- sep_idx = sep - argv[i];
- s = strdup(argv[i]);
- if (s == NULL)
- return -1;
-
- s[sep_idx] = '\0';
- print_tracepoint_events(s, s + sep_idx + 1, false);
- free(s);
+ return 0;
+ }
+
+ for (i = 0; i < argc; ++i) {
+ if (i)
+ putchar('\n');
+ if (strncmp(argv[i], "tracepoint", 10) == 0)
+ print_tracepoint_events(NULL, NULL, false);
+ else if (strcmp(argv[i], "hw") == 0 ||
+ strcmp(argv[i], "hardware") == 0)
+ print_events_type(PERF_TYPE_HARDWARE);
+ else if (strcmp(argv[i], "sw") == 0 ||
+ strcmp(argv[i], "software") == 0)
+ print_events_type(PERF_TYPE_SOFTWARE);
+ else if (strcmp(argv[i], "cache") == 0 ||
+ strcmp(argv[i], "hwcache") == 0)
+ print_hwcache_events(NULL, false);
+ else if (strcmp(argv[i], "pmu") == 0)
+ print_pmu_events(NULL, false);
+ else if (strcmp(argv[i], "--raw-dump") == 0)
+ print_events(NULL, true);
+ else {
+ char *sep = strchr(argv[i], ':'), *s;
+ int sep_idx;
+
+ if (sep == NULL) {
+ print_events(argv[i], false);
+ continue;
}
+ sep_idx = sep - argv[i];
+ s = strdup(argv[i]);
+ if (s == NULL)
+ return -1;
+
+ s[sep_idx] = '\0';
+ print_tracepoint_events(s, s + sep_idx + 1, false);
+ free(s);
}
}
return 0;
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index 6f5f328157a..6148afc995c 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -15,6 +15,7 @@
#include "util/debug.h"
#include "util/session.h"
#include "util/tool.h"
+#include "util/data.h"
#include <sys/types.h>
#include <sys/prctl.h>
@@ -56,7 +57,9 @@ struct lock_stat {
unsigned int nr_readlock;
unsigned int nr_trylock;
+
/* these times are in nano sec. */
+ u64 avg_wait_time;
u64 wait_time_total;
u64 wait_time_min;
u64 wait_time_max;
@@ -208,6 +211,7 @@ static struct thread_stat *thread_stat_findnew_first(u32 tid)
SINGLE_KEY(nr_acquired)
SINGLE_KEY(nr_contended)
+SINGLE_KEY(avg_wait_time)
SINGLE_KEY(wait_time_total)
SINGLE_KEY(wait_time_max)
@@ -244,6 +248,7 @@ static struct rb_root result; /* place to store sorted data */
struct lock_key keys[] = {
DEF_KEY_LOCK(acquired, nr_acquired),
DEF_KEY_LOCK(contended, nr_contended),
+ DEF_KEY_LOCK(avg_wait, avg_wait_time),
DEF_KEY_LOCK(wait_total, wait_time_total),
DEF_KEY_LOCK(wait_min, wait_time_min),
DEF_KEY_LOCK(wait_max, wait_time_max),
@@ -321,10 +326,12 @@ static struct lock_stat *lock_stat_findnew(void *addr, const char *name)
new->addr = addr;
new->name = zalloc(sizeof(char) * strlen(name) + 1);
- if (!new->name)
+ if (!new->name) {
+ free(new);
goto alloc_failed;
- strcpy(new->name, name);
+ }
+ strcpy(new->name, name);
new->wait_time_min = ULLONG_MAX;
list_add(&new->hash_entry, entry);
@@ -335,8 +342,6 @@ alloc_failed:
return NULL;
}
-static const char *input_name;
-
struct trace_lock_handler {
int (*acquire_event)(struct perf_evsel *evsel,
struct perf_sample *sample);
@@ -402,17 +407,17 @@ static int report_lock_acquire_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -448,7 +453,6 @@ broken:
list_del(&seq->list);
free(seq);
goto end;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -475,17 +479,17 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -510,8 +514,6 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
list_del(&seq->list);
free(seq);
goto end;
- break;
-
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -519,6 +521,7 @@ static int report_lock_acquired_event(struct perf_evsel *evsel,
seq->state = SEQ_STATE_ACQUIRED;
ls->nr_acquired++;
+ ls->avg_wait_time = ls->nr_contended ? ls->wait_time_total/ls->nr_contended : 0;
seq->prev_event_time = sample->time;
end:
return 0;
@@ -538,17 +541,17 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
@@ -566,7 +569,6 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
list_del(&seq->list);
free(seq);
goto end;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -574,6 +576,7 @@ static int report_lock_contended_event(struct perf_evsel *evsel,
seq->state = SEQ_STATE_CONTENDED;
ls->nr_contended++;
+ ls->avg_wait_time = ls->wait_time_total/ls->nr_contended;
seq->prev_event_time = sample->time;
end:
return 0;
@@ -593,22 +596,21 @@ static int report_lock_release_event(struct perf_evsel *evsel,
ls = lock_stat_findnew(addr, name);
if (!ls)
- return -1;
+ return -ENOMEM;
if (ls->discard)
return 0;
ts = thread_stat_findnew(sample->tid);
if (!ts)
- return -1;
+ return -ENOMEM;
seq = get_seq(ts, addr);
if (!seq)
- return -1;
+ return -ENOMEM;
switch (seq->state) {
case SEQ_STATE_UNINITIALIZED:
goto end;
- break;
case SEQ_STATE_ACQUIRED:
break;
case SEQ_STATE_READ_ACQUIRED:
@@ -626,7 +628,6 @@ static int report_lock_release_event(struct perf_evsel *evsel,
ls->discard = 1;
bad_hist[BROKEN_RELEASE]++;
goto free_seq;
- break;
default:
BUG_ON("Unknown state of lock sequence found!\n");
break;
@@ -692,7 +693,7 @@ static void print_bad_events(int bad, int total)
pr_info("\n=== output for debug===\n\n");
pr_info("bad: %d, total: %d\n", bad, total);
- pr_info("bad rate: %f %%\n", (double)bad / (double)total * 100);
+ pr_info("bad rate: %.2f %%\n", (double)bad / (double)total * 100);
pr_info("histogram of events caused bad sequence\n");
for (i = 0; i < BROKEN_MAX; i++)
pr_info(" %10s: %d\n", name[i], bad_hist[i]);
@@ -709,6 +710,7 @@ static void print_result(void)
pr_info("%10s ", "acquired");
pr_info("%10s ", "contended");
+ pr_info("%15s ", "avg wait (ns)");
pr_info("%15s ", "total wait (ns)");
pr_info("%15s ", "max wait (ns)");
pr_info("%15s ", "min wait (ns)");
@@ -740,6 +742,7 @@ static void print_result(void)
pr_info("%10u ", st->nr_acquired);
pr_info("%10u ", st->nr_contended);
+ pr_info("%15" PRIu64 " ", st->avg_wait_time);
pr_info("%15" PRIu64 " ", st->wait_time_total);
pr_info("%15" PRIu64 " ", st->wait_time_max);
pr_info("%15" PRIu64 " ", st->wait_time_min == ULLONG_MAX ?
@@ -764,7 +767,7 @@ static void dump_threads(void)
while (node) {
st = container_of(node, struct thread_stat, rb);
t = perf_session__findnew(session, st->tid);
- pr_info("%10d: %s\n", st->tid, t->comm);
+ pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
node = rb_next(node);
};
}
@@ -807,7 +810,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
struct perf_evsel *evsel,
struct machine *machine)
{
- struct thread *thread = machine__findnew_thread(machine, sample->tid);
+ struct thread *thread = machine__findnew_thread(machine, sample->pid,
+ sample->tid);
if (thread == NULL) {
pr_debug("problem processing %d event, skipping it.\n",
@@ -815,14 +819,26 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
return -1;
}
- if (evsel->handler.func != NULL) {
- tracepoint_handler f = evsel->handler.func;
+ if (evsel->handler != NULL) {
+ tracepoint_handler f = evsel->handler;
return f(evsel, sample);
}
return 0;
}
+static void sort_result(void)
+{
+ unsigned int i;
+ struct lock_stat *st;
+
+ for (i = 0; i < LOCKHASH_SIZE; i++) {
+ list_for_each_entry(st, &lockhash_table[i], hash_entry) {
+ insert_to_result(st, compare);
+ }
+ }
+}
+
static const struct perf_evsel_str_handler lock_tracepoints[] = {
{ "lock:lock_acquire", perf_evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */
{ "lock:lock_acquired", perf_evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */
@@ -830,59 +846,59 @@ static const struct perf_evsel_str_handler lock_tracepoints[] = {
{ "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */
};
-static int read_events(void)
+static int __cmd_report(bool display_info)
{
+ int err = -EINVAL;
struct perf_tool eops = {
.sample = process_sample_event,
.comm = perf_event__process_comm,
.ordered_samples = true,
};
- session = perf_session__new(input_name, O_RDONLY, 0, false, &eops);
+ struct perf_data_file file = {
+ .path = input_name,
+ .mode = PERF_DATA_MODE_READ,
+ };
+
+ session = perf_session__new(&file, false, &eops);
if (!session) {
pr_err("Initializing perf session failed\n");
- return -1;
+ return -ENOMEM;
}
+ if (!perf_session__has_traces(session, "lock record"))
+ goto out_delete;
+
if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) {
pr_err("Initializing perf session tracepoint handlers failed\n");
- return -1;
+ goto out_delete;
}
- return perf_session__process_events(session, &eops);
-}
-
-static void sort_result(void)
-{
- unsigned int i;
- struct lock_stat *st;
+ if (select_key())
+ goto out_delete;
- for (i = 0; i < LOCKHASH_SIZE; i++) {
- list_for_each_entry(st, &lockhash_table[i], hash_entry) {
- insert_to_result(st, compare);
- }
- }
-}
+ err = perf_session__process_events(session, &eops);
+ if (err)
+ goto out_delete;
-static int __cmd_report(void)
-{
setup_pager();
+ if (display_info) /* used for info subcommand */
+ err = dump_info();
+ else {
+ sort_result();
+ print_result();
+ }
- if ((select_key() != 0) ||
- (read_events() != 0))
- return -1;
-
- sort_result();
- print_result();
-
- return 0;
+out_delete:
+ perf_session__delete(session);
+ return err;
}
static int __cmd_record(int argc, const char **argv)
{
const char *record_args[] = {
- "record", "-R", "-f", "-m", "1024", "-c", "1",
+ "record", "-R", "-m", "1024", "-c", "1",
};
- unsigned int rec_argc, i, j;
+ unsigned int rec_argc, i, j, ret;
const char **rec_argv;
for (i = 0; i < ARRAY_SIZE(lock_tracepoints); i++) {
@@ -899,7 +915,7 @@ static int __cmd_record(int argc, const char **argv)
rec_argc += 2 * ARRAY_SIZE(lock_tracepoints);
rec_argv = calloc(rec_argc + 1, sizeof(char *));
- if (rec_argv == NULL)
+ if (!rec_argv)
return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(record_args); i++)
@@ -915,7 +931,9 @@ static int __cmd_record(int argc, const char **argv)
BUG_ON(i != rec_argc);
- return cmd_record(i, rec_argv, NULL);
+ ret = cmd_record(i, rec_argv, NULL);
+ free(rec_argv);
+ return ret;
}
int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
@@ -935,7 +953,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
};
const struct option report_options[] = {
OPT_STRING('k', "key", &sort_key, "acquired",
- "key for sorting (acquired / contended / wait_total / wait_max / wait_min)"),
+ "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"),
/* TODO: type */
OPT_END()
};
@@ -943,8 +961,10 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
"perf lock info [<options>]",
NULL
};
- const char * const lock_usage[] = {
- "perf lock [<options>] {record|report|script|info}",
+ const char *const lock_subcommands[] = { "record", "report", "script",
+ "info", NULL };
+ const char *lock_usage[] = {
+ NULL,
NULL
};
const char * const report_usage[] = {
@@ -958,8 +978,8 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
for (i = 0; i < LOCKHASH_SIZE; i++)
INIT_LIST_HEAD(lockhash_table + i);
- argc = parse_options(argc, argv, lock_options, lock_usage,
- PARSE_OPT_STOP_AT_NON_OPTION);
+ argc = parse_options_subcommand(argc, argv, lock_options, lock_subcommands,
+ lock_usage, PARSE_OPT_STOP_AT_NON_OPTION);
if (!argc)
usage_with_options(lock_usage, lock_options);
@@ -973,7 +993,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc)
usage_with_options(report_usage, report_options);
}
- __cmd_report();
+ rc = __cmd_report(false);
} else if (!strcmp(argv[0], "script")) {
/* Aliased to 'perf script' */
return cmd_script(argc, argv, prefix);
@@ -986,11 +1006,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused)
}
/* recycling report_lock_ops */
trace_handler = &report_lock_ops;
- setup_pager();
- if (read_events() != 0)
- rc = -1;
- else
- rc = dump_info();
+ rc = __cmd_report(true);
} else {
usage_with_options(lock_usage, lock_options);
}
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
new file mode 100644
index 00000000000..4a1a6c94a5e
--- /dev/null
+++ b/tools/perf/builtin-mem.c
@@ -0,0 +1,246 @@
+#include "builtin.h"
+#include "perf.h"
+
+#include "util/parse-options.h"
+#include "util/trace-event.h"
+#include "util/tool.h"
+#include "util/session.h"
+#include "util/data.h"
+
+#define MEM_OPERATION_LOAD "load"
+#define MEM_OPERATION_STORE "store"
+
+static const char *mem_operation = MEM_OPERATION_LOAD;
+
+struct perf_mem {
+ struct perf_tool tool;
+ char const *input_name;
+ bool hide_unresolved;
+ bool dump_raw;
+ const char *cpu_list;
+ DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+};
+
+static int __cmd_record(int argc, const char **argv)
+{
+ int rec_argc, i = 0, j;
+ const char **rec_argv;
+ char event[64];
+ int ret;
+
+ rec_argc = argc + 4;
+ rec_argv = calloc(rec_argc + 1, sizeof(char *));
+ if (!rec_argv)
+ return -1;
+
+ rec_argv[i++] = strdup("record");
+ if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
+ rec_argv[i++] = strdup("-W");
+ rec_argv[i++] = strdup("-d");
+ rec_argv[i++] = strdup("-e");
+
+ if (strcmp(mem_operation, MEM_OPERATION_LOAD))
+ sprintf(event, "cpu/mem-stores/pp");
+ else
+ sprintf(event, "cpu/mem-loads/pp");
+
+ rec_argv[i++] = strdup(event);
+ for (j = 1; j < argc; j++, i++)
+ rec_argv[i] = argv[j];
+
+ ret = cmd_record(i, rec_argv, NULL);
+ free(rec_argv);
+ return ret;
+}
+
+static int
+dump_raw_samples(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct machine *machine)
+{
+ struct perf_mem *mem = container_of(tool, struct perf_mem, tool);
+ struct addr_location al;
+ const char *fmt;
+
+ if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
+ fprintf(stderr, "problem processing %d event, skipping it.\n",
+ event->header.type);
+ return -1;
+ }
+
+ if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
+ return 0;
+
+ if (al.map != NULL)
+ al.map->dso->hit = 1;
+
+ if (symbol_conf.field_sep) {
+ fmt = "%d%s%d%s0x%"PRIx64"%s0x%"PRIx64"%s%"PRIu64
+ "%s0x%"PRIx64"%s%s:%s\n";
+ } else {
+ fmt = "%5d%s%5d%s0x%016"PRIx64"%s0x016%"PRIx64
+ "%s%5"PRIu64"%s0x%06"PRIx64"%s%s:%s\n";
+ symbol_conf.field_sep = " ";
+ }
+
+ printf(fmt,
+ sample->pid,
+ symbol_conf.field_sep,
+ sample->tid,
+ symbol_conf.field_sep,
+ sample->ip,
+ symbol_conf.field_sep,
+ sample->addr,
+ symbol_conf.field_sep,
+ sample->weight,
+ symbol_conf.field_sep,
+ sample->data_src,
+ symbol_conf.field_sep,
+ al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
+ al.sym ? al.sym->name : "???");
+
+ return 0;
+}
+
+static int process_sample_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine)
+{
+ return dump_raw_samples(tool, event, sample, machine);
+}
+
+static int report_raw_events(struct perf_mem *mem)
+{
+ struct perf_data_file file = {
+ .path = input_name,
+ .mode = PERF_DATA_MODE_READ,
+ };
+ int err = -EINVAL;
+ int ret;
+ struct perf_session *session = perf_session__new(&file, false,
+ &mem->tool);
+
+ if (session == NULL)
+ return -ENOMEM;
+
+ if (mem->cpu_list) {
+ ret = perf_session__cpu_bitmap(session, mem->cpu_list,
+ mem->cpu_bitmap);
+ if (ret)
+ goto out_delete;
+ }
+
+ if (symbol__init() < 0)
+ return -1;
+
+ printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n");
+
+ err = perf_session__process_events(session, &mem->tool);
+ if (err)
+ return err;
+
+ return 0;
+
+out_delete:
+ perf_session__delete(session);
+ return err;
+}
+
+static int report_events(int argc, const char **argv, struct perf_mem *mem)
+{
+ const char **rep_argv;
+ int ret, i = 0, j, rep_argc;
+
+ if (mem->dump_raw)
+ return report_raw_events(mem);
+
+ rep_argc = argc + 3;
+ rep_argv = calloc(rep_argc + 1, sizeof(char *));
+ if (!rep_argv)
+ return -1;
+
+ rep_argv[i++] = strdup("report");
+ rep_argv[i++] = strdup("--mem-mode");
+ rep_argv[i++] = strdup("-n"); /* display number of samples */
+
+ /*
+ * there is no weight (cost) associated with stores, so don't print
+ * the column
+ */
+ if (strcmp(mem_operation, MEM_OPERATION_LOAD))
+ rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
+ "dso_daddr,tlb,locked");
+
+ for (j = 1; j < argc; j++, i++)
+ rep_argv[i] = argv[j];
+
+ ret = cmd_report(i, rep_argv, NULL);
+ free(rep_argv);
+ return ret;
+}
+
+int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ struct stat st;
+ struct perf_mem mem = {
+ .tool = {
+ .sample = process_sample_event,
+ .mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
+ .comm = perf_event__process_comm,
+ .lost = perf_event__process_lost,
+ .fork = perf_event__process_fork,
+ .build_id = perf_event__process_build_id,
+ .ordered_samples = true,
+ },
+ .input_name = "perf.data",
+ };
+ const struct option mem_options[] = {
+ OPT_STRING('t', "type", &mem_operation,
+ "type", "memory operations(load/store)"),
+ OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
+ "dump raw samples in ASCII"),
+ OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
+ "Only display entries resolved to a symbol"),
+ OPT_STRING('i', "input", &input_name, "file",
+ "input file name"),
+ OPT_STRING('C', "cpu", &mem.cpu_list, "cpu",
+ "list of cpus to profile"),
+ OPT_STRING('x', "field-separator", &symbol_conf.field_sep,
+ "separator",
+ "separator for columns, no spaces will be added"
+ " between columns '.' is reserved."),
+ OPT_END()
+ };
+ const char *const mem_subcommands[] = { "record", "report", NULL };
+ const char *mem_usage[] = {
+ NULL,
+ NULL
+ };
+
+
+ argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
+ mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
+
+ if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
+ usage_with_options(mem_usage, mem_options);
+
+ if (!mem.input_name || !strlen(mem.input_name)) {
+ if (!fstat(STDIN_FILENO, &st) && S_ISFIFO(st.st_mode))
+ mem.input_name = "-";
+ else
+ mem.input_name = "perf.data";
+ }
+
+ if (!strncmp(argv[0], "rec", 3))
+ return __cmd_record(argc, argv);
+ else if (!strncmp(argv[0], "rep", 3))
+ return report_events(argc, argv, &mem);
+ else
+ usage_with_options(mem_usage, mem_options);
+
+ return 0;
+}
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c