aboutsummaryrefslogtreecommitdiff
path: root/drivers/pci/ats.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/pci/ats.c')
-rw-r--r--drivers/pci/ats.c84
1 files changed, 1 insertions, 83 deletions
diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index 95655d7c0d0..a8099d4d0c9 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -235,27 +235,6 @@ void pci_disable_pri(struct pci_dev *pdev)
EXPORT_SYMBOL_GPL(pci_disable_pri);
/**
- * pci_pri_enabled - Checks if PRI capability is enabled
- * @pdev: PCI device structure
- *
- * Returns true if PRI is enabled on the device, false otherwise
- */
-bool pci_pri_enabled(struct pci_dev *pdev)
-{
- u16 control;
- int pos;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
- if (!pos)
- return false;
-
- pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
-
- return (control & PCI_PRI_CTRL_ENABLE) ? true : false;
-}
-EXPORT_SYMBOL_GPL(pci_pri_enabled);
-
-/**
* pci_reset_pri - Resets device's PRI state
* @pdev: PCI device structure
*
@@ -282,67 +261,6 @@ int pci_reset_pri(struct pci_dev *pdev)
return 0;
}
EXPORT_SYMBOL_GPL(pci_reset_pri);
-
-/**
- * pci_pri_stopped - Checks whether the PRI capability is stopped
- * @pdev: PCI device structure
- *
- * Returns true if the PRI capability on the device is disabled and the
- * device has no outstanding PRI requests, false otherwise. The device
- * indicates this via the STOPPED bit in the status register of the
- * capability.
- * The device internal state can be cleared by resetting the PRI state
- * with pci_reset_pri(). This can force the capability into the STOPPED
- * state.
- */
-bool pci_pri_stopped(struct pci_dev *pdev)
-{
- u16 control, status;
- int pos;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
- if (!pos)
- return true;
-
- pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
- pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
-
- if (control & PCI_PRI_CTRL_ENABLE)
- return false;
-
- return (status & PCI_PRI_STATUS_STOPPED) ? true : false;
-}
-EXPORT_SYMBOL_GPL(pci_pri_stopped);
-
-/**
- * pci_pri_status - Request PRI status of a device
- * @pdev: PCI device structure
- *
- * Returns negative value on failure, status on success. The status can
- * be checked against status-bits. Supported bits are currently:
- * PCI_PRI_STATUS_RF: Response failure
- * PCI_PRI_STATUS_UPRGI: Unexpected Page Request Group Index
- * PCI_PRI_STATUS_STOPPED: PRI has stopped
- */
-int pci_pri_status(struct pci_dev *pdev)
-{
- u16 status, control;
- int pos;
-
- pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI);
- if (!pos)
- return -EINVAL;
-
- pci_read_config_word(pdev, pos + PCI_PRI_CTRL, &control);
- pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status);
-
- /* Stopped bit is undefined when enable == 1, so clear it */
- if (control & PCI_PRI_CTRL_ENABLE)
- status &= ~PCI_PRI_STATUS_STOPPED;
-
- return status;
-}
-EXPORT_SYMBOL_GPL(pci_pri_status);
#endif /* CONFIG_PCI_PRI */
#ifdef CONFIG_PCI_PASID
@@ -410,7 +328,7 @@ EXPORT_SYMBOL_GPL(pci_disable_pasid);
* Otherwise is returns a bitmask with supported features. Current
* features reported are:
* PCI_PASID_CAP_EXEC - Execute permission supported
- * PCI_PASID_CAP_PRIV - Priviledged mode supported
+ * PCI_PASID_CAP_PRIV - Privileged mode supported
*/
int pci_pasid_features(struct pci_dev *pdev)
{
tion/perf-stat.txt10
-rw-r--r--tools/perf/Documentation/perf-timechart.txt52
-rw-r--r--tools/perf/Documentation/perf-top.txt72
-rw-r--r--tools/perf/Documentation/perf-trace.txt61
-rw-r--r--tools/perf/MANIFEST9
-rw-r--r--tools/perf/Makefile843
-rw-r--r--tools/perf/Makefile.perf938
-rw-r--r--tools/perf/arch/arm/Makefile10
-rw-r--r--tools/perf/arch/arm/include/perf_regs.h59
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c60
-rw-r--r--tools/perf/arch/arm/tests/regs_load.S58
-rw-r--r--tools/perf/arch/arm/util/unwind-libdw.c36
-rw-r--r--tools/perf/arch/arm/util/unwind-libunwind.c48
-rw-r--r--tools/perf/arch/arm64/Makefile7
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h88
-rw-r--r--tools/perf/arch/arm64/util/dwarf-regs.c80
-rw-r--r--tools/perf/arch/arm64/util/unwind-libunwind.c82
-rw-r--r--tools/perf/arch/common.c3
-rw-r--r--tools/perf/arch/x86/Makefile11
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h14
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c60
-rw-r--r--tools/perf/arch/x86/tests/regs_load.S98
-rw-r--r--tools/perf/arch/x86/util/tsc.c59
-rw-r--r--tools/perf/arch/x86/util/tsc.h20
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c51
-rw-r--r--tools/perf/arch/x86/util/unwind-libunwind.c (renamed from tools/perf/arch/x86/util/unwind.c)8
-rw-r--r--tools/perf/bash_completion62
-rw-r--r--tools/perf/bench/bench.h3
-rw-r--r--tools/perf/bench/futex-hash.c212
-rw-r--r--tools/perf/bench/futex-requeue.c211
-rw-r--r--tools/perf/bench/futex-wake.c201
-rw-r--r--tools/perf/bench/futex.h71
-rw-r--r--tools/perf/bench/mem-memcpy-arch.h2
-rw-r--r--tools/perf/bench/mem-memcpy.c8
-rw-r--r--tools/perf/bench/mem-memset-arch.h2
-rw-r--r--tools/perf/bench/mem-memset.c4
-rw-r--r--tools/perf/bench/numa.c43
-rw-r--r--tools/perf/bench/sched-pipe.c115
-rw-r--r--tools/perf/builtin-annotate.c74
-rw-r--r--tools/perf/builtin-bench.c255
-rw-r--r--tools/perf/builtin-buildid-cache.c181
-rw-r--r--tools/perf/builtin-buildid-list.c11
-rw-r--r--tools/perf/builtin-diff.c818
-rw-r--r--tools/perf/builtin-evlist.c9
-rw-r--r--tools/perf/builtin-inject.c180
-rw-r--r--tools/perf/builtin-kmem.c104
-rw-r--r--tools/perf/builtin-kvm.c795
-rw-r--r--tools/perf/builtin-list.c87
-rw-r--r--tools/perf/builtin-lock.c152
-rw-r--r--tools/perf/builtin-mem.c36
-rw-r--r--tools/perf/builtin-probe.c108
-rw-r--r--tools/perf/builtin-record.c674
-rw-r--r--tools/perf/builtin-report.c738
-rw-r--r--tools/perf/builtin-sched.c230
-rw-r--r--tools/perf/builtin-script.c394
-rw-r--r--tools/perf/builtin-stat.c462
-rw-r--r--tools/perf/builtin-timechart.c865
-rw-r--r--tools/perf/builtin-top.c328
-rw-r--r--tools/perf/builtin-trace.c2074
-rw-r--r--tools/perf/config/Makefile615
-rw-r--r--tools/perf/config/Makefile.arch23
-rw-r--r--tools/perf/config/feature-checks/.gitignore2
-rw-r--r--tools/perf/config/feature-checks/Makefile149
-rw-r--r--tools/perf/config/feature-checks/test-all.c116
-rw-r--r--tools/perf/config/feature-checks/test-backtrace.c13
-rw-r--r--tools/perf/config/feature-checks/test-bionic.c6
-rw-r--r--tools/perf/config/feature-checks/test-cplus-demangle.c14
-rw-r--r--tools/perf/config/feature-checks/test-dwarf.c10
-rw-r--r--tools/perf/config/feature-checks/test-fortify-source.c6
-rw-r--r--tools/perf/config/feature-checks/test-glibc.c8
-rw-r--r--tools/perf/config/feature-checks/test-gtk2-infobar.c11
-rw-r--r--tools/perf/config/feature-checks/test-gtk2.c10
-rw-r--r--tools/perf/config/feature-checks/test-hello.c6
-rw-r--r--tools/perf/config/feature-checks/test-libaudit.c10
-rw-r--r--tools/perf/config/feature-checks/test-libbfd.c15
-rw-r--r--tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c13
-rw-r--r--tools/perf/config/feature-checks/test-libelf-getphdrnum.c8
-rw-r--r--tools/perf/config/feature-checks/test-libelf-mmap.c8
-rw-r--r--tools/perf/config/feature-checks/test-libelf.c8
-rw-r--r--tools/perf/config/feature-checks/test-libnuma.c9
-rw-r--r--tools/perf/config/feature-checks/test-libperl.c9
-rw-r--r--tools/perf/config/feature-checks/test-libpython-version.c10
-rw-r--r--tools/perf/config/feature-checks/test-libpython.c8
-rw-r--r--tools/perf/config/feature-checks/test-libslang.c6
-rw-r--r--tools/perf/config/feature-checks/test-libunwind-debug-frame.c16
-rw-r--r--tools/perf/config/feature-checks/test-libunwind.c27
-rw-r--r--tools/perf/config/feature-checks/test-stackprotector-all.c6
-rw-r--r--tools/perf/config/feature-checks/test-timerfd.c18
-rw-r--r--tools/perf/config/feature-tests.mak236
-rw-r--r--tools/perf/config/utilities.mak16
-rw-r--r--tools/perf/design.txt13
-rw-r--r--tools/perf/perf-completion.sh206
-rw-r--r--tools/perf/perf-sys.h190
-rw-r--r--tools/perf/perf.c21
-rw-r--r--tools/perf/perf.h203
-rwxr-xr-xtools/perf/python/twatch.py2
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Context.xs2
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/Context.c6
-rw-r--r--tools/perf/tests/attr.c7
-rw-r--r--tools/perf/tests/attr/README6
-rw-r--r--tools/perf/tests/attr/test-record-graph-default2
-rw-r--r--tools/perf/tests/attr/test-record-graph-dwarf2
-rw-r--r--tools/perf/tests/attr/test-record-graph-fp2
-rw-r--r--tools/perf/tests/attr/test-record-group-sampling36
-rw-r--r--tools/perf/tests/attr/test-record-no-inherit2
-rw-r--r--tools/perf/tests/builtin-test.c92
-rw-r--r--tools/perf/tests/code-reading.c581
-rw-r--r--tools/perf/tests/dso-data.c225
-rw-r--r--tools/perf/tests/dwarf-unwind.c144
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c2
-rw-r--r--tools/perf/tests/evsel-tp-sched.c11
-rw-r--r--tools/perf/tests/hists_common.c209
-rw-r--r--tools/perf/tests/hists_common.h75
-rw-r--r--tools/perf/tests/hists_cumulate.c726
-rw-r--r--tools/perf/tests/hists_filter.c289
-rw-r--r--tools/perf/tests/hists_link.c239
-rw-r--r--tools/perf/tests/hists_output.c621
-rw-r--r--tools/perf/tests/keep-tracking.c152
-rw-r--r--tools/perf/tests/make145
-rw-r--r--tools/perf/tests/mmap-basic.c30
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c233
-rw-r--r--tools/perf/tests/open-syscall-all-cpus.c2
-rw-r--r--tools/perf/tests/open-syscall-tp-fields.c32
-rw-r--r--tools/perf/tests/open-syscall.c2
-rw-r--r--tools/perf/tests/parse-events.c345
-rw-r--r--tools/perf/tests/parse-no-sample-id-all.c108
-rw-r--r--tools/perf/tests/perf-record.c58
-rwxr-xr-xtools/perf/tests/perf-targz-src-pkg21
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c172
-rw-r--r--tools/perf/tests/rdpmc.c4
-rw-r--r--tools/perf/tests/sample-parsing.c320
-rw-r--r--tools/perf/tests/sw-clock.c35
-rw-r--r--tools/perf/tests/task-exit.c55
-rw-r--r--tools/perf/tests/tests.h29
-rw-r--r--tools/perf/tests/thread-mg-share.c90
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c57
-rw-r--r--tools/perf/ui/browser.c14
-rw-r--r--tools/perf/ui/browser.h38
-rw-r--r--tools/perf/ui/browsers/annotate.c42
-rw-r--r--tools/perf/ui/browsers/header.c127
-rw-r--r--tools/perf/ui/browsers/hists.c462
-rw-r--r--tools/perf/ui/browsers/map.c40
-rw-r--r--tools/perf/ui/browsers/map.h2
-rw-r--r--tools/perf/ui/browsers/scripts.c11
-rw-r--r--tools/perf/ui/gtk/annotate.c13
-rw-r--r--tools/perf/ui/gtk/browser.c2
-rw-r--r--tools/perf/ui/gtk/gtk.h22
-rw-r--r--tools/perf/ui/gtk/hists.c251
-rw-r--r--tools/perf/ui/gtk/progress.c20
-rw-r--r--tools/perf/ui/gtk/setup.c2
-rw-r--r--tools/perf/ui/gtk/util.c7
-rw-r--r--tools/perf/ui/hist.c677
-rw-r--r--tools/perf/ui/progress.c32
-rw-r--r--tools/perf/ui/progress.h21
-rw-r--r--tools/perf/ui/setup.c64
-rw-r--r--tools/perf/ui/stdio/hist.c165
-rw-r--r--tools/perf/ui/tui/progress.c18
-rw-r--r--tools/perf/ui/tui/setup.c3
-rw-r--r--tools/perf/ui/tui/tui.h6
-rw-r--r--tools/perf/ui/tui/util.c19
-rw-r--r--tools/perf/ui/ui.h14
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN26
-rw-r--r--tools/perf/util/alias.c6
-rw-r--r--tools/perf/util/annotate.c208
-rw-r--r--tools/perf/util/annotate.h39
-rw-r--r--tools/perf/util/build-id.c18
-rw-r--r--tools/perf/util/build-id.h5
-rw-r--r--tools/perf/util/cache.h3
-rw-r--r--tools/perf/util/callchain.c299
-rw-r--r--tools/perf/util/callchain.h50
-rw-r--r--tools/perf/util/cgroup.c6
-rw-r--r--tools/perf/util/color.c22
-rw-r--r--tools/perf/util/color.h3
-rw-r--r--tools/perf/util/comm.c122
-rw-r--r--tools/perf/util/comm.h21
-rw-r--r--tools/perf/util/config.c4
-rw-r--r--tools/perf/util/cpumap.c166
-rw-r--r--tools/perf/util/cpumap.h37
-rw-r--r--tools/perf/util/data.c133
-rw-r--r--tools/perf/util/data.h50
-rw-r--r--tools/perf/util/debug.c31
-rw-r--r--tools/perf/util/debug.h2
-rw-r--r--tools/perf/util/dso.c471
-rw-r--r--tools/perf/util/dso.h108
-rw-r--r--tools/perf/util/dwarf-aux.c51
-rw-r--r--tools/perf/util/dwarf-aux.h9
-rw-r--r--tools/perf/util/event.c445
-rw-r--r--tools/perf/util/event.h117
-rw-r--r--tools/perf/util/evlist.c712
-rw-r--r--tools/perf/util/evlist.h126
-rw-r--r--tools/perf/util/evsel.c757
-rw-r--r--tools/perf/util/evsel.h87
-rwxr-xr-xtools/perf/util/generate-cmdlist.sh4
-rw-r--r--tools/perf/util/header.c323
-rw-r--r--tools/perf/util/header.h54
-rw-r--r--tools/perf/util/help.c7
-rw-r--r--tools/perf/util/hist.c808
-rw-r--r--tools/perf/util/hist.h230
-rw-r--r--tools/perf/util/include/asm/bug.h22
-rw-r--r--tools/perf/util/include/asm/hash.h6
-rw-r--r--tools/perf/util/include/dwarf-regs.h2
-rw-r--r--tools/perf/util/include/linux/bitmap.h3
-rw-r--r--tools/perf/util/include/linux/bitops.h4
-rw-r--r--tools/perf/util/include/linux/compiler.h21
-rw-r--r--tools/perf/util/include/linux/export.h6
-rw-r--r--tools/perf/util/include/linux/hash.h5
-rw-r--r--tools/perf/util/include/linux/kernel.h6
-rw-r--r--tools/perf/util/include/linux/list.h2
-rw-r--r--tools/perf/util/include/linux/magic.h12
-rw-r--r--tools/perf/util/include/linux/prefetch.h6
-rw-r--r--tools/perf/util/include/linux/string.h1
-rw-r--r--tools/perf/util/include/linux/types.h29
-rw-r--r--tools/perf/util/intlist.c23
-rw-r--r--tools/perf/util/intlist.h2
-rw-r--r--tools/perf/util/machine.c550
-rw-r--r--tools/perf/util/machine.h67
-rw-r--r--tools/perf/util/map.c270
-rw-r--r--tools/perf/util/map.h57
-rw-r--r--tools/perf/util/pager.c12
-rw-r--r--tools/perf/util/parse-events.c234
-rw-r--r--tools/perf/util/parse-events.h14
-rw-r--r--tools/perf/util/parse-events.l67
-rw-r--r--tools/perf/util/parse-events.y76
-rw-r--r--tools/perf/util/parse-options.c276
-rw-r--r--tools/perf/util/parse-options.h20
-rw-r--r--tools/perf/util/path.c10
-rw-r--r--tools/perf/util/perf_regs.c27
-rw-r--r--tools/perf/util/perf_regs.h19
-rw-r--r--tools/perf/util/pmu.c282
-rw-r--r--tools/perf/util/pmu.h11
-rw-r--r--tools/perf/util/probe-event.c1007
-rw-r--r--tools/perf/util/probe-event.h17
-rw-r--r--tools/perf/util/probe-finder.c585
-rw-r--r--tools/perf/util/probe-finder.h23
-rw-r--r--tools/perf/util/pstack.h10
-rw-r--r--tools/perf/util/python-ext-sources3
-rw-r--r--tools/perf/util/python.c34
-rw-r--r--tools/perf/util/rblist.c27
-rw-r--r--tools/perf/util/rblist.h1
-rw-r--r--tools/perf/util/record.c215
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c27
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c74
-rw-r--r--tools/perf/util/session.c710
-rw-r--r--tools/perf/util/session.h66
-rw-r--r--tools/perf/util/setup.py4
-rw-r--r--tools/perf/util/sort.c967
-rw-r--r--tools/perf/util/sort.h49
-rw-r--r--tools/perf/util/srcline.c299
-rw-r--r--tools/perf/util/stat.c6
-rw-r--r--tools/perf/util/stat.h11
-rw-r--r--tools/perf/util/strbuf.c2
-rw-r--r--tools/perf/util/strfilter.c72
-rw-r--r--tools/perf/util/strfilter.h12
-rw-r--r--tools/perf/util/string.c26
-rw-r--r--tools/perf/util/strlist.c3
-rw-r--r--tools/perf/util/svghelper.c235
-rw-r--r--tools/perf/util/svghelper.h19
-rw-r--r--tools/perf/util/symbol-elf.c817
-rw-r--r--tools/perf/util/symbol-minimal.c26
-rw-r--r--tools/perf/util/symbol.c897
-rw-r--r--tools/perf/util/symbol.h67
-rw-r--r--tools/perf/util/sysfs.c60
-rw-r--r--tools/perf/util/sysfs.h6
-rw-r--r--tools/perf/util/target.c63
-rw-r--r--tools/perf/util/target.h60
-rw-r--r--tools/perf/util/thread.c191
-rw-r--r--tools/perf/util/thread.h65
-rw-r--r--tools/perf/util/thread_map.c20
-rw-r--r--tools/perf/util/tool.h11
-rw-r--r--tools/perf/util/top.c4
-rw-r--r--tools/perf/util/top.h7
-rw-r--r--tools/perf/util/trace-event-info.c108
-rw-r--r--tools/perf/util/trace-event-parse.c58
-rw-r--r--tools/perf/util/trace-event-read.c72
-rw-r--r--tools/perf/util/trace-event-scripting.c2
-rw-r--r--tools/perf/util/trace-event.c82
-rw-r--r--tools/perf/util/trace-event.h40
-rw-r--r--tools/perf/util/types.h24
-rw-r--r--tools/perf/util/unwind-libdw.c210
-rw-r--r--tools/perf/util/unwind-libdw.h21
-rw-r--r--tools/perf/util/unwind-libunwind.c (renamed from tools/perf/util/unwind.c)150
-rw-r--r--tools/perf/util/unwind.h18
-rw-r--r--tools/perf/util/util.c297
-rw-r--r--tools/perf/util/util.h62
-rw-r--r--tools/perf/util/values.c14
-rw-r--r--tools/perf/util/values.h2
-rw-r--r--tools/perf/util/vdso.c4
302 files changed, 28494 insertions, 9317 deletions
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 8f8fbc227a4..782d86e961b 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -13,6 +13,7 @@ perf*.html
common-cmds.h
perf.data
perf.data.old
+output.svg
perf-archive
tags
TAGS
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
index eb30044a922..3ba1c0b0990 100644
--- a/tools/perf/Documentation/Makefile
+++ b/tools/perf/Documentation/Makefile
@@ -1,12 +1,6 @@
+include ../../scripts/Makefile.include
include ../config/utilities.mak
-OUTPUT := ./
-ifeq ("$(origin O)", "command line")
- ifneq ($(O),)
- OUTPUT := $(O)/
- endif
-endif
-
MAN1_TXT= \
$(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
$(wildcard perf-*.txt)) \
@@ -150,17 +144,18 @@ NO_SUBDIR = :
endif
ifneq ($(findstring $(MAKEFLAGS),s),s)
-ifndef V
- QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@;
- QUIET_XMLTO = @echo ' ' XMLTO $@;
- QUIET_DB2TEXI = @echo ' ' DB2TEXI $@;
- QUIET_MAKEINFO = @echo ' ' MAKEINFO $@;
- QUIET_DBLATEX = @echo ' ' DBLATEX $@;
- QUIET_XSLTPROC = @echo ' ' XSLTPROC $@;
- QUIET_GEN = @echo ' ' GEN $@;
+ifneq ($(V),1)
+ QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
+ QUIET_XMLTO = @echo ' XMLTO '$@;
+ QUIET_DB2TEXI = @echo ' DB2TEXI '$@;
+ QUIET_MAKEINFO = @echo ' MAKEINFO '$@;
+ QUIET_DBLATEX = @echo ' DBLATEX '$@;
+ QUIET_XSLTPROC = @echo ' XSLTPROC '$@;
+ QUIET_GEN = @echo ' GEN '$@;
QUIET_STDERR = 2> /dev/null
QUIET_SUBDIR0 = +@subdir=
- QUIET_SUBDIR1 = ;$(NO_SUBDIR) echo ' ' SUBDIR $$subdir; \
+ QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
+ echo ' SUBDIR ' $$subdir; \
$(MAKE) $(PRINT_DIR) -C $$subdir
export V
endif
@@ -189,47 +184,43 @@ ifdef missing_tools
endif
do-install-man: man
- $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir)
-# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir)
-# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir)
- $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir)
-# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir)
-# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+ $(call QUIET_INSTALL, Documentation-man) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \
+# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \
+# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
+ $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \
+# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \
+# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
install-man: check-man-tools man
-try-install-man:
ifdef missing_tools
- $(warning Please install $(missing_tools) to have the man pages installed)
+ DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
else
- $(MAKE) do-install-man
+ DO_INSTALL_MAN = do-install-man
endif
+try-install-man: $(DO_INSTALL_MAN)
+
install-info: info
- $(INSTALL) -d -m 755 $(DESTDIR)$(infodir)
- $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir)
+ $(call QUIET_INSTALL, Documentation-info) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \
+ $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \
if test -r $(DESTDIR)$(infodir)/dir; then \
- $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
- $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
else \
echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
fi
install-pdf: pdf
- $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir)
- $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
+ $(call QUIET_INSTALL, Documentation-pdf) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \
+ $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
#install-html: html
# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
-ifneq ($(MAKECMDGOALS),clean)
-ifneq ($(MAKECMDGOALS),tags)
-$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
- $(QUIET_SUBDIR0)../ $(QUIET_SUBDIR1) $(OUTPUT)PERF-VERSION-FILE
-
--include $(OUTPUT)PERF-VERSION-FILE
-endif
-endif
#
# Determine "include::" file references in asciidoc files.
@@ -259,15 +250,17 @@ $(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
$(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
date >$@
+CLEAN_FILES = \
+ $(MAN_XML) $(addsuffix +,$(MAN_XML)) \
+ $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
+ $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) \
+ $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ \
+ $(OUTPUT)perf.info $(OUTPUT)perfman.info \
+ $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep \
+ $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt \
+ $(cmds_txt) $(OUTPUT)*.made
clean:
- $(RM) $(MAN_XML) $(addsuffix +,$(MAN_XML))
- $(RM) $(MAN_HTML) $(addsuffix +,$(MAN_HTML))
- $(RM) $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7)
- $(RM) $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++
- $(RM) $(OUTPUT)perf.info $(OUTPUT)perfman.info
- $(RM) $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep
- $(RM) $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt
- $(RM) $(cmds_txt) $(OUTPUT)*.made
+ $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
$(MAN_HTML): $(OUTPUT)%.html : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
@@ -277,7 +270,7 @@ $(MAN_HTML): $(OUTPUT)%.html : %.txt
$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
$(QUIET_XMLTO)$(RM) $@ && \
- $(XMLTO) -o $(OUTPUT) -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
$(OUTPUT)%.xml : %.txt
$(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
@@ -348,5 +341,3 @@ $(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
#quick-install-html:
# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
-
-.PHONY: .FORCE-PERF-VERSION-FILE
diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt
index 77f95276242..a4e39215648 100644
--- a/tools/perf/Documentation/examples.txt
+++ b/tools/perf/Documentation/examples.txt
@@ -66,7 +66,7 @@ Furthermore, these tracepoints can be used to sample the workload as
well. For example the page allocations done by a 'git gc' can be
captured the following way:
- titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc
+ titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
@@ -120,7 +120,7 @@ Furthermore, call-graph sampling can be done too, of page
allocations - to see precisely what kind of page allocations there
are:
- titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc
+ titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc
Counting objects: 1148, done.
Delta compression using up to 2 threads.
Compressing objects: 100% (450/450), done.
diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt
index 5032a142853..ac6ecbb3e66 100644
--- a/tools/perf/Documentation/perf-archive.txt
+++ b/tools/perf/Documentation/perf-archive.txt
@@ -12,9 +12,9 @@ SYNOPSIS
DESCRIPTION
-----------
-This command runs runs perf-buildid-list --with-hits, and collects the files
-with the buildids found so that analysis of perf.data contents can be possible
-on another machine.
+This command runs perf-buildid-list --with-hits, and collects the files with the
+buildids found so that analysis of perf.data contents can be possible on another
+machine.
SEE ALSO
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index 7065cd6fbdf..4464ad770d5 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -48,6 +48,12 @@ SUBSYSTEM
'mem'::
Memory access performance.
+'numa'::
+ NUMA scheduling and MM benchmarks.
+
+'futex'::
+ Futex stressing benchmarks.
+
'all'::
All benchmark subsystems.
@@ -187,6 +193,22 @@ Show only the result with page faults before memset.
--no-prefault::
Show only the result without page faults before memset.
+SUITES FOR 'numa'
+~~~~~~~~~~~~~~~~~
+*mem*::
+Suite for evaluating NUMA workloads.
+
+SUITES FOR 'futex'
+~~~~~~~~~~~~~~~~~~
+*hash*::
+Suite for evaluating hash tables.
+
+*wake*::
+Suite for evaluating wake calls.
+
+*requeue*::
+Suite for evaluating requeue calls.
+
SEE ALSO
--------
linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
index e9a8349a717..fd77d81ea74 100644
--- a/tools/perf/Documentation/perf-buildid-cache.txt
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -21,6 +21,19 @@ OPTIONS
-a::
--add=::
Add specified file to the cache.
+-k::
+--kcore::
+ Add specified kcore file to the cache. For the current host that is
+ /proc/kcore which requires root permissions to read. Be aware that
+ running 'perf buildid-cache' as root may update root's build-id cache
+ not the user's. Use the -v option to see where the file is created.
+ Note that the copied file contains only code sections not the whole core
+ image. Note also that files "kallsyms" and "modules" must also be in the
+ same directory and are also copied. All 3 files are created with read
+ permissions for root only. kcore will not be added if there is already a
+ kcore in the cache (with the same build-id) that has the same modules at
+ the same addresses. Use the -v option to see if a copy of kcore is
+ actually made.
-r::
--remove=::
Remove specified file from the cache.
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
index 5b3123d5721..b3b8abae62b 100644
--- a/tools/perf/Documentation/perf-diff.txt
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -3,17 +3,17 @@ perf-diff(1)
NAME
----
-perf-diff - Read two perf.data files and display the differential profile
+perf-diff - Read perf.data files and display the differential profile
SYNOPSIS
--------
[verse]
-'perf diff' [oldfile] [newfile]
+'perf diff' [baseline file] [data file1] [[data file2] ... ]
DESCRIPTION
-----------
-This command displays the performance difference amongst two perf.data files
-captured via perf record.
+This command displays the performance difference amongst two or more perf.data
+files captured via perf record.
If no parameters are passed it will assume perf.data.old and perf.data.
@@ -33,21 +33,25 @@ OPTIONS
-d::
--dsos=::
Only consider symbols in these dsos. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-C::
--comms=::
Only consider symbols in these comms. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
-s::
--sort=::
- Sort by key(s): pid, comm, dso, symbol.
+ Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
+ Please see description of --sort in the perf-report man page.
-t::
--field-separator=::
@@ -75,8 +79,6 @@ OPTIONS
-c::
--compute::
Differential computation selection - delta,ratio,wdiff (default is delta).
- If '+' is specified as a first character, the output is sorted based
- on the computation results.
See COMPARISON METHODS section for more info.
-p::
@@ -87,6 +89,71 @@ OPTIONS
--formula::
Show formula for given computation.
+-o::
+--order::
+ Specify compute sorting column number.
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options.
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+COMPARISON
+----------
+The comparison is governed by the baseline file. The baseline perf.data
+file is iterated for samples. All other perf.data files specified on
+the command line are searched for the baseline sample pair. If the pair
+is found, specified computation is made and result is displayed.
+
+All samples from non-baseline perf.data files, that do not match any
+baseline entry, are displayed with empty space within baseline column
+and possible computation results (delta) in their related column.
+
+Example files samples:
+- file A with samples f1, f2, f3, f4, f6
+- file B with samples f2, f4, f5
+- file C with samples f1, f2, f5
+
+Example output:
+ x - computation takes place for pair
+ b - baseline sample percentage
+
+- perf diff A B C
+
+ baseline/A compute/B compute/C samples
+ ---------------------------------------
+ b x f1
+ b x x f2
+ b f3
+ b x f4
+ b f6
+ x x f5
+
+- perf diff B A C
+
+ baseline/B compute/A compute/C samples
+ ---------------------------------------
+ b x x f2
+ b x f4
+ b x f5
+ x x f1
+ x f3
+ x f6
+
+- perf diff C B A
+
+ baseline/C compute/B compute/A samples
+ ---------------------------------------
+ b x f1
+ b x x f2
+ b x f5
+ x f3
+ x x f4
+ x f6
+
COMPARISON METHODS
------------------
delta
@@ -96,12 +163,16 @@ If specified the 'Delta' column is displayed with value 'd' computed as:
d = A->period_percent - B->period_percent
with:
- - A/B being matching hist entry from first/second file specified
+ - A/B being matching hist entry from data/baseline file specified
(or perf.data/perf.data.old) respectively.
- period_percent being the % of the hist entry period value within
single data file
+ - with filtering by -C, -d and/or -S, period_percent might be changed
+ relative to how entries are filtered. Use --percentage=absolute to
+ prevent such fluctuation.
+
ratio
~~~~~
If specified the 'Ratio' column is displayed with value 'r' computed as:
@@ -109,25 +180,27 @@ If specified the 'Ratio' column is displayed with value 'r' computed as:
r = A->period / B->period
with:
- - A/B being matching hist entry from first/second file specified
+ - A/B being matching hist entry from data/baseline file specified
(or perf.data/perf.data.old) respectively.
- period being the hist entry period value
-wdiff
-~~~~~
+wdiff:WEIGHT-B,WEIGHT-A
+~~~~~~~~~~~~~~~~~~~~~~~
If specified the 'Weighted diff' column is displayed with value 'd' computed as:
d = B->period * WEIGHT-A - A->period * WEIGHT-B
- - A/B being matching hist entry from first/second file specified
+ - A/B being matching hist entry from data/baseline file specified
(or perf.data/perf.data.old) respectively.
- period being the hist entry period value
- WEIGHT-A/WEIGHT-B being user suplied weights in the the '-c' option
behind ':' separator like '-c wdiff:1,2'.
+ - WIEGHT-A being the weight of the data file
+ - WIEGHT-B being the weight of the baseline data file
SEE ALSO
--------
-linkperf:perf-record[1]
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index 326f2cb333c..52276a6d2b7 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -10,9 +10,10 @@ SYNOPSIS
[verse]
'perf kvm' [--host] [--guest] [--guestmount=<path>
[--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]]
- {top|record|report|diff|buildid-list}
+ {top|record|report|diff|buildid-list} [<options>]
'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
- | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat}
+ | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} [<options>]
+'perf kvm stat [record|report|live] [<options>]
DESCRIPTION
-----------
@@ -23,10 +24,17 @@ There are a couple of variants of perf kvm:
of an arbitrary workload.
'perf kvm record <command>' to record the performance counter profile
- of an arbitrary workload and save it into a perf data file. If both
- --host and --guest are input, the perf data file name is perf.data.kvm.
- If there is no --host but --guest, the file name is perf.data.guest.
- If there is no --guest but --host, the file name is perf.data.host.
+ of an arbitrary workload and save it into a perf data file. We set the
+ default behavior of perf kvm as --guest, so if neither --host nor --guest
+ is input, the perf data file name is perf.data.guest. If --host is input,
+ the perf data file name is perf.data.kvm. If you want to record data into
+ perf.data.host, please input --host --no-guest. The behaviors are shown as
+ following:
+ Default('') -> perf.data.guest
+ --host -> perf.data.kvm
+ --guest -> perf.data.guest
+ --host --guest -> perf.data.kvm
+ --host --no-guest -> perf.data.host
'perf kvm report' to display the performance counter profile information
recorded via perf kvm record.
@@ -36,7 +44,9 @@ There are a couple of variants of perf kvm:
'perf kvm buildid-list' to display the buildids found in a perf data file,
so that other tools can be used to fetch packages with matching symbol tables
- for use by perf report.
+ for use by perf report. As buildid is read from /sys/kernel/notes in os, then
+ if you want to list the buildid for guest, please make sure your perf data file
+ was captured with --guestmount in perf kvm record.
'perf kvm stat <command>' to run a command and gather performance counter
statistics.
@@ -50,17 +60,21 @@ There are a couple of variants of perf kvm:
'perf kvm stat report' reports statistical data which includes events
handled time, samples, and so on.
+ 'perf kvm stat live' reports statistical data in a live mode (similar to
+ record + report but with statistical data updated live at a given display
+ rate).
+
OPTIONS
-------
-i::
---input=::
+--input=<path>::
Input file name.
-o::
---output::
+--output=<path>::
Output file name.
---host=::
+--host::
Collect host side performance profile.
---guest=::
+--guest::
Collect guest side performance profile.
--guestmount=<path>::
Guest os root file system mount directory. Users mounts guest os
@@ -79,19 +93,61 @@ OPTIONS
kernel module information. Users copy it out from guest os.
--guestvmlinux=<path>::
Guest os kernel vmlinux.
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
STAT REPORT OPTIONS
-------------------
--vcpu=<value>::
analyze events which occures on this vcpu. (default: all vcpus)
---events=<value>::
- events to be analyzed. Possible values: vmexit, mmio, ioport.
+--event=<value>::
+ event to be analyzed. Possible values: vmexit, mmio, ioport.
(default: vmexit)
-k::
--key=<value>::
Sorting key. Possible values: sample (default, sort by samples
number), time (sort by average time).
+-p::
+--pid=::
+ Analyze events only for given process ID(s) (comma separated list).
+
+STAT LIVE OPTIONS
+-----------------
+-d::
+--display::
+ Time in seconds between display updates
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs.
+
+-p::
+--pid=::
+ Analyze events only for given process ID(s) (comma separated list).
+
+--vcpu=<value>::
+ analyze events which occures on this vcpu. (default: all vcpus)
+
+
+--event=<value>::
+ event to be analyzed. Possible values: vmexit, mmio, ioport.
+ (default: vmexit)
+
+-k::
+--key=<value>::
+ Sorting key. Possible values: sample (default, sort by samples
+ number), time (sort by average time).
+
+--duration=<value>::
+ Show events other than HLT that take longer than duration usecs.
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
index d1e39dc8c81..6fce6a62220 100644
--- a/tools/perf/Documentation/perf-list.txt
+++ b/tools/perf/Documentation/perf-list.txt
@@ -8,7 +8,7 @@ perf-list - List all symbolic event types
SYNOPSIS
--------
[verse]
-'perf list' [hw|sw|cache|tracepoint|event_glob]
+'perf list' [hw|sw|cache|tracepoint|pmu|event_glob]
DESCRIPTION
-----------
@@ -29,6 +29,8 @@ counted. The following modifiers exist:
G - guest counting (in KVM guests)
H - host counting (not in KVM guests)
p - precise level
+ S - read sample value (PERF_SAMPLE_READ)
+ D - pin the event to the PMU
The 'p' modifier can be used for specifying how precise the instruction
address should be. The 'p' modifier can be specified multiple times:
@@ -104,6 +106,8 @@ To limit the list use:
'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
block, etc.
+. 'pmu' to print the kernel supplied PMU events.
+
. If none of the above is matched, it will apply the supplied glob to all
events, printing the ones that match.
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
index c7f5f55634a..ab25be28c9d 100644
--- a/tools/perf/Documentation/perf-lock.txt
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -48,7 +48,7 @@ REPORT OPTIONS
-k::
--key=<value>::
Sorting key. Possible values: acquired (default), contended,
- wait_total, wait_max, wait_min.
+ avg_wait, wait_total, wait_max, wait_min.
INFO OPTIONS
------------
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
index 888d51137fb..1d78a4064da 100644
--- a/tools/perf/Documentation/perf-mem.txt
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -18,6 +18,10 @@ from it, into perf.data. Perf record options are accepted and are passed through
"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
right set of options to display a memory access profile.
+Note that on Intel systems the memory latency reported is the use-latency,
+not the pure load (or store latency). Use latency includes any pipeline
+queueing delays in addition to the memory subsystem latency.
+
OPTIONS
-------
<command>...::
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index b715cb71592..1513935c399 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -136,6 +136,8 @@ Each probe argument follows below syntax.
'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
+
LINE SYNTAX
-----------
Line range is described by following syntax.
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index d4da111ef53..d460049cae8 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -57,6 +57,8 @@ OPTIONS
-t::
--tid=::
Record events on existing thread ID (comma separated list).
+ This option also disables inheritance by default. Enable it by adding
+ --inherit.
-u::
--uid=::
@@ -65,16 +67,9 @@ OPTIONS
-r::
--realtime=::
Collect data with this RT SCHED_FIFO priority.
--D::
---no-delay::
- Collect data without buffering.
--A::
---append::
- Append to the output file to do incremental profiling.
--f::
---force::
- Overwrite existing data file. (deprecated)
+--no-buffering::
+ Collect data without buffering.
-c::
--count=::
@@ -93,11 +88,25 @@ OPTIONS
-m::
--mmap-pages=::
- Number of mmap data pages. Must be a power of two.
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
-g::
+ Enables call-graph (stack chain/backtrace) recording.
+
--call-graph::
- Do call-graph (stack chain/backtrace) recording.
+ Setup and enable call-graph (stack chain/backtrace) recording,
+ implies -g.
+
+ Allows specifying "fp" (frame pointer) or "dwarf"
+ (DWARF's CFI - Call Frame Information) as the method to collect
+ the information used to show the call graphs.
+
+ In some systems, where binaries are build with gcc
+ --fomit-frame-pointer, using the "fp" method will produce bogus
+ call graphs, using "dwarf", if available (perf tools linked to
+ the libunwind library) should be used instead.
-q::
--quiet::
@@ -172,9 +181,13 @@ following filters are defined:
- u: only when the branch target is at the user level
- k: only when the branch target is in the kernel
- hv: only when the target is at the hypervisor level
+ - in_tx: only when the target is in a hardware transaction
+ - no_tx: only when the target is not in a hardware transaction
+ - abort_tx: only when the target is a hardware transaction abort
+ - cond: conditional branches
+
-The option requires at least one branch type among any, any_call, any_ret, ind_call.
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
The privilege levels may be omitted, in which case, the privilege levels of the associated
event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
levels are subject to permissions. When sampling on multiple events, branch stack sampling
@@ -182,12 +195,25 @@ is enabled for all the sampling events. The sampled branch type is the same for
The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
Note that this feature may not be available on all processors.
--W::
--weight::
Enable weightened sampling. An additional weight is recorded per sample and can be
displayed with the weight and local_weight sort keys. This currently works for TSX
abort events and some memory events in precise mode on modern Intel CPUs.
+--transaction::
+Record transaction flags for transaction related events.
+
+--per-thread::
+Use per-thread mmaps. By default per-cpu mmaps are created. This option
+overrides that and uses per-thread mmaps. A side-effect of that is that
+inheritance is automatically disabled. --per-thread is ignored with a warning
+if combined with -a or -C options.
+
+-D::
+--delay=::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 66dab7410c1..d2b59af62bc 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -25,10 +25,6 @@ OPTIONS
--verbose::
Be more verbose. (show symbol address, etc)
--d::
---dsos=::
- Only consider symbols in these dsos. CSV that understands
- file://filename entries.
-n::
--show-nr-samples::
Show the number of samples for each symbol
@@ -42,11 +38,18 @@ OPTIONS
-c::
--comms=::
Only consider symbols in these comms. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
-S::
--symbols=::
Only consider these symbols. CSV that understands
- file://filename entries.
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
--symbol-filter=::
Only show symbols that match (partially) with this filter.
@@ -71,7 +74,20 @@ OPTIONS
entries are displayed as "[other]".
- cpu: cpu number the task ran at the time of sample
- srcline: filename and line number executed at the time of sample. The
- DWARF debuggin info must be provided.
+ DWARF debugging info must be provided.
+ - weight: Event specific weight, e.g. memory latency or transaction
+ abort cost. This is the global weight.
+ - local_weight: Local weight version of the weight above.
+ - transaction: Transaction abort flags.
+ - overhead: Overhead percentage of sample
+ - overhead_sys: Overhead percentage of sample running in system mode
+ - overhead_us: Overhead percentage of sample running in user mode
+ - overhead_guest_sys: Overhead percentage of sample running in system mode
+ on guest machine
+ - overhead_guest_us: Overhead percentage of sample running in user mode on
+ guest machine
+ - sample: Number of sample
+ - period: Raw number of event count of sample
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
@@ -85,10 +101,38 @@ OPTIONS
- symbol_from: name of function branched from
- symbol_to: name of function branched to
- mispredict: "N" for predicted branch, "Y" for mispredicted branch
+ - in_tx: branch in TSX transaction
+ - abort: TSX transaction abort.
And default sort keys are changed to comm, dso_from, symbol_from, dso_to
and symbol_to, see '--branch-stack'.
+-F::
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+ By default, every sort keys not specified in -F will be appended
+ automatically.
+
+ If --mem-mode option is used, following sort keys are also available
+ (incompatible with --branch-stack):
+ symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+ - symbol_daddr: name of data symbol being executed on at the time of sample
+ - dso_daddr: name of library or module containing the data being executed
+ on at the time of sample
+ - locked: whether the bus was locked at the time of sample
+ - tlb: type of tlb access for the data at the time of sample
+ - mem: type of memory access for the data at the time of sample
+ - snoop: type of snoop (if any) for the data at the time of sample
+ - dcacheline: the cacheline the data address is on at the time of sample
+
+ And default sort keys are changed to local_weight, mem, sym, dso,
+ symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+
-p::
--parent=<regex>::
A regex filter to identify parent. The parent is a caller of this
@@ -115,7 +159,7 @@ OPTIONS
--dump-raw-trace::
Dump raw trace in ASCII.
--g [type,min[,limit],order]::
+-g [type,min[,limit],order[,key]]::
--call-graph::
Display call chains using type, min percent threshold, optional print
limit and order.
@@ -129,12 +173,34 @@ OPTIONS
- callee: callee based call graph.
- caller: inverted caller based call graph.
- Default: fractal,0.5,callee.
+ key can be:
+ - function: compare on functions
+ - address: compare on individual code addresses
+
+ Default: fractal,0.5,callee,function.
+
+--children::
+ Accumulate callchain of children to parent entry so that then can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires callchains are recorded.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+
+ Default: 127
-G::
--inverted::
alias for inverted caller based call graph.
+--ignore-callees=<regex>::
+ Ignore callees of the function(s) matching the given regex.
+ This has the effect of collecting the callers of each such
+ function into one place in the call-graph tree.
+
--pretty=<key>::
Pretty printing style. key: normal, raw
@@ -210,10 +276,35 @@ OPTIONS
Demangle symbol names to human readable form. It's enabled by default,
disable with --no-demangle.
+--mem-mode::
+ Use the data addresses of samples in addition to instruction addresses
+ to build the histograms. To generate meaningful output, the perf.data
+ file must have been obtained using perf record -d -W and using a
+ special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
+ 'perf mem' for simpler access.
+
--percent-limit::
Do not show entries which have an overhead under that percent.
(Default: 0).
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+--header::
+ Show header information in the perf.data file. This includes
+ various information like hostname, OS and perf version, cpu/mem
+ info, perf command line, event list and so on. Currently only
+ --stdio output supports this feature.
+
+--header-only::
+ Show only perf.data header (forces --stdio).
+
SEE ALSO
--------
linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index e9cbfcddfa3..05f9a0a6784 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,7 @@ OPTIONS
-f::
--fields::
Comma separated list of fields to print. Options are:
- comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff.
+ comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -203,6 +203,18 @@ OPTIONS
--show-kernel-path::
Try to resolve the path of [kernel.kallsyms]
+--show-task-events
+ Display task related events (e.g. FORK, COMM, EXIT).
+
+--show-mmap-events
+ Display mmap related events (e.g. MMAP, MMAP2).
+
+--header
+ Show perf.data header.
+
+--header-only
+ Show only perf.data header.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2fe87fb558f..29ee857c09c 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -132,6 +132,16 @@ is a useful mode to detect imbalance between physical cores. To enable this mod
use --per-core in addition to -a. (system-wide). The output includes the
core number and the number of online logical processors on that physical processor.
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
EXAMPLES
--------
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
index 1632b0efc75..5e0f986dff3 100644
--- a/tools/perf/Documentation/perf-timechart.txt
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -8,7 +8,7 @@ perf-timechart - Tool to visualize total system behavior during a workload
SYNOPSIS
--------
[verse]
-'perf timechart' {record}
+'perf timechart' [<timechart options>] {record} [<record options>]
DESCRIPTION
-----------
@@ -20,8 +20,8 @@ There are two variants of perf timechart:
'perf timechart' to turn a trace into a Scalable Vector Graphics file,
that can be viewed with popular SVG viewers such as 'Inkscape'.
-OPTIONS
--------
+TIMECHART OPTIONS
+-----------------
-o::
--output=::
Select the output file (default: output.svg)
@@ -34,12 +34,58 @@ OPTIONS
-P::
--power-only::
Only output the CPU power section of the diagram
+-T::
+--tasks-only::
+ Don't output processor state transitions
-p::
--process::
Select the processes to display, by name or PID
--symfs=<directory>::
Look for files with symbols relative to this directory.
+-n::
+--proc-num::
+ Print task info for at least given number of tasks.
+-t::
+--topology::
+ Sort CPUs according to topology.
+--highlight=<duration_nsecs|task_name>::
+ Highlight tasks (using different color) that run more than given
+ duration or tasks with given name. If number is given it's interpreted
+ as number of nanoseconds. If non-numeric string is given it's
+ interpreted as task name.
+
+RECORD OPTIONS
+--------------
+-P::
+--power-only::
+ Record only power-related events
+-T::
+--tasks-only::
+ Record only tasks-related events
+-g::
+--callchain::
+ Do call-graph (stack chain/backtrace) recording
+
+EXAMPLES
+--------
+
+$ perf timechart record git pull
+
+ [ perf record: Woken up 13 times to write data ]
+ [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
+
+$ perf timechart
+
+ Written 10.2 seconds of trace to output.svg.
+
+Record system-wide timechart:
+
+ $ perf timechart record
+
+ then generate timechart and highlight 'gcc' tasks:
+
+ $ perf timechart --highlight gcc
SEE ALSO
--------
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 7fdd1909e37..180ae02137a 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -50,7 +50,6 @@ Default is to monitor all CPUS.
--count-filter=<count>::
Only display functions with more events than this.
--g::
--group::
Put the counters into a counter group.
@@ -68,7 +67,9 @@ Default is to monitor all CPUS.
-m <pages>::
--mmap-pages=<pages>::
- Number of mmapped data pages.
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
-p <pid>::
--pid=<pid>::
@@ -86,7 +87,6 @@ Default is to monitor all CPUS.
--realtime=<priority>::
Collect data with this RT SCHED_FIFO priority.
--s <symbol>::
--sym-annotate=<symbol>::
Annotate this symbol.
@@ -112,7 +112,18 @@ Default is to monitor all CPUS.
-s::
--sort::
- Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight, local_weight.
+ Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
+ local_weight, abort, in_tx, transaction, overhead, sample, period.
+ Please see description of --sort in the perf-report man page.
+
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+ By default, every sort keys not specified in --field will be appended
+ automatically.
-n::
--show-nr-samples::
@@ -122,13 +133,16 @@ Default is to monitor all CPUS.
Show a column with the sum of periods.
--dsos::
- Only consider symbols in these dsos.
+ Only consider symbols in these dsos. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
--comms::
- Only consider symbols in these comms.
+ Only consider symbols in these comms. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
--symbols::
- Only consider these symbols.
+ Only consider these symbols. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
-M::
--disassembler-style=:: Set disassembler style for objdump.
@@ -140,25 +154,45 @@ Default is to monitor all CPUS.
--asm-raw::
Show raw instruction encoding of assembly instructions.
--G [type,min,order]::
+-g::
+ Enables call-graph (stack chain/backtrace) recording.
+
--call-graph::
- Display call chains using type, min percent threshold and order.
- type can be either:
- - flat: single column, linear exposure of call chains.
- - graph: use a graph tree, displaying absolute overhead rates.
- - fractal: like graph, but displays relative rates. Each branch of
- the tree is considered as a new profiled object.
+ Setup and enable call-graph (stack chain/backtrace) recording,
+ implies -g.
- order can be either:
- - callee: callee based call graph.
- - caller: inverted caller based call graph.
+--children::
+ Accumulate callchain of children to parent entry so that then can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires -g/--call-graph option
+ enabled.
- Default: fractal,0.5,callee.
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+
+ Default: 127
+
+--ignore-callees=<regex>::
+ Ignore callees of the function(s) matching the given regex.
+ This has the effect of collecting the callers of each such
+ function into one place in the call-graph tree.
--percent-limit::
Do not show entries which have an overhead under that percent.
(Default: 0).
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
INTERACTIVE PROMPTING KEYS
--------------------------
@@ -194,4 +228,4 @@ Pressing any unmapped key displays a menu, and prompts for input.
SEE ALSO
--------
-linkperf:perf-stat[1], linkperf:perf-list[1]
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 68718ccdd17..fae38d9a44a 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -9,6 +9,7 @@ SYNOPSIS
--------
[verse]
'perf trace'
+'perf trace record'
DESCRIPTION
-----------
@@ -16,32 +17,59 @@ This command will show the events associated with the target, initially
syscalls, but other system events like pagefaults, task lifetime events,
scheduling events, etc.
-Initially this is a live mode only tool, but eventually will work with
-perf.data files like the other tools, allowing a detached 'record' from
-analysis phases.
+This is a live mode tool in addition to working with perf.data files like
+the other perf tools. Files can be generated using the 'perf record' command
+but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*').
+Alernatively, the 'perf trace record' can be used as a shortcut to
+automatically include the raw_syscalls events when writing events to a file.
+
+The following options apply to perf trace; options to perf trace record are
+found in the perf record man page.
OPTIONS
-------
+-a::
--all-cpus::
System-wide collection from all CPUs.
+-e::
+--expr::
+ List of events to show, currently only syscall names.
+ Prefixing with ! shows all syscalls but the ones specified. You may
+ need to escape it.
+
+-o::
+--output=::
+ Output file name.
+
-p::
--pid=::
Record events on existing process ID (comma separated list).
+-t::
--tid=::
Record events on existing thread ID (comma separated list).
+-u::
--uid=::
Record events in threads owned by uid. Name or number.
+-v::
+--verbose=::
+ Verbosity level.
+
+-i::
--no-inherit::
Child tasks do not inherit counters.
+-m::
--mmap-pages=::
- Number of mmap data pages. Must be a power of two.
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
+-C::
--cpu::
Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
@@ -54,6 +82,31 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
--sched:
Accrue thread runtime and provide a summary at the end of the session.
+-i
+--input
+ Process events from a given perf data file.
+
+-T
+--time
+ Print full timestamp rather time relative to first sample.
+
+--comm::
+ Show process COMM right beside its ID, on by default, disable with --no-comm.
+
+-s::
+--summary::
+ Show only a summary of syscalls by thread with min, max, and average times
+ (in msec) and relative stddev.
+
+-S::
+--with-summary::
+ Show all syscalls followed by a summary by thread with min, max, and
+ average times (in msec) and relative stddev.
+
+--tool_stats::
+ Show tool stats such as number of times fd->pathname was discovered thru
+ hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
+
SEE ALSO
--------
linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 025de796067..45da209b6ed 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,7 +1,14 @@
tools/perf
tools/scripts
tools/lib/traceevent
-tools/lib/lk
+tools/lib/api
+tools/lib/symbol/kallsyms.c
+tools/lib/symbol/kallsyms.h
+tools/include/asm/bug.h
+tools/include/linux/compiler.h
+tools/include/linux/hash.h
+tools/include/linux/export.h
+tools/include/linux/types.h
include/linux/const.h
include/linux/perf_event.h
include/linux/rbtree.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index 203cb0eecff..cb2e5868c8e 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -1,807 +1,90 @@
-include ../scripts/Makefile.include
-
-# The default target of this Makefile is...
-all:
-
-include config/utilities.mak
-
-# Define V to have a more verbose compile.
-#
-# Define O to save output files in a separate directory.
-#
-# Define ARCH as name of target architecture if you want cross-builds.
-#
-# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
-#
-# Define NO_LIBPERL to disable perl script extension.
-#
-# Define NO_LIBPYTHON to disable python script extension.
-#
-# Define PYTHON to point to the python binary if the default
-# `python' is not correct; for example: PYTHON=python2
-#
-# Define PYTHON_CONFIG to point to the python-config binary if
-# the default `$(PYTHON)-config' is not correct.
-#
-# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
#
-# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+# This is a simple wrapper Makefile that calls the main Makefile.perf
+# with a -j option to do parallel builds
#
-# Define LDFLAGS=-static to build a static binary.
+# If you want to invoke the perf build in some non-standard way then
+# you can use the 'make -f Makefile.perf' method to invoke it.
#
-# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
-#
-# Define NO_DWARF if you do not want debug-info analysis feature at all.
-#
-# Define WERROR=0 to disable treating any warnings as errors.
-#
-# Define NO_NEWT if you do not want TUI support. (deprecated)
-#
-# Define NO_SLANG if you do not want TUI support.
-#
-# Define NO_GTK2 if you do not want GTK+ GUI support.
+
#
-# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+# Clear out the built-in rules GNU make defines by default (such as .o targets),
+# so that we pass through all targets to Makefile.perf:
#
-# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+.SUFFIXES:
+
#
-# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
-# backtrace post unwind.
+# We don't want to pass along options like -j:
#
-# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+unexport MAKEFLAGS
+
#
-# Define NO_LIBNUMA if you do not want numa perf benchmark
+# Do a parallel build with multiple jobs, based on the number of CPUs online
+# in this system: 'make -j8' on a 8-CPU system, etc.
#
-# Define NO_LIBAUDIT if you do not want libaudit support
+# (To override it, run 'make JOBS=1' and similar.)
#
-# Define NO_LIBBIONIC if you do not want bionic support
-
-ifeq ($(srctree),)
-srctree := $(patsubst %/,%,$(dir $(shell pwd)))
-srctree := $(patsubst %/,%,$(dir $(srctree)))
-#$(info Determined 'srctree' to be $(srctree))
-endif
-
-ifneq ($(objtree),)
-#$(info Determined 'objtree' to be $(objtree))
-endif
-
-ifneq ($(OUTPUT),)
-#$(info Determined 'OUTPUT' to be $(OUTPUT))
-endif
-
-$(OUTPUT)PERF-VERSION-FILE: .FORCE-PERF-VERSION-FILE
- @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
-
-CC = $(CROSS_COMPILE)gcc
-AR = $(CROSS_COMPILE)ar
-
-RM = rm -f
-MKDIR = mkdir
-FIND = find
-INSTALL = install
-FLEX = flex
-BISON = bison
-STRIP = strip
-
-LK_DIR = $(srctree)/tools/lib/lk/
-TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
-
-# include config/Makefile by default and rule out
-# non-config cases
-config := 1
-
-NON_CONFIG_TARGETS := clean TAGS tags cscope help
-
-ifdef MAKECMDGOALS
-ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
- config := 0
-endif
+ifeq ($(JOBS),)
+ JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null)
+ ifeq ($(JOBS),)
+ JOBS := 1
+ endif
endif
-ifeq ($(config),1)
-include config/Makefile
+#
+# Only pass canonical directory names as the output directory:
+#
+ifneq ($(O),)
+ FULL_O := $(shell readlink -f $(O) || echo $(O))
endif
-export prefix bindir sharedir sysconfdir
-
-# sparse is architecture-neutral, which means that we need to tell it
-# explicitly what architecture to check for. Fix this up for yours..
-SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
-
-# Guard against environment variables
-BUILTIN_OBJS =
-LIB_H =
-LIB_OBJS =
-PYRF_OBJS =
-SCRIPT_SH =
-
-SCRIPT_SH += perf-archive.sh
-
-grep-libs = $(filter -l%,$(1))
-strip-libs = $(filter-out -l%,$(1))
-
-LK_PATH=$(LK_DIR)
-
-ifneq ($(OUTPUT),)
- TE_PATH=$(OUTPUT)
-ifneq ($(subdir),)
- LK_PATH=$(OUTPUT)$(LK_DIR)
-else
- LK_PATH=$(OUTPUT)
-endif
+#
+# Only accept the 'DEBUG' variable from the command line:
+#
+ifeq ("$(origin DEBUG)", "command line")
+ ifeq ($(DEBUG),)
+ override DEBUG = 0
+ else
+ SET_DEBUG = "DEBUG=$(DEBUG)"
+ endif
else
- TE_PATH=$(TRACE_EVENT_DIR)
+ override DEBUG = 0
endif
-LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
-export LIBTRACEEVENT
-
-LIBLK = $(LK_PATH)liblk.a
-export LIBLK
+define print_msg
+ @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n'
+endef
-# python extension build directories
-PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
-PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
-PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
-export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+define make
+ @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $(SET_DEBUG) $@
+endef
-python-clean := rm -rf $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
-
-PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
-PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBLK)
-
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
- $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
- --quiet build_ext; \
- mkdir -p $(OUTPUT)python && \
- cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
#
-# No Perl scripts right now:
+# Needed if no target specified:
+# (Except for tags and TAGS targets. The reason is that the
+# Makefile does not treat tags/TAGS as targets but as files
+# and thus won't rebuilt them once they are in place.)
#
-
-SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+all tags TAGS:
+ $(print_msg)
+ $(make)
#
-# Single 'perf' binary right now:
+# The clean target is not really parallel, don't print the jobs info:
#
-PROGRAMS += $(OUTPUT)perf
-
-# what 'all' will build and 'install' will install, in perfexecdir
-ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
-
-# what 'all' will build but not install in perfexecdir
-OTHER_PROGRAMS = $(OUTPUT)perf
-
-# Set paths to tools early so that they can be used for version tests.
-ifndef SHELL_PATH
- SHELL_PATH = /bin/sh
-endif
-ifndef PERL_PATH
- PERL_PATH = /usr/bin/perl
-endif
-
-export PERL_PATH
-
-$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
- $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) -t util/parse-events.l > $(OUTPUT)util/parse-events-flex.c
-
-$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
- $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
-
-$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
- $(QUIET_FLEX)$(FLEX) --header-file=$(OUTPUT)util/pmu-flex.h -t util/pmu.l > $(OUTPUT)util/pmu-flex.c
-
-$(OUTPUT)util/pmu-bison.c: util/pmu.y
- $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
-
-$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
-$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
-
-LIB_FILE=$(OUTPUT)libperf.a
-
-LIB_H += ../../include/uapi/linux/perf_event.h
-LIB_H += ../../include/linux/rbtree.h
-LIB_H += ../../include/linux/list.h
-LIB_H += ../../include/uapi/linux/const.h
-LIB_H += ../../include/linux/hash.h
-LIB_H += ../../include/linux/stringify.h
-LIB_H += util/include/linux/bitmap.h
-LIB_H += util/include/linux/bitops.h
-LIB_H += util/include/linux/compiler.h
-LIB_H += util/include/linux/const.h
-LIB_H += util/include/linux/ctype.h
-LIB_H += util/include/linux/kernel.h
-LIB_H += util/include/linux/list.h
-LIB_H += util/include/linux/export.h
-LIB_H += util/include/linux/magic.h
-LIB_H += util/include/linux/poison.h
-LIB_H += util/include/linux/prefetch.h
-LIB_H += util/include/linux/rbtree.h
-LIB_H += util/include/linux/rbtree_augmented.h
-LIB_H += util/include/linux/string.h
-LIB_H += util/include/linux/types.h
-LIB_H += util/include/linux/linkage.h
-LIB_H += util/include/asm/asm-offsets.h
-LIB_H += util/include/asm/bug.h
-LIB_H += util/include/asm/byteorder.h
-LIB_H += util/include/asm/hweight.h
-LIB_H += util/include/asm/swab.h
-LIB_H += util/include/asm/system.h
-LIB_H += util/include/asm/uaccess.h
-LIB_H += util/include/dwarf-regs.h
-LIB_H += util/include/asm/dwarf2.h
-LIB_H += util/include/asm/cpufeature.h
-LIB_H += util/include/asm/unistd_32.h
-LIB_H += util/include/asm/unistd_64.h
-LIB_H += perf.h
-LIB_H += util/annotate.h
-LIB_H += util/cache.h
-LIB_H += util/callchain.h
-LIB_H += util/build-id.h
-LIB_H += util/debug.h
-LIB_H += util/sysfs.h
-LIB_H += util/pmu.h
-LIB_H += util/event.h
-LIB_H += util/evsel.h
-LIB_H += util/evlist.h
-LIB_H += util/exec_cmd.h
-LIB_H += util/types.h
-LIB_H += util/levenshtein.h
-LIB_H += util/machine.h
-LIB_H += util/map.h
-LIB_H += util/parse-options.h
-LIB_H += util/parse-events.h
-LIB_H += util/quote.h
-LIB_H += util/util.h
-LIB_H += util/xyarray.h
-LIB_H += util/header.h
-LIB_H += util/help.h
-LIB_H += util/session.h
-LIB_H += util/strbuf.h
-LIB_H += util/strlist.h
-LIB_H += util/strfilter.h
-LIB_H += util/svghelper.h
-LIB_H += util/tool.h
-LIB_H += util/run-command.h
-LIB_H += util/sigchain.h
-LIB_H += util/dso.h
-LIB_H += util/symbol.h
-LIB_H += util/color.h
-LIB_H += util/values.h
-LIB_H += util/sort.h
-LIB_H += util/hist.h
-LIB_H += util/thread.h
-LIB_H += util/thread_map.h
-LIB_H += util/trace-event.h
-LIB_H += util/probe-finder.h
-LIB_H += util/dwarf-aux.h
-LIB_H += util/probe-event.h
-LIB_H += util/pstack.h
-LIB_H += util/cpumap.h
-LIB_H += util/top.h
-LIB_H += $(ARCH_INCLUDE)
-LIB_H += util/cgroup.h
-LIB_H += $(TRACE_EVENT_DIR)event-parse.h
-LIB_H += util/target.h
-LIB_H += util/rblist.h
-LIB_H += util/intlist.h
-LIB_H += util/perf_regs.h
-LIB_H += util/unwind.h
-LIB_H += util/vdso.h
-LIB_H += ui/helpline.h
-LIB_H += ui/progress.h
-LIB_H += ui/util.h
-LIB_H += ui/ui.h
-
-LIB_OBJS += $(OUTPUT)util/abspath.o
-LIB_OBJS += $(OUTPUT)util/alias.o
-LIB_OBJS += $(OUTPUT)util/annotate.o
-LIB_OBJS += $(OUTPUT)util/build-id.o
-LIB_OBJS += $(OUTPUT)util/config.o
-LIB_OBJS += $(OUTPUT)util/ctype.o
-LIB_OBJS += $(OUTPUT)util/sysfs.o
-LIB_OBJS += $(OUTPUT)util/pmu.o
-LIB_OBJS += $(OUTPUT)util/environment.o
-LIB_OBJS += $(OUTPUT)util/event.o
-LIB_OBJS += $(OUTPUT)util/evlist.o
-LIB_OBJS += $(OUTPUT)util/evsel.o
-LIB_OBJS += $(OUTPUT)util/exec_cmd.o
-LIB_OBJS += $(OUTPUT)util/help.o
-LIB_OBJS += $(OUTPUT)util/levenshtein.o
-LIB_OBJS += $(OUTPUT)util/parse-options.o
-LIB_OBJS += $(OUTPUT)util/parse-events.o
-LIB_OBJS += $(OUTPUT)util/path.o
-LIB_OBJS += $(OUTPUT)util/rbtree.o
-LIB_OBJS += $(OUTPUT)util/bitmap.o
-LIB_OBJS += $(OUTPUT)util/hweight.o
-LIB_OBJS += $(OUTPUT)util/run-command.o
-LIB_OBJS += $(OUTPUT)util/quote.o
-LIB_OBJS += $(OUTPUT)util/strbuf.o
-LIB_OBJS += $(OUTPUT)util/string.o
-LIB_OBJS += $(OUTPUT)util/strlist.o
-LIB_OBJS += $(OUTPUT)util/strfilter.o
-LIB_OBJS += $(OUTPUT)util/top.o
-LIB_OBJS += $(OUTPUT)util/usage.o
-LIB_OBJS += $(OUTPUT)util/wrapper.o
-LIB_OBJS += $(OUTPUT)util/sigchain.o
-LIB_OBJS += $(OUTPUT)util/dso.o
-LIB_OBJS += $(OUTPUT)util/symbol.o
-LIB_OBJS += $(OUTPUT)util/symbol-elf.o
-LIB_OBJS += $(OUTPUT)util/color.o
-LIB_OBJS += $(OUTPUT)util/pager.o
-LIB_OBJS += $(OUTPUT)util/header.o
-LIB_OBJS += $(OUTPUT)util/callchain.o
-LIB_OBJS += $(OUTPUT)util/values.o
-LIB_OBJS += $(OUTPUT)util/debug.o
-LIB_OBJS += $(OUTPUT)util/machine.o
-LIB_OBJS += $(OUTPUT)util/map.o
-LIB_OBJS += $(OUTPUT)util/pstack.o
-LIB_OBJS += $(OUTPUT)util/session.o
-LIB_OBJS += $(OUTPUT)util/thread.o
-LIB_OBJS += $(OUTPUT)util/thread_map.o
-LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
-LIB_OBJS += $(OUTPUT)util/parse-events-flex.o
-LIB_OBJS += $(OUTPUT)util/parse-events-bison.o
-LIB_OBJS += $(OUTPUT)util/pmu-flex.o
-LIB_OBJS += $(OUTPUT)util/pmu-bison.o
-LIB_OBJS += $(OUTPUT)util/trace-event-read.o
-LIB_OBJS += $(OUTPUT)util/trace-event-info.o
-LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
-LIB_OBJS += $(OUTPUT)util/svghelper.o
-LIB_OBJS += $(OUTPUT)util/sort.o
-LIB_OBJS += $(OUTPUT)util/hist.o
-LIB_OBJS += $(OUTPUT)util/probe-event.o
-LIB_OBJS += $(OUTPUT)util/util.o
-LIB_OBJS += $(OUTPUT)util/xyarray.o
-LIB_OBJS += $(OUTPUT)util/cpumap.o
-LIB_OBJS += $(OUTPUT)util/cgroup.o
-LIB_OBJS += $(OUTPUT)util/target.o
-LIB_OBJS += $(OUTPUT)util/rblist.o
-LIB_OBJS += $(OUTPUT)util/intlist.o
-LIB_OBJS += $(OUTPUT)util/vdso.o
-LIB_OBJS += $(OUTPUT)util/stat.o
-
-LIB_OBJS += $(OUTPUT)ui/setup.o
-LIB_OBJS += $(OUTPUT)ui/helpline.o
-LIB_OBJS += $(OUTPUT)ui/progress.o
-LIB_OBJS += $(OUTPUT)ui/util.o
-LIB_OBJS += $(OUTPUT)ui/hist.o
-LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
-
-LIB_OBJS += $(OUTPUT)arch/common.o
-
-LIB_OBJS += $(OUTPUT)tests/parse-events.o
-LIB_OBJS += $(OUTPUT)tests/dso-data.o
-LIB_OBJS += $(OUTPUT)tests/attr.o
-LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o
-LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o
-LIB_OBJS += $(OUTPUT)tests/mmap-basic.o
-LIB_OBJS += $(OUTPUT)tests/perf-record.o
-LIB_OBJS += $(OUTPUT)tests/rdpmc.o
-LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
-LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
-LIB_OBJS += $(OUTPUT)tests/pmu.o
-LIB_OBJS += $(OUTPUT)tests/hists_link.o
-LIB_OBJS += $(OUTPUT)tests/python-use.o
-LIB_OBJS += $(OUTPUT)tests/bp_signal.o
-LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
-LIB_OBJS += $(OUTPUT)tests/task-exit.o
-LIB_OBJS += $(OUTPUT)tests/sw-clock.o
-
-BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
-BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
-# Benchmark modules
-BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
-BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
-ifeq ($(RAW_ARCH),x86_64)
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
-endif
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
-BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
-
-BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
-BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
-BUILTIN_OBJS += $(OUTPUT)builtin-help.o
-BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
-BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
-BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
-BUILTIN_OBJS += $(OUTPUT)builtin-list.o
-BUILTIN_OBJS += $(OUTPUT)builtin-record.o
-BUILTIN_OBJS += $(OUTPUT)builtin-report.o
-BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
-BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
-BUILTIN_OBJS += $(OUTPUT)builtin-top.o
-BUILTIN_OBJS += $(OUTPUT)builtin-script.o
-BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
-BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
-BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
-BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
-BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
-BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
-BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
-
-PERFLIBS = $(LIB_FILE) $(LIBLK) $(LIBTRACEEVENT)
-
-# We choose to avoid "if .. else if .. else .. endif endif"
-# because maintaining the nesting to match is a pain. If
-# we had "elif" things would have been much nicer...
-
--include arch/$(ARCH)/Makefile
-
-ifneq ($(OUTPUT),)
- CFLAGS += -I$(OUTPUT)
-endif
-
-ifdef NO_LIBELF
-EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
-
-# Remove ELF/DWARF dependent codes
-LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS))
-LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS))
-
-BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
-
-# Use minimal symbol handling
-LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
-
-else # NO_LIBELF
-ifndef NO_DWARF
- LIB_OBJS += $(OUTPUT)util/probe-finder.o
- LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
-endif # NO_DWARF
-endif # NO_LIBELF
-
-ifndef NO_LIBUNWIND
- LIB_OBJS += $(OUTPUT)util/unwind.o
-endif
-
-ifndef NO_LIBAUDIT
- BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
-endif
-
-ifndef NO_SLANG
- LIB_OBJS += $(OUTPUT)ui/browser.o
- LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
- LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
- LIB_OBJS += $(OUTPUT)ui/browsers/map.o
- LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
- LIB_OBJS += $(OUTPUT)ui/tui/setup.o
- LIB_OBJS += $(OUTPUT)ui/tui/util.o
- LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
- LIB_OBJS += $(OUTPUT)ui/tui/progress.o
- LIB_H += ui/browser.h
- LIB_H += ui/browsers/map.h
- LIB_H += ui/keysyms.h
- LIB_H += ui/libslang.h
-endif
-
-ifndef NO_GTK2
- LIB_OBJS += $(OUTPUT)ui/gtk/browser.o
- LIB_OBJS += $(OUTPUT)ui/gtk/hists.o
- LIB_OBJS += $(OUTPUT)ui/gtk/setup.o
- LIB_OBJS += $(OUTPUT)ui/gtk/util.o
- LIB_OBJS += $(OUTPUT)ui/gtk/helpline.o
- LIB_OBJS += $(OUTPUT)ui/gtk/progress.o
- LIB_OBJS += $(OUTPUT)ui/gtk/annotate.o
-endif
-
-ifndef NO_LIBPERL
- LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
- LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
-endif
-
-ifndef NO_LIBPYTHON
- LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
- LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
-endif
-
-ifeq ($(NO_PERF_REGS),0)
- ifeq ($(ARCH),x86)
- LIB_H += arch/x86/include/perf_regs.h
- endif
-endif
-
-ifndef NO_LIBNUMA
- BUILTIN_OBJS += $(OUTPUT)bench/numa.o
-endif
-
-ifdef ASCIIDOC8
- export ASCIIDOC8
-endif
-
-LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
-
-export INSTALL SHELL_PATH
-
-### Build rules
-
-SHELL = $(SHELL_PATH)
-
-all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
-
-please_set_SHELL_PATH_to_a_more_modern_shell:
- @$$(:)
-
-shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
-
-strip: $(PROGRAMS) $(OUTPUT)perf
- $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
-
-$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
- '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
- $(CFLAGS) -c $(filter %.c,$^) -o $@
-
-$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
- $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \
- $(BUILTIN_OBJS) $(LIBS) -o $@
-
-$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
- '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
- '-DPERF_MAN_PATH="$(mandir_SQ)"' \
- '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
-$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
- '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
- '-DPERF_MAN_PATH="$(mandir_SQ)"' \
- '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
-
-$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
-
-$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
- $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
-
-$(SCRIPTS) : % : %.sh
- $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
-
-# These can record PERF_VERSION
-$(OUTPUT)perf.o perf.spec \
- $(SCRIPTS) \
- : $(OUTPUT)PERF-VERSION-FILE
-
-.SUFFIXES:
-.SUFFIXES: .o .c .S .s
-
-# These two need to be here so that when O= is not used they take precedence
-# over the general rule for .o
-
-$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $<
-
-$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
-
-$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
-$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
-$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $<
-$(OUTPUT)%.o: %.S
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
-$(OUTPUT)%.s: %.S
- $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
-
-$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
- '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
- '-DPREFIX="$(prefix_SQ)"' \
- $<
-
-$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
- '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
- $<
-
-$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
- -DPYTHONPATH='"$(OUTPUT)python"' \
- -DPYTHON='"$(PYTHON_WORD)"' \
- $<
-
-$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+clean:
+ $(make)
-$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
-
-$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
-
-$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
-
-$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
-
-$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
-
-$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
-
-$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
- $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
-
-$(OUTPUT)perf-%: %.o $(PERFLIBS)
- $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
-
-$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
-$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
-
-# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
-# we depend the various files onto their directories.
-DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
-$(DIRECTORY_DEPS): | $(sort $(dir $(DIRECTORY_DEPS)))
-# In the second step, we make a rule to actually create these directories
-$(sort $(dir $(DIRECTORY_DEPS))):
- $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
-
-$(LIB_FILE): $(LIB_OBJS)
- $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
-
-# libtraceevent.a
-$(LIBTRACEEVENT):
- $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libtraceevent.a
-
-$(LIBTRACEEVENT)-clean:
- $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean
-
-# if subdir is set, we've been called from above so target has been built
-# already
-$(LIBLK):
-ifeq ($(subdir),)
- $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) liblk.a
-endif
-
-$(LIBLK)-clean:
-ifeq ($(subdir),)
- $(QUIET_SUBDIR0)$(LK_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) clean
-endif
-
-help:
- @echo 'Perf make targets:'
- @echo ' doc - make *all* documentation (see below)'
- @echo ' man - make manpage documentation (access with man <foo>)'
- @echo ' html - make html documentation'
- @echo ' info - make GNU info documentation (access with info <foo>)'
- @echo ' pdf - make pdf documentation'
- @echo ' TAGS - use etags to make tag information for source browsing'
- @echo ' tags - use ctags to make tag information for source browsing'
- @echo ' cscope - use cscope to make interactive browsing database'
- @echo ''
- @echo 'Perf install targets:'
- @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
- @echo ' HINT: use "make prefix=<path> <install target>" to install to a particular'
- @echo ' path like make prefix=/usr/local install install-doc'
- @echo ' install - install compiled binaries'
- @echo ' install-doc - install *all* documentation'
- @echo ' install-man - install manpage documentation'
- @echo ' install-html - install html documentation'
- @echo ' install-info - install GNU info documentation'
- @echo ' install-pdf - install pdf documentation'
- @echo ''
- @echo ' quick-install-doc - alias for quick-install-man'
- @echo ' quick-install-man - install the documentation quickly'
- @echo ' quick-install-html - install the html documentation quickly'
- @echo ''
- @echo 'Perf maintainer targets:'
- @echo ' clean - clean all binary objects and build output'
-
-
-DOC_TARGETS := doc man html info pdf
-
-INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
-INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
-
-# 'make doc' should call 'make -C Documentation all'
-$(DOC_TARGETS):
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
-
-TAGS:
- $(RM) TAGS
- $(FIND) . -name '*.[hcS]' -print | xargs etags -a
-
-tags:
- $(RM) tags
- $(FIND) . -name '*.[hcS]' -print | xargs ctags -a
-
-cscope:
- $(RM) cscope*
- $(FIND) . -name '*.[hcS]' -print | xargs cscope -b
-
-### Detect prefix changes
-TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
- $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ)
-
-$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
- @FLAGS='$(TRACK_CFLAGS)'; \
- if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
- echo 1>&2 " * new build flags or prefix"; \
- echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
- fi
-
-### Testing rules
-
-# GNU make supports exporting all variables by "export" without parameters.
-# However, the environment gets quite big, and some programs have problems
-# with that.
-
-check: $(OUTPUT)common-cmds.h
- if sparse; \
- then \
- for i in *.c */*.c; \
- do \
- sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
- done; \
- else \
- exit 1; \
- fi
-
-### Installation rules
-
-install-bin: all
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'
- $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
- $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
- $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'
- $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'
- $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
- $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'
- $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'
- $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'
- $(INSTALL) bash_completion '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'
- $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'
- $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
- $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
-
-install: install-bin try-install-man
-
-install-python_ext:
- $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
-
-# 'make install-doc' should call 'make -C Documentation install'
-$(INSTALL_DOC_TARGETS):
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
-
-### Cleaning rules
+#
+# The build-test target is not really parallel, don't print the jobs info:
+#
+build-test:
+ @$(MAKE) -f tests/make --no-print-directory
-clean: $(LIBTRACEEVENT)-clean $(LIBLK)-clean
- $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS)
- $(RM) $(ALL_PROGRAMS) perf
- $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope*
- $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
- $(RM) $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS
- $(RM) $(OUTPUT)util/*-bison*
- $(RM) $(OUTPUT)util/*-flex*
- $(python-clean)
+#
+# All other targets get passed through:
+#
+%:
+ $(print_msg)
+ $(make)
-.PHONY: all install clean strip $(LIBTRACEEVENT) $(LIBLK)
-.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
-.PHONY: .FORCE-PERF-VERSION-FILE TAGS tags cscope .FORCE-PERF-CFLAGS
+.PHONY: tags TAGS
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
new file mode 100644
index 00000000000..9670a16fa57
--- /dev/null
+++ b/tools/perf/Makefile.perf
@@ -0,0 +1,938 @@
+include ../scripts/Makefile.include
+
+# The default target of this Makefile is...
+all:
+
+include config/utilities.mak
+
+# Define V to have a more verbose compile.
+#
+# Define VF to have a more verbose feature check output.
+#
+# Define O to save output files in a separate directory.
+#
+# Define ARCH as name of target architecture if you want cross-builds.
+#
+# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
+#
+# Define NO_LIBPERL to disable perl script extension.
+#
+# Define NO_LIBPYTHON to disable python script extension.
+#
+# Define PYTHON to point to the python binary if the default
+# `python' is not correct; for example: PYTHON=python2
+#
+# Define PYTHON_CONFIG to point to the python-config binary if
+# the default `$(PYTHON)-config' is not correct.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
+#
+# Define NO_DWARF if you do not want debug-info analysis feature at all.
+#
+# Define WERROR=0 to disable treating any warnings as errors.
+#
+# Define NO_NEWT if you do not want TUI support. (deprecated)
+#
+# Define NO_SLANG if you do not want TUI support.
+#
+# Define NO_GTK2 if you do not want GTK+ GUI support.
+#
+# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+#
+# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+#
+# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
+# backtrace post unwind.
+#
+# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+#
+# Define NO_LIBNUMA if you do not want numa perf benchmark
+#
+# Define NO_LIBAUDIT if you do not want libaudit support
+#
+# Define NO_LIBBIONIC if you do not want bionic support
+#
+# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
+# for dwarf backtrace post unwind.
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+ifneq ($(objtree),)
+#$(info Determined 'objtree' to be $(objtree))
+endif
+
+ifneq ($(OUTPUT),)
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+endif
+
+$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
+ @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+ @touch $(OUTPUT)PERF-VERSION-FILE
+
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+
+RM = rm -f
+LN = ln -f
+MKDIR = mkdir
+FIND = find
+INSTALL = install
+FLEX = flex
+BISON = bison
+STRIP = strip
+
+LIB_DIR = $(srctree)/tools/lib/api/
+TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+
+# include config/Makefile by default and rule out
+# non-config cases
+config := 1
+
+NON_CONFIG_TARGETS := clean TAGS tags cscope help
+
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
+ config := 0
+endif
+endif
+
+ifeq ($(config),1)
+include config/Makefile
+endif
+
+export prefix bindir sharedir sysconfdir DESTDIR
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+# Guard against environment variables
+BUILTIN_OBJS =
+LIB_H =
+LIB_OBJS =
+GTK_OBJS =
+PYRF_OBJS =
+SCRIPT_SH =
+
+SCRIPT_SH += perf-archive.sh
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
+
+ifneq ($(OUTPUT),)
+ TE_PATH=$(OUTPUT)
+ifneq ($(subdir),)
+ LIB_PATH=$(OUTPUT)/../lib/api/
+else
+ LIB_PATH=$(OUTPUT)
+endif
+else
+ TE_PATH=$(TRACE_EVENT_DIR)
+ LIB_PATH=$(LIB_DIR)
+endif
+
+LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
+export LIBTRACEEVENT
+
+LIBAPIKFS = $(LIB_PATH)libapikfs.a
+export LIBAPIKFS
+
+# python extension build directories
+PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
+PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
+
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS)
+
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
+ $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
+ --quiet build_ext; \
+ mkdir -p $(OUTPUT)python && \
+ cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
+#
+# No Perl scripts right now:
+#
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+
+#
+# Single 'perf' binary right now:
+#
+PROGRAMS += $(OUTPUT)perf
+
+# what 'all' will build and 'install' will install, in perfexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = $(OUTPUT)perf
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+ SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+ PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+ $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+
+$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+ $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
+
+$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+ $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+
+$(OUTPUT)util/pmu-bison.c: util/pmu.y
+ $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+
+LIB_FILE=$(OUTPUT)libperf.a
+
+LIB_H += ../lib/symbol/kallsyms.h
+LIB_H += ../../include/uapi/linux/perf_event.h
+LIB_H += ../../include/linux/rbtree.h
+LIB_H += ../../include/linux/list.h
+LIB_H += ../../include/uapi/linux/const.h
+LIB_H += ../include/linux/hash.h
+LIB_H += ../../include/linux/stringify.h
+LIB_H += util/include/linux/bitmap.h
+LIB_H += util/include/linux/bitops.h
+LIB_H += ../include/linux/compiler.h
+LIB_H += util/include/linux/const.h
+LIB_H += util/include/linux/ctype.h
+LIB_H += util/include/linux/kernel.h
+LIB_H += util/include/linux/list.h
+LIB_H += ../include/linux/export.h
+LIB_H += util/include/linux/poison.h
+LIB_H += util/include/linux/rbtree.h
+LIB_H += util/include/linux/rbtree_augmented.h
+LIB_H += util/include/linux/string.h
+LIB_H += ../include/linux/types.h
+LIB_H += util/include/linux/linkage.h
+LIB_H += util/include/asm/asm-offsets.h
+LIB_H += ../include/asm/bug.h
+LIB_H += util/include/asm/byteorder.h
+LIB_H += util/include/asm/hweight.h
+LIB_H += util/include/asm/swab.h
+LIB_H += util/include/asm/system.h
+LIB_H += util/include/asm/uaccess.h
+LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
+LIB_H += util/include/asm/unistd_32.h
+LIB_H += util/include/asm/unistd_64.h
+LIB_H += perf.h
+LIB_H += util/annotate.h
+LIB_H += util/cache.h
+LIB_H += util/callchain.h
+LIB_H += util/build-id.h
+LIB_H += util/debug.h
+LIB_H += util/pmu.h
+LIB_H += util/event.h
+LIB_H += util/evsel.h
+LIB_H += util/evlist.h
+LIB_H += util/exec_cmd.h
+LIB_H += util/levenshtein.h
+LIB_H += util/machine.h
+LIB_H += util/map.h
+LIB_H += util/parse-options.h
+LIB_H += util/parse-events.h
+LIB_H += util/quote.h
+LIB_H += util/util.h
+LIB_H += util/xyarray.h
+LIB_H += util/header.h
+LIB_H += util/help.h
+LIB_H += util/session.h
+LIB_H += util/strbuf.h
+LIB_H += util/strlist.h
+LIB_H += util/strfilter.h
+LIB_H += util/svghelper.h
+LIB_H += util/tool.h
+LIB_H += util/run-command.h
+LIB_H += util/sigchain.h
+LIB_H += util/dso.h
+LIB_H += util/symbol.h
+LIB_H += util/color.h
+LIB_H += util/values.h
+LIB_H += util/sort.h
+LIB_H += util/hist.h
+LIB_H += util/comm.h
+LIB_H += util/thread.h
+LIB_H += util/thread_map.h
+LIB_H += util/trace-event.h
+LIB_H += util/probe-finder.h
+LIB_H += util/dwarf-aux.h
+LIB_H += util/probe-event.h
+LIB_H += util/pstack.h
+LIB_H += util/cpumap.h
+LIB_H += util/top.h
+LIB_H += $(ARCH_INCLUDE)
+LIB_H += util/cgroup.h
+LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h
+LIB_H += util/target.h
+LIB_H += util/rblist.h
+LIB_H += util/intlist.h
+LIB_H += util/perf_regs.h
+LIB_H += util/unwind.h
+LIB_H += util/vdso.h
+LIB_H += ui/helpline.h
+LIB_H += ui/progress.h
+LIB_H += ui/util.h
+LIB_H += ui/ui.h
+LIB_H += util/data.h
+
+LIB_OBJS += $(OUTPUT)util/abspath.o
+LIB_OBJS += $(OUTPUT)util/alias.o
+LIB_OBJS += $(OUTPUT)util/annotate.o
+LIB_OBJS += $(OUTPUT)util/build-id.o
+LIB_OBJS += $(OUTPUT)util/config.o
+LIB_OBJS += $(OUTPUT)util/ctype.o
+LIB_OBJS += $(OUTPUT)util/pmu.o
+LIB_OBJS += $(OUTPUT)util/environment.o
+LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evlist.o
+LIB_OBJS += $(OUTPUT)util/evsel.o
+LIB_OBJS += $(OUTPUT)util/exec_cmd.o
+LIB_OBJS += $(OUTPUT)util/help.o
+LIB_OBJS += $(OUTPUT)util/kallsyms.o
+LIB_OBJS += $(OUTPUT)util/levenshtein.o
+LIB_OBJS += $(OUTPUT)util/parse-options.o
+LIB_OBJS += $(OUTPUT)util/parse-events.o
+LIB_OBJS += $(OUTPUT)util/path.o
+LIB_OBJS += $(OUTPUT)util/rbtree.o
+LIB_OBJS += $(OUTPUT)util/bitmap.o
+LIB_OBJS += $(OUTPUT)util/hweight.o
+LIB_OBJS += $(OUTPUT)util/run-command.o
+LIB_OBJS += $(OUTPUT)util/quote.o
+LIB_OBJS += $(OUTPUT)util/strbuf.o
+LIB_OBJS += $(OUTPUT)util/string.o
+LIB_OBJS += $(OUTPUT)util/strlist.o
+LIB_OBJS += $(OUTPUT)util/strfilter.o
+LIB_OBJS += $(OUTPUT)util/top.o
+LIB_OBJS += $(OUTPUT)util/usage.o
+LIB_OBJS += $(OUTPUT)util/wrapper.o
+LIB_OBJS += $(OUTPUT)util/sigchain.o
+LIB_OBJS += $(OUTPUT)util/dso.o
+LIB_OBJS += $(OUTPUT)util/symbol.o
+LIB_OBJS += $(OUTPUT)util/symbol-elf.o
+LIB_OBJS += $(OUTPUT)util/color.o
+LIB_OBJS += $(OUTPUT)util/pager.o
+LIB_OBJS += $(OUTPUT)util/header.o
+LIB_OBJS += $(OUTPUT)util/callchain.o
+LIB_OBJS += $(OUTPUT)util/values.o
+LIB_OBJS += $(OUTPUT)util/debug.o
+LIB_OBJS += $(OUTPUT)util/machine.o
+LIB_OBJS += $(OUTPUT)util/map.o
+LIB_OBJS += $(OUTPUT)util/pstack.o
+LIB_OBJS += $(OUTPUT)util/session.o
+LIB_OBJS += $(OUTPUT)util/comm.o
+LIB_OBJS += $(OUTPUT)util/thread.o
+LIB_OBJS += $(OUTPUT)util/thread_map.o
+LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
+LIB_OBJS += $(OUTPUT)util/parse-events-flex.o
+LIB_OBJS += $(OUTPUT)util/parse-events-bison.o
+LIB_OBJS += $(OUTPUT)util/pmu-flex.o
+LIB_OBJS += $(OUTPUT)util/pmu-bison.o
+LIB_OBJS += $(OUTPUT)util/trace-event-read.o
+LIB_OBJS += $(OUTPUT)util/trace-event-info.o
+LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
+LIB_OBJS += $(OUTPUT)util/trace-event.o
+LIB_OBJS += $(OUTPUT)util/svghelper.o
+LIB_OBJS += $(OUTPUT)util/sort.o
+LIB_OBJS += $(OUTPUT)util/hist.o
+LIB_OBJS += $(OUTPUT)util/probe-event.o
+LIB_OBJS += $(OUTPUT)util/util.o
+LIB_OBJS += $(OUTPUT)util/xyarray.o
+LIB_OBJS += $(OUTPUT)util/cpumap.o
+LIB_OBJS += $(OUTPUT)util/cgroup.o
+LIB_OBJS += $(OUTPUT)util/target.o
+LIB_OBJS += $(OUTPUT)util/rblist.o
+LIB_OBJS += $(OUTPUT)util/intlist.o
+LIB_OBJS += $(OUTPUT)util/vdso.o
+LIB_OBJS += $(OUTPUT)util/stat.o
+LIB_OBJS += $(OUTPUT)util/record.o
+LIB_OBJS += $(OUTPUT)util/srcline.o
+LIB_OBJS += $(OUTPUT)util/data.o
+
+LIB_OBJS += $(OUTPUT)ui/setup.o
+LIB_OBJS += $(OUTPUT)ui/helpline.o
+LIB_OBJS += $(OUTPUT)ui/progress.o
+LIB_OBJS += $(OUTPUT)ui/util.o
+LIB_OBJS += $(OUTPUT)ui/hist.o
+LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
+
+LIB_OBJS += $(OUTPUT)arch/common.o
+
+LIB_OBJS += $(OUTPUT)tests/parse-events.o
+LIB_OBJS += $(OUTPUT)tests/dso-data.o
+LIB_OBJS += $(OUTPUT)tests/attr.o
+LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o
+LIB_OBJS += $(OUTPUT)tests/mmap-basic.o
+LIB_OBJS += $(OUTPUT)tests/perf-record.o
+LIB_OBJS += $(OUTPUT)tests/rdpmc.o
+LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
+LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
+LIB_OBJS += $(OUTPUT)tests/pmu.o
+LIB_OBJS += $(OUTPUT)tests/hists_common.o
+LIB_OBJS += $(OUTPUT)tests/hists_link.o
+LIB_OBJS += $(OUTPUT)tests/hists_filter.o
+LIB_OBJS += $(OUTPUT)tests/hists_output.o
+LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
+LIB_OBJS += $(OUTPUT)tests/python-use.o
+LIB_OBJS += $(OUTPUT)tests/bp_signal.o
+LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
+LIB_OBJS += $(OUTPUT)tests/task-exit.o
+LIB_OBJS += $(OUTPUT)tests/sw-clock.o
+ifeq ($(ARCH),x86)
+LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
+endif
+LIB_OBJS += $(OUTPUT)tests/code-reading.o
+LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
+LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
+ifndef NO_DWARF_UNWIND
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
+LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
+endif
+endif
+LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
+LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
+
+BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
+BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
+# Benchmark modules
+BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
+BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
+endif
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
+
+BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
+BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
+BUILTIN_OBJS += $(OUTPUT)builtin-help.o
+BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
+BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
+BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
+BUILTIN_OBJS += $(OUTPUT)builtin-list.o
+BUILTIN_OBJS += $(OUTPUT)builtin-record.o
+BUILTIN_OBJS += $(OUTPUT)builtin-report.o
+BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
+BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
+BUILTIN_OBJS += $(OUTPUT)builtin-top.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
+BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
+BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
+BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
+BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
+BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
+
+PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT)
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain. If
+# we had "elif" things would have been much nicer...
+
+-include arch/$(ARCH)/Makefile
+
+ifneq ($(OUTPUT),)
+ CFLAGS += -I$(OUTPUT)
+endif
+
+ifdef NO_LIBELF
+EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
+
+# Remove ELF/DWARF dependent codes
+LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS))
+
+BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
+
+# Use minimal symbol handling
+LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
+
+else # NO_LIBELF
+ifndef NO_DWARF
+ LIB_OBJS += $(OUTPUT)util/probe-finder.o
+ LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
+endif # NO_DWARF
+endif # NO_LIBELF
+
+ifndef NO_LIBDW_DWARF_UNWIND
+ LIB_OBJS += $(OUTPUT)util/unwind-libdw.o
+ LIB_H += util/unwind-libdw.h
+endif
+
+ifndef NO_LIBUNWIND
+ LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o
+endif
+LIB_OBJS += $(OUTPUT)tests/keep-tracking.o
+
+ifndef NO_LIBAUDIT
+ BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+endif
+
+ifndef NO_SLANG
+ LIB_OBJS += $(OUTPUT)ui/browser.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/map.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/header.o
+ LIB_OBJS += $(OUTPUT)ui/tui/setup.o
+ LIB_OBJS += $(OUTPUT)ui/tui/util.o
+ LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
+ LIB_OBJS += $(OUTPUT)ui/tui/progress.o
+ LIB_H += ui/tui/tui.h
+ LIB_H += ui/browser.h
+ LIB_H += ui/browsers/map.h
+ LIB_H += ui/keysyms.h
+ LIB_H += ui/libslang.h
+endif
+
+ifndef NO_GTK2
+ ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
+
+ GTK_OBJS += $(OUTPUT)ui/gtk/browser.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/hists.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/setup.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/util.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/progress.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o
+
+install-gtk: $(OUTPUT)libperf-gtk.so
+ $(call QUIET_INSTALL, 'GTK UI') \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
+ $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)'
+endif
+
+ifndef NO_LIBPERL
+ LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
+ LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
+endif
+
+ifndef NO_LIBPYTHON
+ LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
+ LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
+endif
+
+ifeq ($(NO_PERF_REGS),0)
+ ifeq ($(ARCH),x86)
+ LIB_H += arch/x86/include/perf_regs.h
+ endif
+ LIB_OBJS += $(OUTPUT)util/perf_regs.o
+endif
+
+ifndef NO_LIBNUMA
+ BUILTIN_OBJS += $(OUTPUT)bench/numa.o
+endif
+
+ifdef ASCIIDOC8
+ export ASCIIDOC8
+endif
+
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
+
+export INSTALL SHELL_PATH
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+ @$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) $(OUTPUT)perf
+ $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
+
+$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ $(CFLAGS) -c $(filter %.c,$^) -o $@
+
+$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \
+ $(BUILTIN_OBJS) $(LIBS) -o $@
+
+$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H)
+ $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $<
+
+$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS)
+ $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
+
+$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ '-DPERF_MAN_PATH="$(mandir_SQ)"' \
+ '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ '-DPERF_MAN_PATH="$(mandir_SQ)"' \
+ '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
+
+$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
+ $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(SCRIPTS) : % : %.sh
+ $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
+
+# These can record PERF_VERSION
+$(OUTPUT)perf.o perf.spec \
+ $(SCRIPTS) \
+ : $(OUTPUT)PERF-VERSION-FILE
+
+.SUFFIXES:
+
+#
+# If a target does not match any of the later rules then prefix it by $(OUTPUT)
+# This makes targets like 'make O=/tmp/perf perf.o' work in a natural way.
+#
+ifneq ($(OUTPUT),)
+%.o: $(OUTPUT)%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+util/%.o: $(OUTPUT)util/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+bench/%.o: $(OUTPUT)bench/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+tests/%.o: $(OUTPUT)tests/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+endif
+
+# These two need to be here so that when O= is not used they take precedence
+# over the general rule for .o
+
+$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $<
+
+$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
+
+$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $<
+$(OUTPUT)%.o: %.S
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+$(OUTPUT)%.s: %.S
+ $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+
+$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
+ '-DPREFIX="$(prefix_SQ)"' \
+ $<
+
+$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
+ $<
+
+$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ -DPYTHONPATH='"$(OUTPUT)python"' \
+ -DPYTHON='"$(PYTHON_WORD)"' \
+ $<
+
+$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $<
+
+$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $<
+
+$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+
+$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
+
+$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<
+
+$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<
+
+$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+
+$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+
+$(OUTPUT)perf-%: %.o $(PERFLIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
+$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+
+# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
+# we depend the various files onto their directories.
+DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS)
+DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
+# no need to add flex objects, because they depend on bison ones
+DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c
+DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c
+
+OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS)))
+
+$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES)
+# In the second step, we make a rule to actually create these directories
+$(OUTPUT_DIRECTORIES):
+ $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
+
+$(LIB_FILE): $(LIB_OBJS)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
+
+# libtraceevent.a
+TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch])
+
+LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT)
+LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)"
+LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
+
+$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS
+ $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins
+
+$(LIBTRACEEVENT)-clean:
+ $(call QUIET_CLEAN, libtraceevent)
+ @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+
+install-traceevent-plugins: $(LIBTRACEEVENT)
+ $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins
+
+LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch])
+
+# if subdir is set, we've been called from above so target has been built
+# already
+$(LIBAPIKFS): $(LIBAPIKFS_SOURCES)
+ifeq ($(subdir),)
+ $(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a
+endif
+
+$(LIBAPIKFS)-clean:
+ifeq ($(subdir),)
+ $(call QUIET_CLEAN, libapikfs)
+ @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+endif
+
+help:
+ @echo 'Perf make targets:'
+ @echo ' doc - make *all* documentation (see below)'
+ @echo ' man - make manpage documentation (access with man <foo>)'
+ @echo ' html - make html documentation'
+ @echo ' info - make GNU info documentation (access with info <foo>)'
+ @echo ' pdf - make pdf documentation'
+ @echo ' TAGS - use etags to make tag information for source browsing'
+ @echo ' tags - use ctags to make tag information for source browsing'
+ @echo ' cscope - use cscope to make interactive browsing database'
+ @echo ''
+ @echo 'Perf install targets:'
+ @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
+ @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular'
+ @echo ' path like "make prefix=/usr/local install install-doc"'
+ @echo ' install - install compiled binaries'
+ @echo ' install-doc - install *all* documentation'
+ @echo ' install-man - install manpage documentation'
+ @echo ' install-html - install html documentation'
+ @echo ' install-info - install GNU info documentation'
+ @echo ' install-pdf - install pdf documentation'
+ @echo ''
+ @echo ' quick-install-doc - alias for quick-install-man'
+ @echo ' quick-install-man - install the documentation quickly'
+ @echo ' quick-install-html - install the html documentation quickly'
+ @echo ''
+ @echo 'Perf maintainer targets:'
+ @echo ' clean - clean all binary objects and build output'
+
+
+DOC_TARGETS := doc man html info pdf
+
+INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
+INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
+
+# 'make doc' should call 'make -C Documentation all'
+$(DOC_TARGETS):
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+
+TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
+TAGS:
+ $(QUIET_GEN)$(RM) TAGS; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
+
+tags:
+ $(QUIET_GEN)$(RM) tags; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
+
+cscope:
+ $(QUIET_GEN)$(RM) cscope*; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
+
+### Detect prefix changes
+TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
+ $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ)
+
+$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
+ @FLAGS='$(TRACK_CFLAGS)'; \
+ if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
+ echo 1>&2 " FLAGS: * new build flags or prefix"; \
+ echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
+ fi
+
+### Testing rules
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+check: $(OUTPUT)common-cmds.h
+ if sparse; \
+ then \
+ for i in *.c */*.c; \
+ do \
+ sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+ done; \
+ else \
+ exit 1; \
+ fi
+
+### Installation rules
+
+install-gtk:
+
+install-bin: all install-gtk
+ $(call QUIET_INSTALL, binaries) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
+ $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
+ $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
+ $(call QUIET_INSTALL, libexec) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ $(call QUIET_INSTALL, perf-archive) \
+ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBPERL
+ $(call QUIET_INSTALL, perl-scripts) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
+ $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+endif
+ifndef NO_LIBPYTHON
+ $(call QUIET_INSTALL, python-scripts) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
+ $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
+ $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+endif
+ $(call QUIET_INSTALL, perf_completion-script) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
+ $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+ $(call QUIET_INSTALL, tests) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
+
+install: install-bin try-install-man install-traceevent-plugins
+
+install-python_ext:
+ $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
+# 'make install-doc' should call 'make -C Documentation install'
+$(INSTALL_DOC_TARGETS):
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+
+### Cleaning rules
+
+#
+# This is here, not in config/Makefile, because config/Makefile does
+# not get included for the clean target:
+#
+config-clean:
+ $(call QUIET_CLEAN, config)
+ @$(MAKE) -C config/feature-checks clean >/dev/null
+
+clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean
+ $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS)
+ $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf
+ $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
+ $(python-clean)
+
+#
+# Trick: if ../../.git does not exist - we are building out of tree for example,
+# then force version regeneration:
+#
+ifeq ($(wildcard ../../.git/HEAD),)
+ GIT-HEAD-PHONY = ../../.git/HEAD
+else
+ GIT-HEAD-PHONY =
+endif
+
+.PHONY: all install clean config-clean strip install-gtk
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
+
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
index 15130b50dfe..09d62153d38 100644
--- a/tools/perf/arch/arm/Makefile
+++ b/tools/perf/arch/arm/Makefile
@@ -2,3 +2,13 @@ ifndef NO_DWARF
PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
+ifndef NO_LIBUNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
+ifndef NO_LIBDW_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
+endif
+ifndef NO_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
+endif
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
new file mode 100644
index 00000000000..f619c9c5a4b
--- /dev/null
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -0,0 +1,59 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_ARM_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+
+#define PERF_REG_IP PERF_REG_ARM_PC
+#define PERF_REG_SP PERF_REG_ARM_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..9f870d27cb3
--- /dev/null
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_ARM_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm/tests/regs_load.S b/tools/perf/arch/arm/tests/regs_load.S
new file mode 100644
index 00000000000..e09e983946f
--- /dev/null
+++ b/tools/perf/arch/arm/tests/regs_load.S
@@ -0,0 +1,58 @@
+#include <linux/linkage.h>
+
+#define R0 0x00
+#define R1 0x08
+#define R2 0x10
+#define R3 0x18
+#define R4 0x20
+#define R5 0x28
+#define R6 0x30
+#define R7 0x38
+#define R8 0x40
+#define R9 0x48
+#define SL 0x50
+#define FP 0x58
+#define IP 0x60
+#define SP 0x68
+#define LR 0x70
+#define PC 0x78
+
+/*
+ * Implementation of void perf_regs_load(u64 *regs);
+ *
+ * This functions fills in the 'regs' buffer from the actual registers values,
+ * in the way the perf built-in unwinding test expects them:
+ * - the PC at the time at the call to this function. Since this function
+ * is called using a bl instruction, the PC value is taken from LR.
+ * The built-in unwinding test then unwinds the call stack from the dwarf
+ * information in unwind__get_entries.
+ *
+ * Notes:
+ * - the 8 bytes stride in the registers offsets comes from the fact
+ * that the registers are stored in an u64 array (u64 *regs),
+ * - the regs buffer needs to be zeroed before the call to this function,
+ * in this case using a calloc in dwarf-unwind.c.
+ */
+
+.text
+.type perf_regs_load,%function
+ENTRY(perf_regs_load)
+ str r0, [r0, #R0]
+ str r1, [r0, #R1]
+ str r2, [r0, #R2]
+ str r3, [r0, #R3]
+ str r4, [r0, #R4]
+ str r5, [r0, #R5]
+ str r6, [r0, #R6]
+ str r7, [r0, #R7]
+ str r8, [r0, #R8]
+ str r9, [r0, #R9]
+ str sl, [r0, #SL]
+ str fp, [r0, #FP]
+ str ip, [r0, #IP]
+ str sp, [r0, #SP]
+ str lr, [r0, #LR]
+ str lr, [r0, #PC] // store pc as lr in order to skip the call
+ // to this function
+ mov pc, lr
+ENDPROC(perf_regs_load)
diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c
new file mode 100644
index 00000000000..b4176c60117
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -0,0 +1,36 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
+ val; \
+})
+
+ dwarf_regs[0] = REG(R0);
+ dwarf_regs[1] = REG(R1);
+ dwarf_regs[2] = REG(R2);
+ dwarf_regs[3] = REG(R3);
+ dwarf_regs[4] = REG(R4);
+ dwarf_regs[5] = REG(R5);
+ dwarf_regs[6] = REG(R6);
+ dwarf_regs[7] = REG(R7);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(FP);
+ dwarf_regs[12] = REG(IP);
+ dwarf_regs[13] = REG(SP);
+ dwarf_regs[14] = REG(LR);
+ dwarf_regs[15] = REG(PC);
+
+ return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
+ dwarf_regs);
+}
diff --git a/tools/perf/arch/arm/util/unwind-libunwind.c b/tools/perf/arch/arm/util/unwind-libunwind.c
new file mode 100644
index 00000000000..729ed69a666
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libunwind.c
@@ -0,0 +1,48 @@
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_ARM_R0:
+ return PERF_REG_ARM_R0;
+ case UNW_ARM_R1:
+ return PERF_REG_ARM_R1;
+ case UNW_ARM_R2:
+ return PERF_REG_ARM_R2;
+ case UNW_ARM_R3:
+ return PERF_REG_ARM_R3;
+ case UNW_ARM_R4:
+ return PERF_REG_ARM_R4;
+ case UNW_ARM_R5:
+ return PERF_REG_ARM_R5;
+ case UNW_ARM_R6:
+ return PERF_REG_ARM_R6;
+ case UNW_ARM_R7:
+ return PERF_REG_ARM_R7;
+ case UNW_ARM_R8:
+ return PERF_REG_ARM_R8;
+ case UNW_ARM_R9:
+ return PERF_REG_ARM_R9;
+ case UNW_ARM_R10:
+ return PERF_REG_ARM_R10;
+ case UNW_ARM_R11:
+ return PERF_REG_ARM_FP;
+ case UNW_ARM_R12:
+ return PERF_REG_ARM_IP;
+ case UNW_ARM_R13:
+ return PERF_REG_ARM_SP;
+ case UNW_ARM_R14:
+ return PERF_REG_ARM_LR;
+ case UNW_ARM_R15:
+ return PERF_REG_ARM_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
new file mode 100644
index 00000000000..67e9b3d38e8
--- /dev/null
+++ b/tools/perf/arch/arm64/Makefile
@@ -0,0 +1,7 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
+ifndef NO_LIBUNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
new file mode 100644
index 00000000000..e9441b9e2a3
--- /dev/null
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -0,0 +1,88 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
+#define PERF_REG_IP PERF_REG_ARM64_PC
+#define PERF_REG_SP PERF_REG_ARM64_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c
new file mode 100644
index 00000000000..d49efeb8172
--- /dev/null
+++ b/tools/perf/arch/arm64/util/dwarf-regs.c
@@ -0,0 +1,80 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+};
+
+#define STR(s) #s
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = STR(%x##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+
+/*
+ * Reference:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ GPR_DWARFNUM_NAME(11),
+ GPR_DWARFNUM_NAME(12),
+ GPR_DWARFNUM_NAME(13),
+ GPR_DWARFNUM_NAME(14),
+ GPR_DWARFNUM_NAME(15),
+ GPR_DWARFNUM_NAME(16),
+ GPR_DWARFNUM_NAME(17),
+ GPR_DWARFNUM_NAME(18),
+ GPR_DWARFNUM_NAME(19),
+ GPR_DWARFNUM_NAME(20),
+ GPR_DWARFNUM_NAME(21),
+ GPR_DWARFNUM_NAME(22),
+ GPR_DWARFNUM_NAME(23),
+ GPR_DWARFNUM_NAME(24),
+ GPR_DWARFNUM_NAME(25),
+ GPR_DWARFNUM_NAME(26),
+ GPR_DWARFNUM_NAME(27),
+ GPR_DWARFNUM_NAME(28),
+ GPR_DWARFNUM_NAME(29),
+ REG_DWARFNUM_NAME("%lr", 30),
+ REG_DWARFNUM_NAME("%sp", 31),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - lookup register name from it's DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from it's DWARF register number. If the register is not
+ * found in the table, this returns NULL;
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
new file mode 100644
index 00000000000..436ee43859d
--- /dev/null
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -0,0 +1,82 @@
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_AARCH64_X0:
+ return PERF_REG_ARM64_X0;
+ case UNW_AARCH64_X1:
+ return PERF_REG_ARM64_X1;
+ case UNW_AARCH64_X2:
+ return PERF_REG_ARM64_X2;
+ case UNW_AARCH64_X3:
+ return PERF_REG_ARM64_X3;
+ case UNW_AARCH64_X4:
+ return PERF_REG_ARM64_X4;
+ case UNW_AARCH64_X5:
+ return PERF_REG_ARM64_X5;
+ case UNW_AARCH64_X6:
+ return PERF_REG_ARM64_X6;
+ case UNW_AARCH64_X7:
+ return PERF_REG_ARM64_X7;
+ case UNW_AARCH64_X8:
+ return PERF_REG_ARM64_X8;
+ case UNW_AARCH64_X9:
+ return PERF_REG_ARM64_X9;
+ case UNW_AARCH64_X10:
+ return PERF_REG_ARM64_X10;
+ case UNW_AARCH64_X11:
+ return PERF_REG_ARM64_X11;
+ case UNW_AARCH64_X12:
+ return PERF_REG_ARM64_X12;
+ case UNW_AARCH64_X13:
+ return PERF_REG_ARM64_X13;
+ case UNW_AARCH64_X14:
+ return PERF_REG_ARM64_X14;
+ case UNW_AARCH64_X15:
+ return PERF_REG_ARM64_X15;
+ case UNW_AARCH64_X16:
+ return PERF_REG_ARM64_X16;
+ case UNW_AARCH64_X17:
+ return PERF_REG_ARM64_X17;
+ case UNW_AARCH64_X18:
+ return PERF_REG_ARM64_X18;
+ case UNW_AARCH64_X19:
+ return PERF_REG_ARM64_X19;
+ case UNW_AARCH64_X20:
+ return PERF_REG_ARM64_X20;
+ case UNW_AARCH64_X21:
+ return PERF_REG_ARM64_X21;
+ case UNW_AARCH64_X22:
+ return PERF_REG_ARM64_X22;
+ case UNW_AARCH64_X23:
+ return PERF_REG_ARM64_X23;
+ case UNW_AARCH64_X24:
+ return PERF_REG_ARM64_X24;
+ case UNW_AARCH64_X25:
+ return PERF_REG_ARM64_X25;
+ case UNW_AARCH64_X26:
+ return PERF_REG_ARM64_X26;
+ case UNW_AARCH64_X27:
+ return PERF_REG_ARM64_X27;
+ case UNW_AARCH64_X28:
+ return PERF_REG_ARM64_X28;
+ case UNW_AARCH64_X29:
+ return PERF_REG_ARM64_X29;
+ case UNW_AARCH64_X30:
+ return PERF_REG_ARM64_LR;
+ case UNW_AARCH64_SP:
+ return PERF_REG_ARM64_SP;
+ case UNW_AARCH64_PC:
+ return PERF_REG_ARM64_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index aacef07ebf3..42faf369211 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -154,8 +154,7 @@ static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
}
if (lookup_path(buf))
goto out;
- free(buf);
- buf = NULL;
+ zfree(&buf);
}
if (!strcmp(arch, "arm"))
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
index 815841c04eb..1641542e363 100644
--- a/tools/perf/arch/x86/Makefile
+++ b/tools/perf/arch/x86/Makefile
@@ -3,6 +3,15 @@ PERF_HAVE_DWARF_REGS := 1
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
endif
ifndef NO_LIBUNWIND
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
+ifndef NO_LIBDW_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
+endif
+ifndef NO_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
endif
LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o
+LIB_H += arch/$(ARCH)/util/tsc.h
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
index 7fcdcdbee91..7df517acfef 100644
--- a/tools/perf/arch/x86/include/perf_regs.h
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -2,17 +2,23 @@
#define ARCH_PERF_REGS_H
#include <stdlib.h>
-#include "../../util/types.h"
+#include <linux/types.h>
#include <asm/perf_regs.h>
-#ifndef ARCH_X86_64
+void perf_regs_load(u64 *regs);
+
+#ifndef HAVE_ARCH_X86_64_SUPPORT
#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_X86_32_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
#else
#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
(1ULL << PERF_REG_X86_ES) | \
(1ULL << PERF_REG_X86_FS) | \
(1ULL << PERF_REG_X86_GS))
#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT)
+#define PERF_REGS_MAX PERF_REG_X86_64_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
#endif
#define PERF_REG_IP PERF_REG_X86_IP
#define PERF_REG_SP PERF_REG_X86_SP
@@ -52,7 +58,7 @@ static inline const char *perf_reg_name(int id)
return "FS";
case PERF_REG_X86_GS:
return "GS";
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
case PERF_REG_X86_R8:
return "R8";
case PERF_REG_X86_R9:
@@ -69,7 +75,7 @@ static inline const char *perf_reg_name(int id)
return "R14";
case PERF_REG_X86_R15:
return "R15";
-#endif /* ARCH_X86_64 */
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
default:
return NULL;
}
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..9f89f899ccc
--- /dev/null
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_X86_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = malloc(sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/x86/tests/regs_load.S b/tools/perf/arch/x86/tests/regs_load.S
new file mode 100644
index 00000000000..60875d5c556
--- /dev/null
+++ b/tools/perf/arch/x86/tests/regs_load.S
@@ -0,0 +1,98 @@
+#include <linux/linkage.h>
+
+#define AX 0
+#define BX 1 * 8
+#define CX 2 * 8
+#define DX 3 * 8
+#define SI 4 * 8
+#define DI 5 * 8
+#define BP 6 * 8
+#define SP 7 * 8
+#define IP 8 * 8
+#define FLAGS 9 * 8
+#define CS 10 * 8
+#define SS 11 * 8
+#define DS 12 * 8
+#define ES 13 * 8
+#define FS 14 * 8
+#define GS 15 * 8
+#define R8 16 * 8
+#define R9 17 * 8
+#define R10 18 * 8
+#define R11 19 * 8
+#define R12 20 * 8
+#define R13 21 * 8
+#define R14 22 * 8
+#define R15 23 * 8
+
+.text
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ENTRY(perf_regs_load)
+ movq %rax, AX(%rdi)
+ movq %rbx, BX(%rdi)
+ movq %rcx, CX(%rdi)
+ movq %rdx, DX(%rdi)
+ movq %rsi, SI(%rdi)
+ movq %rdi, DI(%rdi)
+ movq %rbp, BP(%rdi)
+
+ leaq 8(%rsp), %rax /* exclude this call. */
+ movq %rax, SP(%rdi)
+
+ movq 0(%rsp), %rax
+ movq %rax, IP(%rdi)
+
+ movq $0, FLAGS(%rdi)
+ movq $0, CS(%rdi)
+ movq $0, SS(%rdi)
+ movq $0, DS(%rdi)
+ movq $0, ES(%rdi)
+ movq $0, FS(%rdi)
+ movq $0, GS(%rdi)
+
+ movq %r8, R8(%rdi)
+ movq %r9, R9(%rdi)
+ movq %r10, R10(%rdi)
+ movq %r11, R11(%rdi)
+ movq %r12, R12(%rdi)
+ movq %r13, R13(%rdi)
+ movq %r14, R14(%rdi)
+ movq %r15, R15(%rdi)
+ ret
+ENDPROC(perf_regs_load)
+#else
+ENTRY(perf_regs_load)
+ push %edi
+ movl 8(%esp), %edi
+ movl %eax, AX(%edi)
+ movl %ebx, BX(%edi)
+ movl %ecx, CX(%edi)
+ movl %edx, DX(%edi)
+ movl %esi, SI(%edi)
+ pop %eax
+ movl %eax, DI(%edi)
+ movl %ebp, BP(%edi)
+
+ leal 4(%esp), %eax /* exclude this call. */
+ movl %eax, SP(%edi)
+
+ movl 0(%esp), %eax
+ movl %eax, IP(%edi)
+
+ movl $0, FLAGS(%edi)
+ movl $0, CS(%edi)
+ movl $0, SS(%edi)
+ movl $0, DS(%edi)
+ movl $0, ES(%edi)
+ movl $0, FS(%edi)
+ movl $0, GS(%edi)
+ ret
+ENDPROC(perf_regs_load)
+#endif
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
new file mode 100644
index 00000000000..40021fa3129
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -0,0 +1,59 @@
+#include <stdbool.h>
+#include <errno.h>
+
+#include <linux/perf_event.h>
+
+#include "../../perf.h"
+#include <linux/types.h>
+#include "../../util/debug.h"
+#include "tsc.h"
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
+{
+ u64 t, quot, rem;
+
+ t = ns - tc->time_zero;
+ quot = t / tc->time_mult;
+ rem = t % tc->time_mult;
+ return (quot << tc->time_shift) +
+ (rem << tc->time_shift) / tc->time_mult;
+}
+
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
+{
+ u64 quot, rem;
+
+ quot = cyc >> tc->time_shift;
+ rem = cyc & ((1 << tc->time_shift) - 1);
+ return tc->time_zero + quot * tc->time_mult +
+ ((rem * tc->time_mult) >> tc->time_shift);
+}
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc)
+{
+ bool cap_user_time_zero;
+ u32 seq;
+ int i = 0;
+
+ while (1) {
+ seq = pc->lock;
+ rmb();
+ tc->time_mult = pc->time_mult;
+ tc->time_shift = pc->time_shift;
+ tc->time_zero = pc->time_zero;
+ cap_user_time_zero = pc->cap_user_time_zero;
+ rmb();
+ if (pc->lock == seq && !(seq & 1))
+ break;
+ if (++i > 10000) {
+ pr_debug("failed to get perf_event_mmap_page lock\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!cap_user_time_zero)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h
new file mode 100644
index 00000000000..2affe0366b5
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.h
@@ -0,0 +1,20 @@
+#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
+#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
+
+#include <linux/types.h>
+
+struct perf_tsc_conversion {
+ u16 time_shift;
+ u32 time_mult;
+ u64 time_zero;
+};
+
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc);
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
+
+#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
new file mode 100644
index 00000000000..c4b72176ca8
--- /dev/null
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -0,0 +1,51 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[17];
+ unsigned nregs;
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \
+ val; \
+})
+
+ if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(CX);
+ dwarf_regs[2] = REG(DX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SP);
+ dwarf_regs[5] = REG(BP);
+ dwarf_regs[6] = REG(SI);
+ dwarf_regs[7] = REG(DI);
+ dwarf_regs[8] = REG(IP);
+ nregs = 9;
+ } else {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(DX);
+ dwarf_regs[2] = REG(CX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SI);
+ dwarf_regs[5] = REG(DI);
+ dwarf_regs[6] = REG(BP);
+ dwarf_regs[7] = REG(SP);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(R11);
+ dwarf_regs[12] = REG(R12);
+ dwarf_regs[13] = REG(R13);
+ dwarf_regs[14] = REG(R14);
+ dwarf_regs[15] = REG(R15);
+ dwarf_regs[16] = REG(IP);
+ nregs = 17;
+ }
+
+ return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
+}
diff --git a/tools/perf/arch/x86/util/unwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
index 78d956eff96..3261f68c6a7 100644
--- a/tools/perf/arch/x86/util/unwind.c
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -4,8 +4,8 @@
#include "perf_regs.h"
#include "../../util/unwind.h"
-#ifdef ARCH_X86_64
-int unwind__arch_reg_id(int regnum)
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+int libunwind__arch_reg_id(int regnum)
{
int id;
@@ -69,7 +69,7 @@ int unwind__arch_reg_id(int regnum)
return id;
}
#else
-int unwind__arch_reg_id(int regnum)
+int libunwind__arch_reg_id(int regnum)
{
int id;
@@ -108,4 +108,4 @@ int unwind__arch_reg_id(int regnum)
return id;
}
-#endif /* ARCH_X86_64 */
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
diff --git a/tools/perf/bash_completion b/tools/perf/bash_completion
deleted file mode 100644
index 56e6a12aab5..00000000000
--- a/tools/perf/bash_completion
+++ /dev/null
@@ -1,62 +0,0 @@
-# perf completion
-
-function_exists()
-{
- declare -F $1 > /dev/null
- return $?
-}
-
-function_exists __ltrim_colon_completions ||
-__ltrim_colon_completions()
-{
- if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then
- # Remove colon-word prefix from COMPREPLY items
- local colon_word=${1%${1##*:}}
- local i=${#COMPREPLY[*]}
- while [[ $((--i)) -ge 0 ]]; do
- COMPREPLY[$i]=${COMPREPLY[$i]#"$colon_word"}
- done
- fi
-}
-
-have perf &&
-_perf()
-{
- local cur prev cmd
-
- COMPREPLY=()
- if function_exists _get_comp_words_by_ref; then
- _get_comp_words_by_ref -n : cur prev
- else
- cur=$(_get_cword :)
- prev=${COMP_WORDS[COMP_CWORD-1]}
- fi
-
- cmd=${COMP_WORDS[0]}
-
- # List perf subcommands or long options
- if [ $COMP_CWORD -eq 1 ]; then
- if [[ $cur == --* ]]; then
- COMPREPLY=( $( compgen -W '--help --version \
- --exec-path --html-path --paginate --no-pager \
- --perf-dir --work-tree --debugfs-dir' -- "$cur" ) )
- else
- cmds=$($cmd --list-cmds)
- COMPREPLY=( $( compgen -W '$cmds' -- "$cur" ) )
- fi
- # List possible events for -e option
- elif [[ $prev == "-e" && "${COMP_WORDS[1]}" == @(record|stat|top) ]]; then
- evts=$($cmd list --raw-dump)
- COMPREPLY=( $( compgen -W '$evts' -- "$cur" ) )
- __ltrim_colon_completions $cur
- # List long option names
- elif [[ $cur == --* ]]; then
- subcmd=${COMP_WORDS[1]}
- opts=$($cmd $subcmd --list-opts)
- COMPREPLY=( $( compgen -W '$opts' -- "$cur" ) )
- # Fall down to list regular files
- else
- _filedir
- fi
-} &&
-complete -F _perf perf
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 0fdc85269c4..eba46709b27 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -31,6 +31,9 @@ extern int bench_sched_pipe(int argc, const char **argv, const char *prefix);
extern int bench_mem_memcpy(int argc, const char **argv,
const char *prefix __maybe_unused);
extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
+extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
+extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
+extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
#define BENCH_FORMAT_DEFAULT_STR "default"
#define BENCH_FORMAT_DEFAULT 0
diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c
new file mode 100644
index 00000000000..a84206e9c4a
--- /dev/null
+++ b/tools/perf/bench/futex-hash.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing.
+ *
+ * This program is particularly useful for measuring the kernel's futex hash
+ * table/function implementation. In order for it to make sense, use with as
+ * many threads and futexes as possible.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+static unsigned int nthreads = 0;
+static unsigned int nsecs = 10;
+/* amount of futexes per thread */
+static unsigned int nfutexes = 1024;
+static bool fshared = false, done = false, silent = false;
+
+struct timeval start, end, runtime;
+static pthread_mutex_t thread_lock;
+static unsigned int threads_starting;
+static struct stats throughput_stats;
+static pthread_cond_t thread_parent, thread_worker;
+
+struct worker {
+ int tid;
+ u_int32_t *futex;
+ pthread_t thread;
+ unsigned long ops;
+};
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"),
+ OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"),
+ OPT_END()
+};
+
+static const char * const bench_futex_hash_usage[] = {
+ "perf bench futex hash <options>",
+ NULL
+};
+
+static void *workerfn(void *arg)
+{
+ int ret;
+ unsigned int i;
+ struct worker *w = (struct worker *) arg;
+
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ do {
+ for (i = 0; i < nfutexes; i++, w->ops++) {
+ /*
+ * We want the futex calls to fail in order to stress
+ * the hashing of uaddr and not measure other steps,
+ * such as internal waitqueue handling, thus enlarging
+ * the critical region protected by hb->lock.
+ */
+ ret = futex_wait(&w->futex[i], 1234, NULL,
+ fshared ? 0 : FUTEX_PRIVATE_FLAG);
+ if (!silent &&
+ (!ret || errno != EAGAIN || errno != EWOULDBLOCK))
+ warn("Non-expected futex return call");
+ }
+ } while (!done);
+
+ return NULL;
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ /* inform all threads that we're done for the day */
+ done = true;
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+}
+
+static void print_summary(void)
+{
+ unsigned long avg = avg_stats(&throughput_stats);
+ double stddev = stddev_stats(&throughput_stats);
+
+ printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n",
+ !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg),
+ (int) runtime.tv_sec);
+}
+
+int bench_futex_hash(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ cpu_set_t cpu;
+ struct sigaction act;
+ unsigned int i, ncpus;
+ pthread_attr_t thread_attr;
+ struct worker *worker = NULL;
+
+ argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_hash_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads) /* default to the number of CPUs */
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ goto errmem;
+
+ printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n",
+ getpid(), nthreads, nfutexes, fshared ? "shared":"private", nsecs);
+
+ init_stats(&throughput_stats);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ threads_starting = nthreads;
+ pthread_attr_init(&thread_attr);
+ gettimeofday(&start, NULL);
+ for (i = 0; i < nthreads; i++) {
+ worker[i].tid = i;
+ worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex));
+ if (!worker[i].futex)
+ goto errmem;
+
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ ret = pthread_create(&worker[i].thread, &thread_attr, workerfn,
+ (void *)(struct worker *) &worker[i]);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_create");
+
+ }
+ pthread_attr_destroy(&thread_attr);
+
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ sleep(nsecs);
+ toggle_done(0, NULL, NULL);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i].thread, NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+
+ for (i = 0; i < nthreads; i++) {
+ unsigned long t = worker[i].ops/runtime.tv_sec;
+ update_stats(&throughput_stats, t);
+ if (!silent) {
+ if (nfutexes == 1)
+ printf("[thread %2d] futex: %p [ %ld ops/sec ]\n",
+ worker[i].tid, &worker[i].futex[0], t);
+ else
+ printf("[thread %2d] futexes: %p ... %p [ %ld ops/sec ]\n",
+ worker[i].tid, &worker[i].futex[0],
+ &worker[i].futex[nfutexes-1], t);
+ }
+
+ free(worker[i].futex);
+ }
+
+ print_summary();
+
+ free(worker);
+ return ret;
+errmem:
+ err(EXIT_FAILURE, "calloc");
+}
diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c
new file mode 100644
index 00000000000..a16255876f1
--- /dev/null
+++ b/tools/perf/bench/futex-requeue.c
@@ -0,0 +1,211 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-requeue: Block a bunch of threads on futex1 and requeue them
+ * on futex2, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread
+ * requeues without waking up any tasks -- thus mimicking a regular futex_wait.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+static u_int32_t futex1 = 0, futex2 = 0;
+
+/*
+ * How many tasks to requeue at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nrequeue = 1;
+
+/*
+ * There can be significant variance from run to run,
+ * the more repeats, the more exact the overall avg and
+ * the better idea of the futex latency.
+ */
+static unsigned int repeat = 10;
+
+static pthread_t *worker;
+static bool done = 0, silent = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats requeuetime_stats, requeued_stats;
+static unsigned int ncpus, threads_starting, nthreads = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"),
+ OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_END()
+};
+
+static const char * const bench_futex_requeue_usage[] = {
+ "perf bench futex requeue <options>",
+ NULL
+};
+
+static void print_summary(void)
+{
+ double requeuetime_avg = avg_stats(&requeuetime_stats);
+ double requeuetime_stddev = stddev_stats(&requeuetime_stats);
+ unsigned int requeued_avg = avg_stats(&requeued_stats);
+
+ printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ requeued_avg,
+ nthreads,
+ requeuetime_avg/1e3,
+ rel_stddev_stats(requeuetime_stddev, requeuetime_avg));
+}
+
+static void *workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
+ return NULL;
+}
+
+static void block_threads(pthread_t *w,
+ pthread_attr_t thread_attr)
+{
+ cpu_set_t cpu;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ /* create and block all threads */
+ for (i = 0; i < nthreads; i++) {
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_requeue(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+
+ argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0);
+ if (argc)
+ goto err;
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), "
+ "%d at a time.\n\n",
+ getpid(), nthreads, &futex1, &futex2, nrequeue);
+
+ init_stats(&requeued_stats);
+ init_stats(&requeuetime_stats);
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < repeat && !done; j++) {
+ unsigned int nrequeued = 0;
+ struct timeval start, end, runtime;
+
+ /* create, launch & block all threads */
+ block_threads(worker, thread_attr);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start requeueing */
+ gettimeofday(&start, NULL);
+ for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue)
+ /*
+ * Do not wakeup any tasks blocked on futex1, allowing
+ * us to really measure futex_wait functionality.
+ */
+ futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue,
+ FUTEX_PRIVATE_FLAG);
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+
+ update_stats(&requeued_stats, nrequeued);
+ update_stats(&requeuetime_stats, runtime.tv_usec);
+
+ if (!silent) {
+ printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n",
+ j + 1, nrequeued, nthreads, runtime.tv_usec/1e3);
+ }
+
+ /* everybody should be blocked on futex2, wake'em up */
+ nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG);
+ if (nthreads != nrequeued)
+ warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads);
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(worker);
+ return ret;
+err:
+ usage_with_options(bench_futex_requeue_usage, options);
+ exit(EXIT_FAILURE);
+}
diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
new file mode 100644
index 00000000000..d096169b161
--- /dev/null
+++ b/tools/perf/bench/futex-wake.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com>
+ *
+ * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time.
+ *
+ * This program is particularly useful to measure the latency of nthread wakeups
+ * in non-error situations: all waiters are queued and all wake calls wakeup
+ * one or more tasks, and thus the waitqueue is never empty.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+/* all threads will block on the same futex */
+static u_int32_t futex1 = 0;
+
+/*
+ * How many wakeups to do at a time.
+ * Default to 1 in order to make the kernel work more.
+ */
+static unsigned int nwakes = 1;
+
+/*
+ * There can be significant variance from run to run,
+ * the more repeats, the more exact the overall avg and
+ * the better idea of the futex latency.
+ */
+static unsigned int repeat = 10;
+
+pthread_t *worker;
+static bool done = 0, silent = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int ncpus, threads_starting, nthreads = 0;
+
+static const struct option options[] = {
+ OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"),
+ OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"),
+ OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"),
+ OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"),
+ OPT_END()
+};
+
+static const char * const bench_futex_wake_usage[] = {
+ "perf bench futex wake <options>",
+ NULL
+};
+
+static void *workerfn(void *arg __maybe_unused)
+{
+ pthread_mutex_lock(&thread_lock);
+ threads_starting--;
+ if (!threads_starting)
+ pthread_cond_signal(&thread_parent);
+ pthread_cond_wait(&thread_worker, &thread_lock);
+ pthread_mutex_unlock(&thread_lock);
+
+ futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG);
+ return NULL;
+}
+
+static void print_summary(void)
+{
+ double waketime_avg = avg_stats(&waketime_stats);
+ double waketime_stddev = stddev_stats(&waketime_stats);
+ unsigned int wakeup_avg = avg_stats(&wakeup_stats);
+
+ printf("Wokeup %d of %d threads in %.4f ms (+-%.2f%%)\n",
+ wakeup_avg,
+ nthreads,
+ waketime_avg/1e3,
+ rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void block_threads(pthread_t *w,
+ pthread_attr_t thread_attr)
+{
+ cpu_set_t cpu;
+ unsigned int i;
+
+ threads_starting = nthreads;
+
+ /* create and block all threads */
+ for (i = 0; i < nthreads; i++) {
+ CPU_ZERO(&cpu);
+ CPU_SET(i % ncpus, &cpu);
+
+ if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+ err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+ if (pthread_create(&w[i], &thread_attr, workerfn, NULL))
+ err(EXIT_FAILURE, "pthread_create");
+ }
+}
+
+static void toggle_done(int sig __maybe_unused,
+ siginfo_t *info __maybe_unused,
+ void *uc __maybe_unused)
+{
+ done = true;
+}
+
+int bench_futex_wake(int argc, const char **argv,
+ const char *prefix __maybe_unused)
+{
+ int ret = 0;
+ unsigned int i, j;
+ struct sigaction act;
+ pthread_attr_t thread_attr;
+
+ argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0);
+ if (argc) {
+ usage_with_options(bench_futex_wake_usage, options);
+ exit(EXIT_FAILURE);
+ }
+
+ ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ sigfillset(&act.sa_mask);
+ act.sa_sigaction = toggle_done;
+ sigaction(SIGINT, &act, NULL);
+
+ if (!nthreads)
+ nthreads = ncpus;
+
+ worker = calloc(nthreads, sizeof(*worker));
+ if (!worker)
+ err(EXIT_FAILURE, "calloc");
+
+ printf("Run summary [PID %d]: blocking on %d threads (at futex %p), "
+ "waking up %d at a time.\n\n",
+ getpid(), nthreads, &futex1, nwakes);
+
+ init_stats(&wakeup_stats);
+ init_stats(&waketime_stats);
+ pthread_attr_init(&thread_attr);
+ pthread_mutex_init(&thread_lock, NULL);
+ pthread_cond_init(&thread_parent, NULL);
+ pthread_cond_init(&thread_worker, NULL);
+
+ for (j = 0; j < repeat && !done; j++) {
+ unsigned int nwoken = 0;
+ struct timeval start, end, runtime;
+
+ /* create, launch & block all threads */
+ block_threads(worker, thread_attr);
+
+ /* make sure all threads are already blocked */
+ pthread_mutex_lock(&thread_lock);
+ while (threads_starting)
+ pthread_cond_wait(&thread_parent, &thread_lock);
+ pthread_cond_broadcast(&thread_worker);
+ pthread_mutex_unlock(&thread_lock);
+
+ usleep(100000);
+
+ /* Ok, all threads are patiently blocked, start waking folks up */
+ gettimeofday(&start, NULL);
+ while (nwoken != nthreads)
+ nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG);
+ gettimeofday(&end, NULL);
+ timersub(&end, &start, &runtime);
+
+ update_stats(&wakeup_stats, nwoken);
+ update_stats(&waketime_stats, runtime.tv_usec);
+
+ if (!silent) {
+ printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n",
+ j + 1, nwoken, nthreads, runtime.tv_usec/1e3);
+ }
+
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(worker[i], NULL);
+ if (ret)
+ err(EXIT_FAILURE, "pthread_join");
+ }
+
+ }
+
+ /* cleanup & report results */
+ pthread_cond_destroy(&thread_parent);
+ pthread_cond_destroy(&thread_worker);
+ pthread_mutex_destroy(&thread_lock);
+ pthread_attr_destroy(&thread_attr);
+
+ print_summary();
+
+ free(worker);
+ return ret;
+}
diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h
new file mode 100644
index 00000000000..71f2844cf97
--- /dev/null
+++ b/tools/perf/bench/futex.h
@@ -0,0 +1,71 @@
+/*
+ * Glibc independent futex library for testing kernel functionality.
+ * Shamelessly stolen from Darren Hart <dvhltc@us.ibm.com>
+ * http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/
+ */
+
+#ifndef _FUTEX_H
+#define _FUTEX_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr: address of first futex
+ * @op: futex op code
+ * @val: typically expected value of uaddr, but varies by op
+ * @timeout: typically an absolute struct timespec (except where noted
+ * otherwise). Overloaded by some ops
+ * @uaddr2: address of second futex for some ops\
+ * @val3: varies by op
+ * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of an static inline function as
+ * some of the types over overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+ syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout: relative timeout
+ */
+static inline int
+futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags)
+{
+ return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake: wake up to this many tasks
+ */
+static inline int
+futex_wake(u_int32_t *uaddr, int nr_wake, int opflags)
+{
+ return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+* futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+* @nr_wake: wake up to this many tasks
+* @nr_requeue: requeue up to this many tasks
+*/
+static inline int
+futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake,
+ int nr_requeue, int opflags)
+{
+ return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+ val, opflags);
+}
+
+#endif /* _FUTEX_H */
diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h
index a72e36cb539..57b4ed87145 100644
--- a/tools/perf/bench/mem-memcpy-arch.h
+++ b/tools/perf/bench/mem-memcpy-arch.h
@@ -1,5 +1,5 @@
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMCPY_FN(fn, name, desc) \
extern void *fn(void *, const void *, size_t);
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c
index 93c83e3cb4a..5ce71d3b72c 100644
--- a/tools/perf/bench/mem-memcpy.c
+++ b/tools/perf/bench/mem-memcpy.c
@@ -58,7 +58,7 @@ struct routine routines[] = {
{ "default",
"Default memcpy() provided by glibc",
memcpy },
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMCPY_FN(fn, name, desc) { name, desc, fn },
#include "mem-memcpy-x86-64-asm-def.h"
@@ -111,12 +111,14 @@ static double timeval2double(struct timeval *ts)
static void alloc_mem(void **dst, void **src, size_t length)
{
*dst = zalloc(length);
- if (!dst)
+ if (!*dst)
die("memory allocation failed - maybe length is too large?\n");
*src = zalloc(length);
- if (!src)
+ if (!*src)
die("memory allocation failed - maybe length is too large?\n");
+ /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */
+ memset(*src, 0, length);
}
static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)
diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h
index a040fa77665..633800cb0dc 100644
--- a/tools/perf/bench/mem-memset-arch.h
+++ b/tools/perf/bench/mem-memset-arch.h
@@ -1,5 +1,5 @@
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(fn, name, desc) \
extern void *fn(void *, int, size_t);
diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c
index c6e4bc52349..9af79d2b18e 100644
--- a/tools/perf/bench/mem-memset.c
+++ b/tools/perf/bench/mem-memset.c
@@ -58,7 +58,7 @@ static const struct routine routines[] = {
{ "default",
"Default memset() provided by glibc",
memset },
-#ifdef ARCH_X86_64
+#ifdef HAVE_ARCH_X86_64_SUPPORT
#define MEMSET_FN(fn, name, desc) { name, desc, fn },
#include "mem-memset-x86-64-asm-def.h"
@@ -111,7 +111,7 @@ static double timeval2double(struct timeval *ts)
static void alloc_mem(void **dst, size_t length)
{
*dst = zalloc(length);
- if (!dst)
+ if (!*dst)
die("memory allocation failed - maybe length is too large?\n");
}
diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index 30d1c3225b4..ebfa163b80b 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -429,14 +429,14 @@ static int parse_cpu_list(const char *arg)
return 0;
}
-static void parse_setup_cpu_list(void)
+static int parse_setup_cpu_list(void)
{
struct thread_data *td;
char *str0, *str;
int t;
if (!g->p.cpu_list_str)
- return;
+ return 0;
dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
@@ -500,8 +500,12 @@ static void parse_setup_cpu_list(void)
dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul);
- BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus);
- BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus);
+ if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) {
+ printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus);
+ return -1;
+ }
+
+ BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
BUG_ON(bind_cpu_0 > bind_cpu_1);
for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) {
@@ -541,6 +545,7 @@ out:
printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
free(str0);
+ return 0;
}
static int parse_cpus_opt(const struct option *opt __maybe_unused,
@@ -561,14 +566,14 @@ static int parse_node_list(const char *arg)
return 0;
}
-static void parse_setup_node_list(void)
+static int parse_setup_node_list(void)
{
struct thread_data *td;
char *str0, *str;
int t;
if (!g->p.node_list_str)
- return;
+ return 0;
dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks);
@@ -619,8 +624,12 @@ static void parse_setup_node_list(void)
dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step);
- BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes);
- BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes);
+ if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) {
+ printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes);
+ return -1;
+ }
+
+ BUG_ON(bind_node_0 < 0 || bind_node_1 < 0);
BUG_ON(bind_node_0 > bind_node_1);
for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) {
@@ -651,6 +660,7 @@ out:
printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t);
free(str0);
+ return 0;
}
static int parse_nodes_opt(const struct option *opt __maybe_unused,
@@ -1110,7 +1120,7 @@ static void *worker_thread(void *__tdata)
/* Check whether our max runtime timed out: */
if (g->p.nr_secs) {
timersub(&stop, &start0, &diff);
- if (diff.tv_sec >= g->p.nr_secs) {
+ if ((u32)diff.tv_sec >= g->p.nr_secs) {
g->stop_work = true;
break;
}
@@ -1157,7 +1167,7 @@ static void *worker_thread(void *__tdata)
runtime_ns_max += diff.tv_usec * 1000;
if (details >= 0) {
- printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n",
+ printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n",
process_nr, thread_nr, runtime_ns_max / bytes_done, val);
}
fflush(stdout);
@@ -1356,8 +1366,8 @@ static int init(void)
init_thread_data();
tprintf("#\n");
- parse_setup_cpu_list();
- parse_setup_node_list();
+ if (parse_setup_cpu_list() || parse_setup_node_list())
+ return -1;
tprintf("#\n");
print_summary();
@@ -1583,6 +1593,11 @@ static void init_params(struct params *p, const char *name, int argc, const char
p->data_rand_walk = true;
p->nr_loops = -1;
p->init_random = true;
+ p->mb_global_str = "1";
+ p->nr_proc = 1;
+ p->nr_threads = 1;
+ p->nr_secs = 5;
+ p->run_all = argc == 1;
}
static int run_bench_numa(const char *name, const char **argv)
@@ -1600,7 +1615,6 @@ static int run_bench_numa(const char *name, const char **argv)
return 0;
err:
- usage_with_options(numa_usage, options);
return -1;
}
@@ -1701,8 +1715,7 @@ static int bench_all(void)
BUG_ON(ret < 0);
for (i = 0; i < nr; i++) {
- if (run_bench_numa(tests[i][0], tests[i] + 1))
- return -1;
+ run_bench_numa(tests[i][0], tests[i] + 1);
}
printf("\n");
diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c
index 69cfba8d4c6..07a8d7646a1 100644
--- a/tools/perf/bench/sched-pipe.c
+++ b/tools/perf/bench/sched-pipe.c
@@ -7,9 +7,7 @@
* Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com>
* http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c
* Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
- *
*/
-
#include "../perf.h"
#include "../util/util.h"
#include "../util/parse-options.h"
@@ -28,12 +26,24 @@
#include <sys/time.h>
#include <sys/types.h>
+#include <pthread.h>
+
+struct thread_data {
+ int nr;
+ int pipe_read;
+ int pipe_write;
+ pthread_t pthread;
+};
+
#define LOOPS_DEFAULT 1000000
-static int loops = LOOPS_DEFAULT;
+static int loops = LOOPS_DEFAULT;
+
+/* Use processes by default: */
+static bool threaded;
static const struct option options[] = {
- OPT_INTEGER('l', "loop", &loops,
- "Specify number of loops"),
+ OPT_INTEGER('l', "loop", &loops, "Specify number of loops"),
+ OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"),
OPT_END()
};
@@ -42,13 +52,37 @@ static const char * const bench_sched_pipe_usage[] = {
NULL
};
-int bench_sched_pipe(int argc, const char **argv,
- const char *prefix __maybe_unused)
+static void *worker_thread(void *__tdata)
{
- int pipe_1[2], pipe_2[2];
+ struct thread_data *td = __tdata;
int m = 0, i;
+ int ret;
+
+ for (i = 0; i < loops; i++) {
+ if (!td->nr) {
+ ret = read(td->pipe_read, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ ret = write(td->pipe_write, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ } else {
+ ret = write(td->pipe_write, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ ret = read(td->pipe_read, &m, sizeof(int));
+ BUG_ON(ret != sizeof(int));
+ }
+ }
+
+ return NULL;
+}
+
+int bench_sched_pipe(int argc, const char **argv, const char *prefix __maybe_unused)
+{
+ struct thread_data threads[2], *td;
+ int pipe_1[2], pipe_2[2];
struct timeval start, stop, diff;
unsigned long long result_usec = 0;
+ int nr_threads = 2;
+ int t;
/*
* why does "ret" exist?
@@ -58,43 +92,66 @@ int bench_sched_pipe(int argc, const char **argv,
int __maybe_unused ret, wait_stat;
pid_t pid, retpid __maybe_unused;
- argc = parse_options(argc, argv, options,
- bench_sched_pipe_usage, 0);
+ argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0);
BUG_ON(pipe(pipe_1));
BUG_ON(pipe(pipe_2));
- pid = fork();
- assert(pid >= 0);
-
gettimeofday(&start, NULL);
- if (!pid) {
- for (i = 0; i < loops; i++) {
- ret = read(pipe_1[0], &m, sizeof(int));
- ret = write(pipe_2[1], &m, sizeof(int));
- }
- } else {
- for (i = 0; i < loops; i++) {
- ret = write(pipe_1[1], &m, sizeof(int));
- ret = read(pipe_2[0], &m, sizeof(int));
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ td->nr = t;
+
+ if (t == 0) {
+ td->pipe_read = pipe_1[0];
+ td->pipe_write = pipe_2[1];
+ } else {
+ td->pipe_write = pipe_1[1];
+ td->pipe_read = pipe_2[0];
}
}
- gettimeofday(&stop, NULL);
- timersub(&stop, &start, &diff);
- if (pid) {
+ if (threaded) {
+
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ ret = pthread_create(&td->pthread, NULL, worker_thread, td);
+ BUG_ON(ret);
+ }
+
+ for (t = 0; t < nr_threads; t++) {
+ td = threads + t;
+
+ ret = pthread_join(td->pthread, NULL);
+ BUG_ON(ret);
+ }
+
+ } else {
+ pid = fork();
+ assert(pid >= 0);
+
+ if (!pid) {
+ worker_thread(threads + 0);
+ exit(0);
+ } else {
+ worker_thread(threads + 1);
+ }
+
retpid = waitpid(pid, &wait_stat, 0);
assert((retpid == pid) && WIFEXITED(wait_stat));
- } else {
- exit(0);
}
+ gettimeofday(&stop, NULL);
+ timersub(&stop, &start, &diff);
+
switch (bench_format) {
case BENCH_FORMAT_DEFAULT:
- printf("# Executed %d pipe operations between two tasks\n\n",
- loops);
+ printf("# Executed %d pipe operations between two %s\n\n",
+ loops, threaded ? "threads" : "processes");
result_usec = diff.tv_sec * 1000000;
result_usec += diff.tv_usec;
diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index db491e9a812..1ec429fef2b 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -28,8 +28,10 @@
#include "util/hist.h"
#include "util/session.h"
#include "util/tool.h"
+#include "util/data.h"
#include "arch/common.h"
+#include <dlfcn.h>
#include <linux/bitmap.h>
struct perf_annotate {
@@ -44,7 +46,7 @@ struct perf_annotate {
};
static int perf_evsel__add_sample(struct perf_evsel *evsel,
- struct perf_sample *sample,
+ struct perf_sample *sample __maybe_unused,
struct addr_location *al,
struct perf_annotate *ann)
{
@@ -63,21 +65,13 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
return 0;
}
- he = __hists__add_entry(&evsel->hists, al, NULL, 1, 1);
+ he = __hists__add_entry(&evsel->hists, al, NULL, NULL, NULL, 1, 1, 0,
+ true);
if (he == NULL)
return -ENOMEM;
- ret = 0;
- if (he->ms.sym != NULL) {
- struct annotation *notes = symbol__annotation(he->ms.sym);
- if (notes->src == NULL && symbol__alloc_hist(he->ms.sym) < 0)
- return -ENOMEM;
-
- ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
- }
-
- evsel->hists.stats.total_period += sample->period;
- hists__inc_nr_events(&evsel->hists, PERF_RECORD_SAMPLE);
+ ret = hist_entry__inc_addr_samples(he, evsel->idx, al->addr);
+ hists__inc_nr_samples(&evsel->hists, true);
return ret;
}
@@ -90,8 +84,7 @@ static int process_sample_event(struct perf_tool *tool,
struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
struct addr_location al;
- if (perf_event__preprocess_sample(event, machine, &al, sample,
- symbol__annotate_init) < 0) {
+ if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
pr_warning("problem processing %d event, skipping it.\n",
event->header.type);
return -1;
@@ -117,11 +110,11 @@ static int hist_entry__tty_annotate(struct hist_entry *he,
ann->print_line, ann->full_paths, 0, 0);
}
-static void hists__find_annotations(struct hists *self,
+static void hists__find_annotations(struct hists *hists,
struct perf_evsel *evsel,
struct perf_annotate *ann)
{
- struct rb_node *nd = rb_first(&self->entries), *next;
+ struct rb_node *nd = rb_first(&hists->entries), *next;
int key = K_RIGHT;
while (nd) {
@@ -143,8 +136,18 @@ find_next:
if (use_browser == 2) {
int ret;
+ int (*annotate)(struct hist_entry *he,
+ struct perf_evsel *evsel,
+ struct hist_browser_timer *hbt);
+
+ annotate = dlsym(perf_gtk_handle,
+ "hist_entry__gtk_annotate");
+ if (annotate == NULL) {
+ ui__error("GTK browser not found!\n");
+ return;
+ }
- ret = hist_entry__gtk_annotate(he, evsel, NULL);
+ ret = annotate(he, evsel, NULL);
if (!ret || !ann->skip_missing)
return;
@@ -177,8 +180,7 @@ find_next:
* symbol, free he->ms.sym->src to signal we already
* processed this symbol.
*/
- free(notes->src);
- notes->src = NULL;
+ zfree(&notes->src);
}
}
}
@@ -189,12 +191,18 @@ static int __cmd_annotate(struct perf_annotate *ann)
struct perf_session *session;
struct perf_evsel *pos;
u64 total_nr_samples;
+ struct perf_data_file file = {
+ .path = input_name,
+ .mode = PERF_DATA_MODE_READ,
+ .force = ann->force,
+ };
- session = perf_session__new(input_name, O_RDONLY,
- ann->force, false, &ann->tool);
+ session = perf_session__new(&file, false, &ann->tool);
if (session == NULL)
return -ENOMEM;
+ machines__set_symbol_filter(&session->machines, symbol__annotate_init);
+
if (ann->cpu_list) {
ret = perf_session__cpu_bitmap(session, ann->cpu_list,
ann->cpu_bitmap);
@@ -224,13 +232,13 @@ static int __cmd_annotate(struct perf_annotate *ann)
perf_session__fprintf_dsos(session, stdout);
total_nr_samples = 0;
- list_for_each_entry(pos, &session->evlist->entries, node) {
+ evlist__for_each(session->evlist, pos) {
struct hists *hists = &pos->hists;
u32 nr_samples = hists->stats.nr_events[PERF_RECORD_SAMPLE];
if (nr_samples > 0) {
total_nr_samples += nr_samples;
- hists__collapse_resort(hists);
+ hists__collapse_resort(hists, NULL);
hists__output_resort(hists);
if (symbol_conf.event_group &&
@@ -242,12 +250,21 @@ static int __cmd_annotate(struct perf_annotate *ann)
}
if (total_nr_samples == 0) {
- ui__error("The %s file has no samples!\n", session->filename);
+ ui__error("The %s file has no samples!\n", file.path);
goto out_delete;
}
- if (use_browser == 2)
- perf_gtk__show_annotations();
+ if (use_browser == 2) {
+ void (*show_annotations)(void);
+
+ show_annotations = dlsym(perf_gtk_handle,
+ "perf_gtk__show_annotations");
+ if (show_annotations == NULL) {
+ ui__error("GTK browser not found!\n");
+ goto out_delete;
+ }
+ show_annotations();
+ }
out_delete:
/*
@@ -276,6 +293,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
.tool = {
.sample = process_sample_event,
.mmap = perf_event__process_mmap,
+ .mmap2 = perf_event__process_mmap2,
.comm = perf_event__process_comm,
.exit = perf_event__process_exit,
.fork = perf_event__process_fork,
@@ -346,7 +364,7 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
if (argc) {
/*
- * Special case: if there's an argument left then assume tha
+ * Special case: if there's an argument left then assume that
* it's a symbol filter:
*/
if (argc > 1)
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index 77298bf892b..1e6e7771054 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -1,21 +1,19 @@
/*
- *
* builtin-bench.c
*
- * General benchmarking subsystem provided by perf
+ * General benchmarking collections provided by perf
*
* Copyright (C) 2009, Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
- *
*/
/*
+ * Available benchmark collection list:
*
- * Available subsystem list:
- * sched ... scheduler and IPC mechanism
+ * sched ... scheduler and IPC performance
* mem ... memory access performance
- *
+ * numa ... NUMA scheduling and MM performance
+ * futex ... Futex performance
*/
-
#include "perf.h"
#include "util/util.h"
#include "util/parse-options.h"
@@ -25,112 +23,101 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/prctl.h>
-struct bench_suite {
- const char *name;
- const char *summary;
- int (*fn)(int, const char **, const char *);
+typedef int (*bench_fn_t)(int argc, const char **argv, const char *prefix);
+
+struct bench {
+ const char *name;
+ const char *summary;
+ bench_fn_t fn;
};
- \
-/* sentinel: easy for help */
-#define suite_all { "all", "Test all benchmark suites", NULL }
-
-#ifdef LIBNUMA_SUPPORT
-static struct bench_suite numa_suites[] = {
- { "mem",
- "Benchmark for NUMA workloads",
- bench_numa },
- suite_all,
- { NULL,
- NULL,
- NULL }
+
+#ifdef HAVE_LIBNUMA_SUPPORT
+static struct bench numa_benchmarks[] = {
+ { "mem", "Benchmark for NUMA workloads", bench_numa },
+ { "all", "Test all NUMA benchmarks", NULL },
+ { NULL, NULL, NULL }
};
#endif
-static struct bench_suite sched_suites[] = {
- { "messaging",
- "Benchmark for scheduler and IPC mechanisms",
- bench_sched_messaging },
- { "pipe",
- "Flood of communication over pipe() between two processes",
- bench_sched_pipe },
- suite_all,
- { NULL,
- NULL,
- NULL }
+static struct bench sched_benchmarks[] = {
+ { "messaging", "Benchmark for scheduling and IPC", bench_sched_messaging },
+ { "pipe", "Benchmark for pipe() between two processes", bench_sched_pipe },
+ { "all", "Test all scheduler benchmarks", NULL },
+ { NULL, NULL, NULL }
+};
+
+static struct bench mem_benchmarks[] = {
+ { "memcpy", "Benchmark for memcpy()", bench_mem_memcpy },
+ { "memset", "Benchmark for memset() tests", bench_mem_memset },
+ { "all", "Test all memory benchmarks", NULL },
+ { NULL, NULL, NULL }
};
-static struct bench_suite mem_suites[] = {
- { "memcpy",
- "Simple memory copy in various ways",
- bench_mem_memcpy },
- { "memset",
- "Simple memory set in various ways",
- bench_mem_memset },
- suite_all,
- { NULL,
- NULL,
- NULL }
+static struct bench futex_benchmarks[] = {
+ { "hash", "Benchmark for futex hash table", bench_futex_hash },
+ { "wake", "Benchmark for futex wake calls", bench_futex_wake },
+ { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue },
+ { "all", "Test all futex benchmarks", NULL },
+ { NULL, NULL, NULL }
};
-struct bench_subsys {
- const char *name;
- const char *summary;
- struct bench_suite *suites;
+struct collection {
+ const char *name;
+ const char *summary;
+ struct bench *benchmarks;
};
-static struct bench_subsys subsystems[] = {
-#ifdef LIBNUMA_SUPPORT
- { "numa",
- "NUMA scheduling and MM behavior",
- numa_suites },
+static struct collection collections[] = {
+ { "sched", "Scheduler and IPC benchmarks", sched_benchmarks },
+ { "mem", "Memory access benchmarks", mem_benchmarks },
+#ifdef HAVE_LIBNUMA_SUPPORT
+ { "numa", "NUMA scheduling and MM benchmarks", numa_benchmarks },
#endif
- { "sched",
- "scheduler and IPC mechanism",
- sched_suites },
- { "mem",
- "memory access performance",
- mem_suites },
- { "all", /* sentinel: easy for help */
- "all benchmark subsystem",
- NULL },
- { NULL,
- NULL,
- NULL }
+ {"futex", "Futex stressing benchmarks", futex_benchmarks },
+ { "all", "All benchmarks", NULL },
+ { NULL, NULL, NULL }
};
-static void dump_suites(int subsys_index)
+/* Iterate over all benchmark collections: */
+#define for_each_collection(coll) \
+ for (coll = collections; coll->name; coll++)
+
+/* Iterate over all benchmarks within a collection: */
+#define for_each_bench(coll, bench) \
+ for (bench = coll->benchmarks; bench && bench->name; bench++)
+
+static void dump_benchmarks(struct collection *coll)
{
- int i;
+ struct bench *bench;
- printf("# List of available suites for %s...\n\n",
- subsystems[subsys_index].name);
+ printf("\n # List of available benchmarks for collection '%s':\n\n", coll->name);
- for (i = 0; subsystems[subsys_index].suites[i].name; i++)
- printf("%14s: %s\n",
- subsystems[subsys_index].suites[i].name,
- subsystems[subsys_index].suites[i].summary);
+ for_each_bench(coll, bench)
+ printf("%14s: %s\n", bench->name, bench->summary);
printf("\n");
- return;
}
static const char *bench_format_str;
+
+/* Output/formatting style, exported to benchmark modules: */
int bench_format = BENCH_FORMAT_DEFAULT;
static const struct option bench_options[] = {
- OPT_STRING('f', "format", &bench_format_str, "default",
- "Specify format style"),
+ OPT_STRING('f', "format", &bench_format_str, "default", "Specify format style"),
OPT_END()
};
static const char * const bench_usage[] = {
- "perf bench [<common options>] <subsystem> <suite> [<options>]",
+ "perf bench [<common options>] <collection> <benchmark> [<options>]",
NULL
};
static void print_usage(void)
{
+ struct collection *coll;
int i;
printf("Usage: \n");
@@ -138,11 +125,10 @@ static void print_usage(void)
printf("\t%s\n", bench_usage[i]);
printf("\n");
- printf("# List of available subsystems...\n\n");
+ printf(" # List of all available benchmark collections:\n\n");
- for (i = 0; subsystems[i].name; i++)
- printf("%14s: %s\n",
- subsystems[i].name, subsystems[i].summary);
+ for_each_collection(coll)
+ printf("%14s: %s\n", coll->name, coll->summary);
printf("\n");
}
@@ -159,44 +145,74 @@ static int bench_str2int(const char *str)
return BENCH_FORMAT_UNKNOWN;
}
-static void all_suite(struct bench_subsys *subsys) /* FROM HERE */
+/*
+ * Run a specific benchmark but first rename the running task's ->comm[]
+ * to something meaningful:
+ */
+static int run_bench(const char *coll_name, const char *bench_name, bench_fn_t fn,
+ int argc, const char **argv, const char *prefix)
+{
+ int size;
+ char *name;
+ int ret;
+
+ size = strlen(coll_name) + 1 + strlen(bench_name) + 1;
+
+ name = zalloc(size);
+ BUG_ON(!name);
+
+ scnprintf(name, size, "%s-%s", coll_name, bench_name);
+
+ prctl(PR_SET_NAME, name);
+ argv[0] = name;
+
+ ret = fn(argc, argv, prefix);
+
+ free(name);
+
+ return ret;
+}
+
+static void run_collection(struct collection *coll)
{
- int i;
+ struct bench *bench;
const char *argv[2];
- struct bench_suite *suites = subsys->suites;
argv[1] = NULL;
/*
* TODO:
- * preparing preset parameters for
+ *
+ * Preparing preset parameters for
* embedded, ordinary PC, HPC, etc...
- * will be helpful
+ * would be helpful.
*/
- for (i = 0; suites[i].fn; i++) {
- printf("# Running %s/%s benchmark...\n",
- subsys->name,
- suites[i].name);
+ for_each_bench(coll, bench) {
+ if (!bench->fn)
+ break;
+ printf("# Running %s/%s benchmark...\n", coll->name, bench->name);
fflush(stdout);
- argv[1] = suites[i].name;
- suites[i].fn(1, argv, NULL);
+ argv[1] = bench->name;
+ run_bench(coll->name, bench->name, bench->fn, 1, argv, NULL);
printf("\n");
}
}
-static void all_subsystem(void)
+static void run_all_collections(void)
{
- int i;
- for (i = 0; subsystems[i].suites; i++)
- all_suite(&subsystems[i]);
+ struct collection *coll;
+
+ for_each_collection(coll)
+ run_collection(coll);
}
int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
{
- int i, j, status = 0;
+ struct collection *coll;
+ int ret = 0;
if (argc < 2) {
- /* No subsystem specified. */
+ /* No collection specified. */
print_usage();
goto end;
}
@@ -206,7 +222,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
bench_format = bench_str2int(bench_format_str);
if (bench_format == BENCH_FORMAT_UNKNOWN) {
- printf("Unknown format descriptor:%s\n", bench_format_str);
+ printf("Unknown format descriptor: '%s'\n", bench_format_str);
goto end;
}
@@ -216,52 +232,51 @@ int cmd_bench(int argc, const char **argv, const char *prefix __maybe_unused)
}
if (!strcmp(argv[0], "all")) {
- all_subsystem();
+ run_all_collections();
goto end;
}
- for (i = 0; subsystems[i].name; i++) {
- if (strcmp(subsystems[i].name, argv[0]))
+ for_each_collection(coll) {
+ struct bench *bench;
+
+ if (strcmp(coll->name, argv[0]))
continue;
if (argc < 2) {
- /* No suite specified. */
- dump_suites(i);
+ /* No bench specified. */
+ dump_benchmarks(coll);
goto end;
}
if (!strcmp(argv[1], "all")) {
- all_suite(&subsystems[i]);
+ run_collection(coll);
goto end;
}
- for (j = 0; subsystems[i].suites[j].name; j++) {
- if (strcmp(subsystems[i].suites[j].name, argv[1]))
+ for_each_bench(coll, bench) {
+ if (strcmp(bench->name, argv[1]))
continue;
if (bench_format == BENCH_FORMAT_DEFAULT)
- printf("# Running %s/%s benchmark...\n",
- subsystems[i].name,
- subsystems[i].suites[j].name);
+ printf("# Running '%s/%s' benchmark:\n", coll->name, bench->name);
fflush(stdout);
- status = subsystems[i].suites[j].fn(argc - 1,
- argv + 1, prefix);
+ ret = run_bench(coll->name, bench->name, bench->fn, argc-1, argv+1, prefix);
goto end;
}
if (!strcmp(argv[1], "-h") || !strcmp(argv[1], "--help")) {
- dump_suites(i);
+ dump_benchmarks(coll);
goto end;
}
- printf("Unknown suite:%s for %s\n", argv[1], argv[0]);
- status = 1;
+ printf("Unknown benchmark: '%s' for collection '%s'\n", argv[1], argv[0]);
+ ret = 1;
goto end;
}
- printf("Unknown subsystem:%s\n", argv[0]);
- status = 1;
+ printf("Unknown collection: '%s'\n", argv[0]);
+ ret = 1;
end:
- return status;
+ return ret;
}
diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c
index c96c8fa3824..b22dbb16f87 100644
--- a/tools/perf/builtin-buildid-cache.c
+++ b/tools/perf/builtin-buildid-cache.c
@@ -6,6 +6,11 @@
* Copyright (C) 2010, Red Hat Inc.
* Copyright (C) 2010, Arnaldo Carvalho de Melo <acme@redhat.com>
*/
+#include <sys/types.h>
+#include <sys/time.h>
+#include <time.h>
+#include <dirent.h>
+#include <unistd.h>
#include "builtin.h"
#include "perf.h"
#include "util/cache.h"
@@ -17,6 +22,165 @@
#include "util/session.h"
#include "util/symbol.h"
+static int build_id_cache__kcore_buildid(const char *proc_dir, char *sbuildid)
+{
+ char root_dir[PATH_MAX];
+ char notes[PATH_MAX];
+ u8 build_id[BUILD_ID_SIZE];
+ char *p;
+
+ strlcpy(root_dir, proc_dir, sizeof(root_dir));
+
+ p = strrchr(root_dir, '/');
+ if (!p)
+ return -1;
+ *p = '\0';
+
+ scnprintf(notes, sizeof(notes), "%s/sys/kernel/notes", root_dir);
+
+ if (sysfs__read_build_id(notes, build_id, sizeof(build_id)))
+ return -1;
+
+ build_id__sprintf(build_id, sizeof(build_id), sbuildid);
+
+ return 0;
+}
+
+static int build_id_cache__kcore_dir(char *dir, size_t sz)
+{
+ struct timeval tv;
+ struct tm tm;
+ char dt[32];
+
+ if (gettimeofday(&tv, NULL) || !localtime_r(&tv.tv_sec, &tm))
+ return -1;
+
+ if (!strftime(dt, sizeof(dt), "%Y%m%d%H%M%S", &tm))
+ return -1;
+
+ scnprintf(dir, sz, "%s%02u", dt, (unsigned)tv.tv_usec / 10000);
+
+ return 0;
+}
+
+static bool same_kallsyms_reloc(const char *from_dir, char *to_dir)
+{
+ char from[PATH_MAX];
+ char to[PATH_MAX];
+ const char *name;
+ u64 addr1 = 0, addr2 = 0;
+ int i;
+
+ scnprintf(from, sizeof(from), "%s/kallsyms", from_dir);
+ scnprintf(to, sizeof(to), "%s/kallsyms", to_dir);
+
+ for (i = 0; (name = ref_reloc_sym_names[i]) != NULL; i++) {
+ addr1 = kallsyms__get_function_start(from, name);
+ if (addr1)
+ break;
+ }
+
+ if (name)
+ addr2 = kallsyms__get_function_start(to, name);
+
+ return addr1 == addr2;
+}
+
+static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir,
+ size_t to_dir_sz)
+{
+ char from[PATH_MAX];
+ char to[PATH_MAX];
+ char to_subdir[PATH_MAX];
+ struct dirent *dent;
+ int ret = -1;
+ DIR *d;
+
+ d = opendir(to_dir);
+ if (!d)
+ return -1;
+
+ scnprintf(from, sizeof(from), "%s/modules", from_dir);
+
+ while (1) {
+ dent = readdir(d);
+ if (!dent)
+ break;
+ if (dent->d_type != DT_DIR)
+ continue;
+ scnprintf(to, sizeof(to), "%s/%s/modules", to_dir,
+ dent->d_name);
+ scnprintf(to_subdir, sizeof(to_subdir), "%s/%s",
+ to_dir, dent->d_name);
+ if (!compare_proc_modules(from, to) &&
+ same_kallsyms_reloc(from_dir, to_subdir)) {
+ strlcpy(to_dir, to_subdir, to_dir_sz);
+ ret = 0;
+ break;
+ }
+ }
+
+ closedir(d);
+
+ return ret;
+}
+
+static int build_id_cache__add_kcore(const char *filename, const char *debugdir)
+{
+ char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1];
+ char from_dir[PATH_MAX], to_dir[PATH_MAX];
+ char *p;
+
+ strlcpy(from_dir, filename, sizeof(from_dir));
+
+ p = strrchr(from_dir, '/');
+ if (!p || strcmp(p + 1, "kcore"))
+ return -1;
+ *p = '\0';
+
+ if (build_id_cache__kcore_buildid(from_dir, sbuildid))
+ return -1;
+
+ scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s",
+ debugdir, sbuildid);
+
+ if (!build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) {
+ pr_debug("same kcore found in %s\n", to_dir);
+ return 0;
+ }
+
+ if (build_id_cache__kcore_dir(dir, sizeof(dir)))
+ return -1;
+
+ scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s",
+ debugdir, sbuildid, dir);
+
+ if (mkdir_p(to_dir, 0755))
+ return -1;
+
+ if (kcore_copy(from_dir, to_dir)) {
+ /* Remove YYYYmmddHHMMSShh directory */
+ if (!rmdir(to_dir)) {
+ p = strrchr(to_dir, '/');
+ if (p)
+ *p = '\0';
+ /* Try to remove buildid directory */
+ if (!rmdir(to_dir)) {
+ p = strrchr(to_dir, '/');
+ if (p)
+ *p = '\0';
+ /* Try to remove [kernel.kcore] directory */
+ rmdir(to_dir);
+ }
+ }
+ return -1;
+ }
+
+ pr_debug("kcore added to build-id cache directory %s\n", to_dir);
+
+ return 0;
+}
+
static int build_id_cache__add_file(const char *filename, const char *debugdir)
{
char sbuild_id[BUILD_ID_SIZE * 2 + 1];
@@ -82,8 +246,12 @@ static bool dso__missing_buildid_cache(struct dso *dso, int parm __maybe_unused)
static int build_id_cache__fprintf_missing(const char *filename, bool force, FILE *fp)
{
- struct perf_session *session = perf_session__new(filename, O_RDONLY,
- force, false, NULL);
+ struct perf_data_file file = {
+ .path = filename,
+ .mode = PERF_DATA_MODE_READ,
+ .force = force,
+ };
+ struct perf_session *session = perf_session__new(&file, false, NULL);
if (session == NULL)
return -1;
@@ -130,11 +298,14 @@ int cmd_buildid_cache(int argc, const char **argv,
char const *add_name_list_str = NULL,
*remove_name_list_str = NULL,
*missing_filename = NULL,
- *update_name_list_str = NULL;
+ *update_name_list_str = NULL,
+ *kcore_filename;
const struct option buildid_cache_options[] = {
OPT_STRING('a', "add", &add_name_list_str,
"file list", "file(s) to add"),
+ OPT_STRING('k', "kcore", &kcore_filename,
+ "file", "kcore file to add"),
OPT_STRING('r', "remove", &remove_name_list_str, "file list",
"file(s) to remove"),
OPT_STRING('M', "missing", &missing_filename, "file",
@@ -217,5 +388,9 @@ int cmd_buildid_cache(int argc, const char **argv,
}
}
+ if (kcore_filename &&
+ build_id_cache__add_kcore(kcore_filename, debugdir))
+ pr_warning("Couldn't add %s\n", kcore_filename);
+
return ret;
}
diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c