diff options
Diffstat (limited to 'tools/perf')
62 files changed, 5649 insertions, 2234 deletions
diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS new file mode 100644 index 00000000000..c2ddcb3acbd --- /dev/null +++ b/tools/perf/CREDITS @@ -0,0 +1,30 @@ +Most of the infrastructure that 'perf' uses here has been reused +from the Git project, as of version: + + 66996ec: Sync with 1.6.2.4 + +Here is an (incomplete!) list of main contributors to those files +in util/* and elsewhere: + + Alex Riesen + Christian Couder + Dmitry Potapov + Jeff King + Johannes Schindelin + Johannes Sixt + Junio C Hamano + Linus Torvalds + Matthias Kestenholz + Michal Ostrowski + Miklos Vajna + Petr Baudis + Pierre Habouzit + René Scharfe + Samuel Tardieu + Shawn O. Pearce + Steffen Prohaska + Steve Haslam + +Thanks guys! + +The full history of the files can be found in the upstream Git commits. diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile index 5457192e1b4..bdd3b7ecad0 100644 --- a/tools/perf/Documentation/Makefile +++ b/tools/perf/Documentation/Makefile @@ -35,7 +35,7 @@ man7dir=$(mandir)/man7 # DESTDIR= ASCIIDOC=asciidoc -ASCIIDOC_EXTRA = +ASCIIDOC_EXTRA = --unsafe MANPAGE_XSL = manpage-normal.xsl XMLTO_EXTRA = INSTALL?=install diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt new file mode 100644 index 00000000000..8eb6c489fb1 --- /dev/null +++ b/tools/perf/Documentation/examples.txt @@ -0,0 +1,225 @@ + + ------------------------------ + ****** perf by examples ****** + ------------------------------ + +[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ] + + +First, discovery/enumeration of available counters can be done via +'perf list': + +titan:~> perf list + [...] + kmem:kmalloc [Tracepoint event] + kmem:kmem_cache_alloc [Tracepoint event] + kmem:kmalloc_node [Tracepoint event] + kmem:kmem_cache_alloc_node [Tracepoint event] + kmem:kfree [Tracepoint event] + kmem:kmem_cache_free [Tracepoint event] + kmem:mm_page_free_direct [Tracepoint event] + kmem:mm_pagevec_free [Tracepoint event] + kmem:mm_page_alloc [Tracepoint event] + kmem:mm_page_alloc_zone_locked [Tracepoint event] + kmem:mm_page_pcpu_drain [Tracepoint event] + kmem:mm_page_alloc_extfrag [Tracepoint event] + +Then any (or all) of the above event sources can be activated and +measured. For example the page alloc/free properties of a 'hackbench +run' are: + + titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc + -e kmem:mm_pagevec_free -e kmem:mm_page_free_direct ./hackbench 10 + Time: 0.575 + + Performance counter stats for './hackbench 10': + + 13857 kmem:mm_page_pcpu_drain + 27576 kmem:mm_page_alloc + 6025 kmem:mm_pagevec_free + 20934 kmem:mm_page_free_direct + + 0.613972165 seconds time elapsed + +You can observe the statistical properties as well, by using the +'repeat the workload N times' feature of perf stat: + + titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct ./hackbench 10 + Time: 0.627 + Time: 0.644 + Time: 0.564 + Time: 0.559 + Time: 0.626 + + Performance counter stats for './hackbench 10' (5 runs): + + 12920 kmem:mm_page_pcpu_drain ( +- 3.359% ) + 25035 kmem:mm_page_alloc ( +- 3.783% ) + 6104 kmem:mm_pagevec_free ( +- 0.934% ) + 18376 kmem:mm_page_free_direct ( +- 4.941% ) + + 0.643954516 seconds time elapsed ( +- 2.363% ) + +Furthermore, these tracepoints can be used to sample the workload as +well. For example the page allocations done by a 'git gc' can be +captured the following way: + + titan:~/git> perf record -f -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ] + +To check which functions generated page allocations: + + titan:~/git> perf report + # Samples: 10646 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.57% git-repack /lib64/libc-2.5.so + 21.81% git /lib64/libc-2.5.so + 14.59% git ./git + 11.79% git-repack ./git + 7.12% git /lib64/ld-2.5.so + 3.16% git-repack /lib64/libpthread-2.5.so + 2.09% git-repack /bin/bash + 1.97% rm /lib64/libc-2.5.so + 1.39% mv /lib64/ld-2.5.so + 1.37% mv /lib64/libc-2.5.so + 1.12% git-repack /lib64/ld-2.5.so + 0.95% rm /lib64/ld-2.5.so + 0.90% git-update-serv /lib64/libc-2.5.so + 0.73% git-update-serv /lib64/ld-2.5.so + 0.68% perf /lib64/libpthread-2.5.so + 0.64% git-repack /usr/lib64/libz.so.1.2.3 + +Or to see it on a more finegrained level: + +titan:~/git> perf report --sort comm,dso,symbol +# Samples: 10646 +# +# Overhead Command Shared Object Symbol +# ........ ............... .......................... ...... +# + 9.35% git-repack ./git [.] insert_obj_hash + 9.12% git ./git [.] insert_obj_hash + 7.31% git /lib64/libc-2.5.so [.] memcpy + 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc + 6.24% git-repack /lib64/libc-2.5.so [.] memcpy + 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork + 5.47% git /lib64/libc-2.5.so [.] _int_malloc + 2.99% git /lib64/libc-2.5.so [.] memset + +Furthermore, call-graph sampling can be done too, of page +allocations - to see precisely what kind of page allocations there +are: + + titan:~/git> perf record -f -g -e kmem:mm_page_alloc -c 1 ./git gc + Counting objects: 1148, done. + Delta compression using up to 2 threads. + Compressing objects: 100% (450/450), done. + Writing objects: 100% (1148/1148), done. + Total 1148 (delta 690), reused 1148 (delta 690) + [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ] + + titan:~/git> perf report -g + # Samples: 10686 + # + # Overhead Command Shared Object + # ........ ............... .......................... + # + 23.25% git-repack /lib64/libc-2.5.so + | + |--50.00%-- _int_free + | + |--37.50%-- __GI___fork + | make_child + | + |--12.50%-- ptmalloc_unlock_all2 + | make_child + | + --6.25%-- __GI_strcpy + 21.61% git /lib64/libc-2.5.so + | + |--30.00%-- __GI_read + | | + | --83.33%-- git_config_from_file + | git_config + | | + [...] + +Or you can observe the whole system's page allocations for 10 +seconds: + +titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e +kmem:mm_page_alloc -e kmem:mm_pagevec_free -e +kmem:mm_page_free_direct sleep 10 + + Performance counter stats for 'sleep 10': + + 171585 kmem:mm_page_pcpu_drain + 322114 kmem:mm_page_alloc + 73623 kmem:mm_pagevec_free + 254115 kmem:mm_page_free_direct + + 10.000591410 seconds time elapsed + +Or observe how fluctuating the page allocations are, via statistical +analysis done over ten 1-second intervals: + + titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e + kmem:mm_page_alloc -e kmem:mm_pagevec_free -e + kmem:mm_page_free_direct sleep 1 + + Performance counter stats for 'sleep 1' (10 runs): + + 17254 kmem:mm_page_pcpu_drain ( +- 3.709% ) + 34394 kmem:mm_page_alloc ( +- 4.617% ) + 7509 kmem:mm_pagevec_free ( +- 4.820% ) + 25653 kmem:mm_page_free_direct ( +- 3.672% ) + + 1.058135029 seconds time elapsed ( +- 3.089% ) + +Or you can annotate the recorded 'git gc' run on a per symbol basis +and check which instructions/source-code generated page allocations: + + titan:~/git> perf annotate __GI___fork + ------------------------------------------------ + Percent | Source code & Disassembly of libc-2.5.so + ------------------------------------------------ + : + : + : Disassembly of section .plt: + : Disassembly of section .text: + : + : 00000031a2e95560 <__fork>: + [...] + 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax + 0.00 : 31a2e95607: 0f 05 syscall + 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax + 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202> + 0.00 : 31a2e95615: 85 c0 test %eax,%eax + +( this shows that 83.42% of __GI___fork's page allocations come from + the 0x38 system call it performs. ) + +etc. etc. - a lot more is possible. I could list a dozen of +other different usecases straight away - neither of which is +possible via /proc/vmstat. + +/proc/vmstat is not in the same league really, in terms of +expressive power of system analysis and performance +analysis. + +All that the above results needed were those new tracepoints +in include/tracing/events/kmem.h. + + Ingo + + diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 1dbc1eeb4c0..6be696b0a2b 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -29,13 +29,67 @@ OPTIONS Select the PMU event. Selection can be a symbolic event name (use 'perf list' to list all events) or a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + hexadecimal event descriptor. -a:: - system-wide collection + System-wide collection. -l:: - scale counter values + Scale counter values. + +-p:: +--pid=:: + Record events on existing pid. + +-r:: +--realtime=:: + Collect data with this RT SCHED_FIFO priority. +-A:: +--append:: + Append to the output file to do incremental profiling. + +-f:: +--force:: + Overwrite existing data file. + +-c:: +--count=:: + Event period to sample. + +-o:: +--output=:: + Output file name. + +-i:: +--inherit:: + Child tasks inherit counters. +-F:: +--freq=:: + Profile at this frequency. + +-m:: +--mmap-pages=:: + Number of mmap data pages. + +-g:: +--call-graph:: + Do call-graph (stack chain/backtrace) recording. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-s:: +--stat:: + Per thread counts. + +-d:: +--data:: + Sample addresses. + +-n:: +--no-samples:: + Don't sample. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 52d3fc6846a..e72e9311078 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -13,13 +13,40 @@ SYNOPSIS DESCRIPTION ----------- This command displays the performance counter profile information recorded -via perf report. +via perf record. OPTIONS ------- -i:: --input=:: Input file name. (default: perf.data) +-d:: +--dsos=:: + Only consider symbols in these dsos. CSV that understands + file://filename entries. +-n +--show-nr-samples + Show the number of samples for each symbol +-C:: +--comms=:: + Only consider symbols in these comms. CSV that understands + file://filename entries. +-S:: +--symbols=:: + Only consider these symbols. CSV that understands + file://filename entries. + +-w:: +--field-width=:: + Force each column width to the provided list, for large terminal + readability. + +-t:: +--field-separator=:: + + Use a special separator character and don't pad with spaces, replacing + all occurances of this separator in symbol names (and other output) + with a '.' character, that thus it's the only non valid separator. SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index c368a72721d..484080dd5b6 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -8,8 +8,8 @@ perf-stat - Run a command and gather performance counter statistics SYNOPSIS -------- [verse] -'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> -'perf stat' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>] +'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] <command> +'perf stat' [-e <EVENT> | --event=EVENT] [-S] [-a] -- <command> [<options>] DESCRIPTION ----------- @@ -40,7 +40,7 @@ OPTIONS -a:: system-wide collection --l:: +-c:: scale counter values EXAMPLES diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 539d0128972..4a7d558dc30 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -3,36 +3,122 @@ perf-top(1) NAME ---- -perf-top - Run a command and profile it +perf-top - System profiling tool. SYNOPSIS -------- [verse] -'perf top' [-e <EVENT> | --event=EVENT] [-l] [-a] <command> +'perf top' [-e <EVENT> | --event=EVENT] [<options>] DESCRIPTION ----------- -This command runs a command and gathers a performance counter profile -from it. +This command generates and displays a performance counter profile in realtime. OPTIONS ------- -<command>...:: - Any command you can specify in a shell. +-a:: +--all-cpus:: + System-wide collection. (default) + +-c <count>:: +--count=<count>:: + Event period to sample. + +-C <cpu>:: +--CPU=<cpu>:: + CPU to profile. + +-d <seconds>:: +--delay=<seconds>:: + Number of seconds to delay between refreshes. --e:: ---event=:: +-e <event>:: +--event=<event>:: Select the PMU event. Selection can be a symbolic event name (use 'perf list' to list all events) or a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a - hexadecimal event descriptor. + hexadecimal event descriptor. --a:: - system-wide collection +-E <entries>:: +--entries=<entries>:: + Display this many functions. + +-f <count>:: +--count-filter=<count>:: + Only display functions with more events than this. + +-F <freq>:: +--freq=<freq>:: + Profile at this frequency. + +-i:: +--inherit:: + Child tasks inherit counters, only makes sens with -p option. + +-k <path>:: +--vmlinux=<path>:: + Path to vmlinux. Required for annotation functionality. + +-m <pages>:: +--mmap-pages=<pages>:: + Number of mmapped data pages. + +-p <pid>:: +--pid=<pid>:: + Profile events on existing pid. + +-r <priority>:: +--realtime=<priority>:: + Collect data with this RT SCHED_FIFO priority. + +-s <symbol>:: +--sym-annotate=<symbol>:: + Annotate this symbol. Requires -k option. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-z:: +--zero:: + Zero history across display updates. + +INTERACTIVE PROMPTING KEYS +-------------------------- + +[d]:: + Display refresh delay. + +[e]:: + Number of entries to display. + +[E]:: + Event to display when multiple counters are active. + +[f]:: + Profile display filter (>= hit count). + +[F]:: + Annotation display filter (>= % of total). + +[s]:: + Annotate symbol. + +[S]:: + Stop annotation, return to full profile display. + +[w]:: + Toggle between weighted sum and individual count[E]r profile. + +[z]:: + Toggle event count zeroing across display updates. + +[qQ]:: + Quit. + +Pressing any unmapped key displays a menu, and prompts for input. --l:: - scale counter values SEE ALSO -------- diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 0cbd5d6874e..c045b4271e5 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -157,10 +157,17 @@ uname_R := $(shell sh -c 'uname -r 2>/dev/null || echo not') uname_P := $(shell sh -c 'uname -p 2>/dev/null || echo not') uname_V := $(shell sh -c 'uname -v 2>/dev/null || echo not') +# If we're on a 64-bit kernel, use -m64 +ifndef NO_64BIT + ifneq ($(patsubst %64,%,$(uname_M)),$(uname_M)) + M64 := -m64 + endif +endif + # CFLAGS and LDFLAGS are for the users to override from the command line. -CFLAGS = -ggdb3 -Wall -Werror -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -O6 -LDFLAGS = -lpthread -lrt -lelf +CFLAGS = $(M64) -ggdb3 -Wall -Wextra -Wstrict-prototypes -Wmissing-declarations -Wmissing-prototypes -std=gnu99 -Wdeclaration-after-statement -Werror -O6 +LDFLAGS = -lpthread -lrt -lelf -lm ALL_CFLAGS = $(CFLAGS) ALL_LDFLAGS = $(LDFLAGS) STRIP ?= strip @@ -218,7 +225,7 @@ SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ # Those must not be GNU-specific; they are shared with perl/ which may # be built by a different compiler. (Note that this is an artifact now # but it still might be nice to keep that distinction.) -BASIC_CFLAGS = +BASIC_CFLAGS = -Iutil/include BASIC_LDFLAGS = # Guard against environment variables @@ -284,9 +291,11 @@ export PERL_PATH LIB_FILE=libperf.a LIB_H += ../../include/linux/perf_counter.h +LIB_H += ../../include/linux/rbtree.h +LIB_H += ../../include/linux/list.h +LIB_H += util/include/linux/list.h LIB_H += perf.h -LIB_H += util/list.h -LIB_H += util/rbtree.h +LIB_H += util/types.h LIB_H += util/levenshtein.h LIB_H += util/parse-options.h LIB_H += util/parse-events.h @@ -295,9 +304,11 @@ LIB_H += util/util.h LIB_H += util/help.h LIB_H += util/strbuf.h LIB_H += util/string.h +LIB_H += util/strlist.h LIB_H += util/run-command.h LIB_H += util/sigchain.h LIB_H += util/symbol.h +LIB_H += util/module.h LIB_H += util/color.h LIB_OBJS += util/abspath.o @@ -316,12 +327,16 @@ LIB_OBJS += util/run-command.o LIB_OBJS += util/quote.o LIB_OBJS += util/strbuf.o LIB_OBJS += util/string.o +LIB_OBJS += util/strlist.o LIB_OBJS += util/usage.o LIB_OBJS += util/wrapper.o LIB_OBJS += util/sigchain.o LIB_OBJS += util/symbol.o +LIB_OBJS += util/module.o LIB_OBJS += util/color.o LIB_OBJS += util/pager.o +LIB_OBJS += util/header.o +LIB_OBJS += util/callchain.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o @@ -332,7 +347,6 @@ BUILTIN_OBJS += builtin-stat.o BUILTIN_OBJS += builtin-top.o PERFLIBS = $(LIB_FILE) -EXTLI |