From 5d2cd90922c778908bd0cd669e572a5b5eafd737 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 14 Apr 2011 11:20:14 -0300 Subject: perf evsel: Fix use of inherit perf stat doesn't mmap and its perfectly fine for it to use task-bound counters with inheritance. So set the attr.inherit on the caller and leave the syscall itself to validate it. When the mmap fails perf_evlist__mmap will just emit a warning if this is the failure reason. Reported-by: Peter Zijlstra Acked-by: Peter Zijlstra Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/r/20110414170121.GC3229@ghostprotocols.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index a9f2d7e1204..f5e38451fdc 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -498,11 +498,11 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, struct cpu_map *cpus = NULL; struct thread_map *threads = NULL; PyObject *pcpus = NULL, *pthreads = NULL; - int group = 0, overwrite = 0; - static char *kwlist[] = {"cpus", "threads", "group", "overwrite", NULL, NULL}; + int group = 0, inherit = 0; + static char *kwlist[] = {"cpus", "threads", "group", "inherit", NULL, NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|OOii", kwlist, - &pcpus, &pthreads, &group, &overwrite)) + &pcpus, &pthreads, &group, &inherit)) return NULL; if (pthreads != NULL) @@ -511,7 +511,8 @@ static PyObject *pyrf_evsel__open(struct pyrf_evsel *pevsel, if (pcpus != NULL) cpus = ((struct pyrf_cpu_map *)pcpus)->cpus; - if (perf_evsel__open(evsel, cpus, threads, group, overwrite) < 0) { + evsel->attr.inherit = inherit; + if (perf_evsel__open(evsel, cpus, threads, group) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } -- cgit v1.2.3-18-g5258 From 94403f8863d0d1d2005291b2ef0719c2534aa303 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 24 Apr 2011 08:18:31 +0200 Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate cycles the CPU does nothing useful, because it is stalled on a cache-miss or some other condition. Acked-by: Peter Zijlstra Acked-by: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-fue11vymwqsoo5to72jxxjyl@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/python.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f5e38451fdc..406f613ee61 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -798,6 +798,7 @@ static struct { { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, + { "COUNT_HW_STALLED_CYCLES", PERF_COUNT_HW_STALLED_CYCLES }, { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, -- cgit v1.2.3-18-g5258 From 129c04cb8ce2e4bf3f17223f58ef16aa8a2cb3b8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 29 Apr 2011 14:41:28 +0200 Subject: perf tools: Add front-end and back-end stalled cycles support Update perf tooling to deal with front-end and back-end stalled cycles events. Add both the default 'perf stat' output. Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Link: http://lkml.kernel.org/n/tip-7y40wib8n002io7hjpn1dsrm@git.kernel.org Signed-off-by: Ingo Molnar --- tools/perf/util/python.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 406f613ee61..8b0eff8b828 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -798,7 +798,6 @@ static struct { { "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS }, { "COUNT_HW_BRANCH_MISSES", PERF_COUNT_HW_BRANCH_MISSES }, { "COUNT_HW_BUS_CYCLES", PERF_COUNT_HW_BUS_CYCLES }, - { "COUNT_HW_STALLED_CYCLES", PERF_COUNT_HW_STALLED_CYCLES }, { "COUNT_HW_CACHE_L1D", PERF_COUNT_HW_CACHE_L1D }, { "COUNT_HW_CACHE_L1I", PERF_COUNT_HW_CACHE_L1I }, { "COUNT_HW_CACHE_LL", PERF_COUNT_HW_CACHE_LL }, @@ -811,6 +810,9 @@ static struct { { "COUNT_HW_CACHE_RESULT_ACCESS", PERF_COUNT_HW_CACHE_RESULT_ACCESS }, { "COUNT_HW_CACHE_RESULT_MISS", PERF_COUNT_HW_CACHE_RESULT_MISS }, + { "COUNT_HW_STALLED_CYCLES_FRONTEND", PERF_COUNT_HW_STALLED_CYCLES_FRONTEND }, + { "COUNT_HW_STALLED_CYCLES_BACKEND", PERF_COUNT_HW_STALLED_CYCLES_BACKEND }, + { "COUNT_SW_CPU_CLOCK", PERF_COUNT_SW_CPU_CLOCK }, { "COUNT_SW_TASK_CLOCK", PERF_COUNT_SW_TASK_CLOCK }, { "COUNT_SW_PAGE_FAULTS", PERF_COUNT_SW_PAGE_FAULTS }, -- cgit v1.2.3-18-g5258 From aece948f5ddd70d70df2f35855c706ef9a4f62e2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Sun, 15 May 2011 09:39:00 -0300 Subject: perf evlist: Fix per thread mmap setup The PERF_EVENT_IOC_SET_OUTPUT ioctl was returning -EINVAL when using --pid when monitoring multithreaded apps, as we can only share a ring buffer for events on the same thread if not doing per cpu. Fix it by using per thread ring buffers. Tested with: [root@felicio ~]# tuna -t 26131 -CP | nl 1 thread ctxt_switches 2 pid SCHED_ rtpri affinity voluntary nonvoluntary cmd 3 26131 OTHER 0 0,1 10814276 2397830 chromium-browse 4 642 OTHER 0 0,1 14688 0 chromium-browse 5 26148 OTHER 0 0,1 713602 115479 chromium-browse 6 26149 OTHER 0 0,1 801958 2262 chromium-browse 7 26150 OTHER 0 0,1 1271128 248 chromium-browse 8 26151 OTHER 0 0,1 3 0 chromium-browse 9 27049 OTHER 0 0,1 36796 9 chromium-browse 10 618 OTHER 0 0,1 14711 0 chromium-browse 11 661 OTHER 0 0,1 14593 0 chromium-browse 12 29048 OTHER 0 0,1 28125 0 chromium-browse 13 26143 OTHER 0 0,1 2202789 781 chromium-browse [root@felicio ~]# So 11 threads under pid 26131, then: [root@felicio ~]# perf record -F 50000 --pid 26131 [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7fa4a2538000-7fa4a25b9000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 2 7fa4a25b9000-7fa4a263a000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 3 7fa4a263a000-7fa4a26bb000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 4 7fa4a26bb000-7fa4a273c000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 5 7fa4a273c000-7fa4a27bd000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 6 7fa4a27bd000-7fa4a283e000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 7 7fa4a283e000-7fa4a28bf000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 8 7fa4a28bf000-7fa4a2940000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 9 7fa4a2940000-7fa4a29c1000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 10 7fa4a29c1000-7fa4a2a42000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 11 7fa4a2a42000-7fa4a2ac3000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# 11 mmaps, one per thread since we didn't specify any CPU list, so we need one mmap per thread and: [root@felicio ~]# perf record -F 50000 --pid 26131 ^M ^C[ perf record: Woken up 79 times to write data ] [ perf record: Captured and wrote 20.614 MB perf.data (~900639 samples) ] [root@felicio ~]# perf report -D | grep PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 371310 26131 2 96516 26148 3 95694 26149 4 95203 26150 5 7291 26143 6 87 27049 7 76 661 8 60 29048 9 47 618 10 43 642 [root@felicio ~]# Ok, one of the threads, 26151 was quiescent, so no samples there, but all the others are there. Then, if I specify one CPU: [root@felicio ~]# perf record -F 50000 --pid 26131 --cpu 1 ^C[ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.680 MB perf.data (~29730 samples) ] [root@felicio ~]# perf report -D | grep PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 8444 26131 2 2584 26149 3 2518 26148 4 2324 26150 5 123 26143 6 9 661 7 9 29048 [root@felicio ~]# This machine has two cores, so fewer threads appeared on the radar, and: [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7f484b922000-7f484b9a3000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# Just one mmap, as now we can use just one per-cpu buffer instead of the per-thread needed in the previous case. For global profiling: [root@felicio ~]# perf record -F 50000 -a ^C[ perf record: Woken up 26 times to write data ] [ perf record: Captured and wrote 7.128 MB perf.data (~311412 samples) ] [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7fb49b435000-7fb49b4b6000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] 2 7fb49b4b6000-7fb49b537000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# It uses per-cpu buffers. For just one thread: [root@felicio ~]# perf record -F 50000 --tid 26148 ^C[ perf record: Woken up 2 times to write data ] [ perf record: Captured and wrote 0.330 MB perf.data (~14426 samples) ] [root@felicio ~]# perf report -D | grep PERF_RECORD_SAMPLE | cut -d/ -f2 | cut -d: -f1 | sort -n | uniq -c | sort -nr | nl 1 9969 26148 [root@felicio ~]# [root@felicio ~]# grep perf_event /proc/`pidof perf`/maps | nl 1 7f286a51b000-7f286a59c000 rwxs 00000000 00:09 4064 anon_inode:[perf_event] [root@felicio ~]# Tested-by: David Ahern Tested-by: Lin Ming Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/r/20110426204401.GB1746@ghostprotocols.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index f5e38451fdc..99c722672f8 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -680,7 +680,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, &cpu, &sample_id_all)) return NULL; - event = perf_evlist__read_on_cpu(evlist, cpu); + event = perf_evlist__mmap_read(evlist, cpu); if (event != NULL) { struct perf_evsel *first; PyObject *pyevent = pyrf_event__new(event); -- cgit v1.2.3-18-g5258 From a285412479b6d5af3e48273a92ec2f1987df8cd1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 21 May 2011 19:33:04 +0200 Subject: perf tools: Pre-check sample size before parsing Check that the total size of the sample fields having a fixed size do not exceed the one of the whole event. This robustifies the sample parsing. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian --- tools/perf/util/python.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8b0eff8b828..4174c099032 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -690,8 +690,9 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, return PyErr_NoMemory(); first = list_entry(evlist->entries.next, struct perf_evsel, node); - perf_event__parse_sample(event, first->attr.sample_type, sample_id_all, - &pevent->sample); + perf_event__parse_sample(event, first->attr.sample_type, + perf_sample_size(first->attr.sample_type), + sample_id_all, &pevent->sample); return pyevent; } -- cgit v1.2.3-18-g5258 From 5538becaec9ca2ff21e7826372941dc46f498487 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 22 May 2011 02:17:22 +0200 Subject: perf tools: Propagate event parse error handling Better handle event parsing error by propagating the details in upper layers or by dumping some failure message. So that the user knows he has some crazy events in the batch. Signed-off-by: Frederic Weisbecker Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian --- tools/perf/util/python.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 4174c099032..3344d6e6f04 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -675,6 +675,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, union perf_event *event; int sample_id_all = 1, cpu; static char *kwlist[] = {"sample_id_all", NULL, NULL}; + int err; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, &cpu, &sample_id_all)) @@ -690,12 +691,17 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, return PyErr_NoMemory(); first = list_entry(evlist->entries.next, struct perf_evsel, node); - perf_event__parse_sample(event, first->attr.sample_type, - perf_sample_size(first->attr.sample_type), - sample_id_all, &pevent->sample); + err = perf_event__parse_sample(event, first->attr.sample_type, + perf_sample_size(first->attr.sample_type), + sample_id_all, &pevent->sample); + if (err) { + pr_err("Can't parse sample, err = %d\n", err); + goto end; + } + return pyevent; } - +end: Py_INCREF(Py_None); return Py_None; } -- cgit v1.2.3-18-g5258 From 9c850d6c4b95bb07fb066eb7f43dd4e3b4842b85 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Jun 2011 10:55:10 -0300 Subject: perf python: Use exception to propagate errors We were using pr_debug to tell the user about not being able to parse a sample where we should really use the python way of reporting errors: exceptions. Fixes this problem: [root@emilia ~]# python >>> import perf Traceback (most recent call last): File "", line 1, in ImportError: /home/acme/git/build/perf/python/perf.so: undefined symbol: eprintf >>> [root@emilia ~] As we want to keep the objects linked in the python binding (and in the future in a shared library) minimal. Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-m9dba9kaluas0kq8r58z191c@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 69436b3200a..2dd1698e093 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -694,14 +694,12 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, err = perf_event__parse_sample(event, first->attr.sample_type, perf_sample_size(first->attr.sample_type), sample_id_all, &pevent->sample); - if (err) { - pr_err("Can't parse sample, err = %d\n", err); - goto end; - } - + if (err) + return PyErr_Format(PyExc_OSError, + "perf: can't parse sample, err=%d", err); return pyevent; } -end: + Py_INCREF(Py_None); return Py_None; } -- cgit v1.2.3-18-g5258 From 56722381b8506733852c44dacf6d7bc5f90aedaf Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 2 Jun 2011 11:04:54 -0300 Subject: perf evlist: Don't die if sample_{id_all|type} is invalid Fixes two more cases where the python binding would not load: . Not finding die(), which it shouldn't anyway, not good to just stop the world because some particular perf.data file is invalid, just propagate the error to the caller. . Not finding perf_sample_size: fix it by moving it from event.c to evsel, where it belongs, as most cases are moving to operate on an evsel object.o One of the fixed problems: [root@emilia ~]# python >>> import perf Traceback (most recent call last): File "", line 1, in ImportError: /home/acme/git/build/perf/python/perf.so: undefined symbol: perf_sample_size >>> [root@emilia ~]# Cc: Frederic Weisbecker Cc: Mike Galbraith Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Link: http://lkml.kernel.org/n/tip-1hkj7b2cvgbfnoizsekjb6c9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 2dd1698e093..24063b4d41e 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -692,7 +692,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, first = list_entry(evlist->entries.next, struct perf_evsel, node); err = perf_event__parse_sample(event, first->attr.sample_type, - perf_sample_size(first->attr.sample_type), + perf_evsel__sample_size(first), sample_id_all, &pevent->sample); if (err) return PyErr_Format(PyExc_OSError, -- cgit v1.2.3-18-g5258 From b273fa9716aa1564bee88ceee62f9042981cdc81 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 31 Mar 2011 18:27:42 +0200 Subject: perf python: Fix argument name list of read_on_cpu() Mandatory arguments need to be present in the argument name list, as well as optional arguments, otherwise python barfs: # ./python/twatch.py Traceback (most recent call last): File "./python/twatch.py", line 41, in main() File "./python/twatch.py", line 32, in main event = evlist.read_on_cpu(cpu) RuntimeError: more argument specifiers than keyword list entries Hence, add cpu to the name list. Cc: David Ahern Cc: Ingo Molnar Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Tom Zanussi Link: http://lkml.kernel.org/r/1301588863-20210-1-git-send-email-fweisbec@gmail.com Signed-off-by: Frederic Weisbecker Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/python.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/perf/util/python.c') diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 24063b4d41e..a9ac0504aab 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -674,7 +674,7 @@ static PyObject *pyrf_evlist__read_on_cpu(struct pyrf_evlist *pevlist, struct perf_evlist *evlist = &pevlist->evlist; union perf_event *event; int sample_id_all = 1, cpu; - static char *kwlist[] = {"sample_id_all", NULL, NULL}; + static char *kwlist[] = {"cpu", "sample_id_all", NULL, NULL}; int err; if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i|i", kwlist, -- cgit v1.2.3-18-g5258