diff options
Diffstat (limited to 'tools/perf/scripts')
11 files changed, 324 insertions, 157 deletions
diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs index c1e2ed1ed34..8c7ea42444d 100644 --- a/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Context.xs @@ -23,7 +23,7 @@ #include "perl.h" #include "XSUB.h" #include "../../../perf.h" -#include "../../../util/script-event.h" +#include "../../../util/trace-event.h" MODULE = Perf::Trace::Context PACKAGE = Perf::Trace::Context PROTOTYPES: ENABLE diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-record b/tools/perf/scripts/perl/bin/workqueue-stats-record deleted file mode 100644 index 8edda9078d5..00000000000 --- a/tools/perf/scripts/perl/bin/workqueue-stats-record +++ /dev/null @@ -1,2 +0,0 @@ -#!/bin/bash -perf record -e workqueue:workqueue_creation -e workqueue:workqueue_destruction -e workqueue:workqueue_execution -e workqueue:workqueue_insertion $@ diff --git a/tools/perf/scripts/perl/bin/workqueue-stats-report b/tools/perf/scripts/perl/bin/workqueue-stats-report deleted file mode 100644 index 6d91411d248..00000000000 --- a/tools/perf/scripts/perl/bin/workqueue-stats-report +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/bash -# description: workqueue stats (ins/exe/create/destroy) -perf script $@ -s "$PERF_EXEC_PATH"/scripts/perl/workqueue-stats.pl diff --git a/tools/perf/scripts/perl/rwtop.pl b/tools/perf/scripts/perl/rwtop.pl index 4bb3ecd3347..8b20787021c 100644 --- a/tools/perf/scripts/perl/rwtop.pl +++ b/tools/perf/scripts/perl/rwtop.pl @@ -17,6 +17,7 @@ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; use lib "./Perf-Trace-Util/lib"; use Perf::Trace::Core; use Perf::Trace::Util; +use POSIX qw/SIGALRM SA_RESTART/; my $default_interval = 3; my $nlines = 20; @@ -90,7 +91,10 @@ sub syscalls::sys_enter_write sub trace_begin { - $SIG{ALRM} = \&set_print_pending; + my $sa = POSIX::SigAction->new(\&set_print_pending); + $sa->flags(SA_RESTART); + $sa->safe(1); + POSIX::sigaction(SIGALRM, $sa) or die "Can't set SIGALRM handler: $!\n"; alarm 1; } diff --git a/tools/perf/scripts/perl/workqueue-stats.pl b/tools/perf/scripts/perl/workqueue-stats.pl deleted file mode 100644 index a8eaff5119e..00000000000 --- a/tools/perf/scripts/perl/workqueue-stats.pl +++ /dev/null @@ -1,129 +0,0 @@ -#!/usr/bin/perl -w -# (c) 2009, Tom Zanussi <tzanussi@gmail.com> -# Licensed under the terms of the GNU GPL License version 2 - -# Displays workqueue stats -# -# Usage: -# -# perf record -c 1 -f -a -R -e workqueue:workqueue_creation -e -# workqueue:workqueue_destruction -e workqueue:workqueue_execution -# -e workqueue:workqueue_insertion -# -# perf script -p -s tools/perf/scripts/perl/workqueue-stats.pl - -use 5.010000; -use strict; -use warnings; - -use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib"; -use lib "./Perf-Trace-Util/lib"; -use Perf::Trace::Core; -use Perf::Trace::Util; - -my @cpus; - -sub workqueue::workqueue_destruction -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, - $thread_comm, $thread_pid) = @_; - - $cpus[$common_cpu]{$thread_pid}{destroyed}++; - $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; -} - -sub workqueue::workqueue_creation -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, - $thread_comm, $thread_pid, $cpu) = @_; - - $cpus[$common_cpu]{$thread_pid}{created}++; - $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; -} - -sub workqueue::workqueue_execution -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, - $thread_comm, $thread_pid, $func) = @_; - - $cpus[$common_cpu]{$thread_pid}{executed}++; - $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; -} - -sub workqueue::workqueue_insertion -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm, - $thread_comm, $thread_pid, $func) = @_; - - $cpus[$common_cpu]{$thread_pid}{inserted}++; - $cpus[$common_cpu]{$thread_pid}{comm} = $thread_comm; -} - -sub trace_end -{ - print "workqueue work stats:\n\n"; - my $cpu = 0; - printf("%3s %6s %6s\t%-20s\n", "cpu", "ins", "exec", "name"); - printf("%3s %6s %6s\t%-20s\n", "---", "---", "----", "----"); - foreach my $pidhash (@cpus) { - while ((my $pid, my $wqhash) = each %$pidhash) { - my $ins = $$wqhash{'inserted'} || 0; - my $exe = $$wqhash{'executed'} || 0; - my $comm = $$wqhash{'comm'} || ""; - if ($ins || $exe) { - printf("%3u %6u %6u\t%-20s\n", $cpu, $ins, $exe, $comm); - } - } - $cpu++; - } - - $cpu = 0; - print "\nworkqueue lifecycle stats:\n\n"; - printf("%3s %6s %6s\t%-20s\n", "cpu", "created", "destroyed", "name"); - printf("%3s %6s %6s\t%-20s\n", "---", "-------", "---------", "----"); - foreach my $pidhash (@cpus) { - while ((my $pid, my $wqhash) = each %$pidhash) { - my $created = $$wqhash{'created'} || 0; - my $destroyed = $$wqhash{'destroyed'} || 0; - my $comm = $$wqhash{'comm'} || ""; - if ($created || $destroyed) { - printf("%3u %6u %6u\t%-20s\n", $cpu, $created, $destroyed, - $comm); - } - } - $cpu++; - } - - print_unhandled(); -} - -my %unhandled; - -sub print_unhandled -{ - if ((scalar keys %unhandled) == 0) { - return; - } - - print "\nunhandled events:\n\n"; - - printf("%-40s %10s\n", "event", "count"); - printf("%-40s %10s\n", "----------------------------------------", - "-----------"); - - foreach my $event_name (keys %unhandled) { - printf("%-40s %10d\n", $event_name, $unhandled{$event_name}); - } -} - -sub trace_unhandled -{ - my ($event_name, $context, $common_cpu, $common_secs, $common_nsecs, - $common_pid, $common_comm) = @_; - - $unhandled{$event_name}++; -} diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Context.c b/tools/perf/scripts/python/Perf-Trace-Util/Context.c index 315067b8f55..fcd1dd66790 100644 --- a/tools/perf/scripts/python/Perf-Trace-Util/Context.c +++ b/tools/perf/scripts/python/Perf-Trace-Util/Context.c @@ -25,7 +25,7 @@ PyMODINIT_FUNC initperf_trace_context(void); -static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args) +static PyObject *perf_trace_context_common_pc(PyObject *obj, PyObject *args) { static struct scripting_context *scripting_context; PyObject *context; @@ -40,7 +40,7 @@ static PyObject *perf_trace_context_common_pc(PyObject *self, PyObject *args) return Py_BuildValue("i", retval); } -static PyObject *perf_trace_context_common_flags(PyObject *self, +static PyObject *perf_trace_context_common_flags(PyObject *obj, PyObject *args) { static struct scripting_context *scripting_context; @@ -56,7 +56,7 @@ static PyObject *perf_trace_context_common_flags(PyObject *self, return Py_BuildValue("i", retval); } -static PyObject *perf_trace_context_common_lock_depth(PyObject *self, +static PyObject *perf_trace_context_common_lock_depth(PyObject *obj, PyObject *args) { static struct scripting_context *scripting_context; diff --git a/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py new file mode 100755 index 00000000000..9e0985794e2 --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py @@ -0,0 +1,94 @@ +# EventClass.py +# +# This is a library defining some events types classes, which could +# be used by other scripts to analyzing the perf samples. +# +# Currently there are just a few classes defined for examples, +# PerfEvent is the base class for all perf event sample, PebsEvent +# is a HW base Intel x86 PEBS event, and user could add more SW/HW +# event classes based on requirements. + +import struct + +# Event types, user could add more here +EVTYPE_GENERIC = 0 +EVTYPE_PEBS = 1 # Basic PEBS event +EVTYPE_PEBS_LL = 2 # PEBS event with load latency info +EVTYPE_IBS = 3 + +# +# Currently we don't have good way to tell the event type, but by +# the size of raw buffer, raw PEBS event with load latency data's +# size is 176 bytes, while the pure PEBS event's size is 144 bytes. +# +def create_event(name, comm, dso, symbol, raw_buf): + if (len(raw_buf) == 144): + event = PebsEvent(name, comm, dso, symbol, raw_buf) + elif (len(raw_buf) == 176): + event = PebsNHM(name, comm, dso, symbol, raw_buf) + else: + event = PerfEvent(name, comm, dso, symbol, raw_buf) + + return event + +class PerfEvent(object): + event_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_GENERIC): + self.name = name + self.comm = comm + self.dso = dso + self.symbol = symbol + self.raw_buf = raw_buf + self.ev_type = ev_type + PerfEvent.event_num += 1 + + def show(self): + print "PMU event: name=%12s, symbol=%24s, comm=%8s, dso=%12s" % (self.name, self.symbol, self.comm, self.dso) + +# +# Basic Intel PEBS (Precise Event-based Sampling) event, whose raw buffer +# contains the context info when that event happened: the EFLAGS and +# linear IP info, as well as all the registers. +# +class PebsEvent(PerfEvent): + pebs_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS): + tmp_buf=raw_buf[0:80] + flags, ip, ax, bx, cx, dx, si, di, bp, sp = struct.unpack('QQQQQQQQQQ', tmp_buf) + self.flags = flags + self.ip = ip + self.ax = ax + self.bx = bx + self.cx = cx + self.dx = dx + self.si = si + self.di = di + self.bp = bp + self.sp = sp + + PerfEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type) + PebsEvent.pebs_num += 1 + del tmp_buf + +# +# Intel Nehalem and Westmere support PEBS plus Load Latency info which lie +# in the four 64 bit words write after the PEBS data: +# Status: records the IA32_PERF_GLOBAL_STATUS register value +# DLA: Data Linear Address (EIP) +# DSE: Data Source Encoding, where the latency happens, hit or miss +# in L1/L2/L3 or IO operations +# LAT: the actual latency in cycles +# +class PebsNHM(PebsEvent): + pebs_nhm_num = 0 + def __init__(self, name, comm, dso, symbol, raw_buf, ev_type=EVTYPE_PEBS_LL): + tmp_buf=raw_buf[144:176] + status, dla, dse, lat = struct.unpack('QQQQ', tmp_buf) + self.status = status + self.dla = dla + self.dse = dse + self.lat = lat + + PebsEvent.__init__(self, name, comm, dso, symbol, raw_buf, ev_type) + PebsNHM.pebs_nhm_num += 1 + del tmp_buf diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-record b/tools/perf/scripts/python/bin/event_analyzing_sample-record new file mode 100644 index 00000000000..5ce652dabd0 --- /dev/null +++ b/tools/perf/scripts/python/bin/event_analyzing_sample-record @@ -0,0 +1,8 @@ +#!/bin/bash + +# +# event_analyzing_sample.py can cover all type of perf samples including +# the tracepoints, so no special record requirements, just record what +# you want to analyze. +# +perf record $@ diff --git a/tools/perf/scripts/python/bin/event_analyzing_sample-report b/tools/perf/scripts/python/bin/event_analyzing_sample-report new file mode 100644 index 00000000000..0941fc94e15 --- /dev/null +++ b/tools/perf/scripts/python/bin/event_analyzing_sample-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: analyze all perf samples +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/event_analyzing_sample.py diff --git a/tools/perf/scripts/python/event_analyzing_sample.py b/tools/perf/scripts/python/event_analyzing_sample.py new file mode 100644 index 00000000000..163c39fa12d --- /dev/null +++ b/tools/perf/scripts/python/event_analyzing_sample.py @@ -0,0 +1,189 @@ +# event_analyzing_sample.py: general event handler in python +# +# Current perf report is already very powerful with the annotation integrated, +# and this script is not trying to be as powerful as perf report, but +# providing end user/developer a flexible way to analyze the events other +# than trace points. +# +# The 2 database related functions in this script just show how to gather +# the basic information, and users can modify and write their own functions +# according to their specific requirement. +# +# The first function "show_general_events" just does a basic grouping for all +# generic events with the help of sqlite, and the 2nd one "show_pebs_ll" is +# for a x86 HW PMU event: PEBS with load latency data. +# + +import os +import sys +import math +import struct +import sqlite3 + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +from perf_trace_context import * +from EventClass import * + +# +# If the perf.data has a big number of samples, then the insert operation +# will be very time consuming (about 10+ minutes for 10000 samples) if the +# .db database is on disk. Move the .db file to RAM based FS to speedup +# the handling, which will cut the time down to several seconds. +# +con = sqlite3.connect("/dev/shm/perf.db") +con.isolation_level = None + +def trace_begin(): + print "In trace_begin:\n" + + # + # Will create several tables at the start, pebs_ll is for PEBS data with + # load latency info, while gen_events is for general event. + # + con.execute(""" + create table if not exists gen_events ( + name text, + symbol text, + comm text, + dso text + );""") + con.execute(""" + create table if not exists pebs_ll ( + name text, + symbol text, + comm text, + dso text, + flags integer, + ip integer, + status integer, + dse integer, + dla integer, + lat integer + );""") + +# +# Create and insert event object to a database so that user could +# do more analysis with simple database commands. +# +def process_event(param_dict): + event_attr = param_dict["attr"] + sample = param_dict["sample"] + raw_buf = param_dict["raw_buf"] + comm = param_dict["comm"] + name = param_dict["ev_name"] + + # Symbol and dso info are not always resolved + if (param_dict.has_key("dso")): + dso = param_dict["dso"] + else: + dso = "Unknown_dso" + + if (param_dict.has_key("symbol")): + symbol = param_dict["symbol"] + else: + symbol = "Unknown_symbol" + + # Create the event object and insert it to the right table in database + event = create_event(name, comm, dso, symbol, raw_buf) + insert_db(event) + +def insert_db(event): + if event.ev_type == EVTYPE_GENERIC: + con.execute("insert into gen_events values(?, ?, ?, ?)", + (event.name, event.symbol, event.comm, event.dso)) + elif event.ev_type == EVTYPE_PEBS_LL: + event.ip &= 0x7fffffffffffffff + event.dla &= 0x7fffffffffffffff + con.execute("insert into pebs_ll values (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", + (event.name, event.symbol, event.comm, event.dso, event.flags, + event.ip, event.status, event.dse, event.dla, event.lat)) + +def trace_end(): + print "In trace_end:\n" + # We show the basic info for the 2 type of event classes + show_general_events() + show_pebs_ll() + con.close() + +# +# As the event number may be very big, so we can't use linear way +# to show the histogram in real number, but use a log2 algorithm. +# + +def num2sym(num): + # Each number will have at least one '#' + snum = '#' * (int)(math.log(num, 2) + 1) + return snum + +def show_general_events(): + + # Check the total record number in the table + count = con.execute("select count(*) from gen_events") + for t in count: + print "There is %d records in gen_events table" % t[0] + if t[0] == 0: + return + + print "Statistics about the general events grouped by thread/symbol/dso: \n" + + # Group by thread + commq = con.execute("select comm, count(comm) from gen_events group by comm order by -count(comm)") + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42) + for row in commq: + print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by symbol + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58) + symbolq = con.execute("select symbol, count(symbol) from gen_events group by symbol order by -count(symbol)") + for row in symbolq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by dso + print "\n%40s %8s %16s\n%s" % ("dso", "number", "histogram", "="*74) + dsoq = con.execute("select dso, count(dso) from gen_events group by dso order by -count(dso)") + for row in dsoq: + print "%40s %8d %s" % (row[0], row[1], num2sym(row[1])) + +# +# This function just shows the basic info, and we could do more with the +# data in the tables, like checking the function parameters when some +# big latency events happen. +# +def show_pebs_ll(): + + count = con.execute("select count(*) from pebs_ll") + for t in count: + print "There is %d records in pebs_ll table" % t[0] + if t[0] == 0: + return + + print "Statistics about the PEBS Load Latency events grouped by thread/symbol/dse/latency: \n" + + # Group by thread + commq = con.execute("select comm, count(comm) from pebs_ll group by comm order by -count(comm)") + print "\n%16s %8s %16s\n%s" % ("comm", "number", "histogram", "="*42) + for row in commq: + print "%16s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by symbol + print "\n%32s %8s %16s\n%s" % ("symbol", "number", "histogram", "="*58) + symbolq = con.execute("select symbol, count(symbol) from pebs_ll group by symbol order by -count(symbol)") + for row in symbolq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by dse + dseq = con.execute("select dse, count(dse) from pebs_ll group by dse order by -count(dse)") + print "\n%32s %8s %16s\n%s" % ("dse", "number", "histogram", "="*58) + for row in dseq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + + # Group by latency + latq = con.execute("select lat, count(lat) from pebs_ll group by lat order by lat") + print "\n%32s %8s %16s\n%s" % ("latency", "number", "histogram", "="*58) + for row in latq: + print "%32s %8d %s" % (row[0], row[1], num2sym(row[1])) + +def trace_unhandled(event_name, context, event_fields_dict): + print ' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())]) diff --git a/tools/perf/scripts/python/net_dropmonitor.py b/tools/perf/scripts/python/net_dropmonitor.py index a4ffc950002..b5740599aab 100755 --- a/tools/perf/scripts/python/net_dropmonitor.py +++ b/tools/perf/scripts/python/net_dropmonitor.py @@ -15,35 +15,38 @@ kallsyms = [] def get_kallsyms_table(): global kallsyms + try: f = open("/proc/kallsyms", "r") - linecount = 0 - for line in f: - linecount = linecount+1 - f.seek(0) except: return - - j = 0 for line in f: loc = int(line.split()[0], 16) name = line.split()[2] - j = j +1 - if ((j % 100) == 0): - print "\r" + str(j) + "/" + str(linecount), - kallsyms.append({ 'loc': loc, 'name' : name}) - - print "\r" + str(j) + "/" + str(linecount) + kallsyms.append((loc, name)) kallsyms.sort() - return def get_sym(sloc): loc = int(sloc) - for i in kallsyms: - if (i['loc'] >= loc): - return (i['name'], i['loc']-loc) - return (None, 0) + + # Invariant: kallsyms[i][0] <= loc for all 0 <= i <= start + # kallsyms[i][0] > loc for all end <= i < len(kallsyms) + start, end = -1, len(kallsyms) + while end != start + 1: + pivot = (start + end) // 2 + if loc < kallsyms[pivot][0]: + end = pivot + else: + start = pivot + + # Now (start == -1 or kallsyms[start][0] <= loc) + # and (start == len(kallsyms) - 1 or loc < kallsyms[start + 1][0]) + if start >= 0: + symloc, name = kallsyms[start] + return (name, loc - symloc) + else: + return (None, 0) def print_drop_table(): print "%25s %25s %25s" % ("LOCATION", "OFFSET", "COUNT") @@ -64,7 +67,7 @@ def trace_end(): # called from perf, when it finds a correspoinding event def skb__kfree_skb(name, context, cpu, sec, nsec, pid, comm, - skbaddr, protocol, location): + skbaddr, location, protocol): slocation = str(location) try: drop_log[slocation] = drop_log[slocation] + 1 |
