Diffstat (limited to 'tools'): 31 files changed, 4294 insertions, 228 deletions
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
new file mode 100644
index 00000000000..1ce79198997
--- /dev/null
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -0,0 +1,41 @@
+perf-sched(1)
+==============
+
+NAME
+----
+perf-sched - Tool to trace/measure scheduler properties (latencies)
+
+SYNOPSIS
+--------
+[verse]
+'perf sched' {record|latency|replay|trace}
+
+DESCRIPTION
+-----------
+There are four variants of perf sched:
+
+ 'perf sched record <command>' to record the scheduling events
+ of an arbitrary workload.
+
+ 'perf sched latency' to report the per task scheduling latencies
+ and other scheduling properties of the workload.
+
+ 'perf sched trace' to see a detailed trace of the workload that
+ was recorded.
+
+ 'perf sched replay' to simulate the workload that was recorded
+ via perf sched record. (This is done by starting up mockup threads
+ that mimic the workload based on the events in the trace. These
+ threads can then replay the timings (CPU runtime and sleep patterns)
+ of the workload as it occurred when it was recorded - and can repeat
+ it a number of times, measuring its performance.)
+
+OPTIONS
+-------
+-D::
+--dump-raw-trace=::
+        Display verbose dump of the sched data.
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
new file mode 100644
index 00000000000..1c2ed3090cc
--- /dev/null
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -0,0 +1,38 @@
+perf-timechart(1)
+=================
+
+NAME
+----
+perf-timechart - Tool to visualize total system behavior during a workload
+
+SYNOPSIS
+--------
+[verse]
+'perf timechart' {record}
+
+DESCRIPTION
+-----------
+There are two variants of perf timechart:
+
+ 'perf timechart record <command>' to record the system level events
+ of an arbitrary workload.
+
+ 'perf timechart' to turn a trace into a Scalable Vector Graphics file
+ that can be viewed with popular SVG viewers such as 'Inkscape'.
+
+OPTIONS
+-------
+-o::
+--output=::
+        Select the output file (default: output.svg)
+-i::
+--input=::
+        Select the input file (default: perf.data)
+-w::
+--width=::
+        Select the width of the SVG file (default: 1000)
+
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
new file mode 100644
index 00000000000..41ed75398ca
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -0,0 +1,25 @@
+perf-trace(1)
+==============
+
+NAME
+----
+perf-trace - Read perf.data (created by perf record) and display trace output
+
+SYNOPSIS
+--------
+[verse]
+'perf trace' [-i <file> | --input=file] symbol_name
+
+DESCRIPTION
+-----------
+This command reads the input file and displays the trace recorded.
+
+OPTIONS
+-------
+-D::
+--dump-raw-trace=::
+        Display verbose dump of the trace data.
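
For orientation, a session with the three new tools might look like the sketch below, using only the subcommands and flags documented above; './my_workload' stands in for an arbitrary workload command and is purely illustrative:

    # scheduler analysis
    perf sched record ./my_workload    # record scheduling events into perf.data
    perf sched latency                 # per-task latencies and other properties
    perf sched trace                   # detailed trace of the recorded workload
    perf sched replay                  # re-run the workload via mockup threads

    # system-wide timechart
    perf timechart record ./my_workload
    perf timechart -i perf.data -o output.svg -w 1000

    # raw trace output from an existing perf.data
    perf trace -i perf.data
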
+ +SEE ALSO +-------- +linkperf:perf-record[1] diff --git a/tools/perf/Makefile b/tools/perf/Makefile index 9f8d207a91b..b5f1953b614 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -318,7 +318,7 @@ export PERL_PATH LIB_FILE=libperf.a -LIB_H += ../../include/linux/perf_counter.h +LIB_H += ../../include/linux/perf_event.h LIB_H += ../../include/linux/rbtree.h LIB_H += ../../include/linux/list.h LIB_H += util/include/linux/list.h @@ -373,13 +373,16 @@ LIB_OBJS += util/thread.o LIB_OBJS += util/trace-event-parse.o LIB_OBJS += util/trace-event-read.o LIB_OBJS += util/trace-event-info.o +LIB_OBJS += util/svghelper.o BUILTIN_OBJS += builtin-annotate.o BUILTIN_OBJS += builtin-help.o +BUILTIN_OBJS += builtin-sched.o BUILTIN_OBJS += builtin-list.o BUILTIN_OBJS += builtin-record.o BUILTIN_OBJS += builtin-report.o BUILTIN_OBJS += builtin-stat.o +BUILTIN_OBJS += builtin-timechart.o BUILTIN_OBJS += builtin-top.o BUILTIN_OBJS += builtin-trace.o @@ -710,6 +713,12 @@ builtin-help.o: builtin-help.c common-cmds.h PERF-CFLAGS '-DPERF_MAN_PATH="$(mandir_SQ)"' \ '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +builtin-timechart.o: builtin-timechart.c common-cmds.h PERF-CFLAGS + $(QUIET_CC)$(CC) -o $*.o -c $(ALL_CFLAGS) \ + '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ + '-DPERF_MAN_PATH="$(mandir_SQ)"' \ + '-DPERF_INFO_PATH="$(infodir_SQ)"' $< + $(BUILT_INS): perf$X $(QUIET_BUILT_IN)$(RM) $@ && \ ln perf$X $@ 2>/dev/null || \ diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 043d85b7e25..1ec74161581 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -505,7 +505,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) return -1; } - if (event->header.misc & PERF_EVENT_MISC_KERNEL) { + if (event->header.misc & PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -513,7 +513,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf(" ...... 
dso: %s\n", dso->name); - } else if (event->header.misc & PERF_EVENT_MISC_USER) { + } else if (event->header.misc & PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -565,7 +565,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->mmap.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_MMAP %d: [%p(%p) @ %p]: %s\n", + dump_printf("%p [%p]: PERF_RECORD_MMAP %d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, @@ -575,7 +575,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) event->mmap.filename); if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); return 0; } @@ -591,14 +591,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) struct thread *thread; thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm(thread, event->comm.comm)) { - dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } total_comm++; @@ -614,7 +614,7 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->fork.pid, &threads, &last_match); parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_FORK: %d:%d\n", + dump_printf("%p [%p]: PERF_RECORD_FORK: %d:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->fork.pid, event->fork.ppid); @@ -627,7 +627,7 @@ process_fork_event(event_t *event, unsigned long offset, unsigned long head) return 0; if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); return -1; } total_fork++; @@ -639,23 +639,23 @@ static int process_event(event_t *event, unsigned long offset, unsigned long head) { switch (event->header.type) { - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MMAP: + case PERF_RECORD_MMAP: return process_mmap_event(event, offset, head); - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_FORK: + case PERF_RECORD_FORK: return process_fork_event(event, offset, head); /* * We dont process them right now but they are fine: */ - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: return 0; default: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 99a12fe86e9..a5a050af8e7 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -48,6 +48,8 @@ static int call_graph = 0; static int inherit_stat = 0; static int no_samples = 0; static int sample_address = 0; +static int multiplex = 0; +static int multiplex_fd = -1; static long samples; static struct timeval last_read; @@ -75,7 +77,7 @@ static struct mmap_data mmap_array[MAX_NR_CPUS][MAX_COUNTERS]; static unsigned long mmap_read_head(struct mmap_data *md) { - 
struct perf_counter_mmap_page *pc = md->base; + struct perf_event_mmap_page *pc = md->base; long head; head = pc->data_head; @@ -86,7 +88,7 @@ static unsigned long mmap_read_head(struct mmap_data *md) static void mmap_write_tail(struct mmap_data *md, unsigned long tail) { - struct perf_counter_mmap_page *pc = md->base; + struct perf_event_mmap_page *pc = md->base; /* * ensure all reads are done before we write the tail out. @@ -231,7 +233,7 @@ static pid_t pid_synthesize_comm_event(pid_t pid, int full) } } - comm_ev.header.type = PERF_EVENT_COMM; + comm_ev.header.type = PERF_RECORD_COMM; size = ALIGN(size, sizeof(u64)); comm_ev.header.size = sizeof(comm_ev) - (sizeof(comm_ev.comm) - size); @@ -286,7 +288,7 @@ static void pid_synthesize_mmap_samples(pid_t pid, pid_t tgid) while (1) { char bf[BUFSIZ], *pbf = bf; struct mmap_event mmap_ev = { - .header = { .type = PERF_EVENT_MMAP }, + .header = { .type = PERF_RECORD_MMAP }, }; int n; size_t size; @@ -353,7 +355,7 @@ static void synthesize_all(void) static int group_fd; -static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int nr) +static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int nr) { struct perf_header_attr *h_attr; @@ -369,7 +371,7 @@ static struct perf_header_attr *get_header_attr(struct perf_counter_attr *a, int static void create_counter(int counter, int cpu, pid_t pid) { - struct perf_counter_attr *attr = attrs + counter; + struct perf_event_attr *attr = attrs + counter; struct perf_header_attr *h_attr; int track = !counter; /* only the first counter needs these */ struct { @@ -415,7 +417,7 @@ static void create_counter(int counter, int cpu, pid_t pid) attr->disabled = 1; try_again: - fd[nr_cpu][counter] = sys_perf_counter_open(attr, pid, cpu, group_fd, 0); + fd[nr_cpu][counter] = sys_perf_event_open(attr, pid, cpu, group_fd, 0); if (fd[nr_cpu][counter] < 0) { int err = errno; @@ -442,7 +444,7 @@ try_again: printf("\n"); error("perfcounter syscall returned with %d (%s)\n", fd[nr_cpu][counter], strerror(err)); - die("No CONFIG_PERF_COUNTERS=y kernel support configured?\n"); + die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); exit(-1); } @@ -470,22 +472,31 @@ try_again: */ if (group && group_fd == -1) group_fd = fd[nr_cpu][counter]; + if (multiplex && multiplex_fd == -1) + multiplex_fd = fd[nr_cpu][counter]; - event_array[nr_poll].fd = fd[nr_cpu][counter]; - event_array[nr_poll].events = POLLIN; - nr_poll++; - - mmap_array[nr_cpu][counter].counter = counter; - mmap_array[nr_cpu][counter].prev = 0; - mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; - mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, - PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); - if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { - error("failed to mmap with %d (%s)\n", errno, strerror(errno)); - exit(-1); + if (multiplex && fd[nr_cpu][counter] != multiplex_fd) { + int ret; + + ret = ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_SET_OUTPUT, multiplex_fd); + assert(ret != -1); + } else { + event_array[nr_poll].fd = fd[nr_cpu][counter]; + event_array[nr_poll].events = POLLIN; + nr_poll++; + + mmap_array[nr_cpu][counter].counter = counter; + mmap_array[nr_cpu][counter].prev = 0; + mmap_array[nr_cpu][counter].mask = mmap_pages*page_size - 1; + mmap_array[nr_cpu][counter].base = mmap(NULL, (mmap_pages+1)*page_size, + PROT_READ|PROT_WRITE, MAP_SHARED, fd[nr_cpu][counter], 0); + if (mmap_array[nr_cpu][counter].base == MAP_FAILED) { + error("failed to mmap with %d 
(%s)\n", errno, strerror(errno)); + exit(-1); + } } - ioctl(fd[nr_cpu][counter], PERF_COUNTER_IOC_ENABLE); + ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE); } static void open_counters(int cpu, pid_t pid) @@ -513,6 +524,7 @@ static int __cmd_record(int argc, const char **argv) pid_t pid = 0; int flags; int ret; + unsigned long waking = 0; page_size = sysconf(_SC_PAGE_SIZE); nr_cpus = sysconf(_SC_NPROCESSORS_ONLN); @@ -614,17 +626,29 @@ static int __cmd_record(int argc, const char **argv) int hits = samples; for (i = 0; i < nr_cpu; i++) { - for (counter = 0; counter < nr_counters; counter++) - mmap_read(&mmap_array[i][counter]); + for (counter = 0; counter < nr_counters; counter++) { + if (mmap_array[i][counter].base) + mmap_read(&mmap_array[i][counter]); + } } if (hits == samples) { if (done) break; - ret = poll(event_array, nr_poll, 100); + ret = poll(event_array, nr_poll, -1); + waking++; + } + + if (done) { + for (i = 0; i < nr_cpu; i++) { + for (counter = 0; counter < nr_counters; counter++) + ioctl(fd[i][counter], PERF_EVENT_IOC_DISABLE); + } } } + fprintf(stderr, "[ perf record: Woken up %ld times to write data ]\n", waking); + /* * Approximate RIP event size: 24 bytes. */ @@ -681,6 +705,8 @@ static const struct option options[] = { "Sample addresses"), OPT_BOOLEAN('n', "no-samples", &no_samples, "don't sample"), + OPT_BOOLEAN('M', "multiplex", &multiplex, + "multiplex counter output in a single channel"), OPT_END() }; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index cdf9a8d27bb..19669c20088 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1121,7 +1121,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) more_data += sizeof(u64); } - dump_printf("%p [%p]: PERF_EVENT_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", + dump_printf("%p [%p]: PERF_RECORD_SAMPLE (IP, %d): %d/%d: %p period: %Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->header.misc, @@ -1158,9 +1158,9 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) if (comm_list && !strlist__has_entry(comm_list, thread->comm)) return 0; - cpumode = event->header.misc & PERF_EVENT_MISC_CPUMODE_MASK; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - if (cpumode == PERF_EVENT_MISC_KERNEL) { + if (cpumode == PERF_RECORD_MISC_KERNEL) { show = SHOW_KERNEL; level = 'k'; @@ -1168,7 +1168,7 @@ process_sample_event(event_t *event, unsigned long offset, unsigned long head) dump_printf(" ...... 
dso: %s\n", dso->name); - } else if (cpumode == PERF_EVENT_MISC_USER) { + } else if (cpumode == PERF_RECORD_MISC_USER) { show = SHOW_USER; level = '.'; @@ -1210,7 +1210,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->mmap.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_MMAP %d/%d: [%p(%p) @ %p]: %s\n", + dump_printf("%p [%p]: PERF_RECORD_MMAP %d/%d: [%p(%p) @ %p]: %s\n", (void *)(offset + head), (void *)(long)(event->header.size), event->mmap.pid, @@ -1221,7 +1221,7 @@ process_mmap_event(event_t *event, unsigned long offset, unsigned long head) event->mmap.filename); if (thread == NULL || map == NULL) { - dump_printf("problem processing PERF_EVENT_MMAP, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); return 0; } @@ -1238,14 +1238,14 @@ process_comm_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->comm.pid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_COMM: %s:%d\n", + dump_printf("%p [%p]: PERF_RECORD_COMM: %s:%d\n", (void *)(offset + head), (void *)(long)(event->header.size), event->comm.comm, event->comm.pid); if (thread == NULL || thread__set_comm_adjust(thread, event->comm.comm)) { - dump_printf("problem processing PERF_EVENT_COMM, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); return -1; } total_comm++; @@ -1262,10 +1262,10 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) thread = threads__findnew(event->fork.pid, &threads, &last_match); parent = threads__findnew(event->fork.ppid, &threads, &last_match); - dump_printf("%p [%p]: PERF_EVENT_%s: (%d:%d):(%d:%d)\n", + dump_printf("%p [%p]: PERF_RECORD_%s: (%d:%d):(%d:%d)\n", (void *)(offset + head), (void *)(long)(event->header.size), - event->header.type == PERF_EVENT_FORK ? "FORK" : "EXIT", + event->header.type == PERF_RECORD_FORK ? 
"FORK" : "EXIT", event->fork.pid, event->fork.tid, event->fork.ppid, event->fork.ptid); @@ -1276,11 +1276,11 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) if (thread == parent) return 0; - if (event->header.type == PERF_EVENT_EXIT) + if (event->header.type == PERF_RECORD_EXIT) return 0; if (!thread || !parent || thread__fork(thread, parent)) { - dump_printf("problem processing PERF_EVENT_FORK, skipping event.\n"); + dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); return -1; } total_fork++; @@ -1291,7 +1291,7 @@ process_task_event(event_t *event, unsigned long offset, unsigned long head) static int process_lost_event(event_t *event, unsigned long offset, unsigned long head) { - dump_printf("%p [%p]: PERF_EVENT_LOST: id:%Ld: lost:%Ld\n", + dump_printf("%p [%p]: PERF_RECORD_LOST: id:%Ld: lost:%Ld\n", (void *)(offset + head), (void *)(long)(event->header.size), event->lost.id, @@ -1305,7 +1305,7 @@ process_lost_event(event_t *event, unsigned long offset, unsigned long head) static int process_read_event(event_t *event, unsigned long offset, unsigned long head) { - struct perf_counter_attr *attr; + struct perf_event_attr *attr; attr = perf_header__find_attr(event->read.id, header); @@ -1319,7 +1319,7 @@ process_read_event(event_t *event, unsigned long offset, unsigned long head) event->read.value); } - dump_printf("%p [%p]: PERF_EVENT_READ: %d %d %s %Lu\n", + dump_printf("%p [%p]: PERF_RECORD_READ: %d %d %s %Lu\n", (void *)(offset + head), (void *)(long)(event->header.size), event->read.pid, @@ -1337,31 +1337,31 @@ process_event(event_t *event, unsigned long offset, unsigned long head) trace_event(event); switch (event->header.type) { - case PERF_EVENT_SAMPLE: + case PERF_RECORD_SAMPLE: return process_sample_event(event, offset, head); - case PERF_EVENT_MMAP: + case PERF_RECORD_MMAP: return process_mmap_event(event, offset, head); - case PERF_EVENT_COMM: + case PERF_RECORD_COMM: return process_comm_event(event, offset, head); - case PERF_EVENT_FORK: - case PERF_EVENT_EXIT: + case PERF_RECORD_FORK: + case PERF_RECORD_EXIT: return process_task_event(event, offset, head); - case PERF_EVENT_LOST: + case PERF_RECORD_LOST: return process_lost_event(event, offset, head); - case PERF_EVENT_READ: + case PERF_RECORD_READ: return process_read_event(event, offset, head); /* * We dont process them right now but they are fine: */ - case PERF_EVENT_THROTTLE: - case PERF_EVENT_UNTHROTTLE: + case PERF_RECORD_THROTTLE: + case PERF_RECORD_UNTHROTTLE: return 0; default: diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c new file mode 100644 index 00000000000..ea9c15c0cdf --- /dev/null +++ b/tools/perf/builtin-sched.c @@ -0,0 +1,2004 @@ +#include "builtin.h" +#include "perf.h" + +#include "util/util.h" +#include "util/cache.h" +#include "util/symbol.h" +#include "util/thread.h" +#include "util/header.h" + +#include "util/parse-options.h" +#include "util/trace-event.h" + +#include "util/debug.h" + +#include <sys/types.h> +#include <sys/prctl.h> + +#include <semaphore.h> +#include <pthread.h> +#include <math.h> + +static char const *input_name = "perf.data"; +static int input; +static unsigned long page_size; +static unsigned long mmap_window = 32; + +static unsigned long total_comm = 0; + +static struct rb_root threads; +static struct thread *last_match; + +static struct perf_header *header; +static u64 sample_type; + +static char default_sort_order[] = "avg, max, switch, runtime"; +static char *sort_order = default_sort_order; + +#define 
PR_SET_NAME 15 /* Set process name */ +#define MAX_CPUS 4096 + +#define BUG_ON(x) assert(!(x)) + +static u64 run_measurement_overhead; +static u64 sleep_measurement_overhead; + +#define COMM_LEN 20 +#define SYM_LEN 129 + +#define MAX_PID 65536 + +static unsigned long nr_tasks; + +struct sched_atom; + +struct task_desc { + unsigned long nr; + unsigned long pid; + char comm[COMM_LEN]; + + unsigned long nr_events; + unsigned long curr_event; + struct sched_atom **atoms; + + pthread_t thread; + sem_t sleep_sem; + + sem_t ready_for_work; + sem_t work_done_sem; + + u64 cpu_usage; +}; + +enum sched_event_type { + SCHED_EVENT_RUN, + SCHED_EVENT_SLEEP, + SCHED_EVENT_WAKEUP, +}; + +struct sched_atom { + enum sched_event_type type; + u64 timestamp; + u64 duration; + unsigned long nr; + int specific_wait; + sem_t *wait_sem; + struct task_desc *wakee; +}; + +static struct task_desc *pid_to_task[MAX_PID]; + +static struct task_desc **tasks; + +static pthread_mutex_t start_work_mutex = PTHREAD_MUTEX_INITIALIZER; +static u64 start_time; + +static pthread_mutex_t work_done_wait_mutex = PTHREAD_MUTEX_INITIALIZER; + +static unsigned long nr_run_events; +static unsigned long nr_sleep_events; +static unsigned long nr_wakeup_events; + +static unsigned long nr_sleep_corrections; +static unsigned long nr_run_events_optimized; + +static unsigned long targetless_wakeups; +static unsigned long multitarget_wakeups; + +static u64 cpu_usage; +static u64 runavg_cpu_usage; +static u64 parent_cpu_usage; +static u64 runavg_parent_cpu_usage; + +static unsigned long nr_runs; +static u64 sum_runtime; +static u64 sum_fluct; +static u64 run_avg; + +static unsigned long replay_repeat = 10; +static unsigned long nr_timestamps; +static unsigned long nr_unordered_timestamps; +static unsigned long nr_state_machine_bugs; +static unsigned long nr_context_switch_bugs; +static unsigned long nr_events; +static unsigned long nr_lost_chunks; +static unsigned long nr_lost_events; + +#define TASK_STATE_TO_CHAR_STR "RSDTtZX" + +enum thread_state { + THREAD_SLEEPING = 0, + THREAD_WAIT_CPU, + THREAD_SCHED_IN, + THREAD_IGNORE +}; + +struct work_atom { + struct list_head list; + enum thread_state state; + u64 sched_out_time; + u64 wake_up_time; + u64 sched_in_time; + u64 runtime; +}; + +struct work_atoms { + struct list_head work_list; + struct thread *thread; + struct rb_node node; + u64 max_lat; + u64 total_lat; + u64 nb_atoms; + u64 total_runtime; +}; + +typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); + +static struct rb_root atom_root, sorted_atom_root; + +static u64 all_runtime; +static u64 all_count; + + +static u64 get_nsecs(void) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC, &ts); + + return ts.tv_sec * 1000000000ULL + ts.tv_nsec; +} + +static void burn_nsecs(u64 nsecs) +{ + u64 T0 = get_nsecs(), T1; + + do { + T1 = get_nsecs(); + } while (T1 + run_measurement_overhead < T0 + nsecs); +} + +static void sleep_nsecs(u64 nsecs) +{ + struct timespec ts; + + ts.tv_nsec = nsecs % 999999999; + ts.tv_sec = nsecs / 999999999; + + nanosleep(&ts, NULL); +} + +static void calibrate_run_measurement_overhead(void) +{ + u64 T0, T1, delta, min_delta = 1000000000ULL; + int i; + + for (i = 0; i < 10; i++) { + T0 = get_nsecs(); + burn_nsecs(0); + T1 = get_nsecs(); + delta = T1-T0; + min_delta = min(min_delta, delta); + } + run_measurement_overhead = min_delta; + + printf("run measurement overhead: %Ld nsecs\n", min_delta); +} + +static void calibrate_sleep_measurement_overhead(void) +{ + u64 T0, T1, delta, min_delta = 
1000000000ULL; + int i; + + for (i = 0; i < 10; i++) { + T0 = get_nsecs(); + sleep_nsecs(10000); + T1 = get_nsecs(); + delta = T1-T0; + min_delta = min(min_delta, delta); + } + min_delta -= 10000; + sleep_measurement_overhead = min_delta; + + printf("sleep measurement overhead: %Ld nsecs\n", min_delta); +} + +static struct sched_atom * +get_new_event(struct task_desc *task, u64 timestamp) +{ + struct sched_atom *event = calloc(1, sizeof(*event)); + unsigned long idx = task->nr_events; + size_t size; + + event->timestamp = timestamp; + event->nr = idx; + + task->nr_events++; + size = sizeof(struct sched_atom *) * task->nr_events; + task->atoms = realloc(task->atoms, size); + BUG_ON(!task->atoms); + + task->atoms[idx] = event; + + return event; +} + +static struct sched_atom *last_event(struct task_desc *task) +{ + if (!task->nr_events) + return NULL; + + return task->atoms[task->nr_events - 1]; +} + +static void +add_sched_event_run(struct task_desc *task, u64 timestamp, u64 duration) +{ + struct sched_atom *event, *curr_event = last_event(task); + + /* + * optimize an existing RUN event by merging this one + * to it: + */ + if (curr_event && curr_event->type == SCHED_EVENT_RUN) { + nr_run_events_optimized++; + curr_event->duration += duration; + return; + } + + event = get_new_event(task, timestamp); + + event->type = SCHED_EVENT_RUN; + event->duration = duration; + + nr_run_events++; +} + +static void +add_sched_event_wakeup(struct task_desc *task, u64 timestamp, + struct task_desc *wakee) +{ + struct sched_atom *event, *wakee_event; + + event = get_new_event(task, timestamp); + event->type = SCHED_EVENT_WAKEUP; + event->wakee = wakee; + + wakee_event = last_event(wakee); + if (!wakee_event || wakee_event->type != SCHED_EVENT_SLEEP) { + targetless_wakeups++; + return; + } + if (wakee_event->wait_sem) { + multitarget_wakeups++; + return; + } + + wakee_event->wait_sem = calloc(1, sizeof(*wakee_event->wait_sem)); + sem_init(wakee_event->wait_sem, 0, 0); + wakee_event->specific_wait = 1; + event->wait_sem = wakee_event->wait_sem; + + nr_wakeup_events++; +} + +static void +add_sched_event_sleep(struct task_desc *task, u64 timestamp, + u64 task_state __used) +{ + struct sched_atom *event = get_new_ev |
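
The new -M/--multiplex option added to builtin-record.c above funnels every counter's output into the first counter's ring buffer via PERF_EVENT_IOC_SET_OUTPUT, so only one channel has to be mmapped and polled. A minimal standalone sketch of that pattern follows; the event type, sample period, event count and the 8-page buffer size are illustrative choices, and whether SET_OUTPUT accepts a given pair of events depends on the running kernel:

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

/* thin wrapper; glibc does not provide one for this syscall */
static int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
			       int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long page_size = sysconf(_SC_PAGE_SIZE);
	int fd[2], i, multiplex_fd = -1;
	void *base = NULL;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_TASK_CLOCK;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_TID;

	for (i = 0; i < 2; i++) {
		fd[i] = sys_perf_event_open(&attr, 0 /* self */, -1, -1, 0);
		if (fd[i] < 0) {
			perror("perf_event_open");
			return 1;
		}

		if (multiplex_fd == -1) {
			/* the first event owns the only mmap'ed ring buffer */
			multiplex_fd = fd[i];
			base = mmap(NULL, (1 + 8) * page_size,
				    PROT_READ | PROT_WRITE, MAP_SHARED,
				    multiplex_fd, 0);
			if (base == MAP_FAILED) {
				perror("mmap");
				return 1;
			}
		} else if (ioctl(fd[i], PERF_EVENT_IOC_SET_OUTPUT,
				 multiplex_fd)) {
			/* no SET_OUTPUT support: would need a per-event mmap */
			perror("PERF_EVENT_IOC_SET_OUTPUT");
			return 1;
		}

		ioctl(fd[i], PERF_EVENT_IOC_ENABLE, 0);
	}

	/*
	 * Samples from both events now land in 'base'; a real consumer would
	 * poll(multiplex_fd) and walk the ring buffer, as perf record does.
	 */
	printf("events redirected into one buffer at %p\n", base);
	return 0;
}
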