diff options
Diffstat (limited to 'tools/perf/bench')
| -rw-r--r-- | tools/perf/bench/bench.h | 3 | ||||
| -rw-r--r-- | tools/perf/bench/futex-hash.c | 212 | ||||
| -rw-r--r-- | tools/perf/bench/futex-requeue.c | 211 | ||||
| -rw-r--r-- | tools/perf/bench/futex-wake.c | 201 | ||||
| -rw-r--r-- | tools/perf/bench/futex.h | 71 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memcpy-arch.h | 2 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memcpy.c | 2 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memset-arch.h | 2 | ||||
| -rw-r--r-- | tools/perf/bench/mem-memset.c | 2 | ||||
| -rw-r--r-- | tools/perf/bench/numa.c | 43 | ||||
| -rw-r--r-- | tools/perf/bench/sched-pipe.c | 115 |
11 files changed, 816 insertions, 48 deletions
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 0fdc85269c4..eba46709b27 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -31,6 +31,9 @@ extern int bench_sched_pipe(int argc, const char **argv, const char *prefix); extern int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused); extern int bench_mem_memset(int argc, const char **argv, const char *prefix); +extern int bench_futex_hash(int argc, const char **argv, const char *prefix); +extern int bench_futex_wake(int argc, const char **argv, const char *prefix); +extern int bench_futex_requeue(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" #define BENCH_FORMAT_DEFAULT 0 diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c new file mode 100644 index 00000000000..a84206e9c4a --- /dev/null +++ b/tools/perf/bench/futex-hash.c @@ -0,0 +1,212 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com> + * + * futex-hash: Stress the hell out of the Linux kernel futex uaddr hashing. + * + * This program is particularly useful for measuring the kernel's futex hash + * table/function implementation. In order for it to make sense, use with as + * many threads and futexes as possible. 
+ */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> + +static unsigned int nthreads = 0; +static unsigned int nsecs = 10; +/* amount of futexes per thread */ +static unsigned int nfutexes = 1024; +static bool fshared = false, done = false, silent = false; + +static struct timeval start, end, runtime; /* file-local: keeps 'start'/'end'/'runtime' out of the global symbol table */ +static pthread_mutex_t thread_lock; +static unsigned int threads_starting; +static struct stats throughput_stats; +static pthread_cond_t thread_parent, thread_worker; + +struct worker { + int tid; + u_int32_t *futex; + pthread_t thread; + unsigned long ops; +}; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('r', "runtime", &nsecs, "Specify runtime (in seconds)"), + OPT_UINTEGER('f', "futexes", &nfutexes, "Specify amount of futexes per threads"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_END() +}; + +static const char * const bench_futex_hash_usage[] = { + "perf bench futex hash <options>", + NULL +}; + +static void *workerfn(void *arg) +{ + int ret; + unsigned int i; + struct worker *w = (struct worker *) arg; + + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + do { + for (i = 0; i < nfutexes; i++, w->ops++) { + /* + * We want the futex calls to fail in order to stress + * the hashing of uaddr and not measure other steps, + * such as internal waitqueue handling, thus enlarging + * the critical region protected by hb->lock. 
+ */ + ret = futex_wait(&w->futex[i], 1234, NULL, + fshared ? 0 : FUTEX_PRIVATE_FLAG); + if (!silent && + (!ret || (errno != EAGAIN && errno != EWOULDBLOCK))) /* only a failure with EAGAIN/EWOULDBLOCK is expected */ + warn("Non-expected futex return call"); + } + } while (!done); + + return NULL; +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + /* inform all threads that we're done for the day */ + done = true; + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); +} + +static void print_summary(void) +{ + unsigned long avg = avg_stats(&throughput_stats); + double stddev = stddev_stats(&throughput_stats); + + printf("%sAveraged %ld operations/sec (+- %.2f%%), total secs = %d\n", + !silent ? "\n" : "", avg, rel_stddev_stats(stddev, avg), + (int) runtime.tv_sec); +} + +int bench_futex_hash(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + cpu_set_t cpu; + struct sigaction act = { .sa_flags = SA_SIGINFO }; /* zero-init; the handler is installed via sa_sigaction */ + unsigned int i, ncpus; + pthread_attr_t thread_attr; + struct worker *worker = NULL; + + argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); + if (argc) { + usage_with_options(bench_futex_hash_usage, options); + exit(EXIT_FAILURE); + } + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) /* default to the number of CPUs */ + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + goto errmem; + + printf("Run summary [PID %d]: %d threads, each operating on %d [%s] futexes for %d secs.\n\n", + getpid(), nthreads, nfutexes, fshared ? 
"shared":"private", nsecs); + + init_stats(&throughput_stats); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + threads_starting = nthreads; + pthread_attr_init(&thread_attr); + gettimeofday(&start, NULL); + for (i = 0; i < nthreads; i++) { + worker[i].tid = i; + worker[i].futex = calloc(nfutexes, sizeof(*worker[i].futex)); + if (!worker[i].futex) + goto errmem; + + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); + if (ret) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + ret = pthread_create(&worker[i].thread, &thread_attr, workerfn, + (void *)(struct worker *) &worker[i]); + if (ret) + err(EXIT_FAILURE, "pthread_create"); + + } + pthread_attr_destroy(&thread_attr); + + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + sleep(nsecs); + toggle_done(0, NULL, NULL); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i].thread, NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + + for (i = 0; i < nthreads; i++) { + unsigned long t = runtime.tv_sec > 0 ? worker[i].ops/runtime.tv_sec : 0; /* runs shorter than 1s (e.g. -r 0) must not divide by zero */ + update_stats(&throughput_stats, t); + if (!silent) { + if (nfutexes == 1) + printf("[thread %2d] futex: %p [ %ld ops/sec ]\n", + worker[i].tid, &worker[i].futex[0], t); + else + printf("[thread %2d] futexes: %p ... 
%p [ %ld ops/sec ]\n", + worker[i].tid, &worker[i].futex[0], + &worker[i].futex[nfutexes-1], t); + } + + free(worker[i].futex); + } + + print_summary(); + + free(worker); + return ret; +errmem: + err(EXIT_FAILURE, "calloc"); +} diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c new file mode 100644 index 00000000000..a16255876f1 --- /dev/null +++ b/tools/perf/bench/futex-requeue.c @@ -0,0 +1,211 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com> + * + * futex-requeue: Block a bunch of threads on futex1 and requeue them + * on futex2, N at a time. + * + * This program is particularly useful to measure the latency of nthread + * requeues without waking up any tasks -- thus mimicking a regular futex_wait. + */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> + +static u_int32_t futex1 = 0, futex2 = 0; + +/* + * How many tasks to requeue at a time. + * Default to 1 in order to make the kernel work more. + */ +static unsigned int nrequeue = 1; + +/* + * There can be significant variance from run to run, + * the more repeats, the more exact the overall avg and + * the better idea of the futex latency. 
+ */ +static unsigned int repeat = 10; + +static pthread_t *worker; +static bool done = 0, silent = 0; +static pthread_mutex_t thread_lock; +static pthread_cond_t thread_parent, thread_worker; +static struct stats requeuetime_stats, requeued_stats; +static unsigned int ncpus, threads_starting, nthreads = 0; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('q', "nrequeue", &nrequeue, "Specify amount of threads to requeue at once"), + OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_END() +}; + +static const char * const bench_futex_requeue_usage[] = { + "perf bench futex requeue <options>", + NULL +}; + +static void print_summary(void) +{ + double requeuetime_avg = avg_stats(&requeuetime_stats); + double requeuetime_stddev = stddev_stats(&requeuetime_stats); + unsigned int requeued_avg = avg_stats(&requeued_stats); + + printf("Requeued %d of %d threads in %.4f ms (+-%.2f%%)\n", + requeued_avg, + nthreads, + requeuetime_avg/1e3, + rel_stddev_stats(requeuetime_stddev, requeuetime_avg)); +} + +static void *workerfn(void *arg __maybe_unused) +{ + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG); + return NULL; +} + +static void block_threads(pthread_t *w, + pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + /* create and block all threads */ + for (i = 0; i < nthreads; i++) { + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + 
err(EXIT_FAILURE, "pthread_create"); + } +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + done = true; +} + +int bench_futex_requeue(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i, j; + struct sigaction act = { .sa_flags = SA_SIGINFO }; /* zero-init; the handler is installed via sa_sigaction */ + pthread_attr_t thread_attr; + + argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0); + if (argc) + goto err; + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + printf("Run summary [PID %d]: Requeuing %d threads (from %p to %p), " + "%d at a time.\n\n", + getpid(), nthreads, &futex1, &futex2, nrequeue); + + init_stats(&requeued_stats); + init_stats(&requeuetime_stats); + pthread_attr_init(&thread_attr); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + for (j = 0; j < repeat && !done; j++) { + unsigned int nrequeued = 0; + struct timeval start, end, runtime; + + /* create, launch & block all threads */ + block_threads(worker, thread_attr); + + /* make sure all threads are already blocked */ + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + usleep(100000); + + /* Ok, all threads are patiently blocked, start requeueing */ + gettimeofday(&start, NULL); + for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) + /* + * Do not wakeup any tasks blocked on futex1, allowing + * us to really measure futex_wait functionality. 
+ */ + futex_cmp_requeue(&futex1, 0, &futex2, 0, nrequeue, + FUTEX_PRIVATE_FLAG); + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); + + update_stats(&requeued_stats, nrequeued); + update_stats(&requeuetime_stats, runtime.tv_sec * 1000000ULL + runtime.tv_usec); /* full elapsed usecs, not just the sub-second part */ + + if (!silent) { + printf("[Run %d]: Requeued %d of %d threads in %.4f ms\n", + j + 1, nrequeued, nthreads, (runtime.tv_sec * 1e6 + runtime.tv_usec)/1e3); + } + + /* everybody should be blocked on futex2, wake'em up */ + nrequeued = futex_wake(&futex2, nthreads, FUTEX_PRIVATE_FLAG); + if (nthreads != nrequeued) + warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i], NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + pthread_attr_destroy(&thread_attr); + + print_summary(); + + free(worker); + return ret; +err: + usage_with_options(bench_futex_requeue_usage, options); + exit(EXIT_FAILURE); +} diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c new file mode 100644 index 00000000000..d096169b161 --- /dev/null +++ b/tools/perf/bench/futex-wake.c @@ -0,0 +1,201 @@ +/* + * Copyright (C) 2013 Davidlohr Bueso <davidlohr@hp.com> + * + * futex-wake: Block a bunch of threads on a futex and wake'em up, N at a time. + * + * This program is particularly useful to measure the latency of nthread wakeups + * in non-error situations: all waiters are queued and all wake calls wakeup + * one or more tasks, and thus the waitqueue is never empty. 
+ */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> + +/* all threads will block on the same futex */ +static u_int32_t futex1 = 0; + +/* + * How many wakeups to do at a time. + * Default to 1 in order to make the kernel work more. + */ +static unsigned int nwakes = 1; + +/* + * There can be significant variance from run to run, + * the more repeats, the more exact the overall avg and + * the better idea of the futex latency. + */ +static unsigned int repeat = 10; + +static pthread_t *worker; /* file-local, same as in futex-requeue.c */ +static bool done = 0, silent = 0; +static pthread_mutex_t thread_lock; +static pthread_cond_t thread_parent, thread_worker; +static struct stats waketime_stats, wakeup_stats; +static unsigned int ncpus, threads_starting, nthreads = 0; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nthreads, "Specify amount of threads"), + OPT_UINTEGER('w', "nwakes", &nwakes, "Specify amount of threads to wake at once"), + OPT_UINTEGER('r', "repeat", &repeat, "Specify amount of times to repeat the run"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_END() +}; + +static const char * const bench_futex_wake_usage[] = { + "perf bench futex wake <options>", + NULL +}; + +static void *workerfn(void *arg __maybe_unused) +{ + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + futex_wait(&futex1, 0, NULL, FUTEX_PRIVATE_FLAG); + return NULL; +} + +static void print_summary(void) +{ + double waketime_avg = avg_stats(&waketime_stats); + double waketime_stddev = stddev_stats(&waketime_stats); + unsigned int wakeup_avg = avg_stats(&wakeup_stats); + + printf("Wokeup 
%d of %d threads in %.4f ms (+-%.2f%%)\n", + wakeup_avg, + nthreads, + waketime_avg/1e3, + rel_stddev_stats(waketime_stddev, waketime_avg)); +} + +static void block_threads(pthread_t *w, + pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nthreads; + + /* create and block all threads */ + for (i = 0; i < nthreads; i++) { + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) + err(EXIT_FAILURE, "pthread_create"); + } +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + done = true; +} + +int bench_futex_wake(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i, j; + struct sigaction act = { .sa_flags = SA_SIGINFO }; /* zero-init; the handler is installed via sa_sigaction */ + pthread_attr_t thread_attr; + + argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0); + if (argc) { + usage_with_options(bench_futex_wake_usage, options); + exit(EXIT_FAILURE); + } + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + if (!nthreads) + nthreads = ncpus; + + worker = calloc(nthreads, sizeof(*worker)); + if (!worker) + err(EXIT_FAILURE, "calloc"); + + printf("Run summary [PID %d]: blocking on %d threads (at futex %p), " + "waking up %d at a time.\n\n", + getpid(), nthreads, &futex1, nwakes); + + init_stats(&wakeup_stats); + init_stats(&waketime_stats); + pthread_attr_init(&thread_attr); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + for (j = 0; j < repeat && !done; j++) { + unsigned int nwoken = 0; + struct timeval start, end, runtime; + + /* create, launch & block all threads */ + block_threads(worker, thread_attr); + + /* make sure all threads are 
already blocked */ + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + usleep(100000); + + /* Ok, all threads are patiently blocked, start waking folks up */ + gettimeofday(&start, NULL); + while (nwoken != nthreads) + nwoken += futex_wake(&futex1, nwakes, FUTEX_PRIVATE_FLAG); + gettimeofday(&end, NULL); + timersub(&end, &start, &runtime); + + update_stats(&wakeup_stats, nwoken); + update_stats(&waketime_stats, runtime.tv_sec * 1000000ULL + runtime.tv_usec); /* full elapsed usecs, not just the sub-second part */ + + if (!silent) { + printf("[Run %d]: Wokeup %d of %d threads in %.4f ms\n", + j + 1, nwoken, nthreads, (runtime.tv_sec * 1e6 + runtime.tv_usec)/1e3); + } + + for (i = 0; i < nthreads; i++) { + ret = pthread_join(worker[i], NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + pthread_attr_destroy(&thread_attr); + + print_summary(); + + free(worker); + return ret; +} diff --git a/tools/perf/bench/futex.h b/tools/perf/bench/futex.h new file mode 100644 index 00000000000..71f2844cf97 --- /dev/null +++ b/tools/perf/bench/futex.h @@ -0,0 +1,71 @@ +/* + * Glibc independent futex library for testing kernel functionality. + * Shamelessly stolen from Darren Hart <dvhltc@us.ibm.com> + * http://git.kernel.org/cgit/linux/kernel/git/dvhart/futextest.git/ + */ + +#ifndef _FUTEX_H +#define _FUTEX_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <linux/futex.h> + +/** + * futex() - SYS_futex syscall wrapper + * @uaddr: address of first futex + * @op: futex op code + * @val: typically expected value of uaddr, but varies by op + * @timeout: typically an absolute struct timespec (except where noted + * otherwise). 
Overloaded by some ops + * @uaddr2: address of second futex for some ops + * @val3: varies by op + * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG + * + * futex() is used by all the following futex op wrappers. It can also be + * used for misuse and abuse testing. Generally, the specific op wrappers + * should be used instead. It is a macro instead of a static inline function as + * some of the types are overloaded (timeout is used for nr_requeue for + * example). + * + * These argument descriptions are the defaults for all + * like-named arguments in the following wrappers except where noted below. + */ +#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ + syscall(SYS_futex, uaddr, (op) | (opflags), val, timeout, uaddr2, val3) + +/** + * futex_wait() - block on uaddr with optional timeout + * @timeout: relative timeout + */ +static inline int +futex_wait(u_int32_t *uaddr, u_int32_t val, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); +} + +/** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks + */ +static inline int +futex_wake(u_int32_t *uaddr, int nr_wake, int opflags) +{ + return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); +} + +/** + * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue(u_int32_t *uaddr, u_int32_t val, u_int32_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +#endif /* _FUTEX_H */ diff --git a/tools/perf/bench/mem-memcpy-arch.h b/tools/perf/bench/mem-memcpy-arch.h index a72e36cb539..57b4ed87145 100644 --- a/tools/perf/bench/mem-memcpy-arch.h +++ b/tools/perf/bench/mem-memcpy-arch.h @@ -1,5 +1,5 @@ -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT 
#define MEMCPY_FN(fn, name, desc) \ extern void *fn(void *, const void *, size_t); diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 8cdca43016b..5ce71d3b72c 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -58,7 +58,7 @@ struct routine routines[] = { { "default", "Default memcpy() provided by glibc", memcpy }, -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMCPY_FN(fn, name, desc) { name, desc, fn }, #include "mem-memcpy-x86-64-asm-def.h" diff --git a/tools/perf/bench/mem-memset-arch.h b/tools/perf/bench/mem-memset-arch.h index a040fa77665..633800cb0dc 100644 --- a/tools/perf/bench/mem-memset-arch.h +++ b/tools/perf/bench/mem-memset-arch.h @@ -1,5 +1,5 @@ -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMSET_FN(fn, name, desc) \ extern void *fn(void *, int, size_t); diff --git a/tools/perf/bench/mem-memset.c b/tools/perf/bench/mem-memset.c index 4a2f1208196..9af79d2b18e 100644 --- a/tools/perf/bench/mem-memset.c +++ b/tools/perf/bench/mem-memset.c @@ -58,7 +58,7 @@ static const struct routine routines[] = { { "default", "Default memset() provided by glibc", memset }, -#ifdef ARCH_X86_64 +#ifdef HAVE_ARCH_X86_64_SUPPORT #define MEMSET_FN(fn, name, desc) { name, desc, fn }, #include "mem-memset-x86-64-asm-def.h" diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 30d1c3225b4..ebfa163b80b 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -429,14 +429,14 @@ static int parse_cpu_list(const char *arg) return 0; } -static void parse_setup_cpu_list(void) +static int parse_setup_cpu_list(void) { struct thread_data *td; char *str0, *str; int t; if (!g->p.cpu_list_str) - return; + return 0; dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); @@ -500,8 +500,12 @@ static void parse_setup_cpu_list(void) dprintf("CPUs: %d_%d-%d#%dx%d\n", bind_cpu_0, bind_len, bind_cpu_1, step, mul); - BUG_ON(bind_cpu_0 < 0 || bind_cpu_0 >= g->p.nr_cpus); - 
BUG_ON(bind_cpu_1 < 0 || bind_cpu_1 >= g->p.nr_cpus); + if (bind_cpu_0 >= g->p.nr_cpus || bind_cpu_1 >= g->p.nr_cpus) { + printf("\nTest not applicable, system has only %d CPUs.\n", g->p.nr_cpus); + return -1; + } + + BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0); BUG_ON(bind_cpu_0 > bind_cpu_1); for (bind_cpu = bind_cpu_0; bind_cpu <= bind_cpu_1; bind_cpu += step) { @@ -541,6 +545,7 @@ out: printf("# NOTE: %d tasks bound, %d tasks unbound\n", t, g->p.nr_tasks - t); free(str0); + return 0; } static int parse_cpus_opt(const struct option *opt __maybe_unused, @@ -561,14 +566,14 @@ static int parse_node_list(const char *arg) return 0; } -static void parse_setup_node_list(void) +static int parse_setup_node_list(void) { struct thread_data *td; char *str0, *str; int t; if (!g->p.node_list_str) - return; + return 0; dprintf("g->p.nr_tasks: %d\n", g->p.nr_tasks); @@ -619,8 +624,12 @@ static void parse_setup_node_list(void) dprintf("NODEs: %d-%d #%d\n", bind_node_0, bind_node_1, step); - BUG_ON(bind_node_0 < 0 || bind_node_0 >= g->p.nr_nodes); - BUG_ON(bind_node_1 < 0 || bind_node_1 >= g->p.nr_nodes); + if (bind_node_0 >= g->p.nr_nodes || bind_node_1 >= g->p.nr_nodes) { + printf("\nTest not applicable, system has only %d nodes.\n", g->p.nr_nodes); + return -1; + } + + BUG_ON(bind_node_0 < 0 || bind_node_1 < 0); BUG_ON(bind_node_0 > bind_node_1); for (bind_node = bind_node_0; bind_node <= bind_node_1; bind_node += step) { @@ -651,6 +660,7 @@ out: printf("# NOTE: %d tasks mem-bound, %d tasks unbound\n", t, g->p.nr_tasks - t); free(str0); + return 0; } static int parse_nodes_opt(const struct option *opt __maybe_unused, @@ -1110,7 +1120,7 @@ static void *worker_thread(void *__tdata) /* Check whether our max runtime timed out: */ if (g->p.nr_secs) { timersub(&stop, &start0, &diff); - if (diff.tv_sec >= g->p.nr_secs) { + if ((u32)diff.tv_sec >= g->p.nr_secs) { g->stop_work = true; break; } @@ -1157,7 +1167,7 @@ static void *worker_thread(void *__tdata) runtime_ns_max += diff.tv_usec * 
1000; if (details >= 0) { - printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016lx]\n", + printf(" #%2d / %2d: %14.2lf nsecs/op [val: %016"PRIx64"]\n", process_nr, thread_nr, runtime_ns_max / bytes_done, val); } fflush(stdout); @@ -1356,8 +1366,8 @@ static int init(void) init_thread_data(); tprintf("#\n"); - parse_setup_cpu_list(); - parse_setup_node_list(); + if (parse_setup_cpu_list() || parse_setup_node_list()) + return -1; tprintf("#\n"); print_summary(); @@ -1583,6 +1593,11 @@ static void init_params(struct params *p, const char *name, int argc, const char p->data_rand_walk = true; p->nr_loops = -1; p->init_random = true; + p->mb_global_str = "1"; + p->nr_proc = 1; + p->nr_threads = 1; + p->nr_secs = 5; + p->run_all = argc == 1; } static int run_bench_numa(const char *name, const char **argv) @@ -1600,7 +1615,6 @@ static int run_bench_numa(const char *name, const char **argv) return 0; err: - usage_with_options(numa_usage, options); return -1; } @@ -1701,8 +1715,7 @@ static int bench_all(void) BUG_ON(ret < 0); for (i = 0; i < nr; i++) { - if (run_bench_numa(tests[i][0], tests[i] + 1)) - return -1; + run_bench_numa(tests[i][0], tests[i] + 1); } printf("\n"); diff --git a/tools/perf/bench/sched-pipe.c b/tools/perf/bench/sched-pipe.c index 69cfba8d4c6..07a8d7646a1 100644 --- a/tools/perf/bench/sched-pipe.c +++ b/tools/perf/bench/sched-pipe.c @@ -7,9 +7,7 @@ * Based on pipe-test-1m.c by Ingo Molnar <mingo@redhat.com> * http://people.redhat.com/mingo/cfs-scheduler/tools/pipe-test-1m.c * Ported to perf by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> - * */ - #include "../perf.h" #include "../util/util.h" #include "../util/parse-options.h" @@ -28,12 +26,24 @@ #include <sys/time.h> #include <sys/types.h> +#include <pthread.h> + +struct thread_data { + int nr; + int pipe_read; + int pipe_write; + pthread_t pthread; +}; + #define LOOPS_DEFAULT 1000000 -static int loops = LOOPS_DEFAULT; +static int loops = LOOPS_DEFAULT; + +/* Use processes by default: */ +static bool 
threaded; static const struct option options[] = { - OPT_INTEGER('l', "loop", &loops, - "Specify number of loops"), + OPT_INTEGER('l', "loop", &loops, "Specify number of loops"), + OPT_BOOLEAN('T', "threaded", &threaded, "Specify threads/process based task setup"), OPT_END() }; @@ -42,13 +52,37 @@ static const char * const bench_sched_pipe_usage[] = { NULL }; -int bench_sched_pipe(int argc, const char **argv, - const char *prefix __maybe_unused) +static void *worker_thread(void *__tdata) { - int pipe_1[2], pipe_2[2]; + struct thread_data *td = __tdata; int m = 0, i; + int ret; + + for (i = 0; i < loops; i++) { + if (!td->nr) { + ret = read(td->pipe_read, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + ret = write(td->pipe_write, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + } else { + ret = write(td->pipe_write, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + ret = read(td->pipe_read, &m, sizeof(int)); + BUG_ON(ret != sizeof(int)); + } + } + + return NULL; +} + +int bench_sched_pipe(int argc, const char **argv, const char *prefix __maybe_unused) +{ + struct thread_data threads[2], *td; + int pipe_1[2], pipe_2[2]; struct timeval start, stop, diff; unsigned long long result_usec = 0; + int nr_threads = 2; + int t; /* * why does "ret" exist? 
@@ -58,43 +92,66 @@ int bench_sched_pipe(int argc, const char **argv, int __maybe_unused ret, wait_stat; pid_t pid, retpid __maybe_unused; - argc = parse_options(argc, argv, options, - bench_sched_pipe_usage, 0); + argc = parse_options(argc, argv, options, bench_sched_pipe_usage, 0); BUG_ON(pipe(pipe_1)); BUG_ON(pipe(pipe_2)); - pid = fork(); - assert(pid >= 0); - gettimeofday(&start, NULL); - if (!pid) { - for (i = 0; i < loops; i++) { - ret = read(pipe_1[0], &m, sizeof(int)); - ret = write(pipe_2[1], &m, sizeof(int)); - } - } else { - for (i = 0; i < loops; i++) { - ret = write(pipe_1[1], &m, sizeof(int)); - ret = read(pipe_2[0], &m, sizeof(int)); + for (t = 0; t < nr_threads; t++) { + td = threads + t; + + td->nr = t; + + if (t == 0) { + td->pipe_read = pipe_1[0]; + td->pipe_write = pipe_2[1]; + } else { + td->pipe_write = pipe_1[1]; + td->pipe_read = pipe_2[0]; } } - gettimeofday(&stop, NULL); - timersub(&stop, &start, &diff); - if (pid) { + if (threaded) { + + for (t = 0; t < nr_threads; t++) { + td = threads + t; + + ret = pthread_create(&td->pthread, NULL, worker_thread, td); + BUG_ON(ret); + } + + for (t = 0; t < nr_threads; t++) { + td = threads + t; + + ret = pthread_join(td->pthread, NULL); + BUG_ON(ret); + } + + } else { + pid = fork(); + assert(pid >= 0); + + if (!pid) { + worker_thread(threads + 0); + exit(0); + } else { + worker_thread(threads + 1); + } + retpid = waitpid(pid, &wait_stat, 0); assert((retpid == pid) && WIFEXITED(wait_stat)); - } else { - exit(0); } + gettimeofday(&stop, NULL); + timersub(&stop, &start, &diff); + switch (bench_format) { case BENCH_FORMAT_DEFAULT: - printf("# Executed %d pipe operations between two tasks\n\n", - loops); + printf("# Executed %d pipe operations between two %s\n\n", + loops, threaded ? "threads" : "processes"); result_usec = diff.tv_sec * 1000000; result_usec += diff.tv_usec; |
