diff options
Diffstat (limited to 'tools/perf/bench/mem-memcpy.c')
| -rw-r--r-- | tools/perf/bench/mem-memcpy.c | 254 |
1 files changed, 183 insertions, 71 deletions
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 89773178e89..5ce71d3b72c 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -5,14 +5,13 @@ * * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp> */ -#include <ctype.h> #include "../perf.h" #include "../util/util.h" #include "../util/parse-options.h" -#include "../util/string.h" #include "../util/header.h" #include "bench.h" +#include "mem-memcpy-arch.h" #include <stdio.h> #include <stdlib.h> @@ -24,30 +23,49 @@ static const char *length_str = "1MB"; static const char *routine = "default"; -static int use_clock = 0; -static int clock_fd; +static int iterations = 1; +static bool use_cycle; +static int cycle_fd; +static bool only_prefault; +static bool no_prefault; static const struct option options[] = { OPT_STRING('l', "length", &length_str, "1MB", "Specify length of memory to copy. " - "available unit: B, MB, GB (upper and lower)"), + "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", "Specify routine to copy"), - OPT_BOOLEAN('c', "clock", &use_clock, - "Use CPU clock for measuring"), + OPT_INTEGER('i', "iterations", &iterations, + "repeat memcpy() invocation this number of times"), + OPT_BOOLEAN('c', "cycle", &use_cycle, + "Use cycles event instead of gettimeofday() for measuring"), + OPT_BOOLEAN('o', "only-prefault", &only_prefault, + "Show only the result with page faults before memcpy()"), + OPT_BOOLEAN('n', "no-prefault", &no_prefault, + "Show only the result without page faults before memcpy()"), OPT_END() }; +typedef void *(*memcpy_t)(void *, const void *, size_t); + struct routine { const char *name; const char *desc; - void * (*fn)(void *dst, const void *src, size_t len); + memcpy_t fn; }; struct routine routines[] = { { "default", "Default memcpy() provided by glibc", memcpy }, +#ifdef HAVE_ARCH_X86_64_SUPPORT + +#define MEMCPY_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memcpy-x86-64-asm-def.h" +#undef MEMCPY_FN + +#endif + { NULL, NULL, NULL } @@ -58,27 +76,27 @@ static const char * const bench_mem_memcpy_usage[] = { NULL }; -static struct perf_event_attr clock_attr = { +static struct perf_event_attr cycle_attr = { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES }; -static void init_clock(void) +static void init_cycle(void) { - clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); + cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); - if (clock_fd < 0 && errno == ENOSYS) + if (cycle_fd < 0 && errno == ENOSYS) die("No CONFIG_PERF_EVENTS=y kernel support configured?\n"); else - BUG_ON(clock_fd < 0); + BUG_ON(cycle_fd < 0); } -static u64 get_clock(void) +static u64 get_cycle(void) { int ret; u64 clk; - ret = read(clock_fd, &clk, sizeof(u64)); + ret = read(cycle_fd, &clk, sizeof(u64)); BUG_ON(ret != sizeof(u64)); return clk; @@ -90,29 +108,104 @@ static double timeval2double(struct timeval *ts) (double)ts->tv_usec / (double)1000000; } -int bench_mem_memcpy(int argc, const char **argv, - const char *prefix __used) +static void alloc_mem(void **dst, void **src, size_t length) +{ + *dst = zalloc(length); + if (!*dst) + die("memory allocation failed - maybe length is too large?\n"); + + *src = zalloc(length); + if (!*src) + die("memory allocation failed - maybe length is too large?\n"); + /* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ + memset(*src, 0, length); +} + +static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault) { + u64 cycle_start = 0ULL, cycle_end = 0ULL; + void *src = NULL, *dst = NULL; int i; - void *dst, *src; - size_t length; - double bps = 0.0; + + alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + cycle_start = get_cycle(); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + cycle_end = get_cycle(); + + free(src); + free(dst); + return cycle_end - cycle_start; +} + +static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) +{ struct timeval tv_start, tv_end, tv_diff; - u64 clock_start, clock_end, clock_diff; + void *src = NULL, *dst = NULL; + int i; + + alloc_mem(&src, &dst, len); + + if (prefault) + fn(dst, src, len); + + BUG_ON(gettimeofday(&tv_start, NULL)); + for (i = 0; i < iterations; ++i) + fn(dst, src, len); + BUG_ON(gettimeofday(&tv_end, NULL)); + + timersub(&tv_end, &tv_start, &tv_diff); + + free(src); + free(dst); + return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do { \ + if (x < K) \ + printf(" %14lf B/Sec", x); \ + else if (x < K * K) \ + printf(" %14lfd KB/Sec", x / K); \ + else if (x < K * K * K) \ + printf(" %14lf MB/Sec", x / K / K); \ + else \ + printf(" %14lf GB/Sec", x / K / K / K); \ + } while (0) + +int bench_mem_memcpy(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int i; + size_t len; + double result_bps[2]; + u64 result_cycle[2]; - clock_start = clock_end = clock_diff = 0ULL; argc = parse_options(argc, argv, options, bench_mem_memcpy_usage, 0); - tv_diff.tv_sec = 0; - tv_diff.tv_usec = 0; - length = (size_t)perf_atoll((char *)length_str); + if (use_cycle) + init_cycle(); - if ((s64)length <= 0) { + len = (size_t)perf_atoll((char *)length_str); + + result_cycle[0] = result_cycle[1] = 0ULL; + result_bps[0] = result_bps[1] = 0.0; + + if ((s64)len <= 0) { fprintf(stderr, "Invalid length:%s\n", length_str); return 1; } + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + for (i = 0; routines[i].name; i++) { if (!strcmp(routines[i].name, routine)) break; @@ -127,61 +220,80 @@ int bench_mem_memcpy(int argc, const char **argv, return 1; } - dst = zalloc(length); - if (!dst) - die("memory allocation failed - maybe length is too large?\n"); - - src = zalloc(length); - if (!src) - die("memory allocation failed - maybe length is too large?\n"); - - if (bench_format == BENCH_FORMAT_DEFAULT) { - printf("# Copying %s Bytes from %p to %p ...\n\n", - length_str, src, dst); - } - - if (use_clock) { - init_clock(); - clock_start = get_clock(); - } else { - BUG_ON(gettimeofday(&tv_start, NULL)); - } + if (bench_format == BENCH_FORMAT_DEFAULT) + printf("# Copying %s Bytes ...\n\n", length_str); - routines[i].fn(dst, src, length); - - if (use_clock) { - clock_end = get_clock(); - clock_diff = clock_end - clock_start; + if (!only_prefault && !no_prefault) { + /* show both of results */ + if (use_cycle) { + result_cycle[0] = + do_memcpy_cycle(routines[i].fn, len, false); + result_cycle[1] = + do_memcpy_cycle(routines[i].fn, len, true); + } else { + result_bps[0] = + do_memcpy_gettimeofday(routines[i].fn, + len, false); + result_bps[1] = + do_memcpy_gettimeofday(routines[i].fn, + len, true); + } } else { - BUG_ON(gettimeofday(&tv_end, NULL)); - timersub(&tv_end, &tv_start, &tv_diff); - bps = (double)((double)length / timeval2double(&tv_diff)); + if (use_cycle) { + result_cycle[pf] = + do_memcpy_cycle(routines[i].fn, + len, only_prefault); + } else { + result_bps[pf] = + do_memcpy_gettimeofday(routines[i].fn, + len, only_prefault); + } } switch (bench_format) { case BENCH_FORMAT_DEFAULT: - if (use_clock) { - printf(" %14lf Clock/Byte\n", - (double)clock_diff / (double)length); - } else { - if (bps < K) - printf(" %14lf B/Sec\n", bps); - else if (bps < K * K) - printf(" %14lfd KB/Sec\n", bps / 1024); - else if (bps < K * K * K) - printf(" %14lf MB/Sec\n", bps / 1024 / 1024); - else { - printf(" %14lf GB/Sec\n", - bps / 1024 / 1024 / 1024); + if (!only_prefault && !no_prefault) { + if (use_cycle) { + printf(" %14lf Cycle/Byte\n", + (double)result_cycle[0] + / (double)len); + printf(" %14lf Cycle/Byte (with prefault)\n", + (double)result_cycle[1] + / (double)len); + } else { + print_bps(result_bps[0]); + printf("\n"); + print_bps(result_bps[1]); + printf(" (with prefault)\n"); } + } else { + if (use_cycle) { + printf(" %14lf Cycle/Byte", + (double)result_cycle[pf] + / (double)len); + } else + print_bps(result_bps[pf]); + + printf("%s\n", only_prefault ? " (with prefault)" : ""); } break; case BENCH_FORMAT_SIMPLE: - if (use_clock) { - printf("%14lf\n", - (double)clock_diff / (double)length); - } else - printf("%lf\n", bps); + if (!only_prefault && !no_prefault) { + if (use_cycle) { + printf("%lf %lf\n", + (double)result_cycle[0] / (double)len, + (double)result_cycle[1] / (double)len); + } else { + printf("%lf %lf\n", + result_bps[0], result_bps[1]); + } + } else { + if (use_cycle) { + printf("%lf\n", (double)result_cycle[pf] + / (double)len); + } else + printf("%lf\n", result_bps[pf]); + } break; default: /* reaching this means there's some disaster: */ |
