diff options
Diffstat (limited to 'tools/perf/bench/mem-memcpy.c')
| -rw-r--r-- | tools/perf/bench/mem-memcpy.c | 253 | 
1 files changed, 183 insertions, 70 deletions
diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index 38dae746514..5ce71d3b72c 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -5,13 +5,13 @@   *   * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>   */ -#include <ctype.h>  #include "../perf.h"  #include "../util/util.h"  #include "../util/parse-options.h"  #include "../util/header.h"  #include "bench.h" +#include "mem-memcpy-arch.h"  #include <stdio.h>  #include <stdlib.h> @@ -23,30 +23,49 @@  static const char	*length_str	= "1MB";  static const char	*routine	= "default"; -static bool		use_clock	= false; -static int		clock_fd; +static int		iterations	= 1; +static bool		use_cycle; +static int		cycle_fd; +static bool		only_prefault; +static bool		no_prefault;  static const struct option options[] = {  	OPT_STRING('l', "length", &length_str, "1MB",  		    "Specify length of memory to copy. " -		    "available unit: B, MB, GB (upper and lower)"), +		    "Available units: B, KB, MB, GB and TB (upper and lower)"),  	OPT_STRING('r', "routine", &routine, "default",  		    "Specify routine to copy"), -	OPT_BOOLEAN('c', "clock", &use_clock, -		    "Use CPU clock for measuring"), +	OPT_INTEGER('i', "iterations", &iterations, +		    "repeat memcpy() invocation this number of times"), +	OPT_BOOLEAN('c', "cycle", &use_cycle, +		    "Use cycles event instead of gettimeofday() for measuring"), +	OPT_BOOLEAN('o', "only-prefault", &only_prefault, +		    "Show only the result with page faults before memcpy()"), +	OPT_BOOLEAN('n', "no-prefault", &no_prefault, +		    "Show only the result without page faults before memcpy()"),  	OPT_END()  }; +typedef void *(*memcpy_t)(void *, const void *, size_t); +  struct routine {  	const char *name;  	const char *desc; -	void * (*fn)(void *dst, const void *src, size_t len); +	memcpy_t fn;  };  struct routine routines[] = {  	{ "default",  	  "Default memcpy() provided by glibc",  	  memcpy }, +#ifdef HAVE_ARCH_X86_64_SUPPORT + +#define MEMCPY_FN(fn, name, desc) { name, desc, fn }, +#include "mem-memcpy-x86-64-asm-def.h" +#undef MEMCPY_FN + +#endif +  	{ NULL,  	  NULL,  	  NULL   } @@ -57,27 +76,27 @@ static const char * const bench_mem_memcpy_usage[] = {  	NULL  }; -static struct perf_event_attr clock_attr = { +static struct perf_event_attr cycle_attr = {  	.type		= PERF_TYPE_HARDWARE,  	.config		= PERF_COUNT_HW_CPU_CYCLES  }; -static void init_clock(void) +static void init_cycle(void)  { -	clock_fd = sys_perf_event_open(&clock_attr, getpid(), -1, -1, 0); +	cycle_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, 0); -	if (clock_fd < 0 && errno == ENOSYS) +	if (cycle_fd < 0 && errno == ENOSYS)  		die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");  	else -		BUG_ON(clock_fd < 0); +		BUG_ON(cycle_fd < 0);  } -static u64 get_clock(void) +static u64 get_cycle(void)  {  	int ret;  	u64 clk; -	ret = read(clock_fd, &clk, sizeof(u64)); +	ret = read(cycle_fd, &clk, sizeof(u64));  	BUG_ON(ret != sizeof(u64));  	return clk; @@ -89,29 +108,104 @@ static double timeval2double(struct timeval *ts)  		(double)ts->tv_usec / (double)1000000;  } -int bench_mem_memcpy(int argc, const char **argv, -		     const char *prefix __used) +static void alloc_mem(void **dst, void **src, size_t length) +{ +	*dst = zalloc(length); +	if (!*dst) +		die("memory allocation failed - maybe length is too large?\n"); + +	*src = zalloc(length); +	if (!*src) +		die("memory allocation failed - maybe length is too large?\n"); +	/* Make sure to always replace the zero pages even if MMAP_THRESH is crossed */ +	memset(*src, 0, length); +} + +static u64 do_memcpy_cycle(memcpy_t fn, size_t len, bool prefault)  { +	u64 cycle_start = 0ULL, cycle_end = 0ULL; +	void *src = NULL, *dst = NULL;  	int i; -	void *dst, *src; -	size_t length; -	double bps = 0.0; + +	alloc_mem(&src, &dst, len); + +	if (prefault) +		fn(dst, src, len); + +	cycle_start = get_cycle(); +	for (i = 0; i < iterations; ++i) +		fn(dst, src, len); +	cycle_end = get_cycle(); + +	free(src); +	free(dst); +	return cycle_end - cycle_start; +} + +static double do_memcpy_gettimeofday(memcpy_t fn, size_t len, bool prefault) +{  	struct timeval tv_start, tv_end, tv_diff; -	u64 clock_start, clock_end, clock_diff; +	void *src = NULL, *dst = NULL; +	int i; + +	alloc_mem(&src, &dst, len); + +	if (prefault) +		fn(dst, src, len); + +	BUG_ON(gettimeofday(&tv_start, NULL)); +	for (i = 0; i < iterations; ++i) +		fn(dst, src, len); +	BUG_ON(gettimeofday(&tv_end, NULL)); + +	timersub(&tv_end, &tv_start, &tv_diff); + +	free(src); +	free(dst); +	return (double)((double)len / timeval2double(&tv_diff)); +} + +#define pf (no_prefault ? 0 : 1) + +#define print_bps(x) do {					\ +		if (x < K)					\ +			printf(" %14lf B/Sec", x);		\ +		else if (x < K * K)				\ +			printf(" %14lfd KB/Sec", x / K);	\ +		else if (x < K * K * K)				\ +			printf(" %14lf MB/Sec", x / K / K);	\ +		else						\ +			printf(" %14lf GB/Sec", x / K / K / K); \ +	} while (0) + +int bench_mem_memcpy(int argc, const char **argv, +		     const char *prefix __maybe_unused) +{ +	int i; +	size_t len; +	double result_bps[2]; +	u64 result_cycle[2]; -	clock_start = clock_end = clock_diff = 0ULL;  	argc = parse_options(argc, argv, options,  			     bench_mem_memcpy_usage, 0); -	tv_diff.tv_sec = 0; -	tv_diff.tv_usec = 0; -	length = (size_t)perf_atoll((char *)length_str); +	if (use_cycle) +		init_cycle(); -	if ((s64)length <= 0) { +	len = (size_t)perf_atoll((char *)length_str); + +	result_cycle[0] = result_cycle[1] = 0ULL; +	result_bps[0] = result_bps[1] = 0.0; + +	if ((s64)len <= 0) {  		fprintf(stderr, "Invalid length:%s\n", length_str);  		return 1;  	} +	/* same to without specifying either of prefault and no-prefault */ +	if (only_prefault && no_prefault) +		only_prefault = no_prefault = false; +  	for (i = 0; routines[i].name; i++) {  		if (!strcmp(routines[i].name, routine))  			break; @@ -126,61 +220,80 @@ int bench_mem_memcpy(int argc, const char **argv,  		return 1;  	} -	dst = zalloc(length); -	if (!dst) -		die("memory allocation failed - maybe length is too large?\n"); - -	src = zalloc(length); -	if (!src) -		die("memory allocation failed - maybe length is too large?\n"); - -	if (bench_format == BENCH_FORMAT_DEFAULT) { -		printf("# Copying %s Bytes from %p to %p ...\n\n", -		       length_str, src, dst); -	} - -	if (use_clock) { -		init_clock(); -		clock_start = get_clock(); -	} else { -		BUG_ON(gettimeofday(&tv_start, NULL)); -	} +	if (bench_format == BENCH_FORMAT_DEFAULT) +		printf("# Copying %s Bytes ...\n\n", length_str); -	routines[i].fn(dst, src, length); - -	if (use_clock) { -		clock_end = get_clock(); -		clock_diff = clock_end - clock_start; +	if (!only_prefault && !no_prefault) { +		/* show both of results */ +		if (use_cycle) { +			result_cycle[0] = +				do_memcpy_cycle(routines[i].fn, len, false); +			result_cycle[1] = +				do_memcpy_cycle(routines[i].fn, len, true); +		} else { +			result_bps[0] = +				do_memcpy_gettimeofday(routines[i].fn, +						len, false); +			result_bps[1] = +				do_memcpy_gettimeofday(routines[i].fn, +						len, true); +		}  	} else { -		BUG_ON(gettimeofday(&tv_end, NULL)); -		timersub(&tv_end, &tv_start, &tv_diff); -		bps = (double)((double)length / timeval2double(&tv_diff)); +		if (use_cycle) { +			result_cycle[pf] = +				do_memcpy_cycle(routines[i].fn, +						len, only_prefault); +		} else { +			result_bps[pf] = +				do_memcpy_gettimeofday(routines[i].fn, +						len, only_prefault); +		}  	}  	switch (bench_format) {  	case BENCH_FORMAT_DEFAULT: -		if (use_clock) { -			printf(" %14lf Clock/Byte\n", -			       (double)clock_diff / (double)length); -		} else { -			if (bps < K) -				printf(" %14lf B/Sec\n", bps); -			else if (bps < K * K) -				printf(" %14lfd KB/Sec\n", bps / 1024); -			else if (bps < K * K * K) -				printf(" %14lf MB/Sec\n", bps / 1024 / 1024); -			else { -				printf(" %14lf GB/Sec\n", -				       bps / 1024 / 1024 / 1024); +		if (!only_prefault && !no_prefault) { +			if (use_cycle) { +				printf(" %14lf Cycle/Byte\n", +					(double)result_cycle[0] +					/ (double)len); +				printf(" %14lf Cycle/Byte (with prefault)\n", +					(double)result_cycle[1] +					/ (double)len); +			} else { +				print_bps(result_bps[0]); +				printf("\n"); +				print_bps(result_bps[1]); +				printf(" (with prefault)\n");  			} +		} else { +			if (use_cycle) { +				printf(" %14lf Cycle/Byte", +					(double)result_cycle[pf] +					/ (double)len); +			} else +				print_bps(result_bps[pf]); + +			printf("%s\n", only_prefault ? " (with prefault)" : "");  		}  		break;  	case BENCH_FORMAT_SIMPLE: -		if (use_clock) { -			printf("%14lf\n", -			       (double)clock_diff / (double)length); -		} else -			printf("%lf\n", bps); +		if (!only_prefault && !no_prefault) { +			if (use_cycle) { +				printf("%lf %lf\n", +					(double)result_cycle[0] / (double)len, +					(double)result_cycle[1] / (double)len); +			} else { +				printf("%lf %lf\n", +					result_bps[0], result_bps[1]); +			} +		} else { +			if (use_cycle) { +				printf("%lf\n", (double)result_cycle[pf] +					/ (double)len); +			} else +				printf("%lf\n", result_bps[pf]); +		}  		break;  	default:  		/* reaching this means there's some disaster: */  | 
