diff options
Diffstat (limited to 'tools/power/x86')
| -rw-r--r-- | tools/power/x86/turbostat/.gitignore | 1 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/Makefile | 22 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.8 | 221 | ||||
| -rw-r--r-- | tools/power/x86/turbostat/turbostat.c | 2462 | ||||
| -rw-r--r-- | tools/power/x86/x86_energy_perf_policy/Makefile | 10 | ||||
| -rw-r--r-- | tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 | 104 | ||||
| -rw-r--r-- | tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c | 324 | 
7 files changed, 3144 insertions, 0 deletions
diff --git a/tools/power/x86/turbostat/.gitignore b/tools/power/x86/turbostat/.gitignore new file mode 100644 index 00000000000..7521370d356 --- /dev/null +++ b/tools/power/x86/turbostat/.gitignore @@ -0,0 +1 @@ +turbostat diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile new file mode 100644 index 00000000000..d1b3a361e52 --- /dev/null +++ b/tools/power/x86/turbostat/Makefile @@ -0,0 +1,22 @@ +CC		= $(CROSS_COMPILE)gcc +BUILD_OUTPUT	:= $(PWD) +PREFIX		:= /usr +DESTDIR		:= + +turbostat : turbostat.c +CFLAGS +=	-Wall +CFLAGS +=	-DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' + +%: %.c +	@mkdir -p $(BUILD_OUTPUT) +	$(CC) $(CFLAGS) $< -o $(BUILD_OUTPUT)/$@ + +.PHONY : clean +clean : +	@rm -f $(BUILD_OUTPUT)/turbostat + +install : turbostat +	install -d  $(DESTDIR)$(PREFIX)/bin +	install $(BUILD_OUTPUT)/turbostat $(DESTDIR)$(PREFIX)/bin/turbostat +	install -d  $(DESTDIR)$(PREFIX)/share/man/man8 +	install turbostat.8 $(DESTDIR)$(PREFIX)/share/man/man8 diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 new file mode 100644 index 00000000000..56bfb523c5b --- /dev/null +++ b/tools/power/x86/turbostat/turbostat.8 @@ -0,0 +1,221 @@ +.TH TURBOSTAT 8 +.SH NAME +turbostat \- Report processor frequency and idle statistics +.SH SYNOPSIS +.ft B +.B turbostat +.RB [ Options ] +.RB command +.br +.B turbostat +.RB [ Options ] +.RB [ "\-i interval_sec" ] +.SH DESCRIPTION +\fBturbostat \fP reports processor topology, frequency, +idle power-state statistics, temperature and power on modern X86 processors. +Either \fBcommand\fP is forked and statistics are printed +upon its completion, or statistics are printed periodically. + +\fBturbostat \fP +must be run on root, and +minimally requires that the processor +supports an "invariant" TSC, plus the APERF and MPERF MSRs. +Additional information is reported depending on hardware counter support. 
+ +.SS Options +The \fB-p\fP option limits output to the 1st thread in 1st core of each package. +.PP +The \fB-P\fP option limits output to the 1st thread in each Package. +.PP +The \fB-S\fP option limits output to a 1-line System Summary for each interval. +.PP +The \fB-v\fP option increases verbosity. +.PP +The \fB-c MSR#\fP option includes the delta of the specified 32-bit MSR counter. +.PP +The \fB-C MSR#\fP option includes the delta of the specified 64-bit MSR counter. +.PP +The \fB-m MSR#\fP option includes the specified 32-bit MSR value. +.PP +The \fB-M MSR#\fP option includes the specified 64-bit MSR value. +.PP +The \fB-i interval_sec\fP option prints statistics every \fIinterval_sec\fP seconds. +The default is 5 seconds. +.PP +The \fBcommand\fP parameter forks \fBcommand\fP and upon its exit, +displays the statistics gathered since it was forked. +.PP +.SH FIELD DESCRIPTIONS +.nf +\fBPackage\fP processor package number. +\fBCore\fP processor core number. +\fBCPU\fP Linux CPU (logical processor) number. +Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. +\fBAvg_MHz\fP number of cycles executed divided by time elapsed. +\fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. +\fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). +\fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. +\fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. +\fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. +\fBPkgTmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. +\fBPkg%pc2, Pkg%pc3, Pkg%pc6, Pkg%pc7\fP percentage residency in hardware package idle states. +\fBPkgWatt\fP Watts consumed by the whole package. +\fBCorWatt\fP Watts consumed by the core part of the package. 
+\fBGFXWatt\fP Watts consumed by the Graphics part of the package -- available only on client processors. +\fBRAMWatt\fP Watts consumed by the DRAM DIMMS -- available only on server processors. +\fBPKG_%\fP percent of the interval that RAPL throttling was active on the Package. +\fBRAM_%\fP percent of the interval that RAPL throttling was active on DRAM. +.fi +.PP +.SH EXAMPLE +Without any parameters, turbostat prints out counters every 5 seconds. +(override interval with "-i sec" option, or specify a command +for turbostat to fork). + +The first row of statistics is a summary for the entire system. +For residency % columns, the summary is a weighted average. +For Temperature columns, the summary is the column maximum. +For Watts columns, the summary is a system total. +Subsequent rows show per-CPU statistics. + +.nf +[root@ivy]# ./turbostat +    Core     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt  +       -       -       6    0.36    1596    3492       0    0.59    0.01   99.04    0.00      23      24   23.82    0.01   72.47    0.00    6.40    1.01    0.00 +       0       0       9    0.58    1596    3492       0    0.28    0.01   99.13    0.00      23      24   23.82    0.01   72.47    0.00    6.40    1.01    0.00 +       0       4       1    0.07    1596    3492       0    0.79 +       1       1      10    0.65    1596    3492       0    0.59    0.00   98.76    0.00      23 +       1       5       5    0.28    1596    3492       0    0.95 +       2       2      10    0.66    1596    3492       0    0.41    0.01   98.92    0.00      23 +       2       6       2    0.10    1597    3492       0    0.97 +       3       3       3    0.20    1596    3492       0    0.44    0.00   99.37    0.00      23 +       3       7       5    0.31    1596    3492       0    0.33 +.fi +.SH VERBOSE EXAMPLE +The "-v" option adds verbosity to the output: + +.nf +[root@ivy]# 
turbostat -v +turbostat v3.0 November 23, 2012 - Len Brown <lenb@kernel.org> +CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) +CPUID(6): APERF, DTS, PTM, EPB +RAPL: 851 sec. Joule Counter Range +cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 +16 * 100 = 1600 MHz max efficiency +35 * 100 = 3500 MHz TSC frequency +cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6-noret) +cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 +37 * 100 = 3700 MHz max turbo 4 active cores +38 * 100 = 3800 MHz max turbo 3 active cores +39 * 100 = 3900 MHz max turbo 2 active cores +39 * 100 = 3900 MHz max turbo 1 active cores +cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) +cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) +cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) +cpu0: MSR_PKG_POWER_LIMIT: 0x830000148268 (UNlocked) +cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) +cpu0: PKG Limit #2: ENabled (96.000000 Watts, 0.000977* sec, clamp DISabled) +cpu0: MSR_PP0_POLICY: 0 +cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) +cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_PP1_POLICY: 0 +cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) +cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) +cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) +cpu0: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x88560000 (19 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x88540000 (21 C +/- 1) +cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) + ... +.fi +The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency +available at the minimum package voltage.  
The \fBTSC frequency\fP is the nominal +maximum frequency of the processor if turbo-mode were not available.  This frequency +should be sustainable on all CPUs indefinitely, given nominal power and cooling. +The remaining rows show what maximum turbo frequency is possible +depending on the number of idle cores.  Note that this information is +not available on all processors. +.SH FORK EXAMPLE +If turbostat is invoked with a command, it will fork that command +and output the statistics gathered when the command exits. +eg. Here a cycle soaker is run on 1 CPU (see %Busy) for a few seconds +until ^C while the other CPUs are mostly idle: + +.nf +root@ivy: turbostat cat /dev/zero > /dev/null +^C +    Core     CPU Avg_MHz   %Busy Bzy_MHz TSC_MHz     SMI  CPU%c1  CPU%c3  CPU%c6  CPU%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt  +       -       -     496   12.75    3886    3492       0   13.16    0.04   74.04    0.00      36      36    0.00    0.00    0.00    0.00   23.15   17.65    0.00 +       0       0      22    0.57    3830    3492       0    0.83    0.02   98.59    0.00      27      36    0.00    0.00    0.00    0.00   23.15   17.65    0.00 +       0       4       9    0.24    3829    3492       0    1.15 +       1       1       4    0.09    3783    3492       0   99.91    0.00    0.00    0.00      36 +       1       5    3880   99.82    3888    3492       0    0.18 +       2       2      17    0.44    3813    3492       0    0.77    0.04   98.75    0.00      28 +       2       6      12    0.32    3823    3492       0    0.89 +       3       3      16    0.43    3844    3492       0    0.63    0.11   98.84    0.00      30 +       3       7       4    0.11    3827    3492       0    0.94 +30.372243 sec + +.fi +Above the cycle soaker drives cpu5 up to its 3.9 GHz turbo limit +while the other processors are generally in various states of idle. + +Note that cpu1 and cpu5 are HT siblings within core1. 
+As cpu5 is very busy, it prevents its sibling, cpu1, +from entering a c-state deeper than c1. + +Note that the Avg_MHz column reflects the total number of cycles executed +divided by the measurement interval.  If the %Busy column is 100%, +then the processor was running at that speed the entire interval. +The Avg_MHz divided by the %Busy fraction results in the Bzy_MHz -- +which is the average frequency while the processor was executing -- +not including any non-busy idle time. + +.SH NOTES + +.B "turbostat " +must be run as root. + +.B "turbostat " +reads hardware counters, but doesn't write them. +So it will not interfere with the OS or other programs, including +multiple invocations of itself. + +\fBturbostat \fP +may work poorly on Linux-2.6.20 through 2.6.29, +as \fBacpi-cpufreq \fPperiodically cleared the APERF and MPERF +in those kernels. + +If the TSC column does not make sense, then +the other numbers will also make no sense. +Turbostat is lightweight, and its data collection is not atomic. +These issues are usually caused by an extremely short measurement +interval (much less than 1 second), or system activity that prevents +turbostat from being able to run on all CPUS to quickly collect data. + +The APERF, MPERF MSRs are defined to count non-halted cycles. +Although it is not guaranteed by the architecture, turbostat assumes +that they count at TSC rate, which is true on all processors tested to date. 
+ +.SH REFERENCES +"Intel® Turbo Boost Technology +in Intel® Core™ Microarchitecture (Nehalem) Based Processors" +http://download.intel.com/design/processor/applnots/320354.pdf + +"Intel® 64 and IA-32 Architectures Software Developer's Manual +Volume 3B: System Programming Guide" +http://www.intel.com/products/processor/manuals/ + +.SH FILES +.ta +.nf +/dev/cpu/*/msr +.fi + +.SH "SEE ALSO" +msr(4), vmstat(8) +.PP +.SH AUTHOR +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c new file mode 100644 index 00000000000..d0396af99fa --- /dev/null +++ b/tools/power/x86/turbostat/turbostat.c @@ -0,0 +1,2462 @@ +/* + * turbostat -- show CPU frequency and C-state residency + * on modern Intel turbo-capable processors. + * + * Copyright (c) 2013 Intel Corporation. + * Len Brown <len.brown@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#define _GNU_SOURCE +#include MSRHEADER +#include <stdarg.h> +#include <stdio.h> +#include <err.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <stdlib.h> +#include <dirent.h> +#include <string.h> +#include <ctype.h> +#include <sched.h> +#include <cpuid.h> + +char *proc_stat = "/proc/stat"; +unsigned int interval_sec = 5;	/* set with -i interval_sec */ +unsigned int verbose;		/* set with -v */ +unsigned int rapl_verbose;	/* set with -R */ +unsigned int rapl_joules;	/* set with -J */ +unsigned int thermal_verbose;	/* set with -T */ +unsigned int summary_only;	/* set with -S */ +unsigned int dump_only;		/* set with -s */ +unsigned int skip_c0; +unsigned int skip_c1; +unsigned int do_nhm_cstates; +unsigned int do_snb_cstates; +unsigned int do_c8_c9_c10; +unsigned int do_slm_cstates; +unsigned int use_c1_residency_msr; +unsigned int has_aperf; +unsigned int has_epb; +unsigned int units = 1000000;	/* MHz etc */ +unsigned int genuine_intel; +unsigned int has_invariant_tsc; +unsigned int do_nehalem_platform_info; +unsigned int do_nehalem_turbo_ratio_limit; +unsigned int do_ivt_turbo_ratio_limit; +unsigned int extra_msr_offset32; +unsigned int extra_msr_offset64; +unsigned int extra_delta_offset32; +unsigned int extra_delta_offset64; +int do_smi; +double bclk; +unsigned int show_pkg; +unsigned int show_core; +unsigned int show_cpu; +unsigned int show_pkg_only; +unsigned int show_core_only; +char *output_buffer, *outp; +unsigned int do_rapl; +unsigned int do_dts; +unsigned int do_ptm; +unsigned int tcc_activation_temp; +unsigned int tcc_activation_temp_override; +double rapl_power_units, rapl_energy_units, rapl_time_units; +double rapl_joule_counter_range; + +#define RAPL_PKG		(1 << 0) +					/* 0x610 MSR_PKG_POWER_LIMIT */ +					/* 0x611 MSR_PKG_ENERGY_STATUS */ +#define RAPL_PKG_PERF_STATUS	(1 << 1) +					/* 0x613 
MSR_PKG_PERF_STATUS */ +#define RAPL_PKG_POWER_INFO	(1 << 2) +					/* 0x614 MSR_PKG_POWER_INFO */ + +#define RAPL_DRAM		(1 << 3) +					/* 0x618 MSR_DRAM_POWER_LIMIT */ +					/* 0x619 MSR_DRAM_ENERGY_STATUS */ +					/* 0x61c MSR_DRAM_POWER_INFO */ +#define RAPL_DRAM_PERF_STATUS	(1 << 4) +					/* 0x61b MSR_DRAM_PERF_STATUS */ + +#define RAPL_CORES		(1 << 5) +					/* 0x638 MSR_PP0_POWER_LIMIT */ +					/* 0x639 MSR_PP0_ENERGY_STATUS */ +#define RAPL_CORE_POLICY	(1 << 6) +					/* 0x63a MSR_PP0_POLICY */ + + +#define RAPL_GFX		(1 << 7) +					/* 0x640 MSR_PP1_POWER_LIMIT */ +					/* 0x641 MSR_PP1_ENERGY_STATUS */ +					/* 0x642 MSR_PP1_POLICY */ +#define	TJMAX_DEFAULT	100 + +#define MAX(a, b) ((a) > (b) ? (a) : (b)) + +int aperf_mperf_unstable; +int backwards_count; +char *progname; + +cpu_set_t *cpu_present_set, *cpu_affinity_set; +size_t cpu_present_setsize, cpu_affinity_setsize; + +struct thread_data { +	unsigned long long tsc; +	unsigned long long aperf; +	unsigned long long mperf; +	unsigned long long c1; +	unsigned long long extra_msr64; +	unsigned long long extra_delta64; +	unsigned long long extra_msr32; +	unsigned long long extra_delta32; +	unsigned int smi_count; +	unsigned int cpu_id; +	unsigned int flags; +#define CPU_IS_FIRST_THREAD_IN_CORE	0x2 +#define CPU_IS_FIRST_CORE_IN_PACKAGE	0x4 +} *thread_even, *thread_odd; + +struct core_data { +	unsigned long long c3; +	unsigned long long c6; +	unsigned long long c7; +	unsigned int core_temp_c; +	unsigned int core_id; +} *core_even, *core_odd; + +struct pkg_data { +	unsigned long long pc2; +	unsigned long long pc3; +	unsigned long long pc6; +	unsigned long long pc7; +	unsigned long long pc8; +	unsigned long long pc9; +	unsigned long long pc10; +	unsigned int package_id; +	unsigned int energy_pkg;	/* MSR_PKG_ENERGY_STATUS */ +	unsigned int energy_dram;	/* MSR_DRAM_ENERGY_STATUS */ +	unsigned int energy_cores;	/* MSR_PP0_ENERGY_STATUS */ +	unsigned int energy_gfx;	/* MSR_PP1_ENERGY_STATUS */ +	unsigned int 
rapl_pkg_perf_status;	/* MSR_PKG_PERF_STATUS */ +	unsigned int rapl_dram_perf_status;	/* MSR_DRAM_PERF_STATUS */ +	unsigned int pkg_temp_c; + +} *package_even, *package_odd; + +#define ODD_COUNTERS thread_odd, core_odd, package_odd +#define EVEN_COUNTERS thread_even, core_even, package_even + +#define GET_THREAD(thread_base, thread_no, core_no, pkg_no) \ +	(thread_base + (pkg_no) * topo.num_cores_per_pkg * \ +		topo.num_threads_per_core + \ +		(core_no) * topo.num_threads_per_core + (thread_no)) +#define GET_CORE(core_base, core_no, pkg_no) \ +	(core_base + (pkg_no) * topo.num_cores_per_pkg + (core_no)) +#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no) + +struct system_summary { +	struct thread_data threads; +	struct core_data cores; +	struct pkg_data packages; +} sum, average; + + +struct topo_params { +	int num_packages; +	int num_cpus; +	int num_cores; +	int max_cpu_num; +	int num_cores_per_pkg; +	int num_threads_per_core; +} topo; + +struct timeval tv_even, tv_odd, tv_delta; + +void setup_all_buffers(void); + +int cpu_is_not_present(int cpu) +{ +	return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set); +} +/* + * run func(thread, core, package) in topology order + * skip non-present cpus + */ + +int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *), +	struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base) +{ +	int retval, pkg_no, core_no, thread_no; + +	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { +		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { +			for (thread_no = 0; thread_no < +				topo.num_threads_per_core; ++thread_no) { +				struct thread_data *t; +				struct core_data *c; +				struct pkg_data *p; + +				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); + +				if (cpu_is_not_present(t->cpu_id)) +					continue; + +				c = GET_CORE(core_base, core_no, pkg_no); +				p = GET_PKG(pkg_base, pkg_no); + +				retval = func(t, c, p); +				if 
(retval) +					return retval; +			} +		} +	} +	return 0; +} + +int cpu_migrate(int cpu) +{ +	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); +	CPU_SET_S(cpu, cpu_affinity_setsize, cpu_affinity_set); +	if (sched_setaffinity(0, cpu_affinity_setsize, cpu_affinity_set) == -1) +		return -1; +	else +		return 0; +} + +int get_msr(int cpu, off_t offset, unsigned long long *msr) +{ +	ssize_t retval; +	char pathname[32]; +	int fd; + +	sprintf(pathname, "/dev/cpu/%d/msr", cpu); +	fd = open(pathname, O_RDONLY); +	if (fd < 0) +		return -1; + +	retval = pread(fd, msr, sizeof *msr, offset); +	close(fd); + +	if (retval != sizeof *msr) { +		fprintf(stderr, "%s offset 0x%llx read failed\n", pathname, (unsigned long long)offset); +		return -1; +	} + +	return 0; +} + +/* + * Example Format w/ field column widths: + * + * Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     SMI   %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt + * 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 + */ + +void print_header(void) +{ +	if (show_pkg) +		outp += sprintf(outp, "Package "); +	if (show_core) +		outp += sprintf(outp, "    Core "); +	if (show_cpu) +		outp += sprintf(outp, "    CPU "); +	if (has_aperf) +		outp += sprintf(outp, "Avg_MHz "); +	if (do_nhm_cstates) +		outp += sprintf(outp, "  %%Busy "); +	if (has_aperf) +		outp += sprintf(outp, "Bzy_MHz "); +	outp += sprintf(outp, "TSC_MHz "); +	if (do_smi) +		outp += sprintf(outp, "    SMI "); +	if (extra_delta_offset32) +		outp += sprintf(outp, " count 0x%03X ", extra_delta_offset32); +	if (extra_delta_offset64) +		outp += sprintf(outp, " COUNT 0x%03X ", extra_delta_offset64); +	if (extra_msr_offset32) +		outp += sprintf(outp, "  MSR 0x%03X ", extra_msr_offset32); +	if (extra_msr_offset64) +		outp += sprintf(outp, "          MSR 0x%03X ", extra_msr_offset64); +	if 
(do_nhm_cstates) +		outp += sprintf(outp, " CPU%%c1 "); +	if (do_nhm_cstates && !do_slm_cstates) +		outp += sprintf(outp, " CPU%%c3 "); +	if (do_nhm_cstates) +		outp += sprintf(outp, " CPU%%c6 "); +	if (do_snb_cstates) +		outp += sprintf(outp, " CPU%%c7 "); + +	if (do_dts) +		outp += sprintf(outp, "CoreTmp "); +	if (do_ptm) +		outp += sprintf(outp, " PkgTmp "); + +	if (do_snb_cstates) +		outp += sprintf(outp, "Pkg%%pc2 "); +	if (do_nhm_cstates && !do_slm_cstates) +		outp += sprintf(outp, "Pkg%%pc3 "); +	if (do_nhm_cstates && !do_slm_cstates) +		outp += sprintf(outp, "Pkg%%pc6 "); +	if (do_snb_cstates) +		outp += sprintf(outp, "Pkg%%pc7 "); +	if (do_c8_c9_c10) { +		outp += sprintf(outp, "Pkg%%pc8 "); +		outp += sprintf(outp, "Pkg%%pc9 "); +		outp += sprintf(outp, "Pk%%pc10 "); +	} + +	if (do_rapl && !rapl_joules) { +		if (do_rapl & RAPL_PKG) +			outp += sprintf(outp, "PkgWatt "); +		if (do_rapl & RAPL_CORES) +			outp += sprintf(outp, "CorWatt "); +		if (do_rapl & RAPL_GFX) +			outp += sprintf(outp, "GFXWatt "); +		if (do_rapl & RAPL_DRAM) +			outp += sprintf(outp, "RAMWatt "); +		if (do_rapl & RAPL_PKG_PERF_STATUS) +			outp += sprintf(outp, "  PKG_%% "); +		if (do_rapl & RAPL_DRAM_PERF_STATUS) +			outp += sprintf(outp, "  RAM_%% "); +	} else { +		if (do_rapl & RAPL_PKG) +			outp += sprintf(outp, "  Pkg_J "); +		if (do_rapl & RAPL_CORES) +			outp += sprintf(outp, "  Cor_J "); +		if (do_rapl & RAPL_GFX) +			outp += sprintf(outp, "  GFX_J "); +		if (do_rapl & RAPL_DRAM) +			outp += sprintf(outp, "  RAM_W "); +		if (do_rapl & RAPL_PKG_PERF_STATUS) +			outp += sprintf(outp, "  PKG_%% "); +		if (do_rapl & RAPL_DRAM_PERF_STATUS) +			outp += sprintf(outp, "  RAM_%% "); +		outp += sprintf(outp, "  time "); + +	} +	outp += sprintf(outp, "\n"); +} + +int dump_counters(struct thread_data *t, struct core_data *c, +	struct pkg_data *p) +{ +	outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p); + +	if (t) { +		outp += sprintf(outp, "CPU: %d flags 0x%x\n", +			t->cpu_id, t->flags); 
+		outp += sprintf(outp, "TSC: %016llX\n", t->tsc); +		outp += sprintf(outp, "aperf: %016llX\n", t->aperf); +		outp += sprintf(outp, "mperf: %016llX\n", t->mperf); +		outp += sprintf(outp, "c1: %016llX\n", t->c1); +		outp += sprintf(outp, "msr0x%x: %08llX\n", +			extra_delta_offset32, t->extra_delta32); +		outp += sprintf(outp, "msr0x%x: %016llX\n", +			extra_delta_offset64, t->extra_delta64); +		outp += sprintf(outp, "msr0x%x: %08llX\n", +			extra_msr_offset32, t->extra_msr32); +		outp += sprintf(outp, "msr0x%x: %016llX\n", +			extra_msr_offset64, t->extra_msr64); +		if (do_smi) +			outp += sprintf(outp, "SMI: %08X\n", t->smi_count); +	} + +	if (c) { +		outp += sprintf(outp, "core: %d\n", c->core_id); +		outp += sprintf(outp, "c3: %016llX\n", c->c3); +		outp += sprintf(outp, "c6: %016llX\n", c->c6); +		outp += sprintf(outp, "c7: %016llX\n", c->c7); +		outp += sprintf(outp, "DTS: %dC\n", c->core_temp_c); +	} + +	if (p) { +		outp += sprintf(outp, "package: %d\n", p->package_id); +		outp += sprintf(outp, "pc2: %016llX\n", p->pc2); +		outp += sprintf(outp, "pc3: %016llX\n", p->pc3); +		outp += sprintf(outp, "pc6: %016llX\n", p->pc6); +		outp += sprintf(outp, "pc7: %016llX\n", p->pc7); +		outp += sprintf(outp, "pc8: %016llX\n", p->pc8); +		outp += sprintf(outp, "pc9: %016llX\n", p->pc9); +		outp += sprintf(outp, "pc10: %016llX\n", p->pc10); +		outp += sprintf(outp, "Joules PKG: %0X\n", p->energy_pkg); +		outp += sprintf(outp, "Joules COR: %0X\n", p->energy_cores); +		outp += sprintf(outp, "Joules GFX: %0X\n", p->energy_gfx); +		outp += sprintf(outp, "Joules RAM: %0X\n", p->energy_dram); +		outp += sprintf(outp, "Throttle PKG: %0X\n", +			p->rapl_pkg_perf_status); +		outp += sprintf(outp, "Throttle RAM: %0X\n", +			p->rapl_dram_perf_status); +		outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c); +	} + +	outp += sprintf(outp, "\n"); + +	return 0; +} + +/* + * column formatting convention & formats + */ +int format_counters(struct thread_data *t, struct core_data *c, +	
struct pkg_data *p) +{ +	double interval_float; +	char *fmt8; + +	 /* if showing only 1st thread in core and this isn't one, bail out */ +	if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) +		return 0; + +	 /* if showing only 1st thread in pkg and this isn't one, bail out */ +	if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; + +	interval_float = tv_delta.tv_sec + tv_delta.tv_usec/1000000.0; + +	/* topo columns, print blanks on 1st (average) line */ +	if (t == &average.threads) { +		if (show_pkg) +			outp += sprintf(outp, "       -"); +		if (show_core) +			outp += sprintf(outp, "       -"); +		if (show_cpu) +			outp += sprintf(outp, "       -"); +	} else { +		if (show_pkg) { +			if (p) +				outp += sprintf(outp, "%8d", p->package_id); +			else +				outp += sprintf(outp, "       -"); +		} +		if (show_core) { +			if (c) +				outp += sprintf(outp, "%8d", c->core_id); +			else +				outp += sprintf(outp, "       -"); +		} +		if (show_cpu) +			outp += sprintf(outp, "%8d", t->cpu_id); +	} + +	/* AvgMHz */ +	if (has_aperf) +		outp += sprintf(outp, "%8.0f", +			1.0 / units * t->aperf / interval_float); + +	/* %c0 */ +	if (do_nhm_cstates) { +		if (!skip_c0) +			outp += sprintf(outp, "%8.2f", 100.0 * t->mperf/t->tsc); +		else +			outp += sprintf(outp, "********"); +	} + +	/* BzyMHz */ +	if (has_aperf) +		outp += sprintf(outp, "%8.0f", +			1.0 * t->tsc / units * t->aperf / t->mperf / interval_float); + +	/* TSC */ +	outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); + +	/* SMI */ +	if (do_smi) +		outp += sprintf(outp, "%8d", t->smi_count); + +	/* delta */ +	if (extra_delta_offset32) +		outp += sprintf(outp, "  %11llu", t->extra_delta32); + +	/* DELTA */ +	if (extra_delta_offset64) +		outp += sprintf(outp, "  %11llu", t->extra_delta64); +	/* msr */ +	if (extra_msr_offset32) +		outp += sprintf(outp, "  0x%08llx", t->extra_msr32); + +	/* MSR */ +	if (extra_msr_offset64) +		outp += sprintf(outp, "  0x%016llx", 
t->extra_msr64); + +	if (do_nhm_cstates) { +		if (!skip_c1) +			outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); +		else +			outp += sprintf(outp, "********"); +	} + +	/* print per-core data only for 1st thread in core */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) +		goto done; + +	if (do_nhm_cstates && !do_slm_cstates) +		outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); +	if (do_nhm_cstates) +		outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); +	if (do_snb_cstates) +		outp += sprintf(outp, "%8.2f", 100.0 * c->c7/t->tsc); + +	if (do_dts) +		outp += sprintf(outp, "%8d", c->core_temp_c); + +	/* print per-package data only for 1st core in package */ +	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		goto done; + +	if (do_ptm) +		outp += sprintf(outp, "%8d", p->pkg_temp_c); + +	if (do_snb_cstates) +		outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); +	if (do_nhm_cstates && !do_slm_cstates) +		outp += sprintf(outp, "%8.2f", 100.0 * p->pc3/t->tsc); +	if (do_nhm_cstates && !do_slm_cstates) +		outp += sprintf(outp, "%8.2f", 100.0 * p->pc6/t->tsc); +	if (do_snb_cstates) +		outp += sprintf(outp, "%8.2f", 100.0 * p->pc7/t->tsc); +	if (do_c8_c9_c10) { +		outp += sprintf(outp, "%8.2f", 100.0 * p->pc8/t->tsc); +		outp += sprintf(outp, "%8.2f", 100.0 * p->pc9/t->tsc); +		outp += sprintf(outp, "%8.2f", 100.0 * p->pc10/t->tsc); +	} + +	/* + 	 * If measurement interval exceeds minimum RAPL Joule Counter range, + 	 * indicate that results are suspect by printing "**" in fraction place. 
+ 	 */ +	if (interval_float < rapl_joule_counter_range) +		fmt8 = "%8.2f"; +	else +		fmt8 = " %6.0f**"; + +	if (do_rapl && !rapl_joules) { +		if (do_rapl & RAPL_PKG) +			outp += sprintf(outp, fmt8, p->energy_pkg * rapl_energy_units / interval_float); +		if (do_rapl & RAPL_CORES) +			outp += sprintf(outp, fmt8, p->energy_cores * rapl_energy_units / interval_float); +		if (do_rapl & RAPL_GFX) +			outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); +		if (do_rapl & RAPL_DRAM) +			outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float); +		if (do_rapl & RAPL_PKG_PERF_STATUS) +			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); +		if (do_rapl & RAPL_DRAM_PERF_STATUS) +			outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); +	} else { +		if (do_rapl & RAPL_PKG) +			outp += sprintf(outp, fmt8, +					p->energy_pkg * rapl_energy_units); +		if (do_rapl & RAPL_CORES) +			outp += sprintf(outp, fmt8, +					p->energy_cores * rapl_energy_units); +		if (do_rapl & RAPL_GFX) +			outp += sprintf(outp, fmt8, +					p->energy_gfx * rapl_energy_units); +		if (do_rapl & RAPL_DRAM) +			outp += sprintf(outp, fmt8, +					p->energy_dram * rapl_energy_units); +		if (do_rapl & RAPL_PKG_PERF_STATUS) +			outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); +		if (do_rapl & RAPL_DRAM_PERF_STATUS) +			outp += sprintf(outp, fmt8, 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float); +	outp += sprintf(outp, fmt8, interval_float); + +	} +done: +	outp += sprintf(outp, "\n"); + +	return 0; +} + +void flush_stdout() +{ +	fputs(output_buffer, stdout); +	fflush(stdout); +	outp = output_buffer; +} +void flush_stderr() +{ +	fputs(output_buffer, stderr); +	outp = output_buffer; +} +void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ +	static int printed; + +	if 
(!printed || !summary_only) +		print_header(); + +	if (topo.num_cpus > 1) +		format_counters(&average.threads, &average.cores, +			&average.packages); + +	printed = 1; + +	if (summary_only) +		return; + +	for_all_cpus(format_counters, t, c, p); +} + +#define DELTA_WRAP32(new, old)			\ +	if (new > old) {			\ +		old = new - old;		\ +	} else {				\ +		old = 0x100000000 + new - old;	\ +	} + +void +delta_package(struct pkg_data *new, struct pkg_data *old) +{ +	old->pc2 = new->pc2 - old->pc2; +	old->pc3 = new->pc3 - old->pc3; +	old->pc6 = new->pc6 - old->pc6; +	old->pc7 = new->pc7 - old->pc7; +	old->pc8 = new->pc8 - old->pc8; +	old->pc9 = new->pc9 - old->pc9; +	old->pc10 = new->pc10 - old->pc10; +	old->pkg_temp_c = new->pkg_temp_c; + +	DELTA_WRAP32(new->energy_pkg, old->energy_pkg); +	DELTA_WRAP32(new->energy_cores, old->energy_cores); +	DELTA_WRAP32(new->energy_gfx, old->energy_gfx); +	DELTA_WRAP32(new->energy_dram, old->energy_dram); +	DELTA_WRAP32(new->rapl_pkg_perf_status, old->rapl_pkg_perf_status); +	DELTA_WRAP32(new->rapl_dram_perf_status, old->rapl_dram_perf_status); +} + +void +delta_core(struct core_data *new, struct core_data *old) +{ +	old->c3 = new->c3 - old->c3; +	old->c6 = new->c6 - old->c6; +	old->c7 = new->c7 - old->c7; +	old->core_temp_c = new->core_temp_c; +} + +/* + * old = new - old + */ +void +delta_thread(struct thread_data *new, struct thread_data *old, +	struct core_data *core_delta) +{ +	old->tsc = new->tsc - old->tsc; + +	/* check for TSC < 1 Mcycles over interval */ +	if (old->tsc < (1000 * 1000)) +		errx(-3, "Insanely slow TSC rate, TSC stops in idle?\n" +		     "You can disable all c-states by booting with \"idle=poll\"\n" +		     "or just the deep ones with \"processor.max_cstate=1\""); + +	old->c1 = new->c1 - old->c1; + +	if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) { +		old->aperf = new->aperf - old->aperf; +		old->mperf = new->mperf - old->mperf; +	} else { + +		if (!aperf_mperf_unstable) { +			fprintf(stderr, "%s: APERF 
or MPERF went backwards *\n", progname); +			fprintf(stderr, "* Frequency results do not cover entire interval *\n"); +			fprintf(stderr, "* fix this by running Linux-2.6.30 or later *\n"); + +			aperf_mperf_unstable = 1; +		} +		/* +		 * mperf delta is likely a huge "positive" number +		 * can not use it for calculating c0 time +		 */ +		skip_c0 = 1; +		skip_c1 = 1; +	} + + +	if (use_c1_residency_msr) { +		/* +		 * Some models have a dedicated C1 residency MSR, +		 * which should be more accurate than the derivation below. +		 */ +	} else { +		/* +		 * As counter collection is not atomic, +		 * it is possible for mperf's non-halted cycles + idle states +		 * to exceed TSC's all cycles: show c1 = 0% in that case. +		 */ +		if ((old->mperf + core_delta->c3 + core_delta->c6 + core_delta->c7) > old->tsc) +			old->c1 = 0; +		else { +			/* normal case, derive c1 */ +			old->c1 = old->tsc - old->mperf - core_delta->c3 +				- core_delta->c6 - core_delta->c7; +		} +	} + +	if (old->mperf == 0) { +		if (verbose > 1) fprintf(stderr, "cpu%d MPERF 0!\n", old->cpu_id); +		old->mperf = 1;	/* divide by 0 protection */ +	} + +	old->extra_delta32 = new->extra_delta32 - old->extra_delta32; +	old->extra_delta32 &= 0xFFFFFFFF; + +	old->extra_delta64 = new->extra_delta64 - old->extra_delta64; + +	/* +	 * Extra MSR is just a snapshot, simply copy latest w/o subtracting +	 */ +	old->extra_msr32 = new->extra_msr32; +	old->extra_msr64 = new->extra_msr64; + +	if (do_smi) +		old->smi_count = new->smi_count - old->smi_count; +} + +int delta_cpu(struct thread_data *t, struct core_data *c, +	struct pkg_data *p, struct thread_data *t2, +	struct core_data *c2, struct pkg_data *p2) +{ +	/* calculate core delta only for 1st thread in core */ +	if (t->flags & CPU_IS_FIRST_THREAD_IN_CORE) +		delta_core(c, c2); + +	/* always calculate thread delta */ +	delta_thread(t, t2, c2);	/* c2 is core delta */ + +	/* calculate package delta only for 1st core in package */ +	if (t->flags & 
CPU_IS_FIRST_CORE_IN_PACKAGE) +		delta_package(p, p2); + +	return 0; +} + +void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ +	t->tsc = 0; +	t->aperf = 0; +	t->mperf = 0; +	t->c1 = 0; + +	t->smi_count = 0; +	t->extra_delta32 = 0; +	t->extra_delta64 = 0; + +	/* tells format_counters to dump all fields from this set */ +	t->flags = CPU_IS_FIRST_THREAD_IN_CORE | CPU_IS_FIRST_CORE_IN_PACKAGE; + +	c->c3 = 0; +	c->c6 = 0; +	c->c7 = 0; +	c->core_temp_c = 0; + +	p->pc2 = 0; +	p->pc3 = 0; +	p->pc6 = 0; +	p->pc7 = 0; +	p->pc8 = 0; +	p->pc9 = 0; +	p->pc10 = 0; + +	p->energy_pkg = 0; +	p->energy_dram = 0; +	p->energy_cores = 0; +	p->energy_gfx = 0; +	p->rapl_pkg_perf_status = 0; +	p->rapl_dram_perf_status = 0; +	p->pkg_temp_c = 0; +} +int sum_counters(struct thread_data *t, struct core_data *c, +	struct pkg_data *p) +{ +	average.threads.tsc += t->tsc; +	average.threads.aperf += t->aperf; +	average.threads.mperf += t->mperf; +	average.threads.c1 += t->c1; + +	average.threads.extra_delta32 += t->extra_delta32; +	average.threads.extra_delta64 += t->extra_delta64; + +	/* sum per-core values only for 1st thread in core */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) +		return 0; + +	average.cores.c3 += c->c3; +	average.cores.c6 += c->c6; +	average.cores.c7 += c->c7; + +	average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); + +	/* sum per-pkg values only for 1st core in pkg */ +	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; + +	average.packages.pc2 += p->pc2; +	average.packages.pc3 += p->pc3; +	average.packages.pc6 += p->pc6; +	average.packages.pc7 += p->pc7; +	average.packages.pc8 += p->pc8; +	average.packages.pc9 += p->pc9; +	average.packages.pc10 += p->pc10; + +	average.packages.energy_pkg += p->energy_pkg; +	average.packages.energy_dram += p->energy_dram; +	average.packages.energy_cores += p->energy_cores; +	average.packages.energy_gfx += p->energy_gfx; + +	average.packages.pkg_temp_c = 
MAX(average.packages.pkg_temp_c, p->pkg_temp_c); + +	average.packages.rapl_pkg_perf_status += p->rapl_pkg_perf_status; +	average.packages.rapl_dram_perf_status += p->rapl_dram_perf_status; +	return 0; +} +/* + * sum the counters for all cpus in the system + * compute the weighted average + */ +void compute_average(struct thread_data *t, struct core_data *c, +	struct pkg_data *p) +{ +	clear_counters(&average.threads, &average.cores, &average.packages); + +	for_all_cpus(sum_counters, t, c, p); + +	average.threads.tsc /= topo.num_cpus; +	average.threads.aperf /= topo.num_cpus; +	average.threads.mperf /= topo.num_cpus; +	average.threads.c1 /= topo.num_cpus; + +	average.threads.extra_delta32 /= topo.num_cpus; +	average.threads.extra_delta32 &= 0xFFFFFFFF; + +	average.threads.extra_delta64 /= topo.num_cpus; + +	average.cores.c3 /= topo.num_cores; +	average.cores.c6 /= topo.num_cores; +	average.cores.c7 /= topo.num_cores; + +	average.packages.pc2 /= topo.num_packages; +	average.packages.pc3 /= topo.num_packages; +	average.packages.pc6 /= topo.num_packages; +	average.packages.pc7 /= topo.num_packages; + +	average.packages.pc8 /= topo.num_packages; +	average.packages.pc9 /= topo.num_packages; +	average.packages.pc10 /= topo.num_packages; +} + +static unsigned long long rdtsc(void) +{ +	unsigned int low, high; + +	asm volatile("rdtsc" : "=a" (low), "=d" (high)); + +	return low | ((unsigned long long)high) << 32; +} + + +/* + * get_counters(...) 
+ * migrate to cpu + * acquire and record local counters for that cpu + */ +int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ +	int cpu = t->cpu_id; +	unsigned long long msr; + +	if (cpu_migrate(cpu)) { +		fprintf(stderr, "Could not migrate to CPU %d\n", cpu); +		return -1; +	} + +	t->tsc = rdtsc();	/* we are running on local CPU of interest */ + +	if (has_aperf) { +		if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) +			return -3; +		if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) +			return -4; +	} + +	if (do_smi) { +		if (get_msr(cpu, MSR_SMI_COUNT, &msr)) +			return -5; +		t->smi_count = msr & 0xFFFFFFFF; +	} +	if (extra_delta_offset32) { +		if (get_msr(cpu, extra_delta_offset32, &msr)) +			return -5; +		t->extra_delta32 = msr & 0xFFFFFFFF; +	} + +	if (extra_delta_offset64) +		if (get_msr(cpu, extra_delta_offset64, &t->extra_delta64)) +			return -5; + +	if (extra_msr_offset32) { +		if (get_msr(cpu, extra_msr_offset32, &msr)) +			return -5; +		t->extra_msr32 = msr & 0xFFFFFFFF; +	} + +	if (extra_msr_offset64) +		if (get_msr(cpu, extra_msr_offset64, &t->extra_msr64)) +			return -5; + +	if (use_c1_residency_msr) { +		if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) +			return -6; +	} + +	/* collect core counters only for 1st thread in core */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) +		return 0; + +	if (do_nhm_cstates && !do_slm_cstates) { +		if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) +			return -6; +	} + +	if (do_nhm_cstates) { +		if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) +			return -7; +	} + +	if (do_snb_cstates) +		if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) +			return -8; + +	if (do_dts) { +		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) +			return -9; +		c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); +	} + + +	/* collect package counters only for 1st core in package */ +	if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; + +	if (do_nhm_cstates && !do_slm_cstates) { +		if (get_msr(cpu, 
MSR_PKG_C3_RESIDENCY, &p->pc3)) +			return -9; +		if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) +			return -10; +	} +	if (do_snb_cstates) { +		if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) +			return -11; +		if (get_msr(cpu, MSR_PKG_C7_RESIDENCY, &p->pc7)) +			return -12; +	} +	if (do_c8_c9_c10) { +		if (get_msr(cpu, MSR_PKG_C8_RESIDENCY, &p->pc8)) +			return -13; +		if (get_msr(cpu, MSR_PKG_C9_RESIDENCY, &p->pc9)) +			return -13; +		if (get_msr(cpu, MSR_PKG_C10_RESIDENCY, &p->pc10)) +			return -13; +	} +	if (do_rapl & RAPL_PKG) { +		if (get_msr(cpu, MSR_PKG_ENERGY_STATUS, &msr)) +			return -13; +		p->energy_pkg = msr & 0xFFFFFFFF; +	} +	if (do_rapl & RAPL_CORES) { +		if (get_msr(cpu, MSR_PP0_ENERGY_STATUS, &msr)) +			return -14; +		p->energy_cores = msr & 0xFFFFFFFF; +	} +	if (do_rapl & RAPL_DRAM) { +		if (get_msr(cpu, MSR_DRAM_ENERGY_STATUS, &msr)) +			return -15; +		p->energy_dram = msr & 0xFFFFFFFF; +	} +	if (do_rapl & RAPL_GFX) { +		if (get_msr(cpu, MSR_PP1_ENERGY_STATUS, &msr)) +			return -16; +		p->energy_gfx = msr & 0xFFFFFFFF; +	} +	if (do_rapl & RAPL_PKG_PERF_STATUS) { +		if (get_msr(cpu, MSR_PKG_PERF_STATUS, &msr)) +			return -16; +		p->rapl_pkg_perf_status = msr & 0xFFFFFFFF; +	} +	if (do_rapl & RAPL_DRAM_PERF_STATUS) { +		if (get_msr(cpu, MSR_DRAM_PERF_STATUS, &msr)) +			return -16; +		p->rapl_dram_perf_status = msr & 0xFFFFFFFF; +	} +	if (do_ptm) { +		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) +			return -17; +		p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F); +	} +	return 0; +} + +void print_verbose_header(void) +{ +	unsigned long long msr; +	unsigned int ratio; + +	if (!do_nehalem_platform_info) +		return; + +	get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); + +	fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); + +	ratio = (msr >> 40) & 0xFF; +	fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", +		ratio, bclk, ratio * bclk); + +	ratio = (msr >> 8) & 0xFF; +	fprintf(stderr, "%d * %.0f = %.0f MHz TSC 
frequency\n", +		ratio, bclk, ratio * bclk); + +	get_msr(0, MSR_IA32_POWER_CTL, &msr); +	fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", +		msr, msr & 0x2 ? "EN" : "DIS"); + +	if (!do_ivt_turbo_ratio_limit) +		goto print_nhm_turbo_ratio_limits; + +	get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); + +	fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + +	ratio = (msr >> 56) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 16 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 48) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 15 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 40) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 14 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 32) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 13 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 24) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 12 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 16) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 11 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 8) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 10 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 0) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", +			ratio, bclk, ratio * bclk); + +print_nhm_turbo_ratio_limits: +	get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + +#define SNB_C1_AUTO_UNDEMOTE              (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE              (1UL << 28) + +	fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); + +	fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: ", +		(msr & SNB_C3_AUTO_UNDEMOTE) ? 
"UNdemote-C3, " : "", +		(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", +		(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", +		(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", +		(msr & (1 << 15)) ? "" : "UN", +		(unsigned int)msr & 7); + + +	switch(msr & 0x7) { +	case 0: +		fprintf(stderr, do_slm_cstates ? "no pkg states" : "pc0"); +		break; +	case 1: +		fprintf(stderr, do_slm_cstates ? "no pkg states" : do_snb_cstates ? "pc2" : "pc0"); +		break; +	case 2: +		fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc6-noret" : "pc3"); +		break; +	case 3: +		fprintf(stderr, do_slm_cstates ? "invalid" : "pc6"); +		break; +	case 4: +		fprintf(stderr, do_slm_cstates ? "pc4" : "pc7"); +		break; +	case 5: +		fprintf(stderr, do_slm_cstates ? "invalid" : do_snb_cstates ? "pc7s" : "invalid"); +		break; +	case 6: +		fprintf(stderr, do_slm_cstates ? "pc6" : "invalid"); +		break; +	case 7: +		fprintf(stderr, do_slm_cstates ? "pc7" : "unlimited"); +		break; +	default: +		fprintf(stderr, "invalid"); +	} +	fprintf(stderr, ")\n"); + +	if (!do_nehalem_turbo_ratio_limit) +		return; + +	get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + +	fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + +	ratio = (msr >> 56) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 8 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 48) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 7 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 40) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 6 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 32) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 5 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 24) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 4 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 16) & 0xFF; +	if (ratio) +		
fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 3 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 8) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 2 active cores\n", +			ratio, bclk, ratio * bclk); + +	ratio = (msr >> 0) & 0xFF; +	if (ratio) +		fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", +			ratio, bclk, ratio * bclk); +} + +void free_all_buffers(void) +{ +	CPU_FREE(cpu_present_set); +	cpu_present_set = NULL; +	cpu_present_set = 0; + +	CPU_FREE(cpu_affinity_set); +	cpu_affinity_set = NULL; +	cpu_affinity_setsize = 0; + +	free(thread_even); +	free(core_even); +	free(package_even); + +	thread_even = NULL; +	core_even = NULL; +	package_even = NULL; + +	free(thread_odd); +	free(core_odd); +	free(package_odd); + +	thread_odd = NULL; +	core_odd = NULL; +	package_odd = NULL; + +	free(output_buffer); +	output_buffer = NULL; +	outp = NULL; +} + +/* + * Open a file, and exit on failure + */ +FILE *fopen_or_die(const char *path, const char *mode) +{ +	FILE *filep = fopen(path, "r"); +	if (!filep) +		err(1, "%s: open failed", path); +	return filep; +} + +/* + * Parse a file containing a single int. + */ +int parse_int_file(const char *fmt, ...) 
+{ +	va_list args; +	char path[PATH_MAX]; +	FILE *filep; +	int value; + +	va_start(args, fmt); +	vsnprintf(path, sizeof(path), fmt, args); +	va_end(args); +	filep = fopen_or_die(path, "r"); +	if (fscanf(filep, "%d", &value) != 1) +		err(1, "%s: failed to parse number from file", path); +	fclose(filep); +	return value; +} + +/* + * cpu_is_first_sibling_in_core(cpu) + * return 1 if given CPU is 1st HT sibling in the core + */ +int cpu_is_first_sibling_in_core(int cpu) +{ +	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); +} + +/* + * cpu_is_first_core_in_package(cpu) + * return 1 if given CPU is 1st core in package + */ +int cpu_is_first_core_in_package(int cpu) +{ +	return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_siblings_list", cpu); +} + +int get_physical_package_id(int cpu) +{ +	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/physical_package_id", cpu); +} + +int get_core_id(int cpu) +{ +	return parse_int_file("/sys/devices/system/cpu/cpu%d/topology/core_id", cpu); +} + +int get_num_ht_siblings(int cpu) +{ +	char path[80]; +	FILE *filep; +	int sib1, sib2; +	int matches; +	char character; + +	sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); +	filep = fopen_or_die(path, "r"); +	/* +	 * file format: +	 * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) +	 * otherwinse 1 sibling (self). 
+	 */ +	matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); + +	fclose(filep); + +	if (matches == 3) +		return 2; +	else +		return 1; +} + +/* + * run func(thread, core, package) in topology order + * skip non-present cpus + */ + +int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *, +	struct pkg_data *, struct thread_data *, struct core_data *, +	struct pkg_data *), struct thread_data *thread_base, +	struct core_data *core_base, struct pkg_data *pkg_base, +	struct thread_data *thread_base2, struct core_data *core_base2, +	struct pkg_data *pkg_base2) +{ +	int retval, pkg_no, core_no, thread_no; + +	for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) { +		for (core_no = 0; core_no < topo.num_cores_per_pkg; ++core_no) { +			for (thread_no = 0; thread_no < +				topo.num_threads_per_core; ++thread_no) { +				struct thread_data *t, *t2; +				struct core_data *c, *c2; +				struct pkg_data *p, *p2; + +				t = GET_THREAD(thread_base, thread_no, core_no, pkg_no); + +				if (cpu_is_not_present(t->cpu_id)) +					continue; + +				t2 = GET_THREAD(thread_base2, thread_no, core_no, pkg_no); + +				c = GET_CORE(core_base, core_no, pkg_no); +				c2 = GET_CORE(core_base2, core_no, pkg_no); + +				p = GET_PKG(pkg_base, pkg_no); +				p2 = GET_PKG(pkg_base2, pkg_no); + +				retval = func(t, c, p, t2, c2, p2); +				if (retval) +					return retval; +			} +		} +	} +	return 0; +} + +/* + * run func(cpu) on every cpu in /proc/stat + * return max_cpu number + */ +int for_all_proc_cpus(int (func)(int)) +{ +	FILE *fp; +	int cpu_num; +	int retval; + +	fp = fopen_or_die(proc_stat, "r"); + +	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); +	if (retval != 0) +		err(1, "%s: failed to parse format", proc_stat); + +	while (1) { +		retval = fscanf(fp, "cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", &cpu_num); +		if (retval != 1) +			break; + +		retval = func(cpu_num); +		if (retval) { +			fclose(fp); +			return(retval); +		} +	} +	fclose(fp); +	
return 0; +} + +void re_initialize(void) +{ +	free_all_buffers(); +	setup_all_buffers(); +	printf("turbostat: re-initialized with num_cpus %d\n", topo.num_cpus); +} + + +/* + * count_cpus() + * remember the last one seen, it will be the max + */ +int count_cpus(int cpu) +{ +	if (topo.max_cpu_num < cpu) +		topo.max_cpu_num = cpu; + +	topo.num_cpus += 1; +	return 0; +} +int mark_cpu_present(int cpu) +{ +	CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set); +	return 0; +} + +void turbostat_loop() +{ +	int retval; +	int restarted = 0; + +restart: +	restarted++; + +	retval = for_all_cpus(get_counters, EVEN_COUNTERS); +	if (retval < -1) { +		exit(retval); +	} else if (retval == -1) { +		if (restarted > 1) { +			exit(retval); +		} +		re_initialize(); +		goto restart; +	} +	restarted = 0; +	gettimeofday(&tv_even, (struct timezone *)NULL); + +	while (1) { +		if (for_all_proc_cpus(cpu_is_not_present)) { +			re_initialize(); +			goto restart; +		} +		sleep(interval_sec); +		retval = for_all_cpus(get_counters, ODD_COUNTERS); +		if (retval < -1) { +			exit(retval); +		} else if (retval == -1) { +			re_initialize(); +			goto restart; +		} +		gettimeofday(&tv_odd, (struct timezone *)NULL); +		timersub(&tv_odd, &tv_even, &tv_delta); +		for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); +		compute_average(EVEN_COUNTERS); +		format_all_counters(EVEN_COUNTERS); +		flush_stdout(); +		sleep(interval_sec); +		retval = for_all_cpus(get_counters, EVEN_COUNTERS); +		if (retval < -1) { +			exit(retval); +		} else if (retval == -1) { +			re_initialize(); +			goto restart; +		} +		gettimeofday(&tv_even, (struct timezone *)NULL); +		timersub(&tv_even, &tv_odd, &tv_delta); +		for_all_cpus_2(delta_cpu, EVEN_COUNTERS, ODD_COUNTERS); +		compute_average(ODD_COUNTERS); +		format_all_counters(ODD_COUNTERS); +		flush_stdout(); +	} +} + +void check_dev_msr() +{ +	struct stat sb; + +	if (stat("/dev/cpu/0/msr", &sb)) +		err(-5, "no /dev/cpu/0/msr\n" +		    "Try \"# modprobe msr\""); +} + +void 
check_super_user() +{ +	if (getuid() != 0) +		errx(-6, "must be root"); +} + +int has_nehalem_turbo_ratio_limit(unsigned int family, unsigned int model) +{ +	if (!genuine_intel) +		return 0; + +	if (family != 6) +		return 0; + +	switch (model) { +	case 0x1A:	/* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */ +	case 0x1E:	/* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */ +	case 0x1F:	/* Core i7 and i5 Processor - Nehalem */ +	case 0x25:	/* Westmere Client - Clarkdale, Arrandale */ +	case 0x2C:	/* Westmere EP - Gulftown */ +	case 0x2A:	/* SNB */ +	case 0x2D:	/* SNB Xeon */ +	case 0x3A:	/* IVB */ +	case 0x3E:	/* IVB Xeon */ +	case 0x3C:	/* HSW */ +	case 0x3F:	/* HSX */ +	case 0x45:	/* HSW */ +	case 0x46:	/* HSW */ +	case 0x37:	/* BYT */ +	case 0x4D:	/* AVN */ +	case 0x3D:	/* BDW */ +	case 0x4F:	/* BDX */ +	case 0x56:	/* BDX-DE */ +		return 1; +	case 0x2E:	/* Nehalem-EX Xeon - Beckton */ +	case 0x2F:	/* Westmere-EX Xeon - Eagleton */ +	default: +		return 0; +	} +} +int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) +{ +	if (!genuine_intel) +		return 0; + +	if (family != 6) +		return 0; + +	switch (model) { +	case 0x3E:	/* IVB Xeon */ +		return 1; +	default: +		return 0; +	} +} + +/* + * print_epb() + * Decode the ENERGY_PERF_BIAS MSR + */ +int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ +	unsigned long long msr; +	char *epb_string; +	int cpu; + +	if (!has_epb) +		return 0; + +	cpu = t->cpu_id; + +	/* EPB is per-package */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; + +	if (cpu_migrate(cpu)) { +		fprintf(stderr, "Could not migrate to CPU %d\n", cpu); +		return -1; +	} + +	if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) +		return 0; + +	switch (msr & 0x7) { +	case ENERGY_PERF_BIAS_PERFORMANCE: +		epb_string = "performance"; +		break; +	case ENERGY_PERF_BIAS_NORMAL: +		epb_string = "balanced"; +		break; +	case 
ENERGY_PERF_BIAS_POWERSAVE: +		epb_string = "powersave"; +		break; +	default: +		epb_string = "custom"; +		break; +	} +	fprintf(stderr, "cpu%d: MSR_IA32_ENERGY_PERF_BIAS: 0x%08llx (%s)\n", cpu, msr, epb_string); + +	return 0; +} + +#define	RAPL_POWER_GRANULARITY	0x7FFF	/* 15 bit power granularity */ +#define	RAPL_TIME_GRANULARITY	0x3F /* 6 bit time granularity */ + +double get_tdp(model) +{ +	unsigned long long msr; + +	if (do_rapl & RAPL_PKG_POWER_INFO) +		if (!get_msr(0, MSR_PKG_POWER_INFO, &msr)) +			return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; + +	switch (model) { +	case 0x37: +	case 0x4D: +		return 30.0; +	default: +		return 135.0; +	} +} + + +/* + * rapl_probe() + * + * sets do_rapl, rapl_power_units, rapl_energy_units, rapl_time_units + */ +void rapl_probe(unsigned int family, unsigned int model) +{ +	unsigned long long msr; +	unsigned int time_unit; +	double tdp; + +	if (!genuine_intel) +		return; + +	if (family != 6) +		return; + +	switch (model) { +	case 0x2A: +	case 0x3A: +	case 0x3C:	/* HSW */ +	case 0x45:	/* HSW */ +	case 0x46:	/* HSW */ +	case 0x3D:	/* BDW */ +		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; +		break; +	case 0x3F:	/* HSX */ +	case 0x4F:	/* BDX */ +	case 0x56:	/* BDX-DE */ +		do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; +		break; +	case 0x2D: +	case 0x3E: +		do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; +		break; +	case 0x37:	/* BYT */ +	case 0x4D:	/* AVN */ +		do_rapl = RAPL_PKG | RAPL_CORES ; +		break; +	default: +		return; +	} + +	/* units on package 0, verify later other packages match */ +	if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) +		return; + +	rapl_power_units = 1.0 / (1 << (msr & 0xF)); +	if (model == 0x37) +		rapl_energy_units = 1.0 * (1 << (msr >> 8 & 0x1F)) / 1000000; +	else +		rapl_energy_units = 1.0 / (1 << (msr >> 8 & 
0x1F)); + +	time_unit = msr >> 16 & 0xF; +	if (time_unit == 0) +		time_unit = 0xA; + +	rapl_time_units = 1.0 / (1 << (time_unit)); + +	tdp = get_tdp(model); + +	rapl_joule_counter_range = 0xFFFFFFFF * rapl_energy_units / tdp; +	if (verbose) +		fprintf(stderr, "RAPL: %.0f sec. Joule Counter Range, at %.0f Watts\n", rapl_joule_counter_range, tdp); + +	return; +} + +int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ +	unsigned long long msr; +	unsigned int dts; +	int cpu; + +	if (!(do_dts || do_ptm)) +		return 0; + +	cpu = t->cpu_id; + +	/* DTS is per-core, no need to print for each thread */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))  +		return 0; + +	if (cpu_migrate(cpu)) { +		fprintf(stderr, "Could not migrate to CPU %d\n", cpu); +		return -1; +	} + +	if (do_ptm && (t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) { +		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr)) +			return 0; + +		dts = (msr >> 16) & 0x7F; +		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", +			cpu, msr, tcc_activation_temp - dts); + +#ifdef	THERM_DEBUG +		if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr)) +			return 0; + +		dts = (msr >> 16) & 0x7F; +		dts2 = (msr >> 8) & 0x7F; +		fprintf(stderr, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", +			cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2); +#endif +	} + + +	if (do_dts) { +		unsigned int resolution; + +		if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) +			return 0; + +		dts = (msr >> 16) & 0x7F; +		resolution = (msr >> 27) & 0xF; +		fprintf(stderr, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n", +			cpu, msr, tcc_activation_temp - dts, resolution); + +#ifdef THERM_DEBUG +		if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr)) +			return 0; + +		dts = (msr >> 16) & 0x7F; +		dts2 = (msr >> 8) & 0x7F; +		fprintf(stderr, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n", +			cpu, msr, tcc_activation_temp - dts, 
tcc_activation_temp - dts2); +#endif +	} + +	return 0; +} +	 +void print_power_limit_msr(int cpu, unsigned long long msr, char *label) +{ +	fprintf(stderr, "cpu%d: %s: %sabled (%f Watts, %f sec, clamp %sabled)\n", +		cpu, label, +		((msr >> 15) & 1) ? "EN" : "DIS", +		((msr >> 0) & 0x7FFF) * rapl_power_units, +		(1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units, +		(((msr >> 16) & 1) ? "EN" : "DIS")); + +	return; +} + +int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ +	unsigned long long msr; +	int cpu; + +	if (!do_rapl) +		return 0; + +	/* RAPL counters are per package, so print only for 1st thread/package */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; + +	cpu = t->cpu_id; +	if (cpu_migrate(cpu)) { +		fprintf(stderr, "Could not migrate to CPU %d\n", cpu); +		return -1; +	} + +	if (get_msr(cpu, MSR_RAPL_POWER_UNIT, &msr)) +		return -1; + +	if (verbose) { +		fprintf(stderr, "cpu%d: MSR_RAPL_POWER_UNIT: 0x%08llx " +			"(%f Watts, %f Joules, %f sec.)\n", cpu, msr, +			rapl_power_units, rapl_energy_units, rapl_time_units); +	} +	if (do_rapl & RAPL_PKG_POWER_INFO) { + +		if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr)) +                	return -5; + + +		fprintf(stderr, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", +			cpu, msr, +			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units, +			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, +			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, +			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + +	} +	if (do_rapl & RAPL_PKG) { + +		if (get_msr(cpu, MSR_PKG_POWER_LIMIT, &msr)) +			return -9; + +		fprintf(stderr, "cpu%d: MSR_PKG_POWER_LIMIT: 0x%08llx (%slocked)\n", +			cpu, msr, (msr >> 63) & 1 ? 
"": "UN"); + +		print_power_limit_msr(cpu, msr, "PKG Limit #1"); +		fprintf(stderr, "cpu%d: PKG Limit #2: %sabled (%f Watts, %f* sec, clamp %sabled)\n", +			cpu, +			((msr >> 47) & 1) ? "EN" : "DIS", +			((msr >> 32) & 0x7FFF) * rapl_power_units, +			(1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units, +			((msr >> 48) & 1) ? "EN" : "DIS"); +	} + +	if (do_rapl & RAPL_DRAM) { +		if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) +                	return -6; + + +		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", +			cpu, msr, +			((msr >>  0) & RAPL_POWER_GRANULARITY) * rapl_power_units, +			((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, +			((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, +			((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); + + +		if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) +			return -9; +		fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", +				cpu, msr, (msr >> 31) & 1 ? "": "UN"); + +		print_power_limit_msr(cpu, msr, "DRAM Limit"); +	} +	if (do_rapl & RAPL_CORE_POLICY) { +		if (verbose) { +			if (get_msr(cpu, MSR_PP0_POLICY, &msr)) +				return -7; + +			fprintf(stderr, "cpu%d: MSR_PP0_POLICY: %lld\n", cpu, msr & 0xF); +		} +	} +	if (do_rapl & RAPL_CORES) { +		if (verbose) { + +			if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr)) +				return -9; +			fprintf(stderr, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n", +					cpu, msr, (msr >> 31) & 1 ? "": "UN"); +			print_power_limit_msr(cpu, msr, "Cores Limit"); +		} +	} +	if (do_rapl & RAPL_GFX) { +		if (verbose) { +			if (get_msr(cpu, MSR_PP1_POLICY, &msr)) +				return -8; + +			fprintf(stderr, "cpu%d: MSR_PP1_POLICY: %lld\n", cpu, msr & 0xF); + +			if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr)) +				return -9; +			fprintf(stderr, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n", +					cpu, msr, (msr >> 31) & 1 ? 
"": "UN"); +			print_power_limit_msr(cpu, msr, "GFX Limit"); +		} +	} +	return 0; +} + + +int is_snb(unsigned int family, unsigned int model) +{ +	if (!genuine_intel) +		return 0; + +	switch (model) { +	case 0x2A: +	case 0x2D: +	case 0x3A:	/* IVB */ +	case 0x3E:	/* IVB Xeon */ +	case 0x3C:	/* HSW */ +	case 0x3F:	/* HSW */ +	case 0x45:	/* HSW */ +	case 0x46:	/* HSW */ +	case 0x3D:	/* BDW */ +	case 0x4F:	/* BDX */ +	case 0x56:	/* BDX-DE */ +		return 1; +	} +	return 0; +} + +int has_c8_c9_c10(unsigned int family, unsigned int model) +{ +	if (!genuine_intel) +		return 0; + +	switch (model) { +	case 0x45:	/* HSW */ +	case 0x3D:	/* BDW */ +		return 1; +	} +	return 0; +} + + +int is_slm(unsigned int family, unsigned int model) +{ +	if (!genuine_intel) +		return 0; +	switch (model) { +	case 0x37:	/* BYT */ +	case 0x4D:	/* AVN */ +		return 1; +	} +	return 0; +} + +#define SLM_BCLK_FREQS 5 +double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; + +double slm_bclk(void) +{ +	unsigned long long msr = 3; +	unsigned int i; +	double freq; + +	if (get_msr(0, MSR_FSB_FREQ, &msr)) +		fprintf(stderr, "SLM BCLK: unknown\n"); + +	i = msr & 0xf; +	if (i >= SLM_BCLK_FREQS) { +		fprintf(stderr, "SLM BCLK[%d] invalid\n", i); +		msr = 3; +	} +	freq = slm_freq_table[i]; + +	fprintf(stderr, "SLM BCLK: %.1f Mhz\n", freq); + +	return freq; +} + +double discover_bclk(unsigned int family, unsigned int model) +{ +	if (is_snb(family, model)) +		return 100.00; +	else if (is_slm(family, model)) +		return slm_bclk(); +	else +		return 133.33; +} + +/* + * MSR_IA32_TEMPERATURE_TARGET indicates the temperature where + * the Thermal Control Circuit (TCC) activates. + * This is usually equal to tjMax. + * + * Older processors do not have this MSR, so there we guess, + * but also allow cmdline over-ride with -T. 
+ * + * Several MSR temperature values are in units of degrees-C + * below this value, including the Digital Thermal Sensor (DTS), + * Package Thermal Management Sensor (PTM), and thermal event thresholds. + */ +int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p) +{ +	unsigned long long msr; +	unsigned int target_c_local; +	int cpu; + +	/* tcc_activation_temp is used only for dts or ptm */ +	if (!(do_dts || do_ptm)) +		return 0; + +	/* this is a per-package concept */ +	if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) +		return 0; + +	cpu = t->cpu_id; +	if (cpu_migrate(cpu)) { +		fprintf(stderr, "Could not migrate to CPU %d\n", cpu); +		return -1; +	} + +	if (tcc_activation_temp_override != 0) { +		tcc_activation_temp = tcc_activation_temp_override; +		fprintf(stderr, "cpu%d: Using cmdline TCC Target (%d C)\n", +			cpu, tcc_activation_temp); +		return 0; +	} + +	/* Temperature Target MSR is Nehalem and newer only */ +	if (!do_nehalem_platform_info) +		goto guess; + +	if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) +		goto guess; + +	target_c_local = (msr >> 16) & 0xFF; + +	if (verbose) +		fprintf(stderr, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", +			cpu, msr, target_c_local); + +	if (!target_c_local) +		goto guess; + +	tcc_activation_temp = target_c_local; + +	return 0; + +guess: +	tcc_activation_temp = TJMAX_DEFAULT; +	fprintf(stderr, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", +		cpu, tcc_activation_temp); + +	return 0; +} +void check_cpuid() +{ +	unsigned int eax, ebx, ecx, edx, max_level; +	unsigned int fms, family, model, stepping; + +	eax = ebx = ecx = edx = 0; + +	__get_cpuid(0, &max_level, &ebx, &ecx, &edx); + +	if (ebx == 0x756e6547 && edx == 0x49656e69 && ecx == 0x6c65746e) +		genuine_intel = 1; + +	if (verbose) +		fprintf(stderr, "CPUID(0): %.4s%.4s%.4s ", +			(char *)&ebx, (char *)&edx, (char *)&ecx); + +	__get_cpuid(1, &fms, &ebx, &ecx, 
&edx); +	family = (fms >> 8) & 0xf; +	model = (fms >> 4) & 0xf; +	stepping = fms & 0xf; +	if (family == 6 || family == 0xf) +		model += ((fms >> 16) & 0xf) << 4; + +	if (verbose) +		fprintf(stderr, "%d CPUID levels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n", +			max_level, family, model, stepping, family, model, stepping); + +	if (!(edx & (1 << 5))) +		errx(1, "CPUID: no MSR"); + +	/* +	 * check max extended function levels of CPUID. +	 * This is needed to check for invariant TSC. +	 * This check is valid for both Intel and AMD. +	 */ +	ebx = ecx = edx = 0; +	__get_cpuid(0x80000000, &max_level, &ebx, &ecx, &edx); + +	if (max_level < 0x80000007) +		errx(1, "CPUID: no invariant TSC (max_level 0x%x)", max_level); + +	/* +	 * Non-Stop TSC is advertised by CPUID.EAX=0x80000007: EDX.bit8 +	 * this check is valid for both Intel and AMD +	 */ +	__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx); +	has_invariant_tsc = edx & (1 << 8); + +	if (!has_invariant_tsc) +		errx(1, "No invariant TSC"); + +	/* +	 * APERF/MPERF is advertised by CPUID.EAX=0x6: ECX.bit0 +	 * this check is valid for both Intel and AMD +	 */ + +	__get_cpuid(0x6, &eax, &ebx, &ecx, &edx); +	has_aperf = ecx & (1 << 0); +	do_dts = eax & (1 << 0); +	do_ptm = eax & (1 << 6); +	has_epb = ecx & (1 << 3); + +	if (verbose) +		fprintf(stderr, "CPUID(6): %s%s%s%s\n", +			has_aperf ? "APERF" : "No APERF!", +			do_dts ? ", DTS" : "", +			do_ptm ? ", PTM": "", +			has_epb ? 
", EPB": ""); + +	if (!has_aperf) +		errx(-1, "No APERF"); + +	do_nehalem_platform_info = genuine_intel && has_invariant_tsc; +	do_nhm_cstates = genuine_intel;	/* all Intel w/ non-stop TSC have NHM counters */ +	do_smi = do_nhm_cstates; +	do_snb_cstates = is_snb(family, model); +	do_c8_c9_c10 = has_c8_c9_c10(family, model); +	do_slm_cstates = is_slm(family, model); +	bclk = discover_bclk(family, model); + +	do_nehalem_turbo_ratio_limit = has_nehalem_turbo_ratio_limit(family, model); +	do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); +	rapl_probe(family, model); + +	return; +} + + +void usage() +{ +	errx(1, "%s: [-v][-R][-T][-p|-P|-S][-c MSR#][-C MSR#][-m MSR#][-M MSR#][-i interval_sec | command ...]\n", +	     progname); +} + + +/* + * in /dev/cpu/ return success for names that are numbers + * ie. filter out ".", "..", "microcode". + */ +int dir_filter(const struct dirent *dirp) +{ +	if (isdigit(dirp->d_name[0])) +		return 1; +	else +		return 0; +} + +int open_dev_cpu_msr(int dummy1) +{ +	return 0; +} + +void topology_probe() +{ +	int i; +	int max_core_id = 0; +	int max_package_id = 0; +	int max_siblings = 0; +	struct cpu_topology { +		int core_id; +		int physical_package_id; +	} *cpus; + +	/* Initialize num_cpus, max_cpu_num */ +	topo.num_cpus = 0; +	topo.max_cpu_num = 0; +	for_all_proc_cpus(count_cpus); +	if (!summary_only && topo.num_cpus > 1) +		show_cpu = 1; + +	if (verbose > 1) +		fprintf(stderr, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num); + +	cpus = calloc(1, (topo.max_cpu_num  + 1) * sizeof(struct cpu_topology)); +	if (cpus == NULL) +		err(1, "calloc cpus"); + +	/* +	 * Allocate and initialize cpu_present_set +	 */ +	cpu_present_set = CPU_ALLOC((topo.max_cpu_num + 1)); +	if (cpu_present_set == NULL) +		err(3, "CPU_ALLOC"); +	cpu_present_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); +	CPU_ZERO_S(cpu_present_setsize, cpu_present_set); +	for_all_proc_cpus(mark_cpu_present); + +	/* +	 * Allocate and initialize 
cpu_affinity_set +	 */ +	cpu_affinity_set = CPU_ALLOC((topo.max_cpu_num + 1)); +	if (cpu_affinity_set == NULL) +		err(3, "CPU_ALLOC"); +	cpu_affinity_setsize = CPU_ALLOC_SIZE((topo.max_cpu_num + 1)); +	CPU_ZERO_S(cpu_affinity_setsize, cpu_affinity_set); + + +	/* +	 * For online cpus +	 * find max_core_id, max_package_id +	 */ +	for (i = 0; i <= topo.max_cpu_num; ++i) { +		int siblings; + +		if (cpu_is_not_present(i)) { +			if (verbose > 1) +				fprintf(stderr, "cpu%d NOT PRESENT\n", i); +			continue; +		} +		cpus[i].core_id = get_core_id(i); +		if (cpus[i].core_id > max_core_id) +			max_core_id = cpus[i].core_id; + +		cpus[i].physical_package_id = get_physical_package_id(i); +		if (cpus[i].physical_package_id > max_package_id) +			max_package_id = cpus[i].physical_package_id; + +		siblings = get_num_ht_siblings(i); +		if (siblings > max_siblings) +			max_siblings = siblings; +		if (verbose > 1) +			fprintf(stderr, "cpu %d pkg %d core %d\n", +				i, cpus[i].physical_package_id, cpus[i].core_id); +	} +	topo.num_cores_per_pkg = max_core_id + 1; +	if (verbose > 1) +		fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", +			max_core_id, topo.num_cores_per_pkg); +	if (!summary_only && topo.num_cores_per_pkg > 1) +		show_core = 1; + +	topo.num_packages = max_package_id + 1; +	if (verbose > 1) +		fprintf(stderr, "max_package_id %d, sizing for %d packages\n", +			max_package_id, topo.num_packages); +	if (!summary_only && topo.num_packages > 1) +		show_pkg = 1; + +	topo.num_threads_per_core = max_siblings; +	if (verbose > 1) +		fprintf(stderr, "max_siblings %d\n", max_siblings); + +	free(cpus); +} + +void +allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p) +{ +	int i; + +	*t = calloc(topo.num_threads_per_core * topo.num_cores_per_pkg * +		topo.num_packages, sizeof(struct thread_data)); +	if (*t == NULL) +		goto error; + +	for (i = 0; i < topo.num_threads_per_core * +		topo.num_cores_per_pkg * topo.num_packages; i++) +		
(*t)[i].cpu_id = -1; + +	*c = calloc(topo.num_cores_per_pkg * topo.num_packages, +		sizeof(struct core_data)); +	if (*c == NULL) +		goto error; + +	for (i = 0; i < topo.num_cores_per_pkg * topo.num_packages; i++) +		(*c)[i].core_id = -1; + +	*p = calloc(topo.num_packages, sizeof(struct pkg_data)); +	if (*p == NULL) +		goto error; + +	for (i = 0; i < topo.num_packages; i++) +		(*p)[i].package_id = i; + +	return; +error: +	err(1, "calloc counters"); +} +/* + * init_counter() + * + * set cpu_id, core_num, pkg_num + * set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE + * + * increment topo.num_cores when 1st core in pkg seen + */ +void init_counter(struct thread_data *thread_base, struct core_data *core_base, +	struct pkg_data *pkg_base, int thread_num, int core_num, +	int pkg_num, int cpu_id) +{ +	struct thread_data *t; +	struct core_data *c; +	struct pkg_data *p; + +	t = GET_THREAD(thread_base, thread_num, core_num, pkg_num); +	c = GET_CORE(core_base, core_num, pkg_num); +	p = GET_PKG(pkg_base, pkg_num); + +	t->cpu_id = cpu_id; +	if (thread_num == 0) { +		t->flags |= CPU_IS_FIRST_THREAD_IN_CORE; +		if (cpu_is_first_core_in_package(cpu_id)) +			t->flags |= CPU_IS_FIRST_CORE_IN_PACKAGE; +	} + +	c->core_id = core_num; +	p->package_id = pkg_num; +} + + +int initialize_counters(int cpu_id) +{ +	int my_thread_id, my_core_id, my_package_id; + +	my_package_id = get_physical_package_id(cpu_id); +	my_core_id = get_core_id(cpu_id); + +	if (cpu_is_first_sibling_in_core(cpu_id)) { +		my_thread_id = 0; +		topo.num_cores++; +	} else { +		my_thread_id = 1; +	} + +	init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); +	init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); +	return 0; +} + +void allocate_output_buffer() +{ +	output_buffer = calloc(1, (1 + topo.num_cpus) * 1024); +	outp = output_buffer; +	if (outp == NULL) +		err(-1, "calloc output buffer"); +} + +void setup_all_buffers(void) +{ +	topology_probe(); +	
allocate_counters(&thread_even, &core_even, &package_even); +	allocate_counters(&thread_odd, &core_odd, &package_odd); +	allocate_output_buffer(); +	for_all_proc_cpus(initialize_counters); +} + +void turbostat_init() +{ +	check_cpuid(); + +	check_dev_msr(); +	check_super_user(); + +	setup_all_buffers(); + +	if (verbose) +		print_verbose_header(); + +	if (verbose) +		for_all_cpus(print_epb, ODD_COUNTERS); + +	if (verbose) +		for_all_cpus(print_rapl, ODD_COUNTERS); + +	for_all_cpus(set_temperature_target, ODD_COUNTERS); + +	if (verbose) +		for_all_cpus(print_thermal, ODD_COUNTERS); +} + +int fork_it(char **argv) +{ +	pid_t child_pid; +	int status; + +	status = for_all_cpus(get_counters, EVEN_COUNTERS); +	if (status) +		exit(status); +	/* clear affinity side-effect of get_counters() */ +	sched_setaffinity(0, cpu_present_setsize, cpu_present_set); +	gettimeofday(&tv_even, (struct timezone *)NULL); + +	child_pid = fork(); +	if (!child_pid) { +		/* child */ +		execvp(argv[0], argv); +	} else { + +		/* parent */ +		if (child_pid == -1) +			err(1, "fork"); + +		signal(SIGINT, SIG_IGN); +		signal(SIGQUIT, SIG_IGN); +		if (waitpid(child_pid, &status, 0) == -1) +			err(status, "waitpid"); +	} +	/* +	 * n.b. 
fork_it() does not check for errors from for_all_cpus() +	 * because re-starting is problematic when forking +	 */ +	for_all_cpus(get_counters, ODD_COUNTERS); +	gettimeofday(&tv_odd, (struct timezone *)NULL); +	timersub(&tv_odd, &tv_even, &tv_delta); +	for_all_cpus_2(delta_cpu, ODD_COUNTERS, EVEN_COUNTERS); +	compute_average(EVEN_COUNTERS); +	format_all_counters(EVEN_COUNTERS); +	flush_stderr(); + +	fprintf(stderr, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0); + +	return status; +} + +int get_and_dump_counters(void) +{ +	int status; + +	status = for_all_cpus(get_counters, ODD_COUNTERS); +	if (status) +		return status; + +	status = for_all_cpus(dump_counters, ODD_COUNTERS); +	if (status) +		return status; + +	flush_stdout(); + +	return status; +} + +void cmdline(int argc, char **argv) +{ +	int opt; + +	progname = argv[0]; + +	while ((opt = getopt(argc, argv, "+pPsSvi:c:C:m:M:RJT:")) != -1) { +		switch (opt) { +		case 'p': +			show_core_only++; +			break; +		case 'P': +			show_pkg_only++; +			break; +		case 's': +			dump_only++; +			break; +		case 'S': +			summary_only++; +			break; +		case 'v': +			verbose++; +			break; +		case 'i': +			interval_sec = atoi(optarg); +			break; +		case 'c': +			sscanf(optarg, "%x", &extra_delta_offset32); +			break; +		case 'C': +			sscanf(optarg, "%x", &extra_delta_offset64); +			break; +		case 'm': +			sscanf(optarg, "%x", &extra_msr_offset32); +			break; +		case 'M': +			sscanf(optarg, "%x", &extra_msr_offset64); +			break; +		case 'R': +			rapl_verbose++; +			break; +		case 'T': +			tcc_activation_temp_override = atoi(optarg); +			break; +		case 'J': +			rapl_joules++; +			break; + +		default: +			usage(); +		} +	} +} + +int main(int argc, char **argv) +{ +	cmdline(argc, argv); + +	if (verbose) +		fprintf(stderr, "turbostat v3.7 Feb 6, 2014" +			" - Len Brown <lenb@kernel.org>\n"); + +	turbostat_init(); + +	/* dump counters and exit */ +	if (dump_only) +		return get_and_dump_counters(); + +	/* +	 * if any params 
left, it must be a command to fork +	 */ +	if (argc - optind) +		return fork_it(argv + optind); +	else +		turbostat_loop(); + +	return 0; +} diff --git a/tools/power/x86/x86_energy_perf_policy/Makefile b/tools/power/x86/x86_energy_perf_policy/Makefile new file mode 100644 index 00000000000..971c9ffdcb5 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/Makefile @@ -0,0 +1,10 @@ +DESTDIR ?= + +x86_energy_perf_policy : x86_energy_perf_policy.c + +clean : +	rm -f x86_energy_perf_policy + +install : +	install x86_energy_perf_policy ${DESTDIR}/usr/bin/ +	install x86_energy_perf_policy.8 ${DESTDIR}/usr/share/man/man8/ diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 new file mode 100644 index 00000000000..8eaaad648cd --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8 @@ -0,0 +1,104 @@ +.\"  This page Copyright (C) 2010 Len Brown <len.brown@intel.com> +.\"  Distributed under the GPL, Copyleft 1994. +.TH X86_ENERGY_PERF_POLICY 8 +.SH NAME +x86_energy_perf_policy \- read or write MSR_IA32_ENERGY_PERF_BIAS +.SH SYNOPSIS +.ft B +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB "\-r" +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'performance' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'normal' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB 'powersave' +.br +.B x86_energy_perf_policy +.RB [ "\-c cpu" ] +.RB [ "\-v" ] +.RB n +.br +.SH DESCRIPTION +\fBx86_energy_perf_policy\fP +allows software to convey +its policy for the relative importance of performance +versus energy savings to the processor. + +The processor uses this information in model-specific ways +when it must select trade-offs between performance and +energy efficiency. 
+ +This policy hint does not supersede Processor Performance states +(P-states) or CPU Idle power states (C-states), but allows +software to have influence where it would otherwise be unable +to express a preference. + +For example, this setting may tell the hardware how +aggressively or conservatively to control frequency +in the "turbo range" above the explicitly OS-controlled +P-state frequency range.  It may also tell the hardware +how aggressively it should enter the OS requested C-states. + +Support for this feature is indicated by CPUID.06H.ECX.bit3 +per the Intel Architectures Software Developer's Manual. + +.SS Options +\fB-c\fP limits operation to a single CPU. +The default is to operate on all CPUs. +Note that MSR_IA32_ENERGY_PERF_BIAS is defined per +logical processor, but that the initial implementations +of the MSR were shared among all processors in each package. +.PP +\fB-v\fP increases verbosity.  By default +x86_energy_perf_policy is silent. +.PP +\fB-r\fP is for "read-only" mode - the unchanged state +is read and displayed. +.PP +.I performance +Set a policy where performance is paramount. +The processor will be unwilling to sacrifice any performance +for the sake of energy saving. This is the hardware default. +.PP +.I normal +Set a policy with a normal balance between performance and energy efficiency. +The processor will tolerate minor performance compromise +for potentially significant energy savings. +This is a reasonable default for most desktops and servers. +.PP +.I powersave +Set a policy where the processor can accept +a measurable performance hit to maximize energy efficiency. +.PP +.I n +Set MSR_IA32_ENERGY_PERF_BIAS to the specified number. +The range of valid numbers is 0-15, where 0 is maximum +performance and 15 is maximum energy efficiency. + +.SH NOTES +.B "x86_energy_perf_policy " +runs only as root. 
+.SH FILES +.ta +.nf +/dev/cpu/*/msr +.fi + +.SH "SEE ALSO" +msr(4) +.PP +.SH AUTHORS +.nf +Written by Len Brown <len.brown@intel.com> diff --git a/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c new file mode 100644 index 00000000000..40b3e5482f8 --- /dev/null +++ b/tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c @@ -0,0 +1,324 @@ +/* + * x86_energy_perf_policy -- set the energy versus performance + * policy preference bias on recent X86 processors. + */ +/* + * Copyright (c) 2010, Intel Corporation. + * Len Brown <len.brown@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <signal.h> +#include <sys/time.h> +#include <stdlib.h> +#include <string.h> + +unsigned int verbose;		/* set with -v */ +unsigned int read_only;		/* set with -r */ +char *progname; +unsigned long long new_bias; +int cpu = -1; + +/* + * Usage: + * + * -c cpu: limit action to a single CPU (default is all CPUs) + * -v: verbose output (can invoke more than once) + * -r: read-only, don't change any settings + * + *  performance + *	Performance is paramount. 
+ *	Unwilling to sacrifice any performance + *	for the sake of energy saving. (hardware default) + * + *  normal + *	Can tolerate minor performance compromise + *	for potentially significant energy savings. + *	(reasonable default for most desktops and servers) + * + *  powersave + *	Can tolerate significant performance hit + *	to maximize energy savings. + * + * n + *	a numerical value to write to the underlying MSR. + */ +void usage(void) +{ +	printf("%s: [-c cpu] [-v] " +		"(-r | 'performance' | 'normal' | 'powersave' | n)\n", +		progname); +	exit(1); +} + +#define MSR_IA32_ENERGY_PERF_BIAS	0x000001b0 + +#define	BIAS_PERFORMANCE		0 +#define BIAS_BALANCE			6 +#define	BIAS_POWERSAVE			15 + +void cmdline(int argc, char **argv) +{ +	int opt; + +	progname = argv[0]; + +	while ((opt = getopt(argc, argv, "+rvc:")) != -1) { +		switch (opt) { +		case 'c': +			cpu = atoi(optarg); +			break; +		case 'r': +			read_only = 1; +			break; +		case 'v': +			verbose++; +			break; +		default: +			usage(); +		} +	} +	/* if -r, then should be no additional optind */ +	if (read_only && (argc > optind)) +		usage(); + +	/* +	 * if no -r , then must be one additional optind +	 */ +	if (!read_only) { + +		if (argc != optind + 1) { +			printf("must supply -r or policy param\n"); +			usage(); +			} + +		if (!strcmp("performance", argv[optind])) { +			new_bias = BIAS_PERFORMANCE; +		} else if (!strcmp("normal", argv[optind])) { +			new_bias = BIAS_BALANCE; +		} else if (!strcmp("powersave", argv[optind])) { +			new_bias = BIAS_POWERSAVE; +		} else { +			char *endptr; + +			new_bias = strtoull(argv[optind], &endptr, 0); +			if (endptr == argv[optind] || +				new_bias > BIAS_POWERSAVE) { +					fprintf(stderr, "invalid value: %s\n", +						argv[optind]); +				usage(); +			} +		} +	} +} + +/* + * validate_cpuid() + * returns on success, quietly exits on failure (make verbose with -v) + */ +void validate_cpuid(void) +{ +	unsigned int eax, ebx, ecx, edx, max_level; +	unsigned int fms, family, 
model, stepping; + +	eax = ebx = ecx = edx = 0; + +	asm("cpuid" : "=a" (max_level), "=b" (ebx), "=c" (ecx), +		"=d" (edx) : "a" (0)); + +	if (ebx != 0x756e6547 || edx != 0x49656e69 || ecx != 0x6c65746e) { +		if (verbose) +			fprintf(stderr, "%.4s%.4s%.4s != GenuineIntel", +				(char *)&ebx, (char *)&edx, (char *)&ecx); +		exit(1); +	} + +	asm("cpuid" : "=a" (fms), "=c" (ecx), "=d" (edx) : "a" (1) : "ebx"); +	family = (fms >> 8) & 0xf; +	model = (fms >> 4) & 0xf; +	stepping = fms & 0xf; +	if (family == 6 || family == 0xf) +		model += ((fms >> 16) & 0xf) << 4; + +	if (verbose > 1) +		printf("CPUID %d levels family:model:stepping " +			"0x%x:%x:%x (%d:%d:%d)\n", max_level, +			family, model, stepping, family, model, stepping); + +	if (!(edx & (1 << 5))) { +		if (verbose) +			printf("CPUID: no MSR\n"); +		exit(1); +	} + +	/* +	 * Support for MSR_IA32_ENERGY_PERF_BIAS +	 * is indicated by CPUID.06H.ECX.bit3 +	 */ +	asm("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (6)); +	if (verbose) +		printf("CPUID.06H.ECX: 0x%x\n", ecx); +	if (!(ecx & (1 << 3))) { +		if (verbose) +			printf("CPUID: No MSR_IA32_ENERGY_PERF_BIAS\n"); +		exit(1); +	} +	return;	/* success */ +} + +unsigned long long get_msr(int cpu, int offset) +{ +	unsigned long long msr; +	char msr_path[32]; +	int retval; +	int fd; + +	sprintf(msr_path, "/dev/cpu/%d/msr", cpu); +	fd = open(msr_path, O_RDONLY); +	if (fd < 0) { +		printf("Try \"# modprobe msr\"\n"); +		perror(msr_path); +		exit(1); +	} + +	retval = pread(fd, &msr, sizeof msr, offset); + +	if (retval != sizeof msr) { +		printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); +		exit(-2); +	} +	close(fd); +	return msr; +} + +unsigned long long  put_msr(int cpu, unsigned long long new_msr, int offset) +{ +	unsigned long long old_msr; +	char msr_path[32]; +	int retval; +	int fd; + +	sprintf(msr_path, "/dev/cpu/%d/msr", cpu); +	fd = open(msr_path, O_RDWR); +	if (fd < 0) { +		perror(msr_path); +		exit(1); +	} + +	retval = pread(fd, &old_msr, 
sizeof old_msr, offset); +	if (retval != sizeof old_msr) { +		perror("pwrite"); +		printf("pread cpu%d 0x%x = %d\n", cpu, offset, retval); +		exit(-2); +	} + +	retval = pwrite(fd, &new_msr, sizeof new_msr, offset); +	if (retval != sizeof new_msr) { +		perror("pwrite"); +		printf("pwrite cpu%d 0x%x = %d\n", cpu, offset, retval); +		exit(-2); +	} + +	close(fd); + +	return old_msr; +} + +void print_msr(int cpu) +{ +	printf("cpu%d: 0x%016llx\n", +		cpu, get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS)); +} + +void update_msr(int cpu) +{ +	unsigned long long previous_msr; + +	previous_msr = put_msr(cpu, new_bias, MSR_IA32_ENERGY_PERF_BIAS); + +	if (verbose) +		printf("cpu%d  msr0x%x 0x%016llx -> 0x%016llx\n", +			cpu, MSR_IA32_ENERGY_PERF_BIAS, previous_msr, new_bias); + +	return; +} + +char *proc_stat = "/proc/stat"; +/* + * run func() on every cpu in /dev/cpu + */ +void for_every_cpu(void (func)(int)) +{ +	FILE *fp; +	int retval; + +	fp = fopen(proc_stat, "r"); +	if (fp == NULL) { +		perror(proc_stat); +		exit(1); +	} + +	retval = fscanf(fp, "cpu %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n"); +	if (retval != 0) { +		perror("/proc/stat format"); +		exit(1); +	} + +	while (1) { +		int cpu; + +		retval = fscanf(fp, +			"cpu%u %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d\n", +			&cpu); +		if (retval != 1) +			break; + +		func(cpu); +	} +	fclose(fp); +} + +int main(int argc, char **argv) +{ +	cmdline(argc, argv); + +	if (verbose > 1) +		printf("x86_energy_perf_policy Nov 24, 2010" +				" - Len Brown <lenb@kernel.org>\n"); +	if (verbose > 1 && !read_only) +		printf("new_bias %lld\n", new_bias); + +	validate_cpuid(); + +	if (cpu != -1) { +		if (read_only) +			print_msr(cpu); +		else +			update_msr(cpu); +	} else { +		if (read_only) +			for_every_cpu(print_msr); +		else +			for_every_cpu(update_msr); +	} + +	return 0; +}  | 
