Diffstat (limited to 'arch/powerpc/perf/power8-pmu.c')
-rw-r--r--	arch/powerpc/perf/power8-pmu.c	229
1 file changed, 209 insertions, 20 deletions
diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c
index 2ee4a707f0d..639cd915658 100644
--- a/arch/powerpc/perf/power8-pmu.c
+++ b/arch/powerpc/perf/power8-pmu.c
@@ -10,6 +10,8 @@
  * 2 of the License, or (at your option) any later version.
  */
 
+#define pr_fmt(fmt)	"power8-pmu: " fmt
+
 #include <linux/kernel.h>
 #include <linux/perf_event.h>
 #include <asm/firmware.h>
@@ -25,15 +27,48 @@
 #define PM_BRU_FIN			0x10068
 #define PM_BR_MPRED_CMPL		0x400f6
 
+/* All L1 D cache load references counted at finish, gated by reject */
+#define PM_LD_REF_L1			0x100ee
+/* Load Missed L1 */
+#define PM_LD_MISS_L1			0x3e054
+/* Store Missed L1 */
+#define PM_ST_MISS_L1			0x300f0
+/* L1 cache data prefetches */
+#define PM_L1_PREF			0x0d8b8
+/* Instruction fetches from L1 */
+#define PM_INST_FROM_L1			0x04080
+/* Demand iCache Miss */
+#define PM_L1_ICACHE_MISS		0x200fd
+/* Instruction Demand sectors written into IL1 */
+#define PM_L1_DEMAND_WRITE		0x0408c
+/* Instruction prefetch written into IL1 */
+#define PM_IC_PREF_WRITE		0x0408e
+/* The data cache was reloaded from local core's L3 due to a demand load */
+#define PM_DATA_FROM_L3			0x4c042
+/* Demand LD - L3 Miss (not L2 hit and not L3 hit) */
+#define PM_DATA_FROM_L3MISS		0x300fe
+/* All successful D-side store dispatches for this thread */
+#define PM_L2_ST			0x17080
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+#define PM_L2_ST_MISS			0x17082
+/* Total HW L3 prefetches (load + store) */
+#define PM_L3_PREF_ALL			0x4e052
+/* Data PTEG reload */
+#define PM_DTLB_MISS			0x300fc
+/* ITLB reloaded */
+#define PM_ITLB_MISS			0x400fc
+
 /*
  * Raw event encoding for POWER8:
  *
  *        60        56        52        48        44        40        36        32
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *   |                                 [      thresh_cmp     ]   [  thresh_ctl   ]
- *   |                                                                   |
- *   *- EBB (Linux)                      thresh start/stop OR FAB match -*
+ *   | | [ ]                           [      thresh_cmp     ]   [  thresh_ctl   ]
+ *   | |  |                                                              |
+ *   | |  *- IFM (Linux)                 thresh start/stop OR FAB match -*
+ *   | *- BHRB (Linux)
+ *   *- EBB (Linux)
  *
  *        28        24        20        16        12         8         4         0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
@@ -83,9 +118,18 @@
  *	MMCRA[57:59] = sample[0:2]	(RAND_SAMP_ELIG)
  *	MMCRA[61:62] = sample[3:4]	(RAND_SAMP_MODE)
  *
+ * if EBB and BHRB:
+ *	MMCRA[32:33] = IFM
+ *
  */
 
 #define EVENT_EBB_MASK		1ull
+#define EVENT_EBB_SHIFT		PERF_EVENT_CONFIG_EBB_SHIFT
+#define EVENT_BHRB_MASK		1ull
+#define EVENT_BHRB_SHIFT	62
+#define EVENT_WANTS_BHRB	(EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)
+#define EVENT_IFM_MASK		3ull
+#define EVENT_IFM_SHIFT		60
 #define EVENT_THR_CMP_SHIFT	40	/* Threshold CMP value */
 #define EVENT_THR_CMP_MASK	0x3ff
 #define EVENT_THR_CTL_SHIFT	32	/* Threshold control value (start/stop) */
@@ -110,6 +154,12 @@
 #define EVENT_IS_MARKED		(EVENT_MARKED_MASK << EVENT_MARKED_SHIFT)
 #define EVENT_PSEL_MASK		0xff	/* PMCxSEL value */
 
+/* Bits defined by Linux */
+#define EVENT_LINUX_MASK	\
+	((EVENT_EBB_MASK  << EVENT_EBB_SHIFT)			|	\
+	 (EVENT_BHRB_MASK << EVENT_BHRB_SHIFT)			|	\
+	 (EVENT_IFM_MASK  << EVENT_IFM_SHIFT))
+
 #define EVENT_VALID_MASK	\
 	((EVENT_THRESH_MASK    << EVENT_THRESH_SHIFT)		|	\
 	 (EVENT_SAMPLE_MASK    << EVENT_SAMPLE_SHIFT)		|	\
@@ -118,7 +168,7 @@
 	 (EVENT_UNIT_MASK      << EVENT_UNIT_SHIFT)		|	\
 	 (EVENT_COMBINE_MASK   << EVENT_COMBINE_SHIFT)		|	\
 	 (EVENT_MARKED_MASK    << EVENT_MARKED_SHIFT)		|	\
-	 (EVENT_EBB_MASK       << PERF_EVENT_CONFIG_EBB_SHIFT)	|	\
+	  EVENT_LINUX_MASK					|	\
 	  EVENT_PSEL_MASK)
 
 /* MMCRA IFM bits - POWER8 */
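The three Linux-defined fields above (EBB, BHRB, IFM) sit in the top nibble of the 64-bit raw event code, outside the bits the hardware decodes. A minimal userspace sketch of packing them, assuming PERF_EVENT_CONFIG_EBB_SHIFT is 63; the make_raw_event() helper is hypothetical, not a kernel or perf API:

/*
 * Illustration only: compose a raw POWER8 perf event code with the
 * Linux-defined EBB/BHRB/IFM bits. Build as a standalone program.
 */
#include <stdio.h>
#include <stdint.h>

#define EVENT_EBB_SHIFT		63	/* assumed value of PERF_EVENT_CONFIG_EBB_SHIFT */
#define EVENT_BHRB_SHIFT	62
#define EVENT_IFM_SHIFT		60

/* Hypothetical helper, not a kernel API */
static uint64_t make_raw_event(uint64_t hw_code, int ebb, int bhrb, uint64_t ifm)
{
	uint64_t config = hw_code;

	if (ebb)
		config |= 1ull << EVENT_EBB_SHIFT;
	if (bhrb) {
		/* BHRB is only valid together with EBB, per the constraint code */
		config |= 1ull << EVENT_BHRB_SHIFT;
		config |= (ifm & 3ull) << EVENT_IFM_SHIFT;
	}
	return config;
}

int main(void)
{
	/* PM_BRU_FIN (0x10068) as an EBB event requesting BHRB, IFM = 0 */
	printf("config = 0x%016llx\n",
	       (unsigned long long)make_raw_event(0x10068, 1, 1, 0));
	return 0;
}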
@@ -142,10 +192,11 @@
  *
  *        28        24        20        16        12         8         4         0
  * | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - | - - - - |
- *                   |   [ ]   [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1]
- *              EBB -*    |                     |
- *                        |                     |      Count of events for each PMC.
- *      L1 I/D qualifier -*                     |        p1, p2, p3, p4, p5, p6.
+ *               [ ] |   [ ]   [  sample ]   [     ]   [6] [5]   [4] [3]   [2] [1]
+ *                |  |    |                     |
+ *      BHRB IFM -*  |    |                     |      Count of events for each PMC.
+ *              EBB -*    |                     |        p1, p2, p3, p4, p5, p6.
+ *      L1 I/D qualifier -*                     |
  *                     nc - number of counters -*
  *
  * The PMC fields P1..P6, and NC, are adder fields. As we accumulate constraints
@@ -164,6 +215,9 @@
 #define CNST_EBB_VAL(v)		(((v) & EVENT_EBB_MASK) << 24)
 #define CNST_EBB_MASK		CNST_EBB_VAL(EVENT_EBB_MASK)
 
+#define CNST_IFM_VAL(v)		(((v) & EVENT_IFM_MASK) << 25)
+#define CNST_IFM_MASK		CNST_IFM_VAL(EVENT_IFM_MASK)
+
 #define CNST_L1_QUAL_VAL(v)	(((v) & 3) << 22)
 #define CNST_L1_QUAL_MASK	CNST_L1_QUAL_VAL(3)
 
@@ -199,6 +253,7 @@
 #define MMCR1_UNIT_SHIFT(pmc)		(60 - (4 * ((pmc) - 1)))
 #define MMCR1_COMBINE_SHIFT(pmc)	(35 - ((pmc) - 1))
 #define MMCR1_PMCSEL_SHIFT(pmc)		(24 - (((pmc) - 1)) * 8)
+#define MMCR1_FAB_SHIFT			36
 #define MMCR1_DC_QUAL_SHIFT		47
 #define MMCR1_IC_QUAL_SHIFT		46
 
@@ -209,6 +264,7 @@
 #define MMCRA_THR_SEL_SHIFT		16
 #define MMCRA_THR_CMP_SHIFT		32
 #define MMCRA_SDAR_MODE_TLB		(1ull << 42)
+#define MMCRA_IFM_SHIFT			30
 
 static inline bool event_is_fab_match(u64 event)
@@ -233,20 +289,22 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
 	pmc   = (event >> EVENT_PMC_SHIFT)        & EVENT_PMC_MASK;
 	unit  = (event >> EVENT_UNIT_SHIFT)       & EVENT_UNIT_MASK;
 	cache = (event >> EVENT_CACHE_SEL_SHIFT)  & EVENT_CACHE_SEL_MASK;
-	ebb   = (event >> PERF_EVENT_CONFIG_EBB_SHIFT) & EVENT_EBB_MASK;
-
-	/* Clear the EBB bit in the event, so event checks work below */
-	event &= ~(EVENT_EBB_MASK << PERF_EVENT_CONFIG_EBB_SHIFT);
+	ebb   = (event >> EVENT_EBB_SHIFT)        & EVENT_EBB_MASK;
 
 	if (pmc) {
+		u64 base_event;
+
 		if (pmc > 6)
 			return -1;
 
-		mask  |= CNST_PMC_MASK(pmc);
-		value |= CNST_PMC_VAL(pmc);
+		/* Ignore Linux defined bits when checking event below */
+		base_event = event & ~EVENT_LINUX_MASK;
 
-		if (pmc >= 5 && event != 0x500fa && event != 0x600f4)
+		if (pmc >= 5 && base_event != 0x500fa && base_event != 0x600f4)
 			return -1;
+
+		mask  |= CNST_PMC_MASK(pmc);
+		value |= CNST_PMC_VAL(pmc);
 	}
 
 	if (pmc <= 4) {
@@ -267,9 +325,10 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
 		 * HV writable, and there is no API for guest kernels to modify
 		 * it. The solution is for the hypervisor to initialise the
 		 * field to zeroes, and for us to only ever allow events that
-		 * have a cache selector of zero.
+		 * have a cache selector of zero. The bank selector (bit 3) is
+		 * irrelevant, as long as the rest of the value is 0.
 		 */
-		if (cache)
+		if (cache & 0x7)
 			return -1;
 
 	} else if (event & EVENT_IS_L1) {
@@ -310,6 +369,15 @@ static int power8_get_constraint(u64 event, unsigned long *maskp, unsigned long
 		/* EBB events must specify the PMC */
 		return -1;
 
+	if (event & EVENT_WANTS_BHRB) {
+		if (!ebb)
+			/* Only EBB events can request BHRB */
+			return -1;
+
+		mask  |= CNST_IFM_MASK;
+		value |= CNST_IFM_VAL(event >> EVENT_IFM_SHIFT);
+	}
+
 	/*
 	 * All events must agree on EBB, either all request it or none.
 	 * EBB events are pinned & exclusive, so this should never actually
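The constraint scheme described in the comment above reduces each event to a (mask, value) pair. For exclusive fields such as EBB and IFM, two events can share the PMU only if they agree wherever their masks overlap. A simplified standalone illustration of that check; the adder-field carry handling done by the generic powerpc code is deliberately omitted, and all names here are illustrative:

/*
 * Sketch of how exclusive-field constraints combine. Simplified:
 * adder fields (P1..P6, NC) need carry detection, not shown here.
 */
#include <stdio.h>
#include <stdbool.h>

struct constraint {
	unsigned long mask;	/* which fields this event cares about */
	unsigned long value;	/* required value of those fields */
};

/* Compatible if the values agree wherever both masks are set */
static bool compatible(struct constraint a, struct constraint b)
{
	return ((a.value ^ b.value) & a.mask & b.mask) == 0;
}

int main(void)
{
	/* Two events both requesting EBB (bit 24 set) agree; mixed do not */
	struct constraint ebb_on  = { .mask = 1ul << 24, .value = 1ul << 24 };
	struct constraint ebb_off = { .mask = 1ul << 24, .value = 0 };

	printf("both EBB:  %s\n", compatible(ebb_on, ebb_on)  ? "ok" : "conflict");
	printf("mixed EBB: %s\n", compatible(ebb_on, ebb_off) ? "ok" : "conflict");
	return 0;
}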
@@ -388,8 +456,8 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
 		 * the threshold bits are used for the match value.
 		 */
 		if (event_is_fab_match(event[i])) {
-			mmcr1 |= (event[i] >> EVENT_THR_CTL_SHIFT) &
-				  EVENT_THR_CTL_MASK;
+			mmcr1 |= ((event[i] >> EVENT_THR_CTL_SHIFT) &
+				  EVENT_THR_CTL_MASK) << MMCR1_FAB_SHIFT;
 		} else {
 			val = (event[i] >> EVENT_THR_CTL_SHIFT) & EVENT_THR_CTL_MASK;
 			mmcra |= val << MMCRA_THR_CTL_SHIFT;
@@ -399,6 +467,11 @@ static int power8_compute_mmcr(u64 event[], int n_ev,
 			mmcra |= val << MMCRA_THR_CMP_SHIFT;
 		}
 
+		if (event[i] & EVENT_WANTS_BHRB) {
+			val = (event[i] >> EVENT_IFM_SHIFT) & EVENT_IFM_MASK;
+			mmcra |= val << MMCRA_IFM_SHIFT;
+		}
+
 		hwc[i] = pmc - 1;
 	}
 
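The IFM value written into MMCRA here originates from the perf branch-sampling filter, which power8_bhrb_filter_map() translates from branch_sample_type. From userspace, branch filtering is requested through the standard perf_event_open(2) interface; a minimal sketch, with error handling reduced to a perror and no claim about which filters this particular kernel accepts:

/*
 * Userspace sketch: request branch-stack sampling, which on POWER8
 * is what ultimately programs the BHRB/IFM bits shown above.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HARDWARE;
	attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
	attr.sample_period = 100000;
	attr.sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK;
	attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;	/* mapped to an IFM filter */
	attr.disabled = 1;
	attr.exclude_kernel = 1;
	attr.exclude_hv = 1;

	int fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}
	close(fd);
	return 0;
}

Filters the PMU cannot honour are rejected at event creation rather than silently ignored, so a failed open here is expected on kernels without the mapping.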
@@ -556,6 +629,8 @@ static int power8_generic_events[] = {
 	[PERF_COUNT_HW_INSTRUCTIONS] =			PM_INST_CMPL,
 	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] =		PM_BRU_FIN,
 	[PERF_COUNT_HW_BRANCH_MISSES] =			PM_BR_MPRED_CMPL,
+	[PERF_COUNT_HW_CACHE_REFERENCES] =		PM_LD_REF_L1,
+	[PERF_COUNT_HW_CACHE_MISSES] =			PM_LD_MISS_L1,
 };
 
 static u64 power8_bhrb_filter_map(u64 branch_sample_type)
@@ -595,6 +670,116 @@ static void power8_config_bhrb(u64 pmu_bhrb_filter)
 	mtspr(SPRN_MMCRA, (mfspr(SPRN_MMCRA) | pmu_bhrb_filter));
 }
 
+#define C(x)	PERF_COUNT_HW_CACHE_##x
+
+/*
+ * Table of generalized cache-related events.
+ * 0 means not supported, -1 means nonsensical, other values
+ * are event codes.
+ */
+static int power8_cache_events[C(MAX)][C(OP_MAX)][C(RESULT_MAX)] = {
+	[ C(L1D) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_LD_REF_L1,
+			[ C(RESULT_MISS)   ] = PM_LD_MISS_L1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ST_MISS_L1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L1_PREF,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(L1I) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_INST_FROM_L1,
+			[ C(RESULT_MISS)   ] = PM_L1_ICACHE_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L1_DEMAND_WRITE,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_IC_PREF_WRITE,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(LL) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_DATA_FROM_L3,
+			[ C(RESULT_MISS)   ] = PM_DATA_FROM_L3MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L2_ST,
+			[ C(RESULT_MISS)   ] = PM_L2_ST_MISS,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = PM_L3_PREF_ALL,
+			[ C(RESULT_MISS)   ] = 0,
+		},
+	},
+	[ C(DTLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_DTLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(ITLB) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = 0,
+			[ C(RESULT_MISS)   ] = PM_ITLB_MISS,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(BPU) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = PM_BRU_FIN,
+			[ C(RESULT_MISS)   ] = PM_BR_MPRED_CMPL,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+	[ C(NODE) ] = {
+		[ C(OP_READ) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_WRITE) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+		[ C(OP_PREFETCH) ] = {
+			[ C(RESULT_ACCESS) ] = -1,
+			[ C(RESULT_MISS)   ] = -1,
+		},
+	},
+};
+
+#undef C
+
 static struct power_pmu power8_pmu = {
 	.name			= "POWER8",
 	.n_counter		= 6,
@@ -607,9 +792,10 @@ static struct power_pmu power8_pmu = {
 	.get_constraint		= power8_get_constraint,
 	.get_alternatives	= power8_get_alternatives,
 	.disable_pmc		= power8_disable_pmc,
-	.flags			= PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_BHRB | PPMU_EBB,
+	.flags			= PPMU_HAS_SSLOT | PPMU_HAS_SIER | PPMU_ARCH_207S,
 	.n_generic		= ARRAY_SIZE(power8_generic_events),
 	.generic_events		= power8_generic_events,
+	.cache_events		= &power8_cache_events,
 	.attr_groups		= power8_pmu_attr_groups,
 	.bhrb_nr		= 32,
 };
@@ -629,6 +815,9 @@ static int __init init_power8_pmu(void)
 	/* Tell userspace that EBB is supported */
 	cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB;
 
+	if (cpu_has_feature(CPU_FTR_PMAO_BUG))
+		pr_info("PMAO restore workaround active.\n");
+
 	return 0;
 }
 early_initcall(init_power8_pmu);
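The table above resolves the generalized perf cache events to POWER8 event codes; an L1D read miss, for example, becomes PM_LD_MISS_L1. A minimal userspace sketch counting that event through the standard PERF_TYPE_HW_CACHE config packing (cache id | op << 8 | result << 16), with the workload left as a placeholder:

/*
 * Userspace sketch: count L1D read misses via the generalized cache
 * events. Standard perf_event_open(2) usage; minimal error handling.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

int main(void)
{
	struct perf_event_attr attr;
	long long count;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_HW_CACHE;
	attr.config = PERF_COUNT_HW_CACHE_L1D |
		      (PERF_COUNT_HW_CACHE_OP_READ << 8) |
		      (PERF_COUNT_HW_CACHE_RESULT_MISS << 16);
	attr.disabled = 1;
	attr.exclude_kernel = 1;

	int fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
	if (fd < 0) {
		perror("perf_event_open");
		return 1;
	}

	ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
	/* ... workload under test would run here ... */
	ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("L1D read misses: %lld\n", count);
	close(fd);
	return 0;
}

Entries marked 0 in the table (for example L1D write access) make the corresponding open fail, which is how unsupported combinations surface to userspace.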
