diff options
Diffstat (limited to 'arch/x86/include/asm/perf_event_p4.h')
| -rw-r--r-- | arch/x86/include/asm/perf_event_p4.h | 164 |
1 files changed, 124 insertions, 40 deletions
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index a70cd216be5..85e13ccf15c 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -1,5 +1,5 @@ /* - * Netburst Perfomance Events (P4, old Xeon) + * Netburst Performance Events (P4, old Xeon) */ #ifndef PERF_EVENT_P4_H @@ -9,7 +9,7 @@ #include <linux/bitops.h> /* - * NetBurst has perfomance MSRs shared between + * NetBurst has performance MSRs shared between * threads if HT is turned on, ie for both logical * processors (mem: in turn in Atom with HT support * perf-MSRs are not shared and every thread has its @@ -20,45 +20,49 @@ #define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) #define ARCH_P4_MAX_CCCR (18) -#define P4_ESCR_EVENT_MASK 0x7e000000U +#define ARCH_P4_CNTRVAL_BITS (40) +#define ARCH_P4_CNTRVAL_MASK ((1ULL << ARCH_P4_CNTRVAL_BITS) - 1) +#define ARCH_P4_UNFLAGGED_BIT ((1ULL) << (ARCH_P4_CNTRVAL_BITS - 1)) + +#define P4_ESCR_EVENT_MASK 0x7e000000ULL #define P4_ESCR_EVENT_SHIFT 25 -#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U +#define P4_ESCR_EVENTMASK_MASK 0x01fffe00ULL #define P4_ESCR_EVENTMASK_SHIFT 9 -#define P4_ESCR_TAG_MASK 0x000001e0U +#define P4_ESCR_TAG_MASK 0x000001e0ULL #define P4_ESCR_TAG_SHIFT 5 -#define P4_ESCR_TAG_ENABLE 0x00000010U -#define P4_ESCR_T0_OS 0x00000008U -#define P4_ESCR_T0_USR 0x00000004U -#define P4_ESCR_T1_OS 0x00000002U -#define P4_ESCR_T1_USR 0x00000001U +#define P4_ESCR_TAG_ENABLE 0x00000010ULL +#define P4_ESCR_T0_OS 0x00000008ULL +#define P4_ESCR_T0_USR 0x00000004ULL +#define P4_ESCR_T1_OS 0x00000002ULL +#define P4_ESCR_T1_USR 0x00000001ULL #define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) #define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) -#define P4_CCCR_OVF 0x80000000U -#define P4_CCCR_CASCADE 0x40000000U -#define P4_CCCR_OVF_PMI_T0 0x04000000U -#define P4_CCCR_OVF_PMI_T1 0x08000000U -#define P4_CCCR_FORCE_OVF 0x02000000U -#define P4_CCCR_EDGE 0x01000000U -#define P4_CCCR_THRESHOLD_MASK 0x00f00000U +#define P4_CCCR_OVF 0x80000000ULL +#define P4_CCCR_CASCADE 0x40000000ULL +#define P4_CCCR_OVF_PMI_T0 0x04000000ULL +#define P4_CCCR_OVF_PMI_T1 0x08000000ULL +#define P4_CCCR_FORCE_OVF 0x02000000ULL +#define P4_CCCR_EDGE 0x01000000ULL +#define P4_CCCR_THRESHOLD_MASK 0x00f00000ULL #define P4_CCCR_THRESHOLD_SHIFT 20 -#define P4_CCCR_COMPLEMENT 0x00080000U -#define P4_CCCR_COMPARE 0x00040000U -#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U +#define P4_CCCR_COMPLEMENT 0x00080000ULL +#define P4_CCCR_COMPARE 0x00040000ULL +#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000ULL #define P4_CCCR_ESCR_SELECT_SHIFT 13 -#define P4_CCCR_ENABLE 0x00001000U -#define P4_CCCR_THREAD_SINGLE 0x00010000U -#define P4_CCCR_THREAD_BOTH 0x00020000U -#define P4_CCCR_THREAD_ANY 0x00030000U -#define P4_CCCR_RESERVED 0x00000fffU +#define P4_CCCR_ENABLE 0x00001000ULL +#define P4_CCCR_THREAD_SINGLE 0x00010000ULL +#define P4_CCCR_THREAD_BOTH 0x00020000ULL +#define P4_CCCR_THREAD_ANY 0x00030000ULL +#define P4_CCCR_RESERVED 0x00000fffULL #define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) #define P4_GEN_ESCR_EMASK(class, name, bit) \ - class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) + class##__##name = ((1ULL << bit) << P4_ESCR_EVENTMASK_SHIFT) #define P4_ESCR_EMASK_BIT(class, name) class##__##name /* @@ -98,6 +102,14 @@ #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) /* + * If an event has alias it should be marked + * with a special bit. (Don't forget to check + * P4_PEBS_CONFIG_MASK and related bits on + * modification.) + */ +#define P4_CONFIG_ALIASABLE (1ULL << 9) + +/* * The bits we allow to pass for RAW events */ #define P4_CONFIG_MASK_ESCR \ @@ -119,6 +131,31 @@ (p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \ (p4_config_pack_cccr(P4_CONFIG_MASK_CCCR)) +/* + * In case of event aliasing we need to preserve some + * caller bits, otherwise the mapping won't be complete. + */ +#define P4_CONFIG_EVENT_ALIAS_MASK \ + (p4_config_pack_escr(P4_CONFIG_MASK_ESCR) | \ + p4_config_pack_cccr(P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE)) + +#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS \ + ((P4_CONFIG_HT) | \ + p4_config_pack_escr(P4_ESCR_T0_OS | \ + P4_ESCR_T0_USR | \ + P4_ESCR_T1_OS | \ + P4_ESCR_T1_USR) | \ + p4_config_pack_cccr(P4_CCCR_OVF | \ + P4_CCCR_CASCADE | \ + P4_CCCR_FORCE_OVF | \ + P4_CCCR_THREAD_ANY | \ + P4_CCCR_OVF_PMI_T0 | \ + P4_CCCR_OVF_PMI_T1 | \ + P4_CONFIG_ALIASABLE)) + static inline bool p4_is_event_cascaded(u64 config) { u32 cccr = p4_config_unpack_cccr(config); @@ -744,28 +781,20 @@ enum P4_ESCR_EMASKS { }; /* - * P4 PEBS specifics (Replay Event only) - * - * Format (bits): - * 0-6: metric from P4_PEBS_METRIC enum - * 7 : reserved - * 8 : reserved - * 9-11 : reserved - * * Note we have UOP and PEBS bits reserved for now * just in case if we will need them once */ -#define P4_PEBS_CONFIG_ENABLE (1 << 7) -#define P4_PEBS_CONFIG_UOP_TAG (1 << 8) -#define P4_PEBS_CONFIG_METRIC_MASK 0x3f -#define P4_PEBS_CONFIG_MASK 0xff +#define P4_PEBS_CONFIG_ENABLE (1ULL << 7) +#define P4_PEBS_CONFIG_UOP_TAG (1ULL << 8) +#define P4_PEBS_CONFIG_METRIC_MASK 0x3FLL +#define P4_PEBS_CONFIG_MASK 0xFFLL /* * mem: Only counters MSR_IQ_COUNTER4 (16) and * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling */ -#define P4_PEBS_ENABLE 0x02000000U -#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U +#define P4_PEBS_ENABLE 0x02000000ULL +#define P4_PEBS_ENABLE_UOP_TAG 0x01000000ULL #define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK) #define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK) @@ -788,5 +817,60 @@ enum P4_PEBS_METRIC { P4_PEBS_METRIC__max }; +/* + * Notes on internal configuration of ESCR+CCCR tuples + * + * Since P4 has quite the different architecture of + * performance registers in compare with "architectural" + * once and we have on 64 bits to keep configuration + * of performance event, the following trick is used. + * + * 1) Since both ESCR and CCCR registers have only low + * 32 bits valuable, we pack them into a single 64 bit + * configuration. Low 32 bits of such config correspond + * to low 32 bits of CCCR register and high 32 bits + * correspond to low 32 bits of ESCR register. + * + * 2) The meaning of every bit of such config field can + * be found in Intel SDM but it should be noted that + * we "borrow" some reserved bits for own usage and + * clean them or set to a proper value when we do + * a real write to hardware registers. + * + * 3) The format of bits of config is the following + * and should be either 0 or set to some predefined + * values: + * + * Low 32 bits + * ----------- + * 0-6: P4_PEBS_METRIC enum + * 7-11: reserved + * 12: reserved (Enable) + * 13-15: reserved (ESCR select) + * 16-17: Active Thread + * 18: Compare + * 19: Complement + * 20-23: Threshold + * 24: Edge + * 25: reserved (FORCE_OVF) + * 26: reserved (OVF_PMI_T0) + * 27: reserved (OVF_PMI_T1) + * 28-29: reserved + * 30: reserved (Cascade) + * 31: reserved (OVF) + * + * High 32 bits + * ------------ + * 0: reserved (T1_USR) + * 1: reserved (T1_OS) + * 2: reserved (T0_USR) + * 3: reserved (T0_OS) + * 4: Tag Enable + * 5-8: Tag Value + * 9-24: Event Mask (may use P4_ESCR_EMASK_BIT helper) + * 25-30: enum P4_EVENTS + * 31: reserved (HT thread) + */ + #endif /* PERF_EVENT_P4_H */ |
