diff options
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel_ds.c')
| -rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 262 |
1 files changed, 167 insertions, 95 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 3065c57a63c..696ade311de 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -12,6 +12,7 @@ #define BTS_BUFFER_SIZE (PAGE_SIZE << 4) #define PEBS_BUFFER_SIZE PAGE_SIZE +#define PEBS_FIXUP_SIZE PAGE_SIZE /* * pebs_record_32 for p4 and core not supported @@ -107,15 +108,31 @@ static u64 precise_store_data(u64 status) return val; } -static u64 precise_store_data_hsw(u64 status) +static u64 precise_store_data_hsw(struct perf_event *event, u64 status) { union perf_mem_data_src dse; + u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK; dse.val = 0; dse.mem_op = PERF_MEM_OP_STORE; dse.mem_lvl = PERF_MEM_LVL_NA; + + /* + * L1 info only valid for following events: + * + * MEM_UOPS_RETIRED.STLB_MISS_STORES + * MEM_UOPS_RETIRED.LOCK_STORES + * MEM_UOPS_RETIRED.SPLIT_STORES + * MEM_UOPS_RETIRED.ALL_STORES + */ + if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0) + return dse.mem_lvl; + if (status & 1) - dse.mem_lvl = PERF_MEM_LVL_L1; + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; + else + dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_MISS; + /* Nothing else supported. Sorry. */ return dse.val; } @@ -182,18 +199,32 @@ struct pebs_record_nhm { * Same as pebs_record_nhm, with two additional fields. */ struct pebs_record_hsw { - struct pebs_record_nhm nhm; - /* - * Real IP of the event. In the Intel documentation this - * is called eventingrip. - */ - u64 real_ip; - /* - * TSX tuning information field: abort cycles and abort flags. - */ - u64 tsx_tuning; + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 status, dla, dse, lat; + u64 real_ip, tsx_tuning; }; +union hsw_tsx_tuning { + struct { + u32 cycles_last_block : 32, + hle_abort : 1, + rtm_abort : 1, + instruction_abort : 1, + non_instruction_abort : 1, + retry : 1, + data_conflict : 1, + capacity_writes : 1, + capacity_reads : 1; + }; + u64 value; +}; + +#define PEBS_HSW_TSX_FLAGS 0xff00000000ULL + void init_debug_store_on_cpu(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -214,20 +245,35 @@ void fini_debug_store_on_cpu(int cpu) wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); } +static DEFINE_PER_CPU(void *, insn_buffer); + static int alloc_pebs_buffer(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; int node = cpu_to_node(cpu); int max, thresh = 1; /* always use a single PEBS record */ - void *buffer; + void *buffer, *ibuffer; if (!x86_pmu.pebs) return 0; - buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); + buffer = kzalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL, node); if (unlikely(!buffer)) return -ENOMEM; + /* + * HSW+ already provides us the eventing ip; no need to allocate this + * buffer then. + */ + if (x86_pmu.intel_cap.pebs_format < 2) { + ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node); + if (!ibuffer) { + kfree(buffer); + return -ENOMEM; + } + per_cpu(insn_buffer, cpu) = ibuffer; + } + max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; ds->pebs_buffer_base = (u64)(unsigned long)buffer; @@ -248,6 +294,9 @@ static void release_pebs_buffer(int cpu) if (!ds || !x86_pmu.pebs) return; + kfree(per_cpu(insn_buffer, cpu)); + per_cpu(insn_buffer, cpu) = NULL; + kfree((void *)(unsigned long)ds->pebs_buffer_base); ds->pebs_buffer_base = 0; } @@ -262,9 +311,11 @@ static int alloc_bts_buffer(int cpu) if (!x86_pmu.bts) return 0; - buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node); - if (unlikely(!buffer)) + buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node); + if (unlikely(!buffer)) { + WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__); return -ENOMEM; + } max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; thresh = max / 16; @@ -295,7 +346,7 @@ static int alloc_ds_buffer(int cpu) int node = cpu_to_node(cpu); struct debug_store *ds; - ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node); + ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node); if (unlikely(!ds)) return -ENOMEM; @@ -517,6 +568,32 @@ struct event_constraint intel_atom_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_slm_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x0103, 0x1), /* REHABQ.LD_BLOCK_ST_FORWARD_PS */ + INTEL_UEVENT_CONSTRAINT(0x0803, 0x1), /* REHABQ.LD_SPLITS_PS */ + INTEL_UEVENT_CONSTRAINT(0x0204, 0x1), /* MEM_UOPS_RETIRED.L2_HIT_LOADS_PS */ + INTEL_UEVENT_CONSTRAINT(0x0404, 0x1), /* MEM_UOPS_RETIRED.L2_MISS_LOADS_PS */ + INTEL_UEVENT_CONSTRAINT(0x0804, 0x1), /* MEM_UOPS_RETIRED.DTLB_MISS_LOADS_PS */ + INTEL_UEVENT_CONSTRAINT(0x2004, 0x1), /* MEM_UOPS_RETIRED.HITM_PS */ + INTEL_UEVENT_CONSTRAINT(0x00c0, 0x1), /* INST_RETIRED.ANY_PS */ + INTEL_UEVENT_CONSTRAINT(0x00c4, 0x1), /* BR_INST_RETIRED.ALL_BRANCHES_PS */ + INTEL_UEVENT_CONSTRAINT(0x7ec4, 0x1), /* BR_INST_RETIRED.JCC_PS */ + INTEL_UEVENT_CONSTRAINT(0xbfc4, 0x1), /* BR_INST_RETIRED.FAR_BRANCH_PS */ + INTEL_UEVENT_CONSTRAINT(0xebc4, 0x1), /* BR_INST_RETIRED.NON_RETURN_IND_PS */ + INTEL_UEVENT_CONSTRAINT(0xf7c4, 0x1), /* BR_INST_RETIRED.RETURN_PS */ + INTEL_UEVENT_CONSTRAINT(0xf9c4, 0x1), /* BR_INST_RETIRED.CALL_PS */ + INTEL_UEVENT_CONSTRAINT(0xfbc4, 0x1), /* BR_INST_RETIRED.IND_CALL_PS */ + INTEL_UEVENT_CONSTRAINT(0xfdc4, 0x1), /* BR_INST_RETIRED.REL_CALL_PS */ + INTEL_UEVENT_CONSTRAINT(0xfec4, 0x1), /* BR_INST_RETIRED.TAKEN_JCC_PS */ + INTEL_UEVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_MISP_RETIRED.ALL_BRANCHES_PS */ + INTEL_UEVENT_CONSTRAINT(0x7ec5, 0x1), /* BR_INST_MISP_RETIRED.JCC_PS */ + INTEL_UEVENT_CONSTRAINT(0xebc5, 0x1), /* BR_INST_MISP_RETIRED.NON_RETURN_IND_PS */ + INTEL_UEVENT_CONSTRAINT(0xf7c5, 0x1), /* BR_INST_MISP_RETIRED.RETURN_PS */ + INTEL_UEVENT_CONSTRAINT(0xfbc5, 0x1), /* BR_INST_MISP_RETIRED.IND_CALL_PS */ + INTEL_UEVENT_CONSTRAINT(0xfec5, 0x1), /* BR_INST_MISP_RETIRED.TAKEN_JCC_PS */ + EVENT_CONSTRAINT_END +}; + struct event_constraint intel_nehalem_pebs_event_constraints[] = { INTEL_PLD_CONSTRAINT(0x100b, 0xf), /* MEM_INST_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0x0f, 0xf), /* MEM_UNCORE_RETIRED.* */ @@ -558,6 +635,7 @@ struct event_constraint intel_snb_pebs_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xd0, 0xf), /* MEM_UOP_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd1, 0xf), /* MEM_LOAD_UOPS_RETIRED.* */ INTEL_EVENT_CONSTRAINT(0xd2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.* */ + INTEL_EVENT_CONSTRAINT(0xd3, 0xf), /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.* */ INTEL_UEVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */ EVENT_CONSTRAINT_END }; @@ -688,6 +766,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; int is_64bit = 0; + void *kaddr; /* * We don't need to fixup if the PEBS assist is fault like @@ -711,7 +790,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) * unsigned math, either ip is before the start (impossible) or * the basic block is larger than 1 page (sanity) */ - if ((ip - to) > PAGE_SIZE) + if ((ip - to) > PEBS_FIXUP_SIZE) return 0; /* @@ -722,29 +801,33 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 1; } + if (!kernel_ip(ip)) { + int size, bytes; + u8 *buf = this_cpu_read(insn_buffer); + + size = ip - to; /* Must fit our buffer, see above */ + bytes = copy_from_user_nmi(buf, (void __user *)to, size); + if (bytes != 0) + return 0; + + kaddr = buf; + } else { + kaddr = (void *)to; + } + do { struct insn insn; - u8 buf[MAX_INSN_SIZE]; - void *kaddr; old_to = to; - if (!kernel_ip(ip)) { - int bytes, size = MAX_INSN_SIZE; - - bytes = copy_from_user_nmi(buf, (void __user *)to, size); - if (bytes != size) - return 0; - - kaddr = buf; - } else - kaddr = (void *)to; #ifdef CONFIG_X86_64 is_64bit = kernel_ip(to) || !test_thread_flag(TIF_IA32); #endif insn_init(&insn, kaddr, is_64bit); insn_get_length(&insn); + to += insn.length; + kaddr += insn.length; } while (to < ip); if (to == ip) { @@ -759,16 +842,34 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 0; } +static inline u64 intel_hsw_weight(struct pebs_record_hsw *pebs) +{ + if (pebs->tsx_tuning) { + union hsw_tsx_tuning tsx = { .value = pebs->tsx_tuning }; + return tsx.cycles_last_block; + } + return 0; +} + +static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs) +{ + u64 txn = (pebs->tsx_tuning & PEBS_HSW_TSX_FLAGS) >> 32; + + /* For RTM XABORTs also log the abort code from AX */ + if ((txn & PERF_TXN_TRANSACTION) && (pebs->ax & 1)) + txn |= ((pebs->ax >> 24) & 0xff) << PERF_TXN_ABORT_SHIFT; + return txn; +} + static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *__pebs) { /* - * We cast to pebs_record_nhm to get the load latency data - * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used + * We cast to the biggest pebs_record but are careful not to + * unconditionally access the 'extra' entries. */ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct pebs_record_nhm *pebs = __pebs; - struct pebs_record_hsw *pebs_hsw = __pebs; + struct pebs_record_hsw *pebs = __pebs; struct perf_sample_data data; struct pt_regs regs; u64 sample_type; @@ -804,7 +905,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, data.data_src.val = load_latency_data(pebs->dse); else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) data.data_src.val = - precise_store_data_hsw(pebs->dse); + precise_store_data_hsw(event, pebs->dse); else data.data_src.val = precise_store_data(pebs->dse); } @@ -827,7 +928,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.sp = pebs->sp; if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { - regs.ip = pebs_hsw->real_ip; + regs.ip = pebs->real_ip; regs.flags |= PERF_EFLAGS_EXACT; } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) regs.flags |= PERF_EFLAGS_EXACT; @@ -835,9 +936,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.flags &= ~PERF_EFLAGS_EXACT; if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && - x86_pmu.intel_cap.pebs_format >= 1) + x86_pmu.intel_cap.pebs_format >= 1) data.addr = pebs->dla; + if (x86_pmu.intel_cap.pebs_format >= 2) { + /* Only set the TSX weight when no memory weight. */ + if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll) + data.weight = intel_hsw_weight(pebs); + + if (event->attr.sample_type & PERF_SAMPLE_TRANSACTION) + data.txn = intel_hsw_transaction(pebs); + } + if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; @@ -886,17 +996,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) __intel_pmu_pebs_event(event, iregs, at); } -static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, - void *top) +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; struct perf_event *event = NULL; + void *at, *top; u64 status = 0; int bit; + if (!x86_pmu.pebs_active) + return; + + at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; + ds->pebs_index = ds->pebs_buffer_base; + if (unlikely(at > top)) + return; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size, + "Unexpected number of pebs records %ld\n", + (long)(top - at) / x86_pmu.pebs_record_size); + for (; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; @@ -924,61 +1051,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, } } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct pebs_record_nhm *at, *top; - int n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; - - ds->pebs_index = ds->pebs_buffer_base; - - n = top - at; - if (n <= 0) - return; - - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ONCE(n > x86_pmu.max_pebs_events, - "Unexpected number of pebs records %d\n", n); - - return __intel_pmu_drain_pebs_nhm(iregs, at, top); -} - -static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct pebs_record_hsw *at, *top; - int n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index; - - n = top - at; - if (n <= 0) - return; - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ONCE(n > x86_pmu.max_pebs_events, - "Unexpected number of pebs records %d\n", n); - - return __intel_pmu_drain_pebs_nhm(iregs, at, top); -} - /* * BTS, PEBS probe and setup */ @@ -1013,7 +1085,7 @@ void intel_ds_init(void) case 2: pr_cont("PEBS fmt2%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); - x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw; + x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; break; default: |
