diff options
Diffstat (limited to 'arch/x86/kernel/cpu/perf_event_intel_ds.c')
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 178 |
1 files changed, 153 insertions, 25 deletions
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 60250f68705..3065c57a63c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -107,6 +107,19 @@ static u64 precise_store_data(u64 status) return val; } +static u64 precise_store_data_hsw(u64 status) +{ + union perf_mem_data_src dse; + + dse.val = 0; + dse.mem_op = PERF_MEM_OP_STORE; + dse.mem_lvl = PERF_MEM_LVL_NA; + if (status & 1) + dse.mem_lvl = PERF_MEM_LVL_L1; + /* Nothing else supported. Sorry. */ + return dse.val; +} + static u64 load_latency_data(u64 status) { union intel_x86_pebs_dse dse; @@ -165,6 +178,22 @@ struct pebs_record_nhm { u64 status, dla, dse, lat; }; +/* + * Same as pebs_record_nhm, with two additional fields. + */ +struct pebs_record_hsw { + struct pebs_record_nhm nhm; + /* + * Real IP of the event. In the Intel documentation this + * is called eventingrip. + */ + u64 real_ip; + /* + * TSX tuning information field: abort cycles and abort flags. + */ + u64 tsx_tuning; +}; + void init_debug_store_on_cpu(int cpu) { struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; @@ -548,6 +577,42 @@ struct event_constraint intel_ivb_pebs_event_constraints[] = { EVENT_CONSTRAINT_END }; +struct event_constraint intel_hsw_pebs_event_constraints[] = { + INTEL_UEVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */ + INTEL_PST_HSW_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */ + INTEL_UEVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */ + INTEL_EVENT_CONSTRAINT(0xc4, 0xf), /* BR_INST_RETIRED.* */ + INTEL_UEVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */ + INTEL_UEVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */ + INTEL_UEVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.NEAR_TAKEN */ + INTEL_PLD_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.* */ + /* MEM_UOPS_RETIRED.STLB_MISS_LOADS */ + INTEL_UEVENT_CONSTRAINT(0x11d0, 0xf), + /* MEM_UOPS_RETIRED.STLB_MISS_STORES */ + INTEL_UEVENT_CONSTRAINT(0x12d0, 0xf), + INTEL_UEVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOPS_RETIRED.LOCK_LOADS */ + INTEL_UEVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOPS_RETIRED.SPLIT_LOADS */ + /* MEM_UOPS_RETIRED.SPLIT_STORES */ + INTEL_UEVENT_CONSTRAINT(0x42d0, 0xf), + INTEL_UEVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOPS_RETIRED.ALL_LOADS */ + INTEL_PST_HSW_CONSTRAINT(0x82d0, 0xf), /* MEM_UOPS_RETIRED.ALL_STORES */ + INTEL_UEVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */ + INTEL_UEVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */ + INTEL_UEVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L3_HIT */ + /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */ + INTEL_UEVENT_CONSTRAINT(0x40d1, 0xf), + /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */ + INTEL_UEVENT_CONSTRAINT(0x01d2, 0xf), + /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */ + INTEL_UEVENT_CONSTRAINT(0x02d2, 0xf), + /* MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM */ + INTEL_UEVENT_CONSTRAINT(0x01d3, 0xf), + INTEL_UEVENT_CONSTRAINT(0x04c8, 0xf), /* HLE_RETIRED.Abort */ + INTEL_UEVENT_CONSTRAINT(0x04c9, 0xf), /* RTM_RETIRED.Abort */ + + EVENT_CONSTRAINT_END +}; + struct event_constraint *intel_pebs_constraints(struct perf_event *event) { struct event_constraint *c; @@ -588,6 +653,12 @@ void intel_pmu_pebs_disable(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; cpuc->pebs_enabled &= ~(1ULL << hwc->idx); + + if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_LDLAT) + cpuc->pebs_enabled &= ~(1ULL << (hwc->idx + 32)); + else if (event->hw.constraint->flags & PERF_X86_EVENT_PEBS_ST) + cpuc->pebs_enabled &= ~(1ULL << 63); + if (cpuc->enabled) wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); @@ -697,6 +768,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event, */ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct pebs_record_nhm *pebs = __pebs; + struct pebs_record_hsw *pebs_hsw = __pebs; struct perf_sample_data data; struct pt_regs regs; u64 sample_type; @@ -706,7 +778,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event, return; fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT; - fst = event->hw.flags & PERF_X86_EVENT_PEBS_ST; + fst = event->hw.flags & (PERF_X86_EVENT_PEBS_ST | + PERF_X86_EVENT_PEBS_ST_HSW); perf_sample_data_init(&data, 0, event->hw.last_period); @@ -717,9 +790,6 @@ static void __intel_pmu_pebs_event(struct perf_event *event, * if PEBS-LL or PreciseStore */ if (fll || fst) { - if (sample_type & PERF_SAMPLE_ADDR) - data.addr = pebs->dla; - /* * Use latency for weight (only avail with PEBS-LL) */ @@ -732,6 +802,9 @@ static void __intel_pmu_pebs_event(struct perf_event *event, if (sample_type & PERF_SAMPLE_DATA_SRC) { if (fll) data.data_src.val = load_latency_data(pebs->dse); + else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + data.data_src.val = + precise_store_data_hsw(pebs->dse); else data.data_src.val = precise_store_data(pebs->dse); } @@ -753,11 +826,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event, regs.bp = pebs->bp; regs.sp = pebs->sp; - if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) + if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { + regs.ip = pebs_hsw->real_ip; + regs.flags |= PERF_EFLAGS_EXACT; + } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(®s)) regs.flags |= PERF_EFLAGS_EXACT; else regs.flags &= ~PERF_EFLAGS_EXACT; + if ((event->attr.sample_type & PERF_SAMPLE_ADDR) && + x86_pmu.intel_cap.pebs_format >= 1) + data.addr = pebs->dla; + if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; @@ -806,35 +886,22 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) __intel_pmu_pebs_event(event, iregs, at); } -static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at, + void *top) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct debug_store *ds = cpuc->ds; - struct pebs_record_nhm *at, *top; struct perf_event *event = NULL; u64 status = 0; - int bit, n; - - if (!x86_pmu.pebs_active) - return; - - at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; - top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; + int bit; ds->pebs_index = ds->pebs_buffer_base; - n = top - at; - if (n <= 0) - return; - - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ONCE(n > x86_pmu.max_pebs_events, "Unexpected number of pebs records %d\n", n); + for (; at < top; at += x86_pmu.pebs_record_size) { + struct pebs_record_nhm *p = at; - for ( ; at < top; at++) { - for_each_set_bit(bit, (unsigned long *)&at->status, x86_pmu.max_pebs_events) { + for_each_set_bit(bit, (unsigned long *)&p->status, + x86_pmu.max_pebs_events) { event = cpuc->events[bit]; if (!test_bit(bit, cpuc->active_mask)) continue; @@ -857,6 +924,61 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) } } +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct pebs_record_nhm *at, *top; + int n; + + if (!x86_pmu.pebs_active) + return; + + at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; + + ds->pebs_index = ds->pebs_buffer_base; + + n = top - at; + if (n <= 0) + return; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ONCE(n > x86_pmu.max_pebs_events, + "Unexpected number of pebs records %d\n", n); + + return __intel_pmu_drain_pebs_nhm(iregs, at, top); +} + +static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct pebs_record_hsw *at, *top; + int n; + + if (!x86_pmu.pebs_active) + return; + + at = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index; + + n = top - at; + if (n <= 0) + return; + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ONCE(n > x86_pmu.max_pebs_events, + "Unexpected number of pebs records %d\n", n); + + return __intel_pmu_drain_pebs_nhm(iregs, at, top); +} + /* * BTS, PEBS probe and setup */ @@ -888,6 +1010,12 @@ void intel_ds_init(void) x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; break; + case 2: + pr_cont("PEBS fmt2%c, ", pebs_type); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw; + break; + default: printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; |