diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-30 07:41:01 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-30 07:41:01 -0700 |
commit | e0972916e8fe943f342b0dd1c9d43dbf5bc261c2 (patch) | |
tree | 690c436f1f9b839c4ba34d17ab3efa63b97a2dce /arch/x86/kernel | |
parent | 1f889ec62c3f0d8913f3c32f9aff2a1e15099346 (diff) | |
parent | 5ac2b5c2721501a8f5c5e1cd4116cbc31ace6886 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Features:
- Add "uretprobes" - an optimization to uprobes, like kretprobes are
an optimization to kprobes. "perf probe -x file sym%return" now
works like kretprobes. By Oleg Nesterov.
- Introduce per core aggregation in 'perf stat', from Stephane
Eranian.
- Add memory profiling via PEBS, from Stephane Eranian.
- Event group view for 'annotate' in --stdio, --tui and --gtk, from
Namhyung Kim.
- Add support for AMD NB and L2I "uncore" counters, by Jacob Shin.
- Add Ivy Bridge-EP uncore support, by Zheng Yan
- IBM zEnterprise EC12 oprofile support patchlet from Robert Richter.
- Add perf test entries for checking breakpoint overflow signal
handler issues, from Jiri Olsa.
- Add perf test entry for for checking number of EXIT events, from
Namhyung Kim.
- Add perf test entries for checking --cpu in record and stat, from
Jiri Olsa.
- Introduce perf stat --repeat forever, from Frederik Deweerdt.
- Add --no-demangle to report/top, from Namhyung Kim.
- PowerPC fixes plus a couple of cleanups/optimizations in uprobes
and trace_uprobes, by Oleg Nesterov.
Various fixes and refactorings:
- Fix dependency of the python binding wrt libtraceevent, from
Naohiro Aota.
- Simplify some perf_evlist methods and to allow 'stat' to share code
with 'record' and 'trace', by Arnaldo Carvalho de Melo.
- Remove dead code in related to libtraceevent integration, from
Namhyung Kim.
- Revert "perf sched: Handle PERF_RECORD_EXIT events" to get 'perf
sched lat' back working, by Arnaldo Carvalho de Melo
- We don't use Newt anymore, just plain libslang, by Arnaldo Carvalho
de Melo.
- Kill a bunch of die() calls, from Namhyung Kim.
- Fix build on non-glibc systems due to libio.h absence, from Cody P
Schafer.
- Remove some perf_session and tracing dead code, from David Ahern.
- Honor parallel jobs, fix from Borislav Petkov
- Introduce tools/lib/lk library, initially just removing duplication
among tools/perf and tools/vm. from Borislav Petkov
... and many more I missed to list, see the shortlog and git log for
more details."
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (136 commits)
perf/x86/intel/P4: Robistify P4 PMU types
perf/x86/amd: Fix AMD NB and L2I "uncore" support
perf/x86/amd: Remove old-style NB counter support from perf_event_amd.c
perf/x86: Check all MSRs before passing hw check
perf/x86/amd: Add support for AMD NB and L2I "uncore" counters
perf/x86/intel: Add Ivy Bridge-EP uncore support
perf/x86/intel: Fix SNB-EP CBO and PCU uncore PMU filter management
perf/x86: Avoid kfree() in CPU_{STARTING,DYING}
uprobes/perf: Avoid perf_trace_buf_prepare/submit if ->perf_events is empty
uprobes/tracing: Don't pass addr=ip to perf_trace_buf_submit()
uprobes/tracing: Change create_trace_uprobe() to support uretprobes
uprobes/tracing: Make seq_printf() code uretprobe-friendly
uprobes/tracing: Make register_uprobe_event() paths uretprobe-friendly
uprobes/tracing: Make uprobe_{trace,perf}_print() uretprobe-friendly
uprobes/tracing: Introduce is_ret_probe() and uretprobe_dispatcher()
uprobes/tracing: Introduce uprobe_{trace,perf}_print() helpers
uprobes/tracing: Generalize struct uprobe_trace_entry_head
uprobes/tracing: Kill the pointless local_save_flags/preempt_count calls
uprobes/tracing: Kill the pointless seq_print_ip_sym() call
uprobes/tracing: Kill the pointless task_pt_regs() calls
...
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/cpu/Makefile | 2 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.c | 89 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event.h | 56 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_amd.c | 138 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_amd_uncore.c | 547 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel.c | 38 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_ds.c | 182 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_uncore.c | 876 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_intel_uncore.h | 64 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/perf_event_p4.c | 9 | ||||
-rw-r--r-- | arch/x86/kernel/kprobes/core.c | 6 | ||||
-rw-r--r-- | arch/x86/kernel/uprobes.c | 29 |
12 files changed, 1770 insertions, 266 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c9496313843..deef0399fc7 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifdef CONFIG_PERF_EVENTS -obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o +obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bf0f01aea99..1025f3c99d2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -180,8 +180,9 @@ static void release_pmc_hardware(void) {} static bool check_hw_exists(void) { - u64 val, val_new = ~0; - int i, reg, ret = 0; + u64 val, val_fail, val_new= ~0; + int i, reg, reg_fail, ret = 0; + int bios_fail = 0; /* * Check to see if the BIOS enabled any of the counters, if so @@ -192,8 +193,11 @@ static bool check_hw_exists(void) ret = rdmsrl_safe(reg, &val); if (ret) goto msr_fail; - if (val & ARCH_PERFMON_EVENTSEL_ENABLE) - goto bios_fail; + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) { + bios_fail = 1; + val_fail = val; + reg_fail = reg; + } } if (x86_pmu.num_counters_fixed) { @@ -202,8 +206,11 @@ static bool check_hw_exists(void) if (ret) goto msr_fail; for (i = 0; i < x86_pmu.num_counters_fixed; i++) { - if (val & (0x03 << i*4)) - goto bios_fail; + if (val & (0x03 << i*4)) { + bios_fail = 1; + val_fail = val; + reg_fail = reg; + } } } @@ -221,14 +228,13 @@ static bool check_hw_exists(void) if (ret || val != val_new) goto msr_fail; - return true; - -bios_fail: /* * We still allow the PMU driver to operate: */ - printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); - printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg, val); + if (bios_fail) { + printk(KERN_CONT "Broken BIOS detected, complain to your hardware vendor.\n"); + printk(KERN_ERR FW_BUG "the BIOS has corrupted hw-PMU resources (MSR %x is %Lx)\n", reg_fail, val_fail); + } return true; @@ -1316,9 +1322,16 @@ static struct attribute_group x86_pmu_format_group = { */ static void __init filter_events(struct attribute **attrs) { + struct device_attribute *d; + struct perf_pmu_events_attr *pmu_attr; int i, j; for (i = 0; attrs[i]; i++) { + d = (struct device_attribute *)attrs[i]; + pmu_attr = container_of(d, struct perf_pmu_events_attr, attr); + /* str trumps id */ + if (pmu_attr->event_str) + continue; if (x86_pmu.event_map(i)) continue; @@ -1330,22 +1343,45 @@ static void __init filter_events(struct attribute **attrs) } } -static ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, +/* Merge two pointer arrays */ +static __init struct attribute **merge_attr(struct attribute **a, struct attribute **b) +{ + struct attribute **new; + int j, i; + + for (j = 0; a[j]; j++) + ; + for (i = 0; b[i]; i++) + j++; + j++; + + new = kmalloc(sizeof(struct attribute *) * j, GFP_KERNEL); + if (!new) + return NULL; + + j = 0; + for (i = 0; a[i]; i++) + new[j++] = a[i]; + for (i = 0; b[i]; i++) + new[j++] = b[i]; + new[j] = NULL; + + return new; +} + +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, char *page) { struct perf_pmu_events_attr *pmu_attr = \ container_of(attr, struct perf_pmu_events_attr, attr); - u64 config = x86_pmu.event_map(pmu_attr->id); - return x86_pmu.events_sysfs_show(page, config); -} -#define EVENT_VAR(_id) event_attr_##_id -#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + /* string trumps id */ + if (pmu_attr->event_str) + return sprintf(page, "%s", pmu_attr->event_str); -#define EVENT_ATTR(_name, _id) \ - PMU_EVENT_ATTR(_name, EVENT_VAR(_id), PERF_COUNT_HW_##_id, \ - events_sysfs_show) + return x86_pmu.events_sysfs_show(page, config); +} EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); @@ -1459,16 +1495,27 @@ static int __init init_hw_perf_events(void) unconstrained = (struct event_constraint) __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, - 0, x86_pmu.num_counters, 0); + 0, x86_pmu.num_counters, 0, 0); x86_pmu.attr_rdpmc = 1; /* enable userspace RDPMC usage by default */ x86_pmu_format_group.attrs = x86_pmu.format_attrs; + if (x86_pmu.event_attrs) + x86_pmu_events_group.attrs = x86_pmu.event_attrs; + if (!x86_pmu.events_sysfs_show) x86_pmu_events_group.attrs = &empty_attrs; else filter_events(x86_pmu_events_group.attrs); + if (x86_pmu.cpu_events) { + struct attribute **tmp; + + tmp = merge_attr(x86_pmu_events_group.attrs, x86_pmu.cpu_events); + if (!WARN_ON(!tmp)) + x86_pmu_events_group.attrs = tmp; + } + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.cntval_bits); pr_info("... generic registers: %d\n", x86_pmu.num_counters); diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 7f5c75c2afd..ba9aadfa683 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -46,6 +46,7 @@ enum extra_reg_type { EXTRA_REG_RSP_0 = 0, /* offcore_response_0 */ EXTRA_REG_RSP_1 = 1, /* offcore_response_1 */ EXTRA_REG_LBR = 2, /* lbr_select */ + EXTRA_REG_LDLAT = 3, /* ld_lat_threshold */ EXTRA_REG_MAX /* number of entries needed */ }; @@ -59,7 +60,13 @@ struct event_constraint { u64 cmask; int weight; int overlap; + int flags; }; +/* + * struct event_constraint flags + */ +#define PERF_X86_EVENT_PEBS_LDLAT 0x1 /* ld+ldlat data address sampling */ +#define PERF_X86_EVENT_PEBS_ST 0x2 /* st data address sampling */ struct amd_nb { int nb_id; /* NorthBridge id */ @@ -170,16 +177,17 @@ struct cpu_hw_events { void *kfree_on_online; }; -#define __EVENT_CONSTRAINT(c, n, m, w, o) {\ +#define __EVENT_CONSTRAINT(c, n, m, w, o, f) {\ { .idxmsk64 = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = (w), \ .overlap = (o), \ + .flags = f, \ } #define EVENT_CONSTRAINT(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0) + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 0, 0) /* * The overlap flag marks event constraints with overlapping counter @@ -203,7 +211,7 @@ struct cpu_hw_events { * and its counter masks must be kept at a minimum. */ #define EVENT_CONSTRAINT_OVERLAP(c, n, m) \ - __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1) + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n), 1, 0) /* * Constraint on the Event code. @@ -231,6 +239,14 @@ struct cpu_hw_events { #define INTEL_UEVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define INTEL_PLD_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_LDLAT) + +#define INTEL_PST_CONSTRAINT(c, n) \ + __EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK, \ + HWEIGHT(n), 0, PERF_X86_EVENT_PEBS_ST) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -260,12 +276,22 @@ struct extra_reg { .msr = (ms), \ .config_mask = (m), \ .valid_mask = (vm), \ - .idx = EXTRA_REG_##i \ + .idx = EXTRA_REG_##i, \ } #define INTEL_EVENT_EXTRA_REG(event, msr, vm, idx) \ EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, idx) +#define INTEL_UEVENT_EXTRA_REG(event, msr, vm, idx) \ + EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK, vm, idx) + +#define INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(c) \ + INTEL_UEVENT_EXTRA_REG(c, \ + MSR_PEBS_LD_LAT_THRESHOLD, \ + 0xffff, \ + LDLAT) + #define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, RSP_0) union perf_capabilities { @@ -355,8 +381,10 @@ struct x86_pmu { */ int attr_rdpmc; struct attribute **format_attrs; + struct attribute **event_attrs; ssize_t (*events_sysfs_show)(char *page, u64 config); + struct attribute **cpu_events; /* * CPU Hotplug hooks @@ -421,6 +449,23 @@ do { \ #define ERF_NO_HT_SHARING 1 #define ERF_HAS_RSP_1 2 +#define EVENT_VAR(_id) event_attr_##_id +#define EVENT_PTR(_id) &event_attr_##_id.attr.attr + +#define EVENT_ATTR(_name, _id) \ +static struct perf_pmu_events_attr EVENT_VAR(_id) = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = PERF_COUNT_HW_##_id, \ + .event_str = NULL, \ +}; + +#define EVENT_ATTR_STR(_name, v, str) \ +static struct perf_pmu_events_attr event_attr_##v = { \ + .attr = __ATTR(_name, 0444, events_sysfs_show, NULL), \ + .id = 0, \ + .event_str = str, \ +}; + extern struct x86_pmu x86_pmu __read_mostly; DECLARE_PER_CPU(struct cpu_hw_events, cpu_hw_events); @@ -628,6 +673,9 @@ int p6_pmu_init(void); int knc_pmu_init(void); +ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, + char *page); + #else /* CONFIG_CPU_SUP_INTEL */ static inline void reserve_ds_buffers(void) diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index dfdab42aed2..7e28d9467bb 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -132,14 +132,11 @@ static u64 amd_pmu_event_map(int hw_event) return amd_perfmon_event_map[hw_event]; } -static struct event_constraint *amd_nb_event_constraint; - /* * Previously calculated offsets */ static unsigned int event_offsets[X86_PMC_IDX_MAX] __read_mostly; static unsigned int count_offsets[X86_PMC_IDX_MAX] __read_mostly; -static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; /* * Legacy CPUs: @@ -147,14 +144,10 @@ static unsigned int rdpmc_indexes[X86_PMC_IDX_MAX] __read_mostly; * * CPUs with core performance counter extensions: * 6 counters starting at 0xc0010200 each offset by 2 - * - * CPUs with north bridge performance counter extensions: - * 4 additional counters starting at 0xc0010240 each offset by 2 - * (indexed right above either one of the above core counters) */ static inline int amd_pmu_addr_offset(int index, bool eventsel) { - int offset, first, base; + int offset; if (!index) return index; @@ -167,23 +160,7 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) if (offset) return offset; - if (amd_nb_event_constraint && - test_bit(index, amd_nb_event_constraint->idxmsk)) { - /* - * calculate the offset of NB counters with respect to - * base eventsel or perfctr - */ - - first = find_first_bit(amd_nb_event_constraint->idxmsk, - X86_PMC_IDX_MAX); - - if (eventsel) - base = MSR_F15H_NB_PERF_CTL - x86_pmu.eventsel; - else - base = MSR_F15H_NB_PERF_CTR - x86_pmu.perfctr; - - offset = base + ((index - first) << 1); - } else if (!cpu_has_perfctr_core) + if (!cpu_has_perfctr_core) offset = index; else offset = index << 1; @@ -196,36 +173,6 @@ static inline int amd_pmu_addr_offset(int index, bool eventsel) return offset; } -static inline int amd_pmu_rdpmc_index(int index) -{ - int ret, first; - - if (!index) - return index; - - ret = rdpmc_indexes[index]; - - if (ret) - return ret; - - if (amd_nb_event_constraint && - test_bit(index, amd_nb_event_constraint->idxmsk)) { - /* - * according to the mnual, ECX value of the NB counters is - * the index of the NB counter (0, 1, 2 or 3) plus 6 - */ - - first = find_first_bit(amd_nb_event_constraint->idxmsk, - X86_PMC_IDX_MAX); - ret = index - first + 6; - } else - ret = index; - - rdpmc_indexes[index] = ret; - - return ret; -} - static int amd_core_hw_config(struct perf_event *event) { if (event->attr.exclude_host && event->attr.exclude_guest) @@ -245,34 +192,6 @@ static int amd_core_hw_config(struct perf_event *event) } /* - * NB counters do not support the following event select bits: - * Host/Guest only - * Counter mask - * Invert counter mask - * Edge detect - * OS/User mode - */ -static int amd_nb_hw_config(struct perf_event *event) -{ - /* for NB, we only allow system wide counting mode */ - if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) - return -EINVAL; - - if (event->attr.exclude_user || event->attr.exclude_kernel || - event->attr.exclude_host || event->attr.exclude_guest) - return -EINVAL; - - event->hw.config &= ~(ARCH_PERFMON_EVENTSEL_USR | - ARCH_PERFMON_EVENTSEL_OS); - - if (event->hw.config & ~(AMD64_RAW_EVENT_MASK_NB | - ARCH_PERFMON_EVENTSEL_INT)) - return -EINVAL; - - return 0; -} - -/* * AMD64 events are detected based on their event codes. */ static inline unsigned int amd_get_event_code(struct hw_perf_event *hwc) @@ -285,11 +204,6 @@ static inline int amd_is_nb_event(struct hw_perf_event *hwc) return (hwc->config & 0xe0) == 0xe0; } -static inline int amd_is_perfctr_nb_event(struct hw_perf_event *hwc) -{ - return amd_nb_event_constraint && amd_is_nb_event(hwc); -} - static inline int amd_has_nb(struct cpu_hw_events *cpuc) { struct amd_nb *nb = cpuc->amd_nb; @@ -315,9 +229,6 @@ static int amd_pmu_hw_config(struct perf_event *event) if (event->attr.type == PERF_TYPE_RAW) event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; - if (amd_is_perfctr_nb_event(&event->hw)) - return amd_nb_hw_config(event); - return amd_core_hw_config(event); } @@ -341,19 +252,6 @@ static void __amd_put_nb_event_constraints(struct cpu_hw_events *cpuc, } } -static void amd_nb_interrupt_hw_config(struct hw_perf_event *hwc) -{ - int core_id = cpu_data(smp_processor_id()).cpu_core_id; - - /* deliver interrupts only to this core */ - if (hwc->config & ARCH_PERFMON_EVENTSEL_INT) { - hwc->config |= AMD64_EVENTSEL_INT_CORE_ENABLE; - hwc->config &= ~AMD64_EVENTSEL_INT_CORE_SEL_MASK; - hwc->config |= (u64)(core_id) << - AMD64_EVENTSEL_INT_CORE_SEL_SHIFT; - } -} - /* * AMD64 NorthBridge events need special treatment because * counter access needs to be synchronized across all cores @@ -441,9 +339,6 @@ __amd_get_nb_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *ev if (new == -1) return &emptyconstraint; - if (amd_is_perfctr_nb_event(hwc)) - amd_nb_interrupt_hw_config(hwc); - return &nb->event_constraints[new]; } @@ -543,8 +438,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) if (!(amd_has_nb(cpuc) && amd_is_nb_event(&event->hw))) return &unconstrained; - return __amd_get_nb_event_constraints(cpuc, event, - amd_nb_event_constraint); + return __amd_get_nb_event_constraints(cpuc, event, NULL); } static void amd_put_event_constraints(struct cpu_hw_events *cpuc, @@ -643,9 +537,6 @@ static struct event_constraint amd_f15_PMC30 = EVENT_CONSTRAINT_OVERLAP(0, 0x09, static struct event_constraint amd_f15_PMC50 = EVENT_CONSTRAINT(0, 0x3F, 0); static struct event_constraint amd_f15_PMC53 = EVENT_CONSTRAINT(0, 0x38, 0); -static struct event_constraint amd_NBPMC96 = EVENT_CONSTRAINT(0, 0x3C0, 0); -static struct event_constraint amd_NBPMC74 = EVENT_CONSTRAINT(0, 0xF0, 0); - static struct event_constraint * amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *event) { @@ -711,8 +602,8 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, struct perf_event *ev return &amd_f15_PMC20; } case AMD_EVENT_NB: - return __amd_get_nb_event_constraints(cpuc, event, - amd_nb_event_constraint); + /* moved to perf_event_amd_uncore.c */ + return &emptyconstraint; default: return &emptyconstraint; } @@ -738,7 +629,6 @@ static __initconst const struct x86_pmu amd_pmu = { .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .addr_offset = amd_pmu_addr_offset, - .rdpmc_index = amd_pmu_rdpmc_index, .event_map = amd_pmu_event_map, .max_events = ARRAY_SIZE(amd_perfmon_event_map), .num_counters = AMD64_NUM_COUNTERS, @@ -790,23 +680,6 @@ static int setup_perfctr_core(void) return 0; } -static int setup_perfctr_nb(void) -{ - if (!cpu_has_perfctr_nb) - return -ENODEV; - - x86_pmu.num_counters += AMD64_NUM_COUNTERS_NB; - - if (cpu_has_perfctr_core) - amd_nb_event_constraint = &amd_NBPMC96; - else - amd_nb_event_constraint = &amd_NBPMC74; - - printk(KERN_INFO "perf: AMD northbridge performance counters detected\n"); - - return 0; -} - __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ @@ -817,7 +690,6 @@ __init int amd_pmu_init(void) setup_event_constraints(); setup_perfctr_core(); - setup_perfctr_nb(); /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, diff --git a/arch/x86/kernel/cpu/perf_event_amd_uncore.c b/arch/x86/kernel/cpu/perf_event_amd_uncore.c new file mode 100644 index 00000000000..c0c661adf03 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd_uncore.c @@ -0,0 +1,547 @@ +/* + * Copyright (C) 2013 Advanced Micro Devices, Inc. + * + * Author: Jacob Shin <jacob.shin@amd.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/perf_event.h> +#include <linux/percpu.h> +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/cpu.h> +#include <linux/cpumask.h> + +#include <asm/cpufeature.h> +#include <asm/perf_event.h> +#include <asm/msr.h> + +#define NUM_COUNTERS_NB 4 +#define NUM_COUNTERS_L2 4 +#define MAX_COUNTERS NUM_COUNTERS_NB + +#define RDPMC_BASE_NB 6 +#define RDPMC_BASE_L2 10 + +#define COUNTER_SHIFT 16 + +struct amd_uncore { + int id; + int refcnt; + int cpu; + int num_counters; + int rdpmc_base; + u32 msr_base; + cpumask_t *active_mask; + struct pmu *pmu; + struct perf_event *events[MAX_COUNTERS]; + struct amd_uncore *free_when_cpu_online; +}; + +static struct amd_uncore * __percpu *amd_uncore_nb; +static struct amd_uncore * __percpu *amd_uncore_l2; + +static struct pmu amd_nb_pmu; +static struct pmu amd_l2_pmu; + +static cpumask_t amd_nb_active_mask; +static cpumask_t amd_l2_active_mask; + +static bool is_nb_event(struct perf_event *event) +{ + return event->pmu->type == amd_nb_pmu.type; +} + +static bool is_l2_event(struct perf_event *event) +{ + return event->pmu->type == amd_l2_pmu.type; +} + +static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) +{ + if (is_nb_event(event) && amd_uncore_nb) + return *per_cpu_ptr(amd_uncore_nb, event->cpu); + else if (is_l2_event(event) && amd_uncore_l2) + return *per_cpu_ptr(amd_uncore_l2, event->cpu); + + return NULL; +} + +static void amd_uncore_read(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + u64 prev, new; + s64 delta; + + /* + * since we do not enable counter overflow interrupts, + * we do not have to worry about prev_count changing on us + */ + + prev = local64_read(&hwc->prev_count); + rdpmcl(hwc->event_base_rdpmc, new); + local64_set(&hwc->prev_count, new); + delta = (new << COUNTER_SHIFT) - (prev << COUNTER_SHIFT); + delta >>= COUNTER_SHIFT; + local64_add(delta, &event->count); +} + +static void amd_uncore_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + if (flags & PERF_EF_RELOAD) + wrmsrl(hwc->event_base, (u64)local64_read(&hwc->prev_count)); + + hwc->state = 0; + wrmsrl(hwc->config_base, (hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE)); + perf_event_update_userpage(event); +} + +static void amd_uncore_stop(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + + wrmsrl(hwc->config_base, hwc->config); + hwc->state |= PERF_HES_STOPPED; + + if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { + amd_uncore_read(event); + hwc->state |= PERF_HES_UPTODATE; + } +} + +static int amd_uncore_add(struct perf_event *event, int flags) +{ + int i; + struct amd_uncore *uncore = event_to_amd_uncore(event); + struct hw_perf_event *hwc = &event->hw; + + /* are we already assigned? */ + if (hwc->idx != -1 && uncore->events[hwc->idx] == event) + goto out; + + for (i = 0; i < uncore->num_counters; i++) { + if (uncore->events[i] == event) { + hwc->idx = i; + goto out; + } + } + + /* if not, take the first available counter */ + hwc->idx = -1; + for (i = 0; i < uncore->num_counters; i++) { + if (cmpxchg(&uncore->events[i], NULL, event) == NULL) { + hwc->idx = i; + break; + } + } + +out: + if (hwc->idx == -1) + return -EBUSY; + + hwc->config_base = uncore->msr_base + (2 * hwc->idx); + hwc->event_base = uncore->msr_base + 1 + (2 * hwc->idx); + hwc->event_base_rdpmc = uncore->rdpmc_base + hwc->idx; + hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; + + if (flags & PERF_EF_START) + amd_uncore_start(event, PERF_EF_RELOAD); + + return 0; +} + +static void amd_uncore_del(struct perf_event *event, int flags) +{ + int i; + struct amd_uncore *uncore = event_to_amd_uncore(event); + struct hw_perf_event *hwc = &event->hw; + + amd_uncore_stop(event, PERF_EF_UPDATE); + + for (i = 0; i < uncore->num_counters; i++) { + if (cmpxchg(&uncore->events[i], event, NULL) == event) + break; + } + + hwc->idx = -1; +} + +static int amd_uncore_event_init(struct perf_event *event) +{ + struct amd_uncore *uncore; + struct hw_perf_event *hwc = &event->hw; + + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* + * NB and L2 counters (MSRs) are shared across all cores that share the + * same NB / L2 cache. Interrupts can be directed to a single target + * core, however, event counts generated by processes running on other + * cores cannot be masked out. So we do not support sampling and + * per-thread events. + */ + if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) + return -EINVAL; + + /* NB and L2 counters do not have usr/os/guest/host bits */ + if (event->attr.exclude_user || event->attr.exclude_kernel || + event->attr.exclude_host || event->attr.exclude_guest) + return -EINVAL; + + /* and we do not enable counter overflow interrupts */ + hwc->config = event->attr.config & AMD64_RAW_EVENT_MASK_NB; + hwc->idx = -1; + + if (event->cpu < 0) + return -EINVAL; + + uncore = event_to_amd_uncore(event); + if (!uncore) + return -ENODEV; + + /* + * since request can come in to any of the shared cores, we will remap + * to a single common cpu. + */ + event->cpu = uncore->cpu; + + return 0; +} + +static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + int n; + cpumask_t *active_mask; + struct pmu *pmu = dev_get_drvdata(dev); + + if (pmu->type == amd_nb_pmu.type) + active_mask = &amd_nb_active_mask; + else if (pmu->type == amd_l2_pmu.type) + active_mask = &amd_l2_active_mask; + else + return 0; + + n = cpulist_scnprintf(buf, PAGE_SIZE - 2, active_mask); + buf[n++] = '\n'; + buf[n] = '\0'; + return n; +} +static DEVICE_ATTR(cpumask, S_IRUGO, amd_uncore_attr_show_cpumask, NULL); + +static struct attribute *amd_uncore_attrs[] = { + &dev_attr_cpumask.attr, + NULL, +}; + +static struct attribute_group amd_uncore_attr_group = { + .attrs = amd_uncore_attrs, +}; + +PMU_FORMAT_ATTR(event, "config:0-7,32-35"); +PMU_FORMAT_ATTR(umask, "config:8-15"); + +static struct attribute *amd_uncore_format_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + NULL, +}; + +static struct attribute_group amd_uncore_format_group = { + .name = "format", + .attrs = amd_uncore_format_attr, +}; + +static const struct attribute_group *amd_uncore_attr_groups[] = { + &amd_uncore_attr_group, + &amd_uncore_format_group, + NULL, +}; + +static struct pmu amd_nb_pmu = { + .attr_groups = amd_uncore_attr_groups, + .name = "amd_nb", + .event_init = amd_uncore_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, +}; + +static struct pmu amd_l2_pmu = { + .attr_groups = amd_uncore_attr_groups, + .name = "amd_l2", + .event_init = amd_uncore_event_init, + .add = amd_uncore_add, + .del = amd_uncore_del, + .start = amd_uncore_start, + .stop = amd_uncore_stop, + .read = amd_uncore_read, +}; + +static struct amd_uncore * __cpuinit amd_uncore_alloc(unsigned int cpu) +{ + return kzalloc_node(sizeof(struct amd_uncore), GFP_KERNEL, + cpu_to_node(cpu)); +} + +static void __cpuinit amd_uncore_cpu_up_prepare(unsigned int cpu) +{ + struct amd_uncore *uncore; + + if (amd_uncore_nb) { + uncore = amd_uncore_alloc(cpu); + uncore->cpu = cpu; + uncore->num_counters = NUM_COUNTERS_NB; + uncore->rdpmc_base = RDPMC_BASE_NB; + uncore->msr_base = MSR_F15H_NB_PERF_CTL; + uncore->active_mask = &amd_nb_active_mask; + uncore->pmu = &amd_nb_pmu; + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + } + + if (amd_uncore_l2) { + uncore = amd_uncore_alloc(cpu); + uncore->cpu = cpu; + uncore->num_counters = NUM_COUNTERS_L2; + uncore->rdpmc_base = RDPMC_BASE_L2; + uncore->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore->active_mask = &amd_l2_active_mask; + uncore->pmu = &amd_l2_pmu; + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + } +} + +static struct amd_uncore * +__cpuinit amd_uncore_find_online_sibling(struct amd_uncore *this, + struct amd_uncore * __percpu *uncores) +{ + unsigned int cpu; + struct amd_uncore *that; + + for_each_online_cpu(cpu) { + that = *per_cpu_ptr(uncores, cpu); + + if (!that) + continue; + + if (this == that) + continue; + + if (this->id == that->id) { + that->free_when_cpu_online = this; + this = that; + break; + } + } + + this->refcnt++; + return this; +} + +static void __cpuinit amd_uncore_cpu_starting(unsigned int cpu) +{ + unsigned int eax, ebx, ecx, edx; + struct amd_uncore *uncore; + + if (amd_uncore_nb) { + uncore = *per_cpu_ptr(amd_uncore_nb, cpu); + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + uncore->id = ecx & 0xff; + + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_nb); + *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; + } + + if (amd_uncore_l2) { + unsigned int apicid = cpu_data(cpu).apicid; + unsigned int nshared; + + uncore = *per_cpu_ptr(amd_uncore_l2, cpu); + cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx); + nshared = ((eax >> 14) & 0xfff) + 1; + uncore->id = apicid - (apicid % nshared); + + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2); + *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + } +} + +static void __cpuinit uncore_online(unsigned int cpu, + struct amd_uncore * __percpu *uncores) +{ + struct amd_uncore *uncore = *per_cpu_ptr(uncores, cpu); + + kfree(uncore->free_when_cpu_online); + uncore->free_when_cpu_online = NULL; + + if (cpu == uncore->cpu) + cpumask_set_cpu(cpu, uncore->active_mask); +} + +static void __cpuinit amd_uncore_ |