diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-22 18:18:55 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-05-22 18:18:55 -0700 |
commit | 2ff2b289a695807e291e1ed9f639d8a3ba5f4254 (patch) | |
tree | e4b7f44e5cc1582ba2be8aeba221f4841f4c86a6 /arch | |
parent | 88d6ae8dc33af12fe1c7941b1fae2767374046fd (diff) | |
parent | 73787190d04a34e6da745da893b3ae8bedde418f (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf changes from Ingo Molnar:
"Lots of changes:
- (much) improved assembly annotation support in perf report, with
jump visualization, searching, navigation, visual output
improvements and more.
- kernel support for AMD IBS PMU hardware features. Notably 'perf
record -e cycles:p' and 'perf top -e cycles:p' should work without
skid now, like PEBS does on the Intel side, because it takes
advantage of IBS transparently.
- the libtracevents library: it is the first step towards unifying
tracing tooling and perf, and it also gives a tracing library for
external tools like powertop to rely on.
- infrastructure: various improvements and refactoring of the UI
modules and related code
- infrastructure: cleanup and simplification of the profiling
targets code (--uid, --pid, --tid, --cpu, --all-cpus, etc.)
- tons of robustness fixes all around
- various ftrace updates: speedups, cleanups, robustness
improvements.
- typing 'make' in tools/ will now give you a menu of projects to
build and a short help text to explain what each does.
- ... and lots of other changes I forgot to list.
The perf record make bzImage + perf report regression you reported
should be fixed."
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (166 commits)
tracing: Remove kernel_lock annotations
tracing: Fix initial buffer_size_kb state
ring-buffer: Merge separate resize loops
perf evsel: Create events initially disabled -- again
perf tools: Split term type into value type and term type
perf hists: Fix callchain ip printf format
perf target: Add uses_mmap field
ftrace: Remove selecting FRAME_POINTER with FUNCTION_TRACER
ftrace/x86: Have x86 ftrace use the ftrace_modify_all_code()
ftrace: Make ftrace_modify_all_code() global for archs to use
ftrace: Return record ip addr for ftrace_location()
ftrace: Consolidate ftrace_location() and ftrace_text_reserved()
ftrace: Speed up search by skipping pages by address
ftrace: Remove extra helper functions
ftrace: Sort all function addresses, not just per page
tracing: change CPU ring buffer state from tracing_cpumask
tracing: Check return value of tracing_dentry_percpu()
ring-buffer: Reset head page before running self test
ring-buffer: Add integrity check at end of iter read
ring-buffer: Make addition of pages in ring buffer atomic
...
Diffstat (limited to 'arch')
27 files changed, 954 insertions, 232 deletions
diff --git a/arch/alpha/kernel/perf_event.c b/arch/alpha/kernel/perf_event.c index 0dae252f7a3..d821b17047e 100644 --- a/arch/alpha/kernel/perf_event.c +++ b/arch/alpha/kernel/perf_event.c @@ -824,7 +824,6 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, idx = la_ptr; - perf_sample_data_init(&data, 0); for (j = 0; j < cpuc->n_events; j++) { if (cpuc->current_idx[j] == idx) break; @@ -848,7 +847,7 @@ static void alpha_perf_event_irq_handler(unsigned long la_ptr, hwc = &event->hw; alpha_perf_event_update(event, hwc, idx, alpha_pmu->pmc_max_period[idx]+1); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (alpha_perf_event_set_period(event, hwc, idx)) { if (perf_event_overflow(event, &data, regs)) { diff --git a/arch/arm/configs/bcmring_defconfig b/arch/arm/configs/bcmring_defconfig index 795374d48f8..9e6a8fe1316 100644 --- a/arch/arm/configs/bcmring_defconfig +++ b/arch/arm/configs/bcmring_defconfig @@ -11,7 +11,7 @@ CONFIG_KALLSYMS_EXTRA_PASS=y # CONFIG_TIMERFD is not set # CONFIG_EVENTFD is not set # CONFIG_AIO is not set -CONFIG_PERF_COUNTERS=y +CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set # CONFIG_SLUB_DEBUG is not set # CONFIG_COMPAT_BRK is not set diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index b78af0cc6ef..ab627a740fa 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -489,8 +489,6 @@ armv6pmu_handle_irq(int irq_num, */ armv6_pmcr_write(pmcr); - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; @@ -509,7 +507,7 @@ armv6pmu_handle_irq(int irq_num, hwc = &event->hw; armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!armpmu_event_set_period(event, hwc, idx)) continue; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 00755d82e2f..d3c53606816 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -1077,8 +1077,6 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; @@ -1097,7 +1095,7 @@ static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) hwc = &event->hw; armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!armpmu_event_set_period(event, hwc, idx)) continue; diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index 71a21e6712f..e34e7254e65 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -248,8 +248,6 @@ xscale1pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; @@ -263,7 +261,7 @@ xscale1pmu_handle_irq(int irq_num, void *dev) hwc = &event->hw; armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!armpmu_event_set_period(event, hwc, idx)) continue; @@ -588,8 +586,6 @@ xscale2pmu_handle_irq(int irq_num, void *dev) regs = get_irq_regs(); - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; @@ -603,7 +599,7 @@ xscale2pmu_handle_irq(int irq_num, void *dev) hwc = &event->hw; armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!armpmu_event_set_period(event, hwc, idx)) continue; diff --git a/arch/mips/kernel/perf_event_mipsxx.c b/arch/mips/kernel/perf_event_mipsxx.c index 811084f4e42..ab73fa2fb9b 100644 --- a/arch/mips/kernel/perf_event_mipsxx.c +++ b/arch/mips/kernel/perf_event_mipsxx.c @@ -1325,7 +1325,7 @@ static int mipsxx_pmu_handle_shared_irq(void) regs = get_irq_regs(); - perf_sample_data_init(&data, 0); + perf_sample_data_init(&data, 0, 0); switch (counters) { #define HANDLE_COUNTER(n) \ diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig index f104ccde6b5..b1f9597fe31 100644 --- a/arch/powerpc/configs/chroma_defconfig +++ b/arch/powerpc/configs/chroma_defconfig @@ -32,7 +32,7 @@ CONFIG_RD_LZMA=y CONFIG_INITRAMFS_COMPRESSION_GZIP=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y -CONFIG_PERF_COUNTERS=y +CONFIG_PERF_EVENTS=y CONFIG_PROFILING=y CONFIG_OPROFILE=y CONFIG_KPROBES=y diff --git a/arch/powerpc/configs/gamecube_defconfig b/arch/powerpc/configs/gamecube_defconfig index e74d3a48370..9ef2cc13e1b 100644 --- a/arch/powerpc/configs/gamecube_defconfig +++ b/arch/powerpc/configs/gamecube_defconfig @@ -8,7 +8,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -CONFIG_PERF_COUNTERS=y +CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/powerpc/configs/wii_defconfig b/arch/powerpc/configs/wii_defconfig index 175295fbf4f..1e2b7d062aa 100644 --- a/arch/powerpc/configs/wii_defconfig +++ b/arch/powerpc/configs/wii_defconfig @@ -9,7 +9,7 @@ CONFIG_BLK_DEV_INITRD=y # CONFIG_CC_OPTIMIZE_FOR_SIZE is not set CONFIG_EXPERT=y # CONFIG_ELF_CORE is not set -CONFIG_PERF_COUNTERS=y +CONFIG_PERF_EVENTS=y # CONFIG_VM_EVENT_COUNTERS is not set CONFIG_SLAB=y CONFIG_MODULES=y diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 02aee03e713..8f84bcba18d 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -1299,8 +1299,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (record) { struct perf_sample_data data; - perf_sample_data_init(&data, ~0ULL); - data.period = event->hw.last_period; + perf_sample_data_init(&data, ~0ULL, event->hw.last_period); if (event->attr.sample_type & PERF_SAMPLE_ADDR) perf_get_data_addr(regs, &data.addr); diff --git a/arch/powerpc/perf/core-fsl-emb.c b/arch/powerpc/perf/core-fsl-emb.c index 0a6d2a9d569..106c5335467 100644 --- a/arch/powerpc/perf/core-fsl-emb.c +++ b/arch/powerpc/perf/core-fsl-emb.c @@ -613,8 +613,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val, if (record) { struct perf_sample_data data; - perf_sample_data_init(&data, 0); - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); if (perf_event_overflow(event, &data, regs)) fsl_emb_pmu_stop(event, 0); diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 7b9c696ac5e..9bdcf72ec06 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -5,7 +5,7 @@ CONFIG_BSD_PROCESS_ACCT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=16 -CONFIG_PERF_COUNTERS=y +CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_PROFILING=y diff --git a/arch/sparc/configs/sparc64_defconfig b/arch/sparc/configs/sparc64_defconfig index 3c1e8580740..9d8521b8c85 100644 --- a/arch/sparc/configs/sparc64_defconfig +++ b/arch/sparc/configs/sparc64_defconfig @@ -5,7 +5,7 @@ CONFIG_SYSVIPC=y CONFIG_POSIX_MQUEUE=y CONFIG_LOG_BUF_SHIFT=18 CONFIG_BLK_DEV_INITRD=y -CONFIG_PERF_COUNTERS=y +CONFIG_PERF_EVENTS=y # CONFIG_COMPAT_BRK is not set CONFIG_SLAB=y CONFIG_PROFILING=y diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c index 28559ce5eeb..5713957dcb8 100644 --- a/arch/sparc/kernel/perf_event.c +++ b/arch/sparc/kernel/perf_event.c @@ -1296,8 +1296,6 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, regs = args->regs; - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); /* If the PMU has the TOE IRQ enable bits, we need to do a @@ -1321,7 +1319,7 @@ static int __kprobes perf_event_nmi_handler(struct notifier_block *self, if (val & (1ULL << 31)) continue; - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, hwc->last_period); if (!sparc_perf_event_set_period(event, hwc, idx)) continue; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2787fbec7ae..7b383d8da7b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -40,7 +40,6 @@ config X86 select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_GRAPH_FP_TEST select HAVE_FUNCTION_TRACE_MCOUNT_TEST - select HAVE_FTRACE_NMI_ENTER if DYNAMIC_FTRACE select HAVE_SYSCALL_TRACEPOINTS select HAVE_KVM select HAVE_ARCH_KGDB diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index 268c783ab1c..18d9005d9e4 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -34,6 +34,7 @@ #ifndef __ASSEMBLY__ extern void mcount(void); +extern int modifying_ftrace_code; static inline unsigned long ftrace_call_adjust(unsigned long addr) { @@ -50,6 +51,8 @@ struct dyn_arch_ftrace { /* No extra data needed for x86 */ }; +int ftrace_int3_handler(struct pt_regs *regs); + #endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* __ASSEMBLY__ */ #endif /* CONFIG_FUNCTION_TRACER */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index ccb805966f6..957ec87385a 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -134,6 +134,8 @@ #define MSR_AMD64_IBSFETCHCTL 0xc0011030 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031 #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032 +#define MSR_AMD64_IBSFETCH_REG_COUNT 3 +#define MSR_AMD64_IBSFETCH_REG_MASK ((1UL<<MSR_AMD64_IBSFETCH_REG_COUNT)-1) #define MSR_AMD64_IBSOPCTL 0xc0011033 #define MSR_AMD64_IBSOPRIP 0xc0011034 #define MSR_AMD64_IBSOPDATA 0xc0011035 @@ -141,8 +143,11 @@ #define MSR_AMD64_IBSOPDATA3 0xc0011037 #define MSR_AMD64_IBSDCLINAD 0xc0011038 #define MSR_AMD64_IBSDCPHYSAD 0xc0011039 +#define MSR_AMD64_IBSOP_REG_COUNT 7 +#define MSR_AMD64_IBSOP_REG_MASK ((1UL<<MSR_AMD64_IBSOP_REG_COUNT)-1) #define MSR_AMD64_IBSCTL 0xc001103a #define MSR_AMD64_IBSBRTARGET 0xc001103b +#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ /* Fam 15h MSRs */ #define MSR_F15H_PERF_CTL 0xc0010200 diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 2291895b183..588f52ea810 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -158,6 +158,7 @@ struct x86_pmu_capability { #define IBS_CAPS_OPCNT (1U<<4) #define IBS_CAPS_BRNTRGT (1U<<5) #define IBS_CAPS_OPCNTEXT (1U<<6) +#define IBS_CAPS_RIPINVALIDCHK (1U<<7) #define IBS_CAPS_DEFAULT (IBS_CAPS_AVAIL \ | IBS_CAPS_FETCHSAM \ @@ -170,21 +171,28 @@ struct x86_pmu_capability { #define IBSCTL_LVT_OFFSET_VALID (1ULL<<8) #define IBSCTL_LVT_OFFSET_MASK 0x0F -/* IbsFetchCtl bits/masks */ +/* ibs fetch bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT 0xFFFF0000ULL #define IBS_FETCH_MAX_CNT 0x0000FFFFULL -/* IbsOpCtl bits */ +/* ibs op bits/masks */ +/* lower 4 bits of the current count are ignored: */ +#define IBS_OP_CUR_CNT (0xFFFF0ULL<<32) #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) #define IBS_OP_MAX_CNT 0x0000FFFFULL #define IBS_OP_MAX_CNT_EXT 0x007FFFFFULL /* not a register bit mask */ +#define IBS_RIP_INVALID (1ULL<<38) +#ifdef CONFIG_X86_LOCAL_APIC extern u32 get_ibs_caps(void); +#else +static inline u32 get_ibs_caps(void) { return 0; } +#endif #ifdef CONFIG_PERF_EVENTS extern void perf_events_lapic_init(void); diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index bb8e03407e1..e049d6da018 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -484,9 +484,6 @@ static int __x86_pmu_event_init(struct perf_event *event) /* mark unused */ event->hw.extra_reg.idx = EXTRA_REG_NONE; - - /* mark not used */ - event->hw.extra_reg.idx = EXTRA_REG_NONE; event->hw.branch_reg.idx = EXTRA_REG_NONE; return x86_pmu.hw_config(event); @@ -1186,8 +1183,6 @@ int x86_pmu_handle_irq(struct pt_regs *regs) int idx, handled = 0; u64 val; - perf_sample_data_init(&data, 0); - cpuc = &__get_cpu_var(cpu_hw_events); /* @@ -1222,7 +1217,7 @@ int x86_pmu_handle_irq(struct pt_regs *regs) * event overflow */ handled++; - data.period = event->hw.last_period; + perf_sample_data_init(&data, 0, event->hw.last_period); if (!x86_perf_event_set_period(event)) continue; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 95e7fe1c5f0..65652265fff 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -134,8 +134,13 @@ static u64 amd_pmu_event_map(int hw_event) static int amd_pmu_hw_config(struct perf_event *event) { - int ret = x86_pmu_hw_config(event); + int ret; + /* pass precise event sampling to ibs: */ + if (event->attr.precise_ip && get_ibs_caps()) + return -ENOENT; + + ret = x86_pmu_hw_config(event); if (ret) return ret; @@ -205,10 +210,8 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, * when we come here */ for (i = 0; i < x86_pmu.num_counters; i++) { - if (nb->owners[i] == event) { - cmpxchg(nb->owners+i, event, NULL); + if (cmpxchg(nb->owners + i, event, NULL) == event) break; - } } } diff --git a/arch/x86/kernel/cpu/perf_event_amd_ibs.c b/arch/x86/kernel/cpu/perf_event_amd_ibs.c index 3b8a2d30d14..da9bcdcd985 100644 --- a/arch/x86/kernel/cpu/perf_event_amd_ibs.c +++ b/arch/x86/kernel/cpu/perf_event_amd_ibs.c @@ -9,6 +9,7 @@ #include <linux/perf_event.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/ptrace.h> #include <asm/apic.h> @@ -16,36 +17,591 @@ static u32 ibs_caps; #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD) -static struct pmu perf_ibs; +#include <linux/kprobes.h> +#include <linux/hardirq.h> + +#include <asm/nmi.h> + +#define IBS_FETCH_CONFIG_MASK (IBS_FETCH_RAND_EN | IBS_FETCH_MAX_CNT) +#define IBS_OP_CONFIG_MASK IBS_OP_MAX_CNT + +enum ibs_states { + IBS_ENABLED = 0, + IBS_STARTED = 1, + IBS_STOPPING = 2, + + IBS_MAX_STATES, +}; + +struct cpu_perf_ibs { + struct perf_event *event; + unsigned long state[BITS_TO_LONGS(IBS_MAX_STATES)]; +}; + +struct perf_ibs { + struct pmu pmu; + unsigned int msr; + u64 config_mask; + u64 cnt_mask; + u64 enable_mask; + u64 valid_mask; + u64 max_period; + unsigned long offset_mask[1]; + int offset_max; + struct cpu_perf_ibs __percpu *pcpu; + u64 (*get_count)(u64 config); +}; + +struct perf_ibs_data { + u32 size; + union { + u32 data[0]; /* data buffer starts here */ + u32 caps; + }; + u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; +}; + +static int +perf_event_set_period(struct hw_perf_event *hwc, u64 min, u64 max, u64 *hw_period) +{ + s64 left = local64_read(&hwc->period_left); + s64 period = hwc->sample_period; + int overflow = 0; + + /* + * If we are way outside a reasonable range then just skip forward: + */ + if (unlikely(left <= -period)) { + left = period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + if (unlikely(left < (s64)min)) { + left += period; + local64_set(&hwc->period_left, left); + hwc->last_period = period; + overflow = 1; + } + + /* + * If the hw period that triggers the sw overflow is too short + * we might hit the irq handler. This biases the results. + * Thus we shorten the next-to-last period and set the last + * period to the max period. + */ + if (left > max) { + left -= max; + if (left > max) + left = max; + else if (left < min) + left = min; + } + + *hw_period = (u64)left; + + return overflow; +} + +static int +perf_event_try_update(struct perf_event *event, u64 new_raw_count, int width) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - width; + u64 prev_raw_count; + u64 delta; + + /* + * Careful: an NMI might modify the previous event value. + * + * Our tactic to handle this is to first atomically read and + * exchange a new raw count - then add that new-prev delta + * count to the generic event atomically: + */ + prev_raw_count = local64_read(&hwc->prev_count); + if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, + new_raw_count) != prev_raw_count) + return 0; + + /* + * Now we have the new raw value and have updated the prev + * timestamp already. We can now calculate the elapsed delta + * (event-)time and add that to the generic event. + * + * Careful, not all hw sign-extends above the physical width + * of the count. + */ + delta = (new_raw_count << shift) - (prev_raw_count << shift); + delta >>= shift; + + local64_add(delta, &event->count); + local64_sub(delta, &hwc->period_left); + + return 1; +} + +static struct perf_ibs perf_ibs_fetch; +static struct perf_ibs perf_ibs_op; + +static struct perf_ibs *get_ibs_pmu(int type) +{ + if (perf_ibs_fetch.pmu.type == type) + return &perf_ibs_fetch; + if (perf_ibs_op.pmu.type == type) + return &perf_ibs_op; + return NULL; +} + +/* + * Use IBS for precise event sampling: + * + * perf record -a -e cpu-cycles:p ... # use ibs op counting cycle count + * perf record -a -e r076:p ... # same as -e cpu-cycles:p + * perf record -a -e r0C1:p ... # use ibs op counting micro-ops + * + * IbsOpCntCtl (bit 19) of IBS Execution Control Register (IbsOpCtl, + * MSRC001_1033) is used to select either cycle or micro-ops counting + * mode. + * + * The rip of IBS samples has skid 0. Thus, IBS supports precise + * levels 1 and 2 and the PERF_EFLAGS_EXACT is set. In rare cases the + * rip is invalid when IBS was not able to record the rip correctly. + * We clear PERF_EFLAGS_EXACT and take the rip from pt_regs then. + * + */ +static int perf_ibs_precise_event(struct perf_event *event, u64 *config) +{ + switch (event->attr.precise_ip) { + case 0: + return -ENOENT; + case 1: + case 2: + break; + default: + return -EOPNOTSUPP; + } + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + switch (event->attr.config) { + case PERF_COUNT_HW_CPU_CYCLES: + *config = 0; + return 0; + } + break; + case PERF_TYPE_RAW: + switch (event->attr.config) { + case 0x0076: + *config = 0; + return 0; + case 0x00C1: + *config = IBS_OP_CNT_CTL; + return 0; + } + break; + default: + return -ENOENT; + } + + return -EOPNOTSUPP; +} static int perf_ibs_init(struct perf_event *event) { - if (perf_ibs.type != event->attr.type) + struct hw_perf_event *hwc = &event->hw; + struct perf_ibs *perf_ibs; + u64 max_cnt, config; + int ret; + + perf_ibs = get_ibs_pmu(event->attr.type); + if (perf_ibs) { + config = event->attr.config; + } else { + perf_ibs = &perf_ibs_op; + ret = perf_ibs_precise_event(event, &config); + if (ret) + return ret; + } + + if (event->pmu != &perf_ibs->pmu) return -ENOENT; + + if (config & ~perf_ibs->config_mask) + return -EINVAL; + + if (hwc->sample_period) { + if (config & perf_ibs->cnt_mask) + /* raw max_cnt may not be set */ + return -EINVAL; + if (!event->attr.sample_freq && hwc->sample_period & 0x0f) + /* + * lower 4 bits can not be set in ibs max cnt, + * but allowing it in case we adjust the + * sample period to set a frequency. + */ + return -EINVAL; + hwc->sample_period &= ~0x0FULL; + if (!hwc->sample_period) + hwc->sample_period = 0x10; + } else { + max_cnt = config & perf_ibs->cnt_mask; + config &= ~perf_ibs->cnt_mask; + event->attr.sample_period = max_cnt << 4; + hwc->sample_period = event->attr.sample_period; + } + + if (!hwc->sample_period) + return -EINVAL; + + /* + * If we modify hwc->sample_period, we also need to update + * hwc->last_period and hwc->period_left. + */ + hwc->last_period = hwc->sample_period; + local64_set(&hwc->period_left, hwc->sample_period); + + hwc->config_base = perf_ibs->msr; + hwc->config = config; + return 0; } +static int perf_ibs_set_period(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 *period) +{ + int overflow; + + /* ignore lower 4 bits in min count: */ + overflow = perf_event_set_period(hwc, 1<<4, perf_ibs->max_period, period); + local64_set(&hwc->prev_count, 0); + + return overflow; +} + +static u64 get_ibs_fetch_count(u64 config) +{ + return (config & IBS_FETCH_CNT) >> 12; +} + +static u64 get_ibs_op_count(u64 config) +{ + u64 count = 0; + + if (config & IBS_OP_VAL) + count += (config & IBS_OP_MAX_CNT) << 4; /* cnt rolled over */ + + if (ibs_caps & IBS_CAPS_RDWROPCNT) + count += (config & IBS_OP_CUR_CNT) >> 32; + + return count; +} + +static void +perf_ibs_event_update(struct perf_ibs *perf_ibs, struct perf_event *event, + u64 *config) +{ + u64 count = perf_ibs->get_count(*config); + + /* + * Set width to 64 since we do not overflow on max width but + * instead on max count. In perf_ibs_set_period() we clear + * prev count manually on overflow. + */ + while (!perf_event_try_update(event, count, 64)) { + rdmsrl(event->hw.config_base, *config); + count = perf_ibs->get_count(*config); + } +} + +static inline void perf_ibs_enable_event(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 config) +{ + wrmsrl(hwc->config_base, hwc->config | config | perf_ibs->enable_mask); +} + +/* + * Erratum #420 Instruction-Based Sampling Engine May Generate + * Interrupt that Cannot Be Cleared: + * + * Must clear counter mask first, then clear the enable bit. See + * Revision Guide for AMD Family 10h Processors, Publication #41322. + */ +static inline void perf_ibs_disable_event(struct perf_ibs *perf_ibs, + struct hw_perf_event *hwc, u64 config) +{ + config &= ~perf_ibs->cnt_mask; + wrmsrl(hwc->config_base, config); + config &= ~perf_ibs->enable_mask; + wrmsrl(hwc->config_base, config); +} + +/* + * We cannot restore the ibs pmu state, so we always needs to update + * the event while stopping it and then reset the state when starting + * again. Thus, ignoring PERF_EF_RELOAD and PERF_EF_UPDATE flags in + * perf_ibs_start()/perf_ibs_stop() and instead always do it. + */ +static void perf_ibs_start(struct perf_event *event, int flags) +{ + struct hw_perf_event *hwc = &event->hw; + struct perf_ibs *perf_ibs = container_of(event->pmu, struct perf_ibs, pmu); + struct cpu_perf_ibs *pcpu = this_cpu_ptr(perf_ibs->pcpu); + u64 period; + + if (WARN_ON_ONCE(!(hwc->state & PERF_HES_STOPPED))) + return; + + WARN_ON_ONCE(!(hwc->state & PERF_HES_UPTODATE)); + hwc->state = 0; + + perf_ibs_set_period(perf_ibs, hwc, &period); + set_bit(IBS_STARTED, pcpu->state); + perf_ibs_enable_event(perf_ibs, hwc, period >> 4); + + perf_event_update_userpage(event); +} + +static void perf_ibs_stop(struct perf_event *event, int flags) + |