diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 16:15:23 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-02 16:15:23 -0700 |
commit | f0bb4c0ab064a8aeeffbda1cee380151a594eaab (patch) | |
tree | 14d55a89c5db455aa10ff9a96ca14c474a9c4d55 | |
parent | a4883ef6af5e513a1e8c2ab9aab721604aa3a4f5 (diff) | |
parent | 983433b5812c5cf33a9008fa38c6f9b407fedb76 (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Ingo Molnar:
"Kernel improvements:
- watchdog driver improvements by Li Zefan
- Power7 CPI stack events related improvements by Sukadev Bhattiprolu
- event multiplexing via hrtimers and other improvements by Stephane
Eranian
- kernel stack use optimization by Andrew Hunter
- AMD IOMMU uncore PMU support by Suravee Suthikulpanit
- NMI handling rate-limits by Dave Hansen
- various hw_breakpoint fixes by Oleg Nesterov
- hw_breakpoint overflow period sampling and related signal handling
fixes by Jiri Olsa
- Intel Haswell PMU support by Andi Kleen
Tooling improvements:
- Reset SIGTERM handler in workload child process, fix from David
Ahern.
- Makefile reorganization, prep work for Kconfig patches, from Jiri
Olsa.
- Add automated make test suite, from Jiri Olsa.
- Add --percent-limit option to 'top' and 'report', from Namhyung
Kim.
- Sorting improvements, from Namhyung Kim.
- Expand definition of sysfs format attribute, from Michael Ellerman.
Tooling fixes:
- 'perf tests' fixes from Jiri Olsa.
- Make Power7 CPI stack events available in sysfs, from Sukadev
Bhattiprolu.
- Handle death by SIGTERM in 'perf record', fix from David Ahern.
- Fix printing of perf_event_paranoid message, from David Ahern.
- Handle realloc failures in 'perf kvm', from David Ahern.
- Fix divide by 0 in variance, from David Ahern.
- Save parent pid in thread struct, from David Ahern.
- Handle JITed code in shared memory, from Andi Kleen.
- Fixes for 'perf diff', from Jiri Olsa.
- Remove some unused struct members, from Jiri Olsa.
- Add missing liblk.a dependency for python/perf.so, fix from Jiri
Olsa.
- Respect CROSS_COMPILE in liblk.a, from Rabin Vincent.
- No need to do locking when adding hists in perf report, only 'top'
needs that, from Namhyung Kim.
- Fix alignment of symbol column in in the hists browser (top,
report) when -v is given, from NAmhyung Kim.
- Fix 'perf top' -E option behavior, from Namhyung Kim.
- Fix bug in isupper() and islower(), from Sukadev Bhattiprolu.
- Fix compile errors in bp_signal 'perf test', from Sukadev
Bhattiprolu.
... and more things"
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (102 commits)
perf/x86: Disable PEBS-LL in intel_pmu_pebs_disable()
perf/x86: Fix shared register mutual exclusion enforcement
perf/x86/intel: Support full width counting
x86: Add NMI duration tracepoints
perf: Drop sample rate when sampling is too slow
x86: Warn when NMI handlers take large amounts of time
hw_breakpoint: Introduce "struct bp_cpuinfo"
hw_breakpoint: Simplify *register_wide_hw_breakpoint()
hw_breakpoint: Introduce cpumask_of_bp()
hw_breakpoint: Simplify the "weight" usage in toggle_bp_slot() paths
hw_breakpoint: Simplify list/idx mess in toggle_bp_slot() paths
perf/x86/intel: Add mem-loads/stores support for Haswell
perf/x86/intel: Support Haswell/v4 LBR format
perf/x86/intel: Move NMI clearing to end of PMI handler
perf/x86/intel: Add Haswell PEBS support
perf/x86/intel: Add simple Haswell PMU support
perf/x86/intel: Add Haswell PEBS record support
perf/x86/intel: Fix sparse warning
perf/x86/amd: AMD IOMMU Performance Counter PERF uncore PMU implementation
perf/x86/amd: Add IOMMU Performance Counter resource management
...
71 files changed, 2947 insertions, 1053 deletions
diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events index 0adeb524c0d..8b25ffb4256 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-events +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-events @@ -27,14 +27,36 @@ Description: Generic performance monitoring events "basename". -What: /sys/devices/cpu/events/PM_LD_MISS_L1 - /sys/devices/cpu/events/PM_LD_REF_L1 - /sys/devices/cpu/events/PM_CYC +What: /sys/devices/cpu/events/PM_1PLUS_PPC_CMPL /sys/devices/cpu/events/PM_BRU_FIN - /sys/devices/cpu/events/PM_GCT_NOSLOT_CYC /sys/devices/cpu/events/PM_BRU_MPRED - /sys/devices/cpu/events/PM_INST_CMPL /sys/devices/cpu/events/PM_CMPLU_STALL + /sys/devices/cpu/events/PM_CMPLU_STALL_BRU + /sys/devices/cpu/events/PM_CMPLU_STALL_DCACHE_MISS + /sys/devices/cpu/events/PM_CMPLU_STALL_DFU + /sys/devices/cpu/events/PM_CMPLU_STALL_DIV + /sys/devices/cpu/events/PM_CMPLU_STALL_ERAT_MISS + /sys/devices/cpu/events/PM_CMPLU_STALL_FXU + /sys/devices/cpu/events/PM_CMPLU_STALL_IFU + /sys/devices/cpu/events/PM_CMPLU_STALL_LSU + /sys/devices/cpu/events/PM_CMPLU_STALL_REJECT + /sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR + /sys/devices/cpu/events/PM_CMPLU_STALL_SCALAR_LONG + /sys/devices/cpu/events/PM_CMPLU_STALL_STORE + /sys/devices/cpu/events/PM_CMPLU_STALL_THRD + /sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR + /sys/devices/cpu/events/PM_CMPLU_STALL_VECTOR_LONG + /sys/devices/cpu/events/PM_CYC + /sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED + /sys/devices/cpu/events/PM_GCT_NOSLOT_BR_MPRED_IC_MISS + /sys/devices/cpu/events/PM_GCT_NOSLOT_CYC + /sys/devices/cpu/events/PM_GCT_NOSLOT_IC_MISS + /sys/devices/cpu/events/PM_GRP_CMPL + /sys/devices/cpu/events/PM_INST_CMPL + /sys/devices/cpu/events/PM_LD_MISS_L1 + /sys/devices/cpu/events/PM_LD_REF_L1 + /sys/devices/cpu/events/PM_RUN_CYC + /sys/devices/cpu/events/PM_RUN_INST_CMPL Date: 2013/01/08 diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format index 079afc71363..77f47ff5ee0 100644 --- a/Documentation/ABI/testing/sysfs-bus-event_source-devices-format +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-format @@ -9,6 +9,12 @@ Description: we want to export, so that userspace can deal with sane name/value pairs. + Userspace must be prepared for the possibility that attributes + define overlapping bit ranges. For example: + attr1 = 'config:0-23' + attr2 = 'config:0-7' + attr3 = 'config:12-35' + Example: 'config1:1,6-10,44' Defines contents of attribute that occupies bits 1,6-10,44 of perf_event_attr::config1. diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index ccd42589e12..ab7d16efa96 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -70,12 +70,12 @@ show up in /proc/sys/kernel: - shmall - shmmax [ sysv ipc ] - shmmni -- softlockup_thresh - stop-a [ SPARC only ] - sysrq ==> Documentation/sysrq.txt - tainted - threads-max - unknown_nmi_panic +- watchdog_thresh - version ============================================================== @@ -427,6 +427,32 @@ This file shows up if CONFIG_DEBUG_STACKOVERFLOW is enabled. ============================================================== +perf_cpu_time_max_percent: + +Hints to the kernel how much CPU time it should be allowed to +use to handle perf sampling events. If the perf subsystem +is informed that its samples are exceeding this limit, it +will drop its sampling frequency to attempt to reduce its CPU +usage. + +Some perf sampling happens in NMIs. If these samples +unexpectedly take too long to execute, the NMIs can become +stacked up next to each other so much that nothing else is +allowed to execute. + +0: disable the mechanism. Do not monitor or correct perf's + sampling rate no matter how CPU time it takes. + +1-100: attempt to throttle perf's sample rate to this + percentage of CPU. Note: the kernel calculates an + "expected" length of each sample event. 100 here means + 100% of that expected length. Even if this is set to + 100, you may still see sample throttling if this + length is exceeded. Set to 0 if you truly do not care + how much CPU is consumed. + +============================================================== + pid_max: @@ -604,15 +630,6 @@ without users and with a dead originative process will be destroyed. ============================================================== -softlockup_thresh: - -This value can be used to lower the softlockup tolerance threshold. The -default threshold is 60 seconds. If a cpu is locked up for 60 seconds, -the kernel complains. Valid values are 1-60 seconds. Setting this -tunable to zero will disable the softlockup detection altogether. - -============================================================== - tainted: Non-zero if the kernel has been tainted. Numeric values, which @@ -648,3 +665,16 @@ that time, kernel debugging information is displayed on console. NMI switch that most IA32 servers have fires unknown NMI up, for example. If a system hangs up, try pressing the NMI switch. + +============================================================== + +watchdog_thresh: + +This value can be used to control the frequency of hrtimer and NMI +events and the soft and hard lockup thresholds. The default threshold +is 10 seconds. + +The softlockup threshold is (2 * watchdog_thresh). Setting this +tunable to zero will disable lockup detection altogether. + +============================================================== diff --git a/Documentation/trace/events-nmi.txt b/Documentation/trace/events-nmi.txt new file mode 100644 index 00000000000..c03c8c89f08 --- /dev/null +++ b/Documentation/trace/events-nmi.txt @@ -0,0 +1,43 @@ +NMI Trace Events + +These events normally show up here: + + /sys/kernel/debug/tracing/events/nmi + +-- + +nmi_handler: + +You might want to use this tracepoint if you suspect that your +NMI handlers are hogging large amounts of CPU time. The kernel +will warn if it sees long-running handlers: + + INFO: NMI handler took too long to run: 9.207 msecs + +and this tracepoint will allow you to drill down and get some +more details. + +Let's say you suspect that perf_event_nmi_handler() is causing +you some problems and you only want to trace that handler +specifically. You need to find its address: + + $ grep perf_event_nmi_handler /proc/kallsyms + ffffffff81625600 t perf_event_nmi_handler + +Let's also say you are only interested in when that function is +really hogging a lot of CPU time, like a millisecond at a time. +Note that the kernel's output is in milliseconds, but the input +to the filter is in nanoseconds! You can filter on 'delta_ns': + +cd /sys/kernel/debug/tracing/events/nmi/nmi_handler +echo 'handler==0xffffffff81625600 && delta_ns>1000000' > filter +echo 1 > enable + +Your output would then look like: + +$ cat /sys/kernel/debug/tracing/trace_pipe +<idle>-0 [000] d.h3 505.397558: nmi_handler: perf_event_nmi_handler() delta_ns: 3236765 handled: 1 +<idle>-0 [000] d.h3 505.805893: nmi_handler: perf_event_nmi_handler() delta_ns: 3174234 handled: 1 +<idle>-0 [000] d.h3 506.158206: nmi_handler: perf_event_nmi_handler() delta_ns: 3084642 handled: 1 +<idle>-0 [000] d.h3 506.334346: nmi_handler: perf_event_nmi_handler() delta_ns: 3080351 handled: 1 + diff --git a/arch/metag/kernel/perf/perf_event.c b/arch/metag/kernel/perf/perf_event.c index 366569425c5..5b18888ee36 100644 --- a/arch/metag/kernel/perf/perf_event.c +++ b/arch/metag/kernel/perf/perf_event.c @@ -882,7 +882,7 @@ static int __init init_hw_perf_events(void) } register_cpu_notifier(&metag_pmu_notifier); - ret = perf_pmu_register(&pmu, (char *)metag_pmu->name, PERF_TYPE_RAW); + ret = perf_pmu_register(&pmu, metag_pmu->name, PERF_TYPE_RAW); out: return ret; } diff --git a/arch/powerpc/perf/power7-pmu.c b/arch/powerpc/perf/power7-pmu.c index 3c475d6267c..13c3f0e547a 100644 --- a/arch/powerpc/perf/power7-pmu.c +++ b/arch/powerpc/perf/power7-pmu.c @@ -62,6 +62,29 @@ #define PME_PM_BRU_FIN 0x10068 #define PME_PM_BRU_MPRED 0x400f6 +#define PME_PM_CMPLU_STALL_FXU 0x20014 +#define PME_PM_CMPLU_STALL_DIV 0x40014 +#define PME_PM_CMPLU_STALL_SCALAR 0x40012 +#define PME_PM_CMPLU_STALL_SCALAR_LONG 0x20018 +#define PME_PM_CMPLU_STALL_VECTOR 0x2001c +#define PME_PM_CMPLU_STALL_VECTOR_LONG 0x4004a +#define PME_PM_CMPLU_STALL_LSU 0x20012 +#define PME_PM_CMPLU_STALL_REJECT 0x40016 +#define PME_PM_CMPLU_STALL_ERAT_MISS 0x40018 +#define PME_PM_CMPLU_STALL_DCACHE_MISS 0x20016 +#define PME_PM_CMPLU_STALL_STORE 0x2004a +#define PME_PM_CMPLU_STALL_THRD 0x1001c +#define PME_PM_CMPLU_STALL_IFU 0x4004c +#define PME_PM_CMPLU_STALL_BRU 0x4004e +#define PME_PM_GCT_NOSLOT_IC_MISS 0x2001a +#define PME_PM_GCT_NOSLOT_BR_MPRED 0x4001a +#define PME_PM_GCT_NOSLOT_BR_MPRED_IC_MISS 0x4001c +#define PME_PM_GRP_CMPL 0x30004 +#define PME_PM_1PLUS_PPC_CMPL 0x100f2 +#define PME_PM_CMPLU_STALL_DFU 0x2003c +#define PME_PM_RUN_CYC 0x200f4 +#define PME_PM_RUN_INST_CMPL 0x400fa + /* * Layout of constraint bits: * 6666555555555544444444443333333333222222222211111111110000000000 @@ -393,6 +416,31 @@ POWER_EVENT_ATTR(LD_MISS_L1, LD_MISS_L1); POWER_EVENT_ATTR(BRU_FIN, BRU_FIN) POWER_EVENT_ATTR(BRU_MPRED, BRU_MPRED); +POWER_EVENT_ATTR(CMPLU_STALL_FXU, CMPLU_STALL_FXU); +POWER_EVENT_ATTR(CMPLU_STALL_DIV, CMPLU_STALL_DIV); +POWER_EVENT_ATTR(CMPLU_STALL_SCALAR, CMPLU_STALL_SCALAR); +POWER_EVENT_ATTR(CMPLU_STALL_SCALAR_LONG, CMPLU_STALL_SCALAR_LONG); +POWER_EVENT_ATTR(CMPLU_STALL_VECTOR, CMPLU_STALL_VECTOR); +POWER_EVENT_ATTR(CMPLU_STALL_VECTOR_LONG, CMPLU_STALL_VECTOR_LONG); +POWER_EVENT_ATTR(CMPLU_STALL_LSU, CMPLU_STALL_LSU); +POWER_EVENT_ATTR(CMPLU_STALL_REJECT, CMPLU_STALL_REJECT); + +POWER_EVENT_ATTR(CMPLU_STALL_ERAT_MISS, CMPLU_STALL_ERAT_MISS); +POWER_EVENT_ATTR(CMPLU_STALL_DCACHE_MISS, CMPLU_STALL_DCACHE_MISS); +POWER_EVENT_ATTR(CMPLU_STALL_STORE, CMPLU_STALL_STORE); +POWER_EVENT_ATTR(CMPLU_STALL_THRD, CMPLU_STALL_THRD); +POWER_EVENT_ATTR(CMPLU_STALL_IFU, CMPLU_STALL_IFU); +POWER_EVENT_ATTR(CMPLU_STALL_BRU, CMPLU_STALL_BRU); +POWER_EVENT_ATTR(GCT_NOSLOT_IC_MISS, GCT_NOSLOT_IC_MISS); + +POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED, GCT_NOSLOT_BR_MPRED); +POWER_EVENT_ATTR(GCT_NOSLOT_BR_MPRED_IC_MISS, GCT_NOSLOT_BR_MPRED_IC_MISS); +POWER_EVENT_ATTR(GRP_CMPL, GRP_CMPL); +POWER_EVENT_ATTR(1PLUS_PPC_CMPL, 1PLUS_PPC_CMPL); +POWER_EVENT_ATTR(CMPLU_STALL_DFU, CMPLU_STALL_DFU); +POWER_EVENT_ATTR(RUN_CYC, RUN_CYC); +POWER_EVENT_ATTR(RUN_INST_CMPL, RUN_INST_CMPL); + static struct attribute *power7_events_attr[] = { GENERIC_EVENT_PTR(CYC), GENERIC_EVENT_PTR(GCT_NOSLOT_CYC), @@ -411,6 +459,31 @@ static struct attribute *power7_events_attr[] = { POWER_EVENT_PTR(LD_MISS_L1), POWER_EVENT_PTR(BRU_FIN), POWER_EVENT_PTR(BRU_MPRED), + + POWER_EVENT_PTR(CMPLU_STALL_FXU), + POWER_EVENT_PTR(CMPLU_STALL_DIV), + POWER_EVENT_PTR(CMPLU_STALL_SCALAR), + POWER_EVENT_PTR(CMPLU_STALL_SCALAR_LONG), + POWER_EVENT_PTR(CMPLU_STALL_VECTOR), + POWER_EVENT_PTR(CMPLU_STALL_VECTOR_LONG), + POWER_EVENT_PTR(CMPLU_STALL_LSU), + POWER_EVENT_PTR(CMPLU_STALL_REJECT), + + POWER_EVENT_PTR(CMPLU_STALL_ERAT_MISS), + POWER_EVENT_PTR(CMPLU_STALL_DCACHE_MISS), + POWER_EVENT_PTR(CMPLU_STALL_STORE), + POWER_EVENT_PTR(CMPLU_STALL_THRD), + POWER_EVENT_PTR(CMPLU_STALL_IFU), + POWER_EVENT_PTR(CMPLU_STALL_BRU), + POWER_EVENT_PTR(GCT_NOSLOT_IC_MISS), + POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED), + + POWER_EVENT_PTR(GCT_NOSLOT_BR_MPRED_IC_MISS), + POWER_EVENT_PTR(GRP_CMPL), + POWER_EVENT_PTR(1PLUS_PPC_CMPL), + POWER_EVENT_PTR(CMPLU_STALL_DFU), + POWER_EVENT_PTR(RUN_CYC), + POWER_EVENT_PTR(RUN_INST_CMPL), NULL }; diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index cf1a471a18a..bccfca68430 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -34,8 +34,6 @@ #include <asm/sys_ia32.h> #include <asm/smap.h> -#define FIX_EFLAGS __FIX_EFLAGS - int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) { int err = 0; diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 57cb6340221..8249df45d2f 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -29,6 +29,9 @@ #define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) #define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL +#define HSW_IN_TX (1ULL << 32) +#define HSW_IN_TX_CHECKPOINTED (1ULL << 33) + #define AMD64_EVENTSEL_INT_CORE_ENABLE (1ULL << 36) #define AMD64_EVENTSEL_GUESTONLY (1ULL << 40) #define AMD64_EVENTSEL_HOSTONLY (1ULL << 41) diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index beff97f7df3..7a958164088 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -7,10 +7,10 @@ #include <asm/processor-flags.h> -#define __FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ +#define FIX_EFLAGS (X86_EFLAGS_AC | X86_EFLAGS_OF | \ X86_EFLAGS_DF | X86_EFLAGS_TF | X86_EFLAGS_SF | \ X86_EFLAGS_ZF | X86_EFLAGS_AF | X86_EFLAGS_PF | \ - X86_EFLAGS_CF) + X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h index 2af848dfa75..bb0465090ae 100644 --- a/arch/x86/include/uapi/asm/msr-index.h +++ b/arch/x86/include/uapi/asm/msr-index.h @@ -170,6 +170,9 @@ #define MSR_KNC_EVNTSEL0 0x00000028 #define MSR_KNC_EVNTSEL1 0x00000029 +/* Alternative perfctr range with full access. */ +#define MSR_IA32_PMC0 0x000004c1 + /* AMD64 MSRs. Not complete. See the architecture manual for a more complete list. */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index b0684e4a73a..47b56a7e99c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -31,11 +31,15 @@ obj-$(CONFIG_PERF_EVENTS) += perf_event.o ifdef CONFIG_PERF_EVENTS obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd.o perf_event_amd_uncore.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += perf_event_amd_iommu.o +endif obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_p6.o perf_event_knc.o perf_event_p4.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_lbr.o perf_event_intel_ds.o perf_event_intel.o obj-$(CONFIG_CPU_SUP_INTEL) += perf_event_intel_uncore.o endif + obj-$(CONFIG_X86_MCE) += mcheck/ obj-$(CONFIG_MTRR) += mtrr/ |