/drivers/cpufreq/

Linux kernel source tree - git repository hosting
about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2010-08-06 09:30:52 -0700
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-08-06 09:30:52 -0700
commit: 4aed2fd8e3181fea7c09ba79cf64e7e3f4413bf9 (patch)
tree: 1f69733e5daab4915a76a41de0e4d1dc61e12cfb
parent: 3a3527b6461b1298cc53ce72f336346739297ac8 (diff)
parent: fc9ea5a1e53ee54f681e226d735008e2a6f8f470 (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (162 commits)
  tracing/kprobes: unregister_trace_probe needs to be called under mutex
  perf: expose event__process function
  perf events: Fix mmap offset determination
  perf, powerpc: fsl_emb: Restore setting perf_sample_data.period
  perf, powerpc: Convert the FSL driver to use local64_t
  perf tools: Don't keep unreferenced maps when unmaps are detected
  perf session: Invalidate last_match when removing threads from rb_tree
  perf session: Free the ref_reloc_sym memory at the right place
  x86,mmiotrace: Add support for tracing STOS instruction
  perf, sched migration: Librarize task states and event headers helpers
  perf, sched migration: Librarize the GUI class
  perf, sched migration: Make the GUI class client agnostic
  perf, sched migration: Make it vertically scrollable
  perf, sched migration: Parameterize cpu height and spacing
  perf, sched migration: Fix key bindings
  perf, sched migration: Ignore unhandled task states
  perf, sched migration: Handle ignored migrate out events
  perf: New migration tool overview
  tracing: Drop cpparg() macro
  perf: Use tracepoint_synchronize_unregister() to flush any pending tracepoint call
  ...

Fix up trivial conflicts in Makefile and drivers/cpufreq/cpufreq.c
Diffstat
-rw-r--r--Documentation/ABI/testing/debugfs-kmemtrace71
-rw-r--r--Documentation/kernel-parameters.txt2
-rw-r--r--Documentation/trace/ftrace-design.txt153
-rw-r--r--Documentation/trace/kmemtrace.txt126
-rw-r--r--Documentation/trace/kprobetrace.txt2
-rw-r--r--MAINTAINERS9
-rw-r--r--Makefile4
-rw-r--r--arch/Kconfig7
-rw-r--r--arch/alpha/include/asm/local64.h1
-rw-r--r--arch/arm/include/asm/local64.h1
-rw-r--r--arch/arm/kernel/perf_event.c18
-rw-r--r--arch/avr32/include/asm/local64.h1
-rw-r--r--arch/blackfin/include/asm/local64.h1
-rw-r--r--arch/cris/include/asm/local64.h1
-rw-r--r--arch/frv/include/asm/local64.h1
-rw-r--r--arch/frv/kernel/local64.h1
-rw-r--r--arch/h8300/include/asm/local64.h1
-rw-r--r--arch/ia64/include/asm/local64.h1
-rw-r--r--arch/m32r/include/asm/local64.h1
-rw-r--r--arch/m68k/include/asm/local64.h1
-rw-r--r--arch/microblaze/include/asm/local64.h1
-rw-r--r--arch/mips/include/asm/local64.h1
-rw-r--r--arch/mn10300/include/asm/local64.h1
-rw-r--r--arch/parisc/include/asm/local64.h1
-rw-r--r--arch/powerpc/include/asm/local64.h1
-rw-r--r--arch/powerpc/include/asm/perf_event.h12
-rw-r--r--arch/powerpc/kernel/misc.S26
-rw-r--r--arch/powerpc/kernel/perf_event.c41
-rw-r--r--arch/powerpc/kernel/perf_event_fsl_emb.c29
-rw-r--r--arch/s390/include/asm/local64.h1
-rw-r--r--arch/score/include/asm/local64.h1
-rw-r--r--arch/sh/include/asm/local64.h1
-rw-r--r--arch/sh/kernel/perf_event.c6
-rw-r--r--arch/sparc/include/asm/local64.h1
-rw-r--r--arch/sparc/include/asm/perf_event.h8
-rw-r--r--arch/sparc/kernel/helpers.S6
-rw-r--r--arch/sparc/kernel/perf_event.c25
-rw-r--r--arch/x86/Kconfig1
-rw-r--r--arch/x86/include/asm/hw_breakpoint.h2
-rw-r--r--arch/x86/include/asm/local64.h1
-rw-r--r--arch/x86/include/asm/nmi.h2
-rw-r--r--arch/x86/include/asm/perf_event.h18
-rw-r--r--arch/x86/include/asm/perf_event_p4.h99
-rw-r--r--arch/x86/include/asm/stacktrace.h49
-rw-r--r--arch/x86/kernel/apic/Makefile7
-rw-r--r--arch/x86/kernel/apic/hw_nmi.c107
-rw-r--r--arch/x86/kernel/apic/nmi.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event.c62
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c156
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/dumpstack.h56
-rw-r--r--arch/x86/kernel/dumpstack_32.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c1
-rw-r--r--arch/x86/kernel/hw_breakpoint.c51
-rw-r--r--arch/x86/kernel/kprobes.c33
-rw-r--r--arch/x86/kernel/process_32.c4
-rw-r--r--arch/x86/kernel/process_64.c5
-rw-r--r--arch/x86/kernel/stacktrace.c31
-rw-r--r--arch/x86/kernel/traps.c7
-rw-r--r--arch/x86/mm/pf_in.c30
-rw-r--r--arch/x86/oprofile/nmi_int.c16
-rw-r--r--arch/xtensa/include/asm/local64.h1
-rw-r--r--drivers/oprofile/event_buffer.c3
-rw-r--r--fs/exec.c1
-rw-r--r--include/asm-generic/local64.h96
-rw-r--r--include/asm-generic/vmlinux.lds.h4
-rw-r--r--include/linux/ftrace.h5
-rw-r--r--include/linux/ftrace_event.h18
-rw-r--r--include/linux/kernel.h5
-rw-r--r--include/linux/kmemtrace.h25
-rw-r--r--include/linux/nmi.h13
-rw-r--r--include/linux/perf_event.h95
-rw-r--r--include/linux/sched.h24
-rw-r--r--include/linux/slab_def.h3
-rw-r--r--include/linux/slub_def.h3
-rw-r--r--include/linux/syscalls.h2
-rw-r--r--include/trace/boot.h60
-rw-r--r--include/trace/events/sched.h32
-rw-r--r--include/trace/events/timer.h80
-rw-r--r--include/trace/ftrace.h23
-rw-r--r--include/trace/syscall.h1
-rw-r--r--init/main.c29
-rw-r--r--kernel/Makefile2
-rw-r--r--kernel/hw_breakpoint.c78
-rw-r--r--kernel/perf_event.c458
-rw-r--r--kernel/sched.c6
-rw-r--r--kernel/softlockup.c293
-rw-r--r--kernel/sysctl.c55
-rw-r--r--kernel/timer.c1
-rw-r--r--kernel/trace/Kconfig68
-rw-r--r--kernel/trace/Makefile4
-rw-r--r--kernel/trace/ftrace.c5
-rw-r--r--kernel/trace/kmemtrace.c529
-rw-r--r--kernel/trace/ring_buffer.c40
-rw-r--r--kernel/trace/trace.c127
-rw-r--r--kernel/trace/trace.h90
-rw-r--r--kernel/trace/trace_boot.c185
-rw-r--r--kernel/trace/trace_clock.c5
-rw-r--r--kernel/trace/trace_entries.h94
-rw-r--r--kernel/trace/trace_event_perf.c27
-rw-r--r--kernel/trace/trace_events.c299
-rw-r--r--kernel/trace/trace_events_filter.c27
-rw-r--r--kernel/trace/trace_export.c8
-rw-r--r--kernel/trace/trace_functions.c6
-rw-r--r--kernel/trace/trace_functions_graph.c3
-rw-r--r--kernel/trace/trace_irqsoff.c3
-rw-r--r--kernel/trace/trace_kprobe.c383
-rw-r--r--kernel/trace/trace_ksym.c508
-rw-r--r--kernel/trace/trace_output.c69
-rw-r--r--kernel/trace/trace_sched_wakeup.c7
-rw-r--r--kernel/trace/trace_selftest.c87
-rw-r--r--kernel/trace/trace_stack.c6
-rw-r--r--kernel/trace/trace_syscalls.c7
-rw-r--r--kernel/trace/trace_sysprof.c329
-rw-r--r--kernel/watchdog.c567
-rw-r--r--lib/Kconfig.debug35
-rw-r--r--mm/mmap.c6
-rw-r--r--mm/slab.c1
-rw-r--r--mm/slob.c4
-rw-r--r--mm/slub.c1
-rw-r--r--scripts/package/Makefile37
-rwxr-xr-xscripts/recordmcount.pl2
-rw-r--r--tools/perf/.gitignore2
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt8
-rw-r--r--tools/perf/Documentation/perf-probe.txt8
-rw-r--r--tools/perf/Documentation/perf-record.txt13
-rw-r--r--tools/perf/Documentation/perf-stat.txt7
-rw-r--r--tools/perf/Documentation/perf-top.txt8
-rw-r--r--tools/perf/MANIFEST12
-rw-r--r--tools/perf/Makefile113
-rw-r--r--tools/perf/arch/sh/Makefile4
-rw-r--r--tools/perf/arch/sh/util/dwarf-regs.c55
-rw-r--r--tools/perf/builtin-annotate.c6
-rw-r--r--tools/perf/builtin-buildid-cache.c3
-rw-r--r--tools/perf/builtin-buildid-list.c4
-rw-r--r--tools/perf/builtin-diff.c9
-rw-r--r--tools/perf/builtin-probe.c3
-rw-r--r--tools/perf/builtin-record.c81
-rw-r--r--tools/perf/builtin-report.c27
-rw-r--r--tools/perf/builtin-stat.c14
-rw-r--r--tools/perf/builtin-top.c40
-rw-r--r--tools/perf/builtin-trace.c32
-rw-r--r--tools/perf/feature-tests.mak119
-rw-r--r--tools/perf/perf-archive.sh20
-rw-r--r--tools/perf/perf.c2
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py30
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py184
-rw-r--r--tools/perf/scripts/python/bin/sched-migration-record2
-rw-r--r--tools/perf/scripts/python/bin/sched-migration-report3
-rw-r--r--tools/perf/scripts/python/sched-migration.py461
-rw-r--r--tools/perf/util/build-id.c28
-rw-r--r--tools/perf/util/cache.h1
-rw-r--r--tools/perf/util/callchain.c2
-rw-r--r--tools/perf/util/callchain.h2
-rw-r--r--tools/perf/util/config.c64
-rw-r--r--tools/perf/util/cpumap.c57
-rw-r--r--tools/perf/util/cpumap.h2
-rw-r--r--tools/perf/util/debug.c10
-rw-r--r--tools/perf/util/event.c107
-rw-r--r--tools/perf/util/event.h6
-rw-r--r--tools/perf/util/header.c13
-rw-r--r--tools/perf/util/hist.c214
-rw-r--r--tools/perf/util/hist.h30
-rw-r--r--tools/perf/util/map.c116
-rw-r--r--tools/perf/util/map.h14
-rw-r--r--tools/perf/util/newt.c1164
-rw-r--r--tools/perf/util/parse-events.c11
-rw-r--r--tools/perf/util/probe-event.c271
-rw-r--r--tools/perf/util/probe-event.h29
-rw-r--r--tools/perf/util/probe-finder.c248
-rw-r--r--tools/perf/util/probe-finder.h10
-rw-r--r--tools/perf/util/session.c62
-rw-r--r--tools/perf/util/sort.c40
-rw-r--r--tools/perf/util/sort.h22
-rw-r--r--tools/perf/util/symbol.c299
-rw-r--r--tools/perf/util/symbol.h18
-rw-r--r--tools/perf/util/thread.c7
-rw-r--r--tools/perf/util/thread.h2
-rw-r--r--tools/perf/util/util.h3
179 files changed, 5611 insertions, 4809 deletions
diff --git a/Documentation/ABI/testing/debugfs-kmemtrace b/Documentation/ABI/testing/debugfs-kmemtrace
deleted file mode 100644
index 5e6a92a02d8..00000000000
--- a/Documentation/ABI/testing/debugfs-kmemtrace
+++ /dev/null
@@ -1,71 +0,0 @@
-What: /sys/kernel/debug/kmemtrace/
-Date: July 2008
-Contact: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-Description:
-
-In kmemtrace-enabled kernels, the following files are created:
-
-/sys/kernel/debug/kmemtrace/
- cpu<n> (0400) Per-CPU tracing data, see below. (binary)
- total_overruns (0400) Total number of bytes which were dropped from
- cpu<n> files because of full buffer condition,
- non-binary. (text)
- abi_version (0400) Kernel's kmemtrace ABI version. (text)
-
-Each per-CPU file should be read according to the relay interface. That is,
-the reader should set affinity to that specific CPU and, as currently done by
-the userspace application (though there are other methods), use poll() with
-an infinite timeout before every read(). Otherwise, erroneous data may be
-read. The binary data has the following _core_ format:
-
- Event ID (1 byte) Unsigned integer, one of:
- 0 - represents an allocation (KMEMTRACE_EVENT_ALLOC)
- 1 - represents a freeing of previously allocated memory
- (KMEMTRACE_EVENT_FREE)
- Type ID (1 byte) Unsigned integer, one of:
- 0 - this is a kmalloc() / kfree()
- 1 - this is a kmem_cache_alloc() / kmem_cache_free()
- 2 - this is a __get_free_pages() et al.
- Event size (2 bytes) Unsigned integer representing the
- size of this event. Used to extend
- kmemtrace. Discard the bytes you
- don't know about.
- Sequence number (4 bytes) Signed integer used to reorder data
- logged on SMP machines. Wraparound
- must be taken into account, although
- it is unlikely.
- Caller address (8 bytes) Return address to the caller.
- Pointer to mem (8 bytes) Pointer to target memory area. Can be
- NULL, but not all such calls might be
- recorded.
-
-In case of KMEMTRACE_EVENT_ALLOC events, the next fields follow:
-
- Requested bytes (8 bytes) Total number of requested bytes,
- unsigned, must not be zero.
- Allocated bytes (8 bytes) Total number of actually allocated
- bytes, unsigned, must not be lower
- than requested bytes.
- Requested flags (4 bytes) GFP flags supplied by the caller.
- Target CPU (4 bytes) Signed integer, valid for event id 1.
- If equal to -1, target CPU is the same
- as origin CPU, but the reverse might
- not be true.
-
-The data is made available in the same endianness the machine has.
-
-Other event ids and type ids may be defined and added. Other fields may be
-added by increasing event size, but see below for details.
-Every modification to the ABI, including new id definitions, are followed
-by bumping the ABI version by one.
-
-Adding new data to the packet (features) is done at the end of the mandatory
-data:
- Feature size (2 byte)
- Feature ID (1 byte)
- Feature data (Feature size - 3 bytes)
-
-
-Users:
- kmemtrace-user - git://repo.or.cz/kmemtrace-user.git
-
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f72ba727441..f20c7abc032 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1816,6 +1816,8 @@ and is between 256 and 4096 characters. It is defined in the file
nousb [USB] Disable the USB subsystem
+ nowatchdog [KNL] Disable the lockup detector.
+
nowb [ARM]
nox2apic [X86-64,APIC] Do not enable x2APIC mode.
diff --git a/Documentation/trace/ftrace-design.txt b/Documentation/trace/ftrace-design.txt
index f1f81afee8a..dc52bd442c9 100644
--- a/Documentation/trace/ftrace-design.txt
+++ b/Documentation/trace/ftrace-design.txt
@@ -13,6 +13,9 @@ Note that this focuses on architecture implementation details only. If you
want more explanation of a feature in terms of common code, review the common
ftrace.txt file.
+Ideally, everyone who wishes to retain performance while supporting tracing in
+their kernel should make it all the way to dynamic ftrace support.
+
Prerequisites
-------------
@@ -215,7 +218,7 @@ An arch may pass in a unique value (frame pointer) to both the entering and
exiting of a function. On exit, the value is compared and if it does not
match, then it will panic the kernel. This is largely a sanity check for bad
code generation with gcc. If gcc for your port sanely updates the frame
-pointer under different opitmization levels, then ignore this option.
+pointer under different optimization levels, then ignore this option.
However, adding support for it isn't terribly difficult. In your assembly code
that calls prepare_ftrace_return(), pass the frame pointer as the 3rd argument.
@@ -234,7 +237,7 @@ If you can't trace NMI functions, then skip this option.
HAVE_SYSCALL_TRACEPOINTS
----------------------
+------------------------
You need very few things to get the syscalls tracing in an arch.
@@ -250,12 +253,152 @@ You need very few things to get the syscalls tracing in an arch.
HAVE_FTRACE_MCOUNT_RECORD
-------------------------
-See scripts/recordmcount.pl for more info.
+See scripts/recordmcount.pl for more info. Just fill in the arch-specific
+details for how to locate the addresses of mcount call sites via objdump.
+This option doesn't make much sense without also implementing dynamic ftrace.
+
+HAVE_DYNAMIC_FTRACE
+-------------------
+
+You will first need HAVE_FTRACE_MCOUNT_RECORD and HAVE_FUNCTION_TRACER, so
+scroll your reader back up if you got overeager.
+
+Once those are out of the way, you will need to implement:
+ - asm/ftrace.h:
+ - MCOUNT_ADDR
+ - ftrace_call_adjust()
+ - struct dyn_arch_ftrace{}
+ - asm code:
+ - mcount() (new stub)
+ - ftrace_caller()
+ - ftrace_call()
+ - ftrace_stub()
+ - C code:
+ - ftrace_dyn_arch_init()
+ - ftrace_make_nop()
+ - ftrace_make_call()
+ - ftrace_update_ftrace_func()
+
+First you will need to fill out some arch details in your asm/ftrace.h.
+
+Define MCOUNT_ADDR as the address of your mcount symbol similar to:
+ #define MCOUNT_ADDR ((unsigned long)mcount)
+Since no one else will have a decl for that function, you will need to:
+ extern void mcount(void);
+
+You will also need the helper function ftrace_call_adjust(). Most people
+will be able to stub it out like so:
+ static inline unsigned long ftrace_call_adjust(unsigned long addr)
+ {
+ return addr;
+ }
<details to be filled>
+Lastly you will need the custom dyn_arch_ftrace structure. If you need
+some extra state when runtime patching arbitrary call sites, this is the
+place. For now though, create an empty struct:
+ struct dyn_arch_ftrace {
+ /* No extra data needed */
+ };
+
+With the header out of the way, we can fill out the assembly code. While we
+did already create a mcount() function earlier, dynamic ftrace only wants a
+stub function. This is because the mcount() will only be used during boot
+and then all references to it will be patched out never to return. Instead,
+the guts of the old mcount() will be used to create a new ftrace_caller()
+function. Because the two are hard to merge, it will most likely be a lot
+easier to have two separate definitions split up by #ifdefs. Same goes for
+the ftrace_stub() as that will now be inlined in ftrace_caller().
+
+Before we get any more confused, let's check out some pseudo code so you can
+implement your own stuff in assembly:
-HAVE_DYNAMIC_FTRACE
----------------------
+void mcount(void)
+{
+ return;
+}
+
+void ftrace_caller(void)
+{
+ /* implement HAVE_FUNCTION_TRACE_MCOUNT_TEST if you desire */
+
+ /* save all state needed by the ABI (see paragraph above) */
+
+ unsigned long frompc = ...;
+ unsigned long selfpc = <return address> - MCOUNT_INSN_SIZE;
+
+ftrace_call:
+ ftrace_stub(frompc, selfpc);
+
+ /* restore all state needed by the ABI */
+
+ftrace_stub:
+ return;
+}
+
+This might look a little odd at first, but keep in mind that we will be runtime
+patching multiple things. First, only functions that we actually want to trace
+will be patched to call ftrace_caller(). Second, since we only have one tracer
+active at a time, we will patch the ftrace_caller() function itself to call the
+specific tracer in question. That is the point of the ftrace_call label.
+
+With that in mind, let's move on to the C code that will actually be doing the
+runtime patching. You'll need a little knowledge of your arch's opcodes in
+order to make it through the next section.
+
+Every arch has an init callback function. If you need to do something early on
+to initialize some state, this is the time to do that. Otherwise, this simple
+function below should be sufficient for most people:
+
+int __init ftrace_dyn_arch_init(void *data)
+{
+ /* return value is done indirectly via data */
+ *(unsigned long *)data = 0;
+
+ return 0;
+}
+
+There are two functions that are used to do runtime patching of arbitrary
+functions. The first is used to turn the mcount call site into a nop (which
+is what helps us retain runtime performance when not tracing). The second is
+used to turn the mcount call site into a call to an arbitrary location (but
+typically that is ftrace_caller()). See the general function definition in
+linux/ftrace.h for the functions:
+ ftrace_make_nop()
+ ftrace_make_call()
+The rec->ip value is the address of the mcount call site that was collected
+by the scripts/recordmcount.pl during build time.
+
+The last function is used to do runtime patching of the active tracer. This
+will be modifying the assembly code at the location of the ftrace_call symbol
+inside of the ftrace_caller() function. So you should have sufficient padding
+at that location to support the new function calls you'll be inserting. Some
+people will be using a "call" type instruction while others will be using a
+"branch" type instruction. Specifically, the function is:
+ ftrace_update_ftrace_func()
+
+
+HAVE_DYNAMIC_FTRACE + HAVE_FUNCTION_GRAPH_TRACER
+------------------------------------------------
+
+The function grapher needs a few tweaks in order to work with dynamic ftrace.
+Basically, you will need to:
+ - update:
+ - ftrace_caller()
+ - ftrace_graph_call()
+ - ftrace_graph_caller()
+ - implement:
+ - ftrace_enable_ftrace_graph_caller()
+ - ftrace_disable_ftrace_graph_caller()
<details to be filled>
+Quick notes:
+ - add a nop stub after the ftrace_call location named ftrace_graph_call;
+ stub needs to be large enough to support a call to ftrace_graph_caller()
+ - update ftrace_graph_caller() to work with being called by the new
+ ftrace_caller() since some semantics may have changed
+ - ftrace_enable_ftrace_graph_caller() will runtime patch the
+ ftrace_graph_call location with a call to ftrace_graph_caller()
+ - ftrace_disable_ftrace_graph_caller() will runtime patch the
+ ftrace_graph_call location with nops
diff --git a/Documentation/trace/kmemtrace.txt b/Documentation/trace/kmemtrace.txt
deleted file mode 100644
index 6308735e58c..00000000000
--- a/Documentation/trace/kmemtrace.txt
+++ /dev/null
@@ -1,126 +0,0 @@
- kmemtrace - Kernel Memory Tracer
-
- by Eduard - Gabriel Munteanu
- <eduard.munteanu@linux360.ro>
-
-I. Introduction
-===============
-
-kmemtrace helps kernel developers figure out two things:
-1) how different allocators (SLAB, SLUB etc.) perform
-2) how kernel code allocates memory and how much
-
-To do this, we trace every allocation and export information to the userspace
-through the relay interface. We export things such as the number of requested
-bytes, the number of bytes actually allocated (i.e. including internal
-fragmentation), whether this is a slab allocation or a plain kmalloc() and so
-on.
-
-The actual analysis is performed by a userspace tool (see section III for
-details on where to get it from). It logs the data exported by the kernel,
-processes it and (as of writing this) can provide the following information:
-- the total amount of memory allocated and fragmentation per call-site
-- the amount of memory allocated and fragmentation per allocation
-- total memory allocated and fragmentation in the collected dataset
-- number of cross-CPU allocation and frees (makes sense in NUMA environments)
-
-Moreover, it can potentially find inconsistent and erroneous behavior in
-kernel code, such as using slab free functions on kmalloc'ed memory or
-allocating less memory than requested (but not truly failed allocations).
-
-kmemtrace also makes provisions for tracing on some arch and analysing the
-data on another.
-
-II. Design and goals
-====================
-
-kmemtrace was designed to handle rather large amounts of data. Thus, it uses
-the relay interface to export whatever is logged to userspace, which then
-stores it. Analysis and reporting is done asynchronously, that is, after the
-data is collected and stored. By design, it allows one to log and analyse
-on different machines and different arches.
-
-As of writing this, the ABI is not considered stable, though it might not
-change much. However, no guarantees are made about compatibility yet. When
-deemed stable, the ABI should still allow easy extension while maintaining
-backward compatibility. This is described further in Documentation/ABI.
-
-Summary of design goals:
- - allow logging and analysis to be done across different machines
- - be fast and anticipate usage in high-load environments (*)
- - be reasonably extensible
- - make it possible for GNU/Linux distributions to have kmemtrace
- included in their repositories
-
-(*) - one of the reasons Pekka Enberg's original userspace data analysis
- tool's code was rewritten from Perl to C (although this is more than a
- simple conversion)
-
-
-III. Quick usage guide
-======================
-
-1) Get a kernel that supports kmemtrace and build it accordingly (i.e. enable
-CONFIG_KMEMTRACE).
-
-2) Get the userspace tool and build it:
-$ git clone git://repo.or.cz/kmemtrace-user.git # current repository
-$ cd kmemtrace-user/
-$ ./autogen.sh
-$ ./configure
-$ make
-
-3) Boot the kmemtrace-enabled kernel if you haven't, preferably in the
-'single' runlevel (so that relay buffers don't fill up easily), and run
-kmemtrace:
-# '$' does not mean user, but root here.
-$ mount -t debugfs none /sys/kernel/debug
-$ mount -t proc none /proc
-$ cd path/to/kmemtrace-user/
-$ ./kmemtraced
-Wait a bit, then stop it with CTRL+C.
-$ cat /sys/kernel/debug/kmemtrace/total_overruns # Check if we didn't
- # overrun, should
- # be zero.
-$ (Optionally) [Run kmemtrace_check separately on each cpu[0-9]*.out file to
- check its correctness]
-$ ./kmemtrace-report
-
-Now you should have a nice and short summary of how the allocator performs.
-
-IV. FAQ and known issues
-========================
-
-Q: 'cat /sys/kernel/debug/kmemtrace/total_overruns' is non-zero, how do I fix
-this? Should I worry?
-A: If it's non-zero, this affects kmemtrace's accuracy, depending on how
-large the number is. You can fix it by supplying a higher
-'kmemtrace.subbufs=N' kernel parameter.
----
-
-Q: kmemtrace_check reports errors, how do I fix this? Should I worry?
-A: This is a bug and should be reported. It can occur for a variety of
-reasons:
- - possible bugs in relay code
- - possible misuse of relay by kmemtrace
- - timestamps being collected unorderly
-Or you may fix it yourself and send us a patch.
----
-
-Q: kmemtrace_report shows many errors, how do I fix this? Should I worry?
-A: This is a known issue and I'm working on it. These might be true errors
-in kernel code, which may have inconsistent behavior (e.g. allocating memory
-with kmem_cache_alloc() and freeing it with kfree()). Pekka Enberg pointed
-out this behavior may work with SLAB, but may fail with other allocators.
-
-It may also be due to lack of tracing in some unusual allocator functions.
-
-We don't want bug reports regarding this issue yet.
----
-
-V. See also
-===========
-
-Documentation/kernel-parameters.txt
-Documentation/ABI/testing/debugfs-kmemtrace
-
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index ec94748ae65..5f77d94598d 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -42,7 +42,7 @@ Synopsis of kprobe_events
+|-offs(FETCHARG) : Fetch memory at FETCHARG +|- offs address.(**)
NAME=FETCHARG : Set NAME as the argument name of FETCHARG.
FETCHARG:TYPE : Set TYPE as the type of FETCHARG. Currently, basic types
- (u8/u16/u32/u64/s8/s16/s32/s64) are supported.
+ (u8/u16/u32/u64/s8/s16/s32/s64) and string are supported.
(*) only for return probe.
(**) this is useful for fetching a field of data structures.
diff --git a/MAINTAINERS b/MAINTAINERS
index 11e34d5272b..100a3f535c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3403,13 +3403,6 @@ F: include/linux/kmemleak.h
F: mm/kmemleak.c
F: mm/kmemleak-test.c
-KMEMTRACE
-M: Eduard - Gabriel Munteanu <eduard.munteanu@linux360.ro>
-S: Maintained
-F: Documentation/trace/kmemtrace.txt
-F: include/linux/kmemtrace.h
-F: kernel/trace/kmemtrace.c
-
KPROBES
M: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
M: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
@@ -5685,7 +5678,7 @@ TRACING
M: Steven Rostedt <rostedt@goodmis.org>
M: Frederic Weisbecker <fweisbec@gmail.com>
M: Ingo Molnar <mingo@redhat.com>
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git tracing/core
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip.git perf/core
S: Maintained
F: Documentation/trace/ftrace.txt
F: arch/*/*/*/ftrace.h
diff --git a/Makefile b/Makefile
index 66c94aad366..7431c283f15 100644
--- a/Makefile
+++ b/Makefile
@@ -420,7 +420,7 @@ endif
no-dot-config-targets := clean mrproper distclean \
cscope TAGS tags help %docs check% coccicheck \
include/linux/version.h headers_% \
- kernelversion
+ kernelversion %src-pkg
config-targets := 0
mixed-targets := 0
@@ -1168,6 +1168,8 @@ distclean: mrproper
# rpm target kept for backward compatibility
package-dir := $(srctree)/scripts/package
+%src-pkg: FORCE
+ $(Q)$(MAKE) $(build)=$(package-dir) $@
%pkg: include/config/kernel.release FORCE
$(Q)$(MAKE) $(build)=$(package-dir) $@
rpm: include/config/kernel.release FORCE
diff --git a/arch/Kconfig b/arch/Kconfig
index acda512da2e..4877a8c8ee1 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -151,4 +151,11 @@ config HAVE_MIXED_BREAKPOINTS_REGS
config HAVE_USER_RETURN_NOTIFIER
bool
+config HAVE_PERF_EVENTS_NMI
+ bool
+ help
+ System hardware can generate an NMI using the perf event
+ subsystem. Also has support for calculating CPU cycle events
+ to determine how many clock cycles in a given period.
+
source "kernel/gcov/Kconfig"
diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/alpha/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/arm/include/asm/local64.h b/arch/arm/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/arm/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index de12536d687..417c392ddf1 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -164,20 +164,20 @@ armpmu_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
{
- s64 left = atomic64_read(&hwc->period_left);
+ s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0;
if (unlikely(left <= -period)) {
left = period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
@@ -185,7 +185,7 @@ armpmu_event_set_period(struct perf_event *event,
if (left > (s64)armpmu->max_period)
left = armpmu->max_period;
- atomic64_set(&hwc->prev_count, (u64)-left);
+ local64_set(&hwc->prev_count, (u64)-left);
armpmu->write_counter(idx, (u64)(-left) & 0xffffffff);
@@ -204,18 +204,18 @@ armpmu_event_update(struct perf_event *event,
u64 delta;
again:
- prev_raw_count = atomic64_read(&hwc->prev_count);
+ prev_raw_count = local64_read(&hwc->prev_count);
new_raw_count = armpmu->read_counter(idx);
- if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &hwc->period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
return new_raw_count;
}
@@ -478,7 +478,7 @@ __hw_perf_event_init(struct perf_event *event)
if (!hwc->sample_period) {
hwc->sample_period = armpmu->max_period;
hwc->last_period = hwc->sample_period;
- atomic64_set(&hwc->period_left, hwc->sample_period);
+ local64_set(&hwc->period_left, hwc->sample_period);
}
err = 0;
diff --git a/arch/avr32/include/asm/local64.h b/arch/avr32/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/avr32/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/blackfin/include/asm/local64.h b/arch/blackfin/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/blackfin/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/cris/include/asm/local64.h b/arch/cris/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/cris/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/include/asm/local64.h b/arch/frv/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/frv/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/frv/kernel/local64.h b/arch/frv/kernel/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/frv/kernel/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/h8300/include/asm/local64.h b/arch/h8300/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/h8300/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/ia64/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m32r/include/asm/local64.h b/arch/m32r/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/m32r/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/m68k/include/asm/local64.h b/arch/m68k/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/m68k/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/microblaze/include/asm/local64.h b/arch/microblaze/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/microblaze/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mips/include/asm/local64.h b/arch/mips/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/mips/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/mn10300/include/asm/local64.h b/arch/mn10300/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/mn10300/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/parisc/include/asm/local64.h b/arch/parisc/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/parisc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/local64.h b/arch/powerpc/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/powerpc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h
index e6d4ce69b12..5c16b891d50 100644
--- a/arch/powerpc/include/asm/perf_event.h
+++ b/arch/powerpc/include/asm/perf_event.h
@@ -21,3 +21,15 @@
#ifdef CONFIG_FSL_EMB_PERF_EVENT
#include <asm/perf_event_fsl_emb.h>
#endif
+
+#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+#include <asm/reg.h>
+
+#define perf_arch_fetch_caller_regs(regs, __ip) \
+ do { \
+ (regs)->nip = __ip; /* instruction pointer supplied by the caller */ \
+ (regs)->gpr[1] = *(unsigned long *)__get_SP(); /* word stored at the address __get_SP() returns */ \
+ asm volatile("mfmsr %0" : "=r" ((regs)->msr)); /* output of the mfmsr instruction goes into (regs)->msr */ \
+ } while (0)
+#endif
diff --git a/arch/powerpc/kernel/misc.S b/arch/powerpc/kernel/misc.S
index 22e507c8a55..2d29752cbe1 100644
--- a/arch/powerpc/kernel/misc.S
+++ b/arch/powerpc/kernel/misc.S
@@ -127,29 +127,3 @@ _GLOBAL(__setup_cpu_power7)
_GLOBAL(__restore_cpu_power7)
/* place holder */
blr
-
-/*
- * Get a minimal set of registers for our caller's nth caller.
- * r3 = regs pointer, r5 = n.
- *
- * We only get R1 (stack pointer), NIP (next instruction pointer)
- * and LR (link register). These are all we can get in the
- * general case without doing complicated stack unwinding, but
- * fortunately they are enough to do a stack backtrace, which
- * is all we need them for.
- */
-_GLOBAL(perf_arch_fetch_caller_regs)
- mr r6,r1
- cmpwi r5,0
- mflr r4
- ble 2f
- mtctr r5
-1: PPC_LL r6,0(r6)
- bdnz 1b
- PPC_LL r4,PPC_LR_STKOFF(r6)
-2: PPC_LL r7,0(r6)
- PPC_LL r7,PPC_LR_STKOFF(r7)
- PPC_STL r6,GPR1-STACK_FRAME_OVERHEAD(r3)
- PPC_STL r4,_NIP-STACK_FRAME_OVERHEAD(r3)
- PPC_STL r7,_LINK-STACK_FRAME_OVERHEAD(r3)
- blr
diff --git a/arch/powerpc/kernel/perf_event.c b/arch/powerpc/kernel/perf_event.c
index 5c14ffe5125..d301a30445e 100644
--- a/arch/powerpc/kernel/perf_event.c
+++ b/arch/powerpc/kernel/perf_event.c
@@ -410,15 +410,15 @@ static void power_pmu_read(struct perf_event *event)
* Therefore we treat them like NMIs.
*/
do {
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
barrier();
val = read_pmc(event->hw.idx);
- } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
/* The counters are only 32 bits wide */
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &event->hw.period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &event->hw.period_left);
}
/*
@@ -444,10 +444,10 @@ static void freeze_limited_counters(struct cpu_hw_events *cpuhw,
if (!event->hw.idx)
continue;
val = (event->hw.idx == 5) ? pmc5 : pmc6;
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
event->hw.idx = 0;
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
}
}
@@ -462,7 +462,7 @@ static void thaw_limited_counters(struct cpu_hw_events *cpuhw,
event = cpuhw->limited_counter[i];
event->hw.idx = cpuhw->limited_hwidx[i];
val = (event->hw.idx == 5) ? pmc5 : pmc6;
- atomic64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.prev_count, val);
perf_event_update_userpage(event);
}
}
@@ -666,11 +666,11 @@ void hw_perf_enable(void)
}
val = 0;
if (event->hw.sample_period) {
- left = atomic64_read(&event->hw.period_left);
+ left = local64_read(&event->hw.period_left);
if (left < 0x80000000L)
val = 0x80000000L - left;
}
- atomic64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.prev_count, val);
event->hw.idx = idx;
write_pmc(idx, val);
perf_event_update_userpage(event);
@@ -754,7 +754,7 @@ static int power_pmu_enable(struct perf_event *event)
* skip the schedulability test here, it will be peformed
* at commit time(->commit_txn) as a whole
*/
- if (cpuhw->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuhw->group_flag & PERF_EVENT_TXN)
goto nocheck;
if (check_excludes(cpuhw->event, cpuhw->flags, n0, 1))
@@ -845,8 +845,8 @@ static void power_pmu_unthrottle(struct perf_event *event)
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
- atomic64_set(&event->hw.prev_count, val);
- atomic64_set(&event->hw.period_left, left);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
perf_enable();
local_irq_restore(flags);
@@ -861,7 +861,7 @@ void power_pmu_start_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+ cpuhw->group_flag |= PERF_EVENT_TXN;
cpuhw->n_txn_start = cpuhw->n_events;
}
@@ -874,7 +874,7 @@ void power_pmu_cancel_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+ cpuhw->group_flag &= ~PERF_EVENT_TXN;
}
/*
@@ -900,6 +900,7 @@ int power_pmu_commit_txn(const struct pmu *pmu)
for (i = cpuhw->n_txn_start; i < n; ++i)
cpuhw->event[i]->hw.config = cpuhw->events[i];
+ cpuhw->group_flag &= ~PERF_EVENT_TXN;
return 0;
}
@@ -1111,7 +1112,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
event->hw.config = events[n];
event->hw.event_base = cflags[n];
event->hw.last_period = event->hw.sample_period;
- atomic64_set(&event->hw.period_left, event->hw.last_period);
+ local64_set(&event->hw.period_left, event->hw.last_period);
/*
* See if we need to reserve the PMU.
@@ -1149,16 +1150,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
int record = 0;
/* we don't have to worry about interrupts here */
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
/*
* See if the total period for this event has expired,
* and update for the next period.
*/
val = 0;
- left = atomic64_read(&event->hw.period_left) - delta;
+ left = local64_read(&event->hw.period_left) - delta;
if (period) {
if (left <= 0) {
left += period;
@@ -1196,8 +1197,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
}
write_pmc(event->hw.idx, val);
- atomic64_set(&event->hw.prev_count, val);
- atomic64_set(&event->hw.period_left, left);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
}
diff --git a/arch/powerpc/kernel/perf_event_fsl_emb.c b/arch/powerpc/kernel/perf_event_fsl_emb.c
index babcceecd2e..1ba45471ae4 100644
--- a/arch/powerpc/kernel/perf_event_fsl_emb.c
+++ b/arch/powerpc/kernel/perf_event_fsl_emb.c
@@ -162,15 +162,15 @@ static void fsl_emb_pmu_read(struct perf_event *event)
* Therefore we treat them like NMIs.
*/
do {
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
barrier();
val = read_pmc(event->hw.idx);
- } while (atomic64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
+ } while (local64_cmpxchg(&event->hw.prev_count, prev, val) != prev);
/* The counters are only 32 bits wide */
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &event->hw.period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &event->hw.period_left);
}
/*
@@ -296,11 +296,11 @@ static int fsl_emb_pmu_enable(struct perf_event *event)
val = 0;
if (event->hw.sample_period) {
- s64 left = atomic64_read(&event->hw.period_left);
+ s64 left = local64_read(&event->hw.period_left);
if (left < 0x80000000L)
val = 0x80000000L - left;
}
- atomic64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.prev_count, val);
write_pmc(i, val);
perf_event_update_userpage(event);
@@ -371,8 +371,8 @@ static void fsl_emb_pmu_unthrottle(struct perf_event *event)
if (left < 0x80000000L)
val = 0x80000000L - left;
write_pmc(event->hw.idx, val);
- atomic64_set(&event->hw.prev_count, val);
- atomic64_set(&event->hw.period_left, left);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
perf_enable();
local_irq_restore(flags);
@@ -500,7 +500,7 @@ const struct pmu *hw_perf_event_init(struct perf_event *event)
return ERR_PTR(-ENOTSUPP);
event->hw.last_period = event->hw.sample_period;
- atomic64_set(&event->hw.period_left, event->hw.last_period);
+ local64_set(&event->hw.period_left, event->hw.last_period);
/*
* See if we need to reserve the PMU.
@@ -541,16 +541,16 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
int record = 0;
/* we don't have to worry about interrupts here */
- prev = atomic64_read(&event->hw.prev_count);
+ prev = local64_read(&event->hw.prev_count);
delta = (val - prev) & 0xfffffffful;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
/*
* See if the total period for this event has expired,
* and update for the next period.
*/
val = 0;
- left = atomic64_read(&event->hw.period_left) - delta;
+ left = local64_read(&event->hw.period_left) - delta;
if (period) {
if (left <= 0) {
left += period;
@@ -569,6 +569,7 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
struct perf_sample_data data;
perf_sample_data_init(&data, 0);
+ data.period = event->hw.last_period;
if (perf_event_overflow(event, nmi, &data, regs)) {
/*
@@ -584,8 +585,8 @@ static void record_and_restart(struct perf_event *event, unsigned long val,
}
write_pmc(event->hw.idx, val);
- atomic64_set(&event->hw.prev_count, val);
- atomic64_set(&event->hw.period_left, left);
+ local64_set(&event->hw.prev_count, val);
+ local64_set(&event->hw.period_left, left);
perf_event_update_userpage(event);
}
diff --git a/arch/s390/include/asm/local64.h b/arch/s390/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/s390/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/score/include/asm/local64.h b/arch/score/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/score/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sh/include/asm/local64.h b/arch/sh/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/sh/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sh/kernel/perf_event.c b/arch/sh/kernel/perf_event.c
index 81b6de41ae5..7a3dc356725 100644
--- a/arch/sh/kernel/perf_event.c
+++ b/arch/sh/kernel/perf_event.c
@@ -185,10 +185,10 @@ static void sh_perf_event_update(struct perf_event *event,
* this is the simplest approach for maintaining consistency.
*/
again:
- prev_raw_count = atomic64_read(&hwc->prev_count);
+ prev_raw_count = local64_read(&hwc->prev_count);
new_raw_count = sh_pmu->read(idx);
- if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
@@ -203,7 +203,7 @@ again:
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
}
static void sh_pmu_disable(struct perf_event *event)
diff --git a/arch/sparc/include/asm/local64.h b/arch/sparc/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/sparc/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/sparc/include/asm/perf_event.h b/arch/sparc/include/asm/perf_event.h
index 7e2669894ce..74c4e0cd889 100644
--- a/arch/sparc/include/asm/perf_event.h
+++ b/arch/sparc/include/asm/perf_event.h
@@ -6,7 +6,15 @@ extern void set_perf_event_pending(void);
#define PERF_EVENT_INDEX_OFFSET 0
#ifdef CONFIG_PERF_EVENTS
+#include <asm/ptrace.h>
+
extern void init_hw_perf_events(void);
+
+extern void
+__perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+
+#define perf_arch_fetch_caller_regs(pt_regs, ip) \
+ __perf_arch_fetch_caller_regs(pt_regs, ip, 1) /* skip = 1; no trailing ';' so the macro stays a single expression usable in if/else */
#else
static inline void init_hw_perf_events(void) { }
#endif
diff --git a/arch/sparc/kernel/helpers.S b/arch/sparc/kernel/helpers.S
index 92090cc9e82..682fee06a16 100644
--- a/arch/sparc/kernel/helpers.S
+++ b/arch/sparc/kernel/helpers.S
@@ -47,9 +47,9 @@ stack_trace_flush:
.size stack_trace_flush,.-stack_trace_flush
#ifdef CONFIG_PERF_EVENTS
- .globl perf_arch_fetch_caller_regs
- .type perf_arch_fetch_caller_regs,#function
-perf_arch_fetch_caller_regs:
+ .globl __perf_arch_fetch_caller_regs
+ .type __perf_arch_fetch_caller_regs,#function
+__perf_arch_fetch_caller_regs:
/* We always read the %pstate into %o5 since we will use
* that to construct a fake %tstate to store into the regs.
*/
diff --git a/arch/sparc/kernel/perf_event.c b/arch/sparc/kernel/perf_event.c
index 44faabc3c02..357ced3c33f 100644
--- a/arch/sparc/kernel/perf_event.c
+++ b/arch/sparc/kernel/perf_event.c
@@ -572,18 +572,18 @@ static u64 sparc_perf_event_update(struct perf_event *event,
s64 delta;
again:
- prev_raw_count = atomic64_read(&hwc->prev_count);
+ prev_raw_count = local64_read(&hwc->prev_count);
new_raw_count = read_pmc(idx);
- if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &hwc->period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
return new_raw_count;
}
@@ -591,27 +591,27 @@ again:
static int sparc_perf_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc, int idx)
{
- s64 left = atomic64_read(&hwc->period_left);
+ s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0;
if (unlikely(left <= -period)) {
left = period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (left > MAX_PERIOD)
left = MAX_PERIOD;
- atomic64_set(&hwc->prev_count, (u64)-left);
+ local64_set(&hwc->prev_count, (u64)-left);
write_pmc(idx, (u64)(-left) & 0xffffffff);
@@ -1006,7 +1006,7 @@ static int sparc_pmu_enable(struct perf_event *event)
* skip the schedulability test here, it will be peformed
* at commit time(->commit_txn) as a whole
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuc->group_flag & PERF_EVENT_TXN)
goto nocheck;
if (check_excludes(cpuc->event, n0, 1))
@@ -1088,7 +1088,7 @@ static int __hw_perf_event_init(struct perf_event *event)
if (!hwc->sample_period) {
hwc->sample_period = MAX_PERIOD;
hwc->last_period = hwc->sample_period;
- atomic64_set(&hwc->period_left, hwc->sample_period);
+ local64_set(&hwc->period_left, hwc->sample_period);
}
return 0;
@@ -1103,7 +1103,7 @@ static void sparc_pmu_start_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- cpuhw->group_flag |= PERF_EVENT_TXN_STARTED;
+ cpuhw->group_flag |= PERF_EVENT_TXN;
}
/*
@@ -1115,7 +1115,7 @@ static void sparc_pmu_cancel_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuhw = &__get_cpu_var(cpu_hw_events);
- cpuhw->group_flag &= ~PERF_EVENT_TXN_STARTED;
+ cpuhw->group_flag &= ~PERF_EVENT_TXN;
}
/*
@@ -1138,6 +1138,7 @@ static int sparc_pmu_commit_txn(const struct pmu *pmu)
if (sparc_check_constraints(cpuc->event, cpuc->events, n))
return -EAGAIN;
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
return 0;
}
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index dcb0593b4a6..6f77afa6bca 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -55,6 +55,7 @@ config X86
select HAVE_HW_BREAKPOINT
select HAVE_MIXED_BREAKPOINTS_REGS
select PERF_EVENTS
+ select HAVE_PERF_EVENTS_NMI
select ANON_INODES
select HAVE_ARCH_KMEMCHECK
select HAVE_USER_RETURN_NOTIFIER
diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h
index 942255310e6..528a11e8d3e 100644
--- a/arch/x86/include/asm/hw_breakpoint.h
+++ b/arch/x86/include/asm/hw_breakpoint.h
@@ -20,10 +20,10 @@ struct arch_hw_breakpoint {
#include <linux/list.h>
/* Available HW breakpoint length encodings */
+#define X86_BREAKPOINT_LEN_X 0x00
#define X86_BREAKPOINT_LEN_1 0x40
#define X86_BREAKPOINT_LEN_2 0x44
#define X86_BREAKPOINT_LEN_4 0x4c
-#define X86_BREAKPOINT_LEN_EXECUTE 0x40
#ifdef CONFIG_X86_64
#define X86_BREAKPOINT_LEN_8 0x48
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/x86/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 93da9c3f334..932f0f86b4b 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -17,7 +17,9 @@ int do_nmi_callback(struct pt_regs *regs, int cpu);
extern void die_nmi(char *str, struct pt_regs *regs, int do_panic);
extern int check_nmi_watchdog(void);
+#if !defined(CONFIG_LOCKUP_DETECTOR)
extern int nmi_watchdog_enabled;
+#endif
extern int avail_to_resrv_perfctr_nmi_bit(unsigned int);
extern int reserve_perfctr_nmi(unsigned int);
extern void release_perfctr_nmi(unsigned int);
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 254883d0c7e..6e742cc4251 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -68,8 +68,9 @@ union cpuid10_eax {
union cpuid10_edx {
struct {
- unsigned int num_counters_fixed:4;
- unsigned int reserved:28;
+ unsigned int num_counters_fixed:5;
+ unsigned int bit_width_fixed:8;
+ unsigned int reserved:19;
} split;
unsigned int full;
};
@@ -140,6 +141,19 @@ extern unsigned long perf_instruction_pointer(struct pt_regs *regs);
extern unsigned long perf_misc_flags(struct pt_regs *regs);
#define perf_misc_flags(regs) perf_misc_flags(regs)
+#include <asm/stacktrace.h>
+
+/*
+ * Fill in ip, bp, cs and flags of @regs for the caller. We abuse bit 3 from
+ * flags to pass exact information, see perf_misc_flags and PERF_EFLAGS_EXACT.
+ */
+#define perf_arch_fetch_caller_regs(regs, __ip) do { \
+ (regs)->ip = (__ip); \
+ (regs)->bp = caller_frame_pointer(); \
+ (regs)->cs = __KERNEL_CS; \
+ (regs)->flags = 0; \
+} while (0)
+
#else
static inline void init_hw_perf_events(void) { }
static inline void perf_events_lapic_init(void) { }
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 64a8ebff06f..def500776b1 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -19,7 +19,6 @@
#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */
#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR)
#define ARCH_P4_MAX_CCCR (18)
-#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2)
#define P4_ESCR_EVENT_MASK 0x7e000000U
#define P4_ESCR_EVENT_SHIFT 25
@@ -71,10 +70,6 @@
#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT)
#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT)
-/* Custom bits in reerved CCCR area */
-#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU
-
-
/* Non HT mask */
#define P4_CCCR_MASK \
(P4_CCCR_OVF | \
@@ -106,8 +101,7 @@
* ESCR and CCCR but rather an only packed value should
* be unpacked and written to a proper addresses
*
- * the base idea is to pack as much info as
- * possible
+ * the base idea is to pack as much info as possible
*/
#define p4_config_pack_escr(v) (((u64)(v)) << 32)
#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL)
@@ -130,8 +124,6 @@
t; \
})
-#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK)
-
#define P4_CONFIG_HT_SHIFT 63
#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
@@ -214,6 +206,12 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr)
return escr;
}
+/*
+ * These are the events which should be used in the "Event Select"
+ * field of the ESCR register; they act as unique keys which allow
+ * the kernel to determine which CCCR and COUNTER should be
+ * used to track an event
+ */
enum P4_EVENTS {
P4_EVENT_TC_DELIVER_MODE,
P4_EVENT_BPU_FETCH_REQUEST,
@@ -561,7 +559,7 @@ enum P4_EVENT_OPCODES {
* a caller should use P4_ESCR_EMASK_NAME helper to
* pick the EventMask needed, for example
*
- * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD)
+ * P4_ESCR_EMASK_BIT(P4_EVENT_TC_DELIVER_MODE, DD)
*/
enum P4_ESCR_EMASKS {
P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0),
@@ -753,43 +751,50 @@ enum P4_ESCR_EMASKS {
P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1),
};
-/* P4 PEBS: stale for a while */
-#define P4_PEBS_METRIC_MASK 0x00001fffU
-#define P4_PEBS_UOB_TAG 0x01000000U
-#define P4_PEBS_ENABLE 0x02000000U
-
-/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */
-#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001
-#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002
-#define P4_PEBS__dtlb_load_miss_retired 0x3000004
-#define P4_PEBS__dtlb_store_miss_retired 0x3000004
-#define P4_PEBS__dtlb_all_miss_retired 0x3000004
-#define P4_PEBS__tagged_mispred_branch 0x3018000
-#define P4_PEBS__mob_load_replay_retired 0x3000200
-#define P4_PEBS__split_load_retired 0x3000400
-#define P4_PEBS__split_store_retired 0x3000400
-
-#define P4_VERT__1stl_cache_load_miss_retired 0x0000001
-#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001
-#define P4_VERT__dtlb_load_miss_retired 0x0000001
-#define P4_VERT__dtlb_store_miss_retired 0x0000002
-#define P4_VERT__dtlb_all_miss_retired 0x0000003
-#define P4_VERT__tagged_mispred_branch 0x0000010
-#define P4_VERT__mob_load_replay_retired 0x0000001
-#define P4_VERT__split_load_retired 0x0000001
-#define P4_VERT__split_store_retired 0x0000002
-
-enum P4_CACHE_EVENTS {
- P4_CACHE__NONE,
-
- P4_CACHE__1stl_cache_load_miss_retired,
- P4_CACHE__2ndl_cache_load_miss_retired,
- P4_CACHE__dtlb_load_miss_retired,
- P4_CACHE__dtlb_store_miss_retired,
- P4_CACHE__itlb_reference_hit,
- P4_CACHE__itlb_reference_miss,
-
- P4_CACHE__MAX
+/*
+ * P4 PEBS specifics (Replay Event only)
+ *
+ * Format (bits):
+ * 0-6: metric from P4_PEBS_METRIC enum
+ * 7 : reserved
+ * 8 : reserved
+ * 9-11 : reserved
+ *
+ * Note: the UOP and PEBS bits are only reserved for now,
+ * in case we need them later
+ */
+#define P4_PEBS_CONFIG_ENABLE (1 << 7)
+#define P4_PEBS_CONFIG_UOP_TAG (1 << 8)
+#define P4_PEBS_CONFIG_METRIC_MASK 0x3f
+#define P4_PEBS_CONFIG_MASK 0xff
+
+/*
+ * mem: Only counters MSR_IQ_COUNTER4 (16) and
+ * MSR_IQ_COUNTER5 (17) are allowed for PEBS sampling
+ */
+#define P4_PEBS_ENABLE 0x02000000U
+#define P4_PEBS_ENABLE_UOP_TAG 0x01000000U
+
+#define p4_config_unpack_metric(v) (((u64)(v)) & P4_PEBS_CONFIG_METRIC_MASK)
+#define p4_config_unpack_pebs(v) (((u64)(v)) & P4_PEBS_CONFIG_MASK)
+
+#define p4_config_pebs_has(v, mask) (p4_config_unpack_pebs(v) & (mask))
+
+enum P4_PEBS_METRIC {
+ P4_PEBS_METRIC__none,
+
+ P4_PEBS_METRIC__1stl_cache_load_miss_retired,
+ P4_PEBS_METRIC__2ndl_cache_load_miss_retired,
+ P4_PEBS_METRIC__dtlb_load_miss_retired,
+ P4_PEBS_METRIC__dtlb_store_miss_retired,
+ P4_PEBS_METRIC__dtlb_all_miss_retired,
+ P4_PEBS_METRIC__tagged_mispred_branch,
+ P4_PEBS_METRIC__mob_load_replay_retired,
+ P4_PEBS_METRIC__split_load_retired,
+ P4_PEBS_METRIC__split_store_retired,
+
+ P4_PEBS_METRIC__max
};
#endif /* PERF_EVENT_P4_H */
+
diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h
index 4dab78edbad..2b16a2ad23d 100644
--- a/arch/x86/include/asm/stacktrace.h
+++ b/arch/x86/include/asm/stacktrace.h
@@ -1,6 +1,13 @@
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
+ */
+
#ifndef _ASM_X86_STACKTRACE_H
#define _ASM_X86_STACKTRACE_H
+#include <linux/uaccess.h>
+
extern int kstack_depth_to_print;
struct thread_info;
@@ -42,4 +49,46 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
const struct stacktrace_ops *ops, void *data);
+#ifdef CONFIG_X86_32
+#define STACKSLOTS_PER_LINE 8
+#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
+#else
+#define STACKSLOTS_PER_LINE 4
+#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
+#endif
+
+extern void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *stack, unsigned long bp, char *log_lvl);
+
+extern void
+show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
+ unsigned long *sp, unsigned long bp, char *log_lvl);
+
+extern unsigned int code_bytes;
+
+/* The form of the top of the frame on the stack */
+struct stack_frame {
+ struct stack_frame *next_frame;
+ unsigned long return_address;
+};
+
+struct stack_frame_ia32 {
+ u32 next_frame;
+ u32 return_address;
+};
+
+static inline unsigned long caller_frame_pointer(void)
+{
+ struct stack_frame *frame;
+
+ get_bp(frame); /* frame = current value of %ebp / %rbp (see get_bp) */
+
+#ifdef CONFIG_FRAME_POINTER
+ frame = frame->next_frame; /* follow the saved link one frame up */
+#endif
+
+ return (unsigned long)frame; /* without CONFIG_FRAME_POINTER this is the raw register value */
+}
+
#endif /* _ASM_X86_STACKTRACE_H */
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
index 565c1bfc507..910f20b457c 100644
--- a/arch/x86/kernel/apic/Makefile
+++ b/arch/x86/kernel/apic/Makefile
@@ -2,7 +2,12 @@
# Makefile for local APIC drivers and for the IO-APIC code
#
-obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o nmi.o
+obj-$(CONFIG_X86_LOCAL_APIC) += apic.o apic_noop.o probe_$(BITS).o ipi.o
+ifneq ($(CONFIG_HARDLOCKUP_DETECTOR),y)
+obj-$(CONFIG_X86_LOCAL_APIC) += nmi.o
+endif
+obj-$(CONFIG_HARDLOCKUP_DETECTOR) += hw_nmi.o
+
obj-$(CONFIG_X86_IO_APIC) += io_apic.o
obj-$(CONFIG_SMP) += ipi.o
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
new file mode 100644
index 00000000000..cefd6942f0e
--- /dev/null
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -0,0 +1,107 @@
+/*
+ * HW NMI watchdog support
+ *
+ * started by Don Zickus, Copyright (C) 2010 Red Hat, Inc.
+ *
+ * Arch specific calls to support NMI watchdog
+ *
+ * Bits copied from original nmi.c file
+ *
+ */
+#include <asm/apic.h>
+
+#include <linux/cpumask.h>
+#include <linux/kdebug.h>
+#include <linux/notifier.h>
+#include <linux/kprobes.h>
+#include <linux/nmi.h>
+#include <linux/module.h>
+
+/* For reliability, we're prepared to waste bits here. */
+static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+
+u64 hw_nmi_get_sample_period(void)
+{
+ return (u64)(cpu_khz) * 1000 * 60; /* cpu_khz * 1000 * 60; presumably the cycle count of 60 s if cpu_khz is the clock in kHz -- confirm */
+}
+
+#ifdef ARCH_HAS_NMI_WATCHDOG
+void arch_trigger_all_cpu_backtrace(void)
+{
+ int i;
+
+ cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask); /* every online CPU starts out marked in backtrace_mask */
+
+ printk(KERN_INFO "sending NMI to all CPUs:\n");
+ apic->send_IPI_all(NMI_VECTOR);
+
+ /* Wait for up to 10 seconds for all CPUs to do the backtrace */
+ for (i = 0; i < 10 * 1000; i++) { /* 10 * 1000 polls, mdelay(1) apart */
+ if (cpumask_empty(to_cpumask(backtrace_mask)))
+ break; /* the die-notifier handler clears each CPU's bit once it has dumped */
+ mdelay(1);
+ }
+}
+
+static int __kprobes
+arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
+ unsigned long cmd, void *__args)
+{
+ struct die_args *args = __args;
+ struct pt_regs *regs;
+ int cpu = smp_processor_id();
+
+ switch (cmd) { /* only DIE_NMI and DIE_NMI_IPI are handled; anything else is passed on */
+ case DIE_NMI:
+ case DIE_NMI_IPI:
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ regs = args->regs;
+
+ if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { /* bit is set by arch_trigger_all_cpu_backtrace() */
+ static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; /* one lock shared by all callers, held around the dump below */
+
+ arch_spin_lock(&lock);
+ printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
+ show_regs(regs);
+ dump_stack();
+ arch_spin_unlock(&lock);
+ cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); /* this CPU is done; the requester polls for an empty mask */
+ return NOTIFY_STOP;
+ }
+
+ return NOTIFY_DONE; /* no backtrace was requested from this CPU */
+}
+
+static __read_mostly struct notifier_block backtrace_notifier = {
+ .notifier_call = arch_trigger_all_cpu_backtrace_handler,
+ .next = NULL,
+ .priority = 1
+};
+
+static int __init register_trigger_all_cpu_backtrace(void)
+{
+ register_die_notifier(&backtrace_notifier);
+ return 0;
+}
+early_initcall(register_trigger_all_cpu_backtrace);
+#endif
+
+/* STUB calls to mimic old nmi_watchdog behaviour */
+#if defined(CONFIG_X86_LOCAL_APIC)
+unsigned int nmi_watchdog = NMI_NONE;
+EXPORT_SYMBOL(nmi_watchdog);
+void acpi_nmi_enable(void) { return; }
+void acpi_nmi_disable(void) { return; }
+#endif
+atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
+EXPORT_SYMBOL(nmi_active);
+int unknown_nmi_panic;
+void cpu_nmi_set_wd_enabled(void) { return; }
+void stop_apic_nmi_watchdog(void *unused) { return; }
+void setup_apic_nmi_watchdog(void *unused) { return; }
+int __init check_nmi_watchdog(void) { return 0; }
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index 1edaf15c0b8..a43f71cb30f 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -401,13 +401,6 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
int cpu = smp_processor_id();
int rc = 0;
- /* check for other users first */
- if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
- == NOTIFY_STOP) {
- rc = 1;
- touched = 1;
- }
-
sum = get_timer_irqs(cpu);
if (__get_cpu_var(nmi_touch)) {
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 5db5b7d65a1..f2da20fda02 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -220,6 +220,7 @@ struct x86_pmu {
struct perf_event *event);
struct event_constraint *event_constraints;
void (*quirks)(void);
+ int perfctr_second_write;
int (*cpu_prepare)(int cpu);
void (*cpu_starting)(int cpu);
@@ -295,10 +296,10 @@ x86_perf_event_update(struct perf_event *event)
* count to the generic event atomically:
*/
again:
- prev_raw_count = atomic64_read(&hwc->prev_count);
+ prev_raw_count = local64_read(&hwc->prev_count);
rdmsrl(hwc->event_base + idx, new_raw_count);
- if (atomic64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ if (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
new_raw_count) != prev_raw_count)
goto again;
@@ -313,8 +314,8 @@ again:
delta = (new_raw_count << shift) - (prev_raw_count << shift);
delta >>= shift;
- atomic64_add(delta, &event->count);
- atomic64_sub(delta, &hwc->period_left);
+ local64_add(delta, &event->count);
+ local64_sub(delta, &hwc->period_left);
return new_raw_count;
}
@@ -438,7 +439,7 @@ static int x86_setup_perfctr(struct perf_event *event)
if (!hwc->sample_period) {
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
- atomic64_set(&hwc->period_left, hwc->sample_period);
+ local64_set(&hwc->period_left, hwc->sample_period);
} else {
/*
* If we have a PMU initialized but no APIC
@@ -885,7 +886,7 @@ static int
x86_perf_event_set_period(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
- s64 left = atomic64_read(&hwc->period_left);
+ s64 left = local64_read(&hwc->period_left);
s64 period = hwc->sample_period;
int ret = 0, idx = hwc->idx;
@@ -897,14 +898,14 @@ x86_perf_event_set_period(struct perf_event *event)
*/
if (unlikely(left <= -period)) {
left = period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
if (unlikely(left <= 0)) {
left += period;
- atomic64_set(&hwc->period_left, left);
+ local64_set(&hwc->period_left, left);
hwc->last_period = period;
ret = 1;
}
@@ -923,10 +924,19 @@ x86_perf_event_set_period(struct perf_event *event)
* The hw event starts counting from this event offset,
* mark it to be able to extract future deltas:
*/
- atomic64_set(&hwc->prev_count, (u64)-left);
+ local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base + idx,
+ wrmsrl(hwc->event_base + idx, (u64)(-left) & x86_pmu.cntval_mask);
+
+ /*
+ * Due to an erratum on certain CPUs we need
+ * a second write to be sure the register
+ * is updated properly
+ */
+ if (x86_pmu.perfctr_second_write) {
+ wrmsrl(hwc->event_base + idx,
(u64)(-left) & x86_pmu.cntval_mask);
+ }
perf_event_update_userpage(event);
@@ -969,7 +979,7 @@ static int x86_pmu_enable(struct perf_event *event)
* skip the schedulability test here, it will be performed
* at commit time(->commit_txn) as a whole
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuc->group_flag & PERF_EVENT_TXN)
goto out;
ret = x86_pmu.schedule_events(cpuc, n, assign);
@@ -1096,7 +1106,7 @@ static void x86_pmu_disable(struct perf_event *event)
* The events never got scheduled and ->cancel_txn will truncate
* the event_list.
*/
- if (cpuc->group_flag & PERF_EVENT_TXN_STARTED)
+ if (cpuc->group_flag & PERF_EVENT_TXN)
return;
x86_pmu_stop(event);
@@ -1388,7 +1398,7 @@ static void x86_pmu_start_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- cpuc->group_flag |= PERF_EVENT_TXN_STARTED;
+ cpuc->group_flag |= PERF_EVENT_TXN;
cpuc->n_txn = 0;
}
@@ -1401,7 +1411,7 @@ static void x86_pmu_cancel_txn(const struct pmu *pmu)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
- cpuc->group_flag &= ~PERF_EVENT_TXN_STARTED;
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
/*
* Truncate the collected events.
*/
@@ -1435,11 +1445,7 @@ static int x86_pmu_commit_txn(const struct pmu *pmu)
*/
memcpy(cpuc->assign, assign, n*sizeof(int));
- /*
- * Clear out the txn count so that ->cancel_txn() which gets
- * run after ->commit_txn() doesn't undo things.
- */
- cpuc->n_txn = 0;
+ cpuc->group_flag &= ~PERF_EVENT_TXN;
return 0;
}
@@ -1607,8 +1613,6 @@ static const struct stacktrace_ops backtrace_ops = {
.walk_stack = print_context_stack_bp,
};
-#include "../dumpstack.h"
-
static void
perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry)
{
@@ -1730,22 +1734,6 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return entry;
}
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
- regs->ip = ip;
- /*
- * perf_arch_fetch_caller_regs adds another call, we need to increment
- * the skip level
- */
- regs->bp = rewind_frame_pointer(skip + 1);
- regs->cs = __KERNEL_CS;
- /*
- * We abuse bit 3 to pass exact information, see perf_misc_flags
- * and the comment with PERF_EFLAGS_EXACT.
- */
- regs->flags = 0;
-}
-
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
unsigned long ip;
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index ae85d69644d..107711bf0ee 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -21,22 +21,36 @@ struct p4_event_bind {
char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */
};
-struct p4_cache_event_bind {
+struct p4_pebs_bind {
unsigned int metric_pebs;
unsigned int metric_vert;
};
-#define P4_GEN_CACHE_EVENT_BIND(name) \
- [P4_CACHE__##name] = { \
- .metric_pebs = P4_PEBS__##name, \
- .metric_vert = P4_VERT__##name, \
+/* it sets P4_PEBS_ENABLE_UOP_TAG as well */
+#define P4_GEN_PEBS_BIND(name, pebs, vert) \
+ [P4_PEBS_METRIC__##name] = { \
+ .metric_pebs = pebs | P4_PEBS_ENABLE_UOP_TAG, \
+ .metric_vert = vert, \
}
-static struct p4_cache_event_bind p4_cache_event_bind_map[] = {
- P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired),
- P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired),
- P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired),
- P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired),
+/*
+ * note we have P4_PEBS_ENABLE_UOP_TAG always set here
+ *
+ * it's needed for mapping P4_PEBS_CONFIG_METRIC_MASK bits of
+ * event configuration to find out which values are to be
+ * written into MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT
+ * registers
+ */
+static struct p4_pebs_bind p4_pebs_bind_map[] = {
+ P4_GEN_PEBS_BIND(1stl_cache_load_miss_retired, 0x0000001, 0x0000001),
+ P4_GEN_PEBS_BIND(2ndl_cache_load_miss_retired, 0x0000002, 0x0000001),
+ P4_GEN_PEBS_BIND(dtlb_load_miss_retired, 0x0000004, 0x0000001),
+ P4_GEN_PEBS_BIND(dtlb_store_miss_retired, 0x0000004, 0x0000002),
+ P4_GEN_PEBS_BIND(dtlb_all_miss_retired, 0x0000004, 0x0000003),
+ P4_GEN_PEBS_BIND(tagged_mispred_branch, 0x0018000, 0x0000010),
+ P4_GEN_PEBS_BIND(mob_load_replay_retired, 0x0000200, 0x0000001),
+ P4_GEN_PEBS_BIND(split_load_retired, 0x0000400, 0x0000001),
+ P4_GEN_PEBS_BIND(split_store_retired, 0x0000400, 0x0000002),
};
/*
@@ -281,10 +295,10 @@ static struct p4_event_bind p4_event_bind_map[] = {
},
};
-#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \
+#define P4_GEN_CACHE_EVENT(event, bit, metric) \
p4_config_pack_escr(P4_ESCR_EVENT(event) | \
P4_ESCR_EMASK_BIT(event, bit)) | \
- p4_config_pack_cccr(cache_event | \
+ p4_config_pack_cccr(metric | \
P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event))))
static __initconst const u64 p4_hw_cache_event_ids
@@ -296,34 +310,34 @@ static __initconst const u64 p4_hw_cache_event_ids
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_CACHE__1stl_cache_load_miss_retired),
+ P4_PEBS_METRIC__1stl_cache_load_miss_retired),
},
},
[ C(LL ) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_CACHE__2ndl_cache_load_miss_retired),
+ P4_PEBS_METRIC__2ndl_cache_load_miss_retired),
},
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_CACHE__dtlb_load_miss_retired),
+ P4_PEBS_METRIC__dtlb_load_miss_retired),
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = 0x0,
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS,
- P4_CACHE__dtlb_store_miss_retired),
+ P4_PEBS_METRIC__dtlb_store_miss_retired),
},
},
[ C(ITLB) ] = {
[ C(OP_READ) ] = {
[ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT,
- P4_CACHE__itlb_reference_hit),
+ P4_PEBS_METRIC__none),
[ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS,
- P4_CACHE__itlb_reference_miss),
+ P4_PEBS_METRIC__none),
},
[ C(OP_WRITE) ] = {
[ C(RESULT_ACCESS) ] = -1,
@@ -414,11 +428,37 @@ static u64 p4_pmu_event_map(int hw_event)
return config;
}
+static int p4_validate_raw_event(struct perf_event *event)
+{
+ unsigned int v;
+
+ /* user data may have out-of-bound event index */
+ v = p4_config_unpack_event(event->attr.config);
+ if (v >= ARRAY_SIZE(p4_event_bind_map)) {
+ pr_warning("P4 PMU: Unknown event code: %d\n", v);
+ return -EINVAL;
+ }
+
+ /*
+ * it may have some screwed PEBS bits
+ */
+ if (p4_config_pebs_has(event->attr.config, P4_PEBS_CONFIG_ENABLE)) {
+ pr_warning("P4 PMU: PEBS are not supported yet\n");
+ return -EINVAL;
+ }
+ v = p4_config_unpack_metric(event->attr.config);
+ if (v >= ARRAY_SIZE(p4_pebs_bind_map)) {
+ pr_warning("P4 PMU: Unknown metric code: %d\n", v);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int p4_hw_config(struct perf_event *event)
{
int cpu = get_cpu();
int rc = 0;
- unsigned int evnt;
u32 escr, cccr;
/*
@@ -438,12 +478,9 @@ static int p4_hw_config(struct perf_event *event)
if (event->attr.type == PERF_TYPE_RAW) {
- /* user data may have out-of-bound event index */
- evnt = p4_config_unpack_event(event->attr.config);
- if (evnt >= ARRAY_SIZE(p4_event_bind_map)) {
- rc = -EINVAL;
+ rc = p4_validate_raw_event(event);
+ if (rc)
goto out;
- }
/*
* We don't control raw events so it's up to the caller
@@ -451,12 +488,15 @@ static int p4_hw_config(struct perf_event *event)
* on HT machine but allow HT-compatible specifics to be
* passed on)
*
+ * Note that for RAW events we allow the user to use P4_CCCR_RESERVED
+ * bits since we keep additional info here (for cache events, etc.)
+ *
* XXX: HT wide things should check perf_paranoid_cpu() &&
* CAP_SYS_ADMIN
*/
event->hw.config |= event->attr.config &
(p4_config_pack_escr(P4_ESCR_MASK_HT) |
- p4_config_pack_cccr(P4_CCCR_MASK_HT));
+ p4_config_pack_cccr(P4_CCCR_MASK_HT | P4_CCCR_RESERVED));
}
rc = x86_setup_perfctr(event);
@@ -482,6 +522,29 @@ static inline int p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc)
return overflow;
}
+static void p4_pmu_disable_pebs(void)
+{
+ /*
+ * FIXME
+ *
+ * It's still allowed that two threads set up the same cache
+ * events so we can't simply clear metrics until we know
+ * no one is depending on us, so we need a kind of counter
+ * for "ReplayEvent" users.
+ *
+ * What is more complex -- RAW events: if the user (for some
+ * reason) passes some cache event metric with an improper
+ * event opcode -- it's fine from the hardware point of view
+ * but complete nonsense from the "meaning" of such an action.
+ *
+ * So at the moment let's leave metrics turned on forever -- it's
+ * ok for now but needs to be revisited!
+ *
+ * (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)0);
+ * (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)0);
+ */
+}
+
static inline void p4_pmu_disable_event(struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
@@ -507,6 +570,26 @@ static void p4_pmu_disable_all(void)
continue;
p4_pmu_disable_event(event);
}
+
+ p4_pmu_disable_pebs();
+}
+
+/* configuration must be valid */
+static void p4_pmu_enable_pebs(u64 config)
+{
+ struct p4_pebs_bind *bind;
+ unsigned int idx;
+
+ BUILD_BUG_ON(P4_PEBS_METRIC__max > P4_PEBS_CONFIG_METRIC_MASK);
+
+ idx = p4_config_unpack_metric(config);
+ if (idx == P4_PEBS_METRIC__none)
+ return;
+
+ bind = &p4_pebs_bind_map[idx];
+
+ (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind->metric_pebs);
+ (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind->metric_vert);
}
static void p4_pmu_enable_event(struct perf_event *event)
@@ -515,9 +598,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
int thread = p4_ht_config_thread(hwc->config);
u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config));
unsigned int idx = p4_config_unpack_event(hwc->config);
- unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config);
struct p4_event_bind *bind;
- struct p4_cache_event_bind *bind_cache;
u64 escr_addr, cccr;
bind = &p4_event_bind_map[idx];
@@ -537,16 +618,10 @@ static void p4_pmu_enable_event(struct perf_event *event)
cccr = p4_config_unpack_cccr(hwc->config);
/*
- * it could be Cache event so that we need to
- * set metrics into additional MSRs
+ * it could be Cache event so we need to write metrics
+ * into additional MSRs
*/
- BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK);
- if (idx_cache > P4_CACHE__NONE &&
- idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) {
- bind_cache = &p4_cache_event_bind_map[idx_cache];
- (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs);
- (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert);
- }
+ p4_pmu_enable_pebs(hwc->config);
(void)checking_wrmsrl(escr_addr, escr_conf);
(void)checking_wrmsrl(hwc->config_base + hwc->idx,
@@ -829,6 +904,15 @@ static __initconst const struct x86_pmu p4_pmu = {
.max_period = (1ULL << 39) - 1,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
+ /*
+ * This handles erratum N15 in intel doc 249199-029,
+ * the counter may not be updated correctly on write
+ * so we need a second write operation to do the trick
+ * (the official workaround didn't work)
+ *
+ * the former idea is taken from OProfile code
+ */
+ .perfctr_second_write = 1,
};
static __init int p4_pmu_init(void)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c89a386930b..6e8752c1bd5 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -18,7 +18,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
int panic_on_unrecovered_nmi;
int panic_on_io_nmi;
diff --git a/arch/x86/kernel/dumpstack.h b/arch/x86/kernel/dumpstack.h
deleted file mode 100644
index e1a93be4fd4..00000000000
--- a/arch/x86/kernel/dumpstack.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
- */
-
-#ifndef DUMPSTACK_H
-#define DUMPSTACK_H
-
-#ifdef CONFIG_X86_32
-#define STACKSLOTS_PER_LINE 8
-#define get_bp(bp) asm("movl %%ebp, %0" : "=r" (bp) :)
-#else
-#define STACKSLOTS_PER_LINE 4
-#define get_bp(bp) asm("movq %%rbp, %0" : "=r" (bp) :)
-#endif
-
-#include <linux/uaccess.h>
-
-extern void
-show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *stack, unsigned long bp, char *log_lvl);
-
-extern void
-show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
- unsigned long *sp, unsigned long bp, char *log_lvl);
-
-extern unsigned int code_bytes;
-
-/* The form of the top of the frame on the stack */
-struct stack_frame {
- struct stack_frame *next_frame;
- unsigned long return_address;
-};
-
-struct stack_frame_ia32 {
- u32 next_frame;
- u32 return_address;
-};
-
-static inline unsigned long rewind_frame_pointer(int n)
-{
- struct stack_frame *frame;
-
- get_bp(frame);
-
-#ifdef CONFIG_FRAME_POINTER
- while (n--) {
- if (probe_kernel_address(&frame->next_frame, frame))
- break;
- }
-#endif
-
- return (unsigned long)frame;
-}
-
-#endif /* DUMPSTACK_H */
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index 11540a189d9..0f6376ffa2d 100644
--- a/arch/x86/kernel/dumpstack_32.c
+++ b/arch/x86/kernel/dumpstack_32.c
@@ -16,8 +16,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
-
void dump_trace(struct task_struct *task, struct pt_regs *regs,
unsigned long *stack, unsigned long bp,
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index 272c9f1f05f..57a21f11c79 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -16,7 +16,6 @@
#include <asm/stacktrace.h>
-#include "dumpstack.h"
#define N_EXCEPTION_STACKS_END \
(N_EXCEPTION_STACKS + DEBUG_STKSZ/EXCEPTION_STKSZ - 2)
diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c
index a8f1b803d2f..a474ec37c32 100644
--- a/arch/x86/kernel/hw_breakpoint.c
+++ b/arch/x86/kernel/hw_breakpoint.c
@@ -208,6 +208,9 @@ int arch_bp_generic_fields(int x86_len, int x86_type,
{
/* Len */
switch (x86_len) {
+ case X86_BREAKPOINT_LEN_X:
+ *gen_len = sizeof(long);
+ break;
case X86_BREAKPOINT_LEN_1:
*gen_len = HW_BREAKPOINT_LEN_1;
break;
@@ -251,6 +254,29 @@ static int arch_build_bp_info(struct perf_event *bp)
info->address = bp->attr.bp_addr;
+ /* Type */
+ switch (bp->attr.bp_type) {
+ case HW_BREAKPOINT_W:
+ info->type = X86_BREAKPOINT_WRITE;
+ break;
+ case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
+ info->type = X86_BREAKPOINT_RW;
+ break;
+ case HW_BREAKPOINT_X:
+ info->type = X86_BREAKPOINT_EXECUTE;
+ /*
+ * x86 inst breakpoints need to have a specific undefined len.
+ * But we still need to check userspace is not trying to setup
+ * an unsupported length, to get a range breakpoint for example.
+ */
+ if (bp->attr.bp_len == sizeof(long)) {
+ info->len = X86_BREAKPOINT_LEN_X;
+ return 0;
+ }
+ default:
+ return -EINVAL;
+ }
+
/* Len */
switch (bp->attr.bp_len) {
case HW_BREAKPOINT_LEN_1:
@@ -271,21 +297,6 @@ static int arch_build_bp_info(struct perf_event *bp)
return -EINVAL;
}
- /* Type */
- switch (bp->attr.bp_type) {
- case HW_BREAKPOINT_W:
- info->type = X86_BREAKPOINT_WRITE;
- break;
- case HW_BREAKPOINT_W | HW_BREAKPOINT_R:
- info->type = X86_BREAKPOINT_RW;
- break;
- case HW_BREAKPOINT_X:
- info->type = X86_BREAKPOINT_EXECUTE;
- break;
- default:
- return -EINVAL;
- }
-
return 0;
}
/*
@@ -305,6 +316,9 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp)
ret = -EINVAL;
switch (info->len) {
+ case X86_BREAKPOINT_LEN_X:
+ align = sizeof(long) -1;
+ break;
case X86_BREAKPOINT_LEN_1:
align = 0;
break;
@@ -466,6 +480,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args)
perf_bp_event(bp, args->regs);
+ /*
+ * Set up resume flag to avoid breakpoint recursion when
+ * returning back to origin.
+ */
+ if (bp->hw.info.type == X86_BREAKPOINT_EXECUTE)
+ args->regs->flags |= X86_EFLAGS_RF;
+
rcu_read_unlock();
}
/*
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 675879b65ce..1bfb6cf4dd5 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -126,16 +126,22 @@ static void __kprobes synthesize_reljump(void *from, void *to)
}
/*
- * Check for the REX prefix which can only exist on X86_64
- * X86_32 always returns 0
+ * Skip the prefixes of the instruction.
*/
-static int __kprobes is_REX_prefix(kprobe_opcode_t *insn)
+static kprobe_opcode_t *__kprobes skip_prefixes(kprobe_opcode_t *insn)
{
+ insn_attr_t attr;
+
+ attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+ while (inat_is_legacy_prefix(attr)) {
+ insn++;
+ attr = inat_get_opcode_attribute((insn_byte_t)*insn);
+ }
#ifdef CONFIG_X86_64
- if ((*insn & 0xf0) == 0x40)
- return 1;
+ if (inat_is_rex_prefix(attr))
+ insn++;
#endif
- return 0;
+ return insn;
}
/*
@@ -272,6 +278,9 @@ static int __kprobes can_probe(unsigned long paddr)
*/
static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
{
+ /* Skip prefixes */
+ insn = skip_prefixes(insn);
+
switch (*insn) {
case 0xfa: /* cli */
case 0xfb: /* sti */
@@ -280,13 +289,6 @@ static int __kprobes is_IF_modifier(kprobe_opcode_t *insn)
return 1;
}
- /*
- * on X86_64, 0x40-0x4f are REX prefixes so we need to look
- * at the next byte instead.. but of course not recurse infinitely
- */
- if (is_REX_prefix(insn))
- return is_IF_modifier(++insn);
-
return 0;
}
@@ -803,9 +805,8 @@ static void __kprobes resume_execution(struct kprobe *p,
unsigned long orig_ip = (unsigned long)p->addr;
kprobe_opcode_t *insn = p->ainsn.insn;
- /*skip the REX prefix*/
- if (is_REX_prefix(insn))
- insn++;
+ /* Skip prefixes */
+ insn = skip_prefixes(insn);
regs->flags &= ~X86_EFLAGS_TF;
switch (*insn) {
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 8d128783af4..96586c3cbbb 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -57,6 +57,8 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
/*
@@ -111,6 +113,8 @@ void cpu_idle(void)
stop_critical_timings();
pm_idle();
start_critical_timings();
+
+ trace_power_end(smp_processor_id());
}
tick_nohz_restart_sched_tick();
preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 3c2422a99f1..3d9ea531ddd 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -51,6 +51,8 @@
#include <asm/syscalls.h>
#include <asm/debugreg.h>
+#include <trace/events/power.h>
+
asmlinkage extern void ret_from_fork(void);
DEFINE_PER_CPU(unsigned long, old_rsp);
@@ -138,6 +140,9 @@ void cpu_idle(void)
stop_critical_timings();
pm_idle();
start_critical_timings();
+
+ trace_power_end(smp_processor_id());
+
/* In many cases the interrupt that ended idle
has already called exit_idle. But some idle
loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 922eefbb3f6..b53c525368a 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -23,11 +23,16 @@ static int save_stack_stack(void *data, char *name)
return 0;
}
-static void save_stack_address(void *data, unsigned long addr, int reliable)
+static void
+__save_stack_address(void *data, unsigned long addr, bool reliable, bool nosched)
{
struct stack_trace *trace = data;
+#ifdef CONFIG_FRAME_POINTER
if (!reliable)
return;
+#endif
+ if (nosched && in_sched_functions(addr))
+ return;
if (trace->skip > 0) {
trace->skip--;
return;
@@ -36,20 +41,15 @@ static void save_stack_address(void *data, unsigned long addr, int reliable)
trace->entries[trace->nr_entries++] = addr;
}
+static void save_stack_address(void *data, unsigned long addr, int reliable)
+{
+ return __save_stack_address(data, addr, reliable, false);
+}
+
static void
save_stack_address_nosched(void *data, unsigned long addr, int reliable)
{
- struct stack_trace *trace = (struct stack_trace *)data;
- if (!reliable)
- return;
- if (in_sched_functions(addr))
- return;
- if (trace->skip > 0) {
- trace->skip--;
- return;
- }
- if (trace->nr_entries < trace->max_entries)
- trace->entries[trace->nr_entries++] = addr;
+ return __save_stack_address(data, addr, reliable, true);
}
static const struct stacktrace_ops save_stack_ops = {
@@ -96,12 +96,13 @@ EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
/* Userspace stacktrace - based on kernel/trace/trace_sysprof.c */
-struct stack_frame {
+struct stack_frame_user {
const void __user *next_fp;
unsigned long ret_addr;
};
-static int copy_stack_frame(const void __user *fp, struct stack_frame *frame)
+static int
+copy_stack_frame(const void __user *fp, struct stack_frame_user *frame)
{
int ret;
@@ -126,7 +127,7 @@ static inline void __save_stack_trace_user(struct stack_trace *trace)
trace->entries[trace->nr_entries++] = regs->ip;
while (trace->nr_entries < trace->max_entries) {
- struct stack_frame frame;
+ struct stack_frame_user frame;
frame.next_fp = NULL;
frame.ret_addr = 0;
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 725ef4d17cd..60788dee0f8 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -392,7 +392,13 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
return;
+
#ifdef CONFIG_X86_LOCAL_APIC
+ if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+ == NOTIFY_STOP)
+ return;
+
+#ifndef CONFIG_LOCKUP_DETECTOR
/*
* Ok, so this is none of the documented NMI sources,
* so it must be the NMI watchdog.
@@ -400,6 +406,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
if (nmi_watchdog_tick(regs, reason))
return;
if (!do_nmi_callback(regs, cpu))
+#endif /* !CONFIG_LOCKUP_DETECTOR */
unknown_nmi_error(reason, regs);
#else
unknown_nmi_error(reason, regs);
diff --git a/arch/x86/mm/pf_in.c b/arch/x86/mm/pf_in.c
index 308e32570d8..38e6d174c49 100644
--- a/arch/x86/mm/pf_in.c
+++ b/arch/x86/mm/pf_in.c
@@ -40,16 +40,16 @@ static unsigned char prefix_codes[] = {
static unsigned int reg_rop[] = {
0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
};
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
static unsigned int imm_wop[] = { 0xC6, 0xC7 };
/* IA32 Manual 3, 3-432*/
-static unsigned int rw8[] = { 0x88, 0x8A, 0xC6 };
+static unsigned int rw8[] = { 0x88, 0x8A, 0xC6, 0xAA };
static unsigned int rw32[] = {
- 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+ 0x89, 0x8B, 0xC7, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
};
-static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0x88, 0x8A, 0xC6, 0xB60F, 0xBE0F, 0xAA };
static unsigned int mw16[] = { 0xB70F, 0xBF0F };
-static unsigned int mw32[] = { 0x89, 0x8B, 0xC7 };
+static unsigned int mw32[] = { 0x89, 0x8B, 0xC7, 0xAB };
static unsigned int mw64[] = {};
#else /* not __i386__ */
static unsigned char prefix_codes[] = {
@@ -63,20 +63,20 @@ static unsigned char prefix_codes[] = {
static unsigned int reg_rop[] = {
0x8A, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
};
-static unsigned int reg_wop[] = { 0x88, 0x89 };
+static unsigned int reg_wop[] = { 0x88, 0x89, 0xAA, 0xAB };
static unsigned int imm_wop[] = { 0xC6, 0xC7 };
-static unsigned int rw8[] = { 0xC6, 0x88, 0x8A };
+static unsigned int rw8[] = { 0xC6, 0x88, 0x8A, 0xAA };
static unsigned int rw32[] = {
- 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F
+ 0xC7, 0x89, 0x8B, 0xB60F, 0xB70F, 0xBE0F, 0xBF0F, 0xAB
};
/* 8 bit only */
-static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F };
+static unsigned int mw8[] = { 0xC6, 0x88, 0x8A, 0xB60F, 0xBE0F, 0xAA };
/* 16 bit only */
static unsigned int mw16[] = { 0xB70F, 0xBF0F };
/* 16 or 32 bit */
static unsigned int mw32[] = { 0xC7 };
/* 16, 32 or 64 bit */
-static unsigned int mw64[] = { 0x89, 0x8B };
+static unsigned int mw64[] = { 0x89, 0x8B, 0xAB };
#endif /* not __i386__ */
struct prefix_bits {
@@ -410,7 +410,6 @@ static unsigned long *get_reg_w32(int no, struct pt_regs *regs)
unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
{
unsigned int opcode;
- unsigned char mod_rm;
int reg;
unsigned char *p;
struct prefix_bits prf;
@@ -437,8 +436,13 @@ unsigned long get_ins_reg_val(unsigned long ins_addr, struct pt_regs *regs)
goto err;
do_work:
- mod_rm = *p;
- reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+ /* for STOS, source register is fixed */
+ if (opcode == 0xAA || opcode == 0xAB) {
+ reg = arg_AX;
+ } else {
+ unsigned char mod_rm = *p;
+ reg = ((mod_rm >> 3) & 0x7) | (prf.rexr << 3);
+ }
switch (get_ins_reg_width(ins_addr)) {
case 1:
return *get_reg_w8(reg, prf.rex, regs);
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index b28d2f1253b..1ba67dc8006 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -634,6 +634,18 @@ static int __init ppro_init(char **cpu_type)
if (force_arch_perfmon && cpu_has_arch_perfmon)
return 0;
+ /*
+ * Documentation on identifying Intel processors by CPU family
+ * and model can be found in the Intel Software Developer's
+ * Manuals (SDM):
+ *
+ * http://www.intel.com/products/processor/manuals/
+ *
+ * As of May 2010 the documentation for this was in the:
+ * "Intel 64 and IA-32 Architectures Software Developer's
+ * Manual Volume 3B: System Programming Guide", "Table B-1
+ * CPUID Signature Values of DisplayFamily_DisplayModel".
+ */
switch (cpu_model) {
case 0 ... 2:
*cpu_type = "i386/ppro";
@@ -655,12 +667,12 @@ static int __init ppro_init(char **cpu_type)
case 15: case 23:
*cpu_type = "i386/core_2";
break;
+ case 0x1a:
case 0x2e:
- case 26:
spec = &op_arch_perfmon_spec;
*cpu_type = "i386/core_i7";
break;
- case 28:
+ case 0x1c:
*cpu_type = "i386/atom";
break;
default:
diff --git a/arch/xtensa/include/asm/local64.h b/arch/xtensa/include/asm/local64.h
new file mode 100644
index 00000000000..36c93b5cc23
--- /dev/null
+++ b/arch/xtensa/include/asm/local64.h
@@ -0,0 +1 @@
+#include <asm-generic/local64.h>
diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c
index 5df60a6b677..dd87e86048b 100644
--- a/drivers/oprofile/event_buffer.c
+++ b/drivers/oprofile/event_buffer.c
@@ -135,7 +135,7 @@ static int event_buffer_open(struct inode *inode, struct file *file)
* echo 1 >/dev/oprofile/enable
*/
- return 0;
+ return nonseekable_open(inode, file);
fail:
dcookie_unregister(file->private_data);
@@ -205,4 +205,5 @@ const struct file_operations event_buffer_fops = {
.open = event_buffer_open,
.release = event_buffer_release,
.read = event_buffer_read,
+ .llseek = no_llseek,
};
diff --git a/fs/exec.c b/fs/exec.c
index e19de6a8033..97d91a03fb1 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -653,6 +653,7 @@ int setup_arg_pages(struct linux_binprm *bprm,
else
stack_base = vma->vm_start - stack_expand;
#endif
+ current->mm->start_stack = bprm->p;
ret = expand_stack(vma, stack_base);
if (ret)
ret = -EFAULT;
diff --git a/include/asm-generic/local64.h b/include/asm-generic/local64.h
new file mode 100644
index 00000000000..02ac760c1a8
--- /dev/null
+++ b/include/asm-generic/local64.h
@@ -0,0 +1,96 @@
+#ifndef _ASM_GENERIC_LOCAL64_H
+#define _ASM_GENERIC_LOCAL64_H
+
+#include <linux/percpu.h>
+#include <asm/types.h>
+
+/*
+ * A signed long type for operations which are atomic for a single CPU.
+ * Usually used in combination with per-cpu variables.
+ *
+ * This is the default implementation, which uses atomic64_t. Which is
+ * rather pointless. The whole point behind local64_t is that some processors
+ * can perform atomic adds and subtracts in a manner which is atomic wrt IRQs
+ * running on this CPU. local64_t allows exploitation of such capabilities.
+ */
+
+/* Implement in terms of atomics. */
+
+#if BITS_PER_LONG == 64
+
+#include <asm/local.h>
+
+typedef struct {
+ local_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i) { LOCAL_INIT(i) }
+
+#define local64_read(l) local_read(&(l)->a)
+#define local64_set(l,i) local_set((&(l)->a),(i))
+#define local64_inc(l) local_inc(&(l)->a)
+#define local64_dec(l) local_dec(&(l)->a)
+#define local64_add(i,l) local_add((i),(&(l)->a))
+#define local64_sub(i,l) local_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) local_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) local_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) local_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) local_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) local_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) local_sub_return((i), (&(l)->a))
+#define local64_inc_return(l) local_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) local_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n) local_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) local_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l) local_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, ie. preemption disabled and won't be touched
+ * in interrupt, etc. Some archs can optimize this case well. */
+#define __local64_inc(l) local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l) local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l) local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i))
+
+#else /* BITS_PER_LONG != 64 */
+
+#include <asm/atomic.h>
+
+/* Don't use typedef: don't want them to be mixed with atomic_t's. */
+typedef struct {
+ atomic64_t a;
+} local64_t;
+
+#define LOCAL64_INIT(i) { ATOMIC_LONG_INIT(i) }
+
+#define local64_read(l) atomic64_read(&(l)->a)
+#define local64_set(l,i) atomic64_set((&(l)->a),(i))
+#define local64_inc(l) atomic64_inc(&(l)->a)
+#define local64_dec(l) atomic64_dec(&(l)->a)
+#define local64_add(i,l) atomic64_add((i),(&(l)->a))
+#define local64_sub(i,l) atomic64_sub((i),(&(l)->a))
+
+#define local64_sub_and_test(i, l) atomic64_sub_and_test((i), (&(l)->a))
+#define local64_dec_and_test(l) atomic64_dec_and_test(&(l)->a)
+#define local64_inc_and_test(l) atomic64_inc_and_test(&(l)->a)
+#define local64_add_negative(i, l) atomic64_add_negative((i), (&(l)->a))
+#define local64_add_return(i, l) atomic64_add_return((i), (&(l)->a))
+#define local64_sub_return(i, l) atomic64_sub_return((i), (&(l)->a))
+#define local64_inc_return(l) atomic64_inc_return(&(l)->a)
+
+#define local64_cmpxchg(l, o, n) atomic64_cmpxchg((&(l)->a), (o), (n))
+#define local64_xchg(l, n) atomic64_xchg((&(l)->a), (n))
+#define local64_add_unless(l, _a, u) atomic64_add_unless((&(l)->a), (_a), (u))
+#define local64_inc_not_zero(l) atomic64_inc_not_zero(&(l)->a)
+
+/* Non-atomic variants, ie. preemption disabled and won't be touched
+ * in interrupt, etc. Some archs can optimize this case well. */
+#define __local64_inc(l) local64_set((l), local64_read(l) + 1)
+#define __local64_dec(l) local64_set((l), local64_read(l) - 1)
+#define __local64_add(i,l) local64_set((l), local64_read(l) + (i))
+#define __local64_sub(i,l) local64_set((l), local64_read(l) - (i))
+
+#endif /* BITS_PER_LONG != 64 */
+
+#endif /* _ASM_GENERIC_LOCAL64_H */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 4e7ae600205..8a92a170fb7 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -156,10 +156,6 @@
CPU_KEEP(exit.data) \
MEM_KEEP(init.data) \
MEM_KEEP(exit.data) \
- . = ALIGN(8); \
- VMLINUX_SYMBOL(__start___markers) = .; \
- *(__markers) \
- VMLINUX_SYMBOL(__stop___markers) = .; \
. = ALIGN(32); \
VMLINUX_SYMBOL(__start___tracepoints) = .; \
*(__tracepoints) \
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 41e46330d9b..dcd6a7c3a43 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1,3 +1,8 @@
+/*
+ * Ftrace header. For implementation details beyond the random comments
+ * scattered below, see: Documentation/trace/ftrace-design.txt
+ */
+
#ifndef _LINUX_FTRACE_H
#define _LINUX_FTRACE_H
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 3167f2df412..02b8b24f8f5 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -11,8 +11,6 @@ struct trace_array;
struct tracer;
struct dentry;
-DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq);
-
struct trace_print_flags {
unsigned long mask;
const char *name;
@@ -58,6 +56,9 @@ struct trace_iterator {
struct ring_buffer_iter *buffer_iter[NR_CPUS];
unsigned long iter_flags;
+ /* trace_seq for __print_flags() and __print_symbolic() etc. */
+ struct trace_seq tmp_seq;
+
/* The below is zeroed out in pipe_read */
struct trace_seq seq;
struct trace_entry *ent;
@@ -146,14 +147,19 @@ struct ftrace_event_class {
int (*raw_init)(struct ftrace_event_call *);
};
+extern int ftrace_event_reg(struct ftrace_event_call *event,
+ enum trace_reg type);
+
enum {
TRACE_EVENT_FL_ENABLED_BIT,
TRACE_EVENT_FL_FILTERED_BIT,
+ TRACE_EVENT_FL_RECORDED_CMD_BIT,
};
enum {
- TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT),
- TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
+ TRACE_EVENT_FL_ENABLED = (1 << TRACE_EVENT_FL_ENABLED_BIT),
+ TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT),
+ TRACE_EVENT_FL_RECORDED_CMD = (1 << TRACE_EVENT_FL_RECORDED_CMD_BIT),
};
struct ftrace_event_call {
@@ -171,6 +177,7 @@ struct ftrace_event_call {
* 32 bit flags:
* bit 1: enabled
* bit 2: filter_active
+ * bit 3: enabled cmd record
*
* Changes to flags must hold the event_mutex.
*
@@ -257,8 +264,7 @@ static inline void
perf_trace_buf_submit(void *raw_data, int size, int rctx, u64 addr,
u64 count, struct pt_regs *regs, void *head)
{
- perf_tp_event(addr, count, raw_data, size, regs, head);
- perf_swevent_put_recursion_context(rctx);
+ perf_tp_event(addr, count, raw_data, size, regs, head, rctx);
}
#endif
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 5de838b0fc1..38e462e0059 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -513,9 +513,6 @@ extern void tracing_start(void);
extern void tracing_stop(void);
extern void ftrace_off_permanent(void);
-extern void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3);
-
static inline void __attribute__ ((format (printf, 1, 2)))
____trace_printk_check_format(const char *fmt, ...)
{
@@ -591,8 +588,6 @@ __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap);
extern void ftrace_dump(enum ftrace_dump_mode oops_dump_mode);
#else
-static inline void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3) { }
static inline int
trace_printk(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
diff --git a/include/linux/kmemtrace.h b/include/linux/kmemtrace.h
deleted file mode 100644
index b616d3930c3..00000000000
--- a/include/linux/kmemtrace.h
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- *
- * This file is released under GPL version 2.
- */
-
-#ifndef _LINUX_KMEMTRACE_H
-#define _LINUX_KMEMTRACE_H
-
-#ifdef __KERNEL__
-
-#include <trace/events/kmem.h>
-
-#ifdef CONFIG_KMEMTRACE
-extern void kmemtrace_init(void);
-#else
-static inline void kmemtrace_init(void)
-{
-}
-#endif
-
-#endif /* __KERNEL__ */
-
-#endif /* _LINUX_KMEMTRACE_H */
-
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index b752e807add..06aab5eee13 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -20,10 +20,14 @@ extern void touch_nmi_watchdog(void);
extern void acpi_nmi_disable(void);
extern void acpi_nmi_enable(void);
#else
+#ifndef CONFIG_HARDLOCKUP_DETECTOR
static inline void touch_nmi_watchdog(void)
{
touch_softlockup_watchdog();
}
+#else
+extern void touch_nmi_watchdog(void);
+#endif
static inline void acpi_nmi_disable(void) { }
static inline void acpi_nmi_enable(void) { }
#endif
@@ -47,4 +51,13 @@ static inline bool trigger_all_cpu_backtrace(void)
}
#endif
+#ifdef CONFIG_LOCKUP_DETECTOR
+int hw_nmi_is_cpu_stuck(struct pt_regs *);
+u64 hw_nmi_get_sample_period(void);
+extern int watchdog_enabled;
+struct ctl_table;
+extern int proc_dowatchdog_enabled(struct ctl_table *, int ,
+ void __user *, size_t *, loff_t *);
+#endif
+
#endif
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5d0266d9498..937495c2507 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -214,8 +214,9 @@ struct perf_event_attr {
* See also PERF_RECORD_MISC_EXACT_IP
*/
precise_ip : 2, /* skid constraint */
+ mmap_data : 1, /* non-exec mmap data */
- __reserved_1 : 47;
+ __reserved_1 : 46;
union {
__u32 wakeup_events; /* wakeup every n events */
@@ -461,6 +462,7 @@ enum perf_callchain_context {
#ifdef CONFIG_PERF_EVENTS
# include <asm/perf_event.h>
+# include <asm/local64.h>
#endif
struct perf_guest_info_callbacks {
@@ -531,14 +533,16 @@ struct hw_perf_event {
struct hrtimer hrtimer;
};
#ifdef CONFIG_HAVE_HW_BREAKPOINT
- /* breakpoint */
- struct arch_hw_breakpoint info;
+ struct { /* breakpoint */
+ struct arch_hw_breakpoint info;
+ struct list_head bp_list;
+ };
#endif
};
- atomic64_t prev_count;
+ local64_t prev_count;
u64 sample_period;
u64 last_period;
- atomic64_t period_left;
+ local64_t period_left;
u64 interrupts;
u64 freq_time_stamp;
@@ -548,7 +552,10 @@ struct hw_perf_event {
struct perf_event;
-#define PERF_EVENT_TXN_STARTED 1
+/*
+ * Common implementation detail of pmu::{start,commit,cancel}_txn
+ */
+#define PERF_EVENT_TXN 0x1
/**
* struct pmu - generic performance monitoring unit
@@ -562,14 +569,28 @@ struct pmu {
void (*unthrottle) (struct perf_event *event);
/*
- * group events scheduling is treated as a transaction,
- * add group events as a whole and perform one schedulability test.
- * If test fails, roll back the whole group
+ * Group events scheduling is treated as a transaction, add group
+ * events as a whole and perform one schedulability test. If the test
+ * fails, roll back the whole group
*/
+ /*
+ * Start the transaction, after this ->enable() doesn't need
+ * to do schedulability tests.
+ */
void (*start_txn) (const struct pmu *pmu);
- void (*cancel_txn) (const struct pmu *pmu);
+ /*
+ * If ->start_txn() disabled the ->enable() schedulability test
+ * then ->commit_txn() is required to perform one. On success
+ * the transaction is closed. On error the transaction is kept
+ * open until ->cancel_txn() is called.
+ */
int (*commit_txn) (const struct pmu *pmu);
+ /*
+ * Will cancel the transaction, assumes ->disable() is called for
+ * each successful ->enable() during the transaction.
+ */
+ void (*cancel_txn) (const struct pmu *pmu);
};
/**
@@ -584,7 +605,9 @@ enum perf_event_active_state {
struct file;
-struct perf_mmap_data {
+#define PERF_BUFFER_WRITABLE 0x01
+
+struct perf_buffer {
atomic_t refcount;
struct rcu_head rcu_head;
#ifdef CONFIG_PERF_USE_VMALLOC
@@ -650,7 +673,8 @@ struct perf_event {
enum perf_event_active_state state;
unsigned int attach_state;
- atomic64_t count;
+ local64_t count;
+ atomic64_t child_count;
/*
* These are the total time in nanoseconds that the event
@@ -709,7 +733,7 @@ struct perf_event {
atomic_t mmap_count;
int mmap_locked;
struct user_struct *mmap_user;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
/* poll related */
wait_queue_head_t waitq;
@@ -807,7 +831,7 @@ struct perf_cpu_context {
struct perf_output_handle {
struct perf_event *event;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long wakeup;
unsigned long size;
void *addr;
@@ -910,8 +934,10 @@ extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX];
extern void __perf_sw_event(u32, u64, int, struct pt_regs *, u64);
-extern void
-perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
+#ifndef perf_arch_fetch_caller_regs
+static inline void
+perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip) { }
+#endif
/*
* Take a snapshot of the regs. Skip ip and frame pointer to
@@ -921,31 +947,11 @@ perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip);
* - bp for callchains
* - eflags, for future purposes, just in case
*/
-static inline void perf_fetch_caller_regs(struct pt_regs *regs, int skip)
+static inline void perf_fetch_caller_regs(struct pt_regs *regs)
{
- unsigned long ip;
-
memset(regs, 0, sizeof(*regs));
- switch (skip) {
- case 1 :
- ip = CALLER_ADDR0;
- break;
- case 2 :
- ip = CALLER_ADDR1;
- break;
- case 3 :
- ip = CALLER_ADDR2;
- break;
- case 4:
- ip = CALLER_ADDR3;
- break;
- /* No need to support further for now */
- default:
- ip = 0;
- }
-
- return perf_arch_fetch_caller_regs(regs, ip, skip);
+ perf_arch_fetch_caller_regs(regs, CALLER_ADDR0);
}
static inline void
@@ -955,21 +961,14 @@ perf_sw_event(u32 event_id, u64 nr, int nmi, struct pt_regs *regs, u64 addr)
struct pt_regs hot_regs;
if (!regs) {
- perf_fetch_caller_regs(&hot_regs, 1);
+ perf_fetch_caller_regs(&hot_regs);
regs = &hot_regs;
}
__perf_sw_event(event_id, nr, nmi, regs, addr);
}
}
-extern void __perf_event_mmap(struct vm_area_struct *vma);
-
-static inline void perf_event_mmap(struct vm_area_struct *vma)
-{
- if (vma->vm_flags & VM_EXEC)
- __perf_event_mmap(vma);
-}
-
+extern void perf_event_mmap(struct vm_area_struct *vma);
extern struct perf_guest_info_callbacks *perf_guest_cbs;
extern int perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *callbacks);
@@ -1001,7 +1000,7 @@ static inline bool perf_paranoid_kernel(void)
extern void perf_event_init(void);
extern void perf_tp_event(u64 addr, u64 count, void *record,
int entry_size, struct pt_regs *regs,
- struct hlist_head *head);
+ struct hlist_head *head, int rctx);
extern void perf_bp_event(struct perf_event *event, void *data);
#ifndef perf_misc_flags
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 0478888c689..3992f50de61 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -316,20 +316,16 @@ extern void scheduler_tick(void);
extern void sched_show_task(struct task_struct *p);
-#ifdef CONFIG_DETECT_SOFTLOCKUP
-extern void softlockup_tick(void);
+#ifdef CONFIG_LOCKUP_DETECTOR
extern void touch_softlockup_watchdog(void);
extern void touch_softlockup_watchdog_sync(void);
extern void touch_all_softlockup_watchdogs(void);
-extern int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos);
+extern int proc_dowatchdog_thresh(struct ctl_table *table, int write,
+ void __user *buffer,
+ size_t *lenp, loff_t *ppos);
extern unsigned int softlockup_panic;
extern int softlockup_thresh;
#else
-static inline void softlockup_tick(void)
-{
-}
static inline void touch_softlockup_watchdog(void)
{
}
@@ -2435,18 +2431,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
#endif /* CONFIG_SMP */
-#ifdef CONFIG_TRACING
-extern void
-__trace_special(void *__tr, void *__data,
- unsigned long arg1, unsigned long arg2, unsigned long arg3);
-#else
-static inline void
-__trace_special(void *__tr, void *__data,
- unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
-}
-#endif
-
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 1812dac8c49..1acfa73ce2a 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -14,7 +14,8 @@
#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
#include <linux/compiler.h>
-#include <linux/kmemtrace.h>
+
+#include <trace/events/kmem.h>
#ifndef ARCH_KMALLOC_MINALIGN
/*
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 4ba59cfc1f7..6447a723ecb 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -10,9 +10,10 @@
#include <linux/gfp.h>
#include <linux/workqueue.h>
#include <linux/kobject.h>
-#include <linux/kmemtrace.h>
#include <linux/kmemleak.h>
+#include <trace/events/kmem.h>
+
enum stat_item {
ALLOC_FASTPATH, /* Allocation from cpu slab */
ALLOC_SLOWPATH, /* Allocation by getting a new cpu slab */
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 13ebb5413a7..a6bfd1367d2 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -167,7 +167,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
.enter_event = &event_enter_##sname, \
.exit_event = &event_exit_##sname, \
.enter_fields = LIST_HEAD_INIT(__syscall_meta_##sname.enter_fields), \
- .exit_fields = LIST_HEAD_INIT(__syscall_meta_##sname.exit_fields), \
};
#define SYSCALL_DEFINE0(sname) \
@@ -182,7 +181,6 @@ extern struct trace_event_functions exit_syscall_print_funcs;
.enter_event = &event_enter__##sname, \
.exit_event = &event_exit__##sname, \
.enter_fields = LIST_HEAD_INIT(__syscall_meta__##sname.enter_fields), \
- .exit_fields = LIST_HEAD_INIT(__syscall_meta__##sname.exit_fields), \
}; \
asmlinkage long sys_##sname(void)
#else
diff --git a/include/trace/boot.h b/include/trace/boot.h
deleted file mode 100644
index 088ea089e31..00000000000
--- a/include/trace/boot.h
+++ /dev/null
@@ -1,60 +0,0 @@
-#ifndef _LINUX_TRACE_BOOT_H
-#define _LINUX_TRACE_BOOT_H
-
-#include <linux/module.h>
-#include <linux/kallsyms.h>
-#include <linux/init.h>
-
-/*
- * Structure which defines the trace of an initcall
- * while it is called.
- * You don't have to fill the func field since it is
- * only used internally by the tracer.
- */
-struct boot_trace_call {
- pid_t caller;
- char func[KSYM_SYMBOL_LEN];
-};
-
-/*
- * Structure which defines the trace of an initcall
- * while it returns.
- */
-struct boot_trace_ret {
- char func[KSYM_SYMBOL_LEN];
- int result;
- unsigned long long duration; /* nsecs */
-};
-
-#ifdef CONFIG_BOOT_TRACER
-/* Append the traces on the ring-buffer */
-extern void trace_boot_call(struct boot_trace_call *bt, initcall_t fn);
-extern void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn);
-
-/* Tells the tracer that smp_pre_initcall is finished.
- * So we can start the tracing
- */
-extern void start_boot_trace(void);
-
-/* Resume the tracing of other necessary events
- * such as sched switches
- */
-extern void enable_boot_trace(void);
-
-/* Suspend this tracing. Actually, only sched_switches tracing have
- * to be suspended. Initcalls doesn't need it.)
- */
-extern void disable_boot_trace(void);
-#else
-static inline
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn) { }
-
-static inline
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn) { }
-
-static inline void start_boot_trace(void) { }
-static inline void enable_boot_trace(void) { }
-static inline void disable_boot_trace(void) { }
-#endif /* CONFIG_BOOT_TRACER */
-
-#endif /* __LINUX_TRACE_BOOT_H */
diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h
index b9e1dd6c620..9208c92aeab 100644
--- a/include/trace/events/sched.h
+++ b/include/trace/events/sched.h
@@ -50,31 +50,6 @@ TRACE_EVENT(sched_kthread_stop_ret,
);
/*
- * Tracepoint for waiting on task to unschedule:
- */
-TRACE_EVENT(sched_wait_task,
-
- TP_PROTO(struct task_struct *p),
-
- TP_ARGS(p),
-
- TP_STRUCT__entry(
- __array( char, comm, TASK_COMM_LEN )
- __field( pid_t, pid )
- __field( int, prio )
- ),
-
- TP_fast_assign(
- memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
- __entry->pid = p->pid;
- __entry->prio = p->prio;
- ),
-
- TP_printk("comm=%s pid=%d prio=%d",
- __entry->comm, __entry->pid, __entry->prio)
-);
-
-/*
* Tracepoint for waking up a task:
*/
DECLARE_EVENT_CLASS(sched_wakeup_template,
@@ -240,6 +215,13 @@ DEFINE_EVENT(sched_process_template, sched_process_exit,
TP_ARGS(p));
/*
+ * Tracepoint for waiting on task to unschedule:
+ */
+DEFINE_EVENT(sched_process_template, sched_wait_task,
+ TP_PROTO(struct task_struct *p),
+ TP_ARGS(p));
+
+/*
* Tracepoint for a waiting task:
*/
TRACE_EVENT(sched_process_wait,
diff --git a/include/trace/events/timer.h b/include/trace/events/timer.h
index 9496b965d62..c624126a9c8 100644
--- a/include/trace/events/timer.h
+++ b/include/trace/events/timer.h
@@ -8,11 +8,7 @@
#include <linux/hrtimer.h>
#include <linux/timer.h>
-/**
- * timer_init - called when the timer is initialized
- * @timer: pointer to struct timer_list
- */
-TRACE_EVENT(timer_init,
+DECLARE_EVENT_CLASS(timer_class,
TP_PROTO(struct timer_list *timer),
@@ -30,6 +26,17 @@ TRACE_EVENT(timer_init,
);
/**
+ * timer_init - called when the timer is initialized
+ * @timer: pointer to struct timer_list
+ */
+DEFINE_EVENT(timer_class, timer_init,
+
+ TP_PROTO(struct timer_list *timer),
+
+ TP_ARGS(timer)
+);
+
+/**
* timer_start - called when the timer is started
* @timer: pointer to struct timer_list
* @expires: the timers expiry time
@@ -94,42 +101,22 @@ TRACE_EVENT(timer_expire_entry,
* NOTE: Do NOT derefernce timer in TP_fast_assign. The pointer might
* be invalid. We solely track the pointer.
*/
-TRACE_EVENT(timer_expire_exit,
+DEFINE_EVENT(timer_class, timer_expire_exit,
TP_PROTO(struct timer_list *timer),
- TP_ARGS(timer),
-
- TP_STRUCT__entry(
- __field(void *, timer )
- ),
-
- TP_fast_assign(
- __entry->timer = timer;
- ),
-
- TP_printk("timer=%p", __entry->timer)
+ TP_ARGS(timer)
);
/**
* timer_cancel - called when the timer is canceled
* @timer: pointer to struct timer_list
*/
-TRACE_EVENT(timer_cancel,
+DEFINE_EVENT(timer_class, timer_cancel,
TP_PROTO(struct timer_list *timer),
- TP_ARGS(timer),
-
- TP_STRUCT__entry(
- __field( void *, timer )
- ),
-
- TP_fast_assign(
- __entry->timer = timer;
- ),
-
- TP_printk("timer=%p", __entry->timer)
+ TP_ARGS(timer)
);
/**
@@ -224,14 +211,7 @@ TRACE_EVENT(hrtimer_expire_entry,
(unsigned long long)ktime_to_ns((ktime_t) { .tv64 = __entry->now }))
);
-/**
- * hrtimer_expire_exit - called immediately after the hrtimer callback returns
- * @timer: pointer to struct hrtimer
- *
- * When used in combination with the hrtimer_expire_entry tracepoint we can
- * determine the runtime of the callback function.
- */
-TRACE_EVENT(hrtimer_expire_exit,
+DECLARE_EVENT_CLASS(hrtimer_class,
TP_PROTO(struct hrtimer *hrtimer),
@@ -249,24 +229,28 @@ TRACE_EVENT(hrtimer_expire_exit,
);
/**
- * hrtimer_cancel - called when the hrtimer is canceled
- * @hrtimer: pointer to struct hrtimer
+ * hrtimer_expire_exit - called immediately after the hrtimer callback returns
+ * @timer: pointer to struct hrtimer
+ *
+ * When used in combination with the hrtimer_expire_entry tracepoint we can
+ * determine the runtime of the callback function.
*/
-TRACE_EVENT(hrtimer_cancel,
+DEFINE_EVENT(hrtimer_class, hrtimer_expire_exit,
TP_PROTO(struct hrtimer *hrtimer),
- TP_ARGS(hrtimer),
+ TP_ARGS(hrtimer)
+);
- TP_STRUCT__entry(
- __field( void *, hrtimer )
- ),
+/**
+ * hrtimer_cancel - called when the hrtimer is canceled
+ * @hrtimer: pointer to struct hrtimer
+ */
+DEFINE_EVENT(hrtimer_class, hrtimer_cancel,
- TP_fast_assign(
- __entry->hrtimer = hrtimer;
- ),
+ TP_PROTO(struct hrtimer *hrtimer),
- TP_printk("hrtimer=%p", __entry->hrtimer)
+ TP_ARGS(hrtimer)
);
/**
diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
index 5a64905d727..a9377c0083a 100644
--- a/include/trace/ftrace.h
+++ b/include/trace/ftrace.h
@@ -75,15 +75,12 @@
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
-#undef __cpparg
-#define __cpparg(arg...) arg
-
/* Callbacks are meaningless to ftrace. */
#undef TRACE_EVENT_FN
#define TRACE_EVENT_FN(name, proto, args, tstruct, \
assign, print, reg, unreg) \
- TRACE_EVENT(name, __cpparg(proto), __cpparg(args), \
- __cpparg(tstruct), __cpparg(assign), __cpparg(print)) \
+ TRACE_EVENT(name, PARAMS(proto), PARAMS(args), \
+ PARAMS(tstruct), PARAMS(assign), PARAMS(print)) \
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
@@ -145,7 +142,7 @@
* struct trace_seq *s = &iter->seq;
* struct ftrace_raw_<call> *field; <-- defined in stage 1
* struct trace_entry *entry;
- * struct trace_seq *p;
+ * struct trace_seq *p = &iter->tmp_seq;
* int ret;
*
* entry = iter->ent;
@@ -157,12 +154,10 @@
*
* field = (typeof(field))entry;
*
- * p = &get_cpu_var(ftrace_event_seq);
* trace_seq_init(p);
* ret = trace_seq_printf(s, "%s: ", <call>);
* if (ret)
* ret = trace_seq_printf(s, <TP_printk> "\n");
- * put_cpu();
* if (!ret)
* return TRACE_TYPE_PARTIAL_LINE;
*
@@ -216,7 +211,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
struct trace_seq *s = &iter->seq; \
struct ftrace_raw_##call *field; \
struct trace_entry *entry; \
- struct trace_seq *p; \
+ struct trace_seq *p = &iter->tmp_seq; \
int ret; \
\
event = container_of(trace_event, struct ftrace_event_call, \
@@ -231,12 +226,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
\
field = (typeof(field))entry; \
\
- p = &get_cpu_var(ftrace_event_seq); \
trace_seq_init(p); \
ret = trace_seq_printf(s, "%s: ", event->name); \
if (ret) \
ret = trace_seq_printf(s, print); \
- put_cpu(); \
if (!ret) \
return TRACE_TYPE_PARTIAL_LINE; \
\
@@ -255,7 +248,7 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
struct trace_seq *s = &iter->seq; \
struct ftrace_raw_##template *field; \
struct trace_entry *entry; \
- struct trace_seq *p; \
+ struct trace_seq *p = &iter->tmp_seq; \
int ret; \
\
entry = iter->ent; \
@@ -267,12 +260,10 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags, \
\
field = (typeof(field))entry; \
\
- p = &get_cpu_var(ftrace_event_seq); \
trace_seq_init(p); \
ret = trace_seq_printf(s, "%s: ", #call); \
if (ret) \
ret = trace_seq_printf(s, print); \
- put_cpu(); \
if (!ret) \
return TRACE_TYPE_PARTIAL_LINE; \
\
@@ -439,6 +430,7 @@ static inline notrace int ftrace_get_offsets_##call( \
* .fields = LIST_HEAD_INIT(event_class_##call.fields),
* .raw_init = trace_event_raw_init,
* .probe = ftrace_raw_event_##call,
+ * .reg = ftrace_event_reg,
* };
*
* static struct ftrace_event_call __used
@@ -567,6 +559,7 @@ static struct ftrace_event_class __used event_class_##call = { \
.fields = LIST_HEAD_INIT(event_class_##call.fields),\
.raw_init = trace_event_raw_init, \
.probe = ftrace_raw_event_##call, \
+ .reg = ftrace_event_reg, \
_TRACE_PERF_INIT(call) \
};
@@ -705,7 +698,7 @@ perf_trace_##call(void *__data, proto) \
int __data_size; \
int rctx; \
\
- perf_fetch_caller_regs(&__regs, 1); \
+ perf_fetch_caller_regs(&__regs); \
\
__data_size = ftrace_get_offsets_##call(&__data_offsets, args); \
__entry_size = ALIGN(__data_size + sizeof(*entry) + sizeof(u32),\
diff --git a/include/trace/syscall.h b/include/trace/syscall.h
index 257e08960d7..31966a4fb8c 100644
--- a/include/trace/syscall.h
+++ b/include/trace/syscall.h
@@ -26,7 +26,6 @@ struct syscall_metadata {
const char **types;
const char **args;
struct list_head enter_fields;
- struct list_head exit_fields;
struct ftrace_event_call *enter_event;
struct ftrace_event_call *exit_event;
diff --git a/init/main.c b/init/main.c
index 4ddb53f04f2..b03a4c1f69f 100644
--- a/init/main.c
+++ b/init/main.c
@@ -66,11 +66,9 @@
#include <linux/ftrace.h>
#include <linux/async.h>
#include <linux/kmemcheck.h>
-#include <linux/kmemtrace.h>
#include <linux/sfi.h>
#include <linux/shmem_fs.h>
#include <linux/slab.h>
-#include <trace/boot.h>
#include <asm/io.h>
#include <asm/bugs.h>
@@ -664,7 +662,6 @@ asmlinkage void __init start_kernel(void)
#endif
page_cgroup_init();
enable_debug_pagealloc();
- kmemtrace_init();
kmemleak_init();
debug_objects_mem_init();
idr_init_cache();
@@ -726,38 +723,33 @@ int initcall_debug;
core_param(initcall_debug, initcall_debug, bool, 0644);
static char msgbuf[64];
-static struct boot_trace_call call;
-static struct boot_trace_ret ret;
int do_one_initcall(initcall_t fn)
{
int count = preempt_count();
ktime_t calltime, delta, rettime;
+ unsigned long long duration;
+ int ret;
if (initcall_debug) {
- call.caller = task_pid_nr(current);
- printk("calling %pF @ %i\n", fn, call.caller);
+ printk("calling %pF @ %i\n", fn, task_pid_nr(current));
calltime = ktime_get();
- trace_boot_call(&call, fn);
- enable_boot_trace();
}
- ret.result = fn();
+ ret = fn();
if (initcall_debug) {
- disable_boot_trace();
rettime = ktime_get();
delta = ktime_sub(rettime, calltime);
- ret.duration = (unsigned long long) ktime_to_ns(delta) >> 10;
- trace_boot_ret(&ret, fn);
- printk("initcall %pF returned %d after %Ld usecs\n", fn,
- ret.result, ret.duration);
+ duration = (unsigned long long) ktime_to_ns(delta) >> 10;
+ printk("initcall %pF returned %d after %lld usecs\n", fn,
+ ret, duration);
}
msgbuf[0] = 0;
- if (ret.result && ret.result != -ENODEV && initcall_debug)
- sprintf(msgbuf, "error code %d ", ret.result);
+ if (ret && ret != -ENODEV && initcall_debug)
+ sprintf(msgbuf, "error code %d ", ret);
if (preempt_count() != count) {
strlcat(msgbuf, "preemption imbalance ", sizeof(msgbuf));
@@ -771,7 +763,7 @@ int do_one_initcall(initcall_t fn)
printk("initcall %pF returned with %s\n", fn, msgbuf);
}
- return ret.result;
+ return ret;
}
@@ -895,7 +887,6 @@ static int __init kernel_init(void * unused)
smp_prepare_cpus(setup_max_cpus);
do_pre_smp_initcalls();
- start_boot_trace();
smp_init();
sched_init_smp();
diff --git a/kernel/Makefile b/kernel/Makefile
index 057472fbc27..ce53fb2bd1d 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -76,8 +76,8 @@ obj-$(CONFIG_GCOV_KERNEL) += gcov/
obj-$(CONFIG_AUDIT_TREE) += audit_tree.o
obj-$(CONFIG_KPROBES) += kprobes.o
obj-$(CONFIG_KGDB) += debug/
-obj-$(CONFIG_DETECT_SOFTLOCKUP) += softlockup.o
obj-$(CONFIG_DETECT_HUNG_TASK) += hung_task.o
+obj-$(CONFIG_LOCKUP_DETECTOR) += watchdog.o
obj-$(CONFIG_GENERIC_HARDIRQS) += irq/
obj-$(CONFIG_SECCOMP) += seccomp.o
obj-$(CONFIG_RCU_TORTURE_TEST) += rcutorture.o
diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c
index 71ed3ce29e1..d71a987fd2b 100644
--- a/kernel/hw_breakpoint.c
+++ b/kernel/hw_breakpoint.c
@@ -41,6 +41,7 @@
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/list.h>
#include <linux/cpu.h>
#include <linux/smp.h>
@@ -62,6 +63,9 @@ static DEFINE_PER_CPU(unsigned int, nr_bp_flexible[TYPE_MAX]);
static int nr_slots[TYPE_MAX];
+/* Keep track of the breakpoints attached to tasks */
+static LIST_HEAD(bp_task_head);
+
static int constraints_initialized;
/* Gather the number of total pinned and un-pinned bp in a cpuset */
@@ -103,33 +107,21 @@ static unsigned int max_task_bp_pinned(int cpu, enum bp_type_idx type)
return 0;
}
-static int task_bp_pinned(struct task_struct *tsk, enum bp_type_idx type)
+/*
+ * Count the number of breakpoints of the same type and same task.
+ * The given event must not be on the list.
+ */
+static int task_bp_pinned(struct perf_event *bp, enum bp_type_idx type)
{
- struct perf_event_context *ctx = tsk->perf_event_ctxp;
- struct list_head *list;
- struct perf_event *bp;
- unsigned long flags;
+ struct perf_event_context *ctx = bp->ctx;
+ struct perf_event *iter;
int count = 0;
- if (WARN_ONCE(!ctx, "No perf context for this task"))
- return 0;
-
- list = &ctx->event_list;
-
- raw_spin_lock_irqsave(&ctx->lock, flags);
-
- /*
- * The current breakpoint counter is not included in the list
- * at the open() callback time
- */
- list_for_each_entry(bp, list, event_entry) {
- if (bp->attr.type == PERF_TYPE_BREAKPOINT)
- if (find_slot_idx(bp) == type)
- count += hw_breakpoint_weight(bp);
+ list_for_each_entry(iter, &bp_task_head, hw.bp_list) {
+ if (iter->ctx == ctx && find_slot_idx(iter) == type)
+ count += hw_breakpoint_weight(iter);
}
- raw_spin_unlock_irqrestore(&ctx->lock, flags);
-
return count;
}
@@ -149,7 +141,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
if (!tsk)
slots->pinned += max_task_bp_pinned(cpu, type);
else
- slots->pinned += task_bp_pinned(tsk, type);
+ slots->pinned += task_bp_pinned(bp, type);
slots->flexible = per_cpu(nr_bp_flexible[type], cpu);
return;
@@ -162,7 +154,7 @@ fetch_bp_busy_slots(struct bp_busy_slots *slots, struct perf_event *bp,
if (!tsk)
nr += max_task_bp_pinned(cpu, type);
else
- nr += task_bp_pinned(tsk, type);
+ nr += task_bp_pinned(bp, type);
if (nr > slots->pinned)
slots->pinned = nr;
@@ -188,7 +180,7 @@ fetch_this_slot(struct bp_busy_slots *slots, int weight)
/*
* Add a pinned breakpoint for the given task in our constraint table
*/
-static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
+static void toggle_bp_task_slot(struct perf_event *bp, int cpu, bool enable,
enum bp_type_idx type, int weight)
{
unsigned int *tsk_pinned;
@@ -196,10 +188,11 @@ static void toggle_bp_task_slot(struct task_struct *tsk, int cpu, bool enable,
int old_idx = 0;
int idx = 0;
- old_count = task_bp_pinned(tsk, type);
+ old_count = task_bp_pinned(bp, type);
old_idx = old_count - 1;
idx = old_idx + weight;
+ /* tsk_pinned[n] is the number of tasks having n breakpoints */
tsk_pinned = per_cpu(nr_task_bp_pinned[type], cpu);
if (enable) {
tsk_pinned[idx]++;
@@ -222,23 +215,30 @@ toggle_bp_slot(struct perf_event *bp, bool enable, enum bp_type_idx type,
int cpu = bp->cpu;
struct task_struct *tsk = bp->ctx->task;
+ /* Pinned counter cpu profiling */
+ if (!tsk) {
+
+ if (enable)
+ per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
+ else
+ per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+ return;
+ }
+
/* Pinned counter task profiling */
- if (tsk) {
- if (cpu >= 0) {
- toggle_bp_task_slot(tsk, cpu, enable, type, weight);
- return;
- }
+ if (!enable)
+ list_del(&bp->hw.bp_list);
+
+ if (cpu >= 0) {
+ toggle_bp_task_slot(bp, cpu, enable, type, weight);
+ } else {
for_each_online_cpu(cpu)
- toggle_bp_task_slot(tsk, cpu, enable, type, weight);
- return;
+ toggle_bp_task_slot(bp, cpu, enable, type, weight);
}
- /* Pinned counter cpu profiling */
if (enable)
- per_cpu(nr_cpu_bp_pinned[type], bp->cpu) += weight;
- else
- per_cpu(nr_cpu_bp_pinned[type], bp->cpu) -= weight;
+ list_add_tail(&bp->hw.bp_list, &bp_task_head);
}
/*
@@ -312,6 +312,10 @@ static int __reserve_bp_slot(struct perf_event *bp)
weight = hw_breakpoint_weight(bp);
fetch_bp_busy_slots(&slots, bp, type);
+ /*
+ * Simulate the addition of this breakpoint to the constraints
+ * and see the result.
+ */
fetch_this_slot(&slots, weight);
/* Flexible counters need to keep at least one slot */
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index ff86c558af4..c772a3d4000 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -675,7 +675,6 @@ group_sched_in(struct perf_event *group_event,
struct perf_event *event, *partial_group = NULL;
const struct pmu *pmu = group_event->pmu;
bool txn = false;
- int ret;
if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;
@@ -703,14 +702,8 @@ group_sched_in(struct perf_event *group_event,
}
}
- if (!txn)
- return 0;
-
- ret = pmu->commit_txn(pmu);
- if (!ret) {
- pmu->cancel_txn(pmu);
+ if (!txn || !pmu->commit_txn(pmu))
return 0;
- }
group_error:
/*
@@ -1155,9 +1148,9 @@ static void __perf_event_sync_stat(struct perf_event *event,
* In order to keep per-task stats reliable we need to flip the event
* values when we flip the contexts.
*/
- value = atomic64_read(&next_event->count);
- value = atomic64_xchg(&event->count, value);
- atomic64_set(&next_event->count, value);
+ value = local64_read(&next_event->count);
+ value = local64_xchg(&event->count, value);
+ local64_set(&next_event->count, value);
swap(event->total_time_enabled, next_event->total_time_enabled);
swap(event->total_time_running, next_event->total_time_running);
@@ -1547,10 +1540,10 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
hwc->sample_period = sample_period;
- if (atomic64_read(&hwc->period_left) > 8*sample_period) {
+ if (local64_read(&hwc->period_left) > 8*sample_period) {
perf_disable();
perf_event_stop(event);
- atomic64_set(&hwc->period_left, 0);
+ local64_set(&hwc->period_left, 0);
perf_event_start(event);
perf_enable();
}
@@ -1591,7 +1584,7 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
perf_disable();
event->pmu->read(event);
- now = atomic64_read(&event->count);
+ now = local64_read(&event->count);
delta = now - hwc->freq_count_stamp;
hwc->freq_count_stamp = now;
@@ -1743,6 +1736,11 @@ static void __perf_event_read(void *info)
event->pmu->read(event);
}
+static inline u64 perf_event_count(struct perf_event *event)
+{
+ return local64_read(&event->count) + atomic64_read(&event->child_count);
+}
+
static u64 perf_event_read(struct perf_event *event)
{
/*
@@ -1762,7 +1760,7 @@ static u64 perf_event_read(struct perf_event *event)
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
- return atomic64_read(&event->count);
+ return perf_event_count(event);
}
/*
@@ -1883,7 +1881,7 @@ static void free_event_rcu(struct rcu_head *head)
}
static void perf_pending_sync(struct perf_event *event);
-static void perf_mmap_data_put(struct perf_mmap_data *data);
+static void perf_buffer_put(struct perf_buffer *buffer);
static void free_event(struct perf_event *event)
{
@@ -1891,7 +1889,7 @@ static void free_event(struct perf_event *event)
if (!event->parent) {
atomic_dec(&nr_events);
- if (event->attr.mmap)
+ if (event->attr.mmap || event->attr.mmap_data)
atomic_dec(&nr_mmap_events);
if (event->attr.comm)
atomic_dec(&nr_comm_events);
@@ -1899,9 +1897,9 @@ static void free_event(struct perf_event *event)
atomic_dec(&nr_task_events);
}
- if (event->data) {
- perf_mmap_data_put(event->data);
- event->data = NULL;
+ if (event->buffer) {
+ perf_buffer_put(event->buffer);
+ event->buffer = NULL;
}
if (event->destroy)
@@ -2126,13 +2124,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
static unsigned int perf_poll(struct file *file, poll_table *wait)
{
struct perf_event *event = file->private_data;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned int events = POLL_HUP;
rcu_read_lock();
- data = rcu_dereference(event->data);
- if (data)
- events = atomic_xchg(&data->poll, 0);
+ buffer = rcu_dereference(event->buffer);
+ if (buffer)
+ events = atomic_xchg(&buffer->poll, 0);
rcu_read_unlock();
poll_wait(file, &event->waitq, wait);
@@ -2143,7 +2141,7 @@ static unsigned int perf_poll(struct file *file, poll_table *wait)
static void perf_event_reset(struct perf_event *event)
{
(void)perf_event_read(event);
- atomic64_set(&event->count, 0);
+ local64_set(&event->count, 0);
perf_event_update_userpage(event);
}
@@ -2342,14 +2340,14 @@ static int perf_event_index(struct perf_event *event)
void perf_event_update_userpage(struct perf_event *event)
{
struct perf_event_mmap_page *userpg;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
rcu_read_lock();
- data = rcu_dereference(event->data);
- if (!data)
+ buffer = rcu_dereference(event->buffer);
+ if (!buffer)
goto unlock;
- userpg = data->user_page;
+ userpg = buffer->user_page;
/*
* Disable preemption so as to not let the corresponding user-space
@@ -2359,9 +2357,9 @@ void perf_event_update_userpage(struct perf_event *event)
++userpg->lock;
barrier();
userpg->index = perf_event_index(event);
- userpg->offset = atomic64_read(&event->count);
+ userpg->offset = perf_event_count(event);
if (event->state == PERF_EVENT_STATE_ACTIVE)
- userpg->offset -= atomic64_read(&event->hw.prev_count);
+ userpg->offset -= local64_read(&event->hw.prev_count);
userpg->time_enabled = event->total_time_enabled +
atomic64_read(&event->child_total_time_enabled);
@@ -2376,6 +2374,25 @@ unlock:
rcu_read_unlock();
}
+static unsigned long perf_data_size(struct perf_buffer *buffer);
+
+static void
+perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags)
+{
+ long max_size = perf_data_size(buffer);
+
+ if (watermark)
+ buffer->watermark = min(max_size, watermark);
+
+ if (!buffer->watermark)
+ buffer->watermark = max_size / 2;
+
+ if (flags & PERF_BUFFER_WRITABLE)
+ buffer->writable = 1;
+
+ atomic_set(&buffer->refcount, 1);
+}
+
#ifndef CONFIG_PERF_USE_VMALLOC
/*
@@ -2383,15 +2400,15 @@ unlock:
*/
static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
{
- if (pgoff > data->nr_pages)
+ if (pgoff > buffer->nr_pages)
return NULL;
if (pgoff == 0)
- return virt_to_page(data->user_page);
+ return virt_to_page(buffer->user_page);
- return virt_to_page(data->data_pages[pgoff - 1]);
+ return virt_to_page(buffer->data_pages[pgoff - 1]);
}
static void *perf_mmap_alloc_page(int cpu)
@@ -2407,42 +2424,44 @@ static void *perf_mmap_alloc_page(int cpu)
return page_address(page);
}
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long size;
int i;
- size = sizeof(struct perf_mmap_data);
+ size = sizeof(struct perf_buffer);
size += nr_pages * sizeof(void *);
- data = kzalloc(size, GFP_KERNEL);
- if (!data)
+ buffer = kzalloc(size, GFP_KERNEL);
+ if (!buffer)
goto fail;
- data->user_page = perf_mmap_alloc_page(event->cpu);
- if (!data->user_page)
+ buffer->user_page = perf_mmap_alloc_page(cpu);
+ if (!buffer->user_page)
goto fail_user_page;
for (i = 0; i < nr_pages; i++) {
- data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
- if (!data->data_pages[i])
+ buffer->data_pages[i] = perf_mmap_alloc_page(cpu);
+ if (!buffer->data_pages[i])
goto fail_data_pages;
}
- data->nr_pages = nr_pages;
+ buffer->nr_pages = nr_pages;
+
+ perf_buffer_init(buffer, watermark, flags);
- return data;
+ return buffer;
fail_data_pages:
for (i--; i >= 0; i--)
- free_page((unsigned long)data->data_pages[i]);
+ free_page((unsigned long)buffer->data_pages[i]);
- free_page((unsigned long)data->user_page);
+ free_page((unsigned long)buffer->user_page);
fail_user_page:
- kfree(data);
+ kfree(buffer);
fail:
return NULL;
@@ -2456,17 +2475,17 @@ static void perf_mmap_free_page(unsigned long addr)
__free_page(page);
}
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
{
int i;
- perf_mmap_free_page((unsigned long)data->user_page);
- for (i = 0; i < data->nr_pages; i++)
- perf_mmap_free_page((unsigned long)data->data_pages[i]);
- kfree(data);
+ perf_mmap_free_page((unsigned long)buffer->user_page);
+ for (i = 0; i < buffer->nr_pages; i++)
+ perf_mmap_free_page((unsigned long)buffer->data_pages[i]);
+ kfree(buffer);
}
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
{
return 0;
}
@@ -2479,18 +2498,18 @@ static inline int page_order(struct perf_mmap_data *data)
* Required for architectures that have d-cache aliasing issues.
*/
-static inline int page_order(struct perf_mmap_data *data)
+static inline int page_order(struct perf_buffer *buffer)
{
- return data->page_order;
+ return buffer->page_order;
}
static struct page *
-perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
+perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff)
{
- if (pgoff > (1UL << page_order(data)))
+ if (pgoff > (1UL << page_order(buffer)))
return NULL;
- return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
+ return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE);
}
static void perf_mmap_unmark_page(void *addr)
@@ -2500,57 +2519,59 @@ static void perf_mmap_unmark_page(void *addr)
page->mapping = NULL;
}
-static void perf_mmap_data_free_work(struct work_struct *work)
+static void perf_buffer_free_work(struct work_struct *work)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
void *base;
int i, nr;
- data = container_of(work, struct perf_mmap_data, work);
- nr = 1 << page_order(data);
+ buffer = container_of(work, struct perf_buffer, work);
+ nr = 1 << page_order(buffer);
- base = data->user_page;
+ base = buffer->user_page;
for (i = 0; i < nr + 1; i++)
perf_mmap_unmark_page(base + (i * PAGE_SIZE));
vfree(base);
- kfree(data);
+ kfree(buffer);
}
-static void perf_mmap_data_free(struct perf_mmap_data *data)
+static void perf_buffer_free(struct perf_buffer *buffer)
{
- schedule_work(&data->work);
+ schedule_work(&buffer->work);
}
-static struct perf_mmap_data *
-perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
+static struct perf_buffer *
+perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long size;
void *all_buf;
- size = sizeof(struct perf_mmap_data);
+ size = sizeof(struct perf_buffer);
size += sizeof(void *);
- data = kzalloc(size, GFP_KERNEL);
- if (!data)
+ buffer = kzalloc(size, GFP_KERNEL);
+ if (!buffer)
goto fail;
- INIT_WORK(&data->work, perf_mmap_data_free_work);
+ INIT_WORK(&buffer->work, perf_buffer_free_work);
all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE);
if (!all_buf)
goto fail_all_buf;
- data->user_page = all_buf;
- data->data_pages[0] = all_buf + PAGE_SIZE;
- data->page_order = ilog2(nr_pages);
- data->nr_pages = 1;
+ buffer->user_page = all_buf;
+ buffer->data_pages[0] = all_buf + PAGE_SIZE;
+ buffer->page_order = ilog2(nr_pages);
+ buffer->nr_pages = 1;
+
+ perf_buffer_init(buffer, watermark, flags);
- return data;
+ return buffer;
fail_all_buf:
- kfree(data);
+ kfree(buffer);
fail:
return NULL;
@@ -2558,15 +2579,15 @@ fail:
#endif
-static unsigned long perf_data_size(struct perf_mmap_data *data)
+static unsigned long perf_data_size(struct perf_buffer *buffer)
{
- return data->nr_pages << (PAGE_SHIFT + page_order(data));
+ return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer));
}
static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct perf_event *event = vma->vm_file->private_data;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
int ret = VM_FAULT_SIGBUS;
if (vmf->flags & FAULT_FLAG_MKWRITE) {
@@ -2576,14 +2597,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
rcu_read_lock();
- data = rcu_dereference(event->data);
- if (!data)
+ buffer = rcu_dereference(event->buffer);
+ if (!buffer)
goto unlock;
if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE))
goto unlock;
- vmf->page = perf_mmap_to_page(data, vmf->pgoff);
+ vmf->page = perf_mmap_to_page(buffer, vmf->pgoff);
if (!vmf->page)
goto unlock;
@@ -2598,52 +2619,35 @@ unlock:
return ret;
}
-static void
-perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
-{
- long max_size = perf_data_size(data);
-
- if (event->attr.watermark) {
- data->watermark = min_t(long, max_size,
- event->attr.wakeup_watermark);
- }
-
- if (!data->watermark)
- data->watermark = max_size / 2;
-
- atomic_set(&data->refcount, 1);
- rcu_assign_pointer(event->data, data);
-}
-
-static void perf_mmap_data_free_rcu(struct rcu_head *rcu_head)
+static void perf_buffer_free_rcu(struct rcu_head *rcu_head)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
- data = container_of(rcu_head, struct perf_mmap_data, rcu_head);
- perf_mmap_data_free(data);
+ buffer = container_of(rcu_head, struct perf_buffer, rcu_head);
+ perf_buffer_free(buffer);
}
-static struct perf_mmap_data *perf_mmap_data_get(struct perf_event *event)
+static struct perf_buffer *perf_buffer_get(struct perf_event *event)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
rcu_read_lock();
- data = rcu_dereference(event->data);
- if (data) {
- if (!atomic_inc_not_zero(&data->refcount))
- data = NULL;
+ buffer = rcu_dereference(event->buffer);
+ if (buffer) {
+ if (!atomic_inc_not_zero(&buffer->refcount))
+ buffer = NULL;
}
rcu_read_unlock();
- return data;
+ return buffer;
}
-static void perf_mmap_data_put(struct perf_mmap_data *data)
+static void perf_buffer_put(struct perf_buffer *buffer)
{
- if (!atomic_dec_and_test(&data->refcount))
+ if (!atomic_dec_and_test(&buffer->refcount))
return;
- call_rcu(&data->rcu_head, perf_mmap_data_free_rcu);
+ call_rcu(&buffer->rcu_head, perf_buffer_free_rcu);
}
static void perf_mmap_open(struct vm_area_struct *vma)
@@ -2658,16 +2662,16 @@ static void perf_mmap_close(struct vm_area_struct *vma)
struct perf_event *event = vma->vm_file->private_data;
if (atomic_dec_and_mutex_lock(&event->mmap_count, &event->mmap_mutex)) {
- unsigned long size = perf_data_size(event->data);
+ unsigned long size = perf_data_size(event->buffer);
struct user_struct *user = event->mmap_user;
- struct perf_mmap_data *data = event->data;
+ struct perf_buffer *buffer = event->buffer;
atomic_long_sub((size >> PAGE_SHIFT) + 1, &user->locked_vm);
vma->vm_mm->locked_vm -= event->mmap_locked;
- rcu_assign_pointer(event->data, NULL);
+ rcu_assign_pointer(event->buffer, NULL);
mutex_unlock(&event->mmap_mutex);
- perf_mmap_data_put(data);
+ perf_buffer_put(buffer);
free_uid(user);
}
}
@@ -2685,11 +2689,11 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
unsigned long user_locked, user_lock_limit;
struct user_struct *user = current_user();
unsigned long locked, lock_limit;
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long vma_size;
unsigned long nr_pages;
long user_extra, extra;
- int ret = 0;
+ int ret = 0, flags = 0;
/*
* Don't allow mmap() of inherited per-task counters. This would
@@ -2706,7 +2710,7 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
nr_pages = (vma_size / PAGE_SIZE) - 1;
/*
- * If we have data pages ensure they're a power-of-two number, so we
+ * If we have buffer pages ensure they're a power-of-two number, so we
* can do bitmasks instead of modulo.
*/
if (nr_pages != 0 && !is_power_of_2(nr_pages))
@@ -2720,9 +2724,9 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
WARN_ON_ONCE(event->ctx->parent_ctx);
mutex_lock(&event->mmap_mutex);
- if (event->data) {
- if (event->data->nr_pages == nr_pages)
- atomic_inc(&event->data->refcount);
+ if (event->buffer) {
+ if (event->buffer->nr_pages == nr_pages)
+ atomic_inc(&event->buffer->refcount);
else
ret = -EINVAL;
goto unlock;
@@ -2752,17 +2756,18 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
goto unlock;
}
- WARN_ON(event->data);
+ WARN_ON(event->buffer);
+
+ if (vma->vm_flags & VM_WRITE)
+ flags |= PERF_BUFFER_WRITABLE;
- data = perf_mmap_data_alloc(event, nr_pages);
- if (!data) {
+ buffer = perf_buffer_alloc(nr_pages, event->attr.wakeup_watermark,
+ event->cpu, flags);
+ if (!buffer) {
ret = -ENOMEM;
goto unlock;
}
-
- perf_mmap_data_init(event, data);
- if (vma->vm_flags & VM_WRITE)
- event->data->writable = 1;
+ rcu_assign_pointer(event->buffer, buffer);
atomic_long_add(user_extra, &user->locked_vm);
event->mmap_locked = extra;
@@ -2941,11 +2946,6 @@ __weak struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
return NULL;
}
-__weak
-void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip)
-{
-}
-
/*
* We assume there is only KVM supporting the callbacks.
@@ -2971,15 +2971,15 @@ EXPORT_SYMBOL_GPL(perf_unregister_guest_info_callbacks);
/*
* Output
*/
-static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
+static bool perf_output_space(struct perf_buffer *buffer, unsigned long tail,
unsigned long offset, unsigned long head)
{
unsigned long mask;
- if (!data->writable)
+ if (!buffer->writable)
return true;
- mask = perf_data_size(data) - 1;
+ mask = perf_data_size(buffer) - 1;
offset = (offset - tail) & mask;
head = (head - tail) & mask;
@@ -2992,7 +2992,7 @@ static bool perf_output_space(struct perf_mmap_data *data, unsigned long tail,
static void perf_output_wakeup(struct perf_output_handle *handle)
{
- atomic_set(&handle->data->poll, POLL_IN);
+ atomic_set(&handle->buffer->poll, POLL_IN);
if (handle->nmi) {
handle->event->pending_wakeup = 1;
@@ -3012,45 +3012,45 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
*/
static void perf_output_get_handle(struct perf_output_handle *handle)
{
- struct perf_mmap_data *data = handle->data;
+ struct perf_buffer *buffer = handle->buffer;
preempt_disable();
- local_inc(&data->nest);
- handle->wakeup = local_read(&data->wakeup);
+ local_inc(&buffer->nest);
+ handle->wakeup = local_read(&buffer->wakeup);
}
static void perf_output_put_handle(struct perf_output_handle *handle)
{
- struct perf_mmap_data *data = handle->data;
+ struct perf_buffer *buffer = handle->buffer;
unsigned long head;
again:
- head = local_read(&data->head);
+ head = local_read(&buffer->head);
/*
* IRQ/NMI can happen here, which means we can miss a head update.
*/
- if (!local_dec_and_test(&data->nest))
+ if (!local_dec_and_test(&buffer->nest))
goto out;
/*
* Publish the known good head. Rely on the full barrier implied
- * by atomic_dec_and_test() order the data->head read and this
+ * by atomic_dec_and_test() order the buffer->head read and this
* write.
*/
- data->user_page->data_head = head;
+ buffer->user_page->data_head = head;
/*
* Now check if we missed an update, rely on the (compiler)
- * barrier in atomic_dec_and_test() to re-read data->head.
+ * barrier in atomic_dec_and_test() to re-read buffer->head.
*/
- if (unlikely(head != local_read(&data->head))) {
- local_inc(&data->nest);
+ if (unlikely(head != local_read(&buffer->head))) {
+ local_inc(&buffer->nest);
goto again;
}
- if (handle->wakeup != local_read(&data->wakeup))
+ if (handle->wakeup != local_read(&buffer->wakeup))
perf_output_wakeup(handle);
out:
@@ -3070,12 +3070,12 @@ __always_inline void perf_output_copy(struct perf_output_handle *handle,
buf += size;
handle->size -= size;
if (!handle->size) {
- struct perf_mmap_data *data = handle->data;
+ struct perf_buffer *buffer = handle->buffer;
handle->page++;
- handle->page &= data->nr_pages - 1;
- handle->addr = data->data_pages[handle->page];
- handle->size = PAGE_SIZE << page_order(data);
+ handle->page &= buffer->nr_pages - 1;
+ handle->addr = buffer->data_pages[handle->page];
+ handle->size = PAGE_SIZE << page_order(buffer);
}
} while (len);
}
@@ -3084,7 +3084,7 @@ int perf_output_begin(struct perf_output_handle *handle,
struct perf_event *event, unsigned int size,
int nmi, int sample)
{
- struct perf_mmap_data *data;
+ struct perf_buffer *buffer;
unsigned long tail, offset, head;
int have_lost;
struct {
@@ -3100,19 +3100,19 @@ int perf_output_begin(struct perf_output_handle *handle,
if (event->parent)
event = event->parent;
- data = rcu_dereference(event->data);
- if (!data)
+ buffer = rcu_dereference(event->buffer);
+ if (!buffer)
goto out;
- handle->data = data;
+ handle->buffer = buffer;
handle->event = event;
handle->nmi = nmi;
handle->sample = sample;
- if (!data->nr_pages)
+ if (!buffer->nr_pages)
goto out;
- have_lost = local_read(&data->lost);
+ have_lost = local_read(&buffer->lost);
if (have_lost)
size += sizeof(lost_event);
@@ -3124,30 +3124,30 @@ int perf_output_begin(struct perf_output_handle *handle,
* tail pointer. So that all reads will be completed before the
* write is issued.
*/
- tail = ACCESS_ONCE(data->user_page->data_tail);
+ tail = ACCESS_ONCE(buffer->user_page->data_tail);
smp_rmb();
- offset = head = local_read(&data->head);
+ offset = head = local_read(&buffer->head);
head += size;
- if (unlikely(!perf_output_space(data, tail, offset, head)))
+ if (unlikely(!perf_output_space(buffer, tail, offset, head)))
goto fail;
- } while (local_cmpxchg(&data->head, offset, head) != offset);
+ } while (local_cmpxchg(&buffer->head, offset, head) != offset);
- if (head - local_read(&data->wakeup) > data->watermark)
- local_add(data->watermark, &data->wakeup);
+ if (head - local_read(&buffer->wakeup) > buffer->watermark)
+ local_add(buffer->watermark, &buffer->wakeup);
- handle->page = offset >> (PAGE_SHIFT + page_order(data));
- handle->page &= data->nr_pages - 1;
- handle->size = offset & ((PAGE_SIZE << page_order(data)) - 1);
- handle->addr = data->data_pages[handle->page];
+ handle->page = offset >> (PAGE_SHIFT + page_order(buffer));
+ handle->page &= buffer->nr_pages - 1;
+ handle->size = offset & ((PAGE_SIZE << page_order(buffer)) - 1);
+ handle->addr = buffer->data_pages[handle->page];
handle->addr += handle->size;
- handle->size = (PAGE_SIZE << page_order(data)) - handle->size;
+ handle->size = (PAGE_SIZE << page_order(buffer)) - handle->size;
if (have_lost) {
lost_event.header.type = PERF_RECORD_LOST;
lost_event.header.misc = 0;
lost_event.header.size = sizeof(lost_event);
lost_event.id = event->id;
- lost_event.lost = local_xchg(&data->lost, 0);
+ lost_event.lost = local_xchg(&buffer->lost, 0);
perf_output_put(handle, lost_event);
}
@@ -3155,7 +3155,7 @@ int perf_output_begin(struct perf_output_handle *handle,
return 0;
fail:
- local_inc(&data->lost);
+ local_inc(&buffer->lost);
perf_output_put_handle(handle);
out:
rcu_read_unlock();
@@ -3166,15 +3166,15 @@ out:
void perf_output_end(struct perf_output_handle *handle)
{
struct perf_event *event = handle->event;
- struct perf_mmap_data *data = handle->data;
+ struct perf_buffer *buffer = handle->buffer;
int wakeup_events = event->attr.wakeup_events;
if (handle->sample && wakeup_events) {
- int events = local_inc_return(&data->events);
+ int events = local_inc_return(&buffer->events);
if (events >= wakeup_events) {
- local_sub(wakeup_events, &data->events);
- local_inc(&data->wakeup);
+ local_sub(wakeup_events, &buffer->events);
+ local_inc(&buffer->wakeup);
}
}
@@ -3211,7 +3211,7 @@ static void perf_output_read_one(struct perf_output_handle *handle,
u64 values[4];
int n = 0;
- values[n++] = atomic64_read(&event->count);
+ values[n++] = perf_event_count(event);
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) {
values[n++] = event->total_time_enabled +
atomic64_read(&event->child_total_time_enabled);
@@ -3248,7 +3248,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
if (leader != event)
leader->pmu->read(leader);
- values[n++] = atomic64_read(&leader->count);
+ values[n++] = perf_event_count(leader);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(leader);
@@ -3260,7 +3260,7 @@ static void perf_output_read_group(struct perf_output_handle *handle,
if (sub != event)
sub->pmu->read(sub);
- values[n++] = atomic64_read(&sub->count);
+ values[n++] = perf_event_count(sub);
if (read_format & PERF_FORMAT_ID)
values[n++] = primary_event_id(sub);
@@ -3491,7 +3491,7 @@ perf_event_read_event(struct perf_event *event,
/*
* task tracking -- fork/exit
*
- * enabled by: attr.comm | attr.mmap | attr.task
+ * enabled by: attr.comm | attr.mmap | attr.mmap_data | attr.task
*/
struct perf_task_event {
@@ -3541,7 +3541,8 @@ static int perf_event_task_match(struct perf_event *event)
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
- if (event->attr.comm || event->attr.mmap || event->attr.task)
+ if (event->attr.comm || event->attr.mmap ||
+ event->attr.mmap_data || event->attr.task)
return 1;
return 0;
@@ -3766,7 +3767,8 @@ static void perf_event_mmap_output(struct perf_event *event,
}
static int perf_event_mmap_match(struct perf_event *event,
- struct perf_mmap_event *mmap_event)
+ struct perf_mmap_event *mmap_event,
+ int executable)
{
if (event->state < PERF_EVENT_STATE_INACTIVE)
return 0;
@@ -3774,19 +3776,21 @@ static int perf_event_mmap_match(struct perf_event *event,
if (event->cpu != -1 && event->cpu != smp_processor_id())
return 0;
- if (event->attr.mmap)
+ if ((!executable && event->attr.mmap_data) ||
+ (executable && event->attr.mmap))
return 1;
return 0;
}
static void perf_event_mmap_ctx(struct perf_event_context *ctx,
- struct perf_mmap_event *mmap_event)
+ struct perf_mmap_event *mmap_event,
+ int executable)
{
struct perf_event *event;
list_for_each_entry_rcu(event, &ctx->event_list, event_entry) {
- if (perf_event_mmap_match(event, mmap_event))
+ if (perf_event_mmap_match(event, mmap_event, executable))
perf_event_mmap_output(event, mmap_event);
}
}
@@ -3830,6 +3834,14 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
if (!vma->vm_mm) {
name = strncpy(tmp, "[vdso]", sizeof(tmp));
goto got_name;
+ } else if (vma->vm_start <= vma->vm_mm->start_brk &&
+ vma->vm_end >= vma->vm_mm->brk) {
+ name = strncpy(tmp, "[heap]", sizeof(tmp));
+ goto got_name;
+ } else if (vma->vm_start <= vma->vm_mm->start_stack &&
+ vma->vm_end >= vma->vm_mm->start_stack) {
+ name = strncpy(tmp, "[stack]", sizeof(tmp));
+ goto got_name;
}
name = strncpy(tmp, "//anon", sizeof(tmp));
@@ -3846,17 +3858,17 @@ got_name:
rcu_read_lock();
cpuctx = &get_cpu_var(perf_cpu_context);
- perf_event_mmap_ctx(&cpuctx->ctx, mmap_event);
+ perf_event_mmap_ctx(&cpuctx->ctx, mmap_event, vma->vm_flags & VM_EXEC);
ctx = rcu_dereference(current->perf_event_ctxp);
if (ctx)
- perf_event_mmap_ctx(ctx, mmap_event);
+ perf_event_mmap_ctx(ctx, mmap_event, vma->vm_flags & VM_EXEC);
put_cpu_var(perf_cpu_context);
rcu_read_unlock();
kfree(buf);
}
-void __perf_event_mmap(struct vm_area_struct *vma)
+void perf_event_mmap(struct vm_area_struct *vma)
{
struct perf_mmap_event mmap_event;
@@ -4018,14 +4030,14 @@ static u64 perf_swevent_set_period(struct perf_event *event)
hwc->last_period = hwc->sample_period;
again:
- old = val = atomic64_read(&hwc->period_left);
+ old = val = local64_read(&hwc->period_left);
if (val < 0)
return 0;
nr = div64_u64(period + val, period);
offset = nr * period;
val -= offset;
- if (atomic64_cmpxchg(&hwc->period_left, old, val) != old)
+ if (local64_cmpxchg(&hwc->period_left, old, val) != old)
goto again;
return nr;
@@ -4064,7 +4076,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
{
struct hw_perf_event *hwc = &event->hw;
- atomic64_add(nr, &event->count);
+ local64_add(nr, &event->count);
if (!regs)
return;
@@ -4075,7 +4087,7 @@ static void perf_swevent_add(struct perf_event *event, u64 nr,
if (nr == 1 && hwc->sample_period == 1 && !event->attr.freq)
return perf_swevent_overflow(event, 1, nmi, data, regs);
- if (atomic64_add_negative(nr, &hwc->period_left))
+ if (local64_add_negative(nr, &hwc->period_left))
return;
perf_swevent_overflow(event, 0, nmi, data, regs);
@@ -4213,14 +4225,12 @@ int perf_swevent_get_recursion_context(void)
}
EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context);
-void perf_swevent_put_recursion_context(int rctx)
+void inline perf_swevent_put_recursion_context(int rctx)
{
struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
barrier();
cpuctx->recursion[rctx]--;
}
-EXPORT_SYMBOL_GPL(perf_swevent_put_recursion_context);
-
void __perf_sw_event(u32 event_id, u64 nr, int nmi,
struct pt_regs *regs, u64 addr)
@@ -4368,8 +4378,8 @@ static void cpu_clock_perf_event_update(struct perf_event *event)
u64 now;
now = cpu_clock(cpu);
- prev = atomic64_xchg(&event->hw.prev_count, now);
- atomic64_add(now - prev, &event->count);
+ prev = local64_xchg(&event->hw.prev_count, now);
+ local64_add(now - prev, &event->count);
}
static int cpu_clock_perf_event_enable(struct perf_event *event)
@@ -4377,7 +4387,7 @@ static int cpu_clock_perf_event_enable(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
int cpu = raw_smp_processor_id();
- atomic64_set(&hwc->prev_count, cpu_clock(cpu));
+ local64_set(&hwc->prev_count, cpu_clock(cpu));
perf_swevent_start_hrtimer(event);
return 0;
@@ -4409,9 +4419,9 @@ static void task_clock_perf_event_update(struct perf_event *event, u64 now)
u64 prev;
s64 delta;
- prev = atomic64_xchg(&event->hw.prev_count, now);
+ prev = local64_xchg(&event->hw.prev_count, now);
delta = now - prev;
- atomic64_add(delta, &event->count);
+ local64_add(delta, &event->count);
}
static int task_clock_perf_event_enable(struct perf_event *event)
@@ -4421,7 +4431,7 @@ static int task_clock_perf_event_enable(struct perf_event *event)
now = event->ctx->time;
- atomic64_set(&hwc->prev_count, now);
+ local64_set(&hwc->prev_count, now);
perf_swevent_start_hrtimer(event);
@@ -4601,7 +4611,7 @@ static int perf_tp_event_match(struct perf_event *event,
}
void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
- struct pt_regs *regs, struct hlist_head *head)
+ struct pt_regs *regs, struct hlist_head *head, int rctx)
{
struct perf_sample_data data;
struct perf_event *event;
@@ -4615,12 +4625,12 @@ void perf_tp_event(u64 addr, u64 count, void *record, int entry_size,
perf_sample_data_init(&data, addr);
data.raw = &raw;
- rcu_read_lock();
hlist_for_each_entry_rcu(event, node, head, hlist_entry) {
if (perf_tp_event_match(event, &data, regs))
perf_swevent_add(event, count, 1, &data, regs);
}
- rcu_read_unlock();
+
+ perf_swevent_put_recursion_context(rctx);
}
EXPORT_SYMBOL_GPL(perf_tp_event);
@@ -4864,7 +4874,7 @@ perf_event_alloc(struct perf_event_attr *attr,
hwc->sample_period = 1;
hwc->last_period = hwc->sample_period;
- atomic64_set(&hwc->period_left, hwc->sample_period);
+ local64_set(&hwc->period_left, hwc->sample_period);
/*
* we currently do not support PERF_FORMAT_GROUP on inherited events
@@ -4913,7 +4923,7 @@ done:
if (!event->parent) {
atomic_inc(&nr_events);
- if (event->attr.mmap)
+ if (event->attr.mmap || event->attr.mmap_data)
atomic_inc(&nr_mmap_events);
if (event->attr.comm)
atomic_inc(&nr_comm_events);
@@ -5007,7 +5017,7 @@ err_size:
static int
perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{
- struct perf_mmap_data *data = NULL, *old_data = NULL;
+ struct perf_buffer *buffer = NULL, *old_buffer = NULL;
int ret = -EINVAL;
if (!output_event)
@@ -5037,19 +5047,19 @@ set:
if (output_event) {
/* get the buffer we want to redirect to */
- data = perf_mmap_data_get(output_event);
- if (!data)
+ buffer = perf_buffer_get(output_event);
+ if (!buffer)
goto unlock;
}
- old_data = event->data;
- rcu_assign_pointer(event->data, data);
+ old_buffer = event->buffer;
+ rcu_assign_pointer(event->buffer, buffer);
ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
- if (old_data)
- perf_mmap_data_put(old_data);
+ if (old_buffer)
+ perf_buffer_put(old_buffer);
out:
return ret;
}
@@ -5298,7 +5308,7 @@ inherit_event(struct perf_event *parent_event,
hwc->sample_period = sample_period;
hwc->last_period = sample_period;
- atomic64_set(&hwc->period_left, sample_period);
+ local64_set(&hwc->period_left, sample_period);
}
child_event->overflow_handler = parent_event->overflow_handler;
@@ -5359,12 +5369,12 @@ static void sync_child_event(struct perf_event *child_event,
if (child_event->attr.inherit_stat)
perf_event_read_event(child_event, child);
- child_val = atomic64_read(&child_event->count);
+ child_val = perf_event_count(child_event);
/*
* Add back the child's count to the parent's count:
*/
- atomic64_add(child_val, &parent_event->count);
+ atomic64_add(child_val, &parent_event->child_count);
atomic64_add(child_event->total_time_enabled,
&parent_event->child_total_time_enabled);
atomic64_add(child_event->total_time_running,
diff --git a/kernel/sched.c b/kernel/sched.c
index f52a8801b7a..265cf3a2b5d 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3726,7 +3726,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
* off of preempt_enable. Kernel preemptions off return from interrupt
* occur there and call schedule directly.
*/
-asmlinkage void __sched preempt_schedule(void)
+asmlinkage void __sched notrace preempt_schedule(void)
{
struct thread_info *ti = current_thread_info();
@@ -3738,9 +3738,9 @@ asmlinkage void __sched preempt_schedule(void)
return;
do {
- add_preempt_count(PREEMPT_ACTIVE);
+ add_preempt_count_notrace(PREEMPT_ACTIVE);
schedule();
- sub_preempt_count(PREEMPT_ACTIVE);
+ sub_preempt_count_notrace(PREEMPT_ACTIVE);
/*
* Check again in case we missed a preemption opportunity
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
deleted file mode 100644
index 4b493f67dcb..00000000000
--- a/kernel/softlockup.c
+++ /dev/null
@@ -1,293 +0,0 @@
-/*
- * Detect Soft Lockups
- *
- * started by Ingo Molnar, Copyright (C) 2005, 2006 Red Hat, Inc.
- *
- * this code detects soft lockups: incidents in where on a CPU
- * the kernel does not reschedule for 10 seconds or more.
- */
-#include <linux/mm.h>
-#include <linux/cpu.h>
-#include <linux/nmi.h>
-#include <linux/init.h>
-#include <linux/delay.h>
-#include <linux/freezer.h>
-#include <linux/kthread.h>
-#include <linux/lockdep.h>
-#include <linux/notifier.h>
-#include <linux/module.h>
-#include <linux/sysctl.h>
-
-#include <asm/irq_regs.h>
-
-static DEFINE_SPINLOCK(print_lock);
-
-static DEFINE_PER_CPU(unsigned long, softlockup_touch_ts); /* touch timestamp */
-static DEFINE_PER_CPU(unsigned long, softlockup_print_ts); /* print timestamp */
-static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
-static DEFINE_PER_CPU(bool, softlock_touch_sync);
-
-static int __read_mostly did_panic;
-int __read_mostly softlockup_thresh = 60;
-
-/*
- * Should we panic (and reboot, if panic_timeout= is set) when a
- * soft-lockup occurs:
- */
-unsigned int __read_mostly softlockup_panic =
- CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC_VALUE;
-
-static int __init softlockup_panic_setup(char *str)
-{
- softlockup_panic = simple_strtoul(str, NULL, 0);
-
- return 1;
-}
-__setup("softlockup_panic=", softlockup_panic_setup);
-
-static int
-softlock_panic(struct notifier_block *this, unsigned long event, void *ptr)
-{
- did_panic = 1;
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block panic_block = {
- .notifier_call = softlock_panic,
-};
-
-/*
- * Returns seconds, approximately. We don't need nanosecond
- * resolution, and we don't need to waste time with a big divide when
- * 2^30ns == 1.074s.
- */
-static unsigned long get_timestamp(int this_cpu)
-{
- return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
-}
-
-static void __touch_softlockup_watchdog(void)
-{
- int this_cpu = raw_smp_processor_id();
-
- __raw_get_cpu_var(softlockup_touch_ts) = get_timestamp(this_cpu);
-}
-
-void touch_softlockup_watchdog(void)
-{
- __raw_get_cpu_var(softlockup_touch_ts) = 0;
-}
-EXPORT_SYMBOL(touch_softlockup_watchdog);
-
-void touch_softlockup_watchdog_sync(void)
-{
- __raw_get_cpu_var(softlock_touch_sync) = true;
- __raw_get_cpu_var(softlockup_touch_ts) = 0;
-}
-
-void touch_all_softlockup_watchdogs(void)
-{
- int cpu;
-
- /* Cause each CPU to re-update its timestamp rather than complain */
- for_each_online_cpu(cpu)
- per_cpu(softlockup_touch_ts, cpu) = 0;
-}
-EXPORT_SYMBOL(touch_all_softlockup_watchdogs);
-
-int proc_dosoftlockup_thresh(struct ctl_table *table, int write,
- void __user *buffer,
- size_t *lenp, loff_t *ppos)
-{
- touch_all_softlockup_watchdogs();
- return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
-}
-
-/*
- * This callback runs from the timer interrupt, and checks
- * whether the watchdog thread has hung or not:
- */
-void softlockup_tick(void)
-{
- int this_cpu = smp_processor_id();
- unsigned long touch_ts = per_cpu(softlockup_touch_ts, this_cpu);
- unsigned long print_ts;
- struct pt_regs *regs = get_irq_regs();
- unsigned long now;
-
- /* Is detection switched off? */
- if (!per_cpu(softlockup_watchdog, this_cpu) || softlockup_thresh <= 0) {
- /* Be sure we don't false trigger if switched back on */
- if (touch_ts)
- per_cpu(softlockup_touch_ts, this_cpu) = 0;
- return;
- }
-
- if (touch_ts == 0) {
- if (unlikely(per_cpu(softlock_touch_sync, this_cpu))) {
- /*
- * If the time stamp was touched atomically
- * make sure the scheduler tick is up to date.
- */
- per_cpu(softlock_touch_sync, this_cpu) = false;
- sched_clock_tick();
- }
- __touch_softlockup_watchdog();
- return;
- }
-
- print_ts = per_cpu(softlockup_print_ts, this_cpu);
-
- /* report at most once a second */
- if (print_ts == touch_ts || did_panic)
- return;
-
- /* do not print during early bootup: */
- if (unlikely(system_state != SYSTEM_RUNNING)) {
- __touch_softlockup_watchdog();
- return;
- }
-
- now = get_timestamp(this_cpu);
-
- /*
- * Wake up the high-prio watchdog task twice per
- * threshold timespan.
- */
- if (time_after(now - softlockup_thresh/2, touch_ts))
- wake_up_process(per_cpu(softlockup_watchdog, this_cpu));
-
- /* Warn about unreasonable delays: */
- if (time_before_eq(now - softlockup_thresh, touch_ts))
- return;
-
- per_cpu(softlockup_print_ts, this_cpu) = touch_ts;
-
- spin_lock(&print_lock);
- printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %lus! [%s:%d]\n",
- this_cpu, now - touch_ts,
- current->comm, task_pid_nr(current));
- print_modules();
- print_irqtrace_events(current);
- if (regs)
- show_regs(regs);
- else
- dump_stack();
- spin_unlock(&print_lock);
-
- if (softlockup_panic)
- panic("softlockup: hung tasks");
-}
-
-/*
- * The watchdog thread - runs every second and touches the timestamp.
- */
-static int watchdog(void *__bind_cpu)
-{
- struct sched_param param = { .sched_priority = MAX_RT_PRIO-1 };
-
- sched_setscheduler(current, SCHED_FIFO, &param);
-
- /* initialize timestamp */
- __touch_softlockup_watchdog();
-
- set_current_state(TASK_INTERRUPTIBLE);
- /*
- * Run briefly once per second to reset the softlockup timestamp.
- * If this gets delayed for more than 60 seconds then the
- * debug-printout triggers in softlockup_tick().
- */
- while (!kthread_should_stop()) {
- __touch_softlockup_watchdog();
- schedule();
-
- if (kthread_should_stop())
- break;
-
- set_current_state(TASK_INTERRUPTIBLE);
- }
- __set_current_state(TASK_RUNNING);
-
- return 0;
-}
-
-/*
- * Create/destroy watchdog threads as CPUs come and go:
- */
-static int __cpuinit
-cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
-{
- int hotcpu = (unsigned long)hcpu;
- struct task_struct *p;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- BUG_ON(per_cpu(softlockup_watchdog, hotcpu));
- p = kthread_create(watchdog, hcpu, "watchdog/%d", hotcpu);
- if (IS_ERR(p)) {
- printk(KERN_ERR "watchdog for %i failed\n", hotcpu);
- return NOTIFY_BAD;
- }
- per_cpu(softlockup_touch_ts, hotcpu) = 0;
- per_cpu(softlockup_watchdog, hotcpu) = p;
- kthread_bind(p, hotcpu);
- break;
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- wake_up_process(per_cpu(softlockup_watchdog, hotcpu));
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- if (!per_cpu(softlockup_watchdog, hotcpu))
- break;
- /* Unbind so it can run. Fall thru. */
- kthread_bind(per_cpu(softlockup_watchdog, hotcpu),
- cpumask_any(cpu_online_mask));
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- p = per_cpu(softlockup_watchdog, hotcpu);
- per_cpu(softlockup_watchdog, hotcpu) = NULL;
- kthread_stop(p);
- break;
-#endif /* CONFIG_HOTPLUG_CPU */
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block __cpuinitdata cpu_nfb = {
- .notifier_call = cpu_callback
-};
-
-static int __initdata nosoftlockup;
-
-static int __init nosoftlockup_setup(char *str)
-{
- nosoftlockup = 1;
- return 1;
-}
-__setup("nosoftlockup", nosoftlockup_setup);
-
-static int __init spawn_softlockup_task(void)
-{
- void *cpu = (void *)(long)smp_processor_id();
- int err;
-
- if (nosoftlockup)
- return 0;
-
- err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
- if (err == NOTIFY_BAD) {
- BUG();
- return 1;
- }
- cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
- register_cpu_notifier(&cpu_nfb);
-
- atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
-
- return 0;
-}
-early_initcall(spawn_softlockup_task);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index d24f761f487..6f79c7f81c9 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -76,6 +76,10 @@
#include <scsi/sg.h>
#endif
+#ifdef CONFIG_LOCKUP_DETECTOR
+#include <linux/nmi.h>
+#endif
+
#if defined(CONFIG_SYSCTL)
@@ -106,7 +110,7 @@ extern int blk_iopoll_enabled;
#endif
/* Constants used for minimum and maximum */
-#ifdef CONFIG_DETECT_SOFTLOCKUP
+#ifdef CONFIG_LOCKUP_DETECTOR
static int sixty = 60;
static int neg_one = -1;
#endif
@@ -710,7 +714,34 @@ static struct ctl_table kern_table[] = {
.mode = 0444,
.proc_handler = proc_dointvec,
},
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+#if defined(CONFIG_LOCKUP_DETECTOR)
+ {
+ .procname = "watchdog",
+ .data = &watchdog_enabled,
+ .maxlen = sizeof (int),
+ .mode = 0644,
+ .proc_handler = proc_dowatchdog_enabled,
+ },
+ {
+ .procname = "watchdog_thresh",
+ .data = &softlockup_thresh,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dowatchdog_thresh,
+ .extra1 = &neg_one,
+ .extra2 = &sixty,
+ },
+ {
+ .procname = "softlockup_panic",
+ .data = &softlockup_panic,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ .extra1 = &zero,
+ .extra2 = &one,
+ },
+#endif
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86) && !defined(CONFIG_LOCKUP_DETECTOR)
{
.procname = "unknown_nmi_panic",
.data = &unknown_nmi_panic,
@@ -813,26 +844,6 @@ static struct ctl_table kern_table[] = {
.proc_handler = proc_dointvec,
},
#endif
-#ifdef CONFIG_DETECT_SOFTLOCKUP
- {
- .procname = "softlockup_panic",
- .data = &softlockup_panic,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec_minmax,
- .extra1 = &zero,
- .extra2 = &one,
- },
- {
- .procname = "softlockup_thresh",
- .data = &softlockup_thresh,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dosoftlockup_thresh,
- .extra1 = &neg_one,
- .extra2 = &sixty,
- },
-#endif
#ifdef CONFIG_DETECT_HUNG_TASK
{
.procname = "hung_task_panic",
diff --git a/kernel/timer.c b/kernel/timer.c
index efde11e197c..6aa6f7e69ad 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1302,7 +1302,6 @@ void run_local_timers(void)
{
hrtimer_run_queues();
raise_softirq(TIMER_SOFTIRQ);
- softlockup_tick();
}
/*
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 8b1797c4545..c7683fd8a03 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -194,15 +194,6 @@ config PREEMPT_TRACER
enabled. This option and the irqs-off timing option can be
used together or separately.)
-config SYSPROF_TRACER
- bool "Sysprof Tracer"
- depends on X86
- select GENERIC_TRACER
- select CONTEXT_SWITCH_TRACER
- help
- This tracer provides the trace needed by the 'Sysprof' userspace
- tool.
-
config SCHED_TRACER
bool "Scheduling Latency Tracer"
select GENERIC_TRACER
@@ -229,23 +220,6 @@ config FTRACE_SYSCALLS
help
Basic tracer to catch the syscall entry and exit events.
-config BOOT_TRACER
- bool "Trace boot initcalls"
- select GENERIC_TRACER
- select CONTEXT_SWITCH_TRACER
- help
- This tracer helps developers to optimize boot times: it records
- the timings of the initcalls and traces key events and the identity
- of tasks that can cause boot delays, such as context-switches.
-
- Its aim is to be parsed by the scripts/bootgraph.pl tool to
- produce pretty graphics about boot inefficiencies, giving a visual
- representation of the delays during initcalls - but the raw
- /debug/tracing/trace text output is readable too.
-
- You must pass in initcall_debug and ftrace=initcall to the kernel
- command line to enable this on bootup.
-
config TRACE_BRANCH_PROFILING
bool
select GENERIC_TRACER
@@ -325,28 +299,6 @@ config BRANCH_TRACER
Say N if unsure.
-config KSYM_TRACER
- bool "Trace read and write access on kernel memory locations"
- depends on HAVE_HW_BREAKPOINT
- select TRACING
- help
- This tracer helps find read and write operations on any given kernel
- symbol i.e. /proc/kallsyms.
-
-config PROFILE_KSYM_TRACER
- bool "Profile all kernel memory accesses on 'watched' variables"
- depends on KSYM_TRACER
- help
- This tracer profiles kernel accesses on variables watched through the
- ksym tracer ftrace plugin. Depending upon the hardware, all read
- and write operations on kernel variables can be monitored for
- accesses.
-
- The results will be displayed in:
- /debugfs/tracing/profile_ksym
-
- Say N if unsure.
-
config STACK_TRACER
bool "Trace max stack"
depends on HAVE_FUNCTION_TRACER
@@ -371,26 +323,6 @@ config STACK_TRACER
Say N if unsure.
-config KMEMTRACE
- bool "Trace SLAB allocations"
- select GENERIC_TRACER
- help
- kmemtrace provides tracing for slab allocator functions, such as
- kmalloc, kfree, kmem_cache_alloc, kmem_cache_free, etc. Collected
- data is then fed to the userspace application in order to analyse
- allocation hotspots, internal fragmentation and so on, making it
- possible to see how well an allocator performs, as well as debug
- and profile kernel code.
-
- This requires an userspace application to use. See
- Documentation/trace/kmemtrace.txt for more information.
-
- Saying Y will make the kernel somewhat larger and slower. However,
- if you disable kmemtrace at run-time or boot-time, the performance
- impact is minimal (depending on the arch the kernel is built for).
-
- If unsure, say N.
-
config WORKQUEUE_TRACER
bool "Trace workqueues"
select GENERIC_TRACER
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 4215530b490..53f338190b2 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -30,7 +30,6 @@ obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
obj-$(CONFIG_TRACING) += trace_printk.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
-obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
obj-$(CONFIG_PREEMPT_TRACER) += trace_irqsoff.o
@@ -38,10 +37,8 @@ obj-$(CONFIG_SCHED_TRACER) += trace_sched_wakeup.o
obj-$(CONFIG_NOP_TRACER) += trace_nop.o
obj-$(CONFIG_STACK_TRACER) += trace_stack.o
obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
-obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += trace_functions_graph.o
obj-$(CONFIG_TRACE_BRANCH_PROFILING) += trace_branch.o
-obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
ifeq ($(CONFIG_BLOCK),y)
@@ -55,7 +52,6 @@ obj-$(CONFIG_EVENT_TRACING) += trace_event_perf.o
endif
obj-$(CONFIG_EVENT_TRACING) += trace_events_filter.o
obj-$(CONFIG_KPROBE_EVENT) += trace_kprobe.o
-obj-$(CONFIG_KSYM_TRACER) += trace_ksym.o
obj-$(CONFIG_EVENT_TRACING) += power-traces.o
ifeq ($(CONFIG_TRACING),y)
obj-$(CONFIG_KGDB_KDB) += trace_kdb.o
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 6d2cb14f944..0d88ce9b9fb 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1883,7 +1883,6 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
struct hlist_head *hhd;
struct hlist_node *n;
unsigned long key;
- int resched;
key = hash_long(ip, FTRACE_HASH_BITS);
@@ -1897,12 +1896,12 @@ function_trace_probe_call(unsigned long ip, unsigned long parent_ip)
* period. This syncs the hash iteration and freeing of items
* on the hash. rcu_read_lock is too dangerous here.
*/
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
hlist_for_each_entry_rcu(entry, n, hhd, node) {
if (entry->ip == ip)
entry->ops->func(ip, parent_ip, &entry->data);
}
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
}
static struct ftrace_ops trace_probe_ops __read_mostly =
diff --git a/kernel/trace/kmemtrace.c b/kernel/trace/kmemtrace.c
deleted file mode 100644
index bbfc1bb1660..00000000000
--- a/kernel/trace/kmemtrace.c
+++ /dev/null
@@ -1,529 +0,0 @@
-/*
- * Memory allocator tracing
- *
- * Copyright (C) 2008 Eduard - Gabriel Munteanu
- * Copyright (C) 2008 Pekka Enberg <penberg@cs.helsinki.fi>
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- */
-
-#include <linux/tracepoint.h>
-#include <linux/seq_file.h>
-#include <linux/debugfs.h>
-#include <linux/dcache.h>
-#include <linux/fs.h>
-
-#include <linux/kmemtrace.h>
-
-#include "trace_output.h"
-#include "trace.h"
-
-/* Select an alternative, minimalistic output than the original one */
-#define TRACE_KMEM_OPT_MINIMAL 0x1
-
-static struct tracer_opt kmem_opts[] = {
- /* Default disable the minimalistic output */
- { TRACER_OPT(kmem_minimalistic, TRACE_KMEM_OPT_MINIMAL) },
- { }
-};
-
-static struct tracer_flags kmem_tracer_flags = {
- .val = 0,
- .opts = kmem_opts
-};
-
-static struct trace_array *kmemtrace_array;
-
-/* Trace allocations */
-static inline void kmemtrace_alloc(enum kmemtrace_type_id type_id,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags,
- int node)
-{
- struct ftrace_event_call *call = &event_kmem_alloc;
- struct trace_array *tr = kmemtrace_array;
- struct kmemtrace_alloc_entry *entry;
- struct ring_buffer_event *event;
-
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
- if (!event)
- return;
-
- entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, 0);
-
- entry->ent.type = TRACE_KMEM_ALLOC;
- entry->type_id = type_id;
- entry->call_site = call_site;
- entry->ptr = ptr;
- entry->bytes_req = bytes_req;
- entry->bytes_alloc = bytes_alloc;
- entry->gfp_flags = gfp_flags;
- entry->node = node;
-
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
-
- trace_wake_up();
-}
-
-static inline void kmemtrace_free(enum kmemtrace_type_id type_id,
- unsigned long call_site,
- const void *ptr)
-{
- struct ftrace_event_call *call = &event_kmem_free;
- struct trace_array *tr = kmemtrace_array;
- struct kmemtrace_free_entry *entry;
- struct ring_buffer_event *event;
-
- event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry));
- if (!event)
- return;
- entry = ring_buffer_event_data(event);
- tracing_generic_entry_update(&entry->ent, 0, 0);
-
- entry->ent.type = TRACE_KMEM_FREE;
- entry->type_id = type_id;
- entry->call_site = call_site;
- entry->ptr = ptr;
-
- if (!filter_check_discard(call, entry, tr->buffer, event))
- ring_buffer_unlock_commit(tr->buffer, event);
-
- trace_wake_up();
-}
-
-static void kmemtrace_kmalloc(void *ignore,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags)
-{
- kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
- bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmem_cache_alloc(void *ignore,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags)
-{
- kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
- bytes_req, bytes_alloc, gfp_flags, -1);
-}
-
-static void kmemtrace_kmalloc_node(void *ignore,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags,
- int node)
-{
- kmemtrace_alloc(KMEMTRACE_TYPE_KMALLOC, call_site, ptr,
- bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void kmemtrace_kmem_cache_alloc_node(void *ignore,
- unsigned long call_site,
- const void *ptr,
- size_t bytes_req,
- size_t bytes_alloc,
- gfp_t gfp_flags,
- int node)
-{
- kmemtrace_alloc(KMEMTRACE_TYPE_CACHE, call_site, ptr,
- bytes_req, bytes_alloc, gfp_flags, node);
-}
-
-static void
-kmemtrace_kfree(void *ignore, unsigned long call_site, const void *ptr)
-{
- kmemtrace_free(KMEMTRACE_TYPE_KMALLOC, call_site, ptr);
-}
-
-static void kmemtrace_kmem_cache_free(void *ignore,
- unsigned long call_site, const void *ptr)
-{
- kmemtrace_free(KMEMTRACE_TYPE_CACHE, call_site, ptr);
-}
-
-static int kmemtrace_start_probes(void)
-{
- int err;
-
- err = register_trace_kmalloc(kmemtrace_kmalloc, NULL);
- if (err)
- return err;
- err = register_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
- if (err)
- return err;
- err = register_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
- if (err)
- return err;
- err = register_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
- if (err)
- return err;
- err = register_trace_kfree(kmemtrace_kfree, NULL);
- if (err)
- return err;
- err = register_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-
- return err;
-}
-
-static void kmemtrace_stop_probes(void)
-{
- unregister_trace_kmalloc(kmemtrace_kmalloc, NULL);
- unregister_trace_kmem_cache_alloc(kmemtrace_kmem_cache_alloc, NULL);
- unregister_trace_kmalloc_node(kmemtrace_kmalloc_node, NULL);
- unregister_trace_kmem_cache_alloc_node(kmemtrace_kmem_cache_alloc_node, NULL);
- unregister_trace_kfree(kmemtrace_kfree, NULL);
- unregister_trace_kmem_cache_free(kmemtrace_kmem_cache_free, NULL);
-}
-
-static int kmem_trace_init(struct trace_array *tr)
-{
- kmemtrace_array = tr;
-
- tracing_reset_online_cpus(tr);
-
- kmemtrace_start_probes();
-
- return 0;
-}
-
-static void kmem_trace_reset(struct trace_array *tr)
-{
- kmemtrace_stop_probes();
-}
-
-static void kmemtrace_headers(struct seq_file *s)
-{
- /* Don't need headers for the original kmemtrace output */
- if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
- return;
-
- seq_printf(s, "#\n");
- seq_printf(s, "# ALLOC TYPE REQ GIVEN FLAGS "
- " POINTER NODE CALLER\n");
- seq_printf(s, "# FREE | | | | "
- " | | | |\n");
- seq_printf(s, "# |\n\n");
-}
-
-/*
- * The following functions give the original output from kmemtrace,
- * plus the origin CPU, since reordering occurs in-kernel now.
- */
-
-#define KMEMTRACE_USER_ALLOC 0
-#define KMEMTRACE_USER_FREE 1
-
-struct kmemtrace_user_event {
- u8 event_id;
- u8 type_id;
- u16 event_size;
- u32 cpu;
- u64 timestamp;
- unsigned long call_site;
- unsigned long ptr;
-};
-
-struct kmemtrace_user_event_alloc {
- size_t bytes_req;
- size_t bytes_alloc;
- unsigned gfp_flags;
- int node;
-};
-
-static enum print_line_t
-kmemtrace_print_alloc(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- struct trace_seq *s = &iter->seq;
- struct kmemtrace_alloc_entry *entry;
- int ret;
-
- trace_assign_type(entry, iter->ent);
-
- ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu "
- "bytes_req %lu bytes_alloc %lu gfp_flags %lu node %d\n",
- entry->type_id, (void *)entry->call_site, (unsigned long)entry->ptr,
- (unsigned long)entry->bytes_req, (unsigned long)entry->bytes_alloc,
- (unsigned long)entry->gfp_flags, entry->node);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- struct trace_seq *s = &iter->seq;
- struct kmemtrace_free_entry *entry;
- int ret;
-
- trace_assign_type(entry, iter->ent);
-
- ret = trace_seq_printf(s, "type_id %d call_site %pF ptr %lu\n",
- entry->type_id, (void *)entry->call_site,
- (unsigned long)entry->ptr);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_alloc_user(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- struct trace_seq *s = &iter->seq;
- struct kmemtrace_alloc_entry *entry;
- struct kmemtrace_user_event *ev;
- struct kmemtrace_user_event_alloc *ev_alloc;
-
- trace_assign_type(entry, iter->ent);
-
- ev = trace_seq_reserve(s, sizeof(*ev));
- if (!ev)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ev->event_id = KMEMTRACE_USER_ALLOC;
- ev->type_id = entry->type_id;
- ev->event_size = sizeof(*ev) + sizeof(*ev_alloc);
- ev->cpu = iter->cpu;
- ev->timestamp = iter->ts;
- ev->call_site = entry->call_site;
- ev->ptr = (unsigned long)entry->ptr;
-
- ev_alloc = trace_seq_reserve(s, sizeof(*ev_alloc));
- if (!ev_alloc)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ev_alloc->bytes_req = entry->bytes_req;
- ev_alloc->bytes_alloc = entry->bytes_alloc;
- ev_alloc->gfp_flags = entry->gfp_flags;
- ev_alloc->node = entry->node;
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_user(struct trace_iterator *iter, int flags,
- struct trace_event *event)
-{
- struct trace_seq *s = &iter->seq;
- struct kmemtrace_free_entry *entry;
- struct kmemtrace_user_event *ev;
-
- trace_assign_type(entry, iter->ent);
-
- ev = trace_seq_reserve(s, sizeof(*ev));
- if (!ev)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ev->event_id = KMEMTRACE_USER_FREE;
- ev->type_id = entry->type_id;
- ev->event_size = sizeof(*ev);
- ev->cpu = iter->cpu;
- ev->timestamp = iter->ts;
- ev->call_site = entry->call_site;
- ev->ptr = (unsigned long)entry->ptr;
-
- return TRACE_TYPE_HANDLED;
-}
-
-/* The two other following provide a more minimalistic output */
-static enum print_line_t
-kmemtrace_print_alloc_compress(struct trace_iterator *iter)
-{
- struct kmemtrace_alloc_entry *entry;
- struct trace_seq *s = &iter->seq;
- int ret;
-
- trace_assign_type(entry, iter->ent);
-
- /* Alloc entry */
- ret = trace_seq_printf(s, " + ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Type */
- switch (entry->type_id) {
- case KMEMTRACE_TYPE_KMALLOC:
- ret = trace_seq_printf(s, "K ");
- break;
- case KMEMTRACE_TYPE_CACHE:
- ret = trace_seq_printf(s, "C ");
- break;
- case KMEMTRACE_TYPE_PAGES:
- ret = trace_seq_printf(s, "P ");
- break;
- default:
- ret = trace_seq_printf(s, "? ");
- }
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Requested */
- ret = trace_seq_printf(s, "%4zu ", entry->bytes_req);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Allocated */
- ret = trace_seq_printf(s, "%4zu ", entry->bytes_alloc);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Flags
- * TODO: would be better to see the name of the GFP flag names
- */
- ret = trace_seq_printf(s, "%08x ", entry->gfp_flags);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Pointer to allocated */
- ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Node and call site*/
- ret = trace_seq_printf(s, "%4d %pf\n", entry->node,
- (void *)entry->call_site);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-kmemtrace_print_free_compress(struct trace_iterator *iter)
-{
- struct kmemtrace_free_entry *entry;
- struct trace_seq *s = &iter->seq;
- int ret;
-
- trace_assign_type(entry, iter->ent);
-
- /* Free entry */
- ret = trace_seq_printf(s, " - ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Type */
- switch (entry->type_id) {
- case KMEMTRACE_TYPE_KMALLOC:
- ret = trace_seq_printf(s, "K ");
- break;
- case KMEMTRACE_TYPE_CACHE:
- ret = trace_seq_printf(s, "C ");
- break;
- case KMEMTRACE_TYPE_PAGES:
- ret = trace_seq_printf(s, "P ");
- break;
- default:
- ret = trace_seq_printf(s, "? ");
- }
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Skip requested/allocated/flags */
- ret = trace_seq_printf(s, " ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Pointer to allocated */
- ret = trace_seq_printf(s, "0x%tx ", (ptrdiff_t)entry->ptr);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Skip node and print call site*/
- ret = trace_seq_printf(s, " %pf\n", (void *)entry->call_site);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t kmemtrace_print_line(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
-
- if (!(kmem_tracer_flags.val & TRACE_KMEM_OPT_MINIMAL))
- return TRACE_TYPE_UNHANDLED;
-
- switch (entry->type) {
- case TRACE_KMEM_ALLOC:
- return kmemtrace_print_alloc_compress(iter);
- case TRACE_KMEM_FREE:
- return kmemtrace_print_free_compress(iter);
- default:
- return TRACE_TYPE_UNHANDLED;
- }
-}
-
-static struct trace_event_functions kmem_trace_alloc_funcs = {
- .trace = kmemtrace_print_alloc,
- .binary = kmemtrace_print_alloc_user,
-};
-
-static struct trace_event kmem_trace_alloc = {
- .type = TRACE_KMEM_ALLOC,
- .funcs = &kmem_trace_alloc_funcs,
-};
-
-static struct trace_event_functions kmem_trace_free_funcs = {
- .trace = kmemtrace_print_free,
- .binary = kmemtrace_print_free_user,
-};
-
-static struct trace_event kmem_trace_free = {
- .type = TRACE_KMEM_FREE,
- .funcs = &kmem_trace_free_funcs,
-};
-
-static struct tracer kmem_tracer __read_mostly = {
- .name = "kmemtrace",
- .init = kmem_trace_init,
- .reset = kmem_trace_reset,
- .print_line = kmemtrace_print_line,
- .print_header = kmemtrace_headers,
- .flags = &kmem_tracer_flags
-};
-
-void kmemtrace_init(void)
-{
- /* earliest opportunity to start kmem tracing */
-}
-
-static int __init init_kmem_tracer(void)
-{
- if (!register_ftrace_event(&kmem_trace_alloc)) {
- pr_warning("Warning: could not register kmem events\n");
- return 1;
- }
-
- if (!register_ftrace_event(&kmem_trace_free)) {
- pr_warning("Warning: could not register kmem events\n");
- return 1;
- }
-
- if (register_tracer(&kmem_tracer) != 0) {
- pr_warning("Warning: could not register the kmem tracer\n");
- return 1;
- }
-
- return 0;
-}
-device_initcall(init_kmem_tracer);
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 1da7b6ea8b8..3632ce87674 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -443,6 +443,7 @@ int ring_buffer_print_page_header(struct trace_seq *s)
*/
struct ring_buffer_per_cpu {
int cpu;
+ atomic_t record_disabled;
struct ring_buffer *buffer;
spinlock_t reader_lock; /* serialize readers */
arch_spinlock_t lock;
@@ -462,7 +463,6 @@ struct ring_buffer_per_cpu {
unsigned long read;
u64 write_stamp;
u64 read_stamp;
- atomic_t record_disabled;
};
struct ring_buffer {
@@ -2242,8 +2242,6 @@ static void trace_recursive_unlock(void)
#endif
-static DEFINE_PER_CPU(int, rb_need_resched);
-
/**
* ring_buffer_lock_reserve - reserve a part of the buffer
* @buffer: the ring buffer to reserve from
@@ -2264,13 +2262,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
{
struct ring_buffer_per_cpu *cpu_buffer;
struct ring_buffer_event *event;
- int cpu, resched;
+ int cpu;
if (ring_buffer_flags != RB_BUFFERS_ON)
return NULL;
/* If we are tracing schedule, we don't want to recurse */
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
if (atomic_read(&buffer->record_disabled))
goto out_nocheck;
@@ -2295,21 +2293,13 @@ ring_buffer_lock_reserve(struct ring_buffer *buffer, unsigned long length)
if (!event)
goto out;
- /*
- * Need to store resched state on this cpu.
- * Only the first needs to.
- */
-
- if (preempt_count() == 1)
- per_cpu(rb_need_resched, cpu) = resched;
-
return event;
out:
trace_recursive_unlock();
out_nocheck:
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
return NULL;
}
EXPORT_SYMBOL_GPL(ring_buffer_lock_reserve);
@@ -2355,13 +2345,7 @@ int ring_buffer_unlock_commit(struct ring_buffer *buffer,
trace_recursive_unlock();
- /*
- * Only the last preempt count needs to restore preemption.
- */
- if (preempt_count() == 1)
- ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
- else
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
return 0;
}
@@ -2469,13 +2453,7 @@ void ring_buffer_discard_commit(struct ring_buffer *buffer,
trace_recursive_unlock();
- /*
- * Only the last preempt count needs to restore preemption.
- */
- if (preempt_count() == 1)
- ftrace_preempt_enable(per_cpu(rb_need_resched, cpu));
- else
- preempt_enable_no_resched_notrace();
+ preempt_enable_notrace();
}
EXPORT_SYMBOL_GPL(ring_buffer_discard_commit);
@@ -2501,12 +2479,12 @@ int ring_buffer_write(struct ring_buffer *buffer,
struct ring_buffer_event *event;
void *body;
int ret = -EBUSY;
- int cpu, resched;
+ int cpu;
if (ring_buffer_flags != RB_BUFFERS_ON)
return -EBUSY;
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
if (atomic_read(&buffer->record_disabled))
goto out;
@@ -2536,7 +2514,7 @@ int ring_buffer_write(struct ring_buffer *buffer,
ret = 0;
out:
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
return ret;
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d6736b93dc2..ed1032d6f81 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -341,7 +341,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
- TRACE_ITER_GRAPH_TIME;
+ TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +425,7 @@ static const char *trace_options[] = {
"latency-format",
"sleep-time",
"graph-time",
+ "record-cmd",
NULL
};
@@ -656,6 +657,10 @@ update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
+ if (!current_trace->use_max_tr) {
+ WARN_ON_ONCE(1);
+ return;
+ }
arch_spin_lock(&ftrace_max_lock);
tr->buffer = max_tr.buffer;
@@ -682,6 +687,11 @@ update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
return;
WARN_ON_ONCE(!irqs_disabled());
+ if (!current_trace->use_max_tr) {
+ WARN_ON_ONCE(1);
+ return;
+ }
+
arch_spin_lock(&ftrace_max_lock);
ftrace_disable_cpu();
@@ -726,7 +736,7 @@ __acquires(kernel_lock)
return -1;
}
- if (strlen(type->name) > MAX_TRACER_SIZE) {
+ if (strlen(type->name) >= MAX_TRACER_SIZE) {
pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
return -1;
}
@@ -1328,61 +1338,6 @@ static void __trace_userstack(struct trace_array *tr, unsigned long flags)
#endif /* CONFIG_STACKTRACE */
-static void
-ftrace_trace_special(void *__tr,
- unsigned long arg1, unsigned long arg2, unsigned long arg3,
- int pc)
-{
- struct ftrace_event_call *call = &event_special;
- struct ring_buffer_event *event;
- struct trace_array *tr = __tr;
- struct ring_buffer *buffer = tr->buffer;
- struct special_entry *entry;
-
- event = trace_buffer_lock_reserve(buffer, TRACE_SPECIAL,
- sizeof(*entry), 0, pc);
- if (!event)
- return;
- entry = ring_buffer_event_data(event);
- entry->arg1 = arg1;
- entry->arg2 = arg2;
- entry->arg3 = arg3;
-
- if (!filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(buffer, event, 0, pc);
-}
-
-void
-__trace_special(void *__tr, void *__data,
- unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
- ftrace_trace_special(__tr, arg1, arg2, arg3, preempt_count());
-}
-
-void
-ftrace_special(unsigned long arg1, unsigned long arg2, unsigned long arg3)
-{
- struct trace_array *tr = &global_trace;
- struct trace_array_cpu *data;
- unsigned long flags;
- int cpu;
- int pc;
-
- if (tracing_disabled)
- return;
-
- pc = preempt_count();
- local_irq_save(flags);
- cpu = raw_smp_processor_id();
- data = tr->data[cpu];
-
- if (likely(atomic_inc_return(&data->disabled) == 1))
- ftrace_trace_special(tr, arg1, arg2, arg3, pc);
-
- atomic_dec(&data->disabled);
- local_irq_restore(flags);
-}
-
/**
* trace_vbprintk - write binary msg to tracing buffer
*
@@ -1401,7 +1356,6 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
struct bprint_entry *entry;
unsigned long flags;
int disable;
- int resched;
int cpu, len = 0, size, pc;
if (unlikely(tracing_selftest_running || tracing_disabled))
@@ -1411,7 +1365,7 @@ int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
pause_graph_tracing();
pc = preempt_count();
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -1449,7 +1403,7 @@ out_unlock:
out:
atomic_dec_return(&data->disabled);
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
unpause_graph_tracing();
return len;
@@ -2386,6 +2340,7 @@ static const struct file_operations show_traces_fops = {
.open = show_traces_open,
.read = seq_read,
.release = seq_release,
+ .llseek = seq_lseek,
};
/*
@@ -2479,6 +2434,7 @@ static const struct file_operations tracing_cpumask_fops = {
.open = tracing_open_generic,
.read = tracing_cpumask_read,
.write = tracing_cpumask_write,
+ .llseek = generic_file_llseek,
};
static int tracing_trace_options_show(struct seq_file *m, void *v)
@@ -2554,6 +2510,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
trace_flags |= mask;
else
trace_flags &= ~mask;
+
+ if (mask == TRACE_ITER_RECORD_CMD)
+ trace_event_enable_cmd_record(enabled);
}
static ssize_t
@@ -2645,6 +2604,7 @@ tracing_readme_read(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_readme_fops = {
.open = tracing_open_generic,
.read = tracing_readme_read,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -2695,6 +2655,7 @@ tracing_saved_cmdlines_read(struct file *file, char __user *ubuf,
static const struct file_operations tracing_saved_cmdlines_fops = {
.open = tracing_open_generic,
.read = tracing_saved_cmdlines_read,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -2790,6 +2751,9 @@ static int tracing_resize_ring_buffer(unsigned long size)
if (ret < 0)
return ret;
+ if (!current_trace->use_max_tr)
+ goto out;
+
ret = ring_buffer_resize(max_tr.buffer, size);
if (ret < 0) {
int r;
@@ -2817,11 +2781,14 @@ static int tracing_resize_ring_buffer(unsigned long size)
return ret;
}
+ max_tr.entries = size;
+ out:
global_trace.entries = size;
return ret;
}
+
/**
* tracing_update_buffers - used by tracing facility to expand ring buffers
*
@@ -2882,12 +2849,26 @@ static int tracing_set_tracer(const char *buf)
trace_branch_disable();
if (current_trace && current_trace->reset)
current_trace->reset(tr);
-
+ if (current_trace && current_trace->use_max_tr) {
+ /*
+ * We don't free the ring buffer. Instead, resize it, because
+ * the max_tr ring buffer has some state (e.g. ring->clock) and
+ * we want to preserve it.
+ */
+ ring_buffer_resize(max_tr.buffer, 1);
+ max_tr.entries = 1;
+ }
destroy_trace_option_files(topts);
current_trace = t;
topts = create_trace_option_files(current_trace);
+ if (current_trace->use_max_tr) {
+ ret = ring_buffer_resize(max_tr.buffer, global_trace.entries);
+ if (ret < 0)
+ goto out;
+ max_tr.entries = global_trace.entries;
+ }
if (t->init) {
ret = tracer_init(t, tr);
@@ -3024,6 +3005,7 @@ static int tracing_open_pipe(struct inode *inode, struct file *filp)
if (iter->trace->pipe_open)
iter->trace->pipe_open(iter);
+ nonseekable_open(inode, filp);
out:
mutex_unlock(&trace_types_lock);
return ret;
@@ -3469,7 +3451,6 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
}
tracing_start();
- max_tr.entries = global_trace.entries;
mutex_unlock(&trace_types_lock);
return cnt;
@@ -3582,18 +3563,21 @@ static const struct file_operations tracing_max_lat_fops = {
.open = tracing_open_generic,
.read = tracing_max_lat_read,
.write = tracing_max_lat_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations tracing_ctrl_fops = {
.open = tracing_open_generic,
.read = tracing_ctrl_read,
.write = tracing_ctrl_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations set_tracer_fops = {
.open = tracing_open_generic,
.read = tracing_set_trace_read,
.write = tracing_set_trace_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations tracing_pipe_fops = {
@@ -3602,17 +3586,20 @@ static const struct file_operations tracing_pipe_fops = {
.read = tracing_read_pipe,
.splice_read = tracing_splice_read_pipe,
.release = tracing_release_pipe,
+ .llseek = no_llseek,
};
static const struct file_operations tracing_entries_fops = {
.open = tracing_open_generic,
.read = tracing_entries_read,
.write = tracing_entries_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations tracing_mark_fops = {
.open = tracing_open_generic,
.write = tracing_mark_write,
+ .llseek = generic_file_llseek,
};
static const struct file_operations trace_clock_fops = {
@@ -3918,6 +3905,7 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_stats_fops = {
.open = tracing_open_generic,
.read = tracing_stats_read,
+ .llseek = generic_file_llseek,
};
#ifdef CONFIG_DYNAMIC_FTRACE
@@ -3954,6 +3942,7 @@ tracing_read_dyn_info(struct file *filp, char __user *ubuf,
static const struct file_operations tracing_dyn_info_fops = {
.open = tracing_open_generic,
.read = tracing_read_dyn_info,
+ .llseek = generic_file_llseek,
};
#endif
@@ -4107,6 +4096,7 @@ static const struct file_operations trace_options_fops = {
.open = tracing_open_generic,
.read = trace_options_read,
.write = trace_options_write,
+ .llseek = generic_file_llseek,
};
static ssize_t
@@ -4158,6 +4148,7 @@ static const struct file_operations trace_options_core_fops = {
.open = tracing_open_generic,
.read = trace_options_core_read,
.write = trace_options_core_write,
+ .llseek = generic_file_llseek,
};
struct dentry *trace_create_file(const char *name,
@@ -4347,9 +4338,6 @@ static __init int tracer_init_debugfs(void)
trace_create_file("dyn_ftrace_total_info", 0444, d_tracer,
&ftrace_update_tot_cnt, &tracing_dyn_info_fops);
#endif
-#ifdef CONFIG_SYSPROF_TRACER
- init_tracer_sysprof_debugfs(d_tracer);
-#endif
create_trace_options_dir();
@@ -4576,16 +4564,14 @@ __init static int tracer_alloc_buffers(void)
#ifdef CONFIG_TRACER_MAX_TRACE
- max_tr.buffer = ring_buffer_alloc(ring_buf_size,
- TRACE_BUFFER_FLAGS);
+ max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
if (!max_tr.buffer) {
printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
WARN_ON(1);
ring_buffer_free(global_trace.buffer);
goto out_free_cpumask;
}
- max_tr.entries = ring_buffer_size(max_tr.buffer);
- WARN_ON(max_tr.entries != global_trace.entries);
+ max_tr.entries = 1;
#endif
/* Allocate the first page for all buffers */
@@ -4598,9 +4584,6 @@ __init static int tracer_alloc_buffers(void)
register_tracer(&nop_trace);
current_trace = &nop_trace;
-#ifdef CONFIG_BOOT_TRACER
- register_tracer(&boot_tracer);
-#endif
/* All seems OK, enable tracing */
tracing_disabled = 0;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 0605fc00c17..d39b3c5454a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -9,10 +9,7 @@
#include <linux/mmiotrace.h>
#include <linux/tracepoint.h>
#include <linux/ftrace.h>
-#include <trace/boot.h>
-#include <linux/kmemtrace.h>
#include <linux/hw_breakpoint.h>
-
#include <linux/trace_seq.h>
#include <linux/ftrace_event.h>
@@ -25,30 +22,17 @@ enum trace_type {
TRACE_STACK,
TRACE_PRINT,
TRACE_BPRINT,
- TRACE_SPECIAL,
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
TRACE_BRANCH,
- TRACE_BOOT_CALL,
- TRACE_BOOT_RET,
TRACE_GRAPH_RET,
TRACE_GRAPH_ENT,
TRACE_USER_STACK,
- TRACE_KMEM_ALLOC,
- TRACE_KMEM_FREE,
TRACE_BLK,
- TRACE_KSYM,
__TRACE_LAST_TYPE,
};
-enum kmemtrace_type_id {
- KMEMTRACE_TYPE_KMALLOC = 0, /* kmalloc() or kfree(). */
- KMEMTRACE_TYPE_CACHE, /* kmem_cache_*(). */
- KMEMTRACE_TYPE_PAGES, /* __get_free_pages() and friends. */
-};
-
-extern struct tracer boot_tracer;
#undef __field
#define __field(type, item) type item;
@@ -204,23 +188,15 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
IF_ASSIGN(var, ent, struct bprint_entry, TRACE_BPRINT); \
- IF_ASSIGN(var, ent, struct special_entry, 0); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
TRACE_MMIO_RW); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_map, \
TRACE_MMIO_MAP); \
- IF_ASSIGN(var, ent, struct trace_boot_call, TRACE_BOOT_CALL);\
- IF_ASSIGN(var, ent, struct trace_boot_ret, TRACE_BOOT_RET);\
IF_ASSIGN(var, ent, struct trace_branch, TRACE_BRANCH); \
IF_ASSIGN(var, ent, struct ftrace_graph_ent_entry, \
TRACE_GRAPH_ENT); \
IF_ASSIGN(var, ent, struct ftrace_graph_ret_entry, \
TRACE_GRAPH_RET); \
- IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
- TRACE_KMEM_ALLOC); \
- IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
- TRACE_KMEM_FREE); \
- IF_ASSIGN(var, ent, struct ksym_trace_entry, TRACE_KSYM);\
__ftrace_bad_type(); \
} while (0)
@@ -298,6 +274,7 @@ struct tracer {
struct tracer *next;
int print_max;
struct tracer_flags *flags;
+ int use_max_tr;
};
@@ -318,7 +295,6 @@ struct dentry *trace_create_file(const char *name,
const struct file_operations *fops);
struct dentry *tracing_init_dentry(void);
-void init_tracer_sysprof_debugfs(struct dentry *d_tracer);
struct ring_buffer_event;
@@ -363,11 +339,6 @@ void tracing_sched_wakeup_trace(struct trace_array *tr,
struct task_struct *wakee,
struct task_struct *cur,
unsigned long flags, int pc);
-void trace_special(struct trace_array *tr,
- struct trace_array_cpu *data,
- unsigned long arg1,
- unsigned long arg2,
- unsigned long arg3, int pc);
void trace_function(struct trace_array *tr,
unsigned long ip,
unsigned long parent_ip,
@@ -398,8 +369,6 @@ extern cpumask_var_t __read_mostly tracing_buffer_mask;
#define for_each_tracing_cpu(cpu) \
for_each_cpu(cpu, tracing_buffer_mask)
-extern int process_new_ksym_entry(char *ksymname, int op, unsigned long addr);
-
extern unsigned long nsecs_to_usecs(unsigned long nsecs);
extern unsigned long tracing_thresh;
@@ -469,12 +438,8 @@ extern int trace_selftest_startup_nop(struct tracer *trace,
struct trace_array *tr);
extern int trace_selftest_startup_sched_switch(struct tracer *trace,
struct trace_array *tr);
-extern int trace_selftest_startup_sysprof(struct tracer *trace,
- struct trace_array *tr);
extern int trace_selftest_startup_branch(struct tracer *trace,
struct trace_array *tr);
-extern int trace_selftest_startup_ksym(struct tracer *trace,
- struct trace_array *tr);
#endif /* CONFIG_FTRACE_STARTUP_TEST */
extern void *head_page(struct trace_array_cpu *data);
@@ -636,6 +601,7 @@ enum trace_iterator_flags {
TRACE_ITER_LATENCY_FMT = 0x20000,
TRACE_ITER_SLEEP_TIME = 0x40000,
TRACE_ITER_GRAPH_TIME = 0x80000,
+ TRACE_ITER_RECORD_CMD = 0x100000,
};
/*
@@ -647,54 +613,6 @@ enum trace_iterator_flags {
extern struct tracer nop_trace;
-/**
- * ftrace_preempt_disable - disable preemption scheduler safe
- *
- * When tracing can happen inside the scheduler, there exists
- * cases that the tracing might happen before the need_resched
- * flag is checked. If this happens and the tracer calls
- * preempt_enable (after a disable), a schedule might take place
- * causing an infinite recursion.
- *
- * To prevent this, we read the need_resched flag before
- * disabling preemption. When we want to enable preemption we
- * check the flag, if it is set, then we call preempt_enable_no_resched.
- * Otherwise, we call preempt_enable.
- *
- * The rational for doing the above is that if need_resched is set
- * and we have yet to reschedule, we are either in an atomic location
- * (where we do not need to check for scheduling) or we are inside
- * the scheduler and do not want to resched.
- */
-static inline int ftrace_preempt_disable(void)
-{
- int resched;
-
- resched = need_resched();
- preempt_disable_notrace();
-
- return resched;
-}
-
-/**
- * ftrace_preempt_enable - enable preemption scheduler safe
- * @resched: the return value from ftrace_preempt_disable
- *
- * This is a scheduler safe way to enable preemption and not miss
- * any preemption checks. The disabled saved the state of preemption.
- * If resched is set, then we are either inside an atomic or
- * are inside the scheduler (we would have already scheduled
- * otherwise). In this case, we do not want to call normal
- * preempt_enable, but preempt_enable_no_resched instead.
- */
-static inline void ftrace_preempt_enable(int resched)
-{
- if (resched)
- preempt_enable_no_resched_notrace();
- else
- preempt_enable_notrace();
-}
-
#ifdef CONFIG_BRANCH_TRACER
extern int enable_branch_tracing(struct trace_array *tr);
extern void disable_branch_tracing(void);
@@ -785,6 +703,8 @@ struct filter_pred {
int pop_n;
};
+extern struct list_head ftrace_common_fields;
+
extern enum regex_type
filter_parse_regex(char *buff, int len, char **search, int *not);
extern void print_event_filter(struct ftrace_event_call *call,
@@ -814,6 +734,8 @@ filter_check_discard(struct ftrace_event_call *call, void *rec,
return 0;
}
+extern void trace_event_enable_cmd_record(bool enable);
+
extern struct mutex event_mutex;
extern struct list_head ftrace_events;
diff --git a/kernel/trace/trace_boot.c b/kernel/trace/trace_boot.c
deleted file mode 100644
index c21d5f3956a..00000000000
--- a/kernel/trace/trace_boot.c
+++ /dev/null
@@ -1,185 +0,0 @@
-/*
- * ring buffer based initcalls tracer
- *
- * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
- *
- */
-
-#include <linux/init.h>
-#include <linux/debugfs.h>
-#include <linux/ftrace.h>
-#include <linux/kallsyms.h>
-#include <linux/time.h>
-
-#include "trace.h"
-#include "trace_output.h"
-
-static struct trace_array *boot_trace;
-static bool pre_initcalls_finished;
-
-/* Tells the boot tracer that the pre_smp_initcalls are finished.
- * So we are ready .
- * It doesn't enable sched events tracing however.
- * You have to call enable_boot_trace to do so.
- */
-void start_boot_trace(void)
-{
- pre_initcalls_finished = true;
-}
-
-void enable_boot_trace(void)
-{
- if (boot_trace && pre_initcalls_finished)
- tracing_start_sched_switch_record();
-}
-
-void disable_boot_trace(void)
-{
- if (boot_trace && pre_initcalls_finished)
- tracing_stop_sched_switch_record();
-}
-
-static int boot_trace_init(struct trace_array *tr)
-{
- boot_trace = tr;
-
- if (!tr)
- return 0;
-
- tracing_reset_online_cpus(tr);
-
- tracing_sched_switch_assign_trace(tr);
- return 0;
-}
-
-static enum print_line_t
-initcall_call_print_line(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
- struct trace_seq *s = &iter->seq;
- struct trace_boot_call *field;
- struct boot_trace_call *call;
- u64 ts;
- unsigned long nsec_rem;
- int ret;
-
- trace_assign_type(field, entry);
- call = &field->boot_call;
- ts = iter->ts;
- nsec_rem = do_div(ts, NSEC_PER_SEC);
-
- ret = trace_seq_printf(s, "[%5ld.%09ld] calling %s @ %i\n",
- (unsigned long)ts, nsec_rem, call->func, call->caller);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- else
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t
-initcall_ret_print_line(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
- struct trace_seq *s = &iter->seq;
- struct trace_boot_ret *field;
- struct boot_trace_ret *init_ret;
- u64 ts;
- unsigned long nsec_rem;
- int ret;
-
- trace_assign_type(field, entry);
- init_ret = &field->boot_ret;
- ts = iter->ts;
- nsec_rem = do_div(ts, NSEC_PER_SEC);
-
- ret = trace_seq_printf(s, "[%5ld.%09ld] initcall %s "
- "returned %d after %llu msecs\n",
- (unsigned long) ts,
- nsec_rem,
- init_ret->func, init_ret->result, init_ret->duration);
-
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- else
- return TRACE_TYPE_HANDLED;
-}
-
-static enum print_line_t initcall_print_line(struct trace_iterator *iter)
-{
- struct trace_entry *entry = iter->ent;
-
- switch (entry->type) {
- case TRACE_BOOT_CALL:
- return initcall_call_print_line(iter);
- case TRACE_BOOT_RET:
- return initcall_ret_print_line(iter);
- default:
- return TRACE_TYPE_UNHANDLED;
- }
-}
-
-struct tracer boot_tracer __read_mostly =
-{
- .name = "initcall",
- .init = boot_trace_init,
- .reset = tracing_reset_online_cpus,
- .print_line = initcall_print_line,
-};
-
-void trace_boot_call(struct boot_trace_call *bt, initcall_t fn)
-{
- struct ftrace_event_call *call = &event_boot_call;
- struct ring_buffer_event *event;
- struct ring_buffer *buffer;
- struct trace_boot_call *entry;
- struct trace_array *tr = boot_trace;
-
- if (!tr || !pre_initcalls_finished)
- return;
-
- /* Get its name now since this function could
- * disappear because it is in the .init section.
- */
- sprint_symbol(bt->func, (unsigned long)fn);
- preempt_disable();
-
- buffer = tr->buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_CALL,
- sizeof(*entry), 0, 0);
- if (!event)
- goto out;
- entry = ring_buffer_event_data(event);
- entry->boot_call = *bt;
- if (!filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
- preempt_enable();
-}
-
-void trace_boot_ret(struct boot_trace_ret *bt, initcall_t fn)
-{
- struct ftrace_event_call *call = &event_boot_ret;
- struct ring_buffer_event *event;
- struct ring_buffer *buffer;
- struct trace_boot_ret *entry;
- struct trace_array *tr = boot_trace;
-
- if (!tr || !pre_initcalls_finished)
- return;
-
- sprint_symbol(bt->func, (unsigned long)fn);
- preempt_disable();
-
- buffer = tr->buffer;
- event = trace_buffer_lock_reserve(buffer, TRACE_BOOT_RET,
- sizeof(*entry), 0, 0);
- if (!event)
- goto out;
- entry = ring_buffer_event_data(event);
- entry->boot_ret = *bt;
- if (!filter_check_discard(call, entry, buffer, event))
- trace_buffer_unlock_commit(buffer, event, 0, 0);
- out:
- preempt_enable();
-}
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 9d589d8dcd1..52fda6c04ac 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -32,16 +32,15 @@
u64 notrace trace_clock_local(void)
{
u64 clock;
- int resched;
/*
* sched_clock() is an architecture implemented, fast, scalable,
* lockless clock. It is not guaranteed to be coherent across
* CPUs, nor across CPU idle events.
*/
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
clock = sched_clock();
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
return clock;
}
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index dc008c1240d..e3dfecaf13e 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -151,23 +151,6 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
);
/*
- * Special (free-form) trace entry:
- */
-FTRACE_ENTRY(special, special_entry,
-
- TRACE_SPECIAL,
-
- F_STRUCT(
- __field( unsigned long, arg1 )
- __field( unsigned long, arg2 )
- __field( unsigned long, arg3 )
- ),
-
- F_printk("(%08lx) (%08lx) (%08lx)",
- __entry->arg1, __entry->arg2, __entry->arg3)
-);
-
-/*
* Stack-trace entry:
*/
@@ -271,33 +254,6 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
__entry->map_id, __entry->opcode)
);
-FTRACE_ENTRY(boot_call, trace_boot_call,
-
- TRACE_BOOT_CALL,
-
- F_STRUCT(
- __field_struct( struct boot_trace_call, boot_call )
- __field_desc( pid_t, boot_call, caller )
- __array_desc( char, boot_call, func, KSYM_SYMBOL_LEN)
- ),
-
- F_printk("%d %s", __entry->caller, __entry->func)
-);
-
-FTRACE_ENTRY(boot_ret, trace_boot_ret,
-
- TRACE_BOOT_RET,
-
- F_STRUCT(
- __field_struct( struct boot_trace_ret, boot_ret )
- __array_desc( char, boot_ret, func, KSYM_SYMBOL_LEN)
- __field_desc( int, boot_ret, result )
- __field_desc( unsigned long, boot_ret, duration )
- ),
-
- F_printk("%s %d %lx",
- __entry->func, __entry->result, __entry->duration)
-);
#define TRACE_FUNC_SIZE 30
#define TRACE_FILE_SIZE 20
@@ -318,53 +274,3 @@ FTRACE_ENTRY(branch, trace_branch,
__entry->func, __entry->file, __entry->correct)
);
-FTRACE_ENTRY(kmem_alloc, kmemtrace_alloc_entry,
-
- TRACE_KMEM_ALLOC,
-
- F_STRUCT(
- __field( enum kmemtrace_type_id, type_id )
- __field( unsigned long, call_site )
- __field( const void *, ptr )
- __field( size_t, bytes_req )
- __field( size_t, bytes_alloc )
- __field( gfp_t, gfp_flags )
- __field( int, node )
- ),
-
- F_printk("type:%u call_site:%lx ptr:%p req:%zi alloc:%zi"
- " flags:%x node:%d",
- __entry->type_id, __entry->call_site, __entry->ptr,
- __entry->bytes_req, __entry->bytes_alloc,
- __entry->gfp_flags, __entry->node)
-);
-
-FTRACE_ENTRY(kmem_free, kmemtrace_free_entry,
-
- TRACE_KMEM_FREE,
-
- F_STRUCT(
- __field( enum kmemtrace_type_id, type_id )
- __field( unsigned long, call_site )
- __field( const void *, ptr )
- ),
-
- F_printk("type:%u call_site:%lx ptr:%p",
- __entry->type_id, __entry->call_site, __entry->ptr)
-);
-
-FTRACE_ENTRY(ksym_trace, ksym_trace_entry,
-
- TRACE_KSYM,
-
- F_STRUCT(
- __field( unsigned long, ip )
- __field( unsigned char, type )
- __array( char , cmd, TASK_COMM_LEN )
- __field( unsigned long, addr )
- ),
-
- F_printk("ip: %pF type: %d ksym_name: %pS cmd: %s",
- (void *)__entry->ip, (unsigned int)__entry->type,
- (void *)__entry->addr, __entry->cmd)
-);
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 8a2b73f7c06..000e6e85b44 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -9,8 +9,6 @@
#include <linux/kprobes.h>
#include "trace.h"
-EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs);
-
static char *perf_trace_buf[4];
/*
@@ -56,13 +54,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
}
}
- if (tp_event->class->reg)
- ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
- else
- ret = tracepoint_probe_register(tp_event->name,
- tp_event->class->perf_probe,
- tp_event);
-
+ ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
if (ret)
goto fail;
@@ -96,9 +88,7 @@ int perf_trace_init(struct perf_event *p_event)
mutex_lock(&event_mutex);
list_for_each_entry(tp_event, &ftrace_events, list) {
if (tp_event->event.type == event_id &&
- tp_event->class &&
- (tp_event->class->perf_probe ||
- tp_event->class->reg) &&
+ tp_event->class && tp_event->class->reg &&
try_module_get(tp_event->mod)) {
ret = perf_trace_event_init(tp_event, p_event);
break;
@@ -138,18 +128,13 @@ void perf_trace_destroy(struct perf_event *p_event)
if (--tp_event->perf_refcount > 0)
goto out;
- if (tp_event->class->reg)
- tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
- else
- tracepoint_probe_unregister(tp_event->name,
- tp_event->class->perf_probe,
- tp_event);
+ tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
/*
- * Ensure our callback won't be called anymore. See
- * tracepoint_probe_unregister() and __DO_TRACE().
+ * Ensure our callback won't be called anymore. The buffers
+ * will be freed after that.
*/
- synchronize_sched();
+ tracepoint_synchronize_unregister();
free_percpu(tp_event->perf_events);
tp_event->perf_events = NULL;
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 53cffc0b080..09b4fa6e4d3 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -28,6 +28,7 @@
DEFINE_MUTEX(event_mutex);
LIST_HEAD(ftrace_events);
+LIST_HEAD(ftrace_common_fields);
struct list_head *
trace_get_fields(struct ftrace_event_call *event_call)
@@ -37,15 +38,11 @@ trace_get_fields(struct ftrace_event_call *event_call)
return event_call->class->get_fields(event_call);
}
-int trace_define_field(struct ftrace_event_call *call, const char *type,
- const char *name, int offset, int size, int is_signed,
- int filter_type)
+static int __trace_define_field(struct list_head *head, const char *type,
+ const char *name, int offset, int size,
+ int is_signed, int filter_type)
{
struct ftrace_event_field *field;
- struct list_head *head;
-
- if (WARN_ON(!call->class))
- return 0;
field = kzalloc(sizeof(*field), GFP_KERNEL);
if (!field)
@@ -68,7 +65,6 @@ int trace_define_field(struct ftrace_event_call *call, const char *type,
field->size = size;
field->is_signed = is_signed;
- head = trace_get_fields(call);
list_add(&field->link, head);
return 0;
@@ -80,17 +76,32 @@ err:
return -ENOMEM;
}
+
+int trace_define_field(struct ftrace_event_call *call, const char *type,
+ const char *name, int offset, int size, int is_signed,
+ int filter_type)
+{
+ struct list_head *head;
+
+ if (WARN_ON(!call->class))
+ return 0;
+
+ head = trace_get_fields(call);
+ return __trace_define_field(head, type, name, offset, size,
+ is_signed, filter_type);
+}
EXPORT_SYMBOL_GPL(trace_define_field);
#define __common_field(type, item) \
- ret = trace_define_field(call, #type, "common_" #item, \
- offsetof(typeof(ent), item), \
- sizeof(ent.item), \
- is_signed_type(type), FILTER_OTHER); \
+ ret = __trace_define_field(&ftrace_common_fields, #type, \
+ "common_" #item, \
+ offsetof(typeof(ent), item), \
+ sizeof(ent.item), \
+ is_signed_type(type), FILTER_OTHER); \
if (ret) \
return ret;
-static int trace_define_common_fields(struct ftrace_event_call *call)
+static int trace_define_common_fields(void)
{
int ret;
struct trace_entry ent;
@@ -130,6 +141,55 @@ int trace_event_raw_init(struct ftrace_event_call *call)
}
EXPORT_SYMBOL_GPL(trace_event_raw_init);
+int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
+{
+ switch (type) {
+ case TRACE_REG_REGISTER:
+ return tracepoint_probe_register(call->name,
+ call->class->probe,
+ call);
+ case TRACE_REG_UNREGISTER:
+ tracepoint_probe_unregister(call->name,
+ call->class->probe,
+ call);
+ return 0;
+
+#ifdef CONFIG_PERF_EVENTS
+ case TRACE_REG_PERF_REGISTER:
+ return tracepoint_probe_register(call->name,
+ call->class->perf_probe,
+ call);
+ case TRACE_REG_PERF_UNREGISTER:
+ tracepoint_probe_unregister(call->name,
+ call->class->perf_probe,
+ call);
+ return 0;
+#endif
+ }
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ftrace_event_reg);
+
+void trace_event_enable_cmd_record(bool enable)
+{
+ struct ftrace_event_call *call;
+
+ mutex_lock(&event_mutex);
+ list_for_each_entry(call, &ftrace_events, list) {
+ if (!(call->flags & TRACE_EVENT_FL_ENABLED))
+ continue;
+
+ if (enable) {
+ tracing_start_cmdline_record();
+ call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+ } else {
+ tracing_stop_cmdline_record();
+ call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+ }
+ }
+ mutex_unlock(&event_mutex);
+}
+
static int ftrace_event_enable_disable(struct ftrace_event_call *call,
int enable)
{
@@ -139,24 +199,20 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
case 0:
if (call->flags & TRACE_EVENT_FL_ENABLED) {
call->flags &= ~TRACE_EVENT_FL_ENABLED;
- tracing_stop_cmdline_record();
- if (call->class->reg)
- call->class->reg(call, TRACE_REG_UNREGISTER);
- else
- tracepoint_probe_unregister(call->name,
- call->class->probe,
- call);
+ if (call->flags & TRACE_EVENT_FL_RECORDED_CMD) {
+ tracing_stop_cmdline_record();
+ call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
+ }
+ call->class->reg(call, TRACE_REG_UNREGISTER);
}
break;
case 1:
if (!(call->flags & TRACE_EVENT_FL_ENABLED)) {
- tracing_start_cmdline_record();
- if (call->class->reg)
- ret = call->class->reg(call, TRACE_REG_REGISTER);
- else
- ret = tracepoint_probe_register(call->name,
- call->class->probe,
- call);
+ if (trace_flags & TRACE_ITER_RECORD_CMD) {
+ tracing_start_cmdline_record();
+ call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
+ }
+ ret = call->class->reg(call, TRACE_REG_REGISTER);
if (ret) {
tracing_stop_cmdline_record();
pr_info("event trace: Could not enable event "
@@ -194,8 +250,7 @@ static int __ftrace_set_clr_event(const char *match, const char *sub,
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->name || !call->class ||
- (!call->class->probe && !call->class->reg))
+ if (!call->name || !call->class || !call->class->reg)
continue;
if (match &&
@@ -321,7 +376,7 @@ t_next(struct seq_file *m, void *v, loff_t *pos)
* The ftrace subsystem is for showing formats only.
* They can not be enabled or disabled via the event files.
*/
- if (call->class && (call->class->probe || call->class->reg))
+ if (call->class && call->class->reg)
return call;
}
@@ -474,8 +529,7 @@ system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
mutex_lock(&event_mutex);
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->name || !call->class ||
- (!call->class->probe && !call->class->reg))
+ if (!call->name || !call->class || !call->class->reg)
continue;
if (system && strcmp(call->class->system, system) != 0)
@@ -544,32 +598,10 @@ out:
return ret;
}
-static ssize_t
-event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
- loff_t *ppos)
+static void print_event_fields(struct trace_seq *s, struct list_head *head)
{
- struct ftrace_event_call *call = filp->private_data;
struct ftrace_event_field *field;
- struct list_head *head;
- struct trace_seq *s;
- int common_field_count = 5;
- char *buf;
- int r = 0;
-
- if (*ppos)
- return 0;
-
- s = kmalloc(sizeof(*s), GFP_KERNEL);
- if (!s)
- return -ENOMEM;
-
- trace_seq_init(s);
-
- trace_seq_printf(s, "name: %s\n", call->name);
- trace_seq_printf(s, "ID: %d\n", call->event.type);
- trace_seq_printf(s, "format:\n");
- head = trace_get_fields(call);
list_for_each_entry_reverse(field, head, link) {
/*
* Smartly shows the array type(except dynamic array).
@@ -584,29 +616,54 @@ event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
array_descriptor = NULL;
if (!array_descriptor) {
- r = trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
+ trace_seq_printf(s, "\tfield:%s %s;\toffset:%u;"
"\tsize:%u;\tsigned:%d;\n",
field->type, field->name, field->offset,
field->size, !!field->is_signed);
} else {
- r = trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
+ trace_seq_printf(s, "\tfield:%.*s %s%s;\toffset:%u;"
"\tsize:%u;\tsigned:%d;\n",
(int)(array_descriptor - field->type),
field->type, field->name,
array_descriptor, field->offset,
field->size, !!field->is_signed);
}
+ }
+}
- if (--common_field_count == 0)
- r = trace_seq_printf(s, "\n");
+static ssize_t
+event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
+ loff_t *ppos)
+{
+ struct ftrace_event_call *call = filp->private_data;
+ struct list_head *head;
+ struct trace_seq *s;
+ char *buf;
+ int r;
- if (!r)
- break;
- }
+ if (*ppos)
+ return 0;
+
+ s = kmalloc(sizeof(*s), GFP_KERNEL);
+ if (!s)
+ return -ENOMEM;
+
+ trace_seq_init(s);
+
+ trace_seq_printf(s, "name: %s\n", call->name);
+ trace_seq_printf(s, "ID: %d\n", call->event.type);
+ trace_seq_printf(s, "format:\n");
+
+ /* print common fields */
+ print_event_fields(s, &ftrace_common_fields);
- if (r)
- r = trace_seq_printf(s, "\nprint fmt: %s\n",
- call->print_fmt);
+ trace_seq_putc(s, '\n');
+
+ /* print event specific fields */
+ head = trace_get_fields(call);
+ print_event_fields(s, head);
+
+ r = trace_seq_printf(s, "\nprint fmt: %s\n", call->print_fmt);
if (!r) {
/*
@@ -963,35 +1020,31 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
return -1;
}
- if (call->class->probe || call->class->reg)
+ if (call->class->reg)
trace_create_file("enable", 0644, call->dir, call,
enable);
#ifdef CONFIG_PERF_EVENTS
- if (call->event.type && (call->class->perf_probe || call->class->reg))
+ if (call->event.type && call->class->reg)
trace_create_file("id", 0444, call->dir, call,
id);
#endif
- if (call->class->define_fields) {
- /*
- * Other events may have the same class. Only update
- * the fields if they are not already defined.
- */
- head = trace_get_fields(call);
- if (list_empty(head)) {
- ret = trace_define_common_fields(call);
- if (!ret)
- ret = call->class->define_fields(call);
- if (ret < 0) {
- pr_warning("Could not initialize trace point"
- " events/%s\n", call->name);
- return ret;
- }
+ /*
+ * Other events may have the same class. Only update
+ * the fields if they are not already defined.
+ */
+ head = trace_get_fields(call);
+ if (list_empty(head)) {
+ ret = call->class->define_fields(call);
+ if (ret < 0) {
+ pr_warning("Could not initialize trace point"
+ " events/%s\n", call->name);
+ return ret;
}
- trace_create_file("filter", 0644, call->dir, call,
- filter);
}
+ trace_create_file("filter", 0644, call->dir, call,
+ filter);
trace_create_file("format", 0444, call->dir, call,
format);
@@ -999,11 +1052,17 @@ event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
return 0;
}
-static int __trace_add_event_call(struct ftrace_event_call *call)
+static int
+__trace_add_event_call(struct ftrace_event_call *call, struct module *mod,
+ const struct file_operations *id,
+ const struct file_operations *enable,
+ const struct file_operations *filter,
+ const struct file_operations *format)
{
struct dentry *d_events;
int ret;
+ /* The linker may leave blanks */
if (!call->name)
return -EINVAL;
@@ -1011,8 +1070,8 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
ret = call->class->raw_init(call);
if (ret < 0) {
if (ret != -ENOSYS)
- pr_warning("Could not initialize trace "
- "events/%s\n", call->name);
+ pr_warning("Could not initialize trace events/%s\n",
+ call->name);
return ret;
}
}
@@ -1021,11 +1080,10 @@ static int __trace_add_event_call(struct ftrace_event_call *call)
if (!d_events)
return -ENOENT;
- ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
- &ftrace_enable_fops, &ftrace_event_filter_fops,
- &ftrace_event_format_fops);
+ ret = event_create_dir(call, d_events, id, enable, filter, format);
if (!ret)
list_add(&call->list, &ftrace_events);
+ call->mod = mod;
return ret;
}
@@ -1035,7 +1093,10 @@ int trace_add_event_call(struct ftrace_event_call *call)
{
int ret;
mutex_lock(&event_mutex);
- ret = __trace_add_event_call(call);
+ ret = __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
+ &ftrace_enable_fops,
+ &ftrace_event_filter_fops,
+ &ftrace_event_format_fops);
mutex_unlock(&event_mutex);
return ret;
}
@@ -1152,8 +1213,6 @@ static void trace_module_add_events(struct module *mod)
{
struct ftrace_module_file_ops *file_ops = NULL;
struct ftrace_event_call *call, *start, *end;
- struct dentry *d_events;
- int ret;
start = mod->trace_events;
end = mod->trace_events + mod->num_trace_events;
@@ -1161,38 +1220,14 @@ static void trace_module_add_events(struct module *mod)
if (start == end)
return;
- d_events = event_trace_events_dir();
- if (!d_events)
+ file_ops = trace_create_file_ops(mod);
+ if (!file_ops)
return;
for_each_event(call, start, end) {
- /* The linker may leave blanks */
- if (!call->name)
- continue;
- if (call->class->raw_init) {
- ret = call->class->raw_init(call);
- if (ret < 0) {
- if (ret != -ENOSYS)
- pr_warning("Could not initialize trace "
- "point events/%s\n", call->name);
- continue;
- }
- }
- /*
- * This module has events, create file ops for this module
- * if not already done.
- */
- if (!file_ops) {
- file_ops = trace_create_file_ops(mod);
- if (!file_ops)
- return;
- }
- call->mod = mod;
- ret = event_create_dir(call, d_events,
+ __trace_add_event_call(call, mod,
&file_ops->id, &file_ops->enable,
&file_ops->filter, &file_ops->format);
- if (!ret)
- list_add(&call->list, &ftrace_events);
}
}
@@ -1319,25 +1354,14 @@ static __init int event_trace_init(void)
trace_create_file("enable", 0644, d_events,
NULL, &ftrace_system_enable_fops);
+ if (trace_define_common_fields())
+ pr_warning("tracing: Failed to allocate common fields");
+
for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
- /* The linker may leave blanks */
- if (!call->name)
- continue;
- if (call->class->raw_init) {
- ret = call->class->raw_init(call);
- if (ret < 0) {
- if (ret != -ENOSYS)
- pr_warning("Could not initialize trace "
- "point events/%s\n", call->name);
- continue;
- }
- }
- ret = event_create_dir(call, d_events, &ftrace_event_id_fops,
+ __trace_add_event_call(call, NULL, &ftrace_event_id_fops,
&ftrace_enable_fops,
&ftrace_event_filter_fops,
&ftrace_event_format_fops);
- if (!ret)
- list_add(&call->list, &ftrace_events);
}
while (true) {
@@ -1524,12 +1548,11 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
struct ftrace_entry *entry;
unsigned long flags;
long disabled;
- int resched;
int cpu;
int pc;
pc = preempt_count();
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
cpu = raw_smp_processor_id();
disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
@@ -1551,7 +1574,7 @@ function_test_events_call(unsigned long ip, unsigned long parent_ip)
out:
atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
}
static struct ftrace_ops trace_ops __initdata =
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 57bb1bb3299..36d40104b17 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -497,12 +497,10 @@ void print_subsystem_event_filter(struct event_subsystem *system,
}
static struct ftrace_event_field *
-find_event_field(struct ftrace_event_call *call, char *name)
+__find_event_field(struct list_head *head, char *name)
{
struct ftrace_event_field *field;
- struct list_head *head;
- head = trace_get_fields(call);
list_for_each_entry(field, head, link) {
if (!strcmp(field->name, name))
return field;
@@ -511,6 +509,20 @@ find_event_field(struct ftrace_event_call *call, char *name)
return NULL;
}
+static struct ftrace_event_field *
+find_event_field(struct ftrace_event_call *call, char *name)
+{
+ struct ftrace_event_field *field;
+ struct list_head *head;
+
+ field = __find_event_field(&ftrace_common_fields, name);
+ if (field)
+ return field;
+
+ head = trace_get_fields(call);
+ return __find_event_field(head, name);
+}
+
static void filter_free_pred(struct filter_pred *pred)
{
if (!pred)
@@ -627,9 +639,6 @@ static int init_subsystem_preds(struct event_subsystem *system)
int err;
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->class || !call->class->define_fields)
- continue;
-
if (strcmp(call->class->system, system->name) != 0)
continue;
@@ -646,9 +655,6 @@ static void filter_free_subsystem_preds(struct event_subsystem *system)
struct ftrace_event_call *call;
list_for_each_entry(call, &ftrace_events, list) {
- if (!call->class || !call->class->define_fields)
- continue;
-
if (strcmp(call->class->system, system->name) != 0)
continue;
@@ -1251,9 +1257,6 @@ static int replace_system_preds(struct event_subsystem *system,
list_for_each_entry(call, &ftrace_events, list) {
struct event_filter *filter = call->filter;
- if (!call->class || !call->class->define_fields)
- continue;
-
if (strcmp(call->class->system, system->name) != 0)
continue;
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index 8536e2a6596..4ba44deaac2 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -125,12 +125,6 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call) \
#include "trace_entries.h"
-static int ftrace_raw_init_event(struct ftrace_event_call *call)
-{
- INIT_LIST_HEAD(&call->class->fields);
- return 0;
-}
-
#undef __entry
#define __entry REC
@@ -158,7 +152,7 @@ static int ftrace_raw_init_event(struct ftrace_event_call *call)
struct ftrace_event_class event_class_ftrace_##call = { \
.system = __stringify(TRACE_SYSTEM), \
.define_fields = ftrace_define_fields_##call, \
- .raw_init = ftrace_raw_init_event, \
+ .fields = LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
}; \
\
struct ftrace_event_call __used \
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index b3f3776b0cd..16aee4d44e8 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -54,14 +54,14 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
struct trace_array_cpu *data;
unsigned long flags;
long disabled;
- int cpu, resched;
+ int cpu;
int pc;
if (unlikely(!ftrace_function_enabled))
return;
pc = preempt_count();
- resched = ftrace_preempt_disable();
+ preempt_disable_notrace();
local_save_flags(flags);
cpu = raw_smp_processor_id();
data = tr->data[cpu];
@@ -71,7 +71,7 @@ function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
trace_function(tr, ip, parent_ip, flags, pc);
atomic_dec(&data->disabled);
- ftrace_preempt_enable(resched);
+ preempt_enable_notrace();
}
static void
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c