aboutsummaryrefslogtreecommitdiff
path: root/lib/atomic64.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/atomic64.c')
-rw-r--r--lib/atomic64.c179
1 files changed, 179 insertions, 0 deletions
diff --git a/lib/atomic64.c b/lib/atomic64.c
new file mode 100644
index 00000000000..08a4f068e61
--- /dev/null
+++ b/lib/atomic64.c
@@ -0,0 +1,179 @@
+/*
+ * Generic implementation of 64-bit atomics using spinlocks,
+ * useful on processors that don't have 64-bit atomic instructions.
+ *
+ * Copyright © 2009 Paul Mackerras, IBM Corp. <paulus@au1.ibm.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+#include <linux/types.h>
+#include <linux/cache.h>
+#include <linux/spinlock.h>
+#include <linux/init.h>
+#include <linux/export.h>
+#include <linux/atomic.h>
+
+/*
+ * We use a hashed array of spinlocks to provide exclusive access
+ * to each atomic64_t variable. Since this is expected to used on
+ * systems with small numbers of CPUs (<= 4 or so), we use a
+ * relatively small array of 16 spinlocks to avoid wasting too much
+ * memory on the spinlock array.
+ */
+#define NR_LOCKS 16
+
+/*
+ * Ensure each lock is in a separate cacheline.
+ */
+static union {
+ raw_spinlock_t lock;
+ char pad[L1_CACHE_BYTES];
+} atomic64_lock[NR_LOCKS] __cacheline_aligned_in_smp = {
+ [0 ... (NR_LOCKS - 1)] = {
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(atomic64_lock.lock),
+ },
+};
+
+static inline raw_spinlock_t *lock_addr(const atomic64_t *v)
+{
+ unsigned long addr = (unsigned long) v;
+
+ addr >>= L1_CACHE_SHIFT;
+ addr ^= (addr >> 8) ^ (addr >> 16);
+ return &atomic64_lock[addr & (NR_LOCKS - 1)].lock;
+}
+
+long long atomic64_read(const atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_read);
+
+void atomic64_set(atomic64_t *v, long long i)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+
+ raw_spin_lock_irqsave(lock, flags);
+ v->counter = i;
+ raw_spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_set);
+
+void atomic64_add(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+
+ raw_spin_lock_irqsave(lock, flags);
+ v->counter += a;
+ raw_spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_add);
+
+long long atomic64_add_return(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter += a;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_add_return);
+
+void atomic64_sub(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+
+ raw_spin_lock_irqsave(lock, flags);
+ v->counter -= a;
+ raw_spin_unlock_irqrestore(lock, flags);
+}
+EXPORT_SYMBOL(atomic64_sub);
+
+long long atomic64_sub_return(long long a, atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter -= a;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_sub_return);
+
+long long atomic64_dec_if_positive(atomic64_t *v)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter - 1;
+ if (val >= 0)
+ v->counter = val;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_dec_if_positive);
+
+long long atomic64_cmpxchg(atomic64_t *v, long long o, long long n)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ if (val == o)
+ v->counter = n;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_cmpxchg);
+
+long long atomic64_xchg(atomic64_t *v, long long new)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ long long val;
+
+ raw_spin_lock_irqsave(lock, flags);
+ val = v->counter;
+ v->counter = new;
+ raw_spin_unlock_irqrestore(lock, flags);
+ return val;
+}
+EXPORT_SYMBOL(atomic64_xchg);
+
+int atomic64_add_unless(atomic64_t *v, long long a, long long u)
+{
+ unsigned long flags;
+ raw_spinlock_t *lock = lock_addr(v);
+ int ret = 0;
+
+ raw_spin_lock_irqsave(lock, flags);
+ if (v->counter != u) {
+ v->counter += a;
+ ret = 1;
+ }
+ raw_spin_unlock_irqrestore(lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL(atomic64_add_unless);
'width: 0.0%;'/> -rw-r--r--tools/lguest/Makefile7
-rw-r--r--tools/lguest/extract58
-rw-r--r--tools/lguest/lguest.c2072
-rw-r--r--tools/lguest/lguest.txt125
-rw-r--r--tools/lib/api/Makefile44
-rw-r--r--tools/lib/api/fs/debugfs.c100
-rw-r--r--tools/lib/api/fs/debugfs.h29
-rw-r--r--tools/lib/api/fs/fs.c165
-rw-r--r--tools/lib/api/fs/fs.h14
-rw-r--r--tools/lib/lockdep/Makefile243
-rw-r--r--tools/lib/lockdep/common.c33
-rw-r--r--tools/lib/lockdep/include/liblockdep/common.h50
-rw-r--r--tools/lib/lockdep/include/liblockdep/mutex.h70
-rw-r--r--tools/lib/lockdep/include/liblockdep/rwlock.h86
-rwxr-xr-xtools/lib/lockdep/lockdep3
-rw-r--r--tools/lib/lockdep/lockdep.c2
-rw-r--r--tools/lib/lockdep/lockdep_internals.h1
-rw-r--r--tools/lib/lockdep/lockdep_states.h1
-rw-r--r--tools/lib/lockdep/preload.c445
-rw-r--r--tools/lib/lockdep/rbtree.c1
-rwxr-xr-xtools/lib/lockdep/run_tests.sh27
-rw-r--r--tools/lib/lockdep/tests/AA.c13
-rw-r--r--tools/lib/lockdep/tests/ABBA.c13
-rw-r--r--tools/lib/lockdep/tests/ABBCCA.c15
-rw-r--r--tools/lib/lockdep/tests/ABBCCDDA.c17
-rw-r--r--tools/lib/lockdep/tests/ABCABC.c15
-rw-r--r--tools/lib/lockdep/tests/ABCDBCDA.c17
-rw-r--r--tools/lib/lockdep/tests/ABCDBDDA.c17
-rw-r--r--tools/lib/lockdep/tests/WW.c13
-rw-r--r--tools/lib/lockdep/tests/common.h12
-rw-r--r--tools/lib/lockdep/tests/unlock_balance.c12
-rw-r--r--tools/lib/lockdep/uinclude/asm/hash.h6
-rw-r--r--tools/lib/lockdep/uinclude/asm/hweight.h3
-rw-r--r--tools/lib/lockdep/uinclude/asm/sections.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/bitops.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/compiler.h7
-rw-r--r--tools/lib/lockdep/uinclude/linux/debug_locks.h12
-rw-r--r--tools/lib/lockdep/uinclude/linux/delay.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/ftrace.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/gfp.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/hardirq.h11
-rw-r--r--tools/lib/lockdep/uinclude/linux/hash.h1
-rw-r--r--tools/lib/lockdep/uinclude/linux/interrupt.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/irqflags.h38
-rw-r--r--tools/lib/lockdep/uinclude/linux/kallsyms.h32
-rw-r--r--tools/lib/lockdep/uinclude/linux/kern_levels.h25
-rw-r--r--tools/lib/lockdep/uinclude/linux/kernel.h44
-rw-r--r--tools/lib/lockdep/uinclude/linux/kmemcheck.h8
-rw-r--r--tools/lib/lockdep/uinclude/linux/linkage.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/list.h1
-rw-r--r--tools/lib/lockdep/uinclude/linux/lockdep.h58
-rw-r--r--tools/lib/lockdep/uinclude/linux/module.h6
-rw-r--r--tools/lib/lockdep/uinclude/linux/mutex.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/poison.h1
-rw-r--r--tools/lib/lockdep/uinclude/linux/prefetch.h6
-rw-r--r--tools/lib/lockdep/uinclude/linux/proc_fs.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/rbtree.h1
-rw-r--r--tools/lib/lockdep/uinclude/linux/rbtree_augmented.h2
-rw-r--r--tools/lib/lockdep/uinclude/linux/rcu.h21
-rw-r--r--tools/lib/lockdep/uinclude/linux/seq_file.h3
-rw-r--r--tools/lib/lockdep/uinclude/linux/spinlock.h25
-rw-r--r--tools/lib/lockdep/uinclude/linux/stacktrace.h32
-rw-r--r--tools/lib/lockdep/uinclude/linux/stringify.h7
-rw-r--r--tools/lib/lockdep/uinclude/trace/events/lock.h3
-rw-r--r--tools/lib/symbol/kallsyms.c58
-rw-r--r--tools/lib/symbol/kallsyms.h24
-rw-r--r--tools/lib/traceevent/.gitignore1
-rw-r--r--tools/lib/traceevent/Makefile340
-rw-r--r--tools/lib/traceevent/event-parse.c6025
-rw-r--r--tools/lib/traceevent/event-parse.h944
-rw-r--r--tools/lib/traceevent/event-plugin.c416
-rw-r--r--tools/lib/traceevent/event-utils.h81
-rw-r--r--tools/lib/traceevent/kbuffer-parse.c732
-rw-r--r--tools/lib/traceevent/kbuffer.h67
-rw-r--r--tools/lib/traceevent/parse-filter.c2432
-rw-r--r--tools/lib/traceevent/parse-utils.c85
-rw-r--r--tools/lib/traceevent/plugin_cfg80211.c30
-rw-r--r--tools/lib/traceevent/plugin_function.c194
-rw-r--r--tools/lib/traceevent/plugin_hrtimer.c88
-rw-r--r--tools/lib/traceevent/plugin_jbd2.c77
-rw-r--r--tools/lib/traceevent/plugin_kmem.c94
-rw-r--r--tools/lib/traceevent/plugin_kvm.c465
-rw-r--r--tools/lib/traceevent/plugin_mac80211.c102
-rw-r--r--tools/lib/traceevent/plugin_sched_switch.c160
-rw-r--r--tools/lib/traceevent/plugin_scsi.c429
-rw-r--r--tools/lib/traceevent/plugin_xen.c136
-rw-r--r--tools/lib/traceevent/trace-seq.c249
-rw-r--r--tools/net/Makefile34
-rw-r--r--tools/net/bpf_asm.c52
-rw-r--r--tools/net/bpf_dbg.c1395
-rw-r--r--tools/net/bpf_exp.l144
-rw-r--r--tools/net/bpf_exp.y771
-rw-r--r--tools/net/bpf_jit_disasm.c197
-rwxr-xr-xtools/nfsd/inject_fault.sh49
-rw-r--r--tools/perf/.gitignore26
-rw-r--r--tools/perf/CREDITS30
-rw-r--r--tools/perf/Documentation/Makefile343
-rw-r--r--tools/perf/Documentation/android.txt78
-rw-r--r--tools/perf/Documentation/asciidoc.conf91
-rw-r--r--tools/perf/Documentation/examples.txt225
-rw-r--r--tools/perf/Documentation/jit-interface.txt15
-rw-r--r--tools/perf/Documentation/manpage-1.72.xsl14
-rw-r--r--tools/perf/Documentation/manpage-base.xsl35
-rw-r--r--tools/perf/Documentation/manpage-bold-literal.xsl17
-rw-r--r--tools/perf/Documentation/manpage-normal.xsl13
-rw-r--r--tools/perf/Documentation/manpage-suppress-sp.xsl21
-rw-r--r--tools/perf/Documentation/perf-annotate.txt101
-rw-r--r--tools/perf/Documentation/perf-archive.txt22
-rw-r--r--tools/perf/Documentation/perf-bench.txt214
-rw-r--r--tools/perf/Documentation/perf-buildid-cache.txt53
-rw-r--r--tools/perf/Documentation/perf-buildid-list.txt43
-rw-r--r--tools/perf/Documentation/perf-diff.txt206
-rw-r--r--tools/perf/Documentation/perf-evlist.txt38
-rw-r--r--tools/perf/Documentation/perf-help.txt38
-rw-r--r--tools/perf/Documentation/perf-inject.txt46
-rw-r--r--tools/perf/Documentation/perf-kmem.txt47
-rw-r--r--tools/perf/Documentation/perf-kvm.txt156
-rw-r--r--tools/perf/Documentation/perf-list.txt122
-rw-r--r--tools/perf/Documentation/perf-lock.txt66
-rw-r--r--tools/perf/Documentation/perf-mem.txt52
-rw-r--r--tools/perf/Documentation/perf-probe.txt207
-rw-r--r--tools/perf/Documentation/perf-record.txt219
-rw-r--r--tools/perf/Documentation/perf-report.txt310
-rw-r--r--tools/perf/Documentation/perf-sched.txt55
-rw-r--r--tools/perf/Documentation/perf-script-perl.txt216
-rw-r--r--tools/perf/Documentation/perf-script-python.txt620
-rw-r--r--tools/perf/Documentation/perf-script.txt221
-rw-r--r--tools/perf/Documentation/perf-stat.txt165
-rw-r--r--tools/perf/Documentation/perf-test.txt32
-rw-r--r--tools/perf/Documentation/perf-timechart.txt92
-rw-r--r--tools/perf/Documentation/perf-top.txt231
-rw-r--r--tools/perf/Documentation/perf-trace.txt112
-rw-r--r--tools/perf/Documentation/perf.txt24
-rw-r--r--tools/perf/Documentation/perfconfig.example29
-rw-r--r--tools/perf/MANIFEST39
-rw-r--r--tools/perf/Makefile90
-rw-r--r--tools/perf/Makefile.perf938
-rw-r--r--tools/perf/arch/arm/Makefile14
-rw-r--r--tools/perf/arch/arm/include/perf_regs.h59
-rw-r--r--tools/perf/arch/arm/tests/dwarf-unwind.c60
-rw-r--r--tools/perf/arch/arm/tests/regs_load.S58
-rw-r--r--tools/perf/arch/arm/util/dwarf-regs.c64
-rw-r--r--tools/perf/arch/arm/util/unwind-libdw.c36
-rw-r--r--tools/perf/arch/arm/util/unwind-libunwind.c48
-rw-r--r--tools/perf/arch/arm64/Makefile7
-rw-r--r--tools/perf/arch/arm64/include/perf_regs.h88
-rw-r--r--tools/perf/arch/arm64/util/dwarf-regs.c80
-rw-r--r--tools/perf/arch/arm64/util/unwind-libunwind.c82
-rw-r--r--tools/perf/arch/common.c211
-rw-r--r--tools/perf/arch/common.h10
-rw-r--r--tools/perf/arch/powerpc/Makefile5
-rw-r--r--tools/perf/arch/powerpc/util/dwarf-regs.c88
-rw-r--r--tools/perf/arch/powerpc/util/header.c36
-rw-r--r--tools/perf/arch/s390/Makefile4
-rw-r--r--tools/perf/arch/s390/util/dwarf-regs.c22
-rw-r--r--tools/perf/arch/sh/Makefile4
-rw-r--r--tools/perf/arch/sh/util/dwarf-regs.c55
-rw-r--r--tools/perf/arch/sparc/Makefile4
-rw-r--r--tools/perf/arch/sparc/util/dwarf-regs.c43
-rw-r--r--tools/perf/arch/x86/Makefile17
-rw-r--r--tools/perf/arch/x86/include/perf_regs.h86
-rw-r--r--tools/perf/arch/x86/tests/dwarf-unwind.c60
-rw-r--r--tools/perf/arch/x86/tests/regs_load.S98
-rw-r--r--tools/perf/arch/x86/util/dwarf-regs.c75
-rw-r--r--tools/perf/arch/x86/util/header.c59
-rw-r--r--tools/perf/arch/x86/util/tsc.c59
-rw-r--r--tools/perf/arch/x86/util/tsc.h20
-rw-r--r--tools/perf/arch/x86/util/unwind-libdw.c51
-rw-r--r--tools/perf/arch/x86/util/unwind-libunwind.c111
-rw-r--r--tools/perf/bench/bench.h47
-rw-r--r--tools/perf/bench/futex-hash.c212
-rw-r--r--tools/perf/bench/futex-requeue.c211
-rw-r--r--tools/perf/bench/futex-wake.c201
-rw-r--r--tools/perf/bench/futex.h71
-rw-r--r--tools/perf/bench/mem-memcpy-arch.h12
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm-def.h12
-rw-r--r--tools/perf/bench/mem-memcpy-x86-64-asm.S12
-rw-r--r--tools/perf/bench/mem-memcpy.c305
-rw-r--r--tools/perf/bench/mem-memset-arch.h12
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm-def.h12
-rw-r--r--tools/perf/bench/mem-memset-x86-64-asm.S13
-rw-r--r--tools/perf/bench/mem-memset.c297
-rw-r--r--tools/perf/bench/numa.c1744
-rw-r--r--tools/perf/bench/sched-messaging.c336
-rw-r--r--tools/perf/bench/sched-pipe.c184
-rw-r--r--tools/perf/builtin-annotate.c377
-rw-r--r--tools/perf/builtin-bench.c282
-rw-r--r--tools/perf/builtin-buildid-cache.c396
-rw-r--r--tools/perf/builtin-buildid-list.c111
-rw-r--r--tools/perf/builtin-diff.c1164
-rw-r--r--tools/perf/builtin-evlist.c66
-rw-r--r--tools/perf/builtin-help.c479
-rw-r--r--tools/perf/builtin-inject.c463
-rw-r--r--tools/perf/builtin-kmem.c717
-rw-r--r--tools/perf/builtin-kvm.c1737
-rw-r--r--tools/perf/builtin-list.c77
-rw-r--r--tools/perf/builtin-lock.c1015
-rw-r--r--tools/perf/builtin-mem.c246
-rw-r--r--tools/perf/builtin-probe.c530
-rw-r--r--tools/perf/builtin-record.c960
-rw-r--r--tools/perf/builtin-report.c816
-rw-r--r--tools/perf/builtin-sched.c1777
-rw-r--r--tools/perf/builtin-script.c1826
-rw-r--r--tools/perf/builtin-stat.c1834
-rw-r--r--tools/perf/builtin-timechart.c1350
-rw-r--r--tools/perf/builtin-top.c1241
-rw-r--r--tools/perf/builtin-trace.c2397
-rw-r--r--tools/perf/builtin.h42
-rw-r--r--tools/perf/command-list.txt27
-rw-r--r--tools/perf/config/Makefile738
-rw-r--r--tools/perf/config/Makefile.arch23
-rw-r--r--tools/perf/config/feature-checks/.gitignore2
-rw-r--r--tools/perf/config/feature-checks/Makefile149
-rw-r--r--tools/perf/config/feature-checks/test-all.c116
-rw-r--r--tools/perf/config/feature-checks/test-backtrace.c13
-rw-r--r--tools/perf/config/feature-checks/test-bionic.c6
-rw-r--r--tools/perf/config/feature-checks/test-cplus-demangle.c14
-rw-r--r--tools/perf/config/feature-checks/test-dwarf.c10
-rw-r--r--tools/perf/config/feature-checks/test-fortify-source.c6
-rw-r--r--tools/perf/config/feature-checks/test-glibc.c8
-rw-r--r--tools/perf/config/feature-checks/test-gtk2-infobar.c11
-rw-r--r--tools/perf/config/feature-checks/test-gtk2.c10
-rw-r--r--tools/perf/config/feature-checks/test-hello.c6
-rw-r--r--tools/perf/config/feature-checks/test-libaudit.c10
-rw-r--r--tools/perf/config/feature-checks/test-libbfd.c15
-rw-r--r--tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c13
-rw-r--r--tools/perf/config/feature-checks/test-libelf-getphdrnum.c8
-rw-r--r--tools/perf/config/feature-checks/test-libelf-mmap.c8
-rw-r--r--tools/perf/config/feature-checks/test-libelf.c8
-rw-r--r--tools/perf/config/feature-checks/test-libnuma.c9
-rw-r--r--tools/perf/config/feature-checks/test-libperl.c9
-rw-r--r--tools/perf/config/feature-checks/test-libpython-version.c10
-rw-r--r--tools/perf/config/feature-checks/test-libpython.c8
-rw-r--r--tools/perf/config/feature-checks/test-libslang.c6
-rw-r--r--tools/perf/config/feature-checks/test-libunwind-debug-frame.c16
-rw-r--r--tools/perf/config/feature-checks/test-libunwind.c27
-rw-r--r--tools/perf/config/feature-checks/test-stackprotector-all.c6
-rw-r--r--tools/perf/config/feature-checks/test-timerfd.c18
-rw-r--r--tools/perf/config/utilities.mak180
-rw-r--r--tools/perf/design.txt462
-rw-r--r--tools/perf/perf-archive.sh47
-rw-r--r--tools/perf/perf-completion.sh206
-rw-r--r--tools/perf/perf-sys.h190
-rw-r--r--tools/perf/perf.c556
-rw-r--r--tools/perf/perf.h68
-rwxr-xr-xtools/perf/python/twatch.py41
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Context.c135
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Context.xs42
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/Makefile.PL17
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/README59
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Context.pm55
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Core.pm192
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/lib/Perf/Trace/Util.pm94
-rw-r--r--tools/perf/scripts/perl/Perf-Trace-Util/typemap1
-rw-r--r--tools/perf/scripts/perl/bin/check-perf-trace-record2
-rw-r--r--tools/perf/scripts/perl/bin/failed-syscalls-record2
-rw-r--r--tools/perf/scripts/perl/bin/failed-syscalls-report10
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-file-record3
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-file-report10
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-pid-record2
-rw-r--r--tools/perf/scripts/perl/bin/rw-by-pid-report3
-rw-r--r--tools/perf/scripts/perl/bin/rwtop-record2
-rw-r--r--tools/perf/scripts/perl/bin/rwtop-report20
-rw-r--r--tools/perf/scripts/perl/bin/wakeup-latency-record6
-rw-r--r--tools/perf/scripts/perl/bin/wakeup-latency-report3
-rw-r--r--tools/perf/scripts/perl/check-perf-trace.pl106
-rw-r--r--tools/perf/scripts/perl/failed-syscalls.pl42
-rw-r--r--tools/perf/scripts/perl/rw-by-file.pl106
-rw-r--r--tools/perf/scripts/perl/rw-by-pid.pl184
-rw-r--r--tools/perf/scripts/perl/rwtop.pl203
-rw-r--r--tools/perf/scripts/perl/wakeup-latency.pl107
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/Context.c88
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Core.py121
-rwxr-xr-xtools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/EventClass.py94
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/SchedGui.py184
-rw-r--r--tools/perf/scripts/python/Perf-Trace-Util/lib/Perf/Trace/Util.py86
-rw-r--r--tools/perf/scripts/python/bin/event_analyzing_sample-record8
-rw-r--r--tools/perf/scripts/python/bin/event_analyzing_sample-report3
-rw-r--r--tools/perf/scripts/python/bin/failed-syscalls-by-pid-record2
-rw-r--r--tools/perf/scripts/python/bin/failed-syscalls-by-pid-report10
-rw-r--r--tools/perf/scripts/python/bin/futex-contention-record2
-rw-r--r--tools/perf/scripts/python/bin/futex-contention-report4
-rwxr-xr-xtools/perf/scripts/python/bin/net_dropmonitor-record2
-rwxr-xr-xtools/perf/scripts/python/bin/net_dropmonitor-report4
-rw-r--r--tools/perf/scripts/python/bin/netdev-times-record8
-rw-r--r--tools/perf/scripts/python/bin/netdev-times-report5
-rw-r--r--tools/perf/scripts/python/bin/sched-migration-record2
-rw-r--r--tools/perf/scripts/python/bin/sched-migration-report3
-rw-r--r--tools/perf/scripts/python/bin/sctop-record2
-rw-r--r--tools/perf/scripts/python/bin/sctop-report24
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-by-pid-record2
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-by-pid-report10
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-record2
-rw-r--r--tools/perf/scripts/python/bin/syscall-counts-report10
-rw-r--r--tools/perf/scripts/python/check-perf-trace.py82
-rw-r--r--tools/perf/scripts/python/event_analyzing_sample.py189
-rw-r--r--tools/perf/scripts/python/failed-syscalls-by-pid.py73
-rw-r--r--tools/perf/scripts/python/futex-contention.py50
-rwxr-xr-xtools/perf/scripts/python/net_dropmonitor.py75
-rw-r--r--tools/perf/scripts/python/netdev-times.py464
-rw-r--r--tools/perf/scripts/python/sched-migration.py461
-rw-r--r--tools/perf/scripts/python/sctop.py75
-rw-r--r--tools/perf/scripts/python/syscall-counts-by-pid.py69
-rw-r--r--tools/perf/scripts/python/syscall-counts.py59
-rw-r--r--tools/perf/tests/attr.c176
-rw-r--r--tools/perf/tests/attr.py332
-rw-r--r--tools/perf/tests/attr/README64
-rw-r--r--tools/perf/tests/attr/base-record40
-rw-r--r--tools/perf/tests/attr/base-stat40
-rw-r--r--tools/perf/tests/attr/test-record-C013
-rw-r--r--tools/perf/tests/attr/test-record-basic5
-rw-r--r--tools/perf/tests/attr/test-record-branch-any8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-any8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-any_call8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-any_ret8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-hv8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-ind_call8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-k8
-rw-r--r--tools/perf/tests/attr/test-record-branch-filter-u8
-rw-r--r--tools/perf/tests/attr/test-record-count8
-rw-r--r--tools/perf/tests/attr/test-record-data11
-rw-r--r--tools/perf/tests/attr/test-record-freq6
-rw-r--r--tools/perf/tests/attr/test-record-graph-default6
-rw-r--r--tools/perf/tests/attr/test-record-graph-dwarf10
-rw-r--r--tools/perf/tests/attr/test-record-graph-fp6
-rw-r--r--tools/perf/tests/attr/test-record-group20
-rw-r--r--tools/perf/tests/attr/test-record-group-sampling36
-rw-r--r--tools/perf/tests/attr/test-record-group121
-rw-r--r--tools/perf/tests/attr/test-record-no-delay9
-rw-r--r--tools/perf/tests/attr/test-record-no-inherit7
-rw-r--r--tools/perf/tests/attr/test-record-no-samples6
-rw-r--r--tools/perf/tests/attr/test-record-period7
-rw-r--r--tools/perf/tests/attr/test-record-raw7
-rw-r--r--tools/perf/tests/attr/test-stat-C09
-rw-r--r--tools/perf/tests/attr/test-stat-basic6
-rw-r--r--tools/perf/tests/attr/test-stat-default64
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-1101
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-2155
-rw-r--r--tools/perf/tests/attr/test-stat-detailed-3173
-rw-r--r--tools/perf/tests/attr/test-stat-group15
-rw-r--r--tools/perf/tests/attr/test-stat-group115
-rw-r--r--tools/perf/tests/attr/test-stat-no-inherit7
-rw-r--r--tools/perf/tests/bp_signal.c192
-rw-r--r--tools/perf/tests/bp_signal_overflow.c132
-rw-r--r--tools/perf/tests/builtin-test.c307
-rw-r--r--tools/perf/tests/code-reading.c581
-rw-r--r--tools/perf/tests/dso-data.c358
-rw-r--r--tools/perf/tests/dwarf-unwind.c144
-rw-r--r--tools/perf/tests/evsel-roundtrip-name.c114
-rw-r--r--tools/perf/tests/evsel-tp-sched.c81
-rw-r--r--tools/perf/tests/hists_common.c209
-rw-r--r--tools/perf/tests/hists_common.h75
-rw-r--r--tools/perf/tests/hists_cumulate.c726
-rw-r--r--tools/perf/tests/hists_filter.c289
-rw-r--r--tools/perf/tests/hists_link.c339
-rw-r--r--tools/perf/tests/hists_output.c621
-rw-r--r--tools/perf/tests/keep-tracking.c152
-rw-r--r--tools/perf/tests/make233
-rw-r--r--tools/perf/tests/mmap-basic.c148
-rw-r--r--tools/perf/tests/mmap-thread-lookup.c233
-rw-r--r--tools/perf/tests/open-syscall-all-cpus.c109
-rw-r--r--tools/perf/tests/open-syscall-tp-fields.c117
-rw-r--r--tools/perf/tests/open-syscall.c55
-rw-r--r--tools/perf/tests/parse-events.c1590
-rw-r--r--tools/perf/tests/parse-no-sample-id-all.c108
-rw-r--r--tools/perf/tests/perf-record.c309
-rwxr-xr-xtools/perf/tests/perf-targz-src-pkg21
-rw-r--r--tools/perf/tests/perf-time-to-tsc.c172
-rw-r--r--tools/perf/tests/pmu.c173
-rw-r--r--tools/perf/tests/python-use.c23
-rw-r--r--tools/perf/tests/rdpmc.c173
-rw-r--r--tools/perf/tests/sample-parsing.c320
-rw-r--r--tools/perf/tests/sw-clock.c122
-rw-r--r--tools/perf/tests/task-exit.c118
-rw-r--r--tools/perf/tests/tests.h60
-rw-r--r--tools/perf/tests/thread-mg-share.c90
-rw-r--r--tools/perf/tests/vmlinux-kallsyms.c242
-rw-r--r--tools/perf/ui/browser.c719
-rw-r--r--tools/perf/ui/browser.h74
-rw-r--r--tools/perf/ui/browsers/annotate.c1023
-rw-r--r--tools/perf/ui/browsers/header.c127
-rw-r--r--tools/perf/ui/browsers/hists.c2007
-rw-r--r--tools/perf/ui/browsers/map.c130
-rw-r--r--tools/perf/ui/browsers/map.h6
-rw-r--r--tools/perf/ui/browsers/scripts.c187
-rw-r--r--tools/perf/ui/gtk/annotate.c252
-rw-r--r--tools/perf/ui/gtk/browser.c87
-rw-r--r--tools/perf/ui/gtk/gtk.h67
-rw-r--r--tools/perf/ui/gtk/helpline.c57
-rw-r--r--tools/perf/ui/gtk/hists.c365
-rw-r--r--tools/perf/ui/gtk/progress.c59
-rw-r--r--tools/perf/ui/gtk/setup.c23
-rw-r--r--tools/perf/ui/gtk/util.c112
-rw-r--r--tools/perf/ui/helpline.c73
-rw-r--r--tools/perf/ui/helpline.h29
-rw-r--r--tools/perf/ui/hist.c624
-rw-r--r--tools/perf/ui/keysyms.h28
-rw-r--r--tools/perf/ui/libslang.h29
-rw-r--r--tools/perf/ui/progress.c38
-rw-r--r--tools/perf/ui/progress.h23
-rw-r--r--tools/perf/ui/setup.c107
-rw-r--r--tools/perf/ui/stdio/hist.c514
-rw-r--r--tools/perf/ui/tui/helpline.c58
-rw-r--r--tools/perf/ui/tui/progress.c44
-rw-r--r--tools/perf/ui/tui/setup.c151
-rw-r--r--tools/perf/ui/tui/tui.h6
-rw-r--r--tools/perf/ui/tui/util.c256
-rw-r--r--tools/perf/ui/ui.h29
-rw-r--r--tools/perf/ui/util.c84
-rw-r--r--tools/perf/ui/util.h21
-rwxr-xr-xtools/perf/util/PERF-VERSION-GEN50
-rw-r--r--tools/perf/util/abspath.c37
-rw-r--r--tools/perf/util/alias.c76
-rw-r--r--tools/perf/util/annotate.c1412
-rw-r--r--tools/perf/util/annotate.h177
-rw-r--r--tools/perf/util/bitmap.c31
-rw-r--r--tools/perf/util/build-id.c108
-rw-r--r--tools/perf/util/build-id.h18
-rw-r--r--tools/perf/util/cache.h76
-rw-r--r--tools/perf/util/callchain.c674
-rw-r--r--tools/perf/util/callchain.h179
-rw-r--r--tools/perf/util/cgroup.c177
-rw-r--r--tools/perf/util/cgroup.h17
-rw-r--r--tools/perf/util/color.c337
-rw-r--r--tools/perf/util/color.h47
-rw-r--r--tools/perf/util/comm.c122
-rw-r--r--tools/perf/util/comm.h21
-rw-r--r--tools/perf/util/config.c513
-rw-r--r--tools/perf/util/cpumap.c479
-rw-r--r--tools/perf/util/cpumap.h84
-rw-r--r--tools/perf/util/ctype.c39
-rw-r--r--tools/perf/util/data.c133
-rw-r--r--tools/perf/util/data.h50
-rw-r--r--tools/perf/util/debug.c107
-rw-r--r--tools/perf/util/debug.h22
-rw-r--r--tools/perf/util/dso.c900
-rw-r--r--tools/perf/util/dso.h232
-rw-r--r--tools/perf/util/dwarf-aux.c884
-rw-r--r--tools/perf/util/dwarf-aux.h118
-rw-r--r--tools/perf/util/environment.c9
-rw-r--r--tools/perf/util/event.c868
-rw-r--r--tools/perf/util/event.h316
-rw-r--r--tools/perf/util/evlist.c1245
-rw-r--r--tools/perf/util/evlist.h265
-rw-r--r--tools/perf/util/evsel.c2036
-rw-r--r--tools/perf/util/evsel.h365
-rw-r--r--tools/perf/util/exec_cmd.c148
-rw-r--r--tools/perf/util/exec_cmd.h12
-rwxr-xr-xtools/perf/util/generate-cmdlist.sh39
-rw-r--r--tools/perf/util/header.c3071
-rw-r--r--tools/perf/util/header.h159
-rw-r--r--tools/perf/util/help.c339
-rw-r--r--tools/perf/util/help.h29
-rw-r--r--tools/perf/util/hist.c1414
-rw-r--r--tools/perf/util/hist.h347
-rw-r--r--tools/perf/util/hweight.c31
-rw-r--r--tools/perf/util/include/asm/alternative-asm.h8
-rw-r--r--tools/perf/util/include/asm/asm-offsets.h1
-rw-r--r--tools/perf/util/include/asm/byteorder.h2
-rw-r--r--tools/perf/util/include/asm/cpufeature.h9
-rw-r--r--tools/perf/util/include/asm/dwarf2.h13
-rw-r--r--tools/perf/util/include/asm/hash.h6
-rw-r--r--tools/perf/util/include/asm/hweight.h8
-rw-r--r--tools/perf/util/include/asm/swab.h1
-rw-r--r--tools/perf/util/include/asm/system.h1
-rw-r--r--tools/perf/util/include/asm/uaccess.h14
-rw-r--r--tools/perf/util/include/asm/unistd_32.h1
-rw-r--r--tools/perf/util/include/asm/unistd_64.h1
-rw-r--r--tools/perf/util/include/dwarf-regs.h8
-rw-r--r--tools/perf/util/include/linux/bitmap.h49
-rw-r--r--tools/perf/util/include/linux/bitops.h160
-rw-r--r--tools/perf/util/include/linux/const.h1
-rw-r--r--tools/perf/util/include/linux/ctype.h1
-rw-r--r--tools/perf/util/include/linux/kernel.h128
-rw-r--r--tools/perf/util/include/linux/linkage.h13
-rw-r--r--tools/perf/util/include/linux/list.h29
-rw-r--r--tools/perf/util/include/linux/poison.h1
-rw-r--r--tools/perf/util/include/linux/rbtree.h2
-rw-r--r--tools/perf/util/include/linux/rbtree_augmented.h2
-rw-r--r--tools/perf/util/include/linux/string.h4
-rw-r--r--tools/perf/util/intlist.c146
-rw-r--r--tools/perf/util/intlist.h77
-rw-r--r--tools/perf/util/levenshtein.c84
-rw-r--r--tools/perf/util/levenshtein.h8
-rw-r--r--tools/perf/util/machine.c1422
-rw-r--r--tools/perf/util/machine.h194
-rw-r--r--tools/perf/util/map.c764
-rw-r--r--tools/perf/util/map.h220
-rw-r--r--tools/perf/util/pager.c100
-rw-r--r--tools/perf/util/parse-events.c1354
-rw-r--r--tools/perf/util/parse-events.h112
-rw-r--r--tools/perf/util/parse-events.l218
-rw-r--r--tools/perf/util/parse-events.y454
-rw-r--r--tools/perf/util/parse-options.c681
-rw-r--r--tools/perf/util/parse-options.h209
-rw-r--r--tools/perf/util/path.c161
-rw-r--r--tools/perf/util/perf_regs.c27
-rw-r--r--tools/perf/util/perf_regs.h29
-rw-r--r--tools/perf/util/pmu.c796
-rw-r--r--tools/perf/util/pmu.h49
-rw-r--r--tools/perf/util/pmu.l43
-rw-r--r--tools/perf/util/pmu.y92
-rw-r--r--tools/perf/util/probe-event.c2573
-rw-r--r--tools/perf/util/probe-event.h141
-rw-r--r--tools/perf/util/probe-finder.c1603
-rw-r--r--tools/perf/util/probe-finder.h112
-rw-r--r--tools/perf/util/pstack.c75
-rw-r--r--tools/perf/util/pstack.h14
-rw-r--r--tools/perf/util/python-ext-sources22
-rw-r--r--tools/perf/util/python.c1074
-rw-r--r--tools/perf/util/quote.c54
-rw-r--r--tools/perf/util/quote.h29
-rw-r--r--tools/perf/util/rblist.c128
-rw-r--r--tools/perf/util/rblist.h48
-rw-r--r--tools/perf/util/record.c215
-rw-r--r--tools/perf/util/run-command.c214
-rw-r--r--tools/perf/util/run-command.h58
-rw-r--r--tools/perf/util/scripting-engines/trace-event-perl.c638
-rw-r--r--tools/perf/util/scripting-engines/trace-event-python.c698
-rw-r--r--tools/perf/util/session.c1673
-rw-r--r--tools/perf/util/session.h129
-rw-r--r--tools/perf/util/setup.py48
-rw-r--r--tools/perf/util/sigchain.c52
-rw-r--r--tools/perf/util/sigchain.h10
-rw-r--r--tools/perf/util/sort.c1706
-rw-r--r--tools/perf/util/sort.h221
-rw-r--r--tools/perf/util/srcline.c299
-rw-r--r--tools/perf/util/stat.c63
-rw-r--r--tools/perf/util/stat.h25
-rw-r--r--tools/perf/util/strbuf.c134
-rw-r--r--tools/perf/util/strbuf.h92
-rw-r--r--tools/perf/util/strfilter.c199
-rw-r--r--tools/perf/util/strfilter.h48
-rw-r--r--tools/perf/util/string.c413
-rw-r--r--tools/perf/util/strlist.c173
-rw-r--r--tools/perf/util/strlist.h79
-rw-r--r--tools/perf/util/svghelper.c716
-rw-r--r--tools/perf/util/svghelper.h33
-rw-r--r--tools/perf/util/symbol-elf.c1625
-rw-r--r--tools/perf/util/symbol-minimal.c330
-rw-r--r--tools/perf/util/symbol.c1880
-rw-r--r--tools/perf/util/symbol.h297
-rw-r--r--tools/perf/util/target.c158
-rw-r--r--tools/perf/util/target.h79
-rw-r--r--tools/perf/util/thread.c193
-rw-r--r--tools/perf/util/thread.h81
-rw-r--r--tools/perf/util/thread_map.c294
-rw-r--r--tools/perf/util/thread_map.h29
-rw-r--r--tools/perf/util/tool.h47
-rw-r--r--tools/perf/util/top.c117
-rw-r--r--tools/perf/util/top.h47
-rw-r--r--tools/perf/util/trace-event-info.c600
-rw-r--r--tools/perf/util/trace-event-parse.c263
-rw-r--r--tools/perf/util/trace-event-read.c444
-rw-r--r--tools/perf/util/trace-event-scripting.c171
-rw-r--r--tools/perf/util/trace-event.c82
-rw-r--r--tools/perf/util/trace-event.h90
-rw-r--r--tools/perf/util/unwind-libdw.c210
-rw-r--r--tools/perf/util/unwind-libdw.h21
-rw-r--r--tools/perf/util/unwind-libunwind.c579
-rw-r--r--tools/perf/util/unwind.h37
-rw-r--r--tools/perf/util/usage.c84
-rw-r--r--tools/perf/util/util.c542
-rw-r--r--tools/perf/util/util.h333
-rw-r--r--tools/perf/util/values.c232
-rw-r--r--tools/perf/util/values.h27
-rw-r--r--tools/perf/util/vdso.c111
-rw-r--r--tools/perf/util/vdso.h18
-rw-r--r--tools/perf/util/wrapper.c41
-rw-r--r--tools/perf/util/xyarray.c20
-rw-r--r--tools/perf/util/xyarray.h20
-rw-r--r--tools/power/acpi/Makefile156
-rw-r--r--tools/power/acpi/common/cmfsize.c101
-rw-r--r--tools/power/acpi/common/getopt.c239
-rw-r--r--tools/power/acpi/man/acpidump.8120
-rw-r--r--tools/power/acpi/os_specific/service_layers/oslinuxtbl.c1329
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixdir.c204
-rw-r--r--tools/power/acpi/os_specific/service_layers/osunixmap.c151
-rw-r--r--tools/power/acpi/tools/acpidump/acpidump.h130
-rw-r--r--tools/power/acpi/tools/acpidump/apdump.c451
-rw-r--r--tools/power/acpi/tools/acpidump/apfiles.c228
-rw-r--r--tools/power/acpi/tools/acpidump/apmain.c351
-rw-r--r--tools/power/acpi/tools/ec/Makefile22
-rw-r--r--tools/power/acpi/tools/ec/ec_access.c238
-rw-r--r--tools/power/cpupower/.gitignore29
-rw-r--r--tools/power/cpupower/Makefile310
-rw-r--r--tools/power/cpupower/README47
-rw-r--r--tools/power/cpupower/ToDo10
-rw-r--r--tools/power/cpupower/bench/Makefile36
-rw-r--r--tools/power/cpupower/bench/README-BENCH124
-rw-r--r--tools/power/cpupower/bench/benchmark.c194
-rw-r--r--tools/power/cpupower/bench/benchmark.h29
-rw-r--r--tools/power/cpupower/bench/config.h36
-rw-r--r--tools/power/cpupower/bench/cpufreq-bench_plot.sh104
-rw-r--r--tools/power/cpupower/bench/cpufreq-bench_script.sh101
-rw-r--r--tools/power/cpupower/bench/example.cfg11
-rw-r--r--tools/power/cpupower/bench/main.c202
-rw-r--r--tools/power/cpupower/bench/parse.c225
-rw-r--r--tools/power/cpupower/bench/parse.h53
-rw-r--r--tools/power/cpupower/bench/system.c191
-rw-r--r--tools/power/cpupower/bench/system.h29
-rw-r--r--tools/power/cpupower/debug/i386/Makefile41
-rw-r--r--tools/power/cpupower/debug/i386/centrino-decode.c113
-rw-r--r--tools/power/cpupower/debug/i386/dump_psb.c196
-rw-r--r--tools/power/cpupower/debug/i386/intel_gsic.c78
-rw-r--r--tools/power/cpupower/debug/i386/powernow-k8-decode.c96
-rw-r--r--tools/power/cpupower/debug/kernel/Makefile23
-rw-r--r--tools/power/cpupower/debug/kernel/cpufreq-test_tsc.c110
-rw-r--r--tools/power/cpupower/debug/x86_64/Makefile30
-rw-r--r--tools/power/cpupower/lib/cpufreq.c208
-rw-r--r--tools/power/cpupower/lib/cpufreq.h223
-rw-r--r--tools/power/cpupower/lib/sysfs.c672
-rw-r--r--tools/power/cpupower/lib/sysfs.h31
-rw-r--r--tools/power/cpupower/man/cpupower-frequency-info.177
-rw-r--r--tools/power/cpupower/man/cpupower-frequency-set.152
-rw-r--r--tools/power/cpupower/man/cpupower-idle-info.191
-rw-r--r--tools/power/cpupower/man/cpupower-idle-set.177
-rw-r--r--tools/power/cpupower/man/cpupower-info.119
-rw-r--r--tools/power/cpupower/man/cpupower-monitor.1198
-rw-r--r--tools/power/cpupower/man/cpupower-set.165
-rw-r--r--tools/power/cpupower/man/cpupower.172
-rw-r--r--tools/power/cpupower/po/cs.po944
-rw-r--r--tools/power/cpupower/po/de.po961
-rw-r--r--tools/power/cpupower/po/fr.po947
-rw-r--r--tools/power/cpupower/po/it.po961
-rw-r--r--tools/power/cpupower/po/pt.po957
-rw-r--r--tools/power/cpupower/utils/builtin.h12
-rw-r--r--tools/power/cpupower/utils/cpufreq-info.c698
-rw-r--r--tools/power/cpupower/utils/cpufreq-set.c331
-rw-r--r--tools/power/cpupower/utils/cpuidle-info.c208
-rw-r--r--tools/power/cpupower/utils/cpuidle-set.c181
-rw-r--r--tools/power/cpupower/utils/cpupower-info.c103
-rw-r--r--tools/power/cpupower/utils/cpupower-set.c95
-rw-r--r--tools/power/cpupower/utils/cpupower.c229
-rw-r--r--tools/power/cpupower/utils/helpers/amd.c135
-rw-r--r--tools/power/cpupower/utils/helpers/bitmask.c292
-rw-r--r--tools/power/cpupower/utils/helpers/bitmask.h33
-rw-r--r--tools/power/cpupower/utils/helpers/cpuid.c178
-rw-r--r--tools/power/cpupower/utils/helpers/helpers.h195
-rw-r--r--tools/power/cpupower/utils/helpers/misc.c27
-rw-r--r--tools/power/cpupower/utils/helpers/msr.c115
-rw-r--r--tools/power/cpupower/utils/helpers/pci.c55
-rw-r--r--tools/power/cpupower/utils/helpers/sysfs.c472
-rw-r--r--tools/power/cpupower/utils/helpers/sysfs.h38
-rw-r--r--tools/power/cpupower/utils/helpers/topology.c116
-rw-r--r--tools/power/cpupower/utils/idle_monitor/amd_fam14h_idle.c335
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c196
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c455
-rw-r--r--tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h85
-rw-r--r--tools/power/cpupower/utils/idle_monitor/hsw_ext_idle.c196
-rw-r--r--tools/power/cpupower/utils/idle_monitor/idle_monitors.def8
-rw-r--r--tools/power/cpupower/utils/idle_monitor/idle_monitors.h18
-rw-r--r--tools/power/cpupower/utils/idle_monitor/mperf_monitor.c338
-rw-r--r--tools/power/cpupower/utils/idle_monitor/nhm_idle.c216
-rw-r--r--tools/power/cpupower/utils/idle_monitor/snb_idle.c200
-rwxr-xr-xtools/power/cpupower/utils/version-gen.sh35
-rw-r--r--tools/power/x86/turbostat/.gitignore1
-rw-r--r--tools/power/x86/turbostat/Makefile22
-rw-r--r--tools/power/x86/turbostat/turbostat.8221
-rw-r--r--tools/power/x86/turbostat/turbostat.c2462
-rw-r--r--tools/power/x86/x86_energy_perf_policy/Makefile10
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.8104
-rw-r--r--tools/power/x86/x86_energy_perf_policy/x86_energy_perf_policy.c324
-rw-r--r--tools/scripts/Makefile.include84
-rw-r--r--tools/testing/fault-injection/failcmd.sh219
-rwxr-xr-xtools/testing/ktest/compare-ktest-sample.pl32
-rw-r--r--tools/testing/ktest/examples/README32
-rw-r--r--tools/testing/ktest/examples/crosstests.conf254
-rw-r--r--tools/testing/ktest/examples/include/bisect.conf90
-rw-r--r--tools/testing/ktest/examples/include/defaults.conf157
-rw-r--r--tools/testing/ktest/examples/include/min-config.conf60
-rw-r--r--tools/testing/ktest/examples/include/patchcheck.conf111
-rw-r--r--tools/testing/ktest/examples/include/tests.conf74
-rw-r--r--tools/testing/ktest/examples/kvm.conf92
-rw-r--r--tools/testing/ktest/examples/snowball.conf53
-rw-r--r--tools/testing/ktest/examples/test.conf62
-rwxr-xr-xtools/testing/ktest/ktest.pl4181
-rw-r--r--tools/testing/ktest/sample.conf1289
-rw-r--r--tools/testing/selftests/Makefile28
-rw-r--r--tools/testing/selftests/README.txt42
-rw-r--r--tools/testing/selftests/breakpoints/Makefile23
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c394
-rw-r--r--tools/testing/selftests/cpu-hotplug/Makefile6
-rw-r--r--tools/testing/selftests/cpu-hotplug/on-off-test.sh221
-rw-r--r--tools/testing/selftests/efivarfs/Makefile12
-rw-r--r--tools/testing/selftests/efivarfs/create-read.c38
-rw-r--r--tools/testing/selftests/efivarfs/efivarfs.sh198
-rw-r--r--tools/testing/selftests/efivarfs/open-unlink.c63
-rw-r--r--tools/testing/selftests/ipc/Makefile25
-rw-r--r--tools/testing/selftests/ipc/msgque.c252
-rw-r--r--tools/testing/selftests/kcmp/.gitignore2
-rw-r--r--tools/testing/selftests/kcmp/Makefile28
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c96
-rw-r--r--tools/testing/selftests/memory-hotplug/Makefile6
-rw-r--r--tools/testing/selftests/memory-hotplug/on-off-test.sh230
-rw-r--r--tools/testing/selftests/mqueue/.gitignore2
-rw-r--r--tools/testing/selftests/mqueue/Makefile10
-rw-r--r--tools/testing/selftests/mqueue/mq_open_tests.c492
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c741
-rw-r--r--tools/testing/selftests/net/.gitignore3
-rw-r--r--tools/testing/selftests/net/Makefile25
-rw-r--r--tools/testing/selftests/net/psock_fanout.c312
-rw-r--r--tools/testing/selftests/net/psock_lib.h127
-rw-r--r--tools/testing/selftests/net/psock_tpacket.c805
-rw-r--r--tools/testing/selftests/net/run_afpackettests26
-rw-r--r--tools/testing/selftests/net/run_netsocktests12
-rw-r--r--tools/testing/selftests/net/socket.c92
-rw-r--r--tools/testing/selftests/powerpc/Makefile39
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile29
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h89
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/processor.h0
l---------tools/testing/selftests/powerpc/copyloops/copyuser_64.S1
l---------tools/testing/selftests/powerpc/copyloops/copyuser_power7.S1
l---------tools/testing/selftests/powerpc/copyloops/memcpy_64.S1
l---------tools/testing/selftests/powerpc/copyloops/memcpy_power7.S1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/validate.c99
-rw-r--r--tools/testing/selftests/powerpc/harness.c114
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile18
-rw-r--r--tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c72
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile41
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_instructions.c135
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile32
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c106
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/close_clears_pmcc_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c93
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c89
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c58
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c117
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.c727
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.h78
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_handler.S365
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_child_test.c86
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c92
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c86
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/event_attributes_test.c131
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S43
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/fork_cleanup_test.c79
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/instruction_count_test.c164
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c100
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c91
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c109
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c61
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c106
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c93
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/reg.h49
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/reg_access_test.c39
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/task_event_pinned_vs_ebb_test.c91
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/task_event_vs_ebb_test.c83
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/trace.c300
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/trace.h41
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.c131
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.h43
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.c252
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h41
-rw-r--r--tools/testing/selftests/powerpc/pmu/loop.S43
-rw-r--r--tools/testing/selftests/powerpc/subunit.h52
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile15
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-resched-dscr.c98
-rw-r--r--tools/testing/selftests/powerpc/utils.h49
-rw-r--r--tools/testing/selftests/ptrace/Makefile10
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c214
-rw-r--r--tools/testing/selftests/rcutorture/.gitignore6
-rw-r--r--tools/testing/selftests/rcutorture/bin/config2frag.sh25
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configNR_CPUS.sh45
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configcheck.sh54
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh74
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh41
-rw-r--r--tools/testing/selftests/rcutorture/bin/functions.sh223
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh71
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh51
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh51
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh63
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh222
-rw-r--r--tools/testing/selftests/rcutorture/bin/kvm.sh410
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh57
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh41
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-torture.sh106
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/BUSTED6
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/BUSTED.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFLIST1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK016
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh43
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST13
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P7
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY0112
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY0212
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0122
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0225
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE02-T25
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0322
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0424
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0524
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE05.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0625
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0723
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0825
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08-T25
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE0920
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/CFLIST14
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/N1-S-T-NH-SD-SMP-HP18
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/N2-2-t-nh-sd-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/N3-3-T-nh-SD-SMP-hp22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/N4-A-t-NH-sd-SMP-HP18
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/N5-U-T-NH-sd-SMP-hp22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/NT1-nh23
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/NT3-NH20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/P1-S-T-NH-SD-SMP-HP19
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/P2-2-t-nh-sd-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/P3-3-T-nh-SD-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/P4-A-t-NH-sd-SMP-HP22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/P5-U-T-NH-sd-SMP-hp28
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/PT1-nh23
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/PT2-NH22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v0.0/ver_functions.sh33
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/CFLIST17
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/N1-S-T-NH-SD-SMP-HP19
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/N2-2-t-nh-sd-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/N3-3-T-nh-SD-SMP-hp22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/N4-A-t-NH-sd-SMP-HP18
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/N5-U-T-NH-sd-SMP-hp22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/N6---t-nh-SD-smp-hp19
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/N7-4-T-NH-SD-SMP-HP26
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/N8-2-T-NH-SD-SMP-HP22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/NT1-nh23
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/NT3-NH20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P1-S-T-NH-SD-SMP-HP20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P2-2-t-nh-sd-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P3-3-T-nh-SD-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P4-A-t-NH-sd-SMP-HP22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P5-U-T-NH-sd-SMP-hp28
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P6---t-nh-SD-smp-hp18
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP30
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP-all30
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-HP-none30
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/P7-4-T-NH-SD-SMP-hp30
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/PT1-nh23
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.12/PT2-NH22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/CFLIST14
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/N1-S-T-NH-SD-SMP-HP19
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/N2-2-t-nh-sd-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/N3-3-T-nh-SD-SMP-hp22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/N4-A-t-NH-sd-SMP-HP18
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/N5-U-T-NH-sd-SMP-hp22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/NT1-nh23
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/NT3-NH20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/P1-S-T-NH-SD-SMP-HP20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/P2-2-t-nh-sd-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/P3-3-T-nh-SD-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/P4-A-t-NH-sd-SMP-HP22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/P5-U-T-NH-sd-SMP-hp28
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/PT1-nh23
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/PT2-NH22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.3/ver_functions.sh44
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/CFLIST14
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/N1-S-T-NH-SD-SMP-HP19
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/N2-2-t-nh-sd-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/N3-3-T-nh-SD-SMP-hp22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/N4-A-t-NH-sd-SMP-HP18
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/N5-U-T-NH-sd-SMP-hp22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/NT1-nh23
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/NT3-NH20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/P1-S-T-NH-SD-SMP-HP20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/P2-2-t-nh-sd-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/P3-3-T-nh-SD-SMP-hp20
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/P4-A-t-NH-sd-SMP-HP22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/P5-U-T-NH-sd-SMP-hp28
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/PT1-nh23
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/PT2-NH22
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/v3.5/ver_functions.sh57
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh57
-rw-r--r--tools/testing/selftests/rcutorture/doc/TINY_RCU.txt40
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt95
-rw-r--r--tools/testing/selftests/rcutorture/doc/initrd.txt90
-rw-r--r--tools/testing/selftests/rcutorture/doc/rcu-test-image.txt42
-rw-r--r--tools/testing/selftests/sysctl/Makefile19
-rw-r--r--tools/testing/selftests/sysctl/common_tests109
-rw-r--r--tools/testing/selftests/sysctl/run_numerictests10
-rw-r--r--tools/testing/selftests/sysctl/run_stringtests77
-rw-r--r--tools/testing/selftests/timers/Makefile8
-rw-r--r--tools/testing/selftests/timers/posix_timers.c221
-rw-r--r--tools/testing/selftests/user/Makefile13
-rw-r--r--tools/testing/selftests/vm/.gitignore4
-rw-r--r--tools/testing/selftests/vm/Makefile15
-rw-r--r--tools/testing/selftests/vm/hugepage-mmap.c92
-rw-r--r--tools/testing/selftests/vm/hugepage-shm.c100
-rw-r--r--tools/testing/selftests/vm/hugetlbfstest.c84
-rw-r--r--tools/testing/selftests/vm/map_hugetlb.c79
-rw-r--r--tools/testing/selftests/vm/run_vmtests93
-rw-r--r--tools/testing/selftests/vm/thuge-gen.c254
-rw-r--r--tools/thermal/tmon/Makefile47
-rw-r--r--tools/thermal/tmon/README50
-rw-r--r--tools/thermal/tmon/pid.c131
-rw-r--r--tools/thermal/tmon/sysfs.c596
-rw-r--r--tools/thermal/tmon/tmon.8142
-rw-r--r--tools/thermal/tmon/tmon.c376
-rw-r--r--tools/thermal/tmon/tmon.h204
-rw-r--r--tools/thermal/tmon/tui.c638
-rw-r--r--tools/usb/Makefile14
-rw-r--r--tools/usb/ffs-aio-example/multibuff/device_app/aio_multibuff.c349
-rw-r--r--tools/usb/ffs-aio-example/multibuff/host_app/Makefile13
-rw-r--r--tools/usb/ffs-aio-example/multibuff/host_app/test.c146
-rw-r--r--tools/usb/ffs-aio-example/simple/device_app/aio_simple.c335
-rw-r--r--tools/usb/ffs-aio-example/simple/host_app/Makefile13
-rw-r--r--tools/usb/ffs-aio-example/simple/host_app/test.c148
-rw-r--r--tools/usb/ffs-test.c527
-rw-r--r--tools/usb/hcd-tests.sh275
-rw-r--r--tools/usb/testusb.c537
-rw-r--r--tools/virtio/.gitignore3
-rw-r--r--tools/virtio/Makefile14
-rw-r--r--tools/virtio/asm/barrier.h14
-rw-r--r--tools/virtio/linux/bug.h10
-rw-r--r--tools/virtio/linux/device.h2
-rw-r--r--tools/virtio/linux/err.h26
-rw-r--r--tools/virtio/linux/hrtimer.h0
-rw-r--r--tools/virtio/linux/irqreturn.h1
-rw-r--r--tools/virtio/linux/kernel.h105
-rw-r--r--tools/virtio/linux/kmemleak.h3
-rw-r--r--tools/virtio/linux/module.h6
-rw-r--r--tools/virtio/linux/printk.h4
-rw-r--r--tools/virtio/linux/ratelimit.h4
-rw-r--r--tools/virtio/linux/scatterlist.h189
-rw-r--r--tools/virtio/linux/slab.h2
-rw-r--r--tools/virtio/linux/uaccess.h50
-rw-r--r--tools/virtio/linux/uio.h3
-rw-r--r--tools/virtio/linux/virtio.h87
-rw-r--r--tools/virtio/linux/virtio_config.h6
-rw-r--r--tools/virtio/linux/virtio_ring.h1
-rw-r--r--tools/virtio/linux/vringh.h1
-rw-r--r--tools/virtio/uapi/linux/uio.h1
-rw-r--r--tools/virtio/uapi/linux/virtio_config.h1
-rw-r--r--tools/virtio/uapi/linux/virtio_ring.h4
-rw-r--r--tools/virtio/vhost_test/Makefile2
-rw-r--r--tools/virtio/vhost_test/vhost_test.c1
-rw-r--r--tools/virtio/virtio-trace/Makefile13
-rw-r--r--tools/virtio/virtio-trace/README118
-rw-r--r--tools/virtio/virtio-trace/trace-agent-ctl.c137
-rw-r--r--tools/virtio/virtio-trace/trace-agent-rw.c192
-rw-r--r--tools/virtio/virtio-trace/trace-agent.c270
-rw-r--r--tools/virtio/virtio-trace/trace-agent.h75
-rw-r--r--tools/virtio/virtio_test.c290
-rw-r--r--tools/virtio/vringh_test.c746
-rw-r--r--tools/vm/.gitignore2
-rw-r--r--tools/vm/Makefile22
-rw-r--r--tools/vm/page-types.c1176
-rw-r--r--tools/vm/slabinfo.c1393
979 files changed, 180290 insertions, 0 deletions
diff --git a/tools/Makefile b/tools/Makefile
new file mode 100644
index 00000000000..9a617adc667
--- /dev/null
+++ b/tools/Makefile
@@ -0,0 +1,119 @@
+include scripts/Makefile.include
+
+help:
+ @echo 'Possible targets:'
+ @echo ''
+ @echo ' acpi - ACPI tools'
+ @echo ' cgroup - cgroup tools'
+ @echo ' cpupower - a tool for all things x86 CPU power'
+ @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer'
+ @echo ' hv - tools used when in Hyper-V clients'
+ @echo ' lguest - a minimal 32-bit x86 hypervisor'
+ @echo ' perf - Linux performance measurement and analysis tool'
+ @echo ' selftests - various kernel selftests'
+ @echo ' turbostat - Intel CPU idle stats and freq reporting tool'
+ @echo ' usb - USB testing tools'
+ @echo ' virtio - vhost test module'
+ @echo ' net - misc networking tools'
+ @echo ' vm - misc vm tools'
+ @echo ' x86_energy_perf_policy - Intel energy policy tool'
+ @echo ' tmon - thermal monitoring and tuning tool'
+ @echo ''
+ @echo 'You can do:'
+ @echo ' $$ make -C tools/ <tool>_install'
+ @echo ''
+ @echo ' from the kernel command line to build and install one of'
+ @echo ' the tools above'
+ @echo ''
+ @echo ' $$ make tools/install'
+ @echo ''
+ @echo ' installs all tools.'
+ @echo ''
+ @echo 'Cleaning targets:'
+ @echo ''
+ @echo ' all of the above with the "_clean" string appended cleans'
+ @echo ' the respective build directory.'
+ @echo ' clean: a summary clean target to clean _all_ folders'
+
+acpi: FORCE
+ $(call descend,power/$@)
+
+cpupower: FORCE
+ $(call descend,power/$@)
+
+cgroup firewire hv guest usb virtio vm net: FORCE
+ $(call descend,$@)
+
+liblockdep: FORCE
+ $(call descend,lib/lockdep)
+
+libapikfs: FORCE
+ $(call descend,lib/api)
+
+perf: libapikfs FORCE
+ $(call descend,$@)
+
+selftests: FORCE
+ $(call descend,testing/$@)
+
+turbostat x86_energy_perf_policy: FORCE
+ $(call descend,power/x86/$@)
+
+tmon: FORCE
+ $(call descend,thermal/$@)
+
+acpi_install:
+ $(call descend,power/$(@:_install=),install)
+
+cpupower_install:
+ $(call descend,power/$(@:_install=),install)
+
+cgroup_install firewire_install hv_install lguest_install perf_install usb_install virtio_install vm_install net_install:
+ $(call descend,$(@:_install=),install)
+
+selftests_install:
+ $(call descend,testing/$(@:_clean=),install)
+
+turbostat_install x86_energy_perf_policy_install:
+ $(call descend,power/x86/$(@:_install=),install)
+
+tmon_install:
+ $(call descend,thermal/$(@:_install=),install)
+
+install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \
+ perf_install selftests_install turbostat_install usb_install \
+ virtio_install vm_install net_install x86_energy_perf_policy_install \
+ tmon
+
+acpi_clean:
+ $(call descend,power/acpi,clean)
+
+cpupower_clean:
+ $(call descend,power/cpupower,clean)
+
+cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean:
+ $(call descend,$(@:_clean=),clean)
+
+liblockdep_clean:
+ $(call descend,lib/lockdep,clean)
+
+libapikfs_clean:
+ $(call descend,lib/api,clean)
+
+perf_clean: libapikfs_clean
+ $(call descend,$(@:_clean=),clean)
+
+selftests_clean:
+ $(call descend,testing/$(@:_clean=),clean)
+
+turbostat_clean x86_energy_perf_policy_clean:
+ $(call descend,power/x86/$(@:_clean=),clean)
+
+tmon_clean:
+ $(call descend,thermal/tmon,clean)
+
+clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean lguest_clean \
+ perf_clean selftests_clean turbostat_clean usb_clean virtio_clean \
+ vm_clean net_clean x86_energy_perf_policy_clean tmon_clean
+
+.PHONY: FORCE
diff --git a/tools/cgroup/.gitignore b/tools/cgroup/.gitignore
new file mode 100644
index 00000000000..633cd9b874f
--- /dev/null
+++ b/tools/cgroup/.gitignore
@@ -0,0 +1 @@
+cgroup_event_listener
diff --git a/tools/cgroup/Makefile b/tools/cgroup/Makefile
new file mode 100644
index 00000000000..b4286196b76
--- /dev/null
+++ b/tools/cgroup/Makefile
@@ -0,0 +1,11 @@
+# Makefile for cgroup tools
+
+CC = $(CROSS_COMPILE)gcc
+CFLAGS = -Wall -Wextra
+
+all: cgroup_event_listener
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $^
+
+clean:
+ $(RM) cgroup_event_listener
diff --git a/tools/cgroup/cgroup_event_listener.c b/tools/cgroup/cgroup_event_listener.c
new file mode 100644
index 00000000000..4eb5507205c
--- /dev/null
+++ b/tools/cgroup/cgroup_event_listener.c
@@ -0,0 +1,82 @@
+/*
+ * cgroup_event_listener.c - Simple listener of cgroup events
+ *
+ * Copyright (C) Kirill A. Shutemov <kirill@shutemov.name>
+ */
+
+#include <assert.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <libgen.h>
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/eventfd.h>
+
+#define USAGE_STR "Usage: cgroup_event_listener <path-to-control-file> <args>"
+
+int main(int argc, char **argv)
+{
+ int efd = -1;
+ int cfd = -1;
+ int event_control = -1;
+ char event_control_path[PATH_MAX];
+ char line[LINE_MAX];
+ int ret;
+
+ if (argc != 3)
+ errx(1, "%s", USAGE_STR);
+
+ cfd = open(argv[1], O_RDONLY);
+ if (cfd == -1)
+ err(1, "Cannot open %s", argv[1]);
+
+ ret = snprintf(event_control_path, PATH_MAX, "%s/cgroup.event_control",
+ dirname(argv[1]));
+ if (ret >= PATH_MAX)
+ errx(1, "Path to cgroup.event_control is too long");
+
+ event_control = open(event_control_path, O_WRONLY);
+ if (event_control == -1)
+ err(1, "Cannot open %s", event_control_path);
+
+ efd = eventfd(0, 0);
+ if (efd == -1)
+ err(1, "eventfd() failed");
+
+ ret = snprintf(line, LINE_MAX, "%d %d %s", efd, cfd, argv[2]);
+ if (ret >= LINE_MAX)
+ errx(1, "Arguments string is too long");
+
+ ret = write(event_control, line, strlen(line) + 1);
+ if (ret == -1)
+ err(1, "Cannot write to cgroup.event_control");
+
+ while (1) {
+ uint64_t result;
+
+ ret = read(efd, &result, sizeof(result));
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+ err(1, "Cannot read from eventfd");
+ }
+ assert(ret == sizeof(result));
+
+ ret = access(event_control_path, W_OK);
+ if ((ret == -1) && (errno == ENOENT)) {
+ puts("The cgroup seems to have removed.");
+ break;
+ }
+
+ if (ret == -1)
+ err(1, "cgroup.event_control is not accessible any more");
+
+ printf("%s %s: crossed\n", argv[1], argv[2]);
+ }
+
+ return 0;
+}
diff --git a/tools/firewire/Makefile b/tools/firewire/Makefile
new file mode 100644
index 00000000000..81767adaae7
--- /dev/null
+++ b/tools/firewire/Makefile
@@ -0,0 +1,19 @@
+prefix = /usr
+nosy-dump-version = 0.4
+
+CC = gcc
+
+all : nosy-dump
+
+nosy-dump : CFLAGS = -Wall -O2 -g
+nosy-dump : CPPFLAGS = -DVERSION=\"$(nosy-dump-version)\" -I../../drivers/firewire
+nosy-dump : LDFLAGS = -g
+nosy-dump : LDLIBS = -lpopt
+
+nosy-dump : nosy-dump.o decode-fcp.o
+
+clean :
+ rm -rf *.o nosy-dump
+
+install :
+ install nosy-dump $(prefix)/bin/nosy-dump
diff --git a/tools/firewire/decode-fcp.c b/tools/firewire/decode-fcp.c
new file mode 100644
index 00000000000..e41223b6a4c
--- /dev/null
+++ b/tools/firewire/decode-fcp.c
@@ -0,0 +1,213 @@
+#include <linux/firewire-constants.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "list.h"
+#include "nosy-dump.h"
+
+#define CSR_FCP_COMMAND 0xfffff0000b00ull
+#define CSR_FCP_RESPONSE 0xfffff0000d00ull
+
+static const char * const ctype_names[] = {
+ [0x0] = "control", [0x8] = "not implemented",
+ [0x1] = "status", [0x9] = "accepted",
+ [0x2] = "specific inquiry", [0xa] = "rejected",
+ [0x3] = "notify", [0xb] = "in transition",
+ [0x4] = "general inquiry", [0xc] = "stable",
+ [0x5] = "(reserved 0x05)", [0xd] = "changed",
+ [0x6] = "(reserved 0x06)", [0xe] = "(reserved 0x0e)",
+ [0x7] = "(reserved 0x07)", [0xf] = "interim",
+};
+
+static const char * const subunit_type_names[] = {
+ [0x00] = "monitor", [0x10] = "(reserved 0x10)",
+ [0x01] = "audio", [0x11] = "(reserved 0x11)",
+ [0x02] = "printer", [0x12] = "(reserved 0x12)",
+ [0x03] = "disc", [0x13] = "(reserved 0x13)",
+ [0x04] = "tape recorder/player",[0x14] = "(reserved 0x14)",
+ [0x05] = "tuner", [0x15] = "(reserved 0x15)",
+ [0x06] = "ca", [0x16] = "(reserved 0x16)",
+ [0x07] = "camera", [0x17] = "(reserved 0x17)",
+ [0x08] = "(reserved 0x08)", [0x18] = "(reserved 0x18)",
+ [0x09] = "panel", [0x19] = "(reserved 0x19)",
+ [0x0a] = "bulletin board", [0x1a] = "(reserved 0x1a)",
+ [0x0b] = "camera storage", [0x1b] = "(reserved 0x1b)",
+ [0x0c] = "(reserved 0x0c)", [0x1c] = "vendor unique",
+ [0x0d] = "(reserved 0x0d)", [0x1d] = "all subunit types",
+ [0x0e] = "(reserved 0x0e)", [0x1e] = "subunit_type extended to next byte",
+ [0x0f] = "(reserved 0x0f)", [0x1f] = "unit",
+};
+
+struct avc_enum {
+ int value;
+ const char *name;
+};
+
+struct avc_field {
+ const char *name; /* Short name for field. */
+ int offset; /* Location of field, specified in bits; */
+ /* negative means from end of packet. */
+ int width; /* Width of field, 0 means use data_length. */
+ struct avc_enum *names;
+};
+
+struct avc_opcode_info {
+ const char *name;
+ struct avc_field fields[8];
+};
+
+struct avc_enum power_field_names[] = {
+ { 0x70, "on" },
+ { 0x60, "off" },
+ { }
+};
+
+static const struct avc_opcode_info opcode_info[256] = {
+
+ /* TA Document 1999026 */
+ /* AV/C Digital Interface Command Set General Specification 4.0 */
+ [0xb2] = { "power", {
+ { "state", 0, 8, power_field_names }
+ }
+ },
+ [0x30] = { "unit info", {
+ { "foo", 0, 8 },
+ { "unit_type", 8, 5 },
+ { "unit", 13, 3 },
+ { "company id", 16, 24 },
+ }
+ },
+ [0x31] = { "subunit info" },
+ [0x01] = { "reserve" },
+ [0xb0] = { "version" },
+ [0x00] = { "vendor dependent" },
+ [0x02] = { "plug info" },
+ [0x12] = { "channel usage" },
+ [0x24] = { "connect" },
+ [0x20] = { "connect av" },
+ [0x22] = { "connections" },
+ [0x11] = { "digital input" },
+ [0x10] = { "digital output" },
+ [0x25] = { "disconnect" },
+ [0x21] = { "disconnect av" },
+ [0x19] = { "input plug signal format" },
+ [0x18] = { "output plug signal format" },
+ [0x1f] = { "general bus setup" },
+
+ /* TA Document 1999025 */
+ /* AV/C Descriptor Mechanism Specification Version 1.0 */
+ [0x0c] = { "create descriptor" },
+ [0x08] = { "open descriptor" },
+ [0x09] = { "read descriptor" },
+ [0x0a] = { "write descriptor" },
+ [0x05] = { "open info block" },
+ [0x06] = { "read info block" },
+ [0x07] = { "write info block" },
+ [0x0b] = { "search descriptor" },
+ [0x0d] = { "object number select" },
+
+ /* TA Document 1999015 */
+ /* AV/C Command Set for Rate Control of Isochronous Data Flow 1.0 */
+ [0xb3] = { "rate", {
+ { "subfunction", 0, 8 },
+ { "result", 8, 8 },
+ { "plug_type", 16, 8 },
+ { "plug_id", 16, 8 },
+ }
+ },
+
+ /* TA Document 1999008 */
+ /* AV/C Audio Subunit Specification 1.0 */
+ [0xb8] = { "function block" },
+
+ /* TA Document 2001001 */
+ /* AV/C Panel Subunit Specification 1.1 */
+ [0x7d] = { "gui update" },
+ [0x7e] = { "push gui data" },
+ [0x7f] = { "user action" },
+ [0x7c] = { "pass through" },
+
+ /* */
+ [0x26] = { "asynchronous connection" },
+};
+
+struct avc_frame {
+ uint32_t operand0:8;
+ uint32_t opcode:8;
+ uint32_t subunit_id:3;
+ uint32_t subunit_type:5;
+ uint32_t ctype:4;
+ uint32_t cts:4;
+};
+
+static void
+decode_avc(struct link_transaction *t)
+{
+ struct avc_frame *frame =
+ (struct avc_frame *) t->request->packet.write_block.data;
+ const struct avc_opcode_info *info;
+ const char *name;
+ char buffer[32];
+ int i;
+
+ info = &opcode_info[frame->opcode];
+ if (info->name == NULL) {
+ snprintf(buffer, sizeof(buffer),
+ "(unknown opcode 0x%02x)", frame->opcode);
+ name = buffer;
+ } else {
+ name = info->name;
+ }
+
+ printf("av/c %s, subunit_type=%s, subunit_id=%d, opcode=%s",
+ ctype_names[frame->ctype], subunit_type_names[frame->subunit_type],
+ frame->subunit_id, name);
+
+ for (i = 0; info->fields[i].name != NULL; i++)
+ printf(", %s", info->fields[i].name);
+
+ printf("\n");
+}
+
+int
+decode_fcp(struct link_transaction *t)
+{
+ struct avc_frame *frame =
+ (struct avc_frame *) t->request->packet.write_block.data;
+ unsigned long long offset =
+ ((unsigned long long) t->request->packet.common.offset_high << 32) |
+ t->request->packet.common.offset_low;
+
+ if (t->request->packet.common.tcode != TCODE_WRITE_BLOCK_REQUEST)
+ return 0;
+
+ if (offset == CSR_FCP_COMMAND || offset == CSR_FCP_RESPONSE) {
+ switch (frame->cts) {
+ case 0x00:
+ decode_avc(t);
+ break;
+ case 0x01:
+ printf("cal fcp frame (cts=0x01)\n");
+ break;
+ case 0x02:
+ printf("ehs fcp frame (cts=0x02)\n");
+ break;
+ case 0x03:
+ printf("havi fcp frame (cts=0x03)\n");
+ break;
+ case 0x0e:
+ printf("vendor specific fcp frame (cts=0x0e)\n");
+ break;
+ case 0x0f:
+ printf("extended cts\n");
+ break;
+ default:
+ printf("reserved fcp frame (ctx=0x%02x)\n", frame->cts);
+ break;
+ }
+ return 1;
+ }
+
+ return 0;
+}
+
diff --git a/tools/firewire/list.h b/tools/firewire/list.h
new file mode 100644
index 00000000000..41f4bdadf63
--- /dev/null
+++ b/tools/firewire/list.h
@@ -0,0 +1,62 @@
+struct list {
+ struct list *next, *prev;
+};
+
+static inline void
+list_init(struct list *list)
+{
+ list->next = list;
+ list->prev = list;
+}
+
+static inline int
+list_empty(struct list *list)
+{
+ return list->next == list;
+}
+
+static inline void
+list_insert(struct list *link, struct list *new_link)
+{
+ new_link->prev = link->prev;
+ new_link->next = link;
+ new_link->prev->next = new_link;
+ new_link->next->prev = new_link;
+}
+
+static inline void
+list_append(struct list *list, struct list *new_link)
+{
+ list_insert((struct list *)list, new_link);
+}
+
+static inline void
+list_prepend(struct list *list, struct list *new_link)
+{
+ list_insert(list->next, new_link);
+}
+
+static inline void
+list_remove(struct list *link)
+{
+ link->prev->next = link->next;
+ link->next->prev = link->prev;
+}
+
+#define list_entry(link, type, member) \
+ ((type *)((char *)(link)-(unsigned long)(&((type *)0)->member)))
+
+#define list_head(list, type, member) \
+ list_entry((list)->next, type, member)
+
+#define list_tail(list, type, member) \
+ list_entry((list)->prev, type, member)
+
+#define list_next(elm, member) \
+ list_entry((elm)->member.next, typeof(*elm), member)
+
+#define list_for_each_entry(pos, list, member) \
+ for (pos = list_head(list, typeof(*pos), member); \
+ &pos->member != (list); \
+ pos = list_next(pos, member))
+
diff --git a/tools/firewire/nosy-dump.c b/tools/firewire/nosy-dump.c
new file mode 100644
index 00000000000..3179c711bd6
--- /dev/null
+++ b/tools/firewire/nosy-dump.c
@@ -0,0 +1,1035 @@
+/*
+ * nosy-dump - Interface to snoop mode driver for TI PCILynx 1394 controllers
+ * Copyright (C) 2002-2006 Kristian Høgsberg
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ */
+
+#include <byteswap.h>
+#include <endian.h>
+#include <fcntl.h>
+#include <linux/firewire-constants.h>
+#include <poll.h>
+#include <popt.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <termios.h>
+#include <unistd.h>
+
+#include "list.h"
+#include "nosy-dump.h"
+#include "nosy-user.h"
+
+enum {
+ PACKET_FIELD_DETAIL = 0x01,
+ PACKET_FIELD_DATA_LENGTH = 0x02,
+ /* Marks the fields we print in transaction view. */
+ PACKET_FIELD_TRANSACTION = 0x04,
+};
+
+static void print_packet(uint32_t *data, size_t length);
+static void decode_link_packet(struct link_packet *packet, size_t length,
+ int include_flags, int exclude_flags);
+static int run = 1;
+sig_t sys_sigint_handler;
+
+static char *option_nosy_device = "/dev/nosy";
+static char *option_view = "packet";
+static char *option_output;
+static char *option_input;
+static int option_hex;
+static int option_iso;
+static int option_cycle_start;
+static int option_version;
+static int option_verbose;
+
+enum {
+ VIEW_TRANSACTION,
+ VIEW_PACKET,
+ VIEW_STATS,
+};
+
+static const struct poptOption options[] = {
+ {
+ .longName = "device",
+ .shortName = 'd',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &option_nosy_device,
+ .descrip = "Path to nosy device.",
+ .argDescrip = "DEVICE"
+ },
+ {
+ .longName = "view",
+ .argInfo = POPT_ARG_STRING,
+ .arg = &option_view,
+ .descrip = "Specify view of bus traffic: packet, transaction or stats.",
+ .argDescrip = "VIEW"
+ },
+ {
+ .longName = "hex",
+ .shortName = 'x',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_hex,
+ .descrip = "Print each packet in hex.",
+ },
+ {
+ .longName = "iso",
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_iso,
+ .descrip = "Print iso packets.",
+ },
+ {
+ .longName = "cycle-start",
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_cycle_start,
+ .descrip = "Print cycle start packets.",
+ },
+ {
+ .longName = "verbose",
+ .shortName = 'v',
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_verbose,
+ .descrip = "Verbose packet view.",
+ },
+ {
+ .longName = "output",
+ .shortName = 'o',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &option_output,
+ .descrip = "Log to output file.",
+ .argDescrip = "FILENAME"
+ },
+ {
+ .longName = "input",
+ .shortName = 'i',
+ .argInfo = POPT_ARG_STRING,
+ .arg = &option_input,
+ .descrip = "Decode log from file.",
+ .argDescrip = "FILENAME"
+ },
+ {
+ .longName = "version",
+ .argInfo = POPT_ARG_NONE,
+ .arg = &option_version,
+ .descrip = "Specify print version info.",
+ },
+ POPT_AUTOHELP
+ POPT_TABLEEND
+};
+
+/* Allow all ^C except the first to interrupt the program in the usual way. */
+static void
+sigint_handler(int signal_num)
+{
+ if (run == 1) {
+ run = 0;
+ signal(SIGINT, SIG_DFL);
+ }
+}
+
+static struct subaction *
+subaction_create(uint32_t *data, size_t length)
+{
+ struct subaction *sa;
+
+ /* we put the ack in the subaction struct for easy access. */
+ sa = malloc(sizeof *sa - sizeof sa->packet + length);
+ if (!sa)
+ exit(EXIT_FAILURE);
+ sa->ack = data[length / 4 - 1];
+ sa->length = length;
+ memcpy(&sa->packet, data, length);
+
+ return sa;
+}
+
+static void
+subaction_destroy(struct subaction *sa)
+{
+ free(sa);
+}
+
+static struct list pending_transaction_list = {
+ &pending_transaction_list, &pending_transaction_list
+};
+
+static struct link_transaction *
+link_transaction_lookup(int request_node, int response_node, int tlabel)
+{
+ struct link_transaction *t;
+
+ list_for_each_entry(t, &pending_transaction_list, link) {
+ if (t->request_node == request_node &&
+ t->response_node == response_node &&
+ t->tlabel == tlabel)
+ return t;
+ }
+
+ t = malloc(sizeof *t);
+ if (!t)
+ exit(EXIT_FAILURE);
+ t->request_node = request_node;
+ t->response_node = response_node;
+ t->tlabel = tlabel;
+ list_init(&t->request_list);
+ list_init(&t->response_list);
+
+ list_append(&pending_transaction_list, &t->link);
+
+ return t;
+}
+
+static void
+link_transaction_destroy(struct link_transaction *t)
+{
+ struct subaction *sa;
+
+ while (!list_empty(&t->request_list)) {
+ sa = list_head(&t->request_list, struct subaction, link);
+ list_remove(&sa->link);
+ subaction_destroy(sa);
+ }
+ while (!list_empty(&t->response_list)) {
+ sa = list_head(&t->response_list, struct subaction, link);
+ list_remove(&sa->link);
+ subaction_destroy(sa);
+ }
+ free(t);
+}
+
+struct protocol_decoder {
+ const char *name;
+ int (*decode)(struct link_transaction *t);
+};
+
+static const struct protocol_decoder protocol_decoders[] = {
+ { "FCP", decode_fcp }
+};
+
+static void
+handle_transaction(struct link_transaction *t)
+{
+ struct subaction *sa;
+ int i;
+
+ if (!t->request) {
+ printf("BUG in handle_transaction\n");
+ return;
+ }
+
+ for (i = 0; i < array_length(protocol_decoders); i++)
+ if (protocol_decoders[i].decode(t))
+ break;
+
+ /* HACK: decode only fcp right now. */
+ return;
+
+ decode_link_packet(&t->request->packet, t->request->length,
+ PACKET_FIELD_TRANSACTION, 0);
+ if (t->response)
+ decode_link_packet(&t->response->packet, t->request->length,
+ PACKET_FIELD_TRANSACTION, 0);
+ else
+ printf("[no response]");
+
+ if (option_verbose) {
+ list_for_each_entry(sa, &t->request_list, link)
+ print_packet((uint32_t *) &sa->packet, sa->length);
+ list_for_each_entry(sa, &t->response_list, link)
+ print_packet((uint32_t *) &sa->packet, sa->length);
+ }
+ printf("\r\n");
+
+ link_transaction_destroy(t);
+}
+
+static void
+clear_pending_transaction_list(void)
+{
+ struct link_transaction *t;
+
+ while (!list_empty(&pending_transaction_list)) {
+ t = list_head(&pending_transaction_list,
+ struct link_transaction, link);
+ list_remove(&t->link);
+ link_transaction_destroy(t);
+ /* print unfinished transactions */
+ }
+}
+
+static const char * const tcode_names[] = {
+ [0x0] = "write_quadlet_request", [0x6] = "read_quadlet_response",
+ [0x1] = "write_block_request", [0x7] = "read_block_response",
+ [0x2] = "write_response", [0x8] = "cycle_start",
+ [0x3] = "reserved", [0x9] = "lock_request",
+ [0x4] = "read_quadlet_request", [0xa] = "iso_data",
+ [0x5] = "read_block_request", [0xb] = "lock_response",
+};
+
+static const char * const ack_names[] = {
+ [0x0] = "no ack", [0x8] = "reserved (0x08)",
+ [0x1] = "ack_complete", [0x9] = "reserved (0x09)",
+ [0x2] = "ack_pending", [0xa] = "reserved (0x0a)",
+ [0x3] = "reserved (0x03)", [0xb] = "reserved (0x0b)",
+ [0x4] = "ack_busy_x", [0xc] = "reserved (0x0c)",
+ [0x5] = "ack_busy_a", [0xd] = "ack_data_error",
+ [0x6] = "ack_busy_b", [0xe] = "ack_type_error",
+ [0x7] = "reserved (0x07)", [0xf] = "reserved (0x0f)",
+};
+
+static const char * const rcode_names[] = {
+ [0x0] = "complete", [0x4] = "conflict_error",
+ [0x1] = "reserved (0x01)", [0x5] = "data_error",
+ [0x2] = "reserved (0x02)", [0x6] = "type_error",
+ [0x3] = "reserved (0x03)", [0x7] = "address_error",
+};
+
+static const char * const retry_names[] = {
+ [0x0] = "retry_1",
+ [0x1] = "retry_x",
+ [0x2] = "retry_a",
+ [0x3] = "retry_b",
+};
+
+enum {
+ PACKET_RESERVED,
+ PACKET_REQUEST,
+ PACKET_RESPONSE,
+ PACKET_OTHER,
+};
+
+struct packet_info {
+ const char *name;
+ int type;
+ int response_tcode;
+ const struct packet_field *fields;
+ int field_count;
+};
+
+struct packet_field {
+ const char *name; /* Short name for field. */
+ int offset; /* Location of field, specified in bits; */
+ /* negative means from end of packet. */
+ int width; /* Width of field, 0 means use data_length. */
+ int flags; /* Show options. */
+ const char * const *value_names;
+};
+
+#define COMMON_REQUEST_FIELDS \
+ { "dest", 0, 16, PACKET_FIELD_TRANSACTION }, \
+ { "tl", 16, 6 }, \
+ { "rt", 22, 2, PACKET_FIELD_DETAIL, retry_names }, \
+ { "tcode", 24, 4, PACKET_FIELD_TRANSACTION, tcode_names }, \
+ { "pri", 28, 4, PACKET_FIELD_DETAIL }, \
+ { "src", 32, 16, PACKET_FIELD_TRANSACTION }, \
+ { "offs", 48, 48, PACKET_FIELD_TRANSACTION }
+
+#define COMMON_RESPONSE_FIELDS \
+ { "dest", 0, 16 }, \
+ { "tl", 16, 6 }, \
+ { "rt", 22, 2, PACKET_FIELD_DETAIL, retry_names }, \
+ { "tcode", 24, 4, 0, tcode_names }, \
+ { "pri", 28, 4, PACKET_FIELD_DETAIL }, \
+ { "src", 32, 16 }, \
+ { "rcode", 48, 4, PACKET_FIELD_TRANSACTION, rcode_names }
+
+static const struct packet_field read_quadlet_request_fields[] = {
+ COMMON_REQUEST_FIELDS,
+ { "crc", 96, 32, PACKET_FIELD_DETAIL },
+ { "ack", 156, 4, 0, ack_names },
+};
+
+static const struct packet_field read_quadlet_response_fields[] = {
+ COMMON_RESPONSE_FIELDS,
+ { "data", 96, 32, PACKET_FIELD_TRANSACTION },
+ { "crc", 128, 32, PACKET_FIELD_DETAIL },
+ { "ack", 188, 4, 0, ack_names },
+};
+
+static const struct packet_field read_block_request_fields[] = {
+ COMMON_REQUEST_FIELDS,
+ { "data_length", 96, 16, PACKET_FIELD_TRANSACTION },
+ { "extended_tcode", 112, 16 },
+ { "crc", 128, 32, PACKET_FIELD_DETAIL },
+ { "ack", 188, 4, 0, ack_names },
+};
+
+static const struct packet_field block_response_fields[] = {
+ COMMON_RESPONSE_FIELDS,
+ { "data_length", 96, 16, PACKET_FIELD_DATA_LENGTH },
+ { "extended_tcode", 112, 16 },
+ { "crc", 128, 32, PACKET_FIELD_DETAIL },
+ { "data", 160, 0, PACKET_FIELD_TRANSACTION },
+ { "crc", -64, 32, PACKET_FIELD_DETAIL },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_field write_quadlet_request_fields[] = {
+ COMMON_REQUEST_FIELDS,
+ { "data", 96, 32, PACKET_FIELD_TRANSACTION },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_field block_request_fields[] = {
+ COMMON_REQUEST_FIELDS,
+ { "data_length", 96, 16, PACKET_FIELD_DATA_LENGTH | PACKET_FIELD_TRANSACTION },
+ { "extended_tcode", 112, 16, PACKET_FIELD_TRANSACTION },
+ { "crc", 128, 32, PACKET_FIELD_DETAIL },
+ { "data", 160, 0, PACKET_FIELD_TRANSACTION },
+ { "crc", -64, 32, PACKET_FIELD_DETAIL },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_field write_response_fields[] = {
+ COMMON_RESPONSE_FIELDS,
+ { "reserved", 64, 32, PACKET_FIELD_DETAIL },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_field iso_data_fields[] = {
+ { "data_length", 0, 16, PACKET_FIELD_DATA_LENGTH },
+ { "tag", 16, 2 },
+ { "channel", 18, 6 },
+ { "tcode", 24, 4, 0, tcode_names },
+ { "sy", 28, 4 },
+ { "crc", 32, 32, PACKET_FIELD_DETAIL },
+ { "data", 64, 0 },
+ { "crc", -64, 32, PACKET_FIELD_DETAIL },
+ { "ack", -4, 4, 0, ack_names },
+};
+
+static const struct packet_info packet_info[] = {
+ {
+ .name = "write_quadlet_request",
+ .type = PACKET_REQUEST,
+ .response_tcode = TCODE_WRITE_RESPONSE,
+ .fields = write_quadlet_request_fields,
+ .field_count = array_length(write_quadlet_request_fields)
+ },
+ {
+ .name = "write_block_request",
+ .type = PACKET_REQUEST,
+ .response_tcode = TCODE_WRITE_RESPONSE,
+ .fields = block_request_fields,
+ .field_count = array_length(block_request_fields)
+ },
+ {
+ .name = "write_response",
+ .type = PACKET_RESPONSE,
+ .fields = write_response_fields,
+ .field_count = array_length(write_response_fields)
+ },
+ {
+ .name = "reserved",
+ .type = PACKET_RESERVED,
+ },
+ {
+ .name = "read_quadlet_request",
+ .type = PACKET_REQUEST,
+ .response_tcode = TCODE_READ_QUADLET_RESPONSE,
+ .fields = read_quadlet_request_fields,
+ .field_count = array_length(read_quadlet_request_fields)
+ },
+ {
+ .name = "read_block_request",
+ .type = PACKET_REQUEST,
+ .response_tcode = TCODE_READ_BLOCK_RESPONSE,
+ .fields = read_block_request_fields,
+ .field_count = array_length(read_block_request_fields)
+ },
+ {
+ .name = "read_quadlet_response",
+ .type = PACKET_RESPONSE,
+ .fields = read_quadlet_response_fields,
+ .field_count = array_length(read_quadlet_response_fields)
+ },
+ {
+ .name = "read_block_response",
+ .type = PACKET_RESPONSE,
+ .fields = block_response_fields,
+ .field_count = array_length(block_response_fields)
+ },
+ {
+ .name = "cycle_start",
+ .type = PACKET_OTHER,
+ .fields = write_quadlet_request_fields,
+ .field_count = array_length(write_quadlet_request_fields)
+ },
+ {
+ .name = "lock_request",
+ .type = PACKET_REQUEST,
+ .fields = block_request_fields,
+ .field_count = array_length(block_request_fields)
+ },
+ {
+ .name = "iso_data",
+ .type = PACKET_OTHER,
+ .fields = iso_data_fields,
+ .field_count = array_length(iso_data_fields)
+ },
+ {
+ .name = "lock_response",
+ .type = PACKET_RESPONSE,
+ .fields = block_response_fields,
+ .field_count = array_length(block_response_fields)
+ },
+};
+
+static int
+handle_request_packet(uint32_t *data, size_t length)
+{
+ struct link_packet *p = (struct link_packet *) data;
+ struct subaction *sa, *prev;
+ struct link_transaction *t;
+
+ t = link_transaction_lookup(p->common.source, p->common.destination,
+ p->common.tlabel);
+ sa = subaction_create(data, length);
+ t->request = sa;
+
+ if (!list_empty(&t->request_list)) {
+ prev = list_tail(&t->request_list,
+ struct subaction, link);
+
+ if (!ACK_BUSY(prev->ack)) {
+ /*
+ * error, we should only see ack_busy_* before the
+ * ack_pending/ack_complete -- this is an ack_pending
+ * instead (ack_complete would have finished the
+ * transaction).
+ */
+ }
+
+ if (prev->packet.common.tcode != sa->packet.common.tcode ||
+ prev->packet.common.tlabel != sa->packet.common.tlabel) {
+ /* memcmp() ? */
+ /* error, these should match for retries. */
+ }
+ }
+
+ list_append(&t->request_list, &sa->link);
+
+ switch (sa->ack) {
+ case ACK_COMPLETE:
+ if (p->common.tcode != TCODE_WRITE_QUADLET_REQUEST &&
+ p->common.tcode != TCODE_WRITE_BLOCK_REQUEST)
+ /* error, unified transactions only allowed for write */;
+ list_remove(&t->link);
+ handle_transaction(t);
+ break;
+
+ case ACK_NO_ACK:
+ case ACK_DATA_ERROR:
+ case ACK_TYPE_ERROR:
+ list_remove(&t->link);
+ handle_transaction(t);
+ break;
+
+ case ACK_PENDING:
+ /* request subaction phase over, wait for response. */
+ break;
+
+ case ACK_BUSY_X:
+ case ACK_BUSY_A:
+ case ACK_BUSY_B:
+ /* ok, wait for retry. */
+ /* check that retry protocol is respected. */
+ break;
+ }
+
+ return 1;
+}
+
+static int
+handle_response_packet(uint32_t *data, size_t length)
+{
+ struct link_packet *p = (struct link_packet *) data;
+ struct subaction *sa, *prev;
+ struct link_transaction *t;
+
+ t = link_transaction_lookup(p->common.destination, p->common.source,
+ p->common.tlabel);
+ if (list_empty(&t->request_list)) {
+ /* unsolicited response */
+ }
+
+ sa = subaction_create(data, length);
+ t->response = sa;
+
+ if (!list_empty(&t->response_list)) {
+ prev = list_tail(&t->response_list, struct subaction, link);
+
+ if (!ACK_BUSY(prev->ack)) {
+ /*
+ * error, we should only see ack_busy_* before the
+ * ack_pending/ack_complete
+ */
+ }
+
+ if (prev->packet.common.tcode != sa->packet.common.tcode ||
+ prev->packet.common.tlabel != sa->packet.common.tlabel) {
+ /* use memcmp() instead? */
+ /* error, these should match for retries. */
+ }
+ } else {
+ prev = list_tail(&t->request_list, struct subaction, link);
+ if (prev->ack != ACK_PENDING) {
+ /*
+ * error, should not get response unless last request got
+ * ack_pending.
+ */
+ }
+
+ if (packet_info[prev->packet.common.tcode].response_tcode !=
+ sa->packet.common.tcode) {
+ /* error, tcode mismatch */
+ }
+ }
+
+ list_append(&t->response_list, &sa->link);
+
+ switch (sa->ack) {
+ case ACK_COMPLETE:
+ case ACK_NO_ACK:
+ case ACK_DATA_ERROR:
+ case ACK_TYPE_ERROR:
+ list_remove(&t->link);
+ handle_transaction(t);
+ /* transaction complete, remove t from pending list. */
+ break;
+
+ case ACK_PENDING:
+ /* error for responses. */
+ break;
+
+ case ACK_BUSY_X:
+ case ACK_BUSY_A:
+ case ACK_BUSY_B:
+ /* no problem, wait for next retry */
+ break;
+ }
+
+ return 1;
+}
+
+static int
+handle_packet(uint32_t *data, size_t length)
+{
+ if (length == 0) {
+ printf("bus reset\r\n");
+ clear_pending_transaction_list();
+ } else if (length > sizeof(struct phy_packet)) {
+ struct link_packet *p = (struct link_packet *) data;
+
+ switch (packet_info[p->common.tcode].type) {
+ case PACKET_REQUEST:
+ return handle_request_packet(data, length);
+
+ case PACKET_RESPONSE:
+ return handle_response_packet(data, length);
+
+ case PACKET_OTHER:
+ case PACKET_RESERVED:
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+static unsigned int
+get_bits(struct link_packet *packet, int offset, int width)
+{
+ uint32_t *data = (uint32_t *) packet;
+ uint32_t index, shift, mask;
+
+ index = offset / 32 + 1;
+ shift = 32 - (offset & 31) - width;
+ mask = width == 32 ? ~0 : (1 << width) - 1;
+
+ return (data[index] >> shift) & mask;
+}
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define byte_index(i) ((i) ^ 3)
+#elif __BYTE_ORDER == __BIG_ENDIAN
+#define byte_index(i) (i)
+#else
+#error unsupported byte order.
+#endif
+
+static void
+dump_data(unsigned char *data, int length)
+{
+ int i, print_length;
+
+ if (length > 128)
+ print_length = 128;
+ else
+ print_length = length;
+
+ for (i = 0; i < print_length; i++)
+ printf("%s%02hhx",
+ (i % 4 == 0 && i != 0) ? " " : "",
+ data[byte_index(i)]);
+
+ if (print_length < length)
+ printf(" (%d more bytes)", length - print_length);
+}
+
+static void
+decode_link_packet(struct link_packet *packet, size_t length,
+ int include_flags, int exclude_flags)
+{
+ const struct packet_info *pi;
+ int data_length = 0;
+ int i;
+
+ pi = &packet_info[packet->common.tcode];
+
+ for (i = 0; i < pi->field_count; i++) {
+ const struct packet_field *f = &pi->fields[i];
+ int offset;
+
+ if (f->flags & exclude_flags)
+ continue;
+ if (include_flags && !(f->flags & include_flags))
+ continue;
+
+ if (f->offset < 0)
+ offset = length * 8 + f->offset - 32;
+ else
+ offset = f->offset;
+
+ if (f->value_names != NULL) {
+ uint32_t bits;
+
+ bits = get_bits(packet, offset, f->width);
+ printf("%s", f->value_names[bits]);
+ } else if (f->width == 0) {
+ printf("%s=[", f->name);
+ dump_data((unsigned char *) packet + (offset / 8 + 4), data_length);
+ printf("]");
+ } else {
+ unsigned long long bits;
+ int high_width, low_width;
+
+ if ((offset & ~31) != ((offset + f->width - 1) & ~31)) {
+ /* Bit field spans quadlet boundary. */
+ high_width = ((offset + 31) & ~31) - offset;
+ low_width = f->width - high_width;
+
+ bits = get_bits(packet, offset, high_width);
+ bits = (bits << low_width) |
+ get_bits(packet, offset + high_width, low_width);
+ } else {
+ bits = get_bits(packet, offset, f->width);
+ }
+
+ printf("%s=0x%0*llx", f->name, (f->width + 3) / 4, bits);
+
+ if (f->flags & PACKET_FIELD_DATA_LENGTH)
+ data_length = bits;
+ }
+
+ if (i < pi->field_count - 1)
+ printf(", ");
+ }
+}
+
+static void
+print_packet(uint32_t *data, size_t length)
+{
+ int i;
+
+ printf("%6u ", data[0]);
+
+ if (length == 4) {
+ printf("bus reset");
+ } else if (length < sizeof(struct phy_packet)) {
+ printf("short packet: ");
+ for (i = 1; i < length / 4; i++)
+ printf("%s%08x", i == 0 ? "[" : " ", data[i]);
+ printf("]");
+
+ } else if (length == sizeof(struct phy_packet) && data[1] == ~data[2]) {
+ struct phy_packet *pp = (struct phy_packet *) data;
+
+ /* phy packet are 3 quadlets: the 1 quadlet payload,
+ * the bitwise inverse of the payload and the snoop
+ * mode ack */
+
+ switch (pp->common.identifier) {
+ case PHY_PACKET_CONFIGURATION:
+ if (!pp->phy_config.set_root && !pp->phy_config.set_gap_count) {
+ printf("ext phy config: phy_id=%02x", pp->phy_config.root_id);
+ } else {
+ printf("phy config:");
+ if (pp->phy_config.set_root)
+ printf(" set_root_id=%02x", pp->phy_config.root_id);
+ if (pp->phy_config.set_gap_count)
+ printf(" set_gap_count=%d", pp->phy_config.gap_count);
+ }
+ break;
+
+ case PHY_PACKET_LINK_ON:
+ printf("link-on packet, phy_id=%02x", pp->link_on.phy_id);
+ break;
+
+ case PHY_PACKET_SELF_ID:
+ if (pp->self_id.extended) {
+ printf("extended self id: phy_id=%02x, seq=%d",
+ pp->ext_self_id.phy_id, pp->ext_self_id.sequence);
+ } else {
+ static const char * const speed_names[] = {
+ "S100", "S200", "S400", "BETA"
+ };
+ printf("self id: phy_id=%02x, link %s, gap_count=%d, speed=%s%s%s",
+ pp->self_id.phy_id,
+ (pp->self_id.link_active ? "active" : "not active"),
+ pp->self_id.gap_count,
+ speed_names[pp->self_id.phy_speed],
+ (pp->self_id.contender ? ", irm contender" : ""),
+ (pp->self_id.initiated_reset ? ", initiator" : ""));
+ }
+ break;
+ default:
+ printf("unknown phy packet: ");
+ for (i = 1; i < length / 4; i++)
+ printf("%s%08x", i == 0 ? "[" : " ", data[i]);
+ printf("]");
+ break;
+ }
+ } else {
+ struct link_packet *packet = (struct link_packet *) data;
+
+ decode_link_packet(packet, length, 0,
+ option_verbose ? 0 : PACKET_FIELD_DETAIL);
+ }
+
+ if (option_hex) {
+ printf(" [");
+ dump_data((unsigned char *) data + 4, length - 4);
+ printf("]");
+ }
+
+ printf("\r\n");
+}
+
+#define HIDE_CURSOR "\033[?25l"
+#define SHOW_CURSOR "\033[?25h"
+#define CLEAR "\033[H\033[2J"
+
+static void
+print_stats(uint32_t *data, size_t length)
+{
+ static int bus_reset_count, short_packet_count, phy_packet_count;
+ static int tcode_count[16];
+ static struct timeval last_update;
+ struct timeval now;
+ int i;
+
+ if (length == 0)
+ bus_reset_count++;
+ else if (length < sizeof(struct phy_packet))
+ short_packet_count++;
+ else if (length == sizeof(struct phy_packet) && data[1] == ~data[2])
+ phy_packet_count++;
+ else {
+ struct link_packet *packet = (struct link_packet *) data;
+ tcode_count[packet->common.tcode]++;
+ }
+
+ gettimeofday(&now, NULL);
+ if (now.tv_sec <= last_update.tv_sec &&
+ now.tv_usec < last_update.tv_usec + 500000)
+ return;
+
+ last_update = now;
+ printf(CLEAR HIDE_CURSOR
+ " bus resets : %8d\n"
+ " short packets : %8d\n"
+ " phy packets : %8d\n",
+ bus_reset_count, short_packet_count, phy_packet_count);
+
+ for (i = 0; i < array_length(packet_info); i++)
+ if (packet_info[i].type != PACKET_RESERVED)
+ printf(" %-24s: %8d\n", packet_info[i].name, tcode_count[i]);
+ printf(SHOW_CURSOR "\n");
+}
+
+static struct termios saved_attributes;
+
+static void
+reset_input_mode(void)
+{
+ tcsetattr(STDIN_FILENO, TCSANOW, &saved_attributes);
+}
+
+static void
+set_input_mode(void)
+{
+ struct termios tattr;
+
+ /* Make sure stdin is a terminal. */
+ if (!isatty(STDIN_FILENO)) {
+ fprintf(stderr, "Not a terminal.\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Save the terminal attributes so we can restore them later. */
+ tcgetattr(STDIN_FILENO, &saved_attributes);
+ atexit(reset_input_mode);
+
+ /* Set the funny terminal modes. */
+ tcgetattr(STDIN_FILENO, &tattr);
+ tattr.c_lflag &= ~(ICANON|ECHO); /* Clear ICANON and ECHO. */
+ tattr.c_cc[VMIN] = 1;
+ tattr.c_cc[VTIME] = 0;
+ tcsetattr(STDIN_FILENO, TCSAFLUSH, &tattr);
+}
+
+int main(int argc, const char *argv[])
+{
+ uint32_t buf[128 * 1024];
+ uint32_t filter;
+ int length, retval, view;
+ int fd = -1;
+ FILE *output = NULL, *input = NULL;
+ poptContext con;
+ char c;
+ struct pollfd pollfds[2];
+
+ sys_sigint_handler = signal(SIGINT, sigint_handler);
+
+ con = poptGetContext(NULL, argc, argv, options, 0);
+ retval = poptGetNextOpt(con);
+ if (retval < -1) {
+ poptPrintUsage(con, stdout, 0);
+ return -1;
+ }
+
+ if (option_version) {
+ printf("dump tool for nosy sniffer, version %s\n", VERSION);
+ return 0;
+ }
+
+ if (__BYTE_ORDER != __LITTLE_ENDIAN)
+ fprintf(stderr, "warning: nosy has only been tested on little "
+ "endian machines\n");
+
+ if (option_input != NULL) {
+ input = fopen(option_input, "r");
+ if (input == NULL) {
+ fprintf(stderr, "Could not open %s, %m\n", option_input);
+ return -1;
+ }
+ } else {
+ fd = open(option_nosy_device, O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "Could not open %s, %m\n", option_nosy_device);
+ return -1;
+ }
+ set_input_mode();
+ }
+
+ if (strcmp(option_view, "transaction") == 0)
+ view = VIEW_TRANSACTION;
+ else if (strcmp(option_view, "stats") == 0)
+ view = VIEW_STATS;
+ else
+ view = VIEW_PACKET;
+
+ if (option_output) {
+ output = fopen(option_output, "w");
+ if (output == NULL) {
+ fprintf(stderr, "Could not open %s, %m\n", option_output);
+ return -1;
+ }
+ }
+
+ setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
+
+ filter = ~0;
+ if (!option_iso)
+ filter &= ~(1 << TCODE_STREAM_DATA);
+ if (!option_cycle_start)
+ filter &= ~(1 << TCODE_CYCLE_START);
+ if (view == VIEW_STATS)
+ filter = ~(1 << TCODE_CYCLE_START);
+
+ ioctl(fd, NOSY_IOC_FILTER, filter);
+
+ ioctl(fd, NOSY_IOC_START);
+
+ pollfds[0].fd = fd;
+ pollfds[0].events = POLLIN;
+ pollfds[1].fd = STDIN_FILENO;
+ pollfds[1].events = POLLIN;
+
+ while (run) {
+ if (input != NULL) {
+ if (fread(&length, sizeof length, 1, input) != 1)
+ return 0;
+ fread(buf, 1, length, input);
+ } else {
+ poll(pollfds, 2, -1);
+ if (pollfds[1].revents) {
+ read(STDIN_FILENO, &c, sizeof c);
+ switch (c) {
+ case 'q':
+ if (output != NULL)
+ fclose(output);
+ return 0;
+ }
+ }
+
+ if (pollfds[0].revents)
+ length = read(fd, buf, sizeof buf);
+ else
+ continue;
+ }
+
+ if (output != NULL) {
+ fwrite(&length, sizeof length, 1, output);
+ fwrite(buf, 1, length, output);
+ }
+
+ switch (view) {
+ case VIEW_TRANSACTION:
+ handle_packet(buf, length);
+ break;
+ case VIEW_PACKET:
+ print_packet(buf, length);
+ break;
+ case VIEW_STATS:
+ print_stats(buf, length);
+ break;
+ }
+ }
+
+ if (output != NULL)
+ fclose(output);
+
+ close(fd);
+
+ poptFreeContext(con);
+
+ return 0;
+}
diff --git a/tools/firewire/nosy-dump.h b/tools/firewire/nosy-dump.h
new file mode 100644
index 00000000000..3a4b5b33ba5
--- /dev/null
+++ b/tools/firewire/nosy-dump.h
@@ -0,0 +1,173 @@
+#ifndef __nosy_dump_h__
+#define __nosy_dump_h__
+
+#define array_length(array) (sizeof(array) / sizeof(array[0]))
+
+#define ACK_NO_ACK 0x0
+#define ACK_DONE(a) ((a >> 2) == 0)
+#define ACK_BUSY(a) ((a >> 2) == 1)
+#define ACK_ERROR(a) ((a >> 2) == 3)
+
+#include <stdint.h>
+
+struct phy_packet {
+ uint32_t timestamp;
+ union {
+ struct {
+ uint32_t zero:24;
+ uint32_t phy_id:6;
+ uint32_t identifier:2;
+ } common, link_on;
+
+ struct {
+ uint32_t zero:16;
+ uint32_t gap_count:6;
+ uint32_t set_gap_count:1;
+ uint32_t set_root:1;
+ uint32_t root_id:6;
+ uint32_t identifier:2;
+ } phy_config;
+
+ struct {
+ uint32_t more_packets:1;
+ uint32_t initiated_reset:1;
+ uint32_t port2:2;
+ uint32_t port1:2;
+ uint32_t port0:2;
+ uint32_t power_class:3;
+ uint32_t contender:1;
+ uint32_t phy_delay:2;
+ uint32_t phy_speed:2;
+ uint32_t gap_count:6;
+ uint32_t link_active:1;
+ uint32_t extended:1;
+ uint32_t phy_id:6;
+ uint32_t identifier:2;
+ } self_id;
+
+ struct {
+ uint32_t more_packets:1;
+ uint32_t reserved1:1;
+ uint32_t porth:2;
+ uint32_t portg:2;
+ uint32_t portf:2;
+ uint32_t porte:2;
+ uint32_t portd:2;
+ uint32_t portc:2;
+ uint32_t portb:2;
+ uint32_t porta:2;
+ uint32_t reserved0:2;
+ uint32_t sequence:3;
+ uint32_t extended:1;
+ uint32_t phy_id:6;
+ uint32_t identifier:2;
+ } ext_self_id;
+ };
+ uint32_t inverted;
+ uint32_t ack;
+};
+
+#define TCODE_PHY_PACKET 0x10
+
+#define PHY_PACKET_CONFIGURATION 0x00
+#define PHY_PACKET_LINK_ON 0x01
+#define PHY_PACKET_SELF_ID 0x02
+
+struct link_packet {
+ uint32_t timestamp;
+ union {
+ struct {
+ uint32_t priority:4;
+ uint32_t tcode:4;
+ uint32_t rt:2;
+ uint32_t tlabel:6;
+ uint32_t destination:16;
+
+ uint32_t offset_high:16;
+ uint32_t source:16;
+
+ uint32_t offset_low;
+ } common;
+
+ struct {
+ uint32_t common[3];
+ uint32_t crc;
+ } read_quadlet;
+
+ struct {
+ uint32_t common[3];
+ uint32_t data;
+ uint32_t crc;
+ } read_quadlet_response;
+
+ struct {
+ uint32_t common[3];
+ uint32_t extended_tcode:16;
+ uint32_t data_length:16;
+ uint32_t crc;
+ } read_block;
+
+ struct {
+ uint32_t common[3];
+ uint32_t extended_tcode:16;
+ uint32_t data_length:16;
+ uint32_t crc;
+ uint32_t data[0];
+ /* crc and ack follows. */
+ } read_block_response;
+
+ struct {
+ uint32_t common[3];
+ uint32_t data;
+ uint32_t crc;
+ } write_quadlet;
+
+ struct {
+ uint32_t common[3];
+ uint32_t extended_tcode:16;
+ uint32_t data_length:16;
+ uint32_t crc;
+ uint32_t data[0];
+ /* crc and ack follows. */
+ } write_block;
+
+ struct {
+ uint32_t common[3];
+ uint32_t crc;
+ } write_response;
+
+ struct {
+ uint32_t common[3];
+ uint32_t data;
+ uint32_t crc;
+ } cycle_start;
+
+ struct {
+ uint32_t sy:4;
+ uint32_t tcode:4;
+ uint32_t channel:6;
+ uint32_t tag:2;
+ uint32_t data_length:16;
+
+ uint32_t crc;
+ } iso_data;
+ };
+};
+
+struct subaction {
+ uint32_t ack;
+ size_t length;
+ struct list link;
+ struct link_packet packet;
+};
+
+struct link_transaction {
+ int request_node, response_node, tlabel;
+ struct subaction *request, *response;
+ struct list request_list, response_list;
+ struct list link;
+};
+
+int decode_fcp(struct link_transaction *t);
+
+#endif /* __nosy_dump_h__ */
diff --git a/tools/hv/Makefile b/tools/hv/Makefile
new file mode 100644
index 00000000000..bd22f786a60
--- /dev/null
+++ b/tools/hv/Makefile
@@ -0,0 +1,13 @@
+# Makefile for Hyper-V tools
+
+CC = $(CROSS_COMPILE)gcc
+PTHREAD_LIBS = -lpthread
+WARNINGS = -Wall -Wextra
+CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS)
+
+all: hv_kvp_daemon hv_vss_daemon
+%: %.c
+ $(CC) $(CFLAGS) -o $@ $^
+
+clean:
+ $(RM) hv_kvp_daemon hv_vss_daemon
diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c
new file mode 100644
index 00000000000..fba1c75aa48
--- /dev/null
+++ b/tools/hv/hv_fcopy_daemon.c
@@ -0,0 +1,197 @@
+/*
+ * An implementation of host to guest copy functionality for Linux.
+ *
+ * Copyright (C) 2014, Microsoft, Inc.
+ *
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ */
+
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <linux/types.h>
+#include <linux/kdev_t.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <linux/hyperv.h>
+#include <syslog.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+static int target_fd;
+static char target_fname[W_MAX_PATH];
+
+static int hv_start_fcopy(struct hv_start_fcopy *smsg)
+{
+ int error = HV_E_FAIL;
+ char *q, *p;
+
+ /*
+ * If possile append a path seperator to the path.
+ */
+ if (strlen((char *)smsg->path_name) < (W_MAX_PATH - 2))
+ strcat((char *)smsg->path_name, "/");
+
+ p = (char *)smsg->path_name;
+ snprintf(target_fname, sizeof(target_fname), "%s/%s",
+ (char *)smsg->path_name, smsg->file_name);
+
+ syslog(LOG_INFO, "Target file name: %s", target_fname);
+ /*
+ * Check to see if the path is already in place; if not,
+ * create if required.
+ */
+ while ((q = strchr(p, '/')) != NULL) {
+ if (q == p) {
+ p++;
+ continue;
+ }
+ *q = '\0';
+ if (access((char *)smsg->path_name, F_OK)) {
+ if (smsg->copy_flags & CREATE_PATH) {
+ if (mkdir((char *)smsg->path_name, 0755)) {
+ syslog(LOG_ERR, "Failed to create %s",
+ (char *)smsg->path_name);
+ goto done;
+ }
+ } else {
+ syslog(LOG_ERR, "Invalid path: %s",
+ (char *)smsg->path_name);
+ goto done;
+ }
+ }
+ p = q + 1;
+ *q = '/';
+ }
+
+ if (!access(target_fname, F_OK)) {
+ syslog(LOG_INFO, "File: %s exists", target_fname);
+ if (!(smsg->copy_flags & OVER_WRITE)) {
+ error = HV_ERROR_ALREADY_EXISTS;
+ goto done;
+ }
+ }
+
+ target_fd = open(target_fname, O_RDWR | O_CREAT | O_CLOEXEC, 0744);
+ if (target_fd == -1) {
+ syslog(LOG_INFO, "Open Failed: %s", strerror(errno));
+ goto done;
+ }
+
+ error = 0;
+done:
+ return error;
+}
+
+static int hv_copy_data(struct hv_do_fcopy *cpmsg)
+{
+ ssize_t bytes_written;
+
+ bytes_written = pwrite(target_fd, cpmsg->data, cpmsg->size,
+ cpmsg->offset);
+
+ if (bytes_written != cpmsg->size)
+ return HV_E_FAIL;
+
+ return 0;
+}
+
+static int hv_copy_finished(void)
+{
+ close(target_fd);
+ return 0;
+}
+static int hv_copy_cancel(void)
+{
+ close(target_fd);
+ unlink(target_fname);
+ return 0;
+
+}
+
+int main(void)
+{
+ int fd, fcopy_fd, len;
+ int error;
+ int version = FCOPY_CURRENT_VERSION;
+ char *buffer[4096 * 2];
+ struct hv_fcopy_hdr *in_msg;
+
+ if (daemon(1, 0)) {
+ syslog(LOG_ERR, "daemon() failed; error: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ openlog("HV_FCOPY", 0, LOG_USER);
+ syslog(LOG_INFO, "HV_FCOPY starting; pid is:%d", getpid());
+
+ fcopy_fd = open("/dev/vmbus/hv_fcopy", O_RDWR);
+
+ if (fcopy_fd < 0) {
+ syslog(LOG_ERR, "open /dev/vmbus/hv_fcopy failed; error: %d %s",
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Register with the kernel.
+ */
+ if ((write(fcopy_fd, &version, sizeof(int))) != sizeof(int)) {
+ syslog(LOG_ERR, "Registration failed: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+
+ while (1) {
+ /*
+ * In this loop we process fcopy messages after the
+ * handshake is complete.
+ */
+ len = pread(fcopy_fd, buffer, (4096 * 2), 0);
+ if (len < 0) {
+ syslog(LOG_ERR, "pread failed: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ in_msg = (struct hv_fcopy_hdr *)buffer;
+
+ switch (in_msg->operation) {
+ case START_FILE_COPY:
+ error = hv_start_fcopy((struct hv_start_fcopy *)in_msg);
+ break;
+ case WRITE_TO_FILE:
+ error = hv_copy_data((struct hv_do_fcopy *)in_msg);
+ break;
+ case COMPLETE_FCOPY:
+ error = hv_copy_finished();
+ break;
+ case CANCEL_FCOPY:
+ error = hv_copy_cancel();
+ break;
+
+ default:
+ syslog(LOG_ERR, "Unknown operation: %d",
+ in_msg->operation);
+
+ }
+
+ if (pwrite(fcopy_fd, &error, sizeof(int), 0) != sizeof(int)) {
+ syslog(LOG_ERR, "pwrite failed: %s", strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+}
diff --git a/tools/hv/hv_get_dhcp_info.sh b/tools/hv/hv_get_dhcp_info.sh
new file mode 100755
index 00000000000..ccd3e953276
--- /dev/null
+++ b/tools/hv/hv_get_dhcp_info.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+
+# This example script retrieves the DHCP state of a given interface.
+# In the interest of keeping the KVP daemon code free of distro specific
+# information; the kvp daemon code invokes this external script to gather
+# DHCP setting for the specific interface.
+#
+# Input: Name of the interface
+#
+# Output: The script prints the string "Enabled" to stdout to indicate
+# that DHCP is enabled on the interface. If DHCP is not enabled,
+# the script prints the string "Disabled" to stdout.
+#
+# Each Distro is expected to implement this script in a distro specific
+# fashion. For instance on Distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for retrieving DHCP
+# information.
+
+if_file="/etc/sysconfig/network-scripts/ifcfg-"$1
+
+dhcp=$(grep "dhcp" $if_file 2>/dev/null)
+
+if [ "$dhcp" != "" ];
+then
+echo "Enabled"
+else
+echo "Disabled"
+fi
diff --git a/tools/hv/hv_get_dns_info.sh b/tools/hv/hv_get_dns_info.sh
new file mode 100755
index 00000000000..058c17b46ff
--- /dev/null
+++ b/tools/hv/hv_get_dns_info.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+# This example script parses /etc/resolv.conf to retrive DNS information.
+# In the interest of keeping the KVP daemon code free of distro specific
+# information; the kvp daemon code invokes this external script to gather
+# DNS information.
+# This script is expected to print the nameserver values to stdout.
+# Each Distro is expected to implement this script in a distro specific
+# fashion. For instance on Distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for retrieving DNS
+# entries.
+
+cat /etc/resolv.conf 2>/dev/null | awk '/^nameserver/ { print $2 }'
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
new file mode 100644
index 00000000000..4088b816a3e
--- /dev/null
+++ b/tools/hv/hv_kvp_daemon.c
@@ -0,0 +1,1742 @@
+/*
+ * An implementation of key value pair (KVP) functionality for Linux.
+ *
+ *
+ * Copyright (C) 2010, Novell, Inc.
+ * Author : K. Y. Srinivasan <ksrinivasan@novell.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <sys/utsname.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <linux/connector.h>
+#include <linux/hyperv.h>
+#include <linux/netlink.h>
+#include <ifaddrs.h>
+#include <netdb.h>
+#include <syslog.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <net/if.h>
+
+/*
+ * KVP protocol: The user mode component first registers with the
+ * the kernel component. Subsequently, the kernel component requests, data
+ * for the specified keys. In response to this message the user mode component
+ * fills in the value corresponding to the specified key. We overload the
+ * sequence field in the cn_msg header to define our KVP message types.
+ *
+ * We use this infrastructure for also supporting queries from user mode
+ * application for state that may be maintained in the KVP kernel component.
+ *
+ */
+
+
+enum key_index {
+ FullyQualifiedDomainName = 0,
+ IntegrationServicesVersion, /*This key is serviced in the kernel*/
+ NetworkAddressIPv4,
+ NetworkAddressIPv6,
+ OSBuildNumber,
+ OSName,
+ OSMajorVersion,
+ OSMinorVersion,
+ OSVersion,
+ ProcessorArchitecture
+};
+
+
+enum {
+ IPADDR = 0,
+ NETMASK,
+ GATEWAY,
+ DNS
+};
+
+static struct sockaddr_nl addr;
+static int in_hand_shake = 1;
+
+static char *os_name = "";
+static char *os_major = "";
+static char *os_minor = "";
+static char *processor_arch;
+static char *os_build;
+static char *os_version;
+static char *lic_version = "Unknown version";
+static char full_domain_name[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
+static struct utsname uts_buf;
+
+/*
+ * The location of the interface configuration file.
+ */
+
+#define KVP_CONFIG_LOC "/var/lib/hyperv"
+
+#define MAX_FILE_NAME 100
+#define ENTRIES_PER_BLOCK 50
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+struct kvp_record {
+ char key[HV_KVP_EXCHANGE_MAX_KEY_SIZE];
+ char value[HV_KVP_EXCHANGE_MAX_VALUE_SIZE];
+};
+
+struct kvp_file_state {
+ int fd;
+ int num_blocks;
+ struct kvp_record *records;
+ int num_records;
+ char fname[MAX_FILE_NAME];
+};
+
+static struct kvp_file_state kvp_file_info[KVP_POOL_COUNT];
+
+static void kvp_acquire_lock(int pool)
+{
+ struct flock fl = {F_WRLCK, SEEK_SET, 0, 0, 0};
+ fl.l_pid = getpid();
+
+ if (fcntl(kvp_file_info[pool].fd, F_SETLKW, &fl) == -1) {
+ syslog(LOG_ERR, "Failed to acquire the lock pool: %d; error: %d %s", pool,
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void kvp_release_lock(int pool)
+{
+ struct flock fl = {F_UNLCK, SEEK_SET, 0, 0, 0};
+ fl.l_pid = getpid();
+
+ if (fcntl(kvp_file_info[pool].fd, F_SETLK, &fl) == -1) {
+ syslog(LOG_ERR, "Failed to release the lock pool: %d; error: %d %s", pool,
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+}
+
+static void kvp_update_file(int pool)
+{
+ FILE *filep;
+ size_t bytes_written;
+
+ /*
+ * We are going to write our in-memory registry out to
+ * disk; acquire the lock first.
+ */
+ kvp_acquire_lock(pool);
+
+ filep = fopen(kvp_file_info[pool].fname, "we");
+ if (!filep) {
+ syslog(LOG_ERR, "Failed to open file, pool: %d; error: %d %s", pool,
+ errno, strerror(errno));
+ kvp_release_lock(pool);
+ exit(EXIT_FAILURE);
+ }
+
+ bytes_written = fwrite(kvp_file_info[pool].records,
+ sizeof(struct kvp_record),
+ kvp_file_info[pool].num_records, filep);
+
+ if (ferror(filep) || fclose(filep)) {
+ kvp_release_lock(pool);
+ syslog(LOG_ERR, "Failed to write file, pool: %d", pool);
+ exit(EXIT_FAILURE);
+ }
+
+ kvp_release_lock(pool);
+}
+
+static void kvp_update_mem_state(int pool)
+{
+ FILE *filep;
+ size_t records_read = 0;
+ struct kvp_record *record = kvp_file_info[pool].records;
+ struct kvp_record *readp;
+ int num_blocks = kvp_file_info[pool].num_blocks;
+ int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
+
+ kvp_acquire_lock(pool);
+
+ filep = fopen(kvp_file_info[pool].fname, "re");
+ if (!filep) {
+ syslog(LOG_ERR, "Failed to open file, pool: %d; error: %d %s", pool,
+ errno, strerror(errno));
+ kvp_release_lock(pool);
+ exit(EXIT_FAILURE);
+ }
+ for (;;) {
+ readp = &record[records_read];
+ records_read += fread(readp, sizeof(struct kvp_record),
+ ENTRIES_PER_BLOCK * num_blocks,
+ filep);
+
+ if (ferror(filep)) {
+ syslog(LOG_ERR, "Failed to read file, pool: %d", pool);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!feof(filep)) {
+ /*
+ * We have more data to read.
+ */
+ num_blocks++;
+ record = realloc(record, alloc_unit * num_blocks);
+
+ if (record == NULL) {
+ syslog(LOG_ERR, "malloc failed");
+ exit(EXIT_FAILURE);
+ }
+ continue;
+ }
+ break;
+ }
+
+ kvp_file_info[pool].num_blocks = num_blocks;
+ kvp_file_info[pool].records = record;
+ kvp_file_info[pool].num_records = records_read;
+
+ fclose(filep);
+ kvp_release_lock(pool);
+}
+static int kvp_file_init(void)
+{
+ int fd;
+ FILE *filep;
+ size_t records_read;
+ char *fname;
+ struct kvp_record *record;
+ struct kvp_record *readp;
+ int num_blocks;
+ int i;
+ int alloc_unit = sizeof(struct kvp_record) * ENTRIES_PER_BLOCK;
+
+ if (access(KVP_CONFIG_LOC, F_OK)) {
+ if (mkdir(KVP_CONFIG_LOC, 0755 /* rwxr-xr-x */)) {
+ syslog(LOG_ERR, "Failed to create '%s'; error: %d %s", KVP_CONFIG_LOC,
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ for (i = 0; i < KVP_POOL_COUNT; i++) {
+ fname = kvp_file_info[i].fname;
+ records_read = 0;
+ num_blocks = 1;
+ sprintf(fname, "%s/.kvp_pool_%d", KVP_CONFIG_LOC, i);
+ fd = open(fname, O_RDWR | O_CREAT | O_CLOEXEC, 0644 /* rw-r--r-- */);
+
+ if (fd == -1)
+ return 1;
+
+
+ filep = fopen(fname, "re");
+ if (!filep) {
+ close(fd);
+ return 1;
+ }
+
+ record = malloc(alloc_unit * num_blocks);
+ if (record == NULL) {
+ fclose(filep);
+ close(fd);
+ return 1;
+ }
+ for (;;) {
+ readp = &record[records_read];
+ records_read += fread(readp, sizeof(struct kvp_record),
+ ENTRIES_PER_BLOCK,
+ filep);
+
+ if (ferror(filep)) {
+ syslog(LOG_ERR, "Failed to read file, pool: %d",
+ i);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!feof(filep)) {
+ /*
+ * We have more data to read.
+ */
+ num_blocks++;
+ record = realloc(record, alloc_unit *
+ num_blocks);
+ if (record == NULL) {
+ fclose(filep);
+ close(fd);
+ return 1;
+ }
+ continue;
+ }
+ break;
+ }
+ kvp_file_info[i].fd = fd;
+ kvp_file_info[i].num_blocks = num_blocks;
+ kvp_file_info[i].records = record;
+ kvp_file_info[i].num_records = records_read;
+ fclose(filep);
+
+ }
+
+ return 0;
+}
+
+static int kvp_key_delete(int pool, const char *key, int key_size)
+{
+ int i;
+ int j, k;
+ int num_records;
+ struct kvp_record *record;
+
+ /*
+ * First update the in-memory state.
+ */
+ kvp_update_mem_state(pool);
+
+ num_records = kvp_file_info[pool].num_records;
+ record = kvp_file_info[pool].records;
+
+ for (i = 0; i < num_records; i++) {
+ if (memcmp(key, record[i].key, key_size))
+ continue;
+ /*
+ * Found a match; just move the remaining
+ * entries up.
+ */
+ if (i == num_records) {
+ kvp_file_info[pool].num_records--;
+ kvp_update_file(pool);
+ return 0;
+ }
+
+ j = i;
+ k = j + 1;
+ for (; k < num_records; k++) {
+ strcpy(record[j].key, record[k].key);
+ strcpy(record[j].value, record[k].value);
+ j++;
+ }
+
+ kvp_file_info[pool].num_records--;
+ kvp_update_file(pool);
+ return 0;
+ }
+ return 1;
+}
+
+static int kvp_key_add_or_modify(int pool, const char *key, int key_size, const char *value,
+ int value_size)
+{
+ int i;
+ int num_records;
+ struct kvp_record *record;
+ int num_blocks;
+
+ if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) ||
+ (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE))
+ return 1;
+
+ /*
+ * First update the in-memory state.
+ */
+ kvp_update_mem_state(pool);
+
+ num_records = kvp_file_info[pool].num_records;
+ record = kvp_file_info[pool].records;
+ num_blocks = kvp_file_info[pool].num_blocks;
+
+ for (i = 0; i < num_records; i++) {
+ if (memcmp(key, record[i].key, key_size))
+ continue;
+ /*
+ * Found a match; just update the value -
+ * this is the modify case.
+ */
+ memcpy(record[i].value, value, value_size);
+ kvp_update_file(pool);
+ return 0;
+ }
+
+ /*
+ * Need to add a new entry;
+ */
+ if (num_records == (ENTRIES_PER_BLOCK * num_blocks)) {
+ /* Need to allocate a larger array for reg entries. */
+ record = realloc(record, sizeof(struct kvp_record) *
+ ENTRIES_PER_BLOCK * (num_blocks + 1));
+
+ if (record == NULL)
+ return 1;
+ kvp_file_info[pool].num_blocks++;
+
+ }
+ memcpy(record[i].value, value, value_size);
+ memcpy(record[i].key, key, key_size);
+ kvp_file_info[pool].records = record;
+ kvp_file_info[pool].num_records++;
+ kvp_update_file(pool);
+ return 0;
+}
+
+static int kvp_get_value(int pool, const char *key, int key_size, char *value,
+ int value_size)
+{
+ int i;
+ int num_records;
+ struct kvp_record *record;
+
+ if ((key_size > HV_KVP_EXCHANGE_MAX_KEY_SIZE) ||
+ (value_size > HV_KVP_EXCHANGE_MAX_VALUE_SIZE))
+ return 1;
+
+ /*
+ * First update the in-memory state.
+ */
+ kvp_update_mem_state(pool);
+
+ num_records = kvp_file_info[pool].num_records;
+ record = kvp_file_info[pool].records;
+
+ for (i = 0; i < num_records; i++) {
+ if (memcmp(key, record[i].key, key_size))
+ continue;
+ /*
+ * Found a match; just copy the value out.
+ */
+ memcpy(value, record[i].value, value_size);
+ return 0;
+ }
+
+ return 1;
+}
+
+static int kvp_pool_enumerate(int pool, int index, char *key, int key_size,
+ char *value, int value_size)
+{
+ struct kvp_record *record;
+
+ /*
+ * First update our in-memory database.
+ */
+ kvp_update_mem_state(pool);
+ record = kvp_file_info[pool].records;
+
+ if (index >= kvp_file_info[pool].num_records) {
+ return 1;
+ }
+
+ memcpy(key, record[index].key, key_size);
+ memcpy(value, record[index].value, value_size);
+ return 0;
+}
+
+
+void kvp_get_os_info(void)
+{
+ FILE *file;
+ char *p, buf[512];
+
+ uname(&uts_buf);
+ os_version = uts_buf.release;
+ os_build = strdup(uts_buf.release);
+
+ os_name = uts_buf.sysname;
+ processor_arch = uts_buf.machine;
+
+ /*
+ * The current windows host (win7) expects the build
+ * string to be of the form: x.y.z
+ * Strip additional information we may have.
+ */
+ p = strchr(os_version, '-');
+ if (p)
+ *p = '\0';
+
+ /*
+ * Parse the /etc/os-release file if present:
+ * http://www.freedesktop.org/software/systemd/man/os-release.html
+ */
+ file = fopen("/etc/os-release", "r");
+ if (file != NULL) {
+ while (fgets(buf, sizeof(buf), file)) {
+ char *value, *q;
+
+ /* Ignore comments */
+ if (buf[0] == '#')
+ continue;
+
+ /* Split into name=value */
+ p = strchr(buf, '=');
+ if (!p)
+ continue;
+ *p++ = 0;
+
+ /* Remove quotes and newline; un-escape */
+ value = p;
+ q = p;
+ while (*p) {
+ if (*p == '\\') {
+ ++p;
+ if (!*p)
+ break;
+ *q++ = *p++;
+ } else if (*p == '\'' || *p == '"' ||
+ *p == '\n') {
+ ++p;
+ } else {
+ *q++ = *p++;
+ }
+ }
+ *q = 0;
+
+ if (!strcmp(buf, "NAME")) {
+ p = strdup(value);
+ if (!p)
+ break;
+ os_name = p;
+ } else if (!strcmp(buf, "VERSION_ID")) {
+ p = strdup(value);
+ if (!p)
+ break;
+ os_major = p;
+ }
+ }
+ fclose(file);
+ return;
+ }
+
+ /* Fallback for older RH/SUSE releases */
+ file = fopen("/etc/SuSE-release", "r");
+ if (file != NULL)
+ goto kvp_osinfo_found;
+ file = fopen("/etc/redhat-release", "r");
+ if (file != NULL)
+ goto kvp_osinfo_found;
+
+ /*
+ * We don't have information about the os.
+ */
+ return;
+
+kvp_osinfo_found:
+ /* up to three lines */
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ p = strdup(buf);
+ if (!p)
+ goto done;
+ os_name = p;
+
+ /* second line */
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ p = strdup(buf);
+ if (!p)
+ goto done;
+ os_major = p;
+
+ /* third line */
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ p = strchr(buf, '\n');
+ if (p)
+ *p = '\0';
+ p = strdup(buf);
+ if (p)
+ os_minor = p;
+ }
+ }
+ }
+
+done:
+ fclose(file);
+ return;
+}
+
+
+
+/*
+ * Retrieve an interface name corresponding to the specified guid.
+ * If there is a match, the function returns a pointer
+ * to the interface name and if not, a NULL is returned.
+ * If a match is found, the caller is responsible for
+ * freeing the memory.
+ */
+
+static char *kvp_get_if_name(char *guid)
+{
+ DIR *dir;
+ struct dirent *entry;
+ FILE *file;
+ char *p, *q, *x;
+ char *if_name = NULL;
+ char buf[256];
+ char *kvp_net_dir = "/sys/class/net/";
+ char dev_id[256];
+
+ dir = opendir(kvp_net_dir);
+ if (dir == NULL)
+ return NULL;
+
+ snprintf(dev_id, sizeof(dev_id), "%s", kvp_net_dir);
+ q = dev_id + strlen(kvp_net_dir);
+
+ while ((entry = readdir(dir)) != NULL) {
+ /*
+ * Set the state for the next pass.
+ */
+ *q = '\0';
+ strcat(dev_id, entry->d_name);
+ strcat(dev_id, "/device/device_id");
+
+ file = fopen(dev_id, "r");
+ if (file == NULL)
+ continue;
+
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+
+ if (!strcmp(p, guid)) {
+ /*
+ * Found the guid match; return the interface
+ * name. The caller will free the memory.
+ */
+ if_name = strdup(entry->d_name);
+ fclose(file);
+ break;
+ }
+ }
+ fclose(file);
+ }
+
+ closedir(dir);
+ return if_name;
+}
+
+/*
+ * Retrieve the MAC address given the interface name.
+ */
+
+static char *kvp_if_name_to_mac(char *if_name)
+{
+ FILE *file;
+ char *p, *x;
+ char buf[256];
+ char addr_file[256];
+ int i;
+ char *mac_addr = NULL;
+
+ snprintf(addr_file, sizeof(addr_file), "%s%s%s", "/sys/class/net/",
+ if_name, "/address");
+
+ file = fopen(addr_file, "r");
+ if (file == NULL)
+ return NULL;
+
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+ for (i = 0; i < strlen(p); i++)
+ p[i] = toupper(p[i]);
+ mac_addr = strdup(p);
+ }
+
+ fclose(file);
+ return mac_addr;
+}
+
+
+/*
+ * Retrieve the interface name given tha MAC address.
+ */
+
+static char *kvp_mac_to_if_name(char *mac)
+{
+ DIR *dir;
+ struct dirent *entry;
+ FILE *file;
+ char *p, *q, *x;
+ char *if_name = NULL;
+ char buf[256];
+ char *kvp_net_dir = "/sys/class/net/";
+ char dev_id[256];
+ int i;
+
+ dir = opendir(kvp_net_dir);
+ if (dir == NULL)
+ return NULL;
+
+ snprintf(dev_id, sizeof(dev_id), kvp_net_dir);
+ q = dev_id + strlen(kvp_net_dir);
+
+ while ((entry = readdir(dir)) != NULL) {
+ /*
+ * Set the state for the next pass.
+ */
+ *q = '\0';
+
+ strcat(dev_id, entry->d_name);
+ strcat(dev_id, "/address");
+
+ file = fopen(dev_id, "r");
+ if (file == NULL)
+ continue;
+
+ p = fgets(buf, sizeof(buf), file);
+ if (p) {
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+
+ for (i = 0; i < strlen(p); i++)
+ p[i] = toupper(p[i]);
+
+ if (!strcmp(p, mac)) {
+ /*
+ * Found the MAC match; return the interface
+ * name. The caller will free the memory.
+ */
+ if_name = strdup(entry->d_name);
+ fclose(file);
+ break;
+ }
+ }
+ fclose(file);
+ }
+
+ closedir(dir);
+ return if_name;
+}
+
+
+static void kvp_process_ipconfig_file(char *cmd,
+ char *config_buf, int len,
+ int element_size, int offset)
+{
+ char buf[256];
+ char *p;
+ char *x;
+ FILE *file;
+
+ /*
+ * First execute the command.
+ */
+ file = popen(cmd, "r");
+ if (file == NULL)
+ return;
+
+ if (offset == 0)
+ memset(config_buf, 0, len);
+ while ((p = fgets(buf, sizeof(buf), file)) != NULL) {
+ if ((len - strlen(config_buf)) < (element_size + 1))
+ break;
+
+ x = strchr(p, '\n');
+ if (x)
+ *x = '\0';
+
+ strcat(config_buf, p);
+ strcat(config_buf, ";");
+ }
+ pclose(file);
+}
+
+static void kvp_get_ipconfig_info(char *if_name,
+ struct hv_kvp_ipaddr_value *buffer)
+{
+ char cmd[512];
+ char dhcp_info[128];
+ char *p;
+ FILE *file;
+
+ /*
+ * Get the address of default gateway (ipv4).
+ */
+ sprintf(cmd, "%s %s", "ip route show dev", if_name);
+ strcat(cmd, " | awk '/default/ {print $3 }'");
+
+ /*
+ * Execute the command to gather gateway info.
+ */
+ kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
+ (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0);
+
+ /*
+ * Get the address of default gateway (ipv6).
+ */
+ sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name);
+ strcat(cmd, " | awk '/default/ {print $3 }'");
+
+ /*
+ * Execute the command to gather gateway info (ipv6).
+ */
+ kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
+ (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1);
+
+
+ /*
+ * Gather the DNS state.
+ * Since there is no standard way to get this information
+ * across various distributions of interest; we just invoke
+ * an external script that needs to be ported across distros
+ * of interest.
+ *
+ * Following is the expected format of the information from the script:
+ *
+ * ipaddr1 (nameserver1)
+ * ipaddr2 (nameserver2)
+ * .
+ * .
+ */
+
+ sprintf(cmd, "%s", "hv_get_dns_info");
+
+ /*
+ * Execute the command to gather DNS info.
+ */
+ kvp_process_ipconfig_file(cmd, (char *)buffer->dns_addr,
+ (MAX_IP_ADDR_SIZE * 2), INET_ADDRSTRLEN, 0);
+
+ /*
+ * Gather the DHCP state.
+ * We will gather this state by invoking an external script.
+ * The parameter to the script is the interface name.
+ * Here is the expected output:
+ *
+ * Enabled: DHCP enabled.
+ */
+
+ sprintf(cmd, "%s %s", "hv_get_dhcp_info", if_name);
+
+ file = popen(cmd, "r");
+ if (file == NULL)
+ return;
+
+ p = fgets(dhcp_info, sizeof(dhcp_info), file);
+ if (p == NULL) {
+ pclose(file);
+ return;
+ }
+
+ if (!strncmp(p, "Enabled", 7))
+ buffer->dhcp_enabled = 1;
+ else
+ buffer->dhcp_enabled = 0;
+
+ pclose(file);
+}
+
+
+static unsigned int hweight32(unsigned int *w)
+{
+ unsigned int res = *w - ((*w >> 1) & 0x55555555);
+ res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
+ res = (res + (res >> 4)) & 0x0F0F0F0F;
+ res = res + (res >> 8);
+ return (res + (res >> 16)) & 0x000000FF;
+}
+
+static int kvp_process_ip_address(void *addrp,
+ int family, char *buffer,
+ int length, int *offset)
+{
+ struct sockaddr_in *addr;
+ struct sockaddr_in6 *addr6;
+ int addr_length;
+ char tmp[50];
+ const char *str;
+
+ if (family == AF_INET) {
+ addr = (struct sockaddr_in *)addrp;
+ str = inet_ntop(family, &addr->sin_addr, tmp, 50);
+ addr_length = INET_ADDRSTRLEN;
+ } else {
+ addr6 = (struct sockaddr_in6 *)addrp;
+ str = inet_ntop(family, &addr6->sin6_addr.s6_addr, tmp, 50);
+ addr_length = INET6_ADDRSTRLEN;
+ }
+
+ if ((length - *offset) < addr_length + 2)
+ return HV_E_FAIL;
+ if (str == NULL) {
+ strcpy(buffer, "inet_ntop failed\n");
+ return HV_E_FAIL;
+ }
+ if (*offset == 0)
+ strcpy(buffer, tmp);
+ else {
+ strcat(buffer, ";");
+ strcat(buffer, tmp);
+ }
+
+ *offset += strlen(str) + 1;
+
+ return 0;
+}
+
+static int
+kvp_get_ip_info(int family, char *if_name, int op,
+ void *out_buffer, int length)
+{
+ struct ifaddrs *ifap;
+ struct ifaddrs *curp;
+ int offset = 0;
+ int sn_offset = 0;
+ int error = 0;
+ char *buffer;
+ struct hv_kvp_ipaddr_value *ip_buffer;
+ char cidr_mask[5]; /* /xyz */
+ int weight;
+ int i;
+ unsigned int *w;
+ char *sn_str;
+ struct sockaddr_in6 *addr6;
+
+ if (op == KVP_OP_ENUMERATE) {
+ buffer = out_buffer;
+ } else {
+ ip_buffer = out_buffer;
+ buffer = (char *)ip_buffer->ip_addr;
+ ip_buffer->addr_family = 0;
+ }
+ /*
+ * On entry into this function, the buffer is capable of holding the
+ * maximum key value.
+ */
+
+ if (getifaddrs(&ifap)) {
+ strcpy(buffer, "getifaddrs failed\n");
+ return HV_E_FAIL;
+ }
+
+ curp = ifap;
+ while (curp != NULL) {
+ if (curp->ifa_addr == NULL) {
+ curp = curp->ifa_next;
+ continue;
+ }
+
+ if ((if_name != NULL) &&
+ (strncmp(curp->ifa_name, if_name, strlen(if_name)))) {
+ /*
+ * We want info about a specific interface;
+ * just continue.
+ */
+ curp = curp->ifa_next;
+ continue;
+ }
+
+ /*
+ * We only support two address families: AF_INET and AF_INET6.
+ * If a family value of 0 is specified, we collect both
+ * supported address families; if not we gather info on
+ * the specified address family.
+ */
+ if ((((family != 0) &&
+ (curp->ifa_addr->sa_family != family))) ||
+ (curp->ifa_flags & IFF_LOOPBACK)) {
+ curp = curp->ifa_next;
+ continue;
+ }
+ if ((curp->ifa_addr->sa_family != AF_INET) &&
+ (curp->ifa_addr->sa_family != AF_INET6)) {
+ curp = curp->ifa_next;
+ continue;
+ }
+
+ if (op == KVP_OP_GET_IP_INFO) {
+ /*
+ * Gather info other than the IP address.
+ * IP address info will be gathered later.
+ */
+ if (curp->ifa_addr->sa_family == AF_INET) {
+ ip_buffer->addr_family |= ADDR_FAMILY_IPV4;
+ /*
+ * Get subnet info.
+ */
+ error = kvp_process_ip_address(
+ curp->ifa_netmask,
+ AF_INET,
+ (char *)
+ ip_buffer->sub_net,
+ length,
+ &sn_offset);
+ if (error)
+ goto gather_ipaddr;
+ } else {
+ ip_buffer->addr_family |= ADDR_FAMILY_IPV6;
+
+ /*
+ * Get subnet info in CIDR format.
+ */
+ weight = 0;
+ sn_str = (char *)ip_buffer->sub_net;
+ addr6 = (struct sockaddr_in6 *)
+ curp->ifa_netmask;
+ w = addr6->sin6_addr.s6_addr32;
+
+ for (i = 0; i < 4; i++)
+ weight += hweight32(&w[i]);
+
+ sprintf(cidr_mask, "/%d", weight);
+ if ((length - sn_offset) <
+ (strlen(cidr_mask) + 1))
+ goto gather_ipaddr;
+
+ if (sn_offset == 0)
+ strcpy(sn_str, cidr_mask);
+ else {
+ strcat((char *)ip_buffer->sub_net, ";");
+ strcat(sn_str, cidr_mask);
+ }
+ sn_offset += strlen(sn_str) + 1;
+ }
+
+ /*
+ * Collect other ip related configuration info.
+ */
+
+ kvp_get_ipconfig_info(if_name, ip_buffer);
+ }
+
+gather_ipaddr:
+ error = kvp_process_ip_address(curp->ifa_addr,
+ curp->ifa_addr->sa_family,
+ buffer,
+ length, &offset);
+ if (error)
+ goto getaddr_done;
+
+ curp = curp->ifa_next;
+ }
+
+getaddr_done:
+ freeifaddrs(ifap);
+ return error;
+}
+
+
+static int expand_ipv6(char *addr, int type)
+{
+ int ret;
+ struct in6_addr v6_addr;
+
+ ret = inet_pton(AF_INET6, addr, &v6_addr);
+
+ if (ret != 1) {
+ if (type == NETMASK)
+ return 1;
+ return 0;
+ }
+
+ sprintf(addr, "%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:"
+ "%02x%02x:%02x%02x:%02x%02x",
+ (int)v6_addr.s6_addr[0], (int)v6_addr.s6_addr[1],
+ (int)v6_addr.s6_addr[2], (int)v6_addr.s6_addr[3],
+ (int)v6_addr.s6_addr[4], (int)v6_addr.s6_addr[5],
+ (int)v6_addr.s6_addr[6], (int)v6_addr.s6_addr[7],
+ (int)v6_addr.s6_addr[8], (int)v6_addr.s6_addr[9],
+ (int)v6_addr.s6_addr[10], (int)v6_addr.s6_addr[11],
+ (int)v6_addr.s6_addr[12], (int)v6_addr.s6_addr[13],
+ (int)v6_addr.s6_addr[14], (int)v6_addr.s6_addr[15]);
+
+ return 1;
+
+}
+
+static int is_ipv4(char *addr)
+{
+ int ret;
+ struct in_addr ipv4_addr;
+
+ ret = inet_pton(AF_INET, addr, &ipv4_addr);
+
+ if (ret == 1)
+ return 1;
+ return 0;
+}
+
+static int parse_ip_val_buffer(char *in_buf, int *offset,
+ char *out_buf, int out_len)
+{
+ char *x;
+ char *start;
+
+ /*
+ * in_buf has sequence of characters that are seperated by
+ * the character ';'. The last sequence does not have the
+ * terminating ";" character.
+ */
+ start = in_buf + *offset;
+
+ x = strchr(start, ';');
+ if (x)
+ *x = 0;
+ else
+ x = start + strlen(start);
+
+ if (strlen(start) != 0) {
+ int i = 0;
+ /*
+ * Get rid of leading spaces.
+ */
+ while (start[i] == ' ')
+ i++;
+
+ if ((x - start) <= out_len) {
+ strcpy(out_buf, (start + i));
+ *offset += (x - start) + 1;
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static int kvp_write_file(FILE *f, char *s1, char *s2, char *s3)
+{
+ int ret;
+
+ ret = fprintf(f, "%s%s%s%s\n", s1, s2, "=", s3);
+
+ if (ret < 0)
+ return HV_E_FAIL;
+
+ return 0;
+}
+
+
+static int process_ip_string(FILE *f, char *ip_string, int type)
+{
+ int error = 0;
+ char addr[INET6_ADDRSTRLEN];
+ int i = 0;
+ int j = 0;
+ char str[256];
+ char sub_str[10];
+ int offset = 0;
+
+ memset(addr, 0, sizeof(addr));
+
+ while (parse_ip_val_buffer(ip_string, &offset, addr,
+ (MAX_IP_ADDR_SIZE * 2))) {
+
+ sub_str[0] = 0;
+ if (is_ipv4(addr)) {
+ switch (type) {
+ case IPADDR:
+ snprintf(str, sizeof(str), "%s", "IPADDR");
+ break;
+ case NETMASK:
+ snprintf(str, sizeof(str), "%s", "NETMASK");
+ break;
+ case GATEWAY:
+ snprintf(str, sizeof(str), "%s", "GATEWAY");
+ break;
+ case DNS:
+ snprintf(str, sizeof(str), "%s", "DNS");
+ break;
+ }
+
+ if (type == DNS) {
+ snprintf(sub_str, sizeof(sub_str), "%d", ++i);
+ } else if (type == GATEWAY && i == 0) {
+ ++i;
+ } else {
+ snprintf(sub_str, sizeof(sub_str), "%d", i++);
+ }
+
+
+ } else if (expand_ipv6(addr, type)) {
+ switch (type) {
+ case IPADDR:
+ snprintf(str, sizeof(str), "%s", "IPV6ADDR");
+ break;
+ case NETMASK:
+ snprintf(str, sizeof(str), "%s", "IPV6NETMASK");
+ break;
+ case GATEWAY:
+ snprintf(str, sizeof(str), "%s",
+ "IPV6_DEFAULTGW");
+ break;
+ case DNS:
+ snprintf(str, sizeof(str), "%s", "DNS");
+ break;
+ }
+
+ if (type == DNS) {
+ snprintf(sub_str, sizeof(sub_str), "%d", ++i);
+ } else if (j == 0) {
+ ++j;
+ } else {
+ snprintf(sub_str, sizeof(sub_str), "_%d", j++);
+ }
+ } else {
+ return HV_INVALIDARG;
+ }
+
+ error = kvp_write_file(f, str, sub_str, addr);
+ if (error)
+ return error;
+ memset(addr, 0, sizeof(addr));
+ }
+
+ return 0;
+}
+
+static int kvp_set_ip_info(char *if_name, struct hv_kvp_ipaddr_value *new_val)
+{
+ int error = 0;
+ char if_file[128];
+ FILE *file;
+ char cmd[512];
+ char *mac_addr;
+
+ /*
+ * Set the configuration for the specified interface with
+ * the information provided. Since there is no standard
+ * way to configure an interface, we will have an external
+ * script that does the job of configuring the interface and
+ * flushing the configuration.
+ *
+ * The parameters passed to this external script are:
+ * 1. A configuration file that has the specified configuration.
+ *
+ * We will embed the name of the interface in the configuration
+ * file: ifcfg-ethx (where ethx is the interface name).
+ *
+ * The information provided here may be more than what is needed
+ * in a given distro to configure the interface and so are free
+ * ignore information that may not be relevant.
+ *
+ * Here is the format of the ip configuration file:
+ *
+ * HWADDR=macaddr
+ * DEVICE=interface name
+ * BOOTPROTO=<protocol> (where <protocol> is "dhcp" if DHCP is configured
+ * or "none" if no boot-time protocol should be used)
+ *
+ * IPADDR0=ipaddr1
+ * IPADDR1=ipaddr2
+ * IPADDRx=ipaddry (where y = x + 1)
+ *
+ * NETMASK0=netmask1
+ * NETMASKx=netmasky (where y = x + 1)
+ *
+ * GATEWAY=ipaddr1
+ * GATEWAYx=ipaddry (where y = x + 1)
+ *
+ * DNSx=ipaddrx (where first DNS address is tagged as DNS1 etc)
+ *
+ * IPV6 addresses will be tagged as IPV6ADDR, IPV6 gateway will be
+ * tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as
+ * IPV6NETMASK.
+ *
+ * The host can specify multiple ipv4 and ipv6 addresses to be
+ * configured for the interface. Furthermore, the configuration
+ * needs to be persistent. A subsequent GET call on the interface
+ * is expected to return the configuration that is set via the SET
+ * call.
+ */
+
+ snprintf(if_file, sizeof(if_file), "%s%s%s", KVP_CONFIG_LOC,
+ "/ifcfg-", if_name);
+
+ file = fopen(if_file, "w");
+
+ if (file == NULL) {
+ syslog(LOG_ERR, "Failed to open config file; error: %d %s",
+ errno, strerror(errno));
+ return HV_E_FAIL;
+ }
+
+ /*
+ * First write out the MAC address.
+ */
+
+ mac_addr = kvp_if_name_to_mac(if_name);
+ if (mac_addr == NULL) {
+ error = HV_E_FAIL;
+ goto setval_error;
+ }
+
+ error = kvp_write_file(file, "HWADDR", "", mac_addr);
+ free(mac_addr);
+ if (error)
+ goto setval_error;
+
+ error = kvp_write_file(file, "DEVICE", "", if_name);
+ if (error)
+ goto setval_error;
+
+ if (new_val->dhcp_enabled) {
+ error = kvp_write_file(file, "BOOTPROTO", "", "dhcp");
+ if (error)
+ goto setval_error;
+
+ /*
+ * We are done!.
+ */
+ goto setval_done;
+
+ } else {
+ error = kvp_write_file(file, "BOOTPROTO", "", "none");
+ if (error)
+ goto setval_error;
+ }
+
+ /*
+ * Write the configuration for ipaddress, netmask, gateway and
+ * name servers.
+ */
+
+ error = process_ip_string(file, (char *)new_val->ip_addr, IPADDR);
+ if (error)
+ goto setval_error;
+
+ error = process_ip_string(file, (char *)new_val->sub_net, NETMASK);
+ if (error)
+ goto setval_error;
+
+ error = process_ip_string(file, (char *)new_val->gate_way, GATEWAY);
+ if (error)
+ goto setval_error;
+
+ error = process_ip_string(file, (char *)new_val->dns_addr, DNS);
+ if (error)
+ goto setval_error;
+
+setval_done:
+ fclose(file);
+
+ /*
+ * Now that we have populated the configuration file,
+ * invoke the external script to do its magic.
+ */
+
+ snprintf(cmd, sizeof(cmd), "%s %s", "hv_set_ifconfig", if_file);
+ if (system(cmd)) {
+ syslog(LOG_ERR, "Failed to execute cmd '%s'; error: %d %s",
+ cmd, errno, strerror(errno));
+ return HV_E_FAIL;
+ }
+ return 0;
+
+setval_error:
+ syslog(LOG_ERR, "Failed to write config file");
+ fclose(file);
+ return error;
+}
+
+
+static void
+kvp_get_domain_name(char *buffer, int length)
+{
+ struct addrinfo hints, *info ;
+ int error = 0;
+
+ gethostname(buffer, length);
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = AF_INET; /*Get only ipv4 addrinfo. */
+ hints.ai_socktype = SOCK_STREAM;
+ hints.ai_flags = AI_CANONNAME;
+
+ error = getaddrinfo(buffer, NULL, &hints, &info);
+ if (error != 0) {
+ snprintf(buffer, length, "getaddrinfo failed: 0x%x %s",
+ error, gai_strerror(error));
+ return;
+ }
+ snprintf(buffer, length, "%s", info->ai_canonname);
+ freeaddrinfo(info);
+}
+
+static int
+netlink_send(int fd, struct cn_msg *msg)
+{
+ struct nlmsghdr nlh = { .nlmsg_type = NLMSG_DONE };
+ unsigned int size;
+ struct msghdr message;
+ struct iovec iov[2];
+
+ size = sizeof(struct cn_msg) + msg->len;
+
+ nlh.nlmsg_pid = getpid();
+ nlh.nlmsg_len = NLMSG_LENGTH(size);
+
+ iov[0].iov_base = &nlh;
+ iov[0].iov_len = sizeof(nlh);
+
+ iov[1].iov_base = msg;
+ iov[1].iov_len = size;
+
+ memset(&message, 0, sizeof(message));
+ message.msg_name = &addr;
+ message.msg_namelen = sizeof(addr);
+ message.msg_iov = iov;
+ message.msg_iovlen = 2;
+
+ return sendmsg(fd, &message, 0);
+}
+
+int main(void)
+{
+ int fd, len, nl_group;
+ int error;
+ struct cn_msg *message;
+ struct pollfd pfd;
+ struct nlmsghdr *incoming_msg;
+ struct cn_msg *incoming_cn_msg;
+ struct hv_kvp_msg *hv_msg;
+ char *p;
+ char *key_value;
+ char *key_name;
+ int op;
+ int pool;
+ char *if_name;
+ struct hv_kvp_ipaddr_value *kvp_ip_val;
+ char *kvp_recv_buffer;
+ size_t kvp_recv_buffer_len;
+
+ if (daemon(1, 0))
+ return 1;
+ openlog("KVP", 0, LOG_USER);
+ syslog(LOG_INFO, "KVP starting; pid is:%d", getpid());
+
+ kvp_recv_buffer_len = NLMSG_LENGTH(0) + sizeof(struct cn_msg) + sizeof(struct hv_kvp_msg);
+ kvp_recv_buffer = calloc(1, kvp_recv_buffer_len);
+ if (!kvp_recv_buffer) {
+ syslog(LOG_ERR, "Failed to allocate netlink buffer");
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Retrieve OS release information.
+ */
+ kvp_get_os_info();
+ /*
+ * Cache Fully Qualified Domain Name because getaddrinfo takes an
+ * unpredictable amount of time to finish.
+ */
+ kvp_get_domain_name(full_domain_name, sizeof(full_domain_name));
+
+ if (kvp_file_init()) {
+ syslog(LOG_ERR, "Failed to initialize the pools");
+ exit(EXIT_FAILURE);
+ }
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ if (fd < 0) {
+ syslog(LOG_ERR, "netlink socket creation failed; error: %d %s", errno,
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ addr.nl_family = AF_NETLINK;
+ addr.nl_pad = 0;
+ addr.nl_pid = 0;
+ addr.nl_groups = 0;
+
+
+ error = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (error < 0) {
+ syslog(LOG_ERR, "bind failed; error: %d %s", errno, strerror(errno));
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+ nl_group = CN_KVP_IDX;
+
+ if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &nl_group, sizeof(nl_group)) < 0) {
+ syslog(LOG_ERR, "setsockopt failed; error: %d %s", errno, strerror(errno));
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Register ourselves with the kernel.
+ */
+ message = (struct cn_msg *)kvp_recv_buffer;
+ message->id.idx = CN_KVP_IDX;
+ message->id.val = CN_KVP_VAL;
+
+ hv_msg = (struct hv_kvp_msg *)message->data;
+ hv_msg->kvp_hdr.operation = KVP_OP_REGISTER1;
+ message->ack = 0;
+ message->len = sizeof(struct hv_kvp_msg);
+
+ len = netlink_send(fd, message);
+ if (len < 0) {
+ syslog(LOG_ERR, "netlink_send failed; error: %d %s", errno, strerror(errno));
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+
+ pfd.fd = fd;
+
+ while (1) {
+ struct sockaddr *addr_p = (struct sockaddr *) &addr;
+ socklen_t addr_l = sizeof(addr);
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+
+ if (poll(&pfd, 1, -1) < 0) {
+ syslog(LOG_ERR, "poll failed; error: %d %s", errno, strerror(errno));
+ if (errno == EINVAL) {
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+ else
+ continue;
+ }
+
+ len = recvfrom(fd, kvp_recv_buffer, kvp_recv_buffer_len, 0,
+ addr_p, &addr_l);
+
+ if (len < 0) {
+ syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s",
+ addr.nl_pid, errno, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (addr.nl_pid) {
+ syslog(LOG_WARNING, "Received packet from untrusted pid:%u",
+ addr.nl_pid);
+ continue;
+ }
+
+ incoming_msg = (struct nlmsghdr *)kvp_recv_buffer;
+
+ if (incoming_msg->nlmsg_type != NLMSG_DONE)
+ continue;
+
+ incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg);
+ hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data;
+
+ /*
+ * We will use the KVP header information to pass back
+ * the error from this daemon. So, first copy the state
+ * and set the error code to success.
+ */
+ op = hv_msg->kvp_hdr.operation;
+ pool = hv_msg->kvp_hdr.pool;
+ hv_msg->error = HV_S_OK;
+
+ if ((in_hand_shake) && (op == KVP_OP_REGISTER1)) {
+ /*
+ * Driver is registering with us; stash away the version
+ * information.
+ */
+ in_hand_shake = 0;
+ p = (char *)hv_msg->body.kvp_register.version;
+ lic_version = malloc(strlen(p) + 1);
+ if (lic_version) {
+ strcpy(lic_version, p);
+ syslog(LOG_INFO, "KVP LIC Version: %s",
+ lic_version);
+ } else {
+ syslog(LOG_ERR, "malloc failed");
+ }
+ continue;
+ }
+
+ switch (op) {
+ case KVP_OP_GET_IP_INFO:
+ kvp_ip_val = &hv_msg->body.kvp_ip_val;
+ if_name =
+ kvp_mac_to_if_name((char *)kvp_ip_val->adapter_id);
+
+ if (if_name == NULL) {
+ /*
+ * We could not map the mac address to an
+ * interface name; return error.
+ */
+ hv_msg->error = HV_E_FAIL;
+ break;
+ }
+ error = kvp_get_ip_info(
+ 0, if_name, KVP_OP_GET_IP_INFO,
+ kvp_ip_val,
+ (MAX_IP_ADDR_SIZE * 2));
+
+ if (error)
+ hv_msg->error = error;
+
+ free(if_name);
+ break;
+
+ case KVP_OP_SET_IP_INFO:
+ kvp_ip_val = &hv_msg->body.kvp_ip_val;
+ if_name = kvp_get_if_name(
+ (char *)kvp_ip_val->adapter_id);
+ if (if_name == NULL) {
+ /*
+ * We could not map the guid to an
+ * interface name; return error.
+ */
+ hv_msg->error = HV_GUID_NOTFOUND;
+ break;
+ }
+ error = kvp_set_ip_info(if_name, kvp_ip_val);
+ if (error)
+ hv_msg->error = error;
+
+ free(if_name);
+ break;
+
+ case KVP_OP_SET:
+ if (kvp_key_add_or_modify(pool,
+ hv_msg->body.kvp_set.data.key,
+ hv_msg->body.kvp_set.data.key_size,
+ hv_msg->body.kvp_set.data.value,
+ hv_msg->body.kvp_set.data.value_size))
+ hv_msg->error = HV_S_CONT;
+ break;
+
+ case KVP_OP_GET:
+ if (kvp_get_value(pool,
+ hv_msg->body.kvp_set.data.key,
+ hv_msg->body.kvp_set.data.key_size,
+ hv_msg->body.kvp_set.data.value,
+ hv_msg->body.kvp_set.data.value_size))
+ hv_msg->error = HV_S_CONT;
+ break;
+
+ case KVP_OP_DELETE:
+ if (kvp_key_delete(pool,
+ hv_msg->body.kvp_delete.key,
+ hv_msg->body.kvp_delete.key_size))
+ hv_msg->error = HV_S_CONT;
+ break;
+
+ default:
+ break;
+ }
+
+ if (op != KVP_OP_ENUMERATE)
+ goto kvp_done;
+
+ /*
+ * If the pool is KVP_POOL_AUTO, dynamically generate
+ * both the key and the value; if not read from the
+ * appropriate pool.
+ */
+ if (pool != KVP_POOL_AUTO) {
+ if (kvp_pool_enumerate(pool,
+ hv_msg->body.kvp_enum_data.index,
+ hv_msg->body.kvp_enum_data.data.key,
+ HV_KVP_EXCHANGE_MAX_KEY_SIZE,
+ hv_msg->body.kvp_enum_data.data.value,
+ HV_KVP_EXCHANGE_MAX_VALUE_SIZE))
+ hv_msg->error = HV_S_CONT;
+ goto kvp_done;
+ }
+
+ hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data;
+ key_name = (char *)hv_msg->body.kvp_enum_data.data.key;
+ key_value = (char *)hv_msg->body.kvp_enum_data.data.value;
+
+ switch (hv_msg->body.kvp_enum_data.index) {
+ case FullyQualifiedDomainName:
+ strcpy(key_value, full_domain_name);
+ strcpy(key_name, "FullyQualifiedDomainName");
+ break;
+ case IntegrationServicesVersion:
+ strcpy(key_name, "IntegrationServicesVersion");
+ strcpy(key_value, lic_version);
+ break;
+ case NetworkAddressIPv4:
+ kvp_get_ip_info(AF_INET, NULL, KVP_OP_ENUMERATE,
+ key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
+ strcpy(key_name, "NetworkAddressIPv4");
+ break;
+ case NetworkAddressIPv6:
+ kvp_get_ip_info(AF_INET6, NULL, KVP_OP_ENUMERATE,
+ key_value, HV_KVP_EXCHANGE_MAX_VALUE_SIZE);
+ strcpy(key_name, "NetworkAddressIPv6");
+ break;
+ case OSBuildNumber:
+ strcpy(key_value, os_build);
+ strcpy(key_name, "OSBuildNumber");
+ break;
+ case OSName:
+ strcpy(key_value, os_name);
+ strcpy(key_name, "OSName");
+ break;
+ case OSMajorVersion:
+ strcpy(key_value, os_major);
+ strcpy(key_name, "OSMajorVersion");
+ break;
+ case OSMinorVersion:
+ strcpy(key_value, os_minor);
+ strcpy(key_name, "OSMinorVersion");
+ break;
+ case OSVersion:
+ strcpy(key_value, os_version);
+ strcpy(key_name, "OSVersion");
+ break;
+ case ProcessorArchitecture:
+ strcpy(key_value, processor_arch);
+ strcpy(key_name, "ProcessorArchitecture");
+ break;
+ default:
+ hv_msg->error = HV_S_CONT;
+ break;
+ }
+ /*
+ * Send the value back to the kernel. The response is
+ * already in the receive buffer. Update the cn_msg header to
+ * reflect the key value that has been added to the message
+ */
+kvp_done:
+
+ incoming_cn_msg->id.idx = CN_KVP_IDX;
+ incoming_cn_msg->id.val = CN_KVP_VAL;
+ incoming_cn_msg->ack = 0;
+ incoming_cn_msg->len = sizeof(struct hv_kvp_msg);
+
+ len = netlink_send(fd, incoming_cn_msg);
+ if (len < 0) {
+ syslog(LOG_ERR, "net_link send failed; error: %d %s", errno,
+ strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+
+}
diff --git a/tools/hv/hv_set_ifconfig.sh b/tools/hv/hv_set_ifconfig.sh
new file mode 100755
index 00000000000..735aafd64a3
--- /dev/null
+++ b/tools/hv/hv_set_ifconfig.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+
+# This example script activates an interface based on the specified
+# configuration.
+#
+# In the interest of keeping the KVP daemon code free of distro specific
+# information; the kvp daemon code invokes this external script to configure
+# the interface.
+#
+# The only argument to this script is the configuration file that is to
+# be used to configure the interface.
+#
+# Each Distro is expected to implement this script in a distro specific
+# fashion. For instance on Distros that ship with Network Manager enabled,
+# this script can be based on the Network Manager APIs for configuring the
+# interface.
+#
+# This example script is based on a RHEL environment.
+#
+# Here is the format of the ip configuration file:
+#
+# HWADDR=macaddr
+# DEVICE=interface name
+# BOOTPROTO=<protocol> (where <protocol> is "dhcp" if DHCP is configured
+# or "none" if no boot-time protocol should be used)
+#
+# IPADDR0=ipaddr1
+# IPADDR1=ipaddr2
+# IPADDRx=ipaddry (where y = x + 1)
+#
+# NETMASK0=netmask1
+# NETMASKx=netmasky (where y = x + 1)
+#
+# GATEWAY=ipaddr1
+# GATEWAYx=ipaddry (where y = x + 1)
+#
+# DNSx=ipaddrx (where first DNS address is tagged as DNS1 etc)
+#
+# IPV6 addresses will be tagged as IPV6ADDR, IPV6 gateway will be
+# tagged as IPV6_DEFAULTGW and IPV6 NETMASK will be tagged as
+# IPV6NETMASK.
+#
+# The host can specify multiple ipv4 and ipv6 addresses to be
+# configured for the interface. Furthermore, the configuration
+# needs to be persistent. A subsequent GET call on the interface
+# is expected to return the configuration that is set via the SET
+# call.
+#
+
+
+
+echo "IPV6INIT=yes" >> $1
+echo "NM_CONTROLLED=no" >> $1
+echo "PEERDNS=yes" >> $1
+echo "ONBOOT=yes" >> $1
+
+
+cp $1 /etc/sysconfig/network-scripts/
+
+
+interface=$(echo $1 | awk -F - '{ print $2 }')
+
+/sbin/ifdown $interface 2>/dev/null
+/sbin/ifup $interface 2>/dev/null
diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c
new file mode 100644
index 00000000000..6a213b8cd7b
--- /dev/null
+++ b/tools/hv/hv_vss_daemon.c
@@ -0,0 +1,267 @@
+/*
+ * An implementation of the host initiated guest snapshot for Hyper-V.
+ *
+ *
+ * Copyright (C) 2013, Microsoft, Inc.
+ * Author : K. Y. Srinivasan <kys@microsoft.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published
+ * by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for more
+ * details.
+ *
+ */
+
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/poll.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <mntent.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <arpa/inet.h>
+#include <linux/fs.h>
+#include <linux/connector.h>
+#include <linux/hyperv.h>
+#include <linux/netlink.h>
+#include <syslog.h>
+
+static struct sockaddr_nl addr;
+
+#ifndef SOL_NETLINK
+#define SOL_NETLINK 270
+#endif
+
+
+static int vss_do_freeze(char *dir, unsigned int cmd, char *fs_op)
+{
+ int ret, fd = open(dir, O_RDONLY);
+
+ if (fd < 0)
+ return 1;
+ ret = ioctl(fd, cmd, 0);
+ syslog(LOG_INFO, "VSS: %s of %s: %s\n", fs_op, dir, strerror(errno));
+ close(fd);
+ return !!ret;
+}
+
+static int vss_operate(int operation)
+{
+ char *fs_op;
+ char match[] = "/dev/";
+ FILE *mounts;
+ struct mntent *ent;
+ unsigned int cmd;
+ int error = 0, root_seen = 0;
+
+ switch (operation) {
+ case VSS_OP_FREEZE:
+ cmd = FIFREEZE;
+ fs_op = "freeze";
+ break;
+ case VSS_OP_THAW:
+ cmd = FITHAW;
+ fs_op = "thaw";
+ break;
+ default:
+ return -1;
+ }
+
+ mounts = setmntent("/proc/mounts", "r");
+ if (mounts == NULL)
+ return -1;
+
+ while ((ent = getmntent(mounts))) {
+ if (strncmp(ent->mnt_fsname, match, strlen(match)))
+ continue;
+ if (strcmp(ent->mnt_type, "iso9660") == 0)
+ continue;
+ if (strcmp(ent->mnt_type, "vfat") == 0)
+ continue;
+ if (strcmp(ent->mnt_dir, "/") == 0) {
+ root_seen = 1;
+ continue;
+ }
+ error |= vss_do_freeze(ent->mnt_dir, cmd, fs_op);
+ }
+ endmntent(mounts);
+
+ if (root_seen) {
+ error |= vss_do_freeze("/", cmd, fs_op);
+ }
+
+ return error;
+}
+
+static int netlink_send(int fd, struct cn_msg *msg)
+{
+ struct nlmsghdr nlh = { .nlmsg_type = NLMSG_DONE };
+ unsigned int size;
+ struct msghdr message;
+ struct iovec iov[2];
+
+ size = sizeof(struct cn_msg) + msg->len;
+
+ nlh.nlmsg_pid = getpid();
+ nlh.nlmsg_len = NLMSG_LENGTH(size);
+
+ iov[0].iov_base = &nlh;
+ iov[0].iov_len = sizeof(nlh);
+
+ iov[1].iov_base = msg;
+ iov[1].iov_len = size;
+
+ memset(&message, 0, sizeof(message));
+ message.msg_name = &addr;
+ message.msg_namelen = sizeof(addr);
+ message.msg_iov = iov;
+ message.msg_iovlen = 2;
+
+ return sendmsg(fd, &message, 0);
+}
+
+int main(void)
+{
+ int fd, len, nl_group;
+ int error;
+ struct cn_msg *message;
+ struct pollfd pfd;
+ struct nlmsghdr *incoming_msg;
+ struct cn_msg *incoming_cn_msg;
+ int op;
+ struct hv_vss_msg *vss_msg;
+ char *vss_recv_buffer;
+ size_t vss_recv_buffer_len;
+
+ if (daemon(1, 0))
+ return 1;
+
+ openlog("Hyper-V VSS", 0, LOG_USER);
+ syslog(LOG_INFO, "VSS starting; pid is:%d", getpid());
+
+ vss_recv_buffer_len = NLMSG_LENGTH(0) + sizeof(struct cn_msg) + sizeof(struct hv_vss_msg);
+ vss_recv_buffer = calloc(1, vss_recv_buffer_len);
+ if (!vss_recv_buffer) {
+ syslog(LOG_ERR, "Failed to allocate netlink buffers");
+ exit(EXIT_FAILURE);
+ }
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+ if (fd < 0) {
+ syslog(LOG_ERR, "netlink socket creation failed; error:%d %s",
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ addr.nl_family = AF_NETLINK;
+ addr.nl_pad = 0;
+ addr.nl_pid = 0;
+ addr.nl_groups = 0;
+
+
+ error = bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+ if (error < 0) {
+ syslog(LOG_ERR, "bind failed; error:%d %s", errno, strerror(errno));
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+ nl_group = CN_VSS_IDX;
+ if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &nl_group, sizeof(nl_group)) < 0) {
+ syslog(LOG_ERR, "setsockopt failed; error:%d %s", errno, strerror(errno));
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Register ourselves with the kernel.
+ */
+ message = (struct cn_msg *)vss_recv_buffer;
+ message->id.idx = CN_VSS_IDX;
+ message->id.val = CN_VSS_VAL;
+ message->ack = 0;
+ vss_msg = (struct hv_vss_msg *)message->data;
+ vss_msg->vss_hdr.operation = VSS_OP_REGISTER;
+
+ message->len = sizeof(struct hv_vss_msg);
+
+ len = netlink_send(fd, message);
+ if (len < 0) {
+ syslog(LOG_ERR, "netlink_send failed; error:%d %s", errno, strerror(errno));
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+
+ pfd.fd = fd;
+
+ while (1) {
+ struct sockaddr *addr_p = (struct sockaddr *) &addr;
+ socklen_t addr_l = sizeof(addr);
+ pfd.events = POLLIN;
+ pfd.revents = 0;
+
+ if (poll(&pfd, 1, -1) < 0) {
+ syslog(LOG_ERR, "poll failed; error:%d %s", errno, strerror(errno));
+ if (errno == EINVAL) {
+ close(fd);
+ exit(EXIT_FAILURE);
+ }
+ else
+ continue;
+ }
+
+ len = recvfrom(fd, vss_recv_buffer, vss_recv_buffer_len, 0,
+ addr_p, &addr_l);
+
+ if (len < 0) {
+ syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s",
+ addr.nl_pid, errno, strerror(errno));
+ close(fd);
+ return -1;
+ }
+
+ if (addr.nl_pid) {
+ syslog(LOG_WARNING,
+ "Received packet from untrusted pid:%u",
+ addr.nl_pid);
+ continue;
+ }
+
+ incoming_msg = (struct nlmsghdr *)vss_recv_buffer;
+
+ if (incoming_msg->nlmsg_type != NLMSG_DONE)
+ continue;
+
+ incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg);
+ vss_msg = (struct hv_vss_msg *)incoming_cn_msg->data;
+ op = vss_msg->vss_hdr.operation;
+ error = HV_S_OK;
+
+ switch (op) {
+ case VSS_OP_FREEZE:
+ case VSS_OP_THAW:
+ error = vss_operate(op);
+ if (error)
+ error = HV_E_FAIL;
+ break;
+ default:
+ syslog(LOG_ERR, "Illegal op:%d\n", op);
+ }
+ vss_msg->error = error;
+ len = netlink_send(fd, incoming_cn_msg);
+ if (len < 0) {
+ syslog(LOG_ERR, "net_link send failed; error:%d %s",
+ errno, strerror(errno));
+ exit(EXIT_FAILURE);
+ }
+ }
+
+}
diff --git a/tools/include/asm/bug.h b/tools/include/asm/bug.h
new file mode 100644
index 00000000000..9e5f4846967
--- /dev/null
+++ b/tools/include/asm/bug.h
@@ -0,0 +1,25 @@
+#ifndef _TOOLS_ASM_BUG_H
+#define _TOOLS_ASM_BUG_H
+
+#include <linux/compiler.h>
+
+#define __WARN_printf(arg...) do { fprintf(stderr, arg); } while (0)
+
+#define WARN(condition, format...) ({ \
+ int __ret_warn_on = !!(condition); \
+ if (unlikely(__ret_warn_on)) \
+ __WARN_printf(format); \
+ unlikely(__ret_warn_on); \
+})
+
+#define WARN_ONCE(condition, format...) ({ \
+ static int __warned; \
+ int __ret_warn_once = !!(condition); \
+ \
+ if (unlikely(__ret_warn_once)) \
+ if (WARN(!__warned, format)) \
+ __warned = 1; \
+ unlikely(__ret_warn_once); \
+})
+
+#endif /* _TOOLS_ASM_BUG_H */
diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
new file mode 100644
index 00000000000..88461f09cc8
--- /dev/null
+++ b/tools/include/linux/compiler.h
@@ -0,0 +1,40 @@
+#ifndef _TOOLS_LINUX_COMPILER_H_
+#define _TOOLS_LINUX_COMPILER_H_
+
+#ifndef __always_inline
+# define __always_inline inline __attribute__((always_inline))
+#endif
+
+#define __user
+
+#ifndef __attribute_const__
+# define __attribute_const__
+#endif
+
+#ifndef __maybe_unused
+# define __maybe_unused __attribute__((unused))
+#endif
+
+#ifndef __packed
+# define __packed __attribute__((__packed__))
+#endif
+
+#ifndef __force
+# define __force
+#endif
+
+#ifndef __weak
+# define __weak __attribute__((weak))
+#endif
+
+#ifndef likely
+# define likely(x) __builtin_expect(!!(x), 1)
+#endif
+
+#ifndef unlikely
+# define unlikely(x) __builtin_expect(!!(x), 0)
+#endif
+
+#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
+
+#endif /* _TOOLS_LINUX_COMPILER_H */
diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h
new file mode 100644
index 00000000000..d07e586b9ba
--- /dev/null
+++ b/tools/include/linux/export.h
@@ -0,0 +1,10 @@
+#ifndef _TOOLS_LINUX_EXPORT_H_
+#define _TOOLS_LINUX_EXPORT_H_
+
+#define EXPORT_SYMBOL(sym)
+#define EXPORT_SYMBOL_GPL(sym)
+#define EXPORT_SYMBOL_GPL_FUTURE(sym)
+#define EXPORT_UNUSED_SYMBOL(sym)
+#define EXPORT_UNUSED_SYMBOL_GPL(sym)
+
+#endif
diff --git a/tools/include/linux/hash.h b/tools/include/linux/hash.h
new file mode 100644
index 00000000000..d026c657301
--- /dev/null
+++ b/tools/include/linux/hash.h
@@ -0,0 +1,5 @@
+#include "../../../include/linux/hash.h"
+
+#ifndef _TOOLS_LINUX_HASH_H
+#define _TOOLS_LINUX_HASH_H
+#endif
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
new file mode 100644
index 00000000000..b5cf25e05df
--- /dev/null
+++ b/tools/include/linux/types.h
@@ -0,0 +1,75 @@
+#ifndef _TOOLS_LINUX_TYPES_H_
+#define _TOOLS_LINUX_TYPES_H_
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+#define __SANE_USERSPACE_TYPES__ /* For PPC64, to get LL64 types */
+#include <asm/types.h>
+
+struct page;
+struct kmem_cache;
+
+typedef enum {
+ GFP_KERNEL,
+ GFP_ATOMIC,
+ __GFP_HIGHMEM,
+ __GFP_HIGH
+} gfp_t;
+
+/*
+ * We define u64 as uint64_t for every architecture
+ * so that we can print it with "%"PRIx64 without getting warnings.
+ *
+ * typedef __u64 u64;
+ * typedef __s64 s64;
+ */
+typedef uint64_t u64;
+typedef int64_t s64;
+
+typedef __u32 u32;
+typedef __s32 s32;
+
+typedef __u16 u16;
+typedef __s16 s16;
+
+typedef __u8 u8;
+typedef __s8 s8;
+
+#ifdef __CHECKER__
+#define __bitwise__ __attribute__((bitwise))
+#else
+#define __bitwise__
+#endif
+#ifdef __CHECK_ENDIAN__
+#define __bitwise __bitwise__
+#else
+#define __bitwise
+#endif
+
+#define __force
+#define __user
+#define __must_check
+#define __cold
+
+typedef __u16 __bitwise __le16;
+typedef __u16 __bitwise __be16;
+typedef __u32 __bitwise __le32;
+typedef __u32 __bitwise __be32;
+typedef __u64 __bitwise __le64;
+typedef __u64 __bitwise __be64;
+
+struct list_head {
+ struct list_head *next, *prev;
+};
+
+struct hlist_head {
+ struct hlist_node *first;
+};
+
+struct hlist_node {
+ struct hlist_node *next, **pprev;
+};
+
+#endif /* _TOOLS_LINUX_TYPES_H_ */
diff --git a/tools/include/tools/be_byteshift.h b/tools/include/tools/be_byteshift.h
new file mode 100644
index 00000000000..84c17d83657
--- /dev/null
+++ b/tools/include/tools/be_byteshift.h
@@ -0,0 +1,70 @@
+#ifndef _TOOLS_BE_BYTESHIFT_H
+#define _TOOLS_BE_BYTESHIFT_H
+
+#include <stdint.h>
+
+static inline uint16_t __get_unaligned_be16(const uint8_t *p)
+{
+ return p[0] << 8 | p[1];
+}
+
+static inline uint32_t __get_unaligned_be32(const uint8_t *p)
+{
+ return p[0] << 24 | p[1] << 16 | p[2] << 8 | p[3];
+}
+
+static inline uint64_t __get_unaligned_be64(const uint8_t *p)
+{
+ return (uint64_t)__get_unaligned_be32(p) << 32 |
+ __get_unaligned_be32(p + 4);
+}
+
+static inline void __put_unaligned_be16(uint16_t val, uint8_t *p)
+{
+ *p++ = val >> 8;
+ *p++ = val;
+}
+
+static inline void __put_unaligned_be32(uint32_t val, uint8_t *p)
+{
+ __put_unaligned_be16(val >> 16, p);
+ __put_unaligned_be16(val, p + 2);
+}
+
+static inline void __put_unaligned_be64(uint64_t val, uint8_t *p)
+{
+ __put_unaligned_be32(val >> 32, p);
+ __put_unaligned_be32(val, p + 4);
+}
+
+static inline uint16_t get_unaligned_be16(const void *p)
+{
+ return __get_unaligned_be16((const uint8_t *)p);
+}
+
+static inline uint32_t get_unaligned_be32(const void *p)
+{
+ return __get_unaligned_be32((const uint8_t *)p);
+}
+
+static inline uint64_t get_unaligned_be64(const void *p)
+{
+ return __get_unaligned_be64((const uint8_t *)p);
+}
+
+static inline void put_unaligned_be16(uint16_t val, void *p)
+{
+ __put_unaligned_be16(val, p);
+}
+
+static inline void put_unaligned_be32(uint32_t val, void *p)
+{
+ __put_unaligned_be32(val, p);
+}
+
+static inline void put_unaligned_be64(uint64_t val, void *p)
+{
+ __put_unaligned_be64(val, p);
+}
+
+#endif /* _TOOLS_BE_BYTESHIFT_H */
diff --git a/tools/include/tools/le_byteshift.h b/tools/include/tools/le_byteshift.h
new file mode 100644
index 00000000000..8fe9f2488ec
--- /dev/null
+++ b/tools/include/tools/le_byteshift.h
@@ -0,0 +1,70 @@
+#ifndef _TOOLS_LE_BYTESHIFT_H
+#define _TOOLS_LE_BYTESHIFT_H
+
+#include <stdint.h>
+
+static inline uint16_t __get_unaligned_le16(const uint8_t *p)
+{
+ return p[0] | p[1] << 8;
+}
+
+static inline uint32_t __get_unaligned_le32(const uint8_t *p)
+{
+ return p[0] | p[1] << 8 | p[2] << 16 | p[3] << 24;
+}
+
+static inline uint64_t __get_unaligned_le64(const uint8_t *p)
+{
+ return (uint64_t)__get_unaligned_le32(p + 4) << 32 |
+ __get_unaligned_le32(p);
+}
+
+static inline void __put_unaligned_le16(uint16_t val, uint8_t *p)
+{
+ *p++ = val;
+ *p++ = val >> 8;
+}
+
+static inline void __put_unaligned_le32(uint32_t val, uint8_t *p)
+{
+ __put_unaligned_le16(val >> 16, p + 2);
+ __put_unaligned_le16(val, p);
+}
+
+static inline void __put_unaligned_le64(uint64_t val, uint8_t *p)
+{
+ __put_unaligned_le32(val >> 32, p + 4);
+ __put_unaligned_le32(val, p);
+}
+
+static inline uint16_t get_unaligned_le16(const void *p)
+{
+ return __get_unaligned_le16((const uint8_t *)p);
+}
+
+static inline uint32_t get_unaligned_le32(const void *p)
+{
+ return __get_unaligned_le32((const uint8_t *)p);
+}
+
+static inline uint64_t get_unaligned_le64(const void *p)
+{
+ return __get_unaligned_le64((const uint8_t *)p);
+}
+
+static inline void put_unaligned_le16(uint16_t val, void *p)
+{
+ __put_unaligned_le16(val, p);
+}
+
+static inline void put_unaligned_le32(uint32_t val, void *p)
+{
+ __put_unaligned_le32(val, p);
+}
+
+static inline void put_unaligned_le64(uint64_t val, void *p)
+{
+ __put_unaligned_le64(val, p);
+}
+
+#endif /* _TOOLS_LE_BYTESHIFT_H */
diff --git a/tools/lguest/.gitignore b/tools/lguest/.gitignore
new file mode 100644
index 00000000000..115587fd5f6
--- /dev/null
+++ b/tools/lguest/.gitignore
@@ -0,0 +1 @@
+lguest
diff --git a/tools/lguest/Makefile b/tools/lguest/Makefile
new file mode 100644
index 00000000000..97bca4871ea
--- /dev/null
+++ b/tools/lguest/Makefile
@@ -0,0 +1,7 @@
+# This creates the demonstration utility "lguest" which runs a Linux guest.
+CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE
+
+all: lguest
+
+clean:
+ rm -f lguest
diff --git a/tools/lguest/extract b/tools/lguest/extract
new file mode 100644
index 00000000000..7730bb6e4b9
--- /dev/null
+++ b/tools/lguest/extract
@@ -0,0 +1,58 @@
+#! /bin/sh
+
+set -e
+
+PREFIX=$1
+shift
+
+trap 'rm -r $TMPDIR' 0
+TMPDIR=`mktemp -d`
+
+exec 3>/dev/null
+for f; do
+ while IFS="
+" read -r LINE; do
+ case "$LINE" in
+ *$PREFIX:[0-9]*:\**)
+ NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
+ if [ -f $TMPDIR/$NUM ]; then
+ echo "$TMPDIR/$NUM already exits prior to $f"
+ exit 1
+ fi
+ exec 3>>$TMPDIR/$NUM
+ echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
+ /bin/echo "$LINE" | sed -e "s/$PREFIX:[0-9]*//" -e "s/:\*/*/" >&3
+ ;;
+ *$PREFIX:[0-9]*)
+ NUM=`echo "$LINE" | sed "s/.*$PREFIX:\([0-9]*\).*/\1/"`
+ if [ -f $TMPDIR/$NUM ]; then
+ echo "$TMPDIR/$NUM already exits prior to $f"
+ exit 1
+ fi
+ exec 3>>$TMPDIR/$NUM
+ echo $f | sed 's,\.\./,,g' > $TMPDIR/.$NUM
+ /bin/echo "$LINE" | sed "s/$PREFIX:[0-9]*//" >&3
+ ;;
+ *:\**)
+ /bin/echo "$LINE" | sed -e "s/:\*/*/" -e "s,/\*\*/,," >&3
+ echo >&3
+ exec 3>/dev/null
+ ;;
+ *)
+ /bin/echo "$LINE" >&3
+ ;;
+ esac
+ done < $f
+ echo >&3
+ exec 3>/dev/null
+done
+
+LASTFILE=""
+for f in $TMPDIR/*; do
+ if [ "$LASTFILE" != $(cat $TMPDIR/.$(basename $f) ) ]; then
+ LASTFILE=$(cat $TMPDIR/.$(basename $f) )
+ echo "[ $LASTFILE ]"
+ fi
+ cat $f
+done
+
diff --git a/tools/lguest/lguest.c b/tools/lguest/lguest.c
new file mode 100644
index 00000000000..32cf2ce15d6
--- /dev/null
+++ b/tools/lguest/lguest.c
@@ -0,0 +1,2072 @@
+/*P:100
+ * This is the Launcher code, a simple program which lays out the "physical"
+ * memory for the new Guest by mapping the kernel image and the virtual
+ * devices, then opens /dev/lguest to tell the kernel about the Guest and
+ * control it.
+:*/
+#define _LARGEFILE64_SOURCE
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <err.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <elf.h>
+#include <sys/mman.h>
+#include <sys/param.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/eventfd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <ctype.h>
+#include <sys/socket.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <time.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <linux/sockios.h>
+#include <linux/if_tun.h>
+#include <sys/uio.h>
+#include <termios.h>
+#include <getopt.h>
+#include <assert.h>
+#include <sched.h>
+#include <limits.h>
+#include <stddef.h>
+#include <signal.h>
+#include <pwd.h>
+#include <grp.h>
+
+#ifndef VIRTIO_F_ANY_LAYOUT
+#define VIRTIO_F_ANY_LAYOUT 27
+#endif
+
+/*L:110
+ * We can ignore the 43 include files we need for this program, but I do want
+ * to draw attention to the use of kernel-style types.
+ *
+ * As Linus said, "C is a Spartan language, and so should your naming be." I
+ * like these abbreviations, so we define them here. Note that u64 is always
+ * unsigned long long, which works on all Linux systems: this means that we can
+ * use %llu in printf for any u64.
+ */
+typedef unsigned long long u64;
+typedef uint32_t u32;
+typedef uint16_t u16;
+typedef uint8_t u8;
+/*:*/
+
+#include <linux/virtio_config.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_blk.h>
+#include <linux/virtio_console.h>
+#include <linux/virtio_rng.h>
+#include <linux/virtio_ring.h>
+#include <asm/bootparam.h>
+#include "../../include/linux/lguest_launcher.h"
+
+#define BRIDGE_PFX "bridge:"
+#ifndef SIOCBRADDIF
+#define SIOCBRADDIF 0x89a2 /* add interface to bridge */
+#endif
+/* We can have up to 256 pages for devices. */
+#define DEVICE_PAGES 256
+/* This will occupy 3 pages: it must be a power of 2. */
+#define VIRTQUEUE_NUM 256
+
+/*L:120
+ * verbose is both a global flag and a macro. The C preprocessor allows
+ * this, and although I wouldn't recommend it, it works quite nicely here.
+ */
+static bool verbose;
+#define verbose(args...) \
+ do { if (verbose) printf(args); } while(0)
+/*:*/
+
+/* The pointer to the start of guest memory. */
+static void *guest_base;
+/* The maximum guest physical address allowed, and maximum possible. */
+static unsigned long guest_limit, guest_max;
+/* The /dev/lguest file descriptor. */
+static int lguest_fd;
+
+/* a per-cpu variable indicating whose vcpu is currently running */
+static unsigned int __thread cpu_id;
+
+/* This is our list of devices. */
+struct device_list {
+ /* Counter to assign interrupt numbers. */
+ unsigned int next_irq;
+
+ /* Counter to print out convenient device numbers. */
+ unsigned int device_num;
+
+ /* The descriptor page for the devices. */
+ u8 *descpage;
+
+ /* A single linked list of devices. */
+ struct device *dev;
+ /* And a pointer to the last device for easy append. */
+ struct device *lastdev;
+};
+
+/* The list of Guest devices, based on command line arguments. */
+static struct device_list devices;
+
+/* The device structure describes a single device. */
+struct device {
+ /* The linked-list pointer. */
+ struct device *next;
+
+ /* The device's descriptor, as mapped into the Guest. */
+ struct lguest_device_desc *desc;
+
+ /* We can't trust desc values once Guest has booted: we use these. */
+ unsigned int feature_len;
+ unsigned int num_vq;
+
+ /* The name of this device, for --verbose. */
+ const char *name;
+
+ /* Any queues attached to this device */
+ struct virtqueue *vq;
+
+ /* Is it operational */
+ bool running;
+
+ /* Device-specific data. */
+ void *priv;
+};
+
+/* The virtqueue structure describes a queue attached to a device. */
+struct virtqueue {
+ struct virtqueue *next;
+
+ /* Which device owns me. */
+ struct device *dev;
+
+ /* The configuration for this queue. */
+ struct lguest_vqconfig config;
+
+ /* The actual ring of buffers. */
+ struct vring vring;
+
+ /* Last available index we saw. */
+ u16 last_avail_idx;
+
+ /* How many are used since we sent last irq? */
+ unsigned int pending_used;
+
+ /* Eventfd where Guest notifications arrive. */
+ int eventfd;
+
+ /* Function for the thread which is servicing this virtqueue. */
+ void (*service)(struct virtqueue *vq);
+ pid_t thread;
+};
+
+/* Remember the arguments to the program so we can "reboot" */
+static char **main_args;
+
+/* The original tty settings to restore on exit. */
+static struct termios orig_term;
+
+/*
+ * We have to be careful with barriers: our devices are all run in separate
+ * threads and so we need to make sure that changes visible to the Guest happen
+ * in precise order.
+ */
+#define wmb() __asm__ __volatile__("" : : : "memory")
+#define rmb() __asm__ __volatile__("lock; addl $0,0(%%esp)" : : : "memory")
+#define mb() __asm__ __volatile__("lock; addl $0,0(%%esp)" : : : "memory")
+
+/* Wrapper for the last available index. Makes it easier to change. */
+#define lg_last_avail(vq) ((vq)->last_avail_idx)
+
+/*
+ * The virtio configuration space is defined to be little-endian. x86 is
+ * little-endian too, but it's nice to be explicit so we have these helpers.
+ */
+#define cpu_to_le16(v16) (v16)
+#define cpu_to_le32(v32) (v32)
+#define cpu_to_le64(v64) (v64)
+#define le16_to_cpu(v16) (v16)
+#define le32_to_cpu(v32) (v32)
+#define le64_to_cpu(v64) (v64)
+
+/* Is this iovec empty? */
+static bool iov_empty(const struct iovec iov[], unsigned int num_iov)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_iov; i++)
+ if (iov[i].iov_len)
+ return false;
+ return true;
+}
+
+/* Take len bytes from the front of this iovec. */
+static void iov_consume(struct iovec iov[], unsigned num_iov,
+ void *dest, unsigned len)
+{
+ unsigned int i;
+
+ for (i = 0; i < num_iov; i++) {
+ unsigned int used;
+
+ used = iov[i].iov_len < len ? iov[i].iov_len : len;
+ if (dest) {
+ memcpy(dest, iov[i].iov_base, used);
+ dest += used;
+ }
+ iov[i].iov_base += used;
+ iov[i].iov_len -= used;
+ len -= used;
+ }
+ if (len != 0)
+ errx(1, "iovec too short!");
+}
+
+/* The device virtqueue descriptors are followed by feature bitmasks. */
+static u8 *get_feature_bits(struct device *dev)
+{
+ return (u8 *)(dev->desc + 1)
+ + dev->num_vq * sizeof(struct lguest_vqconfig);
+}
+
+/*L:100
+ * The Launcher code itself takes us out into userspace, that scary place where
+ * pointers run wild and free! Unfortunately, like most userspace programs,
+ * it's quite boring (which is why everyone likes to hack on the kernel!).
+ * Perhaps if you make up an Lguest Drinking Game at this point, it will get
+ * you through this section. Or, maybe not.
+ *
+ * The Launcher sets up a big chunk of memory to be the Guest's "physical"
+ * memory and stores it in "guest_base". In other words, Guest physical ==
+ * Launcher virtual with an offset.
+ *
+ * This can be tough to get your head around, but usually it just means that we
+ * use these trivial conversion functions when the Guest gives us its
+ * "physical" addresses:
+ */
+static void *from_guest_phys(unsigned long addr)
+{
+ return guest_base + addr;
+}
+
+static unsigned long to_guest_phys(const void *addr)
+{
+ return (addr - guest_base);
+}
+
+/*L:130
+ * Loading the Kernel.
+ *
+ * We start with couple of simple helper routines. open_or_die() avoids
+ * error-checking code cluttering the callers:
+ */
+static int open_or_die(const char *name, int flags)
+{
+ int fd = open(name, flags);
+ if (fd < 0)
+ err(1, "Failed to open %s", name);
+ return fd;
+}
+
+/* map_zeroed_pages() takes a number of pages. */
+static void *map_zeroed_pages(unsigned int num)
+{
+ int fd = open_or_die("/dev/zero", O_RDONLY);
+ void *addr;
+
+ /*
+ * We use a private mapping (ie. if we write to the page, it will be
+ * copied). We allocate an extra two pages PROT_NONE to act as guard
+ * pages against read/write attempts that exceed allocated space.
+ */
+ addr = mmap(NULL, getpagesize() * (num+2),
+ PROT_NONE, MAP_PRIVATE, fd, 0);
+
+ if (addr == MAP_FAILED)
+ err(1, "Mmapping %u pages of /dev/zero", num);
+
+ if (mprotect(addr + getpagesize(), getpagesize() * num,
+ PROT_READ|PROT_WRITE) == -1)
+ err(1, "mprotect rw %u pages failed", num);
+
+ /*
+ * One neat mmap feature is that you can close the fd, and it
+ * stays mapped.
+ */
+ close(fd);
+
+ /* Return address after PROT_NONE page */
+ return addr + getpagesize();
+}
+
+/* Get some more pages for a device. */
+static void *get_pages(unsigned int num)
+{
+ void *addr = from_guest_phys(guest_limit);
+
+ guest_limit += num * getpagesize();
+ if (guest_limit > guest_max)
+ errx(1, "Not enough memory for devices");
+ return addr;
+}
+
+/*
+ * This routine is used to load the kernel or initrd. It tries mmap, but if
+ * that fails (Plan 9's kernel file isn't nicely aligned on page boundaries),
+ * it falls back to reading the memory in.
+ */
+static void map_at(int fd, void *addr, unsigned long offset, unsigned long len)
+{
+ ssize_t r;
+
+ /*
+ * We map writable even though for some segments are marked read-only.
+ * The kernel really wants to be writable: it patches its own
+ * instructions.
+ *
+ * MAP_PRIVATE means that the page won't be copied until a write is
+ * done to it. This allows us to share untouched memory between
+ * Guests.
+ */
+ if (mmap(addr, len, PROT_READ|PROT_WRITE,
+ MAP_FIXED|MAP_PRIVATE, fd, offset) != MAP_FAILED)
+ return;
+
+ /* pread does a seek and a read in one shot: saves a few lines. */
+ r = pread(fd, addr, len, offset);
+ if (r != len)
+ err(1, "Reading offset %lu len %lu gave %zi", offset, len, r);
+}
+
+/*
+ * This routine takes an open vmlinux image, which is in ELF, and maps it into
+ * the Guest memory. ELF = Embedded Linking Format, which is the format used
+ * by all modern binaries on Linux including the kernel.
+ *
+ * The ELF headers give *two* addresses: a physical address, and a virtual
+ * address. We use the physical address; the Guest will map itself to the
+ * virtual address.
+ *
+ * We return the starting address.
+ */
+static unsigned long map_elf(int elf_fd, const Elf32_Ehdr *ehdr)
+{
+ Elf32_Phdr phdr[ehdr->e_phnum];
+ unsigned int i;
+
+ /*
+ * Sanity checks on the main ELF header: an x86 executable with a
+ * reasonable number of correctly-sized program headers.
+ */
+ if (ehdr->e_type != ET_EXEC
+ || ehdr->e_machine != EM_386
+ || ehdr->e_phentsize != sizeof(Elf32_Phdr)
+ || ehdr->e_phnum < 1 || ehdr->e_phnum > 65536U/sizeof(Elf32_Phdr))
+ errx(1, "Malformed elf header");
+
+ /*
+ * An ELF executable contains an ELF header and a number of "program"
+ * headers which indicate which parts ("segments") of the program to
+ * load where.
+ */
+
+ /* We read in all the program headers at once: */
+ if (lseek(elf_fd, ehdr->e_phoff, SEEK_SET) < 0)
+ err(1, "Seeking to program headers");
+ if (read(elf_fd, phdr, sizeof(phdr)) != sizeof(phdr))
+ err(1, "Reading program headers");
+
+ /*
+ * Try all the headers: there are usually only three. A read-only one,
+ * a read-write one, and a "note" section which we don't load.
+ */
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ /* If this isn't a loadable segment, we ignore it */
+ if (phdr[i].p_type != PT_LOAD)
+ continue;
+
+ verbose("Section %i: size %i addr %p\n",
+ i, phdr[i].p_memsz, (void *)phdr[i].p_paddr);
+
+ /* We map this section of the file at its physical address. */
+ map_at(elf_fd, from_guest_phys(phdr[i].p_paddr),
+ phdr[i].p_offset, phdr[i].p_filesz);
+ }
+
+ /* The entry point is given in the ELF header. */
+ return ehdr->e_entry;
+}
+
+/*L:150
+ * A bzImage, unlike an ELF file, is not meant to be loaded. You're supposed
+ * to jump into it and it will unpack itself. We used to have to perform some
+ * hairy magic because the unpacking code scared me.
+ *
+ * Fortunately, Jeremy Fitzhardinge convinced me it wasn't that hard and wrote
+ * a small patch to jump over the tricky bits in the Guest, so now we just read
+ * the funky header so we know where in the file to load, and away we go!
+ */
+static unsigned long load_bzimage(int fd)
+{
+ struct boot_params boot;
+ int r;
+ /* Modern bzImages get loaded at 1M. */
+ void *p = from_guest_phys(0x100000);
+
+ /*
+ * Go back to the start of the file and read the header. It should be
+ * a Linux boot header (see Documentation/x86/boot.txt)
+ */
+ lseek(fd, 0, SEEK_SET);
+ read(fd, &boot, sizeof(boot));
+
+ /* Inside the setup_hdr, we expect the magic "HdrS" */
+ if (memcmp(&boot.hdr.header, "HdrS", 4) != 0)
+ errx(1, "This doesn't look like a bzImage to me");
+
+ /* Skip over the extra sectors of the header. */
+ lseek(fd, (boot.hdr.setup_sects+1) * 512, SEEK_SET);
+
+ /* Now read everything into memory. in nice big chunks. */
+ while ((r = read(fd, p, 65536)) > 0)
+ p += r;
+
+ /* Finally, code32_start tells us where to enter the kernel. */
+ return boot.hdr.code32_start;
+}
+
+/*L:140
+ * Loading the kernel is easy when it's a "vmlinux", but most kernels
+ * come wrapped up in the self-decompressing "bzImage" format. With a little
+ * work, we can load those, too.
+ */
+static unsigned long load_kernel(int fd)
+{
+ Elf32_Ehdr hdr;
+
+ /* Read in the first few bytes. */
+ if (read(fd, &hdr, sizeof(hdr)) != sizeof(hdr))
+ err(1, "Reading kernel");
+
+ /* If it's an ELF file, it starts with "\177ELF" */
+ if (memcmp(hdr.e_ident, ELFMAG, SELFMAG) == 0)
+ return map_elf(fd, &hdr);
+
+ /* Otherwise we assume it's a bzImage, and try to load it. */
+ return load_bzimage(fd);
+}
+
+/*
+ * This is a trivial little helper to align pages. Andi Kleen hated it because
+ * it calls getpagesize() twice: "it's dumb code."
+ *
+ * Kernel guys get really het up about optimization, even when it's not
+ * necessary. I leave this code as a reaction against that.
+ */
+static inline unsigned long page_align(unsigned long addr)
+{
+ /* Add upwards and truncate downwards. */
+ return ((addr + getpagesize()-1) & ~(getpagesize()-1));
+}
+
+/*L:180
+ * An "initial ram disk" is a disk image loaded into memory along with the
+ * kernel which the kernel can use to boot from without needing any drivers.
+ * Most distributions now use this as standard: the initrd contains the code to
+ * load the appropriate driver modules for the current machine.
+ *
+ * Importantly, James Morris works for RedHat, and Fedora uses initrds for its
+ * kernels. He sent me this (and tells me when I break it).
+ */
+static unsigned long load_initrd(const char *name, unsigned long mem)
+{
+ int ifd;
+ struct stat st;
+ unsigned long len;
+
+ ifd = open_or_die(name, O_RDONLY);
+ /* fstat() is needed to get the file size. */
+ if (fstat(ifd, &st) < 0)
+ err(1, "fstat() on initrd '%s'", name);
+
+ /*
+ * We map the initrd at the top of memory, but mmap wants it to be
+ * page-aligned, so we round the size up for that.
+ */
+ len = page_align(st.st_size);
+ map_at(ifd, from_guest_phys(mem - len), 0, st.st_size);
+ /*
+ * Once a file is mapped, you can close the file descriptor. It's a
+ * little odd, but quite useful.
+ */
+ close(ifd);
+ verbose("mapped initrd %s size=%lu @ %p\n", name, len, (void*)mem-len);
+
+ /* We return the initrd size. */
+ return len;
+}
+/*:*/
+
+/*
+ * Simple routine to roll all the commandline arguments together with spaces
+ * between them.
+ */
+static void concat(char *dst, char *args[])
+{
+ unsigned int i, len = 0;
+
+ for (i = 0; args[i]; i++) {
+ if (i) {
+ strcat(dst+len, " ");
+ len++;
+ }
+ strcpy(dst+len, args[i]);
+ len += strlen(args[i]);
+ }
+ /* In case it's empty. */
+ dst[len] = '\0';
+}
+
+/*L:185
+ * This is where we actually tell the kernel to initialize the Guest. We
+ * saw the arguments it expects when we looked at initialize() in lguest_user.c:
+ * the base of Guest "physical" memory, the top physical page to allow and the
+ * entry point for the Guest.
+ */
+static void tell_kernel(unsigned long start)
+{
+ unsigned long args[] = { LHREQ_INITIALIZE,
+ (unsigned long)guest_base,
+ guest_limit / getpagesize(), start };
+ verbose("Guest: %p - %p (%#lx)\n",
+ guest_base, guest_base + guest_limit, guest_limit);
+ lguest_fd = open_or_die("/dev/lguest", O_RDWR);
+ if (write(lguest_fd, args, sizeof(args)) < 0)
+ err(1, "Writing to /dev/lguest");
+}
+/*:*/
+
+/*L:200
+ * Device Handling.
+ *
+ * When the Guest gives us a buffer, it sends an array of addresses and sizes.
+ * We need to make sure it's not trying to reach into the Launcher itself, so
+ * we have a convenient routine which checks it and exits with an error message
+ * if something funny is going on:
+ */
+static void *_check_pointer(unsigned long addr, unsigned int size,
+ unsigned int line)
+{
+ /*
+ * Check if the requested address and size exceeds the allocated memory,
+ * or addr + size wraps around.
+ */
+ if ((addr + size) > guest_limit || (addr + size) < addr)
+ errx(1, "%s:%i: Invalid address %#lx", __FILE__, line, addr);
+ /*
+ * We return a pointer for the caller's convenience, now we know it's
+ * safe to use.
+ */
+ return from_guest_phys(addr);
+}
+/* A macro which transparently hands the line number to the real function. */
+#define check_pointer(addr,size) _check_pointer(addr, size, __LINE__)
+
+/*
+ * Each buffer in the virtqueues is actually a chain of descriptors. This
+ * function returns the next descriptor in the chain, or vq->vring.num if we're
+ * at the end.
+ */
+static unsigned next_desc(struct vring_desc *desc,
+ unsigned int i, unsigned int max)
+{
+ unsigned int next;
+
+ /* If this descriptor says it doesn't chain, we're done. */
+ if (!(desc[i].flags & VRING_DESC_F_NEXT))
+ return max;
+
+ /* Check they're not leading us off end of descriptors. */
+ next = desc[i].next;
+ /* Make sure compiler knows to grab that: we don't want it changing! */
+ wmb();
+
+ if (next >= max)
+ errx(1, "Desc next is %u", next);
+
+ return next;
+}
+
+/*
+ * This actually sends the interrupt for this virtqueue, if we've used a
+ * buffer.
+ */
+static void trigger_irq(struct virtqueue *vq)
+{
+ unsigned long buf[] = { LHREQ_IRQ, vq->config.irq };
+
+ /* Don't inform them if nothing used. */
+ if (!vq->pending_used)
+ return;
+ vq->pending_used = 0;
+
+ /* If they don't want an interrupt, don't send one... */
+ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
+ return;
+ }
+
+ /* Send the Guest an interrupt tell them we used something up. */
+ if (write(lguest_fd, buf, sizeof(buf)) != 0)
+ err(1, "Triggering irq %i", vq->config.irq);
+}
+
+/*
+ * This looks in the virtqueue for the first available buffer, and converts
+ * it to an iovec for convenient access. Since descriptors consist of some
+ * number of output then some number of input descriptors, it's actually two
+ * iovecs, but we pack them into one and note how many of each there were.
+ *
+ * This function waits if necessary, and returns the descriptor number found.
+ */
+static unsigned wait_for_vq_desc(struct virtqueue *vq,
+ struct iovec iov[],
+ unsigned int *out_num, unsigned int *in_num)
+{
+ unsigned int i, head, max;
+ struct vring_desc *desc;
+ u16 last_avail = lg_last_avail(vq);
+
+ /* There's nothing available? */
+ while (last_avail == vq->vring.avail->idx) {
+ u64 event;
+
+ /*
+ * Since we're about to sleep, now is a good time to tell the
+ * Guest about what we've used up to now.
+ */
+ trigger_irq(vq);
+
+ /* OK, now we need to know about added descriptors. */
+ vq->vring.used->flags &= ~VRING_USED_F_NO_NOTIFY;
+
+ /*
+ * They could have slipped one in as we were doing that: make
+ * sure it's written, then check again.
+ */
+ mb();
+ if (last_avail != vq->vring.avail->idx) {
+ vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+ break;
+ }
+
+ /* Nothing new? Wait for eventfd to tell us they refilled. */
+ if (read(vq->eventfd, &event, sizeof(event)) != sizeof(event))
+ errx(1, "Event read failed?");
+
+ /* We don't need to be notified again. */
+ vq->vring.used->flags |= VRING_USED_F_NO_NOTIFY;
+ }
+
+ /* Check it isn't doing very strange things with descriptor numbers. */
+ if ((u16)(vq->vring.avail->idx - last_avail) > vq->vring.num)
+ errx(1, "Guest moved used index from %u to %u",
+ last_avail, vq->vring.avail->idx);
+
+ /*
+ * Make sure we read the descriptor number *after* we read the ring
+ * update; don't let the cpu or compiler change the order.
+ */
+ rmb();
+
+ /*
+ * Grab the next descriptor number they're advertising, and increment
+ * the index we've seen.
+ */
+ head = vq->vring.avail->ring[last_avail % vq->vring.num];
+ lg_last_avail(vq)++;
+
+ /* If their number is silly, that's a fatal mistake. */
+ if (head >= vq->vring.num)
+ errx(1, "Guest says index %u is available", head);
+
+ /* When we start there are none of either input nor output. */
+ *out_num = *in_num = 0;
+
+ max = vq->vring.num;
+ desc = vq->vring.desc;
+ i = head;
+
+ /*
+ * We have to read the descriptor after we read the descriptor number,
+ * but there's a data dependency there so the CPU shouldn't reorder
+ * that: no rmb() required.
+ */
+
+ /*
+ * If this is an indirect entry, then this buffer contains a descriptor
+ * table which we handle as if it's any normal descriptor chain.
+ */
+ if (desc[i].flags & VRING_DESC_F_INDIRECT) {
+ if (desc[i].len % sizeof(struct vring_desc))
+ errx(1, "Invalid size for indirect buffer table");
+
+ max = desc[i].len / sizeof(struct vring_desc);
+ desc = check_pointer(desc[i].addr, desc[i].len);
+ i = 0;
+ }
+
+ do {
+ /* Grab the first descriptor, and check it's OK. */
+ iov[*out_num + *in_num].iov_len = desc[i].len;
+ iov[*out_num + *in_num].iov_base
+ = check_pointer(desc[i].addr, desc[i].len);
+ /* If this is an input descriptor, increment that count. */
+ if (desc[i].flags & VRING_DESC_F_WRITE)
+ (*in_num)++;
+ else {
+ /*
+ * If it's an output descriptor, they're all supposed
+ * to come before any input descriptors.
+ */
+ if (*in_num)
+ errx(1, "Descriptor has out after in");
+ (*out_num)++;
+ }
+
+ /* If we've got too many, that implies a descriptor loop. */
+ if (*out_num + *in_num > max)
+ errx(1, "Looped descriptor");
+ } while ((i = next_desc(desc, i, max)) != max);
+
+ return head;
+}
+
+/*
+ * After we've used one of their buffers, we tell the Guest about it. Sometime
+ * later we'll want to send them an interrupt using trigger_irq(); note that
+ * wait_for_vq_desc() does that for us if it has to wait.
+ */
+static void add_used(struct virtqueue *vq, unsigned int head, int len)
+{
+ struct vring_used_elem *used;
+
+ /*
+ * The virtqueue contains a ring of used buffers. Get a pointer to the
+ * next entry in that used ring.
+ */
+ used = &vq->vring.used->ring[vq->vring.used->idx % vq->vring.num];
+ used->id = head;
+ used->len = len;
+ /* Make sure buffer is written before we update index. */
+ wmb();
+ vq->vring.used->idx++;
+ vq->pending_used++;
+}
+
+/* And here's the combo meal deal. Supersize me! */
+static void add_used_and_trigger(struct virtqueue *vq, unsigned head, int len)
+{
+ add_used(vq, head, len);
+ trigger_irq(vq);
+}
+
+/*
+ * The Console
+ *
+ * We associate some data with the console for our exit hack.
+ */
+struct console_abort {
+ /* How many times have they hit ^C? */
+ int count;
+ /* When did they start? */
+ struct timeval start;
+};
+
+/* This is the routine which handles console input (ie. stdin). */
+static void console_input(struct virtqueue *vq)
+{
+ int len;
+ unsigned int head, in_num, out_num;
+ struct console_abort *abort = vq->dev->priv;
+ struct iovec iov[vq->vring.num];
+
+ /* Make sure there's a descriptor available. */
+ head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
+ if (out_num)
+ errx(1, "Output buffers in console in queue?");
+
+ /* Read into it. This is where we usually wait. */
+ len = readv(STDIN_FILENO, iov, in_num);
+ if (len <= 0) {
+ /* Ran out of input? */
+ warnx("Failed to get console input, ignoring console.");
+ /*
+ * For simplicity, dying threads kill the whole Launcher. So
+ * just nap here.
+ */
+ for (;;)
+ pause();
+ }
+
+ /* Tell the Guest we used a buffer. */
+ add_used_and_trigger(vq, head, len);
+
+ /*
+ * Three ^C within one second? Exit.
+ *
+ * This is such a hack, but works surprisingly well. Each ^C has to
+ * be in a buffer by itself, so they can't be too fast. But we check
+ * that we get three within about a second, so they can't be too
+ * slow.
+ */
+ if (len != 1 || ((char *)iov[0].iov_base)[0] != 3) {
+ abort->count = 0;
+ return;
+ }
+
+ abort->count++;
+ if (abort->count == 1)
+ gettimeofday(&abort->start, NULL);
+ else if (abort->count == 3) {
+ struct timeval now;
+ gettimeofday(&now, NULL);
+ /* Kill all Launcher processes with SIGINT, like normal ^C */
+ if (now.tv_sec <= abort->start.tv_sec+1)
+ kill(0, SIGINT);
+ abort->count = 0;
+ }
+}
+
+/* This is the routine which handles console output (ie. stdout). */
+static void console_output(struct virtqueue *vq)
+{
+ unsigned int head, out, in;
+ struct iovec iov[vq->vring.num];
+
+ /* We usually wait in here, for the Guest to give us something. */
+ head = wait_for_vq_desc(vq, iov, &out, &in);
+ if (in)
+ errx(1, "Input buffers in console output queue?");
+
+ /* writev can return a partial write, so we loop here. */
+ while (!iov_empty(iov, out)) {
+ int len = writev(STDOUT_FILENO, iov, out);
+ if (len <= 0) {
+ warn("Write to stdout gave %i (%d)", len, errno);
+ break;
+ }
+ iov_consume(iov, out, NULL, len);
+ }
+
+ /*
+ * We're finished with that buffer: if we're going to sleep,
+ * wait_for_vq_desc() will prod the Guest with an interrupt.
+ */
+ add_used(vq, head, 0);
+}
+
+/*
+ * The Network
+ *
+ * Handling output for network is also simple: we get all the output buffers
+ * and write them to /dev/net/tun.
+ */
+struct net_info {
+ int tunfd;
+};
+
+static void net_output(struct virtqueue *vq)
+{
+ struct net_info *net_info = vq->dev->priv;
+ unsigned int head, out, in;
+ struct iovec iov[vq->vring.num];
+
+ /* We usually wait in here for the Guest to give us a packet. */
+ head = wait_for_vq_desc(vq, iov, &out, &in);
+ if (in)
+ errx(1, "Input buffers in net output queue?");
+ /*
+ * Send the whole thing through to /dev/net/tun. It expects the exact
+ * same format: what a coincidence!
+ */
+ if (writev(net_info->tunfd, iov, out) < 0)
+ warnx("Write to tun failed (%d)?", errno);
+
+ /*
+ * Done with that one; wait_for_vq_desc() will send the interrupt if
+ * all packets are processed.
+ */
+ add_used(vq, head, 0);
+}
+
+/*
+ * Handling network input is a bit trickier, because I've tried to optimize it.
+ *
+ * First we have a helper routine which tells is if from this file descriptor
+ * (ie. the /dev/net/tun device) will block:
+ */
+static bool will_block(int fd)
+{
+ fd_set fdset;
+ struct timeval zero = { 0, 0 };
+ FD_ZERO(&fdset);
+ FD_SET(fd, &fdset);
+ return select(fd+1, &fdset, NULL, NULL, &zero) != 1;
+}
+
+/*
+ * This handles packets coming in from the tun device to our Guest. Like all
+ * service routines, it gets called again as soon as it returns, so you don't
+ * see a while(1) loop here.
+ */
+static void net_input(struct virtqueue *vq)
+{
+ int len;
+ unsigned int head, out, in;
+ struct iovec iov[vq->vring.num];
+ struct net_info *net_info = vq->dev->priv;
+
+ /*
+ * Get a descriptor to write an incoming packet into. This will also
+ * send an interrupt if they're out of descriptors.
+ */
+ head = wait_for_vq_desc(vq, iov, &out, &in);
+ if (out)
+ errx(1, "Output buffers in net input queue?");
+
+ /*
+ * If it looks like we'll block reading from the tun device, send them
+ * an interrupt.
+ */
+ if (vq->pending_used && will_block(net_info->tunfd))
+ trigger_irq(vq);
+
+ /*
+ * Read in the packet. This is where we normally wait (when there's no
+ * incoming network traffic).
+ */
+ len = readv(net_info->tunfd, iov, in);
+ if (len <= 0)
+ warn("Failed to read from tun (%d).", errno);
+
+ /*
+ * Mark that packet buffer as used, but don't interrupt here. We want
+ * to wait until we've done as much work as we can.
+ */
+ add_used(vq, head, len);
+}
+/*:*/
+
+/* This is the helper to create threads: run the service routine in a loop. */
+static int do_thread(void *_vq)
+{
+ struct virtqueue *vq = _vq;
+
+ for (;;)
+ vq->service(vq);
+ return 0;
+}
+
+/*
+ * When a child dies, we kill our entire process group with SIGTERM. This
+ * also has the side effect that the shell restores the console for us!
+ */
+static void kill_launcher(int signal)
+{
+ kill(0, SIGTERM);
+}
+
+static void reset_device(struct device *dev)
+{
+ struct virtqueue *vq;
+
+ verbose("Resetting device %s\n", dev->name);
+
+ /* Clear any features they've acked. */
+ memset(get_feature_bits(dev) + dev->feature_len, 0, dev->feature_len);
+
+ /* We're going to be explicitly killing threads, so ignore them. */
+ signal(SIGCHLD, SIG_IGN);
+
+ /* Zero out the virtqueues, get rid of their threads */
+ for (vq = dev->vq; vq; vq = vq->next) {
+ if (vq->thread != (pid_t)-1) {
+ kill(vq->thread, SIGTERM);
+ waitpid(vq->thread, NULL, 0);
+ vq->thread = (pid_t)-1;
+ }
+ memset(vq->vring.desc, 0,
+ vring_size(vq->config.num, LGUEST_VRING_ALIGN));
+ lg_last_avail(vq) = 0;
+ }
+ dev->running = false;
+
+ /* Now we care if threads die. */
+ signal(SIGCHLD, (void *)kill_launcher);
+}
+
+/*L:216
+ * This actually creates the thread which services the virtqueue for a device.
+ */
+static void create_thread(struct virtqueue *vq)
+{
+ /*
+ * Create stack for thread. Since the stack grows upwards, we point
+ * the stack pointer to the end of this region.
+ */
+ char *stack = malloc(32768);
+ unsigned long args[] = { LHREQ_EVENTFD,
+ vq->config.pfn*getpagesize(), 0 };
+
+ /* Create a zero-initialized eventfd. */
+ vq->eventfd = eventfd(0, 0);
+ if (vq->eventfd < 0)
+ err(1, "Creating eventfd");
+ args[2] = vq->eventfd;
+
+ /*
+ * Attach an eventfd to this virtqueue: it will go off when the Guest
+ * does an LHCALL_NOTIFY for this vq.
+ */
+ if (write(lguest_fd, &args, sizeof(args)) != 0)
+ err(1, "Attaching eventfd");
+
+ /*
+ * CLONE_VM: because it has to access the Guest memory, and SIGCHLD so
+ * we get a signal if it dies.
+ */
+ vq->thread = clone(do_thread, stack + 32768, CLONE_VM | SIGCHLD, vq);
+ if (vq->thread == (pid_t)-1)
+ err(1, "Creating clone");
+
+ /* We close our local copy now the child has it. */
+ close(vq->eventfd);
+}
+
+static void start_device(struct device *dev)
+{
+ unsigned int i;
+ struct virtqueue *vq;
+
+ verbose("Device %s OK: offered", dev->name);
+ for (i = 0; i < dev->feature_len; i++)
+ verbose(" %02x", get_feature_bits(dev)[i]);
+ verbose(", accepted");
+ for (i = 0; i < dev->feature_len; i++)
+ verbose(" %02x", get_feature_bits(dev)
+ [dev->feature_len+i]);
+
+ for (vq = dev->vq; vq; vq = vq->next) {
+ if (vq->service)
+ create_thread(vq);
+ }
+ dev->running = true;
+}
+
+static void cleanup_devices(void)
+{
+ struct device *dev;
+
+ for (dev = devices.dev; dev; dev = dev->next)
+ reset_device(dev);
+
+ /* If we saved off the original terminal settings, restore them now. */
+ if (orig_term.c_lflag & (ISIG|ICANON|ECHO))
+ tcsetattr(STDIN_FILENO, TCSANOW, &orig_term);
+}
+
+/* When the Guest tells us they updated the status field, we handle it. */
+static void update_device_status(struct device *dev)
+{
+ /* A zero status is a reset, otherwise it's a set of flags. */
+ if (dev->desc->status == 0)
+ reset_device(dev);
+ else if (dev->desc->status & VIRTIO_CONFIG_S_FAILED) {
+ warnx("Device %s configuration FAILED", dev->name);
+ if (dev->running)
+ reset_device(dev);
+ } else {
+ if (dev->running)
+ err(1, "Device %s features finalized twice", dev->name);
+ start_device(dev);
+ }
+}
+
+/*L:215
+ * This is the generic routine we call when the Guest uses LHCALL_NOTIFY. In
+ * particular, it's used to notify us of device status changes during boot.
+ */
+static void handle_output(unsigned long addr)
+{
+ struct device *i;
+
+ /* Check each device. */
+ for (i = devices.dev; i; i = i->next) {
+ struct virtqueue *vq;
+
+ /*
+ * Notifications to device descriptors mean they updated the
+ * device status.
+ */
+ if (from_guest_phys(addr) == i->desc) {
+ update_device_status(i);
+ return;
+ }
+
+ /* Devices should not be used before features are finalized. */
+ for (vq = i->vq; vq; vq = vq->next) {
+ if (addr != vq->config.pfn*getpagesize())
+ continue;
+ errx(1, "Notification on %s before setup!", i->name);
+ }
+ }
+
+ /*
+ * Early console write is done using notify on a nul-terminated string
+ * in Guest memory. It's also great for hacking debugging messages
+ * into a Guest.
+ */
+ if (addr >= guest_limit)
+ errx(1, "Bad NOTIFY %#lx", addr);
+
+ write(STDOUT_FILENO, from_guest_phys(addr),
+ strnlen(from_guest_phys(addr), guest_limit - addr));
+}
+
+/*L:190
+ * Device Setup
+ *
+ * All devices need a descriptor so the Guest knows it exists, and a "struct
+ * device" so the Launcher can keep track of it. We have common helper
+ * routines to allocate and manage them.
+ */
+
+/*
+ * The layout of the device page is a "struct lguest_device_desc" followed by a
+ * number of virtqueue descriptors, then two sets of feature bits, then an
+ * array of configuration bytes. This routine returns the configuration
+ * pointer.
+ */
+static u8 *device_config(const struct device *dev)
+{
+ return (void *)(dev->desc + 1)
+ + dev->num_vq * sizeof(struct lguest_vqconfig)
+ + dev->feature_len * 2;
+}
+
+/*
+ * This routine allocates a new "struct lguest_device_desc" from descriptor
+ * table page just above the Guest's normal memory. It returns a pointer to
+ * that descriptor.
+ */
+static struct lguest_device_desc *new_dev_desc(u16 type)
+{
+ struct lguest_device_desc d = { .type = type };
+ void *p;
+
+ /* Figure out where the next device config is, based on the last one. */
+ if (devices.lastdev)
+ p = device_config(devices.lastdev)
+ + devices.lastdev->desc->config_len;
+ else
+ p = devices.descpage;
+
+ /* We only have one page for all the descriptors. */
+ if (p + sizeof(d) > (void *)devices.descpage + getpagesize())
+ errx(1, "Too many devices");
+
+ /* p might not be aligned, so we memcpy in. */
+ return memcpy(p, &d, sizeof(d));
+}
+
+/*
+ * Each device descriptor is followed by the description of its virtqueues. We
+ * specify how many descriptors the virtqueue is to have.
+ */
+static void add_virtqueue(struct device *dev, unsigned int num_descs,
+ void (*service)(struct virtqueue *))
+{
+ unsigned int pages;
+ struct virtqueue **i, *vq = malloc(sizeof(*vq));
+ void *p;
+
+ /* First we need some memory for this virtqueue. */
+ pages = (vring_size(num_descs, LGUEST_VRING_ALIGN) + getpagesize() - 1)
+ / getpagesize();
+ p = get_pages(pages);
+
+ /* Initialize the virtqueue */
+ vq->next = NULL;
+ vq->last_avail_idx = 0;
+ vq->dev = dev;
+
+ /*
+ * This is the routine the service thread will run, and its Process ID
+ * once it's running.
+ */
+ vq->service = service;
+ vq->thread = (pid_t)-1;
+
+ /* Initialize the configuration. */
+ vq->config.num = num_descs;
+ vq->config.irq = devices.next_irq++;
+ vq->config.pfn = to_guest_phys(p) / getpagesize();
+
+ /* Initialize the vring. */
+ vring_init(&vq->vring, num_descs, p, LGUEST_VRING_ALIGN);
+
+ /*
+ * Append virtqueue to this device's descriptor. We use
+ * device_config() to get the end of the device's current virtqueues;
+ * we check that we haven't added any config or feature information
+ * yet, otherwise we'd be overwriting them.
+ */
+ assert(dev->desc->config_len == 0 && dev->desc->feature_len == 0);
+ memcpy(device_config(dev), &vq->config, sizeof(vq->config));
+ dev->num_vq++;
+ dev->desc->num_vq++;
+
+ verbose("Virtqueue page %#lx\n", to_guest_phys(p));
+
+ /*
+ * Add to tail of list, so dev->vq is first vq, dev->vq->next is
+ * second.
+ */
+ for (i = &dev->vq; *i; i = &(*i)->next);
+ *i = vq;
+}
+
+/*
+ * The first half of the feature bitmask is for us to advertise features. The
+ * second half is for the Guest to accept features.
+ */
+static void add_feature(struct device *dev, unsigned bit)
+{
+ u8 *features = get_feature_bits(dev);
+
+ /* We can't extend the feature bits once we've added config bytes */
+ if (dev->desc->feature_len <= bit / CHAR_BIT) {
+ assert(dev->desc->config_len == 0);
+ dev->feature_len = dev->desc->feature_len = (bit/CHAR_BIT) + 1;
+ }
+
+ features[bit / CHAR_BIT] |= (1 << (bit % CHAR_BIT));
+}
+
+/*
+ * This routine sets the configuration fields for an existing device's
+ * descriptor. It only works for the last device, but that's OK because that's
+ * how we use it.
+ */
+static void set_config(struct device *dev, unsigned len, const void *conf)
+{
+ /* Check we haven't overflowed our single page. */
+ if (device_config(dev) + len > devices.descpage + getpagesize())
+ errx(1, "Too many devices");
+
+ /* Copy in the config information, and store the length. */
+ memcpy(device_config(dev), conf, len);
+ dev->desc->config_len = len;
+
+ /* Size must fit in config_len field (8 bits)! */
+ assert(dev->desc->config_len == len);
+}
+
+/*
+ * This routine does all the creation and setup of a new device, including
+ * calling new_dev_desc() to allocate the descriptor and device memory. We
+ * don't actually start the service threads until later.
+ *
+ * See what I mean about userspace being boring?
+ */
+static struct device *new_device(const char *name, u16 type)
+{
+ struct device *dev = malloc(sizeof(*dev));
+
+ /* Now we populate the fields one at a time. */
+ dev->desc = new_dev_desc(type);
+ dev->name = name;
+ dev->vq = NULL;
+ dev->feature_len = 0;
+ dev->num_vq = 0;
+ dev->running = false;
+ dev->next = NULL;
+
+ /*
+ * Append to device list. Prepending to a single-linked list is
+ * easier, but the user expects the devices to be arranged on the bus
+ * in command-line order. The first network device on the command line
+ * is eth0, the first block device /dev/vda, etc.
+ */
+ if (devices.lastdev)
+ devices.lastdev->next = dev;
+ else
+ devices.dev = dev;
+ devices.lastdev = dev;
+
+ return dev;
+}
+
+/*
+ * Our first setup routine is the console. It's a fairly simple device, but
+ * UNIX tty handling makes it uglier than it could be.
+ */
+static void setup_console(void)
+{
+ struct device *dev;
+
+ /* If we can save the initial standard input settings... */
+ if (tcgetattr(STDIN_FILENO, &orig_term) == 0) {
+ struct termios term = orig_term;
+ /*
+ * Then we turn off echo, line buffering and ^C etc: We want a
+ * raw input stream to the Guest.
+ */
+ term.c_lflag &= ~(ISIG|ICANON|ECHO);
+ tcsetattr(STDIN_FILENO, TCSANOW, &term);
+ }
+
+ dev = new_device("console", VIRTIO_ID_CONSOLE);
+
+ /* We store the console state in dev->priv, and initialize it. */
+ dev->priv = malloc(sizeof(struct console_abort));
+ ((struct console_abort *)dev->priv)->count = 0;
+
+ /*
+ * The console needs two virtqueues: the input then the output. When
+ * they put something the input queue, we make sure we're listening to
+ * stdin. When they put something in the output queue, we write it to
+ * stdout.
+ */
+ add_virtqueue(dev, VIRTQUEUE_NUM, console_input);
+ add_virtqueue(dev, VIRTQUEUE_NUM, console_output);
+
+ verbose("device %u: console\n", ++devices.device_num);
+}
+/*:*/
+
+/*M:010
+ * Inter-guest networking is an interesting area. Simplest is to have a
+ * --sharenet=<name> option which opens or creates a named pipe. This can be
+ * used to send packets to another guest in a 1:1 manner.
+ *
+ * More sophisticated is to use one of the tools developed for project like UML
+ * to do networking.
+ *
+ * Faster is to do virtio bonding in kernel. Doing this 1:1 would be
+ * completely generic ("here's my vring, attach to your vring") and would work
+ * for any traffic. Of course, namespace and permissions issues need to be
+ * dealt with. A more sophisticated "multi-channel" virtio_net.c could hide
+ * multiple inter-guest channels behind one interface, although it would
+ * require some manner of hotplugging new virtio channels.
+ *
+ * Finally, we could use a virtio network switch in the kernel, ie. vhost.
+:*/
+
+static u32 str2ip(const char *ipaddr)
+{
+ unsigned int b[4];
+
+ if (sscanf(ipaddr, "%u.%u.%u.%u", &b[0], &b[1], &b[2], &b[3]) != 4)
+ errx(1, "Failed to parse IP address '%s'", ipaddr);
+ return (b[0] << 24) | (b[1] << 16) | (b[2] << 8) | b[3];
+}
+
+static void str2mac(const char *macaddr, unsigned char mac[6])
+{
+ unsigned int m[6];
+ if (sscanf(macaddr, "%02x:%02x:%02x:%02x:%02x:%02x",
+ &m[0], &m[1], &m[2], &m[3], &m[4], &m[5]) != 6)
+ errx(1, "Failed to parse mac address '%s'", macaddr);
+ mac[0] = m[0];
+ mac[1] = m[1];
+ mac[2] = m[2];
+ mac[3] = m[3];
+ mac[4] = m[4];
+ mac[5] = m[5];
+}
+
+/*
+ * This code is "adapted" from libbridge: it attaches the Host end of the
+ * network device to the bridge device specified by the command line.
+ *
+ * This is yet another James Morris contribution (I'm an IP-level guy, so I
+ * dislike bridging), and I just try not to break it.
+ */
+static void add_to_bridge(int fd, const char *if_name, const char *br_name)
+{
+ int ifidx;
+ struct ifreq ifr;
+
+ if (!*br_name)
+ errx(1, "must specify bridge name");
+
+ ifidx = if_nametoindex(if_name);
+ if (!ifidx)
+ errx(1, "interface %s does not exist!", if_name);
+
+ strncpy(ifr.ifr_name, br_name, IFNAMSIZ);
+ ifr.ifr_name[IFNAMSIZ-1] = '\0';
+ ifr.ifr_ifindex = ifidx;
+ if (ioctl(fd, SIOCBRADDIF, &ifr) < 0)
+ err(1, "can't add %s to bridge %s", if_name, br_name);
+}
+
+/*
+ * This sets up the Host end of the network device with an IP address, brings
+ * it up so packets will flow, the copies the MAC address into the hwaddr
+ * pointer.
+ */
+static void configure_device(int fd, const char *tapif, u32 ipaddr)
+{
+ struct ifreq ifr;
+ struct sockaddr_in sin;
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, tapif);
+
+ /* Don't read these incantations. Just cut & paste them like I did! */
+ sin.sin_family = AF_INET;
+ sin.sin_addr.s_addr = htonl(ipaddr);
+ memcpy(&ifr.ifr_addr, &sin, sizeof(sin));
+ if (ioctl(fd, SIOCSIFADDR, &ifr) != 0)
+ err(1, "Setting %s interface address", tapif);
+ ifr.ifr_flags = IFF_UP;
+ if (ioctl(fd, SIOCSIFFLAGS, &ifr) != 0)
+ err(1, "Bringing interface %s up", tapif);
+}
+
+static int get_tun_device(char tapif[IFNAMSIZ])
+{
+ struct ifreq ifr;
+ int netfd;
+
+ /* Start with this zeroed. Messy but sure. */
+ memset(&ifr, 0, sizeof(ifr));
+
+ /*
+ * We open the /dev/net/tun device and tell it we want a tap device. A
+ * tap device is like a tun device, only somehow different. To tell
+ * the truth, I completely blundered my way through this code, but it
+ * works now!
+ */
+ netfd = open_or_die("/dev/net/tun", O_RDWR);
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR;
+ strcpy(ifr.ifr_name, "tap%d");
+ if (ioctl(netfd, TUNSETIFF, &ifr) != 0)
+ err(1, "configuring /dev/net/tun");
+
+ if (ioctl(netfd, TUNSETOFFLOAD,
+ TUN_F_CSUM|TUN_F_TSO4|TUN_F_TSO6|TUN_F_TSO_ECN) != 0)
+ err(1, "Could not set features for tun device");
+
+ /*
+ * We don't need checksums calculated for packets coming in this
+ * device: trust us!
+ */
+ ioctl(netfd, TUNSETNOCSUM, 1);
+
+ memcpy(tapif, ifr.ifr_name, IFNAMSIZ);
+ return netfd;
+}
+
+/*L:195
+ * Our network is a Host<->Guest network. This can either use bridging or
+ * routing, but the principle is the same: it uses the "tun" device to inject
+ * packets into the Host as if they came in from a normal network card. We
+ * just shunt packets between the Guest and the tun device.
+ */
+static void setup_tun_net(char *arg)
+{
+ struct device *dev;
+ struct net_info *net_info = malloc(sizeof(*net_info));
+ int ipfd;
+ u32 ip = INADDR_ANY;
+ bool bridging = false;
+ char tapif[IFNAMSIZ], *p;
+ struct virtio_net_config conf;
+
+ net_info->tunfd = get_tun_device(tapif);
+
+ /* First we create a new network device. */
+ dev = new_device("net", VIRTIO_ID_NET);
+ dev->priv = net_info;
+
+ /* Network devices need a recv and a send queue, just like console. */
+ add_virtqueue(dev, VIRTQUEUE_NUM, net_input);
+ add_virtqueue(dev, VIRTQUEUE_NUM, net_output);
+
+ /*
+ * We need a socket to perform the magic network ioctls to bring up the
+ * tap interface, connect to the bridge etc. Any socket will do!
+ */
+ ipfd = socket(PF_INET, SOCK_DGRAM, IPPROTO_IP);
+ if (ipfd < 0)
+ err(1, "opening IP socket");
+
+ /* If the command line was --tunnet=bridge:<name> do bridging. */
+ if (!strncmp(BRIDGE_PFX, arg, strlen(BRIDGE_PFX))) {
+ arg += strlen(BRIDGE_PFX);
+ bridging = true;
+ }
+
+ /* A mac address may follow the bridge name or IP address */
+ p = strchr(arg, ':');
+ if (p) {
+ str2mac(p+1, conf.mac);
+ add_feature(dev, VIRTIO_NET_F_MAC);
+ *p = '\0';
+ }
+
+ /* arg is now either an IP address or a bridge name */
+ if (bridging)
+ add_to_bridge(ipfd, tapif, arg);
+ else
+ ip = str2ip(arg);
+
+ /* Set up the tun device. */
+ configure_device(ipfd, tapif, ip);
+
+ /* Expect Guest to handle everything except UFO */
+ add_feature(dev, VIRTIO_NET_F_CSUM);
+ add_feature(dev, VIRTIO_NET_F_GUEST_CSUM);
+ add_feature(dev, VIRTIO_NET_F_GUEST_TSO4);
+ add_feature(dev, VIRTIO_NET_F_GUEST_TSO6);
+ add_feature(dev, VIRTIO_NET_F_GUEST_ECN);
+ add_feature(dev, VIRTIO_NET_F_HOST_TSO4);
+ add_feature(dev, VIRTIO_NET_F_HOST_TSO6);
+ add_feature(dev, VIRTIO_NET_F_HOST_ECN);
+ /* We handle indirect ring entries */
+ add_feature(dev, VIRTIO_RING_F_INDIRECT_DESC);
+ /* We're compliant with the damn spec. */
+ add_feature(dev, VIRTIO_F_ANY_LAYOUT);
+ set_config(dev, sizeof(conf), &conf);
+
+ /* We don't need the socket any more; setup is done. */
+ close(ipfd);
+
+ devices.device_num++;
+
+ if (bridging)
+ verbose("device %u: tun %s attached to bridge: %s\n",
+ devices.device_num, tapif, arg);
+ else
+ verbose("device %u: tun %s: %s\n",
+ devices.device_num, tapif, arg);
+}
+/*:*/
+
+/* This hangs off device->priv. */
+struct vblk_info {
+ /* The size of the file. */
+ off64_t len;
+
+ /* The file descriptor for the file. */
+ int fd;
+
+};
+
+/*L:210
+ * The Disk
+ *
+ * The disk only has one virtqueue, so it only has one thread. It is really
+ * simple: the Guest asks for a block number and we read or write that position
+ * in the file.
+ *
+ * Before we serviced each virtqueue in a separate thread, that was unacceptably
+ * slow: the Guest waits until the read is finished before running anything
+ * else, even if it could have been doing useful work.
+ *
+ * We could have used async I/O, except it's reputed to suck so hard that
+ * characters actually go missing from your code when you try to use it.
+ */
+static void blk_request(struct virtqueue *vq)
+{
+ struct vblk_info *vblk = vq->dev->priv;
+ unsigned int head, out_num, in_num, wlen;
+ int ret, i;
+ u8 *in;
+ struct virtio_blk_outhdr out;
+ struct iovec iov[vq->vring.num];
+ off64_t off;
+
+ /*
+ * Get the next request, where we normally wait. It triggers the
+ * interrupt to acknowledge previously serviced requests (if any).
+ */
+ head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
+
+ /* Copy the output header from the front of the iov (adjusts iov) */
+ iov_consume(iov, out_num, &out, sizeof(out));
+
+ /* Find and trim end of iov input array, for our status byte. */
+ in = NULL;
+ for (i = out_num + in_num - 1; i >= out_num; i--) {
+ if (iov[i].iov_len > 0) {
+ in = iov[i].iov_base + iov[i].iov_len - 1;
+ iov[i].iov_len--;
+ break;
+ }
+ }
+ if (!in)
+ errx(1, "Bad virtblk cmd with no room for status");
+
+ /*
+ * For historical reasons, block operations are expressed in 512 byte
+ * "sectors".
+ */
+ off = out.sector * 512;
+
+ /*
+ * In general the virtio block driver is allowed to try SCSI commands.
+ * It'd be nice if we supported eject, for example, but we don't.
+ */
+ if (out.type & VIRTIO_BLK_T_SCSI_CMD) {
+ fprintf(stderr, "Scsi commands unsupported\n");
+ *in = VIRTIO_BLK_S_UNSUPP;
+ wlen = sizeof(*in);
+ } else if (out.type & VIRTIO_BLK_T_OUT) {
+ /*
+ * Write
+ *
+ * Move to the right location in the block file. This can fail
+ * if they try to write past end.
+ */
+ if (lseek64(vblk->fd, off, SEEK_SET) != off)
+ err(1, "Bad seek to sector %llu", out.sector);
+
+ ret = writev(vblk->fd, iov, out_num);
+ verbose("WRITE to sector %llu: %i\n", out.sector, ret);
+
+ /*
+ * Grr... Now we know how long the descriptor they sent was, we
+ * make sure they didn't try to write over the end of the block
+ * file (possibly extending it).
+ */
+ if (ret > 0 && off + ret > vblk->len) {
+ /* Trim it back to the correct length */
+ ftruncate64(vblk->fd, vblk->len);
+ /* Die, bad Guest, die. */
+ errx(1, "Write past end %llu+%u", off, ret);
+ }
+
+ wlen = sizeof(*in);
+ *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
+ } else if (out.type & VIRTIO_BLK_T_FLUSH) {
+ /* Flush */
+ ret = fdatasync(vblk->fd);
+ verbose("FLUSH fdatasync: %i\n", ret);
+ wlen = sizeof(*in);
+ *in = (ret >= 0 ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR);
+ } else {
+ /*
+ * Read
+ *
+ * Move to the right location in the block file. This can fail
+ * if they try to read past end.
+ */
+ if (lseek64(vblk->fd, off, SEEK_SET) != off)
+ err(1, "Bad seek to sector %llu", out.sector);
+
+ ret = readv(vblk->fd, iov + out_num, in_num);
+ if (ret >= 0) {
+ wlen = sizeof(*in) + ret;
+ *in = VIRTIO_BLK_S_OK;
+ } else {
+ wlen = sizeof(*in);
+ *in = VIRTIO_BLK_S_IOERR;
+ }
+ }
+
+ /* Finished that request. */
+ add_used(vq, head, wlen);
+}
+
+/*L:198 This actually sets up a virtual block device. */
+static void setup_block_file(const char *filename)
+{
+ struct device *dev;
+ struct vblk_info *vblk;
+ struct virtio_blk_config conf;
+
+ /* Creat the device. */
+ dev = new_device("block", VIRTIO_ID_BLOCK);
+
+ /* The device has one virtqueue, where the Guest places requests. */
+ add_virtqueue(dev, VIRTQUEUE_NUM, blk_request);
+
+ /* Allocate the room for our own bookkeeping */
+ vblk = dev->priv = malloc(sizeof(*vblk));
+
+ /* First we open the file and store the length. */
+ vblk->fd = open_or_die(filename, O_RDWR|O_LARGEFILE);
+ vblk->len = lseek64(vblk->fd, 0, SEEK_END);
+
+ /* We support FLUSH. */
+ add_feature(dev, VIRTIO_BLK_F_FLUSH);
+
+ /* Tell Guest how many sectors this device has. */
+ conf.capacity = cpu_to_le64(vblk->len / 512);
+
+ /*
+ * Tell Guest not to put in too many descriptors at once: two are used
+ * for the in and out elements.
+ */
+ add_feature(dev, VIRTIO_BLK_F_SEG_MAX);
+ conf.seg_max = cpu_to_le32(VIRTQUEUE_NUM - 2);
+
+ /* Don't try to put whole struct: we have 8 bit limit. */
+ set_config(dev, offsetof(struct virtio_blk_config, geometry), &conf);
+
+ verbose("device %u: virtblock %llu sectors\n",
+ ++devices.device_num, le64_to_cpu(conf.capacity));
+}
+
+/*L:211
+ * Our random number generator device reads from /dev/random into the Guest's
+ * input buffers. The usual case is that the Guest doesn't want random numbers
+ * and so has no buffers although /dev/random is still readable, whereas
+ * console is the reverse.
+ *
+ * The same logic applies, however.
+ */
+struct rng_info {
+ int rfd;
+};
+
+static void rng_input(struct virtqueue *vq)
+{
+ int len;
+ unsigned int head, in_num, out_num, totlen = 0;
+ struct rng_info *rng_info = vq->dev->priv;
+ struct iovec iov[vq->vring.num];
+
+ /* First we need a buffer from the Guests's virtqueue. */
+ head = wait_for_vq_desc(vq, iov, &out_num, &in_num);
+ if (out_num)
+ errx(1, "Output buffers in rng?");
+
+ /*
+ * Just like the console write, we loop to cover the whole iovec.
+ * In this case, short reads actually happen quite a bit.
+ */
+ while (!iov_empty(iov, in_num)) {
+ len = readv(rng_info->rfd, iov, in_num);
+ if (len <= 0)
+ err(1, "Read from /dev/random gave %i", len);
+ iov_consume(iov, in_num, NULL, len);
+ totlen += len;
+ }
+
+ /* Tell the Guest about the new input. */
+ add_used(vq, head, totlen);
+}
+
+/*L:199
+ * This creates a "hardware" random number device for the Guest.
+ */
+static void setup_rng(void)
+{
+ struct device *dev;
+ struct rng_info *rng_info = malloc(sizeof(*rng_info));
+
+ /* Our device's privat info simply contains the /dev/random fd. */
+ rng_info->rfd = open_or_die("/dev/random", O_RDONLY);
+
+ /* Create the new device. */
+ dev = new_device("rng", VIRTIO_ID_RNG);
+ dev->priv = rng_info;
+
+ /* The device has one virtqueue, where the Guest places inbufs. */
+ add_virtqueue(dev, VIRTQUEUE_NUM, rng_input);
+
+ verbose("device %u: rng\n", devices.device_num++);
+}
+/* That's the end of device setup. */
+
+/*L:230 Reboot is pretty easy: clean up and exec() the Launcher afresh. */
+static void __attribute__((noreturn)) restart_guest(void)
+{
+ unsigned int i;
+
+ /*
+ * Since we don't track all open fds, we simply close everything beyond
+ * stderr.
+ */
+ for (i = 3; i < FD_SETSIZE; i++)
+ close(i);
+
+ /* Reset all the devices (kills all threads). */
+ cleanup_devices();
+
+ execv(main_args[0], main_args);
+ err(1, "Could not exec %s", main_args[0]);
+}
+
+/*L:220
+ * Finally we reach the core of the Launcher which runs the Guest, serves
+ * its input and output, and finally, lays it to rest.
+ */
+static void __attribute__((noreturn)) run_guest(void)
+{
+ for (;;) {
+ unsigned long notify_addr;
+ int readval;
+
+ /* We read from the /dev/lguest device to run the Guest. */
+ readval = pread(lguest_fd, &notify_addr,
+ sizeof(notify_addr), cpu_id);
+
+ /* One unsigned long means the Guest did HCALL_NOTIFY */
+ if (readval == sizeof(notify_addr)) {
+ verbose("Notify on address %#lx\n", notify_addr);
+ handle_output(notify_addr);
+ /* ENOENT means the Guest died. Reading tells us why. */
+ } else if (errno == ENOENT) {
+ char reason[1024] = { 0 };
+ pread(lguest_fd, reason, sizeof(reason)-1, cpu_id);
+ errx(1, "%s", reason);
+ /* ERESTART means that we need to reboot the guest */
+ } else if (errno == ERESTART) {
+ restart_guest();
+ /* Anything else means a bug or incompatible change. */
+ } else
+ err(1, "Running guest failed");
+ }
+}
+/*L:240
+ * This is the end of the Launcher. The good news: we are over halfway
+ * through! The bad news: the most fiendish part of the code still lies ahead
+ * of us.
+ *
+ * Are you ready? Take a deep breath and join me in the core of the Host, in
+ * "make Host".
+:*/
+
+static struct option opts[] = {
+ { "verbose", 0, NULL, 'v' },
+ { "tunnet", 1, NULL, 't' },
+ { "block", 1, NULL, 'b' },
+ { "rng", 0, NULL, 'r' },
+ { "initrd", 1, NULL, 'i' },
+ { "username", 1, NULL, 'u' },
+ { "chroot", 1, NULL, 'c' },
+ { NULL },
+};
+static void usage(void)
+{
+ errx(1, "Usage: lguest [--verbose] "
+ "[--tunnet=(<ipaddr>:<macaddr>|bridge:<bridgename>:<macaddr>)\n"
+ "|--block=<filename>|--initrd=<filename>]...\n"
+ "<mem-in-mb> vmlinux [args...]");
+}
+
+/*L:105 The main routine is where the real work begins: */
+int main(int argc, char *argv[])
+{
+ /* Memory, code startpoint and size of the (optional) initrd. */
+ unsigned long mem = 0, start, initrd_size = 0;
+ /* Two temporaries. */
+ int i, c;
+ /* The boot information for the Guest. */
+ struct boot_params *boot;
+ /* If they specify an initrd file to load. */
+ const char *initrd_name = NULL;
+
+ /* Password structure for initgroups/setres[gu]id */
+ struct passwd *user_details = NULL;
+
+ /* Directory to chroot to */
+ char *chroot_path = NULL;
+
+ /* Save the args: we "reboot" by execing ourselves again. */
+ main_args = argv;
+
+ /*
+ * First we initialize the device list. We keep a pointer to the last
+ * device, and the next interrupt number to use for devices (1:
+ * remember that 0 is used by the timer).
+ */
+ devices.lastdev = NULL;
+ devices.next_irq = 1;
+
+ /* We're CPU 0. In fact, that's the only CPU possible right now. */
+ cpu_id = 0;
+
+ /*
+ * We need to know how much memory so we can set up the device
+ * descriptor and memory pages for the devices as we parse the command
+ * line. So we quickly look through the arguments to find the amount
+ * of memory now.
+ */
+ for (i = 1; i < argc; i++) {
+ if (argv[i][0] != '-') {
+ mem = atoi(argv[i]) * 1024 * 1024;
+ /*
+ * We start by mapping anonymous pages over all of
+ * guest-physical memory range. This fills it with 0,
+ * and ensures that the Guest won't be killed when it
+ * tries to access it.
+ */
+ guest_base = map_zeroed_pages(mem / getpagesize()
+ + DEVICE_PAGES);
+ guest_limit = mem;
+ guest_max = mem + DEVICE_PAGES*getpagesize();
+ devices.descpage = get_pages(1);
+ break;
+ }
+ }
+
+ /* The options are fairly straight-forward */
+ while ((c = getopt_long(argc, argv, "v", opts, NULL)) != EOF) {
+ switch (c) {
+ case 'v':
+ verbose = true;
+ break;
+ case 't':
+ setup_tun_net(optarg);
+ break;
+ case 'b':
+ setup_block_file(optarg);
+ break;
+ case 'r':
+ setup_rng();
+ break;
+ case 'i':
+ initrd_name = optarg;
+ break;
+ case 'u':
+ user_details = getpwnam(optarg);
+ if (!user_details)
+ err(1, "getpwnam failed, incorrect username?");
+ break;
+ case 'c':
+ chroot_path = optarg;
+ break;
+ default:
+ warnx("Unknown argument %s", argv[optind]);
+ usage();
+ }
+ }
+ /*
+ * After the other arguments we expect memory and kernel image name,
+ * followed by command line arguments for the kernel.
+ */
+ if (optind + 2 > argc)
+ usage();
+
+ verbose("Guest base is at %p\n", guest_base);
+
+ /* We always have a console device */
+ setup_console();
+
+ /* Now we load the kernel */
+ start = load_kernel(open_or_die(argv[optind+1], O_RDONLY));
+
+ /* Boot information is stashed at physical address 0 */
+ boot = from_guest_phys(0);
+
+ /* Map the initrd image if requested (at top of physical memory) */
+ if (initrd_name) {
+ initrd_size = load_initrd(initrd_name, mem);
+ /*
+ * These are the location in the Linux boot header where the
+ * start and size of the initrd are expected to be found.
+ */
+ boot->hdr.ramdisk_image = mem - initrd_size;
+ boot->hdr.ramdisk_size = initrd_size;
+ /* The bootloader type 0xFF means "unknown"; that's OK. */
+ boot->hdr.type_of_loader = 0xFF;
+ }
+
+ /*
+ * The Linux boot header contains an "E820" memory map: ours is a
+ * simple, single region.
+ */
+ boot->e820_entries = 1;
+ boot->e820_map[0] = ((struct e820entry) { 0, mem, E820_RAM });
+ /*
+ * The boot header contains a command line pointer: we put the command
+ * line after the boot header.
+ */
+ boot->hdr.cmd_line_ptr = to_guest_phys(boot + 1);
+ /* We use a simple helper to copy the arguments separated by spaces. */
+ concat((char *)(boot + 1), argv+optind+2);
+
+ /* Set kernel alignment to 16M (CONFIG_PHYSICAL_ALIGN) */
+ boot->hdr.kernel_alignment = 0x1000000;
+
+ /* Boot protocol version: 2.07 supports the fields for lguest. */
+ boot->hdr.version = 0x207;
+
+ /* The hardware_subarch value of "1" tells the Guest it's an lguest. */
+ boot->hdr.hardware_subarch = 1;
+
+ /* Tell the entry path not to try to reload segment registers. */
+ boot->hdr.loadflags |= KEEP_SEGMENTS;
+
+ /* We tell the kernel to initialize the Guest. */
+ tell_kernel(start);
+
+ /* Ensure that we terminate if a device-servicing child dies. */
+ signal(SIGCHLD, kill_launcher);
+
+ /* If we exit via err(), this kills all the threads, restores tty. */
+ atexit(cleanup_devices);
+
+ /* If requested, chroot to a directory */
+ if (chroot_path) {
+ if (chroot(chroot_path) != 0)
+ err(1, "chroot(\"%s\") failed", chroot_path);
+
+ if (chdir("/") != 0)
+ err(1, "chdir(\"/\") failed");
+
+ verbose("chroot done\n");
+ }
+
+ /* If requested, drop privileges */
+ if (user_details) {
+ uid_t u;
+ gid_t g;
+
+ u = user_details->pw_uid;
+ g = user_details->pw_gid;
+
+ if (initgroups(user_details->pw_name, g) != 0)
+ err(1, "initgroups failed");
+
+ if (setresgid(g, g, g) != 0)
+ err(1, "setresgid failed");
+
+ if (setresuid(u, u, u) != 0)
+ err(1, "setresuid failed");
+
+ verbose("Dropping privileges completed\n");
+ }
+
+ /* Finally, run the Guest. This doesn't return. */
+ run_guest();
+}
+/*:*/
+
+/*M:999
+ * Mastery is done: you now know everything I do.
+ *
+ * But surely you have seen code, features and bugs in your wanderings which
+ * you now yearn to attack? That is the real game, and I look forward to you
+ * patching and forking lguest into the Your-Name-Here-visor.
+ *
+ * Farewell, and good coding!
+ * Rusty Russell.
+ */
diff --git a/tools/lguest/lguest.txt b/tools/lguest/lguest.txt
new file mode 100644
index 00000000000..06e1f464951
--- /dev/null
+++ b/tools/lguest/lguest.txt
@@ -0,0 +1,125 @@
+ __
+ (___()'`; Rusty's Remarkably Unreliable Guide to Lguest
+ /, /` - or, A Young Coder's Illustrated Hypervisor
+ \\"--\\ http://lguest.ozlabs.org
+
+Lguest is designed to be a minimal 32-bit x86 hypervisor for the Linux kernel,
+for Linux developers and users to experiment with virtualization with the
+minimum of complexity. Nonetheless, it should have sufficient features to
+make it useful for specific tasks, and, of course, you are encouraged to fork
+and enhance it (see drivers/lguest/README).
+
+Features:
+
+- Kernel module which runs in a normal kernel.
+- Simple I/O model for communication.
+- Simple program to create new guests.
+- Logo contains cute puppies: http://lguest.ozlabs.org
+
+Developer features:
+
+- Fun to hack on.
+- No ABI: being tied to a specific kernel anyway, you can change anything.
+- Many opportunities for improvement or feature implementation.
+
+Running Lguest:
+
+- The easiest way to run lguest is to use same kernel as guest and host.
+ You can configure them differently, but usually it's easiest not to.
+
+ You will need to configure your kernel with the following options:
+
+ "Processor type and features":
+ "Paravirtualized guest support" = Y
+ "Lguest guest support" = Y
+ "High Memory Support" = off/4GB
+ "Alignment value to which kernel should be aligned" = 0x100000
+ (CONFIG_PARAVIRT=y, CONFIG_LGUEST_GUEST=y, CONFIG_HIGHMEM64G=n and
+ CONFIG_PHYSICAL_ALIGN=0x100000)
+
+ "Device Drivers":
+ "Block devices"
+ "Virtio block driver" = M/Y
+ "Network device support"
+ "Universal TUN/TAP device driver support" = M/Y
+ "Virtio network driver" = M/Y
+ (CONFIG_VIRTIO_BLK=m, CONFIG_VIRTIO_NET=m and CONFIG_TUN=m)
+
+ "Virtualization"
+ "Linux hypervisor example code" = M/Y
+ (CONFIG_LGUEST=m)
+
+- A tool called "lguest" is available in this directory: type "make"
+ to build it. If you didn't build your kernel in-tree, use "make
+ O=<builddir>".
+
+- Create or find a root disk image. There are several useful ones
+ around, such as the xm-test tiny root image at
+ http://xm-test.xensource.com/ramdisks/initrd-1.1-i386.img
+
+ For more serious work, I usually use a distribution ISO image and
+ install it under qemu, then make multiple copies:
+
+ dd if=/dev/zero of=rootfile bs=1M count=2048
+ qemu -cdrom image.iso -hda rootfile -net user -net nic -boot d
+
+ Make sure that you install a getty on /dev/hvc0 if you want to log in on the
+ console!
+
+- "modprobe lg" if you built it as a module.
+
+- Run an lguest as root:
+
+ tools/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
+ --block=rootfile root=/dev/vda
+
+ Explanation:
+ 64: the amount of memory to use, in MB.
+
+ vmlinux: the kernel image found in the top of your build directory. You
+ can also use a standard bzImage.
+
+ --tunnet=192.168.19.1: configures a "tap" device for networking with this
+ IP address.
+
+ --block=rootfile: a file or block device which becomes /dev/vda
+ inside the guest.
+
+ root=/dev/vda: this (and anything else on the command line) are
+ kernel boot parameters.
+
+- Configuring networking. I usually have the host masquerade, using
+ "iptables -t nat -A POSTROUTING -o eth0 -j MASQUERADE" and "echo 1 >
+ /proc/sys/net/ipv4/ip_forward". In this example, I would configure
+ eth0 inside the guest at 192.168.19.2.
+
+ Another method is to bridge the tap device to an external interface
+ using --tunnet=bridge:<bridgename>, and perhaps run dhcp on the guest
+ to obtain an IP address. The bridge needs to be configured first:
+ this option simply adds the tap interface to it.
+
+ A simple example on my system:
+
+ ifconfig eth0 0.0.0.0
+ brctl addbr lg0
+ ifconfig lg0 up
+ brctl addif lg0 eth0
+ dhclient lg0
+
+ Then use --tunnet=bridge:lg0 when launching the guest.
+
+ See:
+
+ http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge
+
+ for general information on how to get bridging to work.
+
+- Random number generation. Using the --rng option will provide a
+ /dev/hwrng in the guest that will read from the host's /dev/random.
+ Use this option in conjunction with rng-tools (see ../hw_random.txt)
+ to provide entropy to the guest kernel's /dev/random.
+
+There is a helpful mailing list at http://ozlabs.org/mailman/listinfo/lguest
+
+Good luck!
+Rusty Russell rusty@rustcorp.com.au.
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile
new file mode 100644
index 00000000000..ce00f7ee645
--- /dev/null
+++ b/tools/lib/api/Makefile
@@ -0,0 +1,44 @@
+include ../../scripts/Makefile.include
+include ../../perf/config/utilities.mak # QUIET_CLEAN
+
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+
+# guard against environment variables
+LIB_H=
+LIB_OBJS=
+
+LIB_H += fs/debugfs.h
+LIB_H += fs/fs.h
+
+LIB_OBJS += $(OUTPUT)fs/debugfs.o
+LIB_OBJS += $(OUTPUT)fs/fs.o
+
+LIBFILE = libapikfs.a
+
+CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC
+EXTLIBS = -lelf -lpthread -lrt -lm
+ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64
+ALL_LDFLAGS = $(LDFLAGS)
+
+RM = rm -f
+
+$(LIBFILE): $(LIB_OBJS)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $(OUTPUT)$@ $(LIB_OBJS)
+
+$(LIB_OBJS): $(LIB_H)
+
+libapi_dirs:
+ $(QUIET_MKDIR)mkdir -p $(OUTPUT)fs/
+
+$(OUTPUT)%.o: %.c libapi_dirs
+ $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
+$(OUTPUT)%.s: %.c libapi_dirs
+ $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $<
+$(OUTPUT)%.o: %.S libapi_dirs
+ $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $<
+
+clean:
+ $(call QUIET_CLEAN, libapi) $(RM) $(LIB_OBJS) $(LIBFILE)
+
+.PHONY: clean
diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c
new file mode 100644
index 00000000000..a74fba6d774
--- /dev/null
+++ b/tools/lib/api/fs/debugfs.c
@@ -0,0 +1,100 @@
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <sys/vfs.h>
+#include <sys/mount.h>
+#include <linux/kernel.h>
+
+#include "debugfs.h"
+
+char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug";
+
+static const char * const debugfs_known_mountpoints[] = {
+ "/sys/kernel/debug",
+ "/debug",
+ 0,
+};
+
+static bool debugfs_found;
+
+/* find the path to the mounted debugfs */
+const char *debugfs_find_mountpoint(void)
+{
+ const char * const *ptr;
+ char type[100];
+ FILE *fp;
+
+ if (debugfs_found)
+ return (const char *)debugfs_mountpoint;
+
+ ptr = debugfs_known_mountpoints;
+ while (*ptr) {
+ if (debugfs_valid_mountpoint(*ptr) == 0) {
+ debugfs_found = true;
+ strcpy(debugfs_mountpoint, *ptr);
+ return debugfs_mountpoint;
+ }
+ ptr++;
+ }
+
+ /* give up and parse /proc/mounts */
+ fp = fopen("/proc/mounts", "r");
+ if (fp == NULL)
+ return NULL;
+
+ while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+ debugfs_mountpoint, type) == 2) {
+ if (strcmp(type, "debugfs") == 0)
+ break;
+ }
+ fclose(fp);
+
+ if (strcmp(type, "debugfs") != 0)
+ return NULL;
+
+ debugfs_found = true;
+
+ return debugfs_mountpoint;
+}
+
+/* verify that a mountpoint is actually a debugfs instance */
+
+int debugfs_valid_mountpoint(const char *debugfs)
+{
+ struct statfs st_fs;
+
+ if (statfs(debugfs, &st_fs) < 0)
+ return -ENOENT;
+ else if (st_fs.f_type != (long) DEBUGFS_MAGIC)
+ return -ENOENT;
+
+ return 0;
+}
+
+/* mount the debugfs somewhere if it's not mounted */
+char *debugfs_mount(const char *mountpoint)
+{
+ /* see if it's already mounted */
+ if (debugfs_find_mountpoint())
+ goto out;
+
+ /* if not mounted and no argument */
+ if (mountpoint == NULL) {
+ /* see if environment variable set */
+ mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT);
+ /* if no environment variable, use default */
+ if (mountpoint == NULL)
+ mountpoint = "/sys/kernel/debug";
+ }
+
+ if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0)
+ return NULL;
+
+ /* save the mountpoint */
+ debugfs_found = true;
+ strncpy(debugfs_mountpoint, mountpoint, sizeof(debugfs_mountpoint));
+out:
+ return debugfs_mountpoint;
+}
diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h
new file mode 100644
index 00000000000..f19d3df9609
--- /dev/null
+++ b/tools/lib/api/fs/debugfs.h
@@ -0,0 +1,29 @@
+#ifndef __API_DEBUGFS_H__
+#define __API_DEBUGFS_H__
+
+#define _STR(x) #x
+#define STR(x) _STR(x)
+
+/*
+ * On most systems <limits.h> would have given us this, but not on some systems
+ * (e.g. GNU/Hurd).
+ */
+#ifndef PATH_MAX
+#define PATH_MAX 4096
+#endif
+
+#ifndef DEBUGFS_MAGIC
+#define DEBUGFS_MAGIC 0x64626720
+#endif
+
+#ifndef PERF_DEBUGFS_ENVIRONMENT
+#define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
+#endif
+
+const char *debugfs_find_mountpoint(void);
+int debugfs_valid_mountpoint(const char *debugfs);
+char *debugfs_mount(const char *mountpoint);
+
+extern char debugfs_mountpoint[];
+
+#endif /* __API_DEBUGFS_H__ */
diff --git a/tools/lib/api/fs/fs.c b/tools/lib/api/fs/fs.c
new file mode 100644
index 00000000000..c1b49c36a95
--- /dev/null
+++ b/tools/lib/api/fs/fs.c
@@ -0,0 +1,165 @@
+/* TODO merge/factor in debugfs.c here */
+
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/vfs.h>
+
+#include "debugfs.h"
+#include "fs.h"
+
+static const char * const sysfs__fs_known_mountpoints[] = {
+ "/sys",
+ 0,
+};
+
+static const char * const procfs__known_mountpoints[] = {
+ "/proc",
+ 0,
+};
+
+struct fs {
+ const char *name;
+ const char * const *mounts;
+ char path[PATH_MAX + 1];
+ bool found;
+ long magic;
+};
+
+enum {
+ FS__SYSFS = 0,
+ FS__PROCFS = 1,
+};
+
+static struct fs fs__entries[] = {
+ [FS__SYSFS] = {
+ .name = "sysfs",
+ .mounts = sysfs__fs_known_mountpoints,
+ .magic = SYSFS_MAGIC,
+ },
+ [FS__PROCFS] = {
+ .name = "proc",
+ .mounts = procfs__known_mountpoints,
+ .magic = PROC_SUPER_MAGIC,
+ },
+};
+
+static bool fs__read_mounts(struct fs *fs)
+{
+ bool found = false;
+ char type[100];
+ FILE *fp;
+
+ fp = fopen("/proc/mounts", "r");
+ if (fp == NULL)
+ return NULL;
+
+ while (!found &&
+ fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n",
+ fs->path, type) == 2) {
+
+ if (strcmp(type, fs->name) == 0)
+ found = true;
+ }
+
+ fclose(fp);
+ return fs->found = found;
+}
+
+static int fs__valid_mount(const char *fs, long magic)
+{
+ struct statfs st_fs;
+
+ if (statfs(fs, &st_fs) < 0)
+ return -ENOENT;
+ else if (st_fs.f_type != magic)
+ return -ENOENT;
+
+ return 0;
+}
+
+static bool fs__check_mounts(struct fs *fs)
+{
+ const char * const *ptr;
+
+ ptr = fs->mounts;
+ while (*ptr) {
+ if (fs__valid_mount(*ptr, fs->magic) == 0) {
+ fs->found = true;
+ strcpy(fs->path, *ptr);
+ return true;
+ }
+ ptr++;
+ }
+
+ return false;
+}
+
+static void mem_toupper(char *f, size_t len)
+{
+ while (len) {
+ *f = toupper(*f);
+ f++;
+ len--;
+ }
+}
+
+/*
+ * Check for "NAME_PATH" environment variable to override fs location (for
+ * testing). This matches the recommendation in Documentation/sysfs-rules.txt
+ * for SYSFS_PATH.
+ */
+static bool fs__env_override(struct fs *fs)
+{
+ char *override_path;
+ size_t name_len = strlen(fs->name);
+ /* name + "_PATH" + '\0' */
+ char upper_name[name_len + 5 + 1];
+ memcpy(upper_name, fs->name, name_len);
+ mem_toupper(upper_name, name_len);
+ strcpy(&upper_name[name_len], "_PATH");
+
+ override_path = getenv(upper_name);
+ if (!override_path)
+ return false;
+
+ fs->found = true;
+ strncpy(fs->path, override_path, sizeof(fs->path));
+ return true;
+}
+
+static const char *fs__get_mountpoint(struct fs *fs)
+{
+ if (fs__env_override(fs))
+ return fs->path;
+
+ if (fs__check_mounts(fs))
+ return fs->path;
+
+ if (fs__read_mounts(fs))
+ return fs->path;
+
+ return NULL;
+}
+
+static const char *fs__mountpoint(int idx)
+{
+ struct fs *fs = &fs__entries[idx];
+
+ if (fs->found)
+ return (const char *)fs->path;
+
+ return fs__get_mountpoint(fs);
+}
+
+#define FS__MOUNTPOINT(name, idx) \
+const char *name##__mountpoint(void) \
+{ \
+ return fs__mountpoint(idx); \
+}
+
+FS__MOUNTPOINT(sysfs, FS__SYSFS);
+FS__MOUNTPOINT(procfs, FS__PROCFS);
diff --git a/tools/lib/api/fs/fs.h b/tools/lib/api/fs/fs.h
new file mode 100644
index 00000000000..cb7049551f3
--- /dev/null
+++ b/tools/lib/api/fs/fs.h
@@ -0,0 +1,14 @@
+#ifndef __API_FS__
+#define __API_FS__
+
+#ifndef SYSFS_MAGIC
+#define SYSFS_MAGIC 0x62656572
+#endif
+
+#ifndef PROC_SUPER_MAGIC
+#define PROC_SUPER_MAGIC 0x9fa0
+#endif
+
+const char *sysfs__mountpoint(void);
+const char *procfs__mountpoint(void);
+#endif /* __API_FS__ */
diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile
new file mode 100644
index 00000000000..52f9279c6c1
--- /dev/null
+++ b/tools/lib/lockdep/Makefile
@@ -0,0 +1,243 @@
+# file format version
+FILE_VERSION = 1
+
+LIBLOCKDEP_VERSION=$(shell make --no-print-directory -sC ../../.. kernelversion)
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+prefix ?= /usr/local
+libdir_relative = lib
+libdir = $(prefix)/$(libdir_relative)
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+
+export DESTDIR DESTDIR_SQ INSTALL
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ("$(origin O)", "command line")
+ BUILD_OUTPUT := $(O)
+endif
+
+ifeq ($(BUILD_SRC),)
+ifneq ($(BUILD_OUTPUT),)
+
+define build_output
+ $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \
+ BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1
+endef
+
+saved-output := $(BUILD_OUTPUT)
+BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd)
+$(if $(BUILD_OUTPUT),, \
+ $(error output directory "$(saved-output)" does not exist))
+
+all: sub-make
+
+gui: force
+ $(call build_output, all_cmd)
+
+$(filter-out gui,$(MAKECMDGOALS)): sub-make
+
+sub-make: force
+ $(call build_output, $(MAKECMDGOALS))
+
+
+# Leave processing to above invocation of make
+skip-makefile := 1
+
+endif # BUILD_OUTPUT
+endif # BUILD_SRC
+
+# We process the rest of the Makefile if this is the final invocation of make
+ifeq ($(skip-makefile),)
+
+srctree := $(realpath $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)))
+objtree := $(realpath $(CURDIR))
+src := $(srctree)
+obj := $(objtree)
+
+export prefix libdir bindir src obj
+
+# Shell quotes
+libdir_SQ = $(subst ','\'',$(libdir))
+bindir_SQ = $(subst ','\'',$(bindir))
+
+LIB_FILE = liblockdep.a liblockdep.so.$(LIBLOCKDEP_VERSION)
+BIN_FILE = lockdep
+
+CONFIG_INCLUDES =
+CONFIG_LIBS =
+CONFIG_FLAGS =
+
+OBJ = $@
+N =
+
+export Q VERBOSE
+
+INCLUDES = -I. -I/usr/local/include -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS if not set elsewhere
+CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g
+
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+
+ifeq ($(VERBOSE),1)
+ Q =
+ print_compile =
+ print_app_build =
+ print_fpic_compile =
+ print_shared_lib_compile =
+ print_install =
+else
+ Q = @
+ print_compile = echo ' CC '$(OBJ);
+ print_app_build = echo ' BUILD '$(OBJ);
+ print_fpic_compile = echo ' CC FPIC '$(OBJ);
+ print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ);
+ print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ);
+ print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2';
+endif
+
+do_fpic_compile = \
+ ($(print_fpic_compile) \
+ $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@)
+
+do_app_build = \
+ ($(print_app_build) \
+ $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS))
+
+do_compile_shared_library = \
+ ($(print_shared_lib_compile) \
+ $(CC) --shared $^ -o $@ -lpthread -ldl -Wl,-soname='"$@"';$(shell ln -s $@ liblockdep.so))
+
+do_build_static_lib = \
+ ($(print_static_lib_build) \
+ $(RM) $@; $(AR) rcs $@ $^)
+
+
+define do_compile
+ $(print_compile) \
+ $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@;
+endef
+
+$(obj)/%.o: $(src)/%.c
+ $(Q)$(call do_compile)
+
+%.o: $(src)/%.c
+ $(Q)$(call do_compile)
+
+PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o
+
+ALL_OBJS = $(PEVENT_LIB_OBJS)
+
+CMD_TARGETS = $(LIB_FILE)
+
+TARGETS = $(CMD_TARGETS)
+
+
+all: all_cmd
+
+all_cmd: $(CMD_TARGETS)
+
+liblockdep.so.$(LIBLOCKDEP_VERSION): $(PEVENT_LIB_OBJS)
+ $(Q)$(do_compile_shared_library)
+
+liblockdep.a: $(PEVENT_LIB_OBJS)
+ $(Q)$(do_build_static_lib)
+
+$(PEVENT_LIB_OBJS): %.o: $(src)/%.c
+ $(Q)$(do_fpic_compile)
+
+## make deps
+
+all_objs := $(sort $(ALL_OBJS))
+all_deps := $(all_objs:%.o=.%.d)
+
+# let .d file also depends on the source and header files
+define check_deps
+ @set -e; $(RM) $@; \
+ $(CC) -MM $(CFLAGS) $< > $@.$$$$; \
+ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
+ $(RM) $@.$$$$
+endef
+
+$(all_deps): .%.d: $(src)/%.c
+ $(Q)$(call check_deps)
+
+$(all_objs) : %.o : .%.d
+
+dep_includes := $(wildcard $(all_deps))
+
+ifneq ($(dep_includes),)
+ include $(dep_includes)
+endif
+
+### Detect environment changes
+TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE)
+
+tags: force
+ $(RM) tags
+ find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+ --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'
+
+TAGS: force
+ $(RM) TAGS
+ find . -name '*.[ch]' | xargs etags \
+ --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'
+
+define do_install
+ $(print_install) \
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 '$(DESTDIR_SQ)$2'
+endef
+
+install_lib: all_cmd
+ $(Q)$(call do_install,$(LIB_FILE),$(libdir_SQ))
+ $(Q)$(call do_install,$(BIN_FILE),$(bindir_SQ))
+
+install: install_lib
+
+clean:
+ $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d
+ $(RM) tags TAGS
+
+endif # skip-makefile
+
+PHONY += force
+force:
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/lockdep/common.c b/tools/lib/lockdep/common.c
new file mode 100644
index 00000000000..8ef602f18a3
--- /dev/null
+++ b/tools/lib/lockdep/common.c
@@ -0,0 +1,33 @@
+#include <stddef.h>
+#include <stdbool.h>
+#include <linux/compiler.h>
+#include <linux/lockdep.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+static __thread struct task_struct current_obj;
+
+/* lockdep wants these */
+bool debug_locks = true;
+bool debug_locks_silent;
+
+__attribute__((constructor)) static void liblockdep_init(void)
+{
+ lockdep_init();
+}
+
+__attribute__((destructor)) static void liblockdep_exit(void)
+{
+ debug_check_no_locks_held(&current_obj);
+}
+
+struct task_struct *__curr(void)
+{
+ if (current_obj.pid == 0) {
+ /* Makes lockdep output pretty */
+ prctl(PR_GET_NAME, current_obj.comm);
+ current_obj.pid = syscall(__NR_gettid);
+ }
+
+ return &current_obj;
+}
diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h
new file mode 100644
index 00000000000..0bda630027c
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/common.h
@@ -0,0 +1,50 @@
+#ifndef _LIBLOCKDEP_COMMON_H
+#define _LIBLOCKDEP_COMMON_H
+
+#include <pthread.h>
+
+#define NR_LOCKDEP_CACHING_CLASSES 2
+#define MAX_LOCKDEP_SUBCLASSES 8UL
+
+#ifndef CALLER_ADDR0
+#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+#endif
+
+#ifndef _RET_IP_
+#define _RET_IP_ CALLER_ADDR0
+#endif
+
+#ifndef _THIS_IP_
+#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+#endif
+
+struct lockdep_subclass_key {
+ char __one_byte;
+};
+
+struct lock_class_key {
+ struct lockdep_subclass_key subkeys[MAX_LOCKDEP_SUBCLASSES];
+};
+
+struct lockdep_map {
+ struct lock_class_key *key;
+ struct lock_class *class_cache[NR_LOCKDEP_CACHING_CLASSES];
+ const char *name;
+#ifdef CONFIG_LOCK_STAT
+ int cpu;
+ unsigned long ip;
+#endif
+};
+
+void lockdep_init_map(struct lockdep_map *lock, const char *name,
+ struct lock_class_key *key, int subclass);
+void lock_acquire(struct lockdep_map *lock, unsigned int subclass,
+ int trylock, int read, int check,
+ struct lockdep_map *nest_lock, unsigned long ip);
+void lock_release(struct lockdep_map *lock, int nested,
+ unsigned long ip);
+
+#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \
+ { .name = (_name), .key = (void *)(_key), }
+
+#endif
diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h
new file mode 100644
index 00000000000..ee53a42818c
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/mutex.h
@@ -0,0 +1,70 @@
+#ifndef _LIBLOCKDEP_MUTEX_H
+#define _LIBLOCKDEP_MUTEX_H
+
+#include <pthread.h>
+#include "common.h"
+
+struct liblockdep_pthread_mutex {
+ pthread_mutex_t mutex;
+ struct lockdep_map dep_map;
+};
+
+typedef struct liblockdep_pthread_mutex liblockdep_pthread_mutex_t;
+
+#define LIBLOCKDEP_PTHREAD_MUTEX_INITIALIZER(mtx) \
+ (const struct liblockdep_pthread_mutex) { \
+ .mutex = PTHREAD_MUTEX_INITIALIZER, \
+ .dep_map = STATIC_LOCKDEP_MAP_INIT(#mtx, &((&(mtx))->dep_map)), \
+}
+
+static inline int __mutex_init(liblockdep_pthread_mutex_t *lock,
+ const char *name,
+ struct lock_class_key *key,
+ const pthread_mutexattr_t *__mutexattr)
+{
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+ return pthread_mutex_init(&lock->mutex, __mutexattr);
+}
+
+#define liblockdep_pthread_mutex_init(mutex, mutexattr) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ __mutex_init((mutex), #mutex, &__key, (mutexattr)); \
+})
+
+static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_mutex_lock(&lock->mutex);
+}
+
+static inline int liblockdep_pthread_mutex_unlock(liblockdep_pthread_mutex_t *lock)
+{
+ lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_);
+ return pthread_mutex_unlock(&lock->mutex);
+}
+
+static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_mutex_trylock(&lock->mutex) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock)
+{
+ return pthread_mutex_destroy(&lock->mutex);
+}
+
+#ifdef __USE_LIBLOCKDEP
+
+#define pthread_mutex_t liblockdep_pthread_mutex_t
+#define pthread_mutex_init liblockdep_pthread_mutex_init
+#define pthread_mutex_lock liblockdep_pthread_mutex_lock
+#define pthread_mutex_unlock liblockdep_pthread_mutex_unlock
+#define pthread_mutex_trylock liblockdep_pthread_mutex_trylock
+#define pthread_mutex_destroy liblockdep_pthread_mutex_destroy
+
+#endif
+
+#endif
diff --git a/tools/lib/lockdep/include/liblockdep/rwlock.h b/tools/lib/lockdep/include/liblockdep/rwlock.h
new file mode 100644
index 00000000000..4ec03f86155
--- /dev/null
+++ b/tools/lib/lockdep/include/liblockdep/rwlock.h
@@ -0,0 +1,86 @@
+#ifndef _LIBLOCKDEP_RWLOCK_H
+#define _LIBLOCKDEP_RWLOCK_H
+
+#include <pthread.h>
+#include "common.h"
+
+struct liblockdep_pthread_rwlock {
+ pthread_rwlock_t rwlock;
+ struct lockdep_map dep_map;
+};
+
+typedef struct liblockdep_pthread_rwlock liblockdep_pthread_rwlock_t;
+
+#define LIBLOCKDEP_PTHREAD_RWLOCK_INITIALIZER(rwl) \
+ (struct liblockdep_pthread_rwlock) { \
+ .rwlock = PTHREAD_RWLOCK_INITIALIZER, \
+ .dep_map = STATIC_LOCKDEP_MAP_INIT(#rwl, &((&(rwl))->dep_map)), \
+}
+
+static inline int __rwlock_init(liblockdep_pthread_rwlock_t *lock,
+ const char *name,
+ struct lock_class_key *key,
+ const pthread_rwlockattr_t *attr)
+{
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+
+ return pthread_rwlock_init(&lock->rwlock, attr);
+}
+
+#define liblockdep_pthread_rwlock_init(lock, attr) \
+({ \
+ static struct lock_class_key __key; \
+ \
+ __rwlock_init((lock), #lock, &__key, (attr)); \
+})
+
+static inline int liblockdep_pthread_rwlock_rdlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_rwlock_rdlock(&lock->rwlock);
+
+}
+
+static inline int liblockdep_pthread_rwlock_unlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_release(&lock->dep_map, 0, (unsigned long)_RET_IP_);
+ return pthread_rwlock_unlock(&lock->rwlock);
+}
+
+static inline int liblockdep_pthread_rwlock_wrlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_rwlock_wrlock(&lock->rwlock);
+}
+
+static inline int liblockdep_pthread_rwlock_tryrdlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_rwlock_tryrdlock(&lock->rwlock) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_pthread_rwlock_trywlock(liblockdep_pthread_rwlock_t *lock)
+{
+ lock_acquire(&lock->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+ return pthread_rwlock_trywlock(&lock->rwlock) == 0 ? 1 : 0;
+}
+
+static inline int liblockdep_rwlock_destroy(liblockdep_pthread_rwlock_t *lock)
+{
+ return pthread_rwlock_destroy(&lock->rwlock);
+}
+
+#ifdef __USE_LIBLOCKDEP
+
+#define pthread_rwlock_t liblockdep_pthread_rwlock_t
+#define pthread_rwlock_init liblockdep_pthread_rwlock_init
+#define pthread_rwlock_rdlock liblockdep_pthread_rwlock_rdlock
+#define pthread_rwlock_unlock liblockdep_pthread_rwlock_unlock
+#define pthread_rwlock_wrlock liblockdep_pthread_rwlock_wrlock
+#define pthread_rwlock_tryrdlock liblockdep_pthread_rwlock_tryrdlock
+#define pthread_rwlock_trywlock liblockdep_pthread_rwlock_trywlock
+#define pthread_rwlock_destroy liblockdep_rwlock_destroy
+
+#endif
+
+#endif
diff --git a/tools/lib/lockdep/lockdep b/tools/lib/lockdep/lockdep
new file mode 100755
index 00000000000..49af9fe19f5
--- /dev/null
+++ b/tools/lib/lockdep/lockdep
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+LD_PRELOAD="./liblockdep.so $LD_PRELOAD" "$@"
diff --git a/tools/lib/lockdep/lockdep.c b/tools/lib/lockdep/lockdep.c
new file mode 100644
index 00000000000..f42b7e9aa48
--- /dev/null
+++ b/tools/lib/lockdep/lockdep.c
@@ -0,0 +1,2 @@
+#include <linux/lockdep.h>
+#include "../../../kernel/locking/lockdep.c"
diff --git a/tools/lib/lockdep/lockdep_internals.h b/tools/lib/lockdep/lockdep_internals.h
new file mode 100644
index 00000000000..29d0c954cc2
--- /dev/null
+++ b/tools/lib/lockdep/lockdep_internals.h
@@ -0,0 +1 @@
+#include "../../../kernel/locking/lockdep_internals.h"
diff --git a/tools/lib/lockdep/lockdep_states.h b/tools/lib/lockdep/lockdep_states.h
new file mode 100644
index 00000000000..248d235efda
--- /dev/null
+++ b/tools/lib/lockdep/lockdep_states.h
@@ -0,0 +1 @@
+#include "../../../kernel/locking/lockdep_states.h"
diff --git a/tools/lib/lockdep/preload.c b/tools/lib/lockdep/preload.c
new file mode 100644
index 00000000000..6f803609e49
--- /dev/null
+++ b/tools/lib/lockdep/preload.c
@@ -0,0 +1,445 @@
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <stdio.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <sysexits.h>
+#include "include/liblockdep/mutex.h"
+#include "../../../include/linux/rbtree.h"
+
+/**
+ * struct lock_lookup - liblockdep's view of a single unique lock
+ * @orig: pointer to the original pthread lock, used for lookups
+ * @dep_map: lockdep's dep_map structure
+ * @key: lockdep's key structure
+ * @node: rb-tree node used to store the lock in a global tree
+ * @name: a unique name for the lock
+ */
+struct lock_lookup {
+ void *orig; /* Original pthread lock, used for lookups */
+ struct lockdep_map dep_map; /* Since all locks are dynamic, we need
+ * a dep_map and a key for each lock */
+ /*
+ * Wait, there's no support for key classes? Yup :(
+ * Most big projects wrap the pthread api with their own calls to
+ * be compatible with different locking methods. This means that
+ * "classes" will be brokes since the function that creates all
+ * locks will point to a generic locking function instead of the
+ * actual code that wants to do the locking.
+ */
+ struct lock_class_key key;
+ struct rb_node node;
+#define LIBLOCKDEP_MAX_LOCK_NAME 22
+ char name[LIBLOCKDEP_MAX_LOCK_NAME];
+};
+
+/* This is where we store our locks */
+static struct rb_root locks = RB_ROOT;
+static pthread_rwlock_t locks_rwlock = PTHREAD_RWLOCK_INITIALIZER;
+
+/* pthread mutex API */
+
+#ifdef __GLIBC__
+extern int __pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr);
+extern int __pthread_mutex_lock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_trylock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_unlock(pthread_mutex_t *mutex);
+extern int __pthread_mutex_destroy(pthread_mutex_t *mutex);
+#else
+#define __pthread_mutex_init NULL
+#define __pthread_mutex_lock NULL
+#define __pthread_mutex_trylock NULL
+#define __pthread_mutex_unlock NULL
+#define __pthread_mutex_destroy NULL
+#endif
+static int (*ll_pthread_mutex_init)(pthread_mutex_t *mutex,
+ const pthread_mutexattr_t *attr) = __pthread_mutex_init;
+static int (*ll_pthread_mutex_lock)(pthread_mutex_t *mutex) = __pthread_mutex_lock;
+static int (*ll_pthread_mutex_trylock)(pthread_mutex_t *mutex) = __pthread_mutex_trylock;
+static int (*ll_pthread_mutex_unlock)(pthread_mutex_t *mutex) = __pthread_mutex_unlock;
+static int (*ll_pthread_mutex_destroy)(pthread_mutex_t *mutex) = __pthread_mutex_destroy;
+
+/* pthread rwlock API */
+
+#ifdef __GLIBC__
+extern int __pthread_rwlock_init(pthread_rwlock_t *rwlock, const pthread_rwlockattr_t *attr);
+extern int __pthread_rwlock_destroy(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_wrlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_rdlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock);
+extern int __pthread_rwlock_unlock(pthread_rwlock_t *rwlock);
+#else
+#define __pthread_rwlock_init NULL
+#define __pthread_rwlock_destroy NULL
+#define __pthread_rwlock_wrlock NULL
+#define __pthread_rwlock_trywrlock NULL
+#define __pthread_rwlock_rdlock NULL
+#define __pthread_rwlock_tryrdlock NULL
+#define __pthread_rwlock_unlock NULL
+#endif
+
+static int (*ll_pthread_rwlock_init)(pthread_rwlock_t *rwlock,
+ const pthread_rwlockattr_t *attr) = __pthread_rwlock_init;
+static int (*ll_pthread_rwlock_destroy)(pthread_rwlock_t *rwlock) = __pthread_rwlock_destroy;
+static int (*ll_pthread_rwlock_rdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_rdlock;
+static int (*ll_pthread_rwlock_tryrdlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_tryrdlock;
+static int (*ll_pthread_rwlock_trywrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_trywrlock;
+static int (*ll_pthread_rwlock_wrlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_wrlock;
+static int (*ll_pthread_rwlock_unlock)(pthread_rwlock_t *rwlock) = __pthread_rwlock_unlock;
+
+enum { none, prepare, done, } __init_state;
+static void init_preload(void);
+static void try_init_preload(void)
+{
+ if (__init_state != done)
+ init_preload();
+}
+
+static struct rb_node **__get_lock_node(void *lock, struct rb_node **parent)
+{
+ struct rb_node **node = &locks.rb_node;
+ struct lock_lookup *l;
+
+ *parent = NULL;
+
+ while (*node) {
+ l = rb_entry(*node, struct lock_lookup, node);
+
+ *parent = *node;
+ if (lock < l->orig)
+ node = &l->node.rb_left;
+ else if (lock > l->orig)
+ node = &l->node.rb_right;
+ else
+ return node;
+ }
+
+ return node;
+}
+
+#ifndef LIBLOCKDEP_STATIC_ENTRIES
+#define LIBLOCKDEP_STATIC_ENTRIES 1024
+#endif
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static struct lock_lookup __locks[LIBLOCKDEP_STATIC_ENTRIES];
+static int __locks_nr;
+
+static inline bool is_static_lock(struct lock_lookup *lock)
+{
+ return lock >= __locks && lock < __locks + ARRAY_SIZE(__locks);
+}
+
+static struct lock_lookup *alloc_lock(void)
+{
+ if (__init_state != done) {
+ /*
+ * Some programs attempt to initialize and use locks in their
+ * allocation path. This means that a call to malloc() would
+ * result in locks being initialized and locked.
+ *
+ * Why is it an issue for us? dlsym() below will try allocating
+ * to give us the original function. Since this allocation will
+ * result in a locking operations, we have to let pthread deal
+ * with it, but we can't! we don't have the pointer to the
+ * original API since we're inside dlsym() trying to get it
+ */
+
+ int idx = __locks_nr++;
+ if (idx >= ARRAY_SIZE(__locks)) {
+ fprintf(stderr,
+ "LOCKDEP error: insufficient LIBLOCKDEP_STATIC_ENTRIES\n");
+ exit(EX_UNAVAILABLE);
+ }
+ return __locks + idx;
+ }
+
+ return malloc(sizeof(struct lock_lookup));
+}
+
+static inline void free_lock(struct lock_lookup *lock)
+{
+ if (likely(!is_static_lock(lock)))
+ free(lock);
+}
+
+/**
+ * __get_lock - find or create a lock instance
+ * @lock: pointer to a pthread lock function
+ *
+ * Try to find an existing lock in the rbtree using the provided pointer. If
+ * one wasn't found - create it.
+ */
+static struct lock_lookup *__get_lock(void *lock)
+{
+ struct rb_node **node, *parent;
+ struct lock_lookup *l;
+
+ ll_pthread_rwlock_rdlock(&locks_rwlock);
+ node = __get_lock_node(lock, &parent);
+ ll_pthread_rwlock_unlock(&locks_rwlock);
+ if (*node) {
+ return rb_entry(*node, struct lock_lookup, node);
+ }
+
+ /* We didn't find the lock, let's create it */
+ l = alloc_lock();
+ if (l == NULL)
+ return NULL;
+
+ l->orig = lock;
+ /*
+ * Currently the name of the lock is the ptr value of the pthread lock,
+ * while not optimal, it makes debugging a bit easier.
+ *
+ * TODO: Get the real name of the lock using libdwarf
+ */
+ sprintf(l->name, "%p", lock);
+ lockdep_init_map(&l->dep_map, l->name, &l->key, 0);
+
+ ll_pthread_rwlock_wrlock(&locks_rwlock);
+ /* This might have changed since the last time we fetched it */
+ node = __get_lock_node(lock, &parent);
+ rb_link_node(&l->node, parent, node);
+ rb_insert_color(&l->node, &locks);
+ ll_pthread_rwlock_unlock(&locks_rwlock);
+
+ return l;
+}
+
+static void __del_lock(struct lock_lookup *lock)
+{
+ ll_pthread_rwlock_wrlock(&locks_rwlock);
+ rb_erase(&lock->node, &locks);
+ ll_pthread_rwlock_unlock(&locks_rwlock);
+ free_lock(lock);
+}
+
+int pthread_mutex_init(pthread_mutex_t *mutex,
+ const pthread_mutexattr_t *attr)
+{
+ int r;
+
+ /*
+ * We keep trying to init our preload module because there might be
+ * code in init sections that tries to touch locks before we are
+ * initialized, in that case we'll need to manually call preload
+ * to get us going.
+ *
+ * Funny enough, kernel's lockdep had the same issue, and used
+ * (almost) the same solution. See look_up_lock_class() in
+ * kernel/locking/lockdep.c for details.
+ */
+ try_init_preload();
+
+ r = ll_pthread_mutex_init(mutex, attr);
+ if (r == 0)
+ /*
+ * We do a dummy initialization here so that lockdep could
+ * warn us if something fishy is going on - such as
+ * initializing a held lock.
+ */
+ __get_lock(mutex);
+
+ return r;
+}
+
+int pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+ int r;
+
+ try_init_preload();
+
+ lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL,
+ (unsigned long)_RET_IP_);
+ /*
+ * Here's the thing with pthread mutexes: unlike the kernel variant,
+ * they can fail.
+ *
+ * This means that the behaviour here is a bit different from what's
+ * going on in the kernel: there we just tell lockdep that we took the
+ * lock before actually taking it, but here we must deal with the case
+ * that locking failed.
+ *
+ * To do that we'll "release" the lock if locking failed - this way
+ * we'll get lockdep doing the correct checks when we try to take
+ * the lock, and if that fails - we'll be back to the correct
+ * state by releasing it.
+ */
+ r = ll_pthread_mutex_lock(mutex);
+ if (r)
+ lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+ int r;
+
+ try_init_preload();
+
+ lock_acquire(&__get_lock(mutex)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_mutex_trylock(mutex);
+ if (r)
+ lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+ int r;
+
+ try_init_preload();
+
+ lock_release(&__get_lock(mutex)->dep_map, 0, (unsigned long)_RET_IP_);
+ /*
+ * Just like taking a lock, only in reverse!
+ *
+ * If we fail releasing the lock, tell lockdep we're holding it again.
+ */
+ r = ll_pthread_mutex_unlock(mutex);
+ if (r)
+ lock_acquire(&__get_lock(mutex)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+ try_init_preload();
+
+ /*
+ * Let's see if we're releasing a lock that's held.
+ *
+ * TODO: Hook into free() and add that check there as well.
+ */
+ debug_check_no_locks_freed(mutex, mutex + sizeof(*mutex));
+ __del_lock(__get_lock(mutex));
+ return ll_pthread_mutex_destroy(mutex);
+}
+
+/* This is the rwlock part, very similar to what happened with mutex above */
+int pthread_rwlock_init(pthread_rwlock_t *rwlock,
+ const pthread_rwlockattr_t *attr)
+{
+ int r;
+
+ try_init_preload();
+
+ r = ll_pthread_rwlock_init(rwlock, attr);
+ if (r == 0)
+ __get_lock(rwlock);
+
+ return r;
+}
+
+int pthread_rwlock_destroy(pthread_rwlock_t *rwlock)
+{
+ try_init_preload();
+
+ debug_check_no_locks_freed(rwlock, rwlock + sizeof(*rwlock));
+ __del_lock(__get_lock(rwlock));
+ return ll_pthread_rwlock_destroy(rwlock);
+}
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 2, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_rdlock(rwlock);
+ if (r)
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 2, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_tryrdlock(rwlock);
+ if (r)
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_rwlock_trywrlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 1, 0, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_trywrlock(rwlock);
+ if (r)
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_wrlock(rwlock);
+ if (r)
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+int pthread_rwlock_unlock(pthread_rwlock_t *rwlock)
+{
+ int r;
+
+ init_preload();
+
+ lock_release(&__get_lock(rwlock)->dep_map, 0, (unsigned long)_RET_IP_);
+ r = ll_pthread_rwlock_unlock(rwlock);
+ if (r)
+ lock_acquire(&__get_lock(rwlock)->dep_map, 0, 0, 0, 1, NULL, (unsigned long)_RET_IP_);
+
+ return r;
+}
+
+__attribute__((constructor)) static void init_preload(void)
+{
+ if (__init_state == done)
+ return;
+
+#ifndef __GLIBC__
+ __init_state = prepare;
+
+ ll_pthread_mutex_init = dlsym(RTLD_NEXT, "pthread_mutex_init");
+ ll_pthread_mutex_lock = dlsym(RTLD_NEXT, "pthread_mutex_lock");
+ ll_pthread_mutex_trylock = dlsym(RTLD_NEXT, "pthread_mutex_trylock");
+ ll_pthread_mutex_unlock = dlsym(RTLD_NEXT, "pthread_mutex_unlock");
+ ll_pthread_mutex_destroy = dlsym(RTLD_NEXT, "pthread_mutex_destroy");
+
+ ll_pthread_rwlock_init = dlsym(RTLD_NEXT, "pthread_rwlock_init");
+ ll_pthread_rwlock_destroy = dlsym(RTLD_NEXT, "pthread_rwlock_destroy");
+ ll_pthread_rwlock_rdlock = dlsym(RTLD_NEXT, "pthread_rwlock_rdlock");
+ ll_pthread_rwlock_tryrdlock = dlsym(RTLD_NEXT, "pthread_rwlock_tryrdlock");
+ ll_pthread_rwlock_wrlock = dlsym(RTLD_NEXT, "pthread_rwlock_wrlock");
+ ll_pthread_rwlock_trywrlock = dlsym(RTLD_NEXT, "pthread_rwlock_trywrlock");
+ ll_pthread_rwlock_unlock = dlsym(RTLD_NEXT, "pthread_rwlock_unlock");
+#endif
+
+ lockdep_init();
+
+ __init_state = done;
+}
diff --git a/tools/lib/lockdep/rbtree.c b/tools/lib/lockdep/rbtree.c
new file mode 100644
index 00000000000..f7f43033c8b
--- /dev/null
+++ b/tools/lib/lockdep/rbtree.c
@@ -0,0 +1 @@
+#include "../../../lib/rbtree.c"
diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh
new file mode 100755
index 00000000000..5334ad9d39b
--- /dev/null
+++ b/tools/lib/lockdep/run_tests.sh
@@ -0,0 +1,27 @@
+#! /bin/bash
+
+make &> /dev/null
+
+for i in `ls tests/*.c`; do
+ testname=$(basename -s .c "$i")
+ gcc -o tests/$testname -pthread -lpthread $i liblockdep.a -Iinclude -D__USE_LIBLOCKDEP &> /dev/null
+ echo -ne "$testname... "
+ if [ $(timeout 1 ./tests/$testname | wc -l) -gt 0 ]; then
+ echo "PASSED!"
+ else
+ echo "FAILED!"
+ fi
+ rm tests/$testname
+done
+
+for i in `ls tests/*.c`; do
+ testname=$(basename -s .c "$i")
+ gcc -o tests/$testname -pthread -lpthread -Iinclude $i &> /dev/null
+ echo -ne "(PRELOAD) $testname... "
+ if [ $(timeout 1 ./lockdep ./tests/$testname | wc -l) -gt 0 ]; then
+ echo "PASSED!"
+ else
+ echo "FAILED!"
+ fi
+ rm tests/$testname
+done
diff --git a/tools/lib/lockdep/tests/AA.c b/tools/lib/lockdep/tests/AA.c
new file mode 100644
index 00000000000..0f782ff404a
--- /dev/null
+++ b/tools/lib/lockdep/tests/AA.c
@@ -0,0 +1,13 @@
+#include <liblockdep/mutex.h>
+
+void main(void)
+{
+ pthread_mutex_t a, b;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+
+ pthread_mutex_lock(&a);
+ pthread_mutex_lock(&b);
+ pthread_mutex_lock(&a);
+}
diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c
new file mode 100644
index 00000000000..07f0e29d548
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBA.c
@@ -0,0 +1,13 @@
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+ pthread_mutex_t a, b;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(b, a);
+}
diff --git a/tools/lib/lockdep/tests/ABBCCA.c b/tools/lib/lockdep/tests/ABBCCA.c
new file mode 100644
index 00000000000..843db09ac66
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCA.c
@@ -0,0 +1,15 @@
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+ pthread_mutex_t a, b, c;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(b, c);
+ LOCK_UNLOCK_2(c, a);
+}
diff --git a/tools/lib/lockdep/tests/ABBCCDDA.c b/tools/lib/lockdep/tests/ABBCCDDA.c
new file mode 100644
index 00000000000..33620e268f8
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABBCCDDA.c
@@ -0,0 +1,17 @@
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+ pthread_mutex_t a, b, c, d;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+ pthread_mutex_init(&d, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(b, c);
+ LOCK_UNLOCK_2(c, d);
+ LOCK_UNLOCK_2(d, a);
+}
diff --git a/tools/lib/lockdep/tests/ABCABC.c b/tools/lib/lockdep/tests/ABCABC.c
new file mode 100644
index 00000000000..3fee51e3a68
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCABC.c
@@ -0,0 +1,15 @@
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+ pthread_mutex_t a, b, c;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(c, a);
+ LOCK_UNLOCK_2(b, c);
+}
diff --git a/tools/lib/lockdep/tests/ABCDBCDA.c b/tools/lib/lockdep/tests/ABCDBCDA.c
new file mode 100644
index 00000000000..427ba562c75
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBCDA.c
@@ -0,0 +1,17 @@
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+ pthread_mutex_t a, b, c, d;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+ pthread_mutex_init(&d, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(c, d);
+ LOCK_UNLOCK_2(b, c);
+ LOCK_UNLOCK_2(d, a);
+}
diff --git a/tools/lib/lockdep/tests/ABCDBDDA.c b/tools/lib/lockdep/tests/ABCDBDDA.c
new file mode 100644
index 00000000000..680c6cf3e91
--- /dev/null
+++ b/tools/lib/lockdep/tests/ABCDBDDA.c
@@ -0,0 +1,17 @@
+#include <liblockdep/mutex.h>
+#include "common.h"
+
+void main(void)
+{
+ pthread_mutex_t a, b, c, d;
+
+ pthread_mutex_init(&a, NULL);
+ pthread_mutex_init(&b, NULL);
+ pthread_mutex_init(&c, NULL);
+ pthread_mutex_init(&d, NULL);
+
+ LOCK_UNLOCK_2(a, b);
+ LOCK_UNLOCK_2(c, d);
+ LOCK_UNLOCK_2(b, d);
+ LOCK_UNLOCK_2(d, a);
+}
diff --git a/tools/lib/lockdep/tests/WW.c b/tools/lib/lockdep/tests/WW.c
new file mode 100644
index 00000000000..d44f77d7102
--- /dev/null
+++ b/tools/lib/lockdep/tests/WW.c
@@ -0,0 +1,13 @@
+#include <liblockdep/rwlock.h>
+
+void main(void)
+{
+ pthread_rwlock_t a, b;
+
+ pthread_rwlock_init(&a, NULL);
+ pthread_rwlock_init(&b, NULL);
+
+ pthread_rwlock_wrlock(&a);
+ pthread_rwlock_rdlock(&b);
+ pthread_rwlock_wrlock(&a);
+}
diff --git a/tools/lib/lockdep/tests/common.h b/tools/lib/lockdep/tests/common.h
new file mode 100644
index 00000000000..d89e94d47d8
--- /dev/null
+++ b/tools/lib/lockdep/tests/common.h
@@ -0,0 +1,12 @@
+#ifndef _LIBLOCKDEP_TEST_COMMON_H
+#define _LIBLOCKDEP_TEST_COMMON_H
+
+#define LOCK_UNLOCK_2(a, b) \
+ do { \
+ pthread_mutex_lock(&(a)); \
+ pthread_mutex_lock(&(b)); \
+ pthread_mutex_unlock(&(b)); \
+ pthread_mutex_unlock(&(a)); \
+ } while(0)
+
+#endif
diff --git a/tools/lib/lockdep/tests/unlock_balance.c b/tools/lib/lockdep/tests/unlock_balance.c
new file mode 100644
index 00000000000..0bc62de686f
--- /dev/null
+++ b/tools/lib/lockdep/tests/unlock_balance.c
@@ -0,0 +1,12 @@
+#include <liblockdep/mutex.h>
+
+void main(void)
+{
+ pthread_mutex_t a;
+
+ pthread_mutex_init(&a, NULL);
+
+ pthread_mutex_lock(&a);
+ pthread_mutex_unlock(&a);
+ pthread_mutex_unlock(&a);
+}
diff --git a/tools/lib/lockdep/uinclude/asm/hash.h b/tools/lib/lockdep/uinclude/asm/hash.h
new file mode 100644
index 00000000000..d82b170bb21
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/asm/hash.h
@@ -0,0 +1,6 @@
+#ifndef __ASM_GENERIC_HASH_H
+#define __ASM_GENERIC_HASH_H
+
+/* Stub */
+
+#endif /* __ASM_GENERIC_HASH_H */
diff --git a/tools/lib/lockdep/uinclude/asm/hweight.h b/tools/lib/lockdep/uinclude/asm/hweight.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/asm/hweight.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/asm/sections.h b/tools/lib/lockdep/uinclude/asm/sections.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/asm/sections.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/bitops.h b/tools/lib/lockdep/uinclude/linux/bitops.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/bitops.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/compiler.h b/tools/lib/lockdep/uinclude/linux/compiler.h
new file mode 100644
index 00000000000..7ac838a1f19
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/compiler.h
@@ -0,0 +1,7 @@
+#ifndef _LIBLOCKDEP_LINUX_COMPILER_H_
+#define _LIBLOCKDEP_LINUX_COMPILER_H_
+
+#define __used __attribute__((__unused__))
+#define unlikely
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/debug_locks.h b/tools/lib/lockdep/uinclude/linux/debug_locks.h
new file mode 100644
index 00000000000..f38eb64df79
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/debug_locks.h
@@ -0,0 +1,12 @@
+#ifndef _LIBLOCKDEP_DEBUG_LOCKS_H_
+#define _LIBLOCKDEP_DEBUG_LOCKS_H_
+
+#include <stddef.h>
+#include <linux/compiler.h>
+
+#define DEBUG_LOCKS_WARN_ON(x) (x)
+
+extern bool debug_locks;
+extern bool debug_locks_silent;
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/delay.h b/tools/lib/lockdep/uinclude/linux/delay.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/delay.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/ftrace.h b/tools/lib/lockdep/uinclude/linux/ftrace.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/ftrace.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/gfp.h b/tools/lib/lockdep/uinclude/linux/gfp.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/gfp.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/hardirq.h b/tools/lib/lockdep/uinclude/linux/hardirq.h
new file mode 100644
index 00000000000..c8f3f8f5872
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/hardirq.h
@@ -0,0 +1,11 @@
+#ifndef _LIBLOCKDEP_LINUX_HARDIRQ_H_
+#define _LIBLOCKDEP_LINUX_HARDIRQ_H_
+
+#define SOFTIRQ_BITS 0UL
+#define HARDIRQ_BITS 0UL
+#define SOFTIRQ_SHIFT 0UL
+#define HARDIRQ_SHIFT 0UL
+#define hardirq_count() 0UL
+#define softirq_count() 0UL
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/hash.h b/tools/lib/lockdep/uinclude/linux/hash.h
new file mode 100644
index 00000000000..0f8479858dc
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/hash.h
@@ -0,0 +1 @@
+#include "../../../include/linux/hash.h"
diff --git a/tools/lib/lockdep/uinclude/linux/interrupt.h b/tools/lib/lockdep/uinclude/linux/interrupt.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/interrupt.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/irqflags.h b/tools/lib/lockdep/uinclude/linux/irqflags.h
new file mode 100644
index 00000000000..6cc296f0fad
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/irqflags.h
@@ -0,0 +1,38 @@
+#ifndef _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
+#define _LIBLOCKDEP_LINUX_TRACE_IRQFLAGS_H_
+
+# define trace_hardirq_context(p) 0
+# define trace_softirq_context(p) 0
+# define trace_hardirqs_enabled(p) 0
+# define trace_softirqs_enabled(p) 0
+# define trace_hardirq_enter() do { } while (0)
+# define trace_hardirq_exit() do { } while (0)
+# define lockdep_softirq_enter() do { } while (0)
+# define lockdep_softirq_exit() do { } while (0)
+# define INIT_TRACE_IRQFLAGS
+
+# define stop_critical_timings() do { } while (0)
+# define start_critical_timings() do { } while (0)
+
+#define raw_local_irq_disable() do { } while (0)
+#define raw_local_irq_enable() do { } while (0)
+#define raw_local_irq_save(flags) ((flags) = 0)
+#define raw_local_irq_restore(flags) do { } while (0)
+#define raw_local_save_flags(flags) ((flags) = 0)
+#define raw_irqs_disabled_flags(flags) do { } while (0)
+#define raw_irqs_disabled() 0
+#define raw_safe_halt()
+
+#define local_irq_enable() do { } while (0)
+#define local_irq_disable() do { } while (0)
+#define local_irq_save(flags) ((flags) = 0)
+#define local_irq_restore(flags) do { } while (0)
+#define local_save_flags(flags) ((flags) = 0)
+#define irqs_disabled() (1)
+#define irqs_disabled_flags(flags) (0)
+#define safe_halt() do { } while (0)
+
+#define trace_lock_release(x, y)
+#define trace_lock_acquire(a, b, c, d, e, f, g)
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/kallsyms.h b/tools/lib/lockdep/uinclude/linux/kallsyms.h
new file mode 100644
index 00000000000..b0f2dbdf1a1
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/kallsyms.h
@@ -0,0 +1,32 @@
+#ifndef _LIBLOCKDEP_LINUX_KALLSYMS_H_
+#define _LIBLOCKDEP_LINUX_KALLSYMS_H_
+
+#include <linux/kernel.h>
+#include <stdio.h>
+
+#define KSYM_NAME_LEN 128
+
+struct module;
+
+static inline const char *kallsyms_lookup(unsigned long addr,
+ unsigned long *symbolsize,
+ unsigned long *offset,
+ char **modname, char *namebuf)
+{
+ return NULL;
+}
+
+#include <execinfo.h>
+#include <stdlib.h>
+static inline void print_ip_sym(unsigned long ip)
+{
+ char **name;
+
+ name = backtrace_symbols((void **)&ip, 1);
+
+ printf("%s\n", *name);
+
+ free(name);
+}
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/kern_levels.h b/tools/lib/lockdep/uinclude/linux/kern_levels.h
new file mode 100644
index 00000000000..3b9bade2869
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/kern_levels.h
@@ -0,0 +1,25 @@
+#ifndef __KERN_LEVELS_H__
+#define __KERN_LEVELS_H__
+
+#define KERN_SOH "" /* ASCII Start Of Header */
+#define KERN_SOH_ASCII ''
+
+#define KERN_EMERG KERN_SOH "" /* system is unusable */
+#define KERN_ALERT KERN_SOH "" /* action must be taken immediately */
+#define KERN_CRIT KERN_SOH "" /* critical conditions */
+#define KERN_ERR KERN_SOH "" /* error conditions */
+#define KERN_WARNING KERN_SOH "" /* warning conditions */
+#define KERN_NOTICE KERN_SOH "" /* normal but significant condition */
+#define KERN_INFO KERN_SOH "" /* informational */
+#define KERN_DEBUG KERN_SOH "" /* debug-level messages */
+
+#define KERN_DEFAULT KERN_SOH "" /* the default kernel loglevel */
+
+/*
+ * Annotation for a "continued" line of log printout (only done after a
+ * line that had no enclosing \n). Only to be used by core/arch code
+ * during early bootup (a continued line is not SMP-safe otherwise).
+ */
+#define KERN_CONT ""
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h
new file mode 100644
index 00000000000..a11e3c357be
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/kernel.h
@@ -0,0 +1,44 @@
+#ifndef _LIBLOCKDEP_LINUX_KERNEL_H_
+#define _LIBLOCKDEP_LINUX_KERNEL_H_
+
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/rcu.h>
+#include <linux/hardirq.h>
+#include <linux/kern_levels.h>
+
+#ifndef container_of
+#define container_of(ptr, type, member) ({ \
+ const typeof(((type *)0)->member) * __mptr = (ptr); \
+ (type *)((char *)__mptr - offsetof(type, member)); })
+#endif
+
+#define max(x, y) ({ \
+ typeof(x) _max1 = (x); \
+ typeof(y) _max2 = (y); \
+ (void) (&_max1 == &_max2); \
+ _max1 > _max2 ? _max1 : _max2; })
+
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+#define WARN_ON(x) (x)
+#define WARN_ON_ONCE(x) (x)
+#define likely(x) (x)
+#define WARN(x, y, z) (x)
+#define uninitialized_var(x) x
+#define __init
+#define noinline
+#define list_add_tail_rcu list_add_tail
+
+#ifndef CALLER_ADDR0
+#define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0))
+#endif
+
+#ifndef _RET_IP_
+#define _RET_IP_ CALLER_ADDR0
+#endif
+
+#ifndef _THIS_IP_
+#define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; })
+#endif
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/kmemcheck.h b/tools/lib/lockdep/uinclude/linux/kmemcheck.h
new file mode 100644
index 00000000000..94d598bc6ab
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/kmemcheck.h
@@ -0,0 +1,8 @@
+#ifndef _LIBLOCKDEP_LINUX_KMEMCHECK_H_
+#define _LIBLOCKDEP_LINUX_KMEMCHECK_H_
+
+static inline void kmemcheck_mark_initialized(void *address, unsigned int n)
+{
+}
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/linkage.h b/tools/lib/lockdep/uinclude/linux/linkage.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/linkage.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/list.h b/tools/lib/lockdep/uinclude/linux/list.h
new file mode 100644
index 00000000000..6e9ef31ed82
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/list.h
@@ -0,0 +1 @@
+#include "../../../include/linux/list.h"
diff --git a/tools/lib/lockdep/uinclude/linux/lockdep.h b/tools/lib/lockdep/uinclude/linux/lockdep.h
new file mode 100644
index 00000000000..c1552c28507
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/lockdep.h
@@ -0,0 +1,58 @@
+#ifndef _LIBLOCKDEP_LOCKDEP_H_
+#define _LIBLOCKDEP_LOCKDEP_H_
+
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <limits.h>
+#include <linux/utsname.h>
+
+
+#define MAX_LOCK_DEPTH 2000UL
+
+#define asmlinkage
+#define __visible
+
+#include "../../../include/linux/lockdep.h"
+
+struct task_struct {
+ u64 curr_chain_key;
+ int lockdep_depth;
+ unsigned int lockdep_recursion;
+ struct held_lock held_locks[MAX_LOCK_DEPTH];
+ gfp_t lockdep_reclaim_gfp;
+ int pid;
+ char comm[17];
+};
+
+extern struct task_struct *__curr(void);
+
+#define current (__curr())
+
+#define debug_locks_off() 1
+#define task_pid_nr(tsk) ((tsk)->pid)
+
+#define KSYM_NAME_LEN 128
+#define printk printf
+
+#define list_del_rcu list_del
+
+#define atomic_t unsigned long
+#define atomic_inc(x) ((*(x))++)
+
+static struct new_utsname *init_utsname(void)
+{
+ static struct new_utsname n = (struct new_utsname) {
+ .release = "liblockdep",
+ .version = LIBLOCKDEP_VERSION,
+ };
+
+ return &n;
+}
+
+#define print_tainted() ""
+#define static_obj(x) 1
+
+#define debug_show_all_locks()
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/module.h b/tools/lib/lockdep/uinclude/linux/module.h
new file mode 100644
index 00000000000..09c7a7be8cc
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/module.h
@@ -0,0 +1,6 @@
+#ifndef _LIBLOCKDEP_LINUX_MODULE_H_
+#define _LIBLOCKDEP_LINUX_MODULE_H_
+
+#define module_param(name, type, perm)
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/mutex.h b/tools/lib/lockdep/uinclude/linux/mutex.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/mutex.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/poison.h b/tools/lib/lockdep/uinclude/linux/poison.h
new file mode 100644
index 00000000000..0c27bdf1423
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/poison.h
@@ -0,0 +1 @@
+#include "../../../include/linux/poison.h"
diff --git a/tools/lib/lockdep/uinclude/linux/prefetch.h b/tools/lib/lockdep/uinclude/linux/prefetch.h
new file mode 100644
index 00000000000..d73fe6f850a
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/prefetch.h
@@ -0,0 +1,6 @@
+#ifndef _LIBLOCKDEP_LINUX_PREFETCH_H_
+#define _LIBLOCKDEP_LINUX_PREFETCH_H
+
+static inline void prefetch(void *a __attribute__((unused))) { }
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/proc_fs.h b/tools/lib/lockdep/uinclude/linux/proc_fs.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/proc_fs.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/rbtree.h b/tools/lib/lockdep/uinclude/linux/rbtree.h
new file mode 100644
index 00000000000..965901db486
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/rbtree.h
@@ -0,0 +1 @@
+#include "../../../include/linux/rbtree.h"
diff --git a/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h
new file mode 100644
index 00000000000..c3759477379
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/rbtree_augmented.h
@@ -0,0 +1,2 @@
+#define __always_inline
+#include "../../../include/linux/rbtree_augmented.h"
diff --git a/tools/lib/lockdep/uinclude/linux/rcu.h b/tools/lib/lockdep/uinclude/linux/rcu.h
new file mode 100644
index 00000000000..042ee8e463c
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/rcu.h
@@ -0,0 +1,21 @@
+#ifndef _LIBLOCKDEP_RCU_H_
+#define _LIBLOCKDEP_RCU_H_
+
+int rcu_scheduler_active;
+
+static inline int rcu_lockdep_current_cpu_online(void)
+{
+ return 1;
+}
+
+static inline int rcu_is_cpu_idle(void)
+{
+ return 1;
+}
+
+static inline bool rcu_is_watching(void)
+{
+ return false;
+}
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/seq_file.h b/tools/lib/lockdep/uinclude/linux/seq_file.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/seq_file.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/lockdep/uinclude/linux/spinlock.h b/tools/lib/lockdep/uinclude/linux/spinlock.h
new file mode 100644
index 00000000000..68c1aa2bcba
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/spinlock.h
@@ -0,0 +1,25 @@
+#ifndef _LIBLOCKDEP_SPINLOCK_H_
+#define _LIBLOCKDEP_SPINLOCK_H_
+
+#include <pthread.h>
+#include <stdbool.h>
+
+#define arch_spinlock_t pthread_mutex_t
+#define __ARCH_SPIN_LOCK_UNLOCKED PTHREAD_MUTEX_INITIALIZER
+
+static inline void arch_spin_lock(arch_spinlock_t *mutex)
+{
+ pthread_mutex_lock(mutex);
+}
+
+static inline void arch_spin_unlock(arch_spinlock_t *mutex)
+{
+ pthread_mutex_unlock(mutex);
+}
+
+static inline bool arch_spin_is_locked(arch_spinlock_t *mutex)
+{
+ return true;
+}
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/stacktrace.h b/tools/lib/lockdep/uinclude/linux/stacktrace.h
new file mode 100644
index 00000000000..39aecc6b19d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/stacktrace.h
@@ -0,0 +1,32 @@
+#ifndef _LIBLOCKDEP_LINUX_STACKTRACE_H_
+#define _LIBLOCKDEP_LINUX_STACKTRACE_H_
+
+#include <execinfo.h>
+
+struct stack_trace {
+ unsigned int nr_entries, max_entries;
+ unsigned long *entries;
+ int skip;
+};
+
+static inline void print_stack_trace(struct stack_trace *trace, int spaces)
+{
+ backtrace_symbols_fd((void **)trace->entries, trace->nr_entries, 1);
+}
+
+#define save_stack_trace(trace) \
+ ((trace)->nr_entries = \
+ backtrace((void **)(trace)->entries, (trace)->max_entries))
+
+static inline int dump_stack(void)
+{
+ void *array[64];
+ size_t size;
+
+ size = backtrace(array, 64);
+ backtrace_symbols_fd(array, size, 1);
+
+ return 0;
+}
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/linux/stringify.h b/tools/lib/lockdep/uinclude/linux/stringify.h
new file mode 100644
index 00000000000..05dfcd1ac11
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/linux/stringify.h
@@ -0,0 +1,7 @@
+#ifndef _LIBLOCKDEP_LINUX_STRINGIFY_H_
+#define _LIBLOCKDEP_LINUX_STRINGIFY_H_
+
+#define __stringify_1(x...) #x
+#define __stringify(x...) __stringify_1(x)
+
+#endif
diff --git a/tools/lib/lockdep/uinclude/trace/events/lock.h b/tools/lib/lockdep/uinclude/trace/events/lock.h
new file mode 100644
index 00000000000..fab00ff936d
--- /dev/null
+++ b/tools/lib/lockdep/uinclude/trace/events/lock.h
@@ -0,0 +1,3 @@
+
+/* empty file */
+
diff --git a/tools/lib/symbol/kallsyms.c b/tools/lib/symbol/kallsyms.c
new file mode 100644
index 00000000000..18bc271a4bb
--- /dev/null
+++ b/tools/lib/symbol/kallsyms.c
@@ -0,0 +1,58 @@
+#include "symbol/kallsyms.h"
+#include <stdio.h>
+#include <stdlib.h>
+
+int kallsyms__parse(const char *filename, void *arg,
+ int (*process_symbol)(void *arg, const char *name,
+ char type, u64 start))
+{
+ char *line = NULL;
+ size_t n;
+ int err = -1;
+ FILE *file = fopen(filename, "r");
+
+ if (file == NULL)
+ goto out_failure;
+
+ err = 0;
+
+ while (!feof(file)) {
+ u64 start;
+ int line_len, len;
+ char symbol_type;
+ char *symbol_name;
+
+ line_len = getline(&line, &n, file);
+ if (line_len < 0 || !line)
+ break;
+
+ line[--line_len] = '\0'; /* \n */
+
+ len = hex2u64(line, &start);
+
+ len++;
+ if (len + 2 >= line_len)
+ continue;
+
+ symbol_type = line[len];
+ len += 2;
+ symbol_name = line + len;
+ len = line_len - len;
+
+ if (len >= KSYM_NAME_LEN) {
+ err = -1;
+ break;
+ }
+
+ err = process_symbol(arg, symbol_name, symbol_type, start);
+ if (err)
+ break;
+ }
+
+ free(line);
+ fclose(file);
+ return err;
+
+out_failure:
+ return -1;
+}
diff --git a/tools/lib/symbol/kallsyms.h b/tools/lib/symbol/kallsyms.h
new file mode 100644
index 00000000000..6084f5e18b3
--- /dev/null
+++ b/tools/lib/symbol/kallsyms.h
@@ -0,0 +1,24 @@
+#ifndef __TOOLS_KALLSYMS_H_
+#define __TOOLS_KALLSYMS_H_ 1
+
+#include <elf.h>
+#include <linux/ctype.h>
+#include <linux/types.h>
+
+#ifndef KSYM_NAME_LEN
+#define KSYM_NAME_LEN 256
+#endif
+
+static inline u8 kallsyms2elf_type(char type)
+{
+ if (type == 'W')
+ return STB_WEAK;
+
+ return isupper(type) ? STB_GLOBAL : STB_LOCAL;
+}
+
+int kallsyms__parse(const char *filename, void *arg,
+ int (*process_symbol)(void *arg, const char *name,
+ char type, u64 start));
+
+#endif /* __TOOLS_KALLSYMS_H_ */
diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
new file mode 100644
index 00000000000..35f56be5a4c
--- /dev/null
+++ b/tools/lib/traceevent/.gitignore
@@ -0,0 +1 @@
+TRACEEVENT-CFLAGS
diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
new file mode 100644
index 00000000000..005c9cc0693
--- /dev/null
+++ b/tools/lib/traceevent/Makefile
@@ -0,0 +1,340 @@
+# trace-cmd version
+EP_VERSION = 1
+EP_PATCHLEVEL = 1
+EP_EXTRAVERSION = 0
+
+# file format version
+FILE_VERSION = 6
+
+MAKEFLAGS += --no-print-directory
+
+
+# Makefiles suck: This macro sets a default value of $(2) for the
+# variable named by $(1), unless the variable has been set by
+# environment or command line. This is necessary for CC and AR
+# because make sets default values, so the simpler ?= approach
+# won't work as expected.
+define allow-override
+ $(if $(or $(findstring environment,$(origin $(1))),\
+ $(findstring command line,$(origin $(1)))),,\
+ $(eval $(1) = $(2)))
+endef
+
+# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
+$(call allow-override,CC,$(CROSS_COMPILE)gcc)
+$(call allow-override,AR,$(CROSS_COMPILE)ar)
+
+EXT = -std=gnu99
+INSTALL = install
+
+# Use DESTDIR for installing into a different root directory.
+# This is useful for building a package. The program will be
+# installed in this directory as if it was the root directory.
+# Then the build tool can move it later.
+DESTDIR ?=
+DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
+
+prefix ?= /usr/local
+bindir_relative = bin
+bindir = $(prefix)/$(bindir_relative)
+man_dir = $(prefix)/share/man
+man_dir_SQ = '$(subst ','\'',$(man_dir))'
+
+export man_dir man_dir_SQ INSTALL
+export DESTDIR DESTDIR_SQ
+
+set_plugin_dir := 1
+
+# Set plugin_dir to preffered global plugin location
+# If we install under $HOME directory we go under
+# $(HOME)/.traceevent/plugins
+#
+# We dont set PLUGIN_DIR in case we install under $HOME
+# directory, because by default the code looks under:
+# $(HOME)/.traceevent/plugins by default.
+#
+ifeq ($(plugin_dir),)
+ifeq ($(prefix),$(HOME))
+override plugin_dir = $(HOME)/.traceevent/plugins
+set_plugin_dir := 0
+else
+override plugin_dir = $(prefix)/lib/traceevent/plugins
+endif
+endif
+
+ifeq ($(set_plugin_dir),1)
+PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)"
+PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))'
+endif
+
+include $(if $(BUILD_SRC),$(BUILD_SRC)/)../../scripts/Makefile.include
+
+# copy a bit from Linux kbuild
+
+ifeq ("$(origin V)", "command line")
+ VERBOSE = $(V)
+endif
+ifndef VERBOSE
+ VERBOSE = 0
+endif
+
+ifeq ("$(origin O)", "command line")
+ BUILD_OUTPUT := $(O)
+endif
+
+ifeq ($(BUILD_SRC),)
+ifneq ($(OUTPUT),)
+
+define build_output
+ $(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \
+ BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1
+endef
+
+all: sub-make
+
+$(MAKECMDGOALS): sub-make
+
+sub-make: force
+ $(call build_output, $(MAKECMDGOALS))
+
+
+# Leave processing to above invocation of make
+skip-makefile := 1
+
+endif # OUTPUT
+endif # BUILD_SRC
+
+# We process the rest of the Makefile if this is the final invocation of make
+ifeq ($(skip-makefile),)
+
+srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))
+objtree := $(CURDIR)
+src := $(srctree)
+obj := $(objtree)
+
+export prefix bindir src obj
+
+# Shell quotes
+bindir_SQ = $(subst ','\'',$(bindir))
+bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
+
+LIB_FILE = libtraceevent.a libtraceevent.so
+
+CONFIG_INCLUDES =
+CONFIG_LIBS =
+CONFIG_FLAGS =
+
+VERSION = $(EP_VERSION)
+PATCHLEVEL = $(EP_PATCHLEVEL)
+EXTRAVERSION = $(EP_EXTRAVERSION)
+
+OBJ = $@
+N =
+
+export Q VERBOSE
+
+EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION)
+
+INCLUDES = -I. -I $(srctree)/../../include $(CONFIG_INCLUDES)
+
+# Set compile option CFLAGS if not set elsewhere
+CFLAGS ?= -g -Wall
+
+# Append required CFLAGS
+override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ)
+override CFLAGS += $(udis86-flags) -D_GNU_SOURCE
+
+ifeq ($(VERBOSE),1)
+ Q =
+else
+ Q = @
+endif
+
+do_compile_shared_library = \
+ ($(print_shared_lib_compile) \
+ $(CC) --shared $^ -o $@)
+
+do_plugin_build = \
+ ($(print_plugin_build) \
+ $(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<)
+
+do_build_static_lib = \
+ ($(print_static_lib_build) \
+ $(RM) $@; $(AR) rcs $@ $^)
+
+
+do_compile = $(QUIET_CC)$(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@;
+
+$(obj)/%.o: $(src)/%.c
+ $(call do_compile)
+
+%.o: $(src)/%.c
+ $(call do_compile)
+
+PEVENT_LIB_OBJS = event-parse.o
+PEVENT_LIB_OBJS += event-plugin.o
+PEVENT_LIB_OBJS += trace-seq.o
+PEVENT_LIB_OBJS += parse-filter.o
+PEVENT_LIB_OBJS += parse-utils.o
+PEVENT_LIB_OBJS += kbuffer-parse.o
+
+PLUGIN_OBJS = plugin_jbd2.o
+PLUGIN_OBJS += plugin_hrtimer.o
+PLUGIN_OBJS += plugin_kmem.o
+PLUGIN_OBJS += plugin_kvm.o
+PLUGIN_OBJS += plugin_mac80211.o
+PLUGIN_OBJS += plugin_sched_switch.o
+PLUGIN_OBJS += plugin_function.o
+PLUGIN_OBJS += plugin_xen.o
+PLUGIN_OBJS += plugin_scsi.o
+PLUGIN_OBJS += plugin_cfg80211.o
+
+PLUGINS := $(PLUGIN_OBJS:.o=.so)
+
+ALL_OBJS = $(PEVENT_LIB_OBJS) $(PLUGIN_OBJS)
+
+CMD_TARGETS = $(LIB_FILE) $(PLUGINS)
+
+TARGETS = $(CMD_TARGETS)
+
+
+all: all_cmd
+
+all_cmd: $(CMD_TARGETS)
+
+libtraceevent.so: $(PEVENT_LIB_OBJS)
+ $(QUIET_LINK)$(CC) --shared $^ -o $@
+
+libtraceevent.a: $(PEVENT_LIB_OBJS)
+ $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
+
+plugins: $(PLUGINS)
+
+$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS
+ $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@
+
+$(PLUGIN_OBJS): %.o : $(src)/%.c
+ $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) -fPIC -o $@ $<
+
+$(PLUGINS): %.so: %.o
+ $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<
+
+define make_version.h
+ (echo '/* This file is automatically generated. Do not modify. */'; \
+ echo \#define VERSION_CODE $(shell \
+ expr $(VERSION) \* 256 + $(PATCHLEVEL)); \
+ echo '#define EXTRAVERSION ' $(EXTRAVERSION); \
+ echo '#define VERSION_STRING "'$(VERSION).$(PATCHLEVEL).$(EXTRAVERSION)'"'; \
+ echo '#define FILE_VERSION '$(FILE_VERSION); \
+ ) > $1
+endef
+
+define update_version.h
+ ($(call make_version.h, $@.tmp); \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ echo ' UPDATE $@'; \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+ep_version.h: force
+ $(Q)$(N)$(call update_version.h)
+
+VERSION_FILES = ep_version.h
+
+define update_dir
+ (echo $1 > $@.tmp; \
+ if [ -r $@ ] && cmp -s $@ $@.tmp; then \
+ rm -f $@.tmp; \
+ else \
+ echo ' UPDATE $@'; \
+ mv -f $@.tmp $@; \
+ fi);
+endef
+
+## make deps
+
+all_objs := $(sort $(ALL_OBJS))
+all_deps := $(all_objs:%.o=.%.d)
+
+# let .d file also depends on the source and header files
+define check_deps
+ @set -e; $(RM) $@; \
+ $(CC) -MM $(CFLAGS) $< > $@.$$$$; \
+ sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \
+ $(RM) $@.$$$$
+endef
+
+$(all_deps): .%.d: $(src)/%.c
+ $(Q)$(call check_deps)
+
+$(all_objs) : %.o : .%.d
+
+dep_includes := $(wildcard $(all_deps))
+
+ifneq ($(dep_includes),)
+ include $(dep_includes)
+endif
+
+### Detect environment changes
+TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE)
+
+TRACEEVENT-CFLAGS: force
+ @FLAGS='$(TRACK_CFLAGS)'; \
+ if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \
+ echo 1>&2 " FLAGS: * new build flags or cross compiler"; \
+ echo "$$FLAGS" >TRACEEVENT-CFLAGS; \
+ fi
+
+tags: force
+ $(RM) tags
+ find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \
+ --regex-c++='/_PE\(([^,)]*).*/PEVENT_ERRNO__\1/'
+
+TAGS: force
+ $(RM) TAGS
+ find . -name '*.[ch]' | xargs etags \
+ --regex='/_PE(\([^,)]*\).*/PEVENT_ERRNO__\1/'
+
+define do_install
+ if [ ! -d '$(DESTDIR_SQ)$2' ]; then \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
+ fi; \
+ $(INSTALL) $1 '$(DESTDIR_SQ)$2'
+endef
+
+define do_install_plugins
+ for plugin in $1; do \
+ $(call do_install,$$plugin,$(plugin_dir_SQ)); \
+ done
+endef
+
+install_lib: all_cmd install_plugins
+ $(call QUIET_INSTALL, $(LIB_FILE)) \
+ $(call do_install,$(LIB_FILE),$(bindir_SQ))
+
+install_plugins: $(PLUGINS)
+ $(call QUIET_INSTALL, trace_plugins) \
+ $(call do_install_plugins, $(PLUGINS))
+
+install: install_lib
+
+clean:
+ $(call QUIET_CLEAN, libtraceevent) \
+ $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \
+ $(RM) TRACEEVENT-CFLAGS tags TAGS
+
+endif # skip-makefile
+
+PHONY += force plugins
+force:
+
+plugins:
+ @echo > /dev/null
+
+# Declare the contents of the .PHONY variable as phony. We keep that
+# information in a variable so we can use it in if_changed and friends.
+.PHONY: $(PHONY)
diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
new file mode 100644
index 00000000000..93825a17dcc
--- /dev/null
+++ b/tools/lib/traceevent/event-parse.c
@@ -0,0 +1,6025 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * The parts for function graph printing was taken and modified from the
+ * Linux Kernel that were written by
+ * - Copyright (C) 2009 Frederic Weisbecker,
+ * Frederic Weisbecker gave his permission to relicense the code to
+ * the Lesser General Public License.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <stdint.h>
+#include <limits.h>
+
+#include "event-parse.h"
+#include "event-utils.h"
+
+static const char *input_buf;
+static unsigned long long input_buf_ptr;
+static unsigned long long input_buf_siz;
+
+static int is_flag_field;
+static int is_symbolic_field;
+
+static int show_warning = 1;
+
+#define do_warning(fmt, ...) \
+ do { \
+ if (show_warning) \
+ warning(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+#define do_warning_event(event, fmt, ...) \
+ do { \
+ if (!show_warning) \
+ continue; \
+ \
+ if (event) \
+ warning("[%s:%s] " fmt, event->system, \
+ event->name, ##__VA_ARGS__); \
+ else \
+ warning(fmt, ##__VA_ARGS__); \
+ } while (0)
+
+static void init_input_buf(const char *buf, unsigned long long size)
+{
+ input_buf = buf;
+ input_buf_siz = size;
+ input_buf_ptr = 0;
+}
+
+const char *pevent_get_input_buf(void)
+{
+ return input_buf;
+}
+
+unsigned long long pevent_get_input_buf_ptr(void)
+{
+ return input_buf_ptr;
+}
+
+struct event_handler {
+ struct event_handler *next;
+ int id;
+ const char *sys_name;
+ const char *event_name;
+ pevent_event_handler_func func;
+ void *context;
+};
+
+struct pevent_func_params {
+ struct pevent_func_params *next;
+ enum pevent_func_arg_type type;
+};
+
+struct pevent_function_handler {
+ struct pevent_function_handler *next;
+ enum pevent_func_arg_type ret_type;
+ char *name;
+ pevent_func_handler func;
+ struct pevent_func_params *params;
+ int nr_args;
+};
+
+static unsigned long long
+process_defined_func(struct trace_seq *s, void *data, int size,
+ struct event_format *event, struct print_arg *arg);
+
+static void free_func_handle(struct pevent_function_handler *func);
+
+/**
+ * pevent_buffer_init - init buffer for parsing
+ * @buf: buffer to parse
+ * @size: the size of the buffer
+ *
+ * For use with pevent_read_token(), this initializes the internal
+ * buffer that pevent_read_token() will parse.
+ */
+void pevent_buffer_init(const char *buf, unsigned long long size)
+{
+ init_input_buf(buf, size);
+}
+
+void breakpoint(void)
+{
+ static int x;
+ x++;
+}
+
+struct print_arg *alloc_arg(void)
+{
+ return calloc(1, sizeof(struct print_arg));
+}
+
+struct cmdline {
+ char *comm;
+ int pid;
+};
+
+static int cmdline_cmp(const void *a, const void *b)
+{
+ const struct cmdline *ca = a;
+ const struct cmdline *cb = b;
+
+ if (ca->pid < cb->pid)
+ return -1;
+ if (ca->pid > cb->pid)
+ return 1;
+
+ return 0;
+}
+
+struct cmdline_list {
+ struct cmdline_list *next;
+ char *comm;
+ int pid;
+};
+
+static int cmdline_init(struct pevent *pevent)
+{
+ struct cmdline_list *cmdlist = pevent->cmdlist;
+ struct cmdline_list *item;
+ struct cmdline *cmdlines;
+ int i;
+
+ cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count);
+ if (!cmdlines)
+ return -1;
+
+ i = 0;
+ while (cmdlist) {
+ cmdlines[i].pid = cmdlist->pid;
+ cmdlines[i].comm = cmdlist->comm;
+ i++;
+ item = cmdlist;
+ cmdlist = cmdlist->next;
+ free(item);
+ }
+
+ qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
+
+ pevent->cmdlines = cmdlines;
+ pevent->cmdlist = NULL;
+
+ return 0;
+}
+
+static const char *find_cmdline(struct pevent *pevent, int pid)
+{
+ const struct cmdline *comm;
+ struct cmdline key;
+
+ if (!pid)
+ return "<idle>";
+
+ if (!pevent->cmdlines && cmdline_init(pevent))
+ return "<not enough memory for cmdlines!>";
+
+ key.pid = pid;
+
+ comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count,
+ sizeof(*pevent->cmdlines), cmdline_cmp);
+
+ if (comm)
+ return comm->comm;
+ return "<...>";
+}
+
+/**
+ * pevent_pid_is_registered - return if a pid has a cmdline registered
+ * @pevent: handle for the pevent
+ * @pid: The pid to check if it has a cmdline registered with.
+ *
+ * Returns 1 if the pid has a cmdline mapped to it
+ * 0 otherwise.
+ */
+int pevent_pid_is_registered(struct pevent *pevent, int pid)
+{
+ const struct cmdline *comm;
+ struct cmdline key;
+
+ if (!pid)
+ return 1;
+
+ if (!pevent->cmdlines && cmdline_init(pevent))
+ return 0;
+
+ key.pid = pid;
+
+ comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count,
+ sizeof(*pevent->cmdlines), cmdline_cmp);
+
+ if (comm)
+ return 1;
+ return 0;
+}
+
+/*
+ * If the command lines have been converted to an array, then
+ * we must add this pid. This is much slower than when cmdlines
+ * are added before the array is initialized.
+ */
+static int add_new_comm(struct pevent *pevent, const char *comm, int pid)
+{
+ struct cmdline *cmdlines = pevent->cmdlines;
+ const struct cmdline *cmdline;
+ struct cmdline key;
+
+ if (!pid)
+ return 0;
+
+ /* avoid duplicates */
+ key.pid = pid;
+
+ cmdline = bsearch(&key, pevent->cmdlines, pevent->cmdline_count,
+ sizeof(*pevent->cmdlines), cmdline_cmp);
+ if (cmdline) {
+ errno = EEXIST;
+ return -1;
+ }
+
+ cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (pevent->cmdline_count + 1));
+ if (!cmdlines) {
+ errno = ENOMEM;
+ return -1;
+ }
+
+ cmdlines[pevent->cmdline_count].comm = strdup(comm);
+ if (!cmdlines[pevent->cmdline_count].comm) {
+ free(cmdlines);
+ errno = ENOMEM;
+ return -1;
+ }
+
+ cmdlines[pevent->cmdline_count].pid = pid;
+
+ if (cmdlines[pevent->cmdline_count].comm)
+ pevent->cmdline_count++;
+
+ qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp);
+ pevent->cmdlines = cmdlines;
+
+ return 0;
+}
+
+/**
+ * pevent_register_comm - register a pid / comm mapping
+ * @pevent: handle for the pevent
+ * @comm: the command line to register
+ * @pid: the pid to map the command line to
+ *
+ * This adds a mapping to search for command line names with
+ * a given pid. The comm is duplicated.
+ */
+int pevent_register_comm(struct pevent *pevent, const char *comm, int pid)
+{
+ struct cmdline_list *item;
+
+ if (pevent->cmdlines)
+ return add_new_comm(pevent, comm, pid);
+
+ item = malloc(sizeof(*item));
+ if (!item)
+ return -1;
+
+ item->comm = strdup(comm);
+ if (!item->comm) {
+ free(item);
+ return -1;
+ }
+ item->pid = pid;
+ item->next = pevent->cmdlist;
+
+ pevent->cmdlist = item;
+ pevent->cmdline_count++;
+
+ return 0;
+}
+
+void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock)
+{
+ pevent->trace_clock = trace_clock;
+}
+
+struct func_map {
+ unsigned long long addr;
+ char *func;
+ char *mod;
+};
+
+struct func_list {
+ struct func_list *next;
+ unsigned long long addr;
+ char *func;
+ char *mod;
+};
+
+static int func_cmp(const void *a, const void *b)
+{
+ const struct func_map *fa = a;
+ const struct func_map *fb = b;
+
+ if (fa->addr < fb->addr)
+ return -1;
+ if (fa->addr > fb->addr)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * We are searching for a record in between, not an exact
+ * match.
+ */
+static int func_bcmp(const void *a, const void *b)
+{
+ const struct func_map *fa = a;
+ const struct func_map *fb = b;
+
+ if ((fa->addr == fb->addr) ||
+
+ (fa->addr > fb->addr &&
+ fa->addr < (fb+1)->addr))
+ return 0;
+
+ if (fa->addr < fb->addr)
+ return -1;
+
+ return 1;
+}
+
+static int func_map_init(struct pevent *pevent)
+{
+ struct func_list *funclist;
+ struct func_list *item;
+ struct func_map *func_map;
+ int i;
+
+ func_map = malloc(sizeof(*func_map) * (pevent->func_count + 1));
+ if (!func_map)
+ return -1;
+
+ funclist = pevent->funclist;
+
+ i = 0;
+ while (funclist) {
+ func_map[i].func = funclist->func;
+ func_map[i].addr = funclist->addr;
+ func_map[i].mod = funclist->mod;
+ i++;
+ item = funclist;
+ funclist = funclist->next;
+ free(item);
+ }
+
+ qsort(func_map, pevent->func_count, sizeof(*func_map), func_cmp);
+
+ /*
+ * Add a special record at the end.
+ */
+ func_map[pevent->func_count].func = NULL;
+ func_map[pevent->func_count].addr = 0;
+ func_map[pevent->func_count].mod = NULL;
+
+ pevent->func_map = func_map;
+ pevent->funclist = NULL;
+
+ return 0;
+}
+
+static struct func_map *
+find_func(struct pevent *pevent, unsigned long long addr)
+{
+ struct func_map *func;
+ struct func_map key;
+
+ if (!pevent->func_map)
+ func_map_init(pevent);
+
+ key.addr = addr;
+
+ func = bsearch(&key, pevent->func_map, pevent->func_count,
+ sizeof(*pevent->func_map), func_bcmp);
+
+ return func;
+}
+
+/**
+ * pevent_find_function - find a function by a given address
+ * @pevent: handle for the pevent
+ * @addr: the address to find the function with
+ *
+ * Returns a pointer to the function stored that has the given
+ * address. Note, the address does not have to be exact, it
+ * will select the function that would contain the address.
+ */
+const char *pevent_find_function(struct pevent *pevent, unsigned long long addr)
+{
+ struct func_map *map;
+
+ map = find_func(pevent, addr);
+ if (!map)
+ return NULL;
+
+ return map->func;
+}
+
+/**
+ * pevent_find_function_address - find a function address by a given address
+ * @pevent: handle for the pevent
+ * @addr: the address to find the function with
+ *
+ * Returns the address the function starts at. This can be used in
+ * conjunction with pevent_find_function to print both the function
+ * name and the function offset.
+ */
+unsigned long long
+pevent_find_function_address(struct pevent *pevent, unsigned long long addr)
+{
+ struct func_map *map;
+
+ map = find_func(pevent, addr);
+ if (!map)
+ return 0;
+
+ return map->addr;
+}
+
+/**
+ * pevent_register_function - register a function with a given address
+ * @pevent: handle for the pevent
+ * @function: the function name to register
+ * @addr: the address the function starts at
+ * @mod: the kernel module the function may be in (NULL for none)
+ *
+ * This registers a function name with an address and module.
+ * The @func passed in is duplicated.
+ */
+int pevent_register_function(struct pevent *pevent, char *func,
+ unsigned long long addr, char *mod)
+{
+ struct func_list *item = malloc(sizeof(*item));
+
+ if (!item)
+ return -1;
+
+ item->next = pevent->funclist;
+ item->func = strdup(func);
+ if (!item->func)
+ goto out_free;
+
+ if (mod) {
+ item->mod = strdup(mod);
+ if (!item->mod)
+ goto out_free_func;
+ } else
+ item->mod = NULL;
+ item->addr = addr;
+
+ pevent->funclist = item;
+ pevent->func_count++;
+
+ return 0;
+
+out_free_func:
+ free(item->func);
+ item->func = NULL;
+out_free:
+ free(item);
+ errno = ENOMEM;
+ return -1;
+}
+
+/**
+ * pevent_print_funcs - print out the stored functions
+ * @pevent: handle for the pevent
+ *
+ * This prints out the stored functions.
+ */
+void pevent_print_funcs(struct pevent *pevent)
+{
+ int i;
+
+ if (!pevent->func_map)
+ func_map_init(pevent);
+
+ for (i = 0; i < (int)pevent->func_count; i++) {
+ printf("%016llx %s",
+ pevent->func_map[i].addr,
+ pevent->func_map[i].func);
+ if (pevent->func_map[i].mod)
+ printf(" [%s]\n", pevent->func_map[i].mod);
+ else
+ printf("\n");
+ }
+}
+
+struct printk_map {
+ unsigned long long addr;
+ char *printk;
+};
+
+struct printk_list {
+ struct printk_list *next;
+ unsigned long long addr;
+ char *printk;
+};
+
+static int printk_cmp(const void *a, const void *b)
+{
+ const struct printk_map *pa = a;
+ const struct printk_map *pb = b;
+
+ if (pa->addr < pb->addr)
+ return -1;
+ if (pa->addr > pb->addr)
+ return 1;
+
+ return 0;
+}
+
+static int printk_map_init(struct pevent *pevent)
+{
+ struct printk_list *printklist;
+ struct printk_list *item;
+ struct printk_map *printk_map;
+ int i;
+
+ printk_map = malloc(sizeof(*printk_map) * (pevent->printk_count + 1));
+ if (!printk_map)
+ return -1;
+
+ printklist = pevent->printklist;
+
+ i = 0;
+ while (printklist) {
+ printk_map[i].printk = printklist->printk;
+ printk_map[i].addr = printklist->addr;
+ i++;
+ item = printklist;
+ printklist = printklist->next;
+ free(item);
+ }
+
+ qsort(printk_map, pevent->printk_count, sizeof(*printk_map), printk_cmp);
+
+ pevent->printk_map = printk_map;
+ pevent->printklist = NULL;
+
+ return 0;
+}
+
+static struct printk_map *
+find_printk(struct pevent *pevent, unsigned long long addr)
+{
+ struct printk_map *printk;
+ struct printk_map key;
+
+ if (!pevent->printk_map && printk_map_init(pevent))
+ return NULL;
+
+ key.addr = addr;
+
+ printk = bsearch(&key, pevent->printk_map, pevent->printk_count,
+ sizeof(*pevent->printk_map), printk_cmp);
+
+ return printk;
+}
+
+/**
+ * pevent_register_print_string - register a string by its address
+ * @pevent: handle for the pevent
+ * @fmt: the string format to register
+ * @addr: the address the string was located at
+ *
+ * This registers a string by the address it was stored in the kernel.
+ * The @fmt passed in is duplicated.
+ */
+int pevent_register_print_string(struct pevent *pevent, const char *fmt,
+ unsigned long long addr)
+{
+ struct printk_list *item = malloc(sizeof(*item));
+ char *p;
+
+ if (!item)
+ return -1;
+
+ item->next = pevent->printklist;
+ item->addr = addr;
+
+ /* Strip off quotes and '\n' from the end */
+ if (fmt[0] == '"')
+ fmt++;
+ item->printk = strdup(fmt);
+ if (!item->printk)
+ goto out_free;
+
+ p = item->printk + strlen(item->printk) - 1;
+ if (*p == '"')
+ *p = 0;
+
+ p -= 2;
+ if (strcmp(p, "\\n") == 0)
+ *p = 0;
+
+ pevent->printklist = item;
+ pevent->printk_count++;
+
+ return 0;
+
+out_free:
+ free(item);
+ errno = ENOMEM;
+ return -1;
+}
+
+/**
+ * pevent_print_printk - print out the stored strings
+ * @pevent: handle for the pevent
+ *
+ * This prints the string formats that were stored.
+ */
+void pevent_print_printk(struct pevent *pevent)
+{
+ int i;
+
+ if (!pevent->printk_map)
+ printk_map_init(pevent);
+
+ for (i = 0; i < (int)pevent->printk_count; i++) {
+ printf("%016llx %s\n",
+ pevent->printk_map[i].addr,
+ pevent->printk_map[i].printk);
+ }
+}
+
+static struct event_format *alloc_event(void)
+{
+ return calloc(1, sizeof(struct event_format));
+}
+
+static int add_event(struct pevent *pevent, struct event_format *event)
+{
+ int i;
+ struct event_format **events = realloc(pevent->events, sizeof(event) *
+ (pevent->nr_events + 1));
+ if (!events)
+ return -1;
+
+ pevent->events = events;
+
+ for (i = 0; i < pevent->nr_events; i++) {
+ if (pevent->events[i]->id > event->id)
+ break;
+ }
+ if (i < pevent->nr_events)
+ memmove(&pevent->events[i + 1],
+ &pevent->events[i],
+ sizeof(event) * (pevent->nr_events - i));
+
+ pevent->events[i] = event;
+ pevent->nr_events++;
+
+ event->pevent = pevent;
+
+ return 0;
+}
+
+static int event_item_type(enum event_type type)
+{
+ switch (type) {
+ case EVENT_ITEM ... EVENT_SQUOTE:
+ return 1;
+ case EVENT_ERROR ... EVENT_DELIM:
+ default:
+ return 0;
+ }
+}
+
+static void free_flag_sym(struct print_flag_sym *fsym)
+{
+ struct print_flag_sym *next;
+
+ while (fsym) {
+ next = fsym->next;
+ free(fsym->value);
+ free(fsym->str);
+ free(fsym);
+ fsym = next;
+ }
+}
+
+static void free_arg(struct print_arg *arg)
+{
+ struct print_arg *farg;
+
+ if (!arg)
+ return;
+
+ switch (arg->type) {
+ case PRINT_ATOM:
+ free(arg->atom.atom);
+ break;
+ case PRINT_FIELD:
+ free(arg->field.name);
+ break;
+ case PRINT_FLAGS:
+ free_arg(arg->flags.field);
+ free(arg->flags.delim);
+ free_flag_sym(arg->flags.flags);
+ break;
+ case PRINT_SYMBOL:
+ free_arg(arg->symbol.field);
+ free_flag_sym(arg->symbol.symbols);
+ break;
+ case PRINT_HEX:
+ free_arg(arg->hex.field);
+ free_arg(arg->hex.size);
+ break;
+ case PRINT_TYPE:
+ free(arg->typecast.type);
+ free_arg(arg->typecast.item);
+ break;
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ free(arg->string.string);
+ break;
+ case PRINT_BITMASK:
+ free(arg->bitmask.bitmask);
+ break;
+ case PRINT_DYNAMIC_ARRAY:
+ free(arg->dynarray.index);
+ break;
+ case PRINT_OP:
+ free(arg->op.op);
+ free_arg(arg->op.left);
+ free_arg(arg->op.right);
+ break;
+ case PRINT_FUNC:
+ while (arg->func.args) {
+ farg = arg->func.args;
+ arg->func.args = farg->next;
+ free_arg(farg);
+ }
+ break;
+
+ case PRINT_NULL:
+ default:
+ break;
+ }
+
+ free(arg);
+}
+
+static enum event_type get_type(int ch)
+{
+ if (ch == '\n')
+ return EVENT_NEWLINE;
+ if (isspace(ch))
+ return EVENT_SPACE;
+ if (isalnum(ch) || ch == '_')
+ return EVENT_ITEM;
+ if (ch == '\'')
+ return EVENT_SQUOTE;
+ if (ch == '"')
+ return EVENT_DQUOTE;
+ if (!isprint(ch))
+ return EVENT_NONE;
+ if (ch == '(' || ch == ')' || ch == ',')
+ return EVENT_DELIM;
+
+ return EVENT_OP;
+}
+
+static int __read_char(void)
+{
+ if (input_buf_ptr >= input_buf_siz)
+ return -1;
+
+ return input_buf[input_buf_ptr++];
+}
+
+static int __peek_char(void)
+{
+ if (input_buf_ptr >= input_buf_siz)
+ return -1;
+
+ return input_buf[input_buf_ptr];
+}
+
+/**
+ * pevent_peek_char - peek at the next character that will be read
+ *
+ * Returns the next character read, or -1 if end of buffer.
+ */
+int pevent_peek_char(void)
+{
+ return __peek_char();
+}
+
+static int extend_token(char **tok, char *buf, int size)
+{
+ char *newtok = realloc(*tok, size);
+
+ if (!newtok) {
+ free(*tok);
+ *tok = NULL;
+ return -1;
+ }
+
+ if (!*tok)
+ strcpy(newtok, buf);
+ else
+ strcat(newtok, buf);
+ *tok = newtok;
+
+ return 0;
+}
+
+static enum event_type force_token(const char *str, char **tok);
+
+static enum event_type __read_token(char **tok)
+{
+ char buf[BUFSIZ];
+ int ch, last_ch, quote_ch, next_ch;
+ int i = 0;
+ int tok_size = 0;
+ enum event_type type;
+
+ *tok = NULL;
+
+
+ ch = __read_char();
+ if (ch < 0)
+ return EVENT_NONE;
+
+ type = get_type(ch);
+ if (type == EVENT_NONE)
+ return type;
+
+ buf[i++] = ch;
+
+ switch (type) {
+ case EVENT_NEWLINE:
+ case EVENT_DELIM:
+ if (asprintf(tok, "%c", ch) < 0)
+ return EVENT_ERROR;
+
+ return type;
+
+ case EVENT_OP:
+ switch (ch) {
+ case '-':
+ next_ch = __peek_char();
+ if (next_ch == '>') {
+ buf[i++] = __read_char();
+ break;
+ }
+ /* fall through */
+ case '+':
+ case '|':
+ case '&':
+ case '>':
+ case '<':
+ last_ch = ch;
+ ch = __peek_char();
+ if (ch != last_ch)
+ goto test_equal;
+ buf[i++] = __read_char();
+ switch (last_ch) {
+ case '>':
+ case '<':
+ goto test_equal;
+ default:
+ break;
+ }
+ break;
+ case '!':
+ case '=':
+ goto test_equal;
+ default: /* what should we do instead? */
+ break;
+ }
+ buf[i] = 0;
+ *tok = strdup(buf);
+ return type;
+
+ test_equal:
+ ch = __peek_char();
+ if (ch == '=')
+ buf[i++] = __read_char();
+ goto out;
+
+ case EVENT_DQUOTE:
+ case EVENT_SQUOTE:
+ /* don't keep quotes */
+ i--;
+ quote_ch = ch;
+ last_ch = 0;
+ concat:
+ do {
+ if (i == (BUFSIZ - 1)) {
+ buf[i] = 0;
+ tok_size += BUFSIZ;
+
+ if (extend_token(tok, buf, tok_size) < 0)
+ return EVENT_NONE;
+ i = 0;
+ }
+ last_ch = ch;
+ ch = __read_char();
+ buf[i++] = ch;
+ /* the '\' '\' will cancel itself */
+ if (ch == '\\' && last_ch == '\\')
+ last_ch = 0;
+ } while (ch != quote_ch || last_ch == '\\');
+ /* remove the last quote */
+ i--;
+
+ /*
+ * For strings (double quotes) check the next token.
+ * If it is another string, concatinate the two.
+ */
+ if (type == EVENT_DQUOTE) {
+ unsigned long long save_input_buf_ptr = input_buf_ptr;
+
+ do {
+ ch = __read_char();
+ } while (isspace(ch));
+ if (ch == '"')
+ goto concat;
+ input_buf_ptr = save_input_buf_ptr;
+ }
+
+ goto out;
+
+ case EVENT_ERROR ... EVENT_SPACE:
+ case EVENT_ITEM:
+ default:
+ break;
+ }
+
+ while (get_type(__peek_char()) == type) {
+ if (i == (BUFSIZ - 1)) {
+ buf[i] = 0;
+ tok_size += BUFSIZ;
+
+ if (extend_token(tok, buf, tok_size) < 0)
+ return EVENT_NONE;
+ i = 0;
+ }
+ ch = __read_char();
+ buf[i++] = ch;
+ }
+
+ out:
+ buf[i] = 0;
+ if (extend_token(tok, buf, tok_size + i + 1) < 0)
+ return EVENT_NONE;
+
+ if (type == EVENT_ITEM) {
+ /*
+ * Older versions of the kernel has a bug that
+ * creates invalid symbols and will break the mac80211
+ * parsing. This is a work around to that bug.
+ *
+ * See Linux kernel commit:
+ * 811cb50baf63461ce0bdb234927046131fc7fa8b
+ */
+ if (strcmp(*tok, "LOCAL_PR_FMT") == 0) {
+ free(*tok);
+ *tok = NULL;
+ return force_token("\"\%s\" ", tok);
+ } else if (strcmp(*tok, "STA_PR_FMT") == 0) {
+ free(*tok);
+ *tok = NULL;
+ return force_token("\" sta:%pM\" ", tok);
+ } else if (strcmp(*tok, "VIF_PR_FMT") == 0) {
+ free(*tok);
+ *tok = NULL;
+ return force_token("\" vif:%p(%d)\" ", tok);
+ }
+ }
+
+ return type;
+}
+
+static enum event_type force_token(const char *str, char **tok)
+{
+ const char *save_input_buf;
+ unsigned long long save_input_buf_ptr;
+ unsigned long long save_input_buf_siz;
+ enum event_type type;
+
+ /* save off the current input pointers */
+ save_input_buf = input_buf;
+ save_input_buf_ptr = input_buf_ptr;
+ save_input_buf_siz = input_buf_siz;
+
+ init_input_buf(str, strlen(str));
+
+ type = __read_token(tok);
+
+ /* reset back to original token */
+ input_buf = save_input_buf;
+ input_buf_ptr = save_input_buf_ptr;
+ input_buf_siz = save_input_buf_siz;
+
+ return type;
+}
+
+static void free_token(char *tok)
+{
+ if (tok)
+ free(tok);
+}
+
+static enum event_type read_token(char **tok)
+{
+ enum event_type type;
+
+ for (;;) {
+ type = __read_token(tok);
+ if (type != EVENT_SPACE)
+ return type;
+
+ free_token(*tok);
+ }
+
+ /* not reached */
+ *tok = NULL;
+ return EVENT_NONE;
+}
+
+/**
+ * pevent_read_token - access to utilites to use the pevent parser
+ * @tok: The token to return
+ *
+ * This will parse tokens from the string given by
+ * pevent_init_data().
+ *
+ * Returns the token type.
+ */
+enum event_type pevent_read_token(char **tok)
+{
+ return read_token(tok);
+}
+
+/**
+ * pevent_free_token - free a token returned by pevent_read_token
+ * @token: the token to free
+ */
+void pevent_free_token(char *token)
+{
+ free_token(token);
+}
+
+/* no newline */
+static enum event_type read_token_item(char **tok)
+{
+ enum event_type type;
+
+ for (;;) {
+ type = __read_token(tok);
+ if (type != EVENT_SPACE && type != EVENT_NEWLINE)
+ return type;
+ free_token(*tok);
+ *tok = NULL;
+ }
+
+ /* not reached */
+ *tok = NULL;
+ return EVENT_NONE;
+}
+
+static int test_type(enum event_type type, enum event_type expect)
+{
+ if (type != expect) {
+ do_warning("Error: expected type %d but read %d",
+ expect, type);
+ return -1;
+ }
+ return 0;
+}
+
+static int test_type_token(enum event_type type, const char *token,
+ enum event_type expect, const char *expect_tok)
+{
+ if (type != expect) {
+ do_warning("Error: expected type %d but read %d",
+ expect, type);
+ return -1;
+ }
+
+ if (strcmp(token, expect_tok) != 0) {
+ do_warning("Error: expected '%s' but read '%s'",
+ expect_tok, token);
+ return -1;
+ }
+ return 0;
+}
+
+static int __read_expect_type(enum event_type expect, char **tok, int newline_ok)
+{
+ enum event_type type;
+
+ if (newline_ok)
+ type = read_token(tok);
+ else
+ type = read_token_item(tok);
+ return test_type(type, expect);
+}
+
+static int read_expect_type(enum event_type expect, char **tok)
+{
+ return __read_expect_type(expect, tok, 1);
+}
+
+static int __read_expected(enum event_type expect, const char *str,
+ int newline_ok)
+{
+ enum event_type type;
+ char *token;
+ int ret;
+
+ if (newline_ok)
+ type = read_token(&token);
+ else
+ type = read_token_item(&token);
+
+ ret = test_type_token(type, token, expect, str);
+
+ free_token(token);
+
+ return ret;
+}
+
+static int read_expected(enum event_type expect, const char *str)
+{
+ return __read_expected(expect, str, 1);
+}
+
+static int read_expected_item(enum event_type expect, const char *str)
+{
+ return __read_expected(expect, str, 0);
+}
+
+static char *event_read_name(void)
+{
+ char *token;
+
+ if (read_expected(EVENT_ITEM, "name") < 0)
+ return NULL;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return NULL;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+
+ return token;
+
+ fail:
+ free_token(token);
+ return NULL;
+}
+
+static int event_read_id(void)
+{
+ char *token;
+ int id;
+
+ if (read_expected_item(EVENT_ITEM, "ID") < 0)
+ return -1;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return -1;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+
+ id = strtoul(token, NULL, 0);
+ free_token(token);
+ return id;
+
+ fail:
+ free_token(token);
+ return -1;
+}
+
+static int field_is_string(struct format_field *field)
+{
+ if ((field->flags & FIELD_IS_ARRAY) &&
+ (strstr(field->type, "char") || strstr(field->type, "u8") ||
+ strstr(field->type, "s8")))
+ return 1;
+
+ return 0;
+}
+
+static int field_is_dynamic(struct format_field *field)
+{
+ if (strncmp(field->type, "__data_loc", 10) == 0)
+ return 1;
+
+ return 0;
+}
+
+static int field_is_long(struct format_field *field)
+{
+ /* includes long long */
+ if (strstr(field->type, "long"))
+ return 1;
+
+ return 0;
+}
+
+static unsigned int type_size(const char *name)
+{
+ /* This covers all FIELD_IS_STRING types. */
+ static struct {
+ const char *type;
+ unsigned int size;
+ } table[] = {
+ { "u8", 1 },
+ { "u16", 2 },
+ { "u32", 4 },
+ { "u64", 8 },
+ { "s8", 1 },
+ { "s16", 2 },
+ { "s32", 4 },
+ { "s64", 8 },
+ { "char", 1 },
+ { },
+ };
+ int i;
+
+ for (i = 0; table[i].type; i++) {
+ if (!strcmp(table[i].type, name))
+ return table[i].size;
+ }
+
+ return 0;
+}
+
+static int event_read_fields(struct event_format *event, struct format_field **fields)
+{
+ struct format_field *field = NULL;
+ enum event_type type;
+ char *token;
+ char *last_token;
+ int count = 0;
+
+ do {
+ unsigned int size_dynamic = 0;
+
+ type = read_token(&token);
+ if (type == EVENT_NEWLINE) {
+ free_token(token);
+ return count;
+ }
+
+ count++;
+
+ if (test_type_token(type, token, EVENT_ITEM, "field"))
+ goto fail;
+ free_token(token);
+
+ type = read_token(&token);
+ /*
+ * The ftrace fields may still use the "special" name.
+ * Just ignore it.
+ */
+ if (event->flags & EVENT_FL_ISFTRACE &&
+ type == EVENT_ITEM && strcmp(token, "special") == 0) {
+ free_token(token);
+ type = read_token(&token);
+ }
+
+ if (test_type_token(type, token, EVENT_OP, ":") < 0)
+ goto fail;
+
+ free_token(token);
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+
+ last_token = token;
+
+ field = calloc(1, sizeof(*field));
+ if (!field)
+ goto fail;
+
+ field->event = event;
+
+ /* read the rest of the type */
+ for (;;) {
+ type = read_token(&token);
+ if (type == EVENT_ITEM ||
+ (type == EVENT_OP && strcmp(token, "*") == 0) ||
+ /*
+ * Some of the ftrace fields are broken and have
+ * an illegal "." in them.
+ */
+ (event->flags & EVENT_FL_ISFTRACE &&
+ type == EVENT_OP && strcmp(token, ".") == 0)) {
+
+ if (strcmp(token, "*") == 0)
+ field->flags |= FIELD_IS_POINTER;
+
+ if (field->type) {
+ char *new_type;
+ new_type = realloc(field->type,
+ strlen(field->type) +
+ strlen(last_token) + 2);
+ if (!new_type) {
+ free(last_token);
+ goto fail;
+ }
+ field->type = new_type;
+ strcat(field->type, " ");
+ strcat(field->type, last_token);
+ free(last_token);
+ } else
+ field->type = last_token;
+ last_token = token;
+ continue;
+ }
+
+ break;
+ }
+
+ if (!field->type) {
+ do_warning_event(event, "%s: no type found", __func__);
+ goto fail;
+ }
+ field->name = last_token;
+
+ if (test_type(type, EVENT_OP))
+ goto fail;
+
+ if (strcmp(token, "[") == 0) {
+ enum event_type last_type = type;
+ char *brackets = token;
+ char *new_brackets;
+ int len;
+
+ field->flags |= FIELD_IS_ARRAY;
+
+ type = read_token(&token);
+
+ if (type == EVENT_ITEM)
+ field->arraylen = strtoul(token, NULL, 0);
+ else
+ field->arraylen = 0;
+
+ while (strcmp(token, "]") != 0) {
+ if (last_type == EVENT_ITEM &&
+ type == EVENT_ITEM)
+ len = 2;
+ else
+ len = 1;
+ last_type = type;
+
+ new_brackets = realloc(brackets,
+ strlen(brackets) +
+ strlen(token) + len);
+ if (!new_brackets) {
+ free(brackets);
+ goto fail;
+ }
+ brackets = new_brackets;
+ if (len == 2)
+ strcat(brackets, " ");
+ strcat(brackets, token);
+ /* We only care about the last token */
+ field->arraylen = strtoul(token, NULL, 0);
+ free_token(token);
+ type = read_token(&token);
+ if (type == EVENT_NONE) {
+ do_warning_event(event, "failed to find token");
+ goto fail;
+ }
+ }
+
+ free_token(token);
+
+ new_brackets = realloc(brackets, strlen(brackets) + 2);
+ if (!new_brackets) {
+ free(brackets);
+ goto fail;
+ }
+ brackets = new_brackets;
+ strcat(brackets, "]");
+
+ /* add brackets to type */
+
+ type = read_token(&token);
+ /*
+ * If the next token is not an OP, then it is of
+ * the format: type [] item;
+ */
+ if (type == EVENT_ITEM) {
+ char *new_type;
+ new_type = realloc(field->type,
+ strlen(field->type) +
+ strlen(field->name) +
+ strlen(brackets) + 2);
+ if (!new_type) {
+ free(brackets);
+ goto fail;
+ }
+ field->type = new_type;
+ strcat(field->type, " ");
+ strcat(field->type, field->name);
+ size_dynamic = type_size(field->name);
+ free_token(field->name);
+ strcat(field->type, brackets);
+ field->name = token;
+ type = read_token(&token);
+ } else {
+ char *new_type;
+ new_type = realloc(field->type,
+ strlen(field->type) +
+ strlen(brackets) + 1);
+ if (!new_type) {
+ free(brackets);
+ goto fail;
+ }
+ field->type = new_type;
+ strcat(field->type, brackets);
+ }
+ free(brackets);
+ }
+
+ if (field_is_string(field))
+ field->flags |= FIELD_IS_STRING;
+ if (field_is_dynamic(field))
+ field->flags |= FIELD_IS_DYNAMIC;
+ if (field_is_long(field))
+ field->flags |= FIELD_IS_LONG;
+
+ if (test_type_token(type, token, EVENT_OP, ";"))
+ goto fail;
+ free_token(token);
+
+ if (read_expected(EVENT_ITEM, "offset") < 0)
+ goto fail_expect;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ goto fail_expect;
+
+ if (read_expect_type(EVENT_ITEM, &token))
+ goto fail;
+ field->offset = strtoul(token, NULL, 0);
+ free_token(token);
+
+ if (read_expected(EVENT_OP, ";") < 0)
+ goto fail_expect;
+
+ if (read_expected(EVENT_ITEM, "size") < 0)
+ goto fail_expect;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ goto fail_expect;
+
+ if (read_expect_type(EVENT_ITEM, &token))
+ goto fail;
+ field->size = strtoul(token, NULL, 0);
+ free_token(token);
+
+ if (read_expected(EVENT_OP, ";") < 0)
+ goto fail_expect;
+
+ type = read_token(&token);
+ if (type != EVENT_NEWLINE) {
+ /* newer versions of the kernel have a "signed" type */
+ if (test_type_token(type, token, EVENT_ITEM, "signed"))
+ goto fail;
+
+ free_token(token);
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ goto fail_expect;
+
+ if (read_expect_type(EVENT_ITEM, &token))
+ goto fail;
+
+ if (strtoul(token, NULL, 0))
+ field->flags |= FIELD_IS_SIGNED;
+
+ free_token(token);
+ if (read_expected(EVENT_OP, ";") < 0)
+ goto fail_expect;
+
+ if (read_expect_type(EVENT_NEWLINE, &token))
+ goto fail;
+ }
+
+ free_token(token);
+
+ if (field->flags & FIELD_IS_ARRAY) {
+ if (field->arraylen)
+ field->elementsize = field->size / field->arraylen;
+ else if (field->flags & FIELD_IS_DYNAMIC)
+ field->elementsize = size_dynamic;
+ else if (field->flags & FIELD_IS_STRING)
+ field->elementsize = 1;
+ else if (field->flags & FIELD_IS_LONG)
+ field->elementsize = event->pevent ?
+ event->pevent->long_size :
+ sizeof(long);
+ } else
+ field->elementsize = field->size;
+
+ *fields = field;
+ fields = &field->next;
+
+ } while (1);
+
+ return 0;
+
+fail:
+ free_token(token);
+fail_expect:
+ if (field) {
+ free(field->type);
+ free(field->name);
+ free(field);
+ }
+ return -1;
+}
+
+static int event_read_format(struct event_format *event)
+{
+ char *token;
+ int ret;
+
+ if (read_expected_item(EVENT_ITEM, "format") < 0)
+ return -1;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return -1;
+
+ if (read_expect_type(EVENT_NEWLINE, &token))
+ goto fail;
+ free_token(token);
+
+ ret = event_read_fields(event, &event->format.common_fields);
+ if (ret < 0)
+ return ret;
+ event->format.nr_common = ret;
+
+ ret = event_read_fields(event, &event->format.fields);
+ if (ret < 0)
+ return ret;
+ event->format.nr_fields = ret;
+
+ return 0;
+
+ fail:
+ free_token(token);
+ return -1;
+}
+
+static enum event_type
+process_arg_token(struct event_format *event, struct print_arg *arg,
+ char **tok, enum event_type type);
+
+static enum event_type
+process_arg(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ enum event_type type;
+ char *token;
+
+ type = read_token(&token);
+ *tok = token;
+
+ return process_arg_token(event, arg, tok, type);
+}
+
+static enum event_type
+process_op(struct event_format *event, struct print_arg *arg, char **tok);
+
+/*
+ * For __print_symbolic() and __print_flags, we need to completely
+ * evaluate the first argument, which defines what to print next.
+ */
+static enum event_type
+process_field_arg(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ enum event_type type;
+
+ type = process_arg(event, arg, tok);
+
+ while (type == EVENT_OP) {
+ type = process_op(event, arg, tok);
+ }
+
+ return type;
+}
+
+static enum event_type
+process_cond(struct event_format *event, struct print_arg *top, char **tok)
+{
+ struct print_arg *arg, *left, *right;
+ enum event_type type;
+ char *token = NULL;
+
+ arg = alloc_arg();
+ left = alloc_arg();
+ right = alloc_arg();
+
+ if (!arg || !left || !right) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ /* arg will be freed at out_free */
+ free_arg(left);
+ free_arg(right);
+ goto out_free;
+ }
+
+ arg->type = PRINT_OP;
+ arg->op.left = left;
+ arg->op.right = right;
+
+ *tok = NULL;
+ type = process_arg(event, left, &token);
+
+ again:
+ /* Handle other operations in the arguments */
+ if (type == EVENT_OP && strcmp(token, ":") != 0) {
+ type = process_op(event, left, &token);
+ goto again;
+ }
+
+ if (test_type_token(type, token, EVENT_OP, ":"))
+ goto out_free;
+
+ arg->op.op = token;
+
+ type = process_arg(event, right, &token);
+
+ top->op.right = arg;
+
+ *tok = token;
+ return type;
+
+out_free:
+ /* Top may point to itself */
+ top->op.right = NULL;
+ free_token(token);
+ free_arg(arg);
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_array(struct event_format *event, struct print_arg *top, char **tok)
+{
+ struct print_arg *arg;
+ enum event_type type;
+ char *token = NULL;
+
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ /* '*tok' is set to top->op.op. No need to free. */
+ *tok = NULL;
+ return EVENT_ERROR;
+ }
+
+ *tok = NULL;
+ type = process_arg(event, arg, &token);
+ if (test_type_token(type, token, EVENT_OP, "]"))
+ goto out_free;
+
+ top->op.right = arg;
+
+ free_token(token);
+ type = read_token_item(&token);
+ *tok = token;
+
+ return type;
+
+out_free:
+ free_token(token);
+ free_arg(arg);
+ return EVENT_ERROR;
+}
+
+static int get_op_prio(char *op)
+{
+ if (!op[1]) {
+ switch (op[0]) {
+ case '~':
+ case '!':
+ return 4;
+ case '*':
+ case '/':
+ case '%':
+ return 6;
+ case '+':
+ case '-':
+ return 7;
+ /* '>>' and '<<' are 8 */
+ case '<':
+ case '>':
+ return 9;
+ /* '==' and '!=' are 10 */
+ case '&':
+ return 11;
+ case '^':
+ return 12;
+ case '|':
+ return 13;
+ case '?':
+ return 16;
+ default:
+ do_warning("unknown op '%c'", op[0]);
+ return -1;
+ }
+ } else {
+ if (strcmp(op, "++") == 0 ||
+ strcmp(op, "--") == 0) {
+ return 3;
+ } else if (strcmp(op, ">>") == 0 ||
+ strcmp(op, "<<") == 0) {
+ return 8;
+ } else if (strcmp(op, ">=") == 0 ||
+ strcmp(op, "<=") == 0) {
+ return 9;
+ } else if (strcmp(op, "==") == 0 ||
+ strcmp(op, "!=") == 0) {
+ return 10;
+ } else if (strcmp(op, "&&") == 0) {
+ return 14;
+ } else if (strcmp(op, "||") == 0) {
+ return 15;
+ } else {
+ do_warning("unknown op '%s'", op);
+ return -1;
+ }
+ }
+}
+
+static int set_op_prio(struct print_arg *arg)
+{
+
+ /* single ops are the greatest */
+ if (!arg->op.left || arg->op.left->type == PRINT_NULL)
+ arg->op.prio = 0;
+ else
+ arg->op.prio = get_op_prio(arg->op.op);
+
+ return arg->op.prio;
+}
+
+/* Note, *tok does not get freed, but will most likely be saved */
+static enum event_type
+process_op(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *left, *right = NULL;
+ enum event_type type;
+ char *token;
+
+ /* the op is passed in via tok */
+ token = *tok;
+
+ if (arg->type == PRINT_OP && !arg->op.left) {
+ /* handle single op */
+ if (token[1]) {
+ do_warning_event(event, "bad op token %s", token);
+ goto out_free;
+ }
+ switch (token[0]) {
+ case '~':
+ case '!':
+ case '+':
+ case '-':
+ break;
+ default:
+ do_warning_event(event, "bad op token %s", token);
+ goto out_free;
+
+ }
+
+ /* make an empty left */
+ left = alloc_arg();
+ if (!left)
+ goto out_warn_free;
+
+ left->type = PRINT_NULL;
+ arg->op.left = left;
+
+ right = alloc_arg();
+ if (!right)
+ goto out_warn_free;
+
+ arg->op.right = right;
+
+ /* do not free the token, it belongs to an op */
+ *tok = NULL;
+ type = process_arg(event, right, tok);
+
+ } else if (strcmp(token, "?") == 0) {
+
+ left = alloc_arg();
+ if (!left)
+ goto out_warn_free;
+
+ /* copy the top arg to the left */
+ *left = *arg;
+
+ arg->type = PRINT_OP;
+ arg->op.op = token;
+ arg->op.left = left;
+ arg->op.prio = 0;
+
+ /* it will set arg->op.right */
+ type = process_cond(event, arg, tok);
+
+ } else if (strcmp(token, ">>") == 0 ||
+ strcmp(token, "<<") == 0 ||
+ strcmp(token, "&") == 0 ||
+ strcmp(token, "|") == 0 ||
+ strcmp(token, "&&") == 0 ||
+ strcmp(token, "||") == 0 ||
+ strcmp(token, "-") == 0 ||
+ strcmp(token, "+") == 0 ||
+ strcmp(token, "*") == 0 ||
+ strcmp(token, "^") == 0 ||
+ strcmp(token, "/") == 0 ||
+ strcmp(token, "<") == 0 ||
+ strcmp(token, ">") == 0 ||
+ strcmp(token, "<=") == 0 ||
+ strcmp(token, ">=") == 0 ||
+ strcmp(token, "==") == 0 ||
+ strcmp(token, "!=") == 0) {
+
+ left = alloc_arg();
+ if (!left)
+ goto out_warn_free;
+
+ /* copy the top arg to the left */
+ *left = *arg;
+
+ arg->type = PRINT_OP;
+ arg->op.op = token;
+ arg->op.left = left;
+ arg->op.right = NULL;
+
+ if (set_op_prio(arg) == -1) {
+ event->flags |= EVENT_FL_FAILED;
+ /* arg->op.op (= token) will be freed at out_free */
+ arg->op.op = NULL;
+ goto out_free;
+ }
+
+ type = read_token_item(&token);
+ *tok = token;
+
+ /* could just be a type pointer */
+ if ((strcmp(arg->op.op, "*") == 0) &&
+ type == EVENT_DELIM && (strcmp(token, ")") == 0)) {
+ char *new_atom;
+
+ if (left->type != PRINT_ATOM) {
+ do_warning_event(event, "bad pointer type");
+ goto out_free;
+ }
+ new_atom = realloc(left->atom.atom,
+ strlen(left->atom.atom) + 3);
+ if (!new_atom)
+ goto out_warn_free;
+
+ left->atom.atom = new_atom;
+ strcat(left->atom.atom, " *");
+ free(arg->op.op);
+ *arg = *left;
+ free(left);
+
+ return type;
+ }
+
+ right = alloc_arg();
+ if (!right)
+ goto out_warn_free;
+
+ type = process_arg_token(event, right, tok, type);
+ arg->op.right = right;
+
+ } else if (strcmp(token, "[") == 0) {
+
+ left = alloc_arg();
+ if (!left)
+ goto out_warn_free;
+
+ *left = *arg;
+
+ arg->type = PRINT_OP;
+ arg->op.op = token;
+ arg->op.left = left;
+
+ arg->op.prio = 0;
+
+ /* it will set arg->op.right */
+ type = process_array(event, arg, tok);
+
+ } else {
+ do_warning_event(event, "unknown op '%s'", token);
+ event->flags |= EVENT_FL_FAILED;
+ /* the arg is now the left side */
+ goto out_free;
+ }
+
+ if (type == EVENT_OP && strcmp(*tok, ":") != 0) {
+ int prio;
+
+ /* higher prios need to be closer to the root */
+ prio = get_op_prio(*tok);
+
+ if (prio > arg->op.prio)
+ return process_op(event, arg, tok);
+
+ return process_op(event, right, tok);
+ }
+
+ return type;
+
+out_warn_free:
+ do_warning_event(event, "%s: not enough memory!", __func__);
+out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_entry(struct event_format *event __maybe_unused, struct print_arg *arg,
+ char **tok)
+{
+ enum event_type type;
+ char *field;
+ char *token;
+
+ if (read_expected(EVENT_OP, "->") < 0)
+ goto out_err;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto out_free;
+ field = token;
+
+ arg->type = PRINT_FIELD;
+ arg->field.name = field;
+
+ if (is_flag_field) {
+ arg->field.field = pevent_find_any_field(event, arg->field.name);
+ arg->field.field->flags |= FIELD_IS_FLAG;
+ is_flag_field = 0;
+ } else if (is_symbolic_field) {
+ arg->field.field = pevent_find_any_field(event, arg->field.name);
+ arg->field.field->flags |= FIELD_IS_SYMBOLIC;
+ is_symbolic_field = 0;
+ }
+
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+ out_free:
+ free_token(token);
+ out_err:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static char *arg_eval (struct print_arg *arg);
+
+static unsigned long long
+eval_type_str(unsigned long long val, const char *type, int pointer)
+{
+ int sign = 0;
+ char *ref;
+ int len;
+
+ len = strlen(type);
+
+ if (pointer) {
+
+ if (type[len-1] != '*') {
+ do_warning("pointer expected with non pointer type");
+ return val;
+ }
+
+ ref = malloc(len);
+ if (!ref) {
+ do_warning("%s: not enough memory!", __func__);
+ return val;
+ }
+ memcpy(ref, type, len);
+
+ /* chop off the " *" */
+ ref[len - 2] = 0;
+
+ val = eval_type_str(val, ref, 0);
+ free(ref);
+ return val;
+ }
+
+ /* check if this is a pointer */
+ if (type[len - 1] == '*')
+ return val;
+
+ /* Try to figure out the arg size*/
+ if (strncmp(type, "struct", 6) == 0)
+ /* all bets off */
+ return val;
+
+ if (strcmp(type, "u8") == 0)
+ return val & 0xff;
+
+ if (strcmp(type, "u16") == 0)
+ return val & 0xffff;
+
+ if (strcmp(type, "u32") == 0)
+ return val & 0xffffffff;
+
+ if (strcmp(type, "u64") == 0 ||
+ strcmp(type, "s64"))
+ return val;
+
+ if (strcmp(type, "s8") == 0)
+ return (unsigned long long)(char)val & 0xff;
+
+ if (strcmp(type, "s16") == 0)
+ return (unsigned long long)(short)val & 0xffff;
+
+ if (strcmp(type, "s32") == 0)
+ return (unsigned long long)(int)val & 0xffffffff;
+
+ if (strncmp(type, "unsigned ", 9) == 0) {
+ sign = 0;
+ type += 9;
+ }
+
+ if (strcmp(type, "char") == 0) {
+ if (sign)
+ return (unsigned long long)(char)val & 0xff;
+ else
+ return val & 0xff;
+ }
+
+ if (strcmp(type, "short") == 0) {
+ if (sign)
+ return (unsigned long long)(short)val & 0xffff;
+ else
+ return val & 0xffff;
+ }
+
+ if (strcmp(type, "int") == 0) {
+ if (sign)
+ return (unsigned long long)(int)val & 0xffffffff;
+ else
+ return val & 0xffffffff;
+ }
+
+ return val;
+}
+
+/*
+ * Try to figure out the type.
+ */
+static unsigned long long
+eval_type(unsigned long long val, struct print_arg *arg, int pointer)
+{
+ if (arg->type != PRINT_TYPE) {
+ do_warning("expected type argument");
+ return 0;
+ }
+
+ return eval_type_str(val, arg->typecast.type, pointer);
+}
+
+static int arg_num_eval(struct print_arg *arg, long long *val)
+{
+ long long left, right;
+ int ret = 1;
+
+ switch (arg->type) {
+ case PRINT_ATOM:
+ *val = strtoll(arg->atom.atom, NULL, 0);
+ break;
+ case PRINT_TYPE:
+ ret = arg_num_eval(arg->typecast.item, val);
+ if (!ret)
+ break;
+ *val = eval_type(*val, arg, 0);
+ break;
+ case PRINT_OP:
+ switch (arg->op.op[0]) {
+ case '|':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ if (arg->op.op[1])
+ *val = left || right;
+ else
+ *val = left | right;
+ break;
+ case '&':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ if (arg->op.op[1])
+ *val = left && right;
+ else
+ *val = left & right;
+ break;
+ case '<':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ switch (arg->op.op[1]) {
+ case 0:
+ *val = left < right;
+ break;
+ case '<':
+ *val = left << right;
+ break;
+ case '=':
+ *val = left <= right;
+ break;
+ default:
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ }
+ break;
+ case '>':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ switch (arg->op.op[1]) {
+ case 0:
+ *val = left > right;
+ break;
+ case '>':
+ *val = left >> right;
+ break;
+ case '=':
+ *val = left >= right;
+ break;
+ default:
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ }
+ break;
+ case '=':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+
+ if (arg->op.op[1] != '=') {
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ } else
+ *val = left == right;
+ break;
+ case '!':
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+
+ switch (arg->op.op[1]) {
+ case '=':
+ *val = left != right;
+ break;
+ default:
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ }
+ break;
+ case '-':
+ /* check for negative */
+ if (arg->op.left->type == PRINT_NULL)
+ left = 0;
+ else
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ *val = left - right;
+ break;
+ case '+':
+ if (arg->op.left->type == PRINT_NULL)
+ left = 0;
+ else
+ ret = arg_num_eval(arg->op.left, &left);
+ if (!ret)
+ break;
+ ret = arg_num_eval(arg->op.right, &right);
+ if (!ret)
+ break;
+ *val = left + right;
+ break;
+ default:
+ do_warning("unknown op '%s'", arg->op.op);
+ ret = 0;
+ }
+ break;
+
+ case PRINT_NULL:
+ case PRINT_FIELD ... PRINT_SYMBOL:
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ case PRINT_BITMASK:
+ default:
+ do_warning("invalid eval type %d", arg->type);
+ ret = 0;
+
+ }
+ return ret;
+}
+
+static char *arg_eval (struct print_arg *arg)
+{
+ long long val;
+ static char buf[20];
+
+ switch (arg->type) {
+ case PRINT_ATOM:
+ return arg->atom.atom;
+ case PRINT_TYPE:
+ return arg_eval(arg->typecast.item);
+ case PRINT_OP:
+ if (!arg_num_eval(arg, &val))
+ break;
+ sprintf(buf, "%lld", val);
+ return buf;
+
+ case PRINT_NULL:
+ case PRINT_FIELD ... PRINT_SYMBOL:
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ case PRINT_BITMASK:
+ default:
+ do_warning("invalid eval type %d", arg->type);
+ break;
+ }
+
+ return NULL;
+}
+
+static enum event_type
+process_fields(struct event_format *event, struct print_flag_sym **list, char **tok)
+{
+ enum event_type type;
+ struct print_arg *arg = NULL;
+ struct print_flag_sym *field;
+ char *token = *tok;
+ char *value;
+
+ do {
+ free_token(token);
+ type = read_token_item(&token);
+ if (test_type_token(type, token, EVENT_OP, "{"))
+ break;
+
+ arg = alloc_arg();
+ if (!arg)
+ goto out_free;
+
+ free_token(token);
+ type = process_arg(event, arg, &token);
+
+ if (type == EVENT_OP)
+ type = process_op(event, arg, &token);
+
+ if (type == EVENT_ERROR)
+ goto out_free;
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free;
+
+ field = calloc(1, sizeof(*field));
+ if (!field)
+ goto out_free;
+
+ value = arg_eval(arg);
+ if (value == NULL)
+ goto out_free_field;
+ field->value = strdup(value);
+ if (field->value == NULL)
+ goto out_free_field;
+
+ free_arg(arg);
+ arg = alloc_arg();
+ if (!arg)
+ goto out_free;
+
+ free_token(token);
+ type = process_arg(event, arg, &token);
+ if (test_type_token(type, token, EVENT_OP, "}"))
+ goto out_free_field;
+
+ value = arg_eval(arg);
+ if (value == NULL)
+ goto out_free_field;
+ field->str = strdup(value);
+ if (field->str == NULL)
+ goto out_free_field;
+ free_arg(arg);
+ arg = NULL;
+
+ *list = field;
+ list = &field->next;
+
+ free_token(token);
+ type = read_token_item(&token);
+ } while (type == EVENT_DELIM && strcmp(token, ",") == 0);
+
+ *tok = token;
+ return type;
+
+out_free_field:
+ free_flag_sym(field);
+out_free:
+ free_arg(arg);
+ free_token(token);
+ *tok = NULL;
+
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_flags(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *field;
+ enum event_type type;
+ char *token;
+
+ memset(arg, 0, sizeof(*arg));
+ arg->type = PRINT_FLAGS;
+
+ field = alloc_arg();
+ if (!field) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ goto out_free;
+ }
+
+ type = process_field_arg(event, field, &token);
+
+ /* Handle operations in the first argument */
+ while (type == EVENT_OP)
+ type = process_op(event, field, &token);
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free_field;
+ free_token(token);
+
+ arg->flags.field = field;
+
+ type = read_token_item(&token);
+ if (event_item_type(type)) {
+ arg->flags.delim = token;
+ type = read_token_item(&token);
+ }
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free;
+
+ type = process_fields(event, &arg->flags.flags, &token);
+ if (test_type_token(type, token, EVENT_DELIM, ")"))
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(tok);
+ return type;
+
+out_free_field:
+ free_arg(field);
+out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_symbols(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *field;
+ enum event_type type;
+ char *token;
+
+ memset(arg, 0, sizeof(*arg));
+ arg->type = PRINT_SYMBOL;
+
+ field = alloc_arg();
+ if (!field) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ goto out_free;
+ }
+
+ type = process_field_arg(event, field, &token);
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free_field;
+
+ arg->symbol.field = field;
+
+ type = process_fields(event, &arg->symbol.symbols, &token);
+ if (test_type_token(type, token, EVENT_DELIM, ")"))
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(tok);
+ return type;
+
+out_free_field:
+ free_arg(field);
+out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_hex(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *field;
+ enum event_type type;
+ char *token;
+
+ memset(arg, 0, sizeof(*arg));
+ arg->type = PRINT_HEX;
+
+ field = alloc_arg();
+ if (!field) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ goto out_free;
+ }
+
+ type = process_arg(event, field, &token);
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto out_free;
+
+ arg->hex.field = field;
+
+ free_token(token);
+
+ field = alloc_arg();
+ if (!field) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ *tok = NULL;
+ return EVENT_ERROR;
+ }
+
+ type = process_arg(event, field, &token);
+
+ if (test_type_token(type, token, EVENT_DELIM, ")"))
+ goto out_free;
+
+ arg->hex.size = field;
+
+ free_token(token);
+ type = read_token_item(tok);
+ return type;
+
+ out_free:
+ free_arg(field);
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_dynamic_array(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct format_field *field;
+ enum event_type type;
+ char *token;
+
+ memset(arg, 0, sizeof(*arg));
+ arg->type = PRINT_DYNAMIC_ARRAY;
+
+ /*
+ * The item within the parenthesis is another field that holds
+ * the index into where the array starts.
+ */
+ type = read_token(&token);
+ *tok = token;
+ if (type != EVENT_ITEM)
+ goto out_free;
+
+ /* Find the field */
+
+ field = pevent_find_field(event, token);
+ if (!field)
+ goto out_free;
+
+ arg->dynarray.field = field;
+ arg->dynarray.index = 0;
+
+ if (read_expected(EVENT_DELIM, ")") < 0)
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(&token);
+ *tok = token;
+ if (type != EVENT_OP || strcmp(token, "[") != 0)
+ return type;
+
+ free_token(token);
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s: not enough memory!", __func__);
+ *tok = NULL;
+ return EVENT_ERROR;
+ }
+
+ type = process_arg(event, arg, &token);
+ if (type == EVENT_ERROR)
+ goto out_free_arg;
+
+ if (!test_type_token(type, token, EVENT_OP, "]"))
+ goto out_free_arg;
+
+ free_token(token);
+ type = read_token_item(tok);
+ return type;
+
+ out_free_arg:
+ free_arg(arg);
+ out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_paren(struct event_format *event, struct print_arg *arg, char **tok)
+{
+ struct print_arg *item_arg;
+ enum event_type type;
+ char *token;
+
+ type = process_arg(event, arg, &token);
+
+ if (type == EVENT_ERROR)
+ goto out_free;
+
+ if (type == EVENT_OP)
+ type = process_op(event, arg, &token);
+
+ if (type == EVENT_ERROR)
+ goto out_free;
+
+ if (test_type_token(type, token, EVENT_DELIM, ")"))
+ goto out_free;
+
+ free_token(token);
+ type = read_token_item(&token);
+
+ /*
+ * If the next token is an item or another open paren, then
+ * this was a typecast.
+ */
+ if (event_item_type(type) ||
+ (type == EVENT_DELIM && strcmp(token, "(") == 0)) {
+
+ /* make this a typecast and contine */
+
+ /* prevous must be an atom */
+ if (arg->type != PRINT_ATOM) {
+ do_warning_event(event, "previous needed to be PRINT_ATOM");
+ goto out_free;
+ }
+
+ item_arg = alloc_arg();
+ if (!item_arg) {
+ do_warning_event(event, "%s: not enough memory!",
+ __func__);
+ goto out_free;
+ }
+
+ arg->type = PRINT_TYPE;
+ arg->typecast.type = arg->atom.atom;
+ arg->typecast.item = item_arg;
+ type = process_arg_token(event, item_arg, &token, type);
+
+ }
+
+ *tok = token;
+ return type;
+
+ out_free:
+ free_token(token);
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+
+static enum event_type
+process_str(struct event_format *event __maybe_unused, struct print_arg *arg,
+ char **tok)
+{
+ enum event_type type;
+ char *token;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto out_free;
+
+ arg->type = PRINT_STRING;
+ arg->string.string = token;
+ arg->string.offset = -1;
+
+ if (read_expected(EVENT_DELIM, ")") < 0)
+ goto out_err;
+
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+ out_free:
+ free_token(token);
+ out_err:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_bitmask(struct event_format *event __maybe_unused, struct print_arg *arg,
+ char **tok)
+{
+ enum event_type type;
+ char *token;
+
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto out_free;
+
+ arg->type = PRINT_BITMASK;
+ arg->bitmask.bitmask = token;
+ arg->bitmask.offset = -1;
+
+ if (read_expected(EVENT_DELIM, ")") < 0)
+ goto out_err;
+
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+ out_free:
+ free_token(token);
+ out_err:
+ *tok = NULL;
+ return EVENT_ERROR;
+}
+
+static struct pevent_function_handler *
+find_func_handler(struct pevent *pevent, char *func_name)
+{
+ struct pevent_function_handler *func;
+
+ if (!pevent)
+ return NULL;
+
+ for (func = pevent->func_handlers; func; func = func->next) {
+ if (strcmp(func->name, func_name) == 0)
+ break;
+ }
+
+ return func;
+}
+
+static void remove_func_handler(struct pevent *pevent, char *func_name)
+{
+ struct pevent_function_handler *func;
+ struct pevent_function_handler **next;
+
+ next = &pevent->func_handlers;
+ while ((func = *next)) {
+ if (strcmp(func->name, func_name) == 0) {
+ *next = func->next;
+ free_func_handle(func);
+ break;
+ }
+ next = &func->next;
+ }
+}
+
+static enum event_type
+process_func_handler(struct event_format *event, struct pevent_function_handler *func,
+ struct print_arg *arg, char **tok)
+{
+ struct print_arg **next_arg;
+ struct print_arg *farg;
+ enum event_type type;
+ char *token;
+ int i;
+
+ arg->type = PRINT_FUNC;
+ arg->func.func = func;
+
+ *tok = NULL;
+
+ next_arg = &(arg->func.args);
+ for (i = 0; i < func->nr_args; i++) {
+ farg = alloc_arg();
+ if (!farg) {
+ do_warning_event(event, "%s: not enough memory!",
+ __func__);
+ return EVENT_ERROR;
+ }
+
+ type = process_arg(event, farg, &token);
+ if (i < (func->nr_args - 1)) {
+ if (type != EVENT_DELIM || strcmp(token, ",") != 0) {
+ do_warning_event(event,
+ "Error: function '%s()' expects %d arguments but event %s only uses %d",
+ func->name, func->nr_args,
+ event->name, i + 1);
+ goto err;
+ }
+ } else {
+ if (type != EVENT_DELIM || strcmp(token, ")") != 0) {
+ do_warning_event(event,
+ "Error: function '%s()' only expects %d arguments but event %s has more",
+ func->name, func->nr_args, event->name);
+ goto err;
+ }
+ }
+
+ *next_arg = farg;
+ next_arg = &(farg->next);
+ free_token(token);
+ }
+
+ type = read_token(&token);
+ *tok = token;
+
+ return type;
+
+err:
+ free_arg(farg);
+ free_token(token);
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_function(struct event_format *event, struct print_arg *arg,
+ char *token, char **tok)
+{
+ struct pevent_function_handler *func;
+
+ if (strcmp(token, "__print_flags") == 0) {
+ free_token(token);
+ is_flag_field = 1;
+ return process_flags(event, arg, tok);
+ }
+ if (strcmp(token, "__print_symbolic") == 0) {
+ free_token(token);
+ is_symbolic_field = 1;
+ return process_symbols(event, arg, tok);
+ }
+ if (strcmp(token, "__print_hex") == 0) {
+ free_token(token);
+ return process_hex(event, arg, tok);
+ }
+ if (strcmp(token, "__get_str") == 0) {
+ free_token(token);
+ return process_str(event, arg, tok);
+ }
+ if (strcmp(token, "__get_bitmask") == 0) {
+ free_token(token);
+ return process_bitmask(event, arg, tok);
+ }
+ if (strcmp(token, "__get_dynamic_array") == 0) {
+ free_token(token);
+ return process_dynamic_array(event, arg, tok);
+ }
+
+ func = find_func_handler(event->pevent, token);
+ if (func) {
+ free_token(token);
+ return process_func_handler(event, func, arg, tok);
+ }
+
+ do_warning_event(event, "function %s not defined", token);
+ free_token(token);
+ return EVENT_ERROR;
+}
+
+static enum event_type
+process_arg_token(struct event_format *event, struct print_arg *arg,
+ char **tok, enum event_type type)
+{
+ char *token;
+ char *atom;
+
+ token = *tok;
+
+ switch (type) {
+ case EVENT_ITEM:
+ if (strcmp(token, "REC") == 0) {
+ free_token(token);
+ type = process_entry(event, arg, &token);
+ break;
+ }
+ atom = token;
+ /* test the next token */
+ type = read_token_item(&token);
+
+ /*
+ * If the next token is a parenthesis, then this
+ * is a function.
+ */
+ if (type == EVENT_DELIM && strcmp(token, "(") == 0) {
+ free_token(token);
+ token = NULL;
+ /* this will free atom. */
+ type = process_function(event, arg, atom, &token);
+ break;
+ }
+ /* atoms can be more than one token long */
+ while (type == EVENT_ITEM) {
+ char *new_atom;
+ new_atom = realloc(atom,
+ strlen(atom) + strlen(token) + 2);
+ if (!new_atom) {
+ free(atom);
+ *tok = NULL;
+ free_token(token);
+ return EVENT_ERROR;
+ }
+ atom = new_atom;
+ strcat(atom, " ");
+ strcat(atom, token);
+ free_token(token);
+ type = read_token_item(&token);
+ }
+
+ arg->type = PRINT_ATOM;
+ arg->atom.atom = atom;
+ break;
+
+ case EVENT_DQUOTE:
+ case EVENT_SQUOTE:
+ arg->type = PRINT_ATOM;
+ arg->atom.atom = token;
+ type = read_token_item(&token);
+ break;
+ case EVENT_DELIM:
+ if (strcmp(token, "(") == 0) {
+ free_token(token);
+ type = process_paren(event, arg, &token);
+ break;
+ }
+ case EVENT_OP:
+ /* handle single ops */
+ arg->type = PRINT_OP;
+ arg->op.op = token;
+ arg->op.left = NULL;
+ type = process_op(event, arg, &token);
+
+ /* On error, the op is freed */
+ if (type == EVENT_ERROR)
+ arg->op.op = NULL;
+
+ /* return error type if errored */
+ break;
+
+ case EVENT_ERROR ... EVENT_NEWLINE:
+ default:
+ do_warning_event(event, "unexpected type %d", type);
+ return EVENT_ERROR;
+ }
+ *tok = token;
+
+ return type;
+}
+
+static int event_read_print_args(struct event_format *event, struct print_arg **list)
+{
+ enum event_type type = EVENT_ERROR;
+ struct print_arg *arg;
+ char *token;
+ int args = 0;
+
+ do {
+ if (type == EVENT_NEWLINE) {
+ type = read_token_item(&token);
+ continue;
+ }
+
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s: not enough memory!",
+ __func__);
+ return -1;
+ }
+
+ type = process_arg(event, arg, &token);
+
+ if (type == EVENT_ERROR) {
+ free_token(token);
+ free_arg(arg);
+ return -1;
+ }
+
+ *list = arg;
+ args++;
+
+ if (type == EVENT_OP) {
+ type = process_op(event, arg, &token);
+ free_token(token);
+ if (type == EVENT_ERROR) {
+ *list = NULL;
+ free_arg(arg);
+ return -1;
+ }
+ list = &arg->next;
+ continue;
+ }
+
+ if (type == EVENT_DELIM && strcmp(token, ",") == 0) {
+ free_token(token);
+ *list = arg;
+ list = &arg->next;
+ continue;
+ }
+ break;
+ } while (type != EVENT_NONE);
+
+ if (type != EVENT_NONE && type != EVENT_ERROR)
+ free_token(token);
+
+ return args;
+}
+
+static int event_read_print(struct event_format *event)
+{
+ enum event_type type;
+ char *token;
+ int ret;
+
+ if (read_expected_item(EVENT_ITEM, "print") < 0)
+ return -1;
+
+ if (read_expected(EVENT_ITEM, "fmt") < 0)
+ return -1;
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return -1;
+
+ if (read_expect_type(EVENT_DQUOTE, &token) < 0)
+ goto fail;
+
+ concat:
+ event->print_fmt.format = token;
+ event->print_fmt.args = NULL;
+
+ /* ok to have no arg */
+ type = read_token_item(&token);
+
+ if (type == EVENT_NONE)
+ return 0;
+
+ /* Handle concatenation of print lines */
+ if (type == EVENT_DQUOTE) {
+ char *cat;
+
+ if (asprintf(&cat, "%s%s", event->print_fmt.format, token) < 0)
+ goto fail;
+ free_token(token);
+ free_token(event->print_fmt.format);
+ event->print_fmt.format = NULL;
+ token = cat;
+ goto concat;
+ }
+
+ if (test_type_token(type, token, EVENT_DELIM, ","))
+ goto fail;
+
+ free_token(token);
+
+ ret = event_read_print_args(event, &event->print_fmt.args);
+ if (ret < 0)
+ return -1;
+
+ return ret;
+
+ fail:
+ free_token(token);
+ return -1;
+}
+
+/**
+ * pevent_find_common_field - return a common field by event
+ * @event: handle for the event
+ * @name: the name of the common field to return
+ *
+ * Returns a common field from the event by the given @name.
+ * This only searchs the common fields and not all field.
+ */
+struct format_field *
+pevent_find_common_field(struct event_format *event, const char *name)
+{
+ struct format_field *format;
+
+ for (format = event->format.common_fields;
+ format; format = format->next) {
+ if (strcmp(format->name, name) == 0)
+ break;
+ }
+
+ return format;
+}
+
+/**
+ * pevent_find_field - find a non-common field
+ * @event: handle for the event
+ * @name: the name of the non-common field
+ *
+ * Returns a non-common field by the given @name.
+ * This does not search common fields.
+ */
+struct format_field *
+pevent_find_field(struct event_format *event, const char *name)
+{
+ struct format_field *format;
+
+ for (format = event->format.fields;
+ format; format = format->next) {
+ if (strcmp(format->name, name) == 0)
+ break;
+ }
+
+ return format;
+}
+
+/**
+ * pevent_find_any_field - find any field by name
+ * @event: handle for the event
+ * @name: the name of the field
+ *
+ * Returns a field by the given @name.
+ * This searchs the common field names first, then
+ * the non-common ones if a common one was not found.
+ */
+struct format_field *
+pevent_find_any_field(struct event_format *event, const char *name)
+{
+ struct format_field *format;
+
+ format = pevent_find_common_field(event, name);
+ if (format)
+ return format;
+ return pevent_find_field(event, name);
+}
+
+/**
+ * pevent_read_number - read a number from data
+ * @pevent: handle for the pevent
+ * @ptr: the raw data
+ * @size: the size of the data that holds the number
+ *
+ * Returns the number (converted to host) from the
+ * raw data.
+ */
+unsigned long long pevent_read_number(struct pevent *pevent,
+ const void *ptr, int size)
+{
+ switch (size) {
+ case 1:
+ return *(unsigned char *)ptr;
+ case 2:
+ return data2host2(pevent, ptr);
+ case 4:
+ return data2host4(pevent, ptr);
+ case 8:
+ return data2host8(pevent, ptr);
+ default:
+ /* BUG! */
+ return 0;
+ }
+}
+
+/**
+ * pevent_read_number_field - read a number from data
+ * @field: a handle to the field
+ * @data: the raw data to read
+ * @value: the value to place the number in
+ *
+ * Reads raw data according to a field offset and size,
+ * and translates it into @value.
+ *
+ * Returns 0 on success, -1 otherwise.
+ */
+int pevent_read_number_field(struct format_field *field, const void *data,
+ unsigned long long *value)
+{
+ if (!field)
+ return -1;
+ switch (field->size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ *value = pevent_read_number(field->event->pevent,
+ data + field->offset, field->size);
+ return 0;
+ default:
+ return -1;
+ }
+}
+
+static int get_common_info(struct pevent *pevent,
+ const char *type, int *offset, int *size)
+{
+ struct event_format *event;
+ struct format_field *field;
+
+ /*
+ * All events should have the same common elements.
+ * Pick any event to find where the type is;
+ */
+ if (!pevent->events) {
+ do_warning("no event_list!");
+ return -1;
+ }
+
+ event = pevent->events[0];
+ field = pevent_find_common_field(event, type);
+ if (!field)
+ return -1;
+
+ *offset = field->offset;
+ *size = field->size;
+
+ return 0;
+}
+
+static int __parse_common(struct pevent *pevent, void *data,
+ int *size, int *offset, const char *name)
+{
+ int ret;
+
+ if (!*size) {
+ ret = get_common_info(pevent, name, offset, size);
+ if (ret < 0)
+ return ret;
+ }
+ return pevent_read_number(pevent, data + *offset, *size);
+}
+
+static int trace_parse_common_type(struct pevent *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->type_size, &pevent->type_offset,
+ "common_type");
+}
+
+static int parse_common_pid(struct pevent *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->pid_size, &pevent->pid_offset,
+ "common_pid");
+}
+
+static int parse_common_pc(struct pevent *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->pc_size, &pevent->pc_offset,
+ "common_preempt_count");
+}
+
+static int parse_common_flags(struct pevent *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->flags_size, &pevent->flags_offset,
+ "common_flags");
+}
+
+static int parse_common_lock_depth(struct pevent *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->ld_size, &pevent->ld_offset,
+ "common_lock_depth");
+}
+
+static int parse_common_migrate_disable(struct pevent *pevent, void *data)
+{
+ return __parse_common(pevent, data,
+ &pevent->ld_size, &pevent->ld_offset,
+ "common_migrate_disable");
+}
+
+static int events_id_cmp(const void *a, const void *b);
+
+/**
+ * pevent_find_event - find an event by given id
+ * @pevent: a handle to the pevent
+ * @id: the id of the event
+ *
+ * Returns an event that has a given @id.
+ */
+struct event_format *pevent_find_event(struct pevent *pevent, int id)
+{
+ struct event_format **eventptr;
+ struct event_format key;
+ struct event_format *pkey = &key;
+
+ /* Check cache first */
+ if (pevent->last_event && pevent->last_event->id == id)
+ return pevent->last_event;
+
+ key.id = id;
+
+ eventptr = bsearch(&pkey, pevent->events, pevent->nr_events,
+ sizeof(*pevent->events), events_id_cmp);
+
+ if (eventptr) {
+ pevent->last_event = *eventptr;
+ return *eventptr;
+ }
+
+ return NULL;
+}
+
+/**
+ * pevent_find_event_by_name - find an event by given name
+ * @pevent: a handle to the pevent
+ * @sys: the system name to search for
+ * @name: the name of the event to search for
+ *
+ * This returns an event with a given @name and under the system
+ * @sys. If @sys is NULL the first event with @name is returned.
+ */
+struct event_format *
+pevent_find_event_by_name(struct pevent *pevent,
+ const char *sys, const char *name)
+{
+ struct event_format *event;
+ int i;
+
+ if (pevent->last_event &&
+ strcmp(pevent->last_event->name, name) == 0 &&
+ (!sys || strcmp(pevent->last_event->system, sys) == 0))
+ return pevent->last_event;
+
+ for (i = 0; i < pevent->nr_events; i++) {
+ event = pevent->events[i];
+ if (strcmp(event->name, name) == 0) {
+ if (!sys)
+ break;
+ if (strcmp(event->system, sys) == 0)
+ break;
+ }
+ }
+ if (i == pevent->nr_events)
+ event = NULL;
+
+ pevent->last_event = event;
+ return event;
+}
+
+static unsigned long long
+eval_num_arg(void *data, int size, struct event_format *event, struct print_arg *arg)
+{
+ struct pevent *pevent = event->pevent;
+ unsigned long long val = 0;
+ unsigned long long left, right;
+ struct print_arg *typearg = NULL;
+ struct print_arg *larg;
+ unsigned long offset;
+ unsigned int field_size;
+
+ switch (arg->type) {
+ case PRINT_NULL:
+ /* ?? */
+ return 0;
+ case PRINT_ATOM:
+ return strtoull(arg->atom.atom, NULL, 0);
+ case PRINT_FIELD:
+ if (!arg->field.field) {
+ arg->field.field = pevent_find_any_field(event, arg->field.name);
+ if (!arg->field.field)
+ goto out_warning_field;
+
+ }
+ /* must be a number */
+ val = pevent_read_number(pevent, data + arg->field.field->offset,
+ arg->field.field->size);
+ break;
+ case PRINT_FLAGS:
+ case PRINT_SYMBOL:
+ case PRINT_HEX:
+ break;
+ case PRINT_TYPE:
+ val = eval_num_arg(data, size, event, arg->typecast.item);
+ return eval_type(val, arg, 0);
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ case PRINT_BITMASK:
+ return 0;
+ case PRINT_FUNC: {
+ struct trace_seq s;
+ trace_seq_init(&s);
+ val = process_defined_func(&s, data, size, event, arg);
+ trace_seq_destroy(&s);
+ return val;
+ }
+ case PRINT_OP:
+ if (strcmp(arg->op.op, "[") == 0) {
+ /*
+ * Arrays are special, since we don't want
+ * to read the arg as is.
+ */
+ right = eval_num_arg(data, size, event, arg->op.right);
+
+ /* handle typecasts */
+ larg = arg->op.left;
+ while (larg->type == PRINT_TYPE) {
+ if (!typearg)
+ typearg = larg;
+ larg = larg->typecast.item;
+ }
+
+ /* Default to long size */
+ field_size = pevent->long_size;
+
+ switch (larg->type) {
+ case PRINT_DYNAMIC_ARRAY:
+ offset = pevent_read_number(pevent,
+ data + larg->dynarray.field->offset,
+ larg->dynarray.field->size);
+ if (larg->dynarray.field->elementsize)
+ field_size = larg->dynarray.field->elementsize;
+ /*
+ * The actual length of the dynamic array is stored
+ * in the top half of the field, and the offset
+ * is in the bottom half of the 32 bit field.
+ */
+ offset &= 0xffff;
+ offset += right;
+ break;
+ case PRINT_FIELD:
+ if (!larg->field.field) {
+ larg->field.field =
+ pevent_find_any_field(event, larg->field.name);
+ if (!larg->field.field) {
+ arg = larg;
+ goto out_warning_field;
+ }
+ }
+ field_size = larg->field.field->elementsize;
+ offset = larg->field.field->offset +
+ right * larg->field.field->elementsize;
+ break;
+ default:
+ goto default_op; /* oops, all bets off */
+ }
+ val = pevent_read_number(pevent,
+ data + offset, field_size);
+ if (typearg)
+ val = eval_type(val, typearg, 1);
+ break;
+ } else if (strcmp(arg->op.op, "?") == 0) {
+ left = eval_num_arg(data, size, event, arg->op.left);
+ arg = arg->op.right;
+ if (left)
+ val = eval_num_arg(data, size, event, arg->op.left);
+ else
+ val = eval_num_arg(data, size, event, arg->op.right);
+ break;
+ }
+ default_op:
+ left = eval_num_arg(data, size, event, arg->op.left);
+ right = eval_num_arg(data, size, event, arg->op.right);
+ switch (arg->op.op[0]) {
+ case '!':
+ switch (arg->op.op[1]) {
+ case 0:
+ val = !right;
+ break;
+ case '=':
+ val = left != right;
+ break;
+ default:
+ goto out_warning_op;
+ }
+ break;
+ case '~':
+ val = ~right;
+ break;
+ case '|':
+ if (arg->op.op[1])
+ val = left || right;
+ else
+ val = left | right;
+ break;
+ case '&':
+ if (arg->op.op[1])
+ val = left && right;
+ else
+ val = left & right;
+ break;
+ case '<':
+ switch (arg->op.op[1]) {
+ case 0:
+ val = left < right;
+ break;
+ case '<':
+ val = left << right;
+ break;
+ case '=':
+ val = left <= right;
+ break;
+ default:
+ goto out_warning_op;
+ }
+ break;
+ case '>':
+ switch (arg->op.op[1]) {
+ case 0:
+ val = left > right;
+ break;
+ case '>':
+ val = left >> right;
+ break;
+ case '=':
+ val = left >= right;
+ break;
+ default:
+ goto out_warning_op;
+ }
+ break;
+ case '=':
+ if (arg->op.op[1] != '=')
+ goto out_warning_op;
+
+ val = left == right;
+ break;
+ case '-':
+ val = left - right;
+ break;
+ case '+':
+ val = left + right;
+ break;
+ case '/':
+ val = left / right;
+ break;
+ case '*':
+ val = left * right;
+ break;
+ default:
+ goto out_warning_op;
+ }
+ break;
+ case PRINT_DYNAMIC_ARRAY:
+ /* Without [], we pass the address to the dynamic data */
+ offset = pevent_read_number(pevent,
+ data + arg->dynarray.field->offset,
+ arg->dynarray.field->size);
+ /*
+ * The actual length of the dynamic array is stored
+ * in the top half of the field, and the offset
+ * is in the bottom half of the 32 bit field.
+ */
+ offset &= 0xffff;
+ val = (unsigned long long)((unsigned long)data + offset);
+ break;
+ default: /* not sure what to do there */
+ return 0;
+ }
+ return val;
+
+out_warning_op:
+ do_warning_event(event, "%s: unknown op '%s'", __func__, arg->op.op);
+ return 0;
+
+out_warning_field:
+ do_warning_event(event, "%s: field %s not found",
+ __func__, arg->field.name);
+ return 0;
+}
+
+struct flag {
+ const char *name;
+ unsigned long long value;
+};
+
+static const struct flag flags[] = {
+ { "HI_SOFTIRQ", 0 },
+ { "TIMER_SOFTIRQ", 1 },
+ { "NET_TX_SOFTIRQ", 2 },
+ { "NET_RX_SOFTIRQ", 3 },
+ { "BLOCK_SOFTIRQ", 4 },
+ { "BLOCK_IOPOLL_SOFTIRQ", 5 },
+ { "TASKLET_SOFTIRQ", 6 },
+ { "SCHED_SOFTIRQ", 7 },
+ { "HRTIMER_SOFTIRQ", 8 },
+ { "RCU_SOFTIRQ", 9 },
+
+ { "HRTIMER_NORESTART", 0 },
+ { "HRTIMER_RESTART", 1 },
+};
+
+static unsigned long long eval_flag(const char *flag)
+{
+ int i;
+
+ /*
+ * Some flags in the format files do not get converted.
+ * If the flag is not numeric, see if it is something that
+ * we already know about.
+ */
+ if (isdigit(flag[0]))
+ return strtoull(flag, NULL, 0);
+
+ for (i = 0; i < (int)(sizeof(flags)/sizeof(flags[0])); i++)
+ if (strcmp(flags[i].name, flag) == 0)
+ return flags[i].value;
+
+ return 0;
+}
+
+static void print_str_to_seq(struct trace_seq *s, const char *format,
+ int len_arg, const char *str)
+{
+ if (len_arg >= 0)
+ trace_seq_printf(s, format, len_arg, str);
+ else
+ trace_seq_printf(s, format, str);
+}
+
+static void print_bitmask_to_seq(struct pevent *pevent,
+ struct trace_seq *s, const char *format,
+ int len_arg, const void *data, int size)
+{
+ int nr_bits = size * 8;
+ int str_size = (nr_bits + 3) / 4;
+ int len = 0;
+ char buf[3];
+ char *str;
+ int index;
+ int i;
+
+ /*
+ * The kernel likes to put in commas every 32 bits, we
+ * can do the same.
+ */
+ str_size += (nr_bits - 1) / 32;
+
+ str = malloc(str_size + 1);
+ if (!str) {
+ do_warning("%s: not enough memory!", __func__);
+ return;
+ }
+ str[str_size] = 0;
+
+ /* Start out with -2 for the two chars per byte */
+ for (i = str_size - 2; i >= 0; i -= 2) {
+ /*
+ * data points to a bit mask of size bytes.
+ * In the kernel, this is an array of long words, thus
+ * endianess is very important.
+ */
+ if (pevent->file_bigendian)
+ index = size - (len + 1);
+ else
+ index = len;
+
+ snprintf(buf, 3, "%02x", *((unsigned char *)data + index));
+ memcpy(str + i, buf, 2);
+ len++;
+ if (!(len & 3) && i > 0) {
+ i--;
+ str[i] = ',';
+ }
+ }
+
+ if (len_arg >= 0)
+ trace_seq_printf(s, format, len_arg, str);
+ else
+ trace_seq_printf(s, format, str);
+
+ free(str);
+}
+
+static void print_str_arg(struct trace_seq *s, void *data, int size,
+ struct event_format *event, const char *format,
+ int len_arg, struct print_arg *arg)
+{
+ struct pevent *pevent = event->pevent;
+ struct print_flag_sym *flag;
+ struct format_field *field;
+ struct printk_map *printk;
+ unsigned long long val, fval;
+ unsigned long addr;
+ char *str;
+ unsigned char *hex;
+ int print;
+ int i, len;
+
+ switch (arg->type) {
+ case PRINT_NULL:
+ /* ?? */
+ return;
+ case PRINT_ATOM:
+ print_str_to_seq(s, format, len_arg, arg->atom.atom);
+ return;
+ case PRINT_FIELD:
+ field = arg->field.field;
+ if (!field) {
+ field = pevent_find_any_field(event, arg->field.name);
+ if (!field) {
+ str = arg->field.name;
+ goto out_warning_field;
+ }
+ arg->field.field = field;
+ }
+ /* Zero sized fields, mean the rest of the data */
+ len = field->size ? : size - field->offset;
+
+ /*
+ * Some events pass in pointers. If this is not an array
+ * and the size is the same as long_size, assume that it
+ * is a pointer.
+ */
+ if (!(field->flags & FIELD_IS_ARRAY) &&
+ field->size == pevent->long_size) {
+ addr = *(unsigned long *)(data + field->offset);
+ /* Check if it matches a print format */
+ printk = find_printk(pevent, addr);
+ if (printk)
+ trace_seq_puts(s, printk->printk);
+ else
+ trace_seq_printf(s, "%lx", addr);
+ break;
+ }
+ str = malloc(len + 1);
+ if (!str) {
+ do_warning_event(event, "%s: not enough memory!",
+ __func__);
+ return;
+ }
+ memcpy(str, data + field->offset, len);
+ str[len] = 0;
+ print_str_to_seq(s, format, len_arg, str);
+ free(str);
+ break;
+ case PRINT_FLAGS:
+ val = eval_num_arg(data, size, event, arg->flags.field);
+ print = 0;
+ for (flag = arg->flags.flags; flag; flag = flag->next) {
+ fval = eval_flag(flag->value);
+ if (!val && !fval) {
+ print_str_to_seq(s, format, len_arg, flag->str);
+ break;
+ }
+ if (fval && (val & fval) == fval) {
+ if (print && arg->flags.delim)
+ trace_seq_puts(s, arg->flags.delim);
+ print_str_to_seq(s, format, len_arg, flag->str);
+ print = 1;
+ val &= ~fval;
+ }
+ }
+ break;
+ case PRINT_SYMBOL:
+ val = eval_num_arg(data, size, event, arg->symbol.field);
+ for (flag = arg->symbol.symbols; flag; flag = flag->next) {
+ fval = eval_flag(flag->value);
+ if (val == fval) {
+ print_str_to_seq(s, format, len_arg, flag->str);
+ break;
+ }
+ }
+ break;
+ case PRINT_HEX:
+ if (arg->hex.field->type == PRINT_DYNAMIC_ARRAY) {
+ unsigned long offset;
+ offset = pevent_read_number(pevent,
+ data + arg->hex.field->dynarray.field->offset,
+ arg->hex.field->dynarray.field->size);
+ hex = data + (offset & 0xffff);
+ } else {
+ field = arg->hex.field->field.field;
+ if (!field) {
+ str = arg->hex.field->field.name;
+ field = pevent_find_any_field(event, str);
+ if (!field)
+ goto out_warning_field;
+ arg->hex.field->field.field = field;
+ }
+ hex = data + field->offset;
+ }
+ len = eval_num_arg(data, size, event, arg->hex.size);
+ for (i = 0; i < len; i++) {
+ if (i)
+ trace_seq_putc(s, ' ');
+ trace_seq_printf(s, "%02x", hex[i]);
+ }
+ break;
+
+ case PRINT_TYPE:
+ break;
+ case PRINT_STRING: {
+ int str_offset;
+
+ if (arg->string.offset == -1) {
+ struct format_field *f;
+
+ f = pevent_find_any_field(event, arg->string.string);
+ arg->string.offset = f->offset;
+ }
+ str_offset = data2host4(pevent, data + arg->string.offset);
+ str_offset &= 0xffff;
+ print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);
+ break;
+ }
+ case PRINT_BSTRING:
+ print_str_to_seq(s, format, len_arg, arg->string.string);
+ break;
+ case PRINT_BITMASK: {
+ int bitmask_offset;
+ int bitmask_size;
+
+ if (arg->bitmask.offset == -1) {
+ struct format_field *f;
+
+ f = pevent_find_any_field(event, arg->bitmask.bitmask);
+ arg->bitmask.offset = f->offset;
+ }
+ bitmask_offset = data2host4(pevent, data + arg->bitmask.offset);
+ bitmask_size = bitmask_offset >> 16;
+ bitmask_offset &= 0xffff;
+ print_bitmask_to_seq(pevent, s, format, len_arg,
+ data + bitmask_offset, bitmask_size);
+ break;
+ }
+ case PRINT_OP:
+ /*
+ * The only op for string should be ? :
+ */
+ if (arg->op.op[0] != '?')
+ return;
+ val = eval_num_arg(data, size, event, arg->op.left);
+ if (val)
+ print_str_arg(s, data, size, event,
+ format, len_arg, arg->op.right->op.left);
+ else
+ print_str_arg(s, data, size, event,
+ format, len_arg, arg->op.right->op.right);
+ break;
+ case PRINT_FUNC:
+ process_defined_func(s, data, size, event, arg);
+ break;
+ default:
+ /* well... */
+ break;
+ }
+
+ return;
+
+out_warning_field:
+ do_warning_event(event, "%s: field %s not found",
+ __func__, arg->field.name);
+}
+
+static unsigned long long
+process_defined_func(struct trace_seq *s, void *data, int size,
+ struct event_format *event, struct print_arg *arg)
+{
+ struct pevent_function_handler *func_handle = arg->func.func;
+ struct pevent_func_params *param;
+ unsigned long long *args;
+ unsigned long long ret;
+ struct print_arg *farg;
+ struct trace_seq str;
+ struct save_str {
+ struct save_str *next;
+ char *str;
+ } *strings = NULL, *string;
+ int i;
+
+ if (!func_handle->nr_args) {
+ ret = (*func_handle->func)(s, NULL);
+ goto out;
+ }
+
+ farg = arg->func.args;
+ param = func_handle->params;
+
+ ret = ULLONG_MAX;
+ args = malloc(sizeof(*args) * func_handle->nr_args);
+ if (!args)
+ goto out;
+
+ for (i = 0; i < func_handle->nr_args; i++) {
+ switch (param->type) {
+ case PEVENT_FUNC_ARG_INT:
+ case PEVENT_FUNC_ARG_LONG:
+ case PEVENT_FUNC_ARG_PTR:
+ args[i] = eval_num_arg(data, size, event, farg);
+ break;
+ case PEVENT_FUNC_ARG_STRING:
+ trace_seq_init(&str);
+ print_str_arg(&str, data, size, event, "%s", -1, farg);
+ trace_seq_terminate(&str);
+ string = malloc(sizeof(*string));
+ if (!string) {
+ do_warning_event(event, "%s(%d): malloc str",
+ __func__, __LINE__);
+ goto out_free;
+ }
+ string->next = strings;
+ string->str = strdup(str.buffer);
+ if (!string->str) {
+ free(string);
+ do_warning_event(event, "%s(%d): malloc str",
+ __func__, __LINE__);
+ goto out_free;
+ }
+ args[i] = (uintptr_t)string->str;
+ strings = string;
+ trace_seq_destroy(&str);
+ break;
+ default:
+ /*
+ * Something went totally wrong, this is not
+ * an input error, something in this code broke.
+ */
+ do_warning_event(event, "Unexpected end of arguments\n");
+ goto out_free;
+ }
+ farg = farg->next;
+ param = param->next;
+ }
+
+ ret = (*func_handle->func)(s, args);
+out_free:
+ free(args);
+ while (strings) {
+ string = strings;
+ strings = string->next;
+ free(string->str);
+ free(string);
+ }
+
+ out:
+ /* TBD : handle return type here */
+ return ret;
+}
+
+static void free_args(struct print_arg *args)
+{
+ struct print_arg *next;
+
+ while (args) {
+ next = args->next;
+
+ free_arg(args);
+ args = next;
+ }
+}
+
+static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struct event_format *event)
+{
+ struct pevent *pevent = event->pevent;
+ struct format_field *field, *ip_field;
+ struct print_arg *args, *arg, **next;
+ unsigned long long ip, val;
+ char *ptr;
+ void *bptr;
+ int vsize;
+
+ field = pevent->bprint_buf_field;
+ ip_field = pevent->bprint_ip_field;
+
+ if (!field) {
+ field = pevent_find_field(event, "buf");
+ if (!field) {
+ do_warning_event(event, "can't find buffer field for binary printk");
+ return NULL;
+ }
+ ip_field = pevent_find_field(event, "ip");
+ if (!ip_field) {
+ do_warning_event(event, "can't find ip field for binary printk");
+ return NULL;
+ }
+ pevent->bprint_buf_field = field;
+ pevent->bprint_ip_field = ip_field;
+ }
+
+ ip = pevent_read_number(pevent, data + ip_field->offset, ip_field->size);
+
+ /*
+ * The first arg is the IP pointer.
+ */
+ args = alloc_arg();
+ if (!args) {
+ do_warning_event(event, "%s(%d): not enough memory!",
+ __func__, __LINE__);
+ return NULL;
+ }
+ arg = args;
+ arg->next = NULL;
+ next = &arg->next;
+
+ arg->type = PRINT_ATOM;
+
+ if (asprintf(&arg->atom.atom, "%lld", ip) < 0)
+ goto out_free;
+
+ /* skip the first "%pf: " */
+ for (ptr = fmt + 5, bptr = data + field->offset;
+ bptr < data + size && *ptr; ptr++) {
+ int ls = 0;
+
+ if (*ptr == '%') {
+ process_again:
+ ptr++;
+ switch (*ptr) {
+ case '%':
+ break;
+ case 'l':
+ ls++;
+ goto process_again;
+ case 'L':
+ ls = 2;
+ goto process_again;
+ case '0' ... '9':
+ goto process_again;
+ case '.':
+ goto process_again;
+ case 'p':
+ ls = 1;
+ /* fall through */
+ case 'd':
+ case 'u':
+ case 'x':
+ case 'i':
+ switch (ls) {
+ case 0:
+ vsize = 4;
+ break;
+ case 1:
+ vsize = pevent->long_size;
+ break;
+ case 2:
+ vsize = 8;
+ break;
+ default:
+ vsize = ls; /* ? */
+ break;
+ }
+ /* fall through */
+ case '*':
+ if (*ptr == '*')
+ vsize = 4;
+
+ /* the pointers are always 4 bytes aligned */
+ bptr = (void *)(((unsigned long)bptr + 3) &
+ ~3);
+ val = pevent_read_number(pevent, bptr, vsize);
+ bptr += vsize;
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s(%d): not enough memory!",
+ __func__, __LINE__);
+ goto out_free;
+ }
+ arg->next = NULL;
+ arg->type = PRINT_ATOM;
+ if (asprintf(&arg->atom.atom, "%lld", val) < 0) {
+ free(arg);
+ goto out_free;
+ }
+ *next = arg;
+ next = &arg->next;
+ /*
+ * The '*' case means that an arg is used as the length.
+ * We need to continue to figure out for what.
+ */
+ if (*ptr == '*')
+ goto process_again;
+
+ break;
+ case 's':
+ arg = alloc_arg();
+ if (!arg) {
+ do_warning_event(event, "%s(%d): not enough memory!",
+ __func__, __LINE__);
+ goto out_free;
+ }
+ arg->next = NULL;
+ arg->type = PRINT_BSTRING;
+ arg->string.string = strdup(bptr);
+ if (!arg->string.string)
+ goto out_free;
+ bptr += strlen(bptr) + 1;
+ *next = arg;
+ next = &arg->next;
+ default:
+ break;
+ }
+ }
+ }
+
+ return args;
+
+out_free:
+ free_args(args);
+ return NULL;
+}
+
+static char *
+get_bprint_format(void *data, int size __maybe_unused,
+ struct event_format *event)
+{
+ struct pevent *pevent = event->pevent;
+ unsigned long long addr;
+ struct format_field *field;
+ struct printk_map *printk;
+ char *format;
+
+ field = pevent->bprint_fmt_field;
+
+ if (!field) {
+ field = pevent_find_field(event, "fmt");
+ if (!field) {
+ do_warning_event(event, "can't find format field for binary printk");
+ return NULL;
+ }
+ pevent->bprint_fmt_field = field;
+ }
+
+ addr = pevent_read_number(pevent, data + field->offset, field->size);
+
+ printk = find_printk(pevent, addr);
+ if (!printk) {
+ if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0)
+ return NULL;
+ return format;
+ }
+
+ if (asprintf(&format, "%s: %s", "%pf", printk->printk) < 0)
+ return NULL;
+
+ return format;
+}
+
+static void print_mac_arg(struct trace_seq *s, int mac, void *data, int size,
+ struct event_format *event, struct print_arg *arg)
+{
+ unsigned char *buf;
+ const char *fmt = "%.2x:%.2x:%.2x:%.2x:%.2x:%.2x";
+
+ if (arg->type == PRINT_FUNC) {
+ process_defined_func(s, data, size, event, arg);
+ return;
+ }
+
+ if (arg->type != PRINT_FIELD) {
+ trace_seq_printf(s, "ARG TYPE NOT FIELD BUT %d",
+ arg->type);
+ return;
+ }
+
+ if (mac == 'm')
+ fmt = "%.2x%.2x%.2x%.2x%.2x%.2x";
+ if (!arg->field.field) {
+ arg->field.field =
+ pevent_find_any_field(event, arg->field.name);
+ if (!arg->field.field) {
+ do_warning_event(event, "%s: field %s not found",
+ __func__, arg->field.name);
+ return;
+ }
+ }
+ if (arg->field.field->size != 6) {
+ trace_seq_printf(s, "INVALIDMAC");
+ return;
+ }
+ buf = data + arg->field.field->offset;
+ trace_seq_printf(s, fmt, buf[0], buf[1], buf[2], buf[3], buf[4], buf[5]);
+}
+
+static int is_printable_array(char *p, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; i < len && p[i]; i++)
+ if (!isprint(p[i]) && !isspace(p[i]))
+ return 0;
+ return 1;
+}
+
+static void print_event_fields(struct trace_seq *s, void *data,
+ int size __maybe_unused,
+ struct event_format *event)
+{
+ struct format_field *field;
+ unsigned long long val;
+ unsigned int offset, len, i;
+
+ field = event->format.fields;
+ while (field) {
+ trace_seq_printf(s, " %s=", field->name);
+ if (field->flags & FIELD_IS_ARRAY) {
+ offset = field->offset;
+ len = field->size;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ val = pevent_read_number(event->pevent, data + offset, len);
+ offset = val;
+ len = offset >> 16;
+ offset &= 0xffff;
+ }
+ if (field->flags & FIELD_IS_STRING &&
+ is_printable_array(data + offset, len)) {
+ trace_seq_printf(s, "%s", (char *)data + offset);
+ } else {
+ trace_seq_puts(s, "ARRAY[");
+ for (i = 0; i < len; i++) {
+ if (i)
+ trace_seq_puts(s, ", ");
+ trace_seq_printf(s, "%02x",
+ *((unsigned char *)data + offset + i));
+ }
+ trace_seq_putc(s, ']');
+ field->flags &= ~FIELD_IS_STRING;
+ }
+ } else {
+ val = pevent_read_number(event->pevent, data + field->offset,
+ field->size);
+ if (field->flags & FIELD_IS_POINTER) {
+ trace_seq_printf(s, "0x%llx", val);
+ } else if (field->flags & FIELD_IS_SIGNED) {
+ switch (field->size) {
+ case 4:
+ /*
+ * If field is long then print it in hex.
+ * A long usually stores pointers.
+ */
+ if (field->flags & FIELD_IS_LONG)
+ trace_seq_printf(s, "0x%x", (int)val);
+ else
+ trace_seq_printf(s, "%d", (int)val);
+ break;
+ case 2:
+ trace_seq_printf(s, "%2d", (short)val);
+ break;
+ case 1:
+ trace_seq_printf(s, "%1d", (char)val);
+ break;
+ default:
+ trace_seq_printf(s, "%lld", val);
+ }
+ } else {
+ if (field->flags & FIELD_IS_LONG)
+ trace_seq_printf(s, "0x%llx", val);
+ else
+ trace_seq_printf(s, "%llu", val);
+ }
+ }
+ field = field->next;
+ }
+}
+
+static void pretty_print(struct trace_seq *s, void *data, int size, struct event_format *event)
+{
+ struct pevent *pevent = event->pevent;
+ struct print_fmt *print_fmt = &event->print_fmt;
+ struct print_arg *arg = print_fmt->args;
+ struct print_arg *args = NULL;
+ const char *ptr = print_fmt->format;
+ unsigned long long val;
+ struct func_map *func;
+ const char *saveptr;
+ struct trace_seq p;
+ char *bprint_fmt = NULL;
+ char format[32];
+ int show_func;
+ int len_as_arg;
+ int len_arg;
+ int len;
+ int ls;
+
+ if (event->flags & EVENT_FL_FAILED) {
+ trace_seq_printf(s, "[FAILED TO PARSE]");
+ print_event_fields(s, data, size, event);
+ return;
+ }
+
+ if (event->flags & EVENT_FL_ISBPRINT) {
+ bprint_fmt = get_bprint_format(data, size, event);
+ args = make_bprint_args(bprint_fmt, data, size, event);
+ arg = args;
+ ptr = bprint_fmt;
+ }
+
+ for (; *ptr; ptr++) {
+ ls = 0;
+ if (*ptr == '\\') {
+ ptr++;
+ switch (*ptr) {
+ case 'n':
+ trace_seq_putc(s, '\n');
+ break;
+ case 't':
+ trace_seq_putc(s, '\t');
+ break;
+ case 'r':
+ trace_seq_putc(s, '\r');
+ break;
+ case '\\':
+ trace_seq_putc(s, '\\');
+ break;
+ default:
+ trace_seq_putc(s, *ptr);
+ break;
+ }
+
+ } else if (*ptr == '%') {
+ saveptr = ptr;
+ show_func = 0;
+ len_as_arg = 0;
+ cont_process:
+ ptr++;
+ switch (*ptr) {
+ case '%':
+ trace_seq_putc(s, '%');
+ break;
+ case '#':
+ /* FIXME: need to handle properly */
+ goto cont_process;
+ case 'h':
+ ls--;
+ goto cont_process;
+ case 'l':
+ ls++;
+ goto cont_process;
+ case 'L':
+ ls = 2;
+ goto cont_process;
+ case '*':
+ /* The argument is the length. */
+ if (!arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= EVENT_FL_FAILED;
+ goto out_failed;
+ }
+ len_arg = eval_num_arg(data, size, event, arg);
+ len_as_arg = 1;
+ arg = arg->next;
+ goto cont_process;
+ case '.':
+ case 'z':
+ case 'Z':
+ case '0' ... '9':
+ goto cont_process;
+ case 'p':
+ if (pevent->long_size == 4)
+ ls = 1;
+ else
+ ls = 2;
+
+ if (*(ptr+1) == 'F' ||
+ *(ptr+1) == 'f') {
+ ptr++;
+ show_func = *ptr;
+ } else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') {
+ print_mac_arg(s, *(ptr+1), data, size, event, arg);
+ ptr++;
+ arg = arg->next;
+ break;
+ }
+
+ /* fall through */
+ case 'd':
+ case 'i':
+ case 'x':
+ case 'X':
+ case 'u':
+ if (!arg) {
+ do_warning_event(event, "no argument match");
+ event->flags |= EVENT_FL_FAILED;
+ goto out_failed;
+ }
+
+ len = ((unsigned long)ptr + 1) -
+ (unsigned long)saveptr;
+
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= EVENT_FL_FAILED;
+ len = 31;
+ }
+
+ memcpy(format, saveptr, len);
+ format[len] = 0;
+
+ val = eval_num_arg(data, size, event, arg);
+ arg = arg->next;
+
+ if (show_func) {
+ func = find_func(pevent, val);
+ if (func) {
+ trace_seq_puts(s, func->func);
+ if (show_func == 'F')
+ trace_seq_printf(s,
+ "+0x%llx",
+ val - func->addr);
+ break;
+ }
+ }
+ if (pevent->long_size == 8 && ls &&
+ sizeof(long) != 8) {
+ char *p;
+
+ ls = 2;
+ /* make %l into %ll */
+ p = strchr(format, 'l');
+ if (p)
+ memmove(p+1, p, strlen(p)+1);
+ else if (strcmp(format, "%p") == 0)
+ strcpy(format, "0x%llx");
+ }
+ switch (ls) {
+ case -2:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg, (char)val);
+ else
+ trace_seq_printf(s, format, (char)val);
+ break;
+ case -1:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg, (short)val);
+ else
+ trace_seq_printf(s, format, (short)val);
+ break;
+ case 0:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg, (int)val);
+ else
+ trace_seq_printf(s, format, (int)val);
+ break;
+ case 1:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg, (long)val);
+ else
+ trace_seq_printf(s, format, (long)val);
+ break;
+ case 2:
+ if (len_as_arg)
+ trace_seq_printf(s, format, len_arg,
+ (long long)val);
+ else
+ trace_seq_printf(s, format, (long long)val);
+ break;
+ default:
+ do_warning_event(event, "bad count (%d)", ls);
+ event->flags |= EVENT_FL_FAILED;
+ }
+ break;
+ case 's':
+ if (!arg) {
+ do_warning_event(event, "no matching argument");
+ event->flags |= EVENT_FL_FAILED;
+ goto out_failed;
+ }
+
+ len = ((unsigned long)ptr + 1) -
+ (unsigned long)saveptr;
+
+ /* should never happen */
+ if (len > 31) {
+ do_warning_event(event, "bad format!");
+ event->flags |= EVENT_FL_FAILED;
+ len = 31;
+ }
+
+ memcpy(format, saveptr, len);
+ format[len] = 0;
+ if (!len_as_arg)
+ len_arg = -1;
+ /* Use helper trace_seq */
+ trace_seq_init(&p);
+ print_str_arg(&p, data, size, event,
+ format, len_arg, arg);
+ trace_seq_terminate(&p);
+ trace_seq_puts(s, p.buffer);
+ trace_seq_destroy(&p);
+ arg = arg->next;
+ break;
+ default:
+ trace_seq_printf(s, ">%c<", *ptr);
+
+ }
+ } else
+ trace_seq_putc(s, *ptr);
+ }
+
+ if (event->flags & EVENT_FL_FAILED) {
+out_failed:
+ trace_seq_printf(s, "[FAILED TO PARSE]");
+ }
+
+ if (args) {
+ free_args(args);
+ free(bprint_fmt);
+ }
+}
+
+/**
+ * pevent_data_lat_fmt - parse the data for the latency format
+ * @pevent: a handle to the pevent
+ * @s: the trace_seq to write to
+ * @record: the record to read from
+ *
+ * This parses out the Latency format (interrupts disabled,
+ * need rescheduling, in hard/soft interrupt, preempt count
+ * and lock depth) and places it into the trace_seq.
+ */
+void pevent_data_lat_fmt(struct pevent *pevent,
+ struct trace_seq *s, struct pevent_record *record)
+{
+ static int check_lock_depth = 1;
+ static int check_migrate_disable = 1;
+ static int lock_depth_exists;
+ static int migrate_disable_exists;
+ unsigned int lat_flags;
+ unsigned int pc;
+ int lock_depth;
+ int migrate_disable;
+ int hardirq;
+ int softirq;
+ void *data = record->data;
+
+ lat_flags = parse_common_flags(pevent, data);
+ pc = parse_common_pc(pevent, data);
+ /* lock_depth may not always exist */
+ if (lock_depth_exists)
+ lock_depth = parse_common_lock_depth(pevent, data);
+ else if (check_lock_depth) {
+ lock_depth = parse_common_lock_depth(pevent, data);
+ if (lock_depth < 0)
+ check_lock_depth = 0;
+ else
+ lock_depth_exists = 1;
+ }
+
+ /* migrate_disable may not always exist */
+ if (migrate_disable_exists)
+ migrate_disable = parse_common_migrate_disable(pevent, data);
+ else if (check_migrate_disable) {
+ migrate_disable = parse_common_migrate_disable(pevent, data);
+ if (migrate_disable < 0)
+ check_migrate_disable = 0;
+ else
+ migrate_disable_exists = 1;
+ }
+
+ hardirq = lat_flags & TRACE_FLAG_HARDIRQ;
+ softirq = lat_flags & TRACE_FLAG_SOFTIRQ;
+
+ trace_seq_printf(s, "%c%c%c",
+ (lat_flags & TRACE_FLAG_IRQS_OFF) ? 'd' :
+ (lat_flags & TRACE_FLAG_IRQS_NOSUPPORT) ?
+ 'X' : '.',
+ (lat_flags & TRACE_FLAG_NEED_RESCHED) ?
+ 'N' : '.',
+ (hardirq && softirq) ? 'H' :
+ hardirq ? 'h' : softirq ? 's' : '.');
+
+ if (pc)
+ trace_seq_printf(s, "%x", pc);
+ else
+ trace_seq_putc(s, '.');
+
+ if (migrate_disable_exists) {
+ if (migrate_disable < 0)
+ trace_seq_putc(s, '.');
+ else
+ trace_seq_printf(s, "%d", migrate_disable);
+ }
+
+ if (lock_depth_exists) {
+ if (lock_depth < 0)
+ trace_seq_putc(s, '.');
+ else
+ trace_seq_printf(s, "%d", lock_depth);
+ }
+
+ trace_seq_terminate(s);
+}
+
+/**
+ * pevent_data_type - parse out the given event type
+ * @pevent: a handle to the pevent
+ * @rec: the record to read from
+ *
+ * This returns the event id from the @rec.
+ */
+int pevent_data_type(struct pevent *pevent, struct pevent_record *rec)
+{
+ return trace_parse_common_type(pevent, rec->data);
+}
+
+/**
+ * pevent_data_event_from_type - find the event by a given type
+ * @pevent: a handle to the pevent
+ * @type: the type of the event.
+ *
+ * This returns the event form a given @type;
+ */
+struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type)
+{
+ return pevent_find_event(pevent, type);
+}
+
+/**
+ * pevent_data_pid - parse the PID from raw data
+ * @pevent: a handle to the pevent
+ * @rec: the record to parse
+ *
+ * This returns the PID from a raw data.
+ */
+int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec)
+{
+ return parse_common_pid(pevent, rec->data);
+}
+
+/**
+ * pevent_data_comm_from_pid - return the command line from PID
+ * @pevent: a handle to the pevent
+ * @pid: the PID of the task to search for
+ *
+ * This returns a pointer to the command line that has the given
+ * @pid.
+ */
+const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid)
+{
+ const char *comm;
+
+ comm = find_cmdline(pevent, pid);
+ return comm;
+}
+
+/**
+ * pevent_data_comm_from_pid - parse the data into the print format
+ * @s: the trace_seq to write to
+ * @event: the handle to the event
+ * @record: the record to read from
+ *
+ * This parses the raw @data using the given @event information and
+ * writes the print format into the trace_seq.
+ */
+void pevent_event_info(struct trace_seq *s, struct event_format *event,
+ struct pevent_record *record)
+{
+ int print_pretty = 1;
+
+ if (event->pevent->print_raw || (event->flags & EVENT_FL_PRINTRAW))
+ print_event_fields(s, record->data, record->size, event);
+ else {
+
+ if (event->handler && !(event->flags & EVENT_FL_NOHANDLE))
+ print_pretty = event->handler(s, record, event,
+ event->context);
+
+ if (print_pretty)
+ pretty_print(s, record->data, record->size, event);
+ }
+
+ trace_seq_terminate(s);
+}
+
+static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock)
+{
+ if (!use_trace_clock)
+ return true;
+
+ if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global")
+ || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf"))
+ return true;
+
+ /* trace_clock is setting in tsc or counter mode */
+ return false;
+}
+
+void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
+ struct pevent_record *record, bool use_trace_clock)
+{
+ static const char *spaces = " "; /* 20 spaces */
+ struct event_format *event;
+ unsigned long secs;
+ unsigned long usecs;
+ unsigned long nsecs;
+ const char *comm;
+ void *data = record->data;
+ int type;
+ int pid;
+ int len;
+ int p;
+ bool use_usec_format;
+
+ use_usec_format = is_timestamp_in_us(pevent->trace_clock,
+ use_trace_clock);
+ if (use_usec_format) {
+ secs = record->ts / NSECS_PER_SEC;
+ nsecs = record->ts - secs * NSECS_PER_SEC;
+ }
+
+ if (record->size < 0) {
+ do_warning("ug! negative record size %d", record->size);
+ return;
+ }
+
+ type = trace_parse_common_type(pevent, data);
+
+ event = pevent_find_event(pevent, type);
+ if (!event) {
+ do_warning("ug! no event found for type %d", type);
+ return;
+ }
+
+ pid = parse_common_pid(pevent, data);
+ comm = find_cmdline(pevent, pid);
+
+ if (pevent->latency_format) {
+ trace_seq_printf(s, "%8.8s-%-5d %3d",
+ comm, pid, record->cpu);
+ pevent_data_lat_fmt(pevent, s, record);
+ } else
+ trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu);
+
+ if (use_usec_format) {
+ if (pevent->flags & PEVENT_NSEC_OUTPUT) {
+ usecs = nsecs;
+ p = 9;
+ } else {
+ usecs = (nsecs + 500) / NSECS_PER_USEC;
+ p = 6;
+ }
+
+ trace_seq_printf(s, " %5lu.%0*lu: %s: ",
+ secs, p, usecs, event->name);
+ } else
+ trace_seq_printf(s, " %12llu: %s: ",
+ record->ts, event->name);
+
+ /* Space out the event names evenly. */
+ len = strlen(event->name);
+ if (len < 20)
+ trace_seq_printf(s, "%.*s", 20 - len, spaces);
+
+ pevent_event_info(s, event, record);
+}
+
+static int events_id_cmp(const void *a, const void *b)
+{
+ struct event_format * const * ea = a;
+ struct event_format * const * eb = b;
+
+ if ((*ea)->id < (*eb)->id)
+ return -1;
+
+ if ((*ea)->id > (*eb)->id)
+ return 1;
+
+ return 0;
+}
+
+static int events_name_cmp(const void *a, const void *b)
+{
+ struct event_format * const * ea = a;
+ struct event_format * const * eb = b;
+ int res;
+
+ res = strcmp((*ea)->name, (*eb)->name);
+ if (res)
+ return res;
+
+ res = strcmp((*ea)->system, (*eb)->system);
+ if (res)
+ return res;
+
+ return events_id_cmp(a, b);
+}
+
+static int events_system_cmp(const void *a, const void *b)
+{
+ struct event_format * const * ea = a;
+ struct event_format * const * eb = b;
+ int res;
+
+ res = strcmp((*ea)->system, (*eb)->system);
+ if (res)
+ return res;
+
+ res = strcmp((*ea)->name, (*eb)->name);
+ if (res)
+ return res;
+
+ return events_id_cmp(a, b);
+}
+
+struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type sort_type)
+{
+ struct event_format **events;
+ int (*sort)(const void *a, const void *b);
+
+ events = pevent->sort_events;
+
+ if (events && pevent->last_type == sort_type)
+ return events;
+
+ if (!events) {
+ events = malloc(sizeof(*events) * (pevent->nr_events + 1));
+ if (!events)
+ return NULL;
+
+ memcpy(events, pevent->events, sizeof(*events) * pevent->nr_events);
+ events[pevent->nr_events] = NULL;
+
+ pevent->sort_events = events;
+
+ /* the internal events are sorted by id */
+ if (sort_type == EVENT_SORT_ID) {
+ pevent->last_type = sort_type;
+ return events;
+ }
+ }
+
+ switch (sort_type) {
+ case EVENT_SORT_ID:
+ sort = events_id_cmp;
+ break;
+ case EVENT_SORT_NAME:
+ sort = events_name_cmp;
+ break;
+ case EVENT_SORT_SYSTEM:
+ sort = events_system_cmp;
+ break;
+ default:
+ return events;
+ }
+
+ qsort(events, pevent->nr_events, sizeof(*events), sort);
+ pevent->last_type = sort_type;
+
+ return events;
+}
+
+static struct format_field **
+get_event_fields(const char *type, const char *name,
+ int count, struct format_field *list)
+{
+ struct format_field **fields;
+ struct format_field *field;
+ int i = 0;
+
+ fields = malloc(sizeof(*fields) * (count + 1));
+ if (!fields)
+ return NULL;
+
+ for (field = list; field; field = field->next) {
+ fields[i++] = field;
+ if (i == count + 1) {
+ do_warning("event %s has more %s fields than specified",
+ name, type);
+ i--;
+ break;
+ }
+ }
+
+ if (i != count)
+ do_warning("event %s has less %s fields than specified",
+ name, type);
+
+ fields[i] = NULL;
+
+ return fields;
+}
+
+/**
+ * pevent_event_common_fields - return a list of common fields for an event
+ * @event: the event to return the common fields of.
+ *
+ * Returns an allocated array of fields. The last item in the array is NULL.
+ * The array must be freed with free().
+ */
+struct format_field **pevent_event_common_fields(struct event_format *event)
+{
+ return get_event_fields("common", event->name,
+ event->format.nr_common,
+ event->format.common_fields);
+}
+
+/**
+ * pevent_event_fields - return a list of event specific fields for an event
+ * @event: the event to return the fields of.
+ *
+ * Returns an allocated array of fields. The last item in the array is NULL.
+ * The array must be freed with free().
+ */
+struct format_field **pevent_event_fields(struct event_format *event)
+{
+ return get_event_fields("event", event->name,
+ event->format.nr_fields,
+ event->format.fields);
+}
+
+static void print_fields(struct trace_seq *s, struct print_flag_sym *field)
+{
+ trace_seq_printf(s, "{ %s, %s }", field->value, field->str);
+ if (field->next) {
+ trace_seq_puts(s, ", ");
+ print_fields(s, field->next);
+ }
+}
+
+/* for debugging */
+static void print_args(struct print_arg *args)
+{
+ int print_paren = 1;
+ struct trace_seq s;
+
+ switch (args->type) {
+ case PRINT_NULL:
+ printf("null");
+ break;
+ case PRINT_ATOM:
+ printf("%s", args->atom.atom);
+ break;
+ case PRINT_FIELD:
+ printf("REC->%s", args->field.name);
+ break;
+ case PRINT_FLAGS:
+ printf("__print_flags(");
+ print_args(args->flags.field);
+ printf(", %s, ", args->flags.delim);
+ trace_seq_init(&s);
+ print_fields(&s, args->flags.flags);
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+ printf(")");
+ break;
+ case PRINT_SYMBOL:
+ printf("__print_symbolic(");
+ print_args(args->symbol.field);
+ printf(", ");
+ trace_seq_init(&s);
+ print_fields(&s, args->symbol.symbols);
+ trace_seq_do_printf(&s);
+ trace_seq_destroy(&s);
+ printf(")");
+ break;
+ case PRINT_HEX:
+ printf("__print_hex(");
+ print_args(args->hex.field);
+ printf(", ");
+ print_args(args->hex.size);
+ printf(")");
+ break;
+ case PRINT_STRING:
+ case PRINT_BSTRING:
+ printf("__get_str(%s)", args->string.string);
+ break;
+ case PRINT_BITMASK:
+ printf("__get_bitmask(%s)", args->bitmask.bitmask);
+ break;
+ case PRINT_TYPE:
+ printf("(%s)", args->typecast.type);
+ print_args(args->typecast.item);
+ break;
+ case PRINT_OP:
+ if (strcmp(args->op.op, ":") == 0)
+ print_paren = 0;
+ if (print_paren)
+ printf("(");
+ print_args(args->op.left);
+ printf(" %s ", args->op.op);
+ print_args(args->op.right);
+ if (print_paren)
+ printf(")");
+ break;
+ default:
+ /* we should warn... */
+ return;
+ }
+ if (args->next) {
+ printf("\n");
+ print_args(args->next);
+ }
+}
+
+static void parse_header_field(const char *field,
+ int *offset, int *size, int mandatory)
+{
+ unsigned long long save_input_buf_ptr;
+ unsigned long long save_input_buf_siz;
+ char *token;
+ int type;
+
+ save_input_buf_ptr = input_buf_ptr;
+ save_input_buf_siz = input_buf_siz;
+
+ if (read_expected(EVENT_ITEM, "field") < 0)
+ return;
+ if (read_expected(EVENT_OP, ":") < 0)
+ return;
+
+ /* type */
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+ free_token(token);
+
+ /*
+ * If this is not a mandatory field, then test it first.
+ */
+ if (mandatory) {
+ if (read_expected(EVENT_ITEM, field) < 0)
+ return;
+ } else {
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+ if (strcmp(token, field) != 0)
+ goto discard;
+ free_token(token);
+ }
+
+ if (read_expected(EVENT_OP, ";") < 0)
+ return;
+ if (read_expected(EVENT_ITEM, "offset") < 0)
+ return;
+ if (read_expected(EVENT_OP, ":") < 0)
+ return;
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+ *offset = atoi(token);
+ free_token(token);
+ if (read_expected(EVENT_OP, ";") < 0)
+ return;
+ if (read_expected(EVENT_ITEM, "size") < 0)
+ return;
+ if (read_expected(EVENT_OP, ":") < 0)
+ return;
+ if (read_expect_type(EVENT_ITEM, &token) < 0)
+ goto fail;
+ *size = atoi(token);
+ free_token(token);
+ if (read_expected(EVENT_OP, ";") < 0)
+ return;
+ type = read_token(&token);
+ if (type != EVENT_NEWLINE) {
+ /* newer versions of the kernel have a "signed" type */
+ if (type != EVENT_ITEM)
+ goto fail;
+
+ if (strcmp(token, "signed") != 0)
+ goto fail;
+
+ free_token(token);
+
+ if (read_expected(EVENT_OP, ":") < 0)
+ return;
+
+ if (read_expect_type(EVENT_ITEM, &token))
+ goto fail;
+
+ free_token(token);
+ if (read_expected(EVENT_OP, ";") < 0)
+ return;
+
+ if (read_expect_type(EVENT_NEWLINE, &token))
+ goto fail;
+ }
+ fail:
+ free_token(token);
+ return;
+
+ discard:
+ input_buf_ptr = save_input_buf_ptr;
+ input_buf_siz = save_input_buf_siz;
+ *offset = 0;
+ *size = 0;
+ free_token(token);
+}
+
+/**
+ * pevent_parse_header_page - parse the data stored in the header page
+ * @pevent: the handle to the pevent
+ * @buf: the buffer storing the header page format string
+ * @size: the size of @buf
+ * @long_size: the long size to use if there is no header
+ *
+ * This parses the header page format for information on the
+ * ring buffer used. The @buf should be copied from
+ *
+ * /sys/kernel/debug/tracing/events/header_page
+ */
+int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size,
+ int long_size)
+{
+ int ignore;
+
+ if (!size) {
+ /*
+ * Old kernels did not have header page info.
+ * Sorry but we just use what we find here in user space.
+ */
+ pevent->header_page_ts_size = sizeof(long long);
+ pevent->header_page_size_size = long_size;
+ pevent->header_page_data_offset = sizeof(long long) + long_size;
+ pevent->old_format = 1;
+ return -1;
+ }
+ init_input_buf(buf, size);
+
+ parse_header_field("timestamp", &pevent->header_page_ts_offset,
+ &pevent->header_page_ts_size, 1);
+ parse_header_field("commit", &pevent->header_page_size_offset,
+ &pevent->header_page_size_size, 1);
+ parse_header_field("overwrite", &pevent->header_page_overwrite,
+ &ignore, 0);
+ parse_header_field("data", &pevent->header_page_data_offset,
+ &pevent->header_page_data_size, 1);
+
+ return 0;
+}
+
+static int event_matches(struct event_format *event,
+ int id, const char *sys_name,
+ const char *event_name)
+{
+ if (id >= 0 && id != event->id)
+ return 0;
+
+ if (event_name && (strcmp(event_name, event->name) != 0))
+ return 0;
+
+ if (sys_name && (strcmp(sys_name, event->system) != 0))
+ return 0;
+
+ return 1;
+}
+
+static void free_handler(struct event_handler *handle)
+{
+ free((void *)handle->sys_name);
+ free((void *)handle->event_name);
+ free(handle);
+}
+
+static int find_event_handle(struct pevent *pevent, struct event_format *event)
+{
+ struct event_handler *handle, **next;
+
+ for (next = &pevent->handlers; *next;
+ next = &(*next)->next) {
+ handle = *next;
+ if (event_matches(event, handle->id,
+ handle->sys_name,
+ handle->event_name))
+ break;
+ }
+
+ if (!(*next))
+ return 0;
+
+ pr_stat("overriding event (%d) %s:%s with new print handler",
+ event->id, event->system, event->name);
+
+ event->handler = handle->func;
+ event->context = handle->context;
+
+ *next = handle->next;
+ free_handler(handle);
+
+ return 1;
+}
+
+/**
+ * __pevent_parse_format - parse the event format
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+enum pevent_errno __pevent_parse_format(struct event_format **eventp,
+ struct pevent *pevent, const char *buf,
+ unsigned long size, const char *sys)
+{
+ struct event_format *event;
+ int ret;
+
+ init_input_buf(buf, size);
+
+ *eventp = event = alloc_event();
+ if (!event)
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+
+ event->name = event_read_name();
+ if (!event->name) {
+ /* Bad event? */
+ ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ goto event_alloc_failed;
+ }
+
+ if (strcmp(sys, "ftrace") == 0) {
+ event->flags |= EVENT_FL_ISFTRACE;
+
+ if (strcmp(event->name, "bprint") == 0)
+ event->flags |= EVENT_FL_ISBPRINT;
+ }
+
+ event->id = event_read_id();
+ if (event->id < 0) {
+ ret = PEVENT_ERRNO__READ_ID_FAILED;
+ /*
+ * This isn't an allocation error actually.
+ * But as the ID is critical, just bail out.
+ */
+ goto event_alloc_failed;
+ }
+
+ event->system = strdup(sys);
+ if (!event->system) {
+ ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ goto event_alloc_failed;
+ }
+
+ /* Add pevent to event so that it can be referenced */
+ event->pevent = pevent;
+
+ ret = event_read_format(event);
+ if (ret < 0) {
+ ret = PEVENT_ERRNO__READ_FORMAT_FAILED;
+ goto event_parse_failed;
+ }
+
+ /*
+ * If the event has an override, don't print warnings if the event
+ * print format fails to parse.
+ */
+ if (pevent && find_event_handle(pevent, event))
+ show_warning = 0;
+
+ ret = event_read_print(event);
+ show_warning = 1;
+
+ if (ret < 0) {
+ ret = PEVENT_ERRNO__READ_PRINT_FAILED;
+ goto event_parse_failed;
+ }
+
+ if (!ret && (event->flags & EVENT_FL_ISFTRACE)) {
+ struct format_field *field;
+ struct print_arg *arg, **list;
+
+ /* old ftrace had no args */
+ list = &event->print_fmt.args;
+ for (field = event->format.fields; field; field = field->next) {
+ arg = alloc_arg();
+ if (!arg) {
+ event->flags |= EVENT_FL_FAILED;
+ return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED;
+ }
+ arg->type = PRINT_FIELD;
+ arg->field.name = strdup(field->name);
+ if (!arg->field.name) {
+ event->flags |= EVENT_FL_FAILED;
+ free_arg(arg);
+ return PEVENT_ERRNO__OLD_FTRACE_ARG_FAILED;
+ }
+ arg->field.field = field;
+ *list = arg;
+ list = &arg->next;
+ }
+ return 0;
+ }
+
+ return 0;
+
+ event_parse_failed:
+ event->flags |= EVENT_FL_FAILED;
+ return ret;
+
+ event_alloc_failed:
+ free(event->system);
+ free(event->name);
+ free(event);
+ *eventp = NULL;
+ return ret;
+}
+
+static enum pevent_errno
+__pevent_parse_event(struct pevent *pevent,
+ struct event_format **eventp,
+ const char *buf, unsigned long size,
+ const char *sys)
+{
+ int ret = __pevent_parse_format(eventp, pevent, buf, size, sys);
+ struct event_format *event = *eventp;
+
+ if (event == NULL)
+ return ret;
+
+ if (pevent && add_event(pevent, event)) {
+ ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ goto event_add_failed;
+ }
+
+#define PRINT_ARGS 0
+ if (PRINT_ARGS && event->print_fmt.args)
+ print_args(event->print_fmt.args);
+
+ return 0;
+
+event_add_failed:
+ pevent_free_format(event);
+ return ret;
+}
+
+/**
+ * pevent_parse_format - parse the event format
+ * @pevent: the handle to the pevent
+ * @eventp: returned format
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+enum pevent_errno pevent_parse_format(struct pevent *pevent,
+ struct event_format **eventp,
+ const char *buf,
+ unsigned long size, const char *sys)
+{
+ return __pevent_parse_event(pevent, eventp, buf, size, sys);
+}
+
+/**
+ * pevent_parse_event - parse the event format
+ * @pevent: the handle to the pevent
+ * @buf: the buffer storing the event format string
+ * @size: the size of @buf
+ * @sys: the system the event belongs to
+ *
+ * This parses the event format and creates an event structure
+ * to quickly parse raw data for a given event.
+ *
+ * These files currently come from:
+ *
+ * /sys/kernel/debug/tracing/events/.../.../format
+ */
+enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf,
+ unsigned long size, const char *sys)
+{
+ struct event_format *event = NULL;
+ return __pevent_parse_event(pevent, &event, buf, size, sys);
+}
+
+#undef _PE
+#define _PE(code, str) str
+static const char * const pevent_error_str[] = {
+ PEVENT_ERRORS
+};
+#undef _PE
+
+int pevent_strerror(struct pevent *pevent __maybe_unused,
+ enum pevent_errno errnum, char *buf, size_t buflen)
+{
+ int idx;
+ const char *msg;
+
+ if (errnum >= 0) {
+ msg = strerror_r(errnum, buf, buflen);
+ if (msg != buf) {
+ size_t len = strlen(msg);
+ memcpy(buf, msg, min(buflen - 1, len));
+ *(buf + min(buflen - 1, len)) = '\0';
+ }
+ return 0;
+ }
+
+ if (errnum <= __PEVENT_ERRNO__START ||
+ errnum >= __PEVENT_ERRNO__END)
+ return -1;
+
+ idx = errnum - __PEVENT_ERRNO__START - 1;
+ msg = pevent_error_str[idx];
+ snprintf(buf, buflen, "%s", msg);
+
+ return 0;
+}
+
+int get_field_val(struct trace_seq *s, struct format_field *field,
+ const char *name, struct pevent_record *record,
+ unsigned long long *val, int err)
+{
+ if (!field) {
+ if (err)
+ trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
+ return -1;
+ }
+
+ if (pevent_read_number_field(field, record->data, val)) {
+ if (err)
+ trace_seq_printf(s, " %s=INVALID", name);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * pevent_get_field_raw - return the raw pointer into the data field
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @len: place to store the field length.
+ * @err: print default error if failed.
+ *
+ * Returns a pointer into record->data of the field and places
+ * the length of the field in @len.
+ *
+ * On failure, it returns NULL.
+ */
+void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
+ const char *name, struct pevent_record *record,
+ int *len, int err)
+{
+ struct format_field *field;
+ void *data = record->data;
+ unsigned offset;
+ int dummy;
+
+ if (!event)
+ return NULL;
+
+ field = pevent_find_field(event, name);
+
+ if (!field) {
+ if (err)
+ trace_seq_printf(s, "<CANT FIND FIELD %s>", name);
+ return NULL;
+ }
+
+ /* Allow @len to be NULL */
+ if (!len)
+ len = &dummy;
+
+ offset = field->offset;
+ if (field->flags & FIELD_IS_DYNAMIC) {
+ offset = pevent_read_number(event->pevent,
+ data + offset, field->size);
+ *len = offset >> 16;
+ offset &= 0xffff;
+ } else
+ *len = field->size;
+
+ return data + offset;
+}
+
+/**
+ * pevent_get_field_val - find a field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success -1 on field not found.
+ */
+int pevent_get_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct pevent_record *record,
+ unsigned long long *val, int err)
+{
+ struct format_field *field;
+
+ if (!event)
+ return -1;
+
+ field = pevent_find_field(event, name);
+
+ return get_field_val(s, field, name, record, val, err);
+}
+
+/**
+ * pevent_get_common_field_val - find a common field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success -1 on field not found.
+ */
+int pevent_get_common_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct pevent_record *record,
+ unsigned long long *val, int err)
+{
+ struct format_field *field;
+
+ if (!event)
+ return -1;
+
+ field = pevent_find_common_field(event, name);
+
+ return get_field_val(s, field, name, record, val, err);
+}
+
+/**
+ * pevent_get_any_field_val - find a any field and return its value
+ * @s: The seq to print to on error
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @val: place to store the value of the field.
+ * @err: print default error if failed.
+ *
+ * Returns 0 on success -1 on field not found.
+ */
+int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct pevent_record *record,
+ unsigned long long *val, int err)
+{
+ struct format_field *field;
+
+ if (!event)
+ return -1;
+
+ field = pevent_find_any_field(event, name);
+
+ return get_field_val(s, field, name, record, val, err);
+}
+
+/**
+ * pevent_print_num_field - print a field and a format
+ * @s: The seq to print to
+ * @fmt: The printf format to print the field with.
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @err: print default error if failed.
+ *
+ * Returns: 0 on success, -1 field not found, or 1 if buffer is full.
+ */
+int pevent_print_num_field(struct trace_seq *s, const char *fmt,
+ struct event_format *event, const char *name,
+ struct pevent_record *record, int err)
+{
+ struct format_field *field = pevent_find_field(event, name);
+ unsigned long long val;
+
+ if (!field)
+ goto failed;
+
+ if (pevent_read_number_field(field, record->data, &val))
+ goto failed;
+
+ return trace_seq_printf(s, fmt, val);
+
+ failed:
+ if (err)
+ trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
+ return -1;
+}
+
+/**
+ * pevent_print_func_field - print a field and a format for function pointers
+ * @s: The seq to print to
+ * @fmt: The printf format to print the field with.
+ * @event: the event that the field is for
+ * @name: The name of the field
+ * @record: The record with the field name.
+ * @err: print default error if failed.
+ *
+ * Returns: 0 on success, -1 field not found, or 1 if buffer is full.
+ */
+int pevent_print_func_field(struct trace_seq *s, const char *fmt,
+ struct event_format *event, const char *name,
+ struct pevent_record *record, int err)
+{
+ struct format_field *field = pevent_find_field(event, name);
+ struct pevent *pevent = event->pevent;
+ unsigned long long val;
+ struct func_map *func;
+ char tmp[128];
+
+ if (!field)
+ goto failed;
+
+ if (pevent_read_number_field(field, record->data, &val))
+ goto failed;
+
+ func = find_func(pevent, val);
+
+ if (func)
+ snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val);
+ else
+ sprintf(tmp, "0x%08llx", val);
+
+ return trace_seq_printf(s, fmt, tmp);
+
+ failed:
+ if (err)
+ trace_seq_printf(s, "CAN'T FIND FIELD \"%s\"", name);
+ return -1;
+}
+
+static void free_func_handle(struct pevent_function_handler *func)
+{
+ struct pevent_func_params *params;
+
+ free(func->name);
+
+ while (func->params) {
+ params = func->params;
+ func->params = params->next;
+ free(params);
+ }
+
+ free(func);
+}
+
+/**
+ * pevent_register_print_function - register a helper function
+ * @pevent: the handle to the pevent
+ * @func: the function to process the helper function
+ * @ret_type: the return type of the helper function
+ * @name: the name of the helper function
+ * @parameters: A list of enum pevent_func_arg_type
+ *
+ * Some events may have helper functions in the print format arguments.
+ * This allows a plugin to dynamically create a way to process one
+ * of these functions.
+ *
+ * The @parameters is a variable list of pevent_func_arg_type enums that
+ * must end with PEVENT_FUNC_ARG_VOID.
+ */
+int pevent_register_print_function(struct pevent *pevent,
+ pevent_func_handler func,
+ enum pevent_func_arg_type ret_type,
+ char *name, ...)
+{
+ struct pevent_function_handler *func_handle;
+ struct pevent_func_params **next_param;
+ struct pevent_func_params *param;
+ enum pevent_func_arg_type type;
+ va_list ap;
+ int ret;
+
+ func_handle = find_func_handler(pevent, name);
+ if (func_handle) {
+ /*
+ * This is most like caused by the users own
+ * plugins updating the function. This overrides the
+ * system defaults.
+ */
+ pr_stat("override of function helper '%s'", name);
+ remove_func_handler(pevent, name);
+ }
+
+ func_handle = calloc(1, sizeof(*func_handle));
+ if (!func_handle) {
+ do_warning("Failed to allocate function handler");
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ func_handle->ret_type = ret_type;
+ func_handle->name = strdup(name);
+ func_handle->func = func;
+ if (!func_handle->name) {
+ do_warning("Failed to allocate function name");
+ free(func_handle);
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ next_param = &(func_handle->params);
+ va_start(ap, name);
+ for (;;) {
+ type = va_arg(ap, enum pevent_func_arg_type);
+ if (type == PEVENT_FUNC_ARG_VOID)
+ break;
+
+ if (type >= PEVENT_FUNC_ARG_MAX_TYPES) {
+ do_warning("Invalid argument type %d", type);
+ ret = PEVENT_ERRNO__INVALID_ARG_TYPE;
+ goto out_free;
+ }
+
+ param = malloc(sizeof(*param));
+ if (!param) {
+ do_warning("Failed to allocate function param");
+ ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ goto out_free;
+ }
+ param->type = type;
+ param->next = NULL;
+
+ *next_param = param;
+ next_param = &(param->next);
+
+ func_handle->nr_args++;
+ }
+ va_end(ap);
+
+ func_handle->next = pevent->func_handlers;
+ pevent->func_handlers = func_handle;
+
+ return 0;
+ out_free:
+ va_end(ap);
+ free_func_handle(func_handle);
+ return ret;
+}
+
+/**
+ * pevent_unregister_print_function - unregister a helper function
+ * @pevent: the handle to the pevent
+ * @func: the function to process the helper function
+ * @name: the name of the helper function
+ *
+ * This function removes existing print handler for function @name.
+ *
+ * Returns 0 if the handler was removed successully, -1 otherwise.
+ */
+int pevent_unregister_print_function(struct pevent *pevent,
+ pevent_func_handler func, char *name)
+{
+ struct pevent_function_handler *func_handle;
+
+ func_handle = find_func_handler(pevent, name);
+ if (func_handle && func_handle->func == func) {
+ remove_func_handler(pevent, name);
+ return 0;
+ }
+ return -1;
+}
+
+static struct event_format *pevent_search_event(struct pevent *pevent, int id,
+ const char *sys_name,
+ const char *event_name)
+{
+ struct event_format *event;
+
+ if (id >= 0) {
+ /* search by id */
+ event = pevent_find_event(pevent, id);
+ if (!event)
+ return NULL;
+ if (event_name && (strcmp(event_name, event->name) != 0))
+ return NULL;
+ if (sys_name && (strcmp(sys_name, event->system) != 0))
+ return NULL;
+ } else {
+ event = pevent_find_event_by_name(pevent, sys_name, event_name);
+ if (!event)
+ return NULL;
+ }
+ return event;
+}
+
+/**
+ * pevent_register_event_handler - register a way to parse an event
+ * @pevent: the handle to the pevent
+ * @id: the id of the event to register
+ * @sys_name: the system name the event belongs to
+ * @event_name: the name of the event
+ * @func: the function to call to parse the event information
+ * @context: the data to be passed to @func
+ *
+ * This function allows a developer to override the parsing of
+ * a given event. If for some reason the default print format
+ * is not sufficient, this function will register a function
+ * for an event to be used to parse the data instead.
+ *
+ * If @id is >= 0, then it is used to find the event.
+ * else @sys_name and @event_name are used.
+ */
+int pevent_register_event_handler(struct pevent *pevent, int id,
+ const char *sys_name, const char *event_name,
+ pevent_event_handler_func func, void *context)
+{
+ struct event_format *event;
+ struct event_handler *handle;
+
+ event = pevent_search_event(pevent, id, sys_name, event_name);
+ if (event == NULL)
+ goto not_found;
+
+ pr_stat("overriding event (%d) %s:%s with new print handler",
+ event->id, event->system, event->name);
+
+ event->handler = func;
+ event->context = context;
+ return 0;
+
+ not_found:
+ /* Save for later use. */
+ handle = calloc(1, sizeof(*handle));
+ if (!handle) {
+ do_warning("Failed to allocate event handler");
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ handle->id = id;
+ if (event_name)
+ handle->event_name = strdup(event_name);
+ if (sys_name)
+ handle->sys_name = strdup(sys_name);
+
+ if ((event_name && !handle->event_name) ||
+ (sys_name && !handle->sys_name)) {
+ do_warning("Failed to allocate event/sys name");
+ free((void *)handle->event_name);
+ free((void *)handle->sys_name);
+ free(handle);
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ handle->func = func;
+ handle->next = pevent->handlers;
+ pevent->handlers = handle;
+ handle->context = context;
+
+ return -1;
+}
+
+static int handle_matches(struct event_handler *handler, int id,
+ const char *sys_name, const char *event_name,
+ pevent_event_handler_func func, void *context)
+{
+ if (id >= 0 && id != handler->id)
+ return 0;
+
+ if (event_name && (strcmp(event_name, handler->event_name) != 0))
+ return 0;
+
+ if (sys_name && (strcmp(sys_name, handler->sys_name) != 0))
+ return 0;
+
+ if (func != handler->func || context != handler->context)
+ return 0;
+
+ return 1;
+}
+
+/**
+ * pevent_unregister_event_handler - unregister an existing event handler
+ * @pevent: the handle to the pevent
+ * @id: the id of the event to unregister
+ * @sys_name: the system name the handler belongs to
+ * @event_name: the name of the event handler
+ * @func: the function to call to parse the event information
+ * @context: the data to be passed to @func
+ *
+ * This function removes existing event handler (parser).
+ *
+ * If @id is >= 0, then it is used to find the event.
+ * else @sys_name and @event_name are used.
+ *
+ * Returns 0 if handler was removed successfully, -1 if event was not found.
+ */
+int pevent_unregister_event_handler(struct pevent *pevent, int id,
+ const char *sys_name, const char *event_name,
+ pevent_event_handler_func func, void *context)
+{
+ struct event_format *event;
+ struct event_handler *handle;
+ struct event_handler **next;
+
+ event = pevent_search_event(pevent, id, sys_name, event_name);
+ if (event == NULL)
+ goto not_found;
+
+ if (event->handler == func && event->context == context) {
+ pr_stat("removing override handler for event (%d) %s:%s. Going back to default handler.",
+ event->id, event->system, event->name);
+
+ event->handler = NULL;
+ event->context = NULL;
+ return 0;
+ }
+
+not_found:
+ for (next = &pevent->handlers; *next; next = &(*next)->next) {
+ handle = *next;
+ if (handle_matches(handle, id, sys_name, event_name,
+ func, context))
+ break;
+ }
+
+ if (!(*next))
+ return -1;
+
+ *next = handle->next;
+ free_handler(handle);
+
+ return 0;
+}
+
+/**
+ * pevent_alloc - create a pevent handle
+ */
+struct pevent *pevent_alloc(void)
+{
+ struct pevent *pevent = calloc(1, sizeof(*pevent));
+
+ if (pevent)
+ pevent->ref_count = 1;
+
+ return pevent;
+}
+
+void pevent_ref(struct pevent *pevent)
+{
+ pevent->ref_count++;
+}
+
+static void free_format_fields(struct format_field *field)
+{
+ struct format_field *next;
+
+ while (field) {
+ next = field->next;
+ free(field->type);
+ free(field->name);
+ free(field);
+ field = next;
+ }
+}
+
+static void free_formats(struct format *format)
+{
+ free_format_fields(format->common_fields);
+ free_format_fields(format->fields);
+}
+
+void pevent_free_format(struct event_format *event)
+{
+ free(event->name);
+ free(event->system);
+
+ free_formats(&event->format);
+
+ free(event->print_fmt.format);
+ free_args(event->print_fmt.args);
+
+ free(event);
+}
+
+/**
+ * pevent_free - free a pevent handle
+ * @pevent: the pevent handle to free
+ */
+void pevent_free(struct pevent *pevent)
+{
+ struct cmdline_list *cmdlist, *cmdnext;
+ struct func_list *funclist, *funcnext;
+ struct printk_list *printklist, *printknext;
+ struct pevent_function_handler *func_handler;
+ struct event_handler *handle;
+ int i;
+
+ if (!pevent)
+ return;
+
+ cmdlist = pevent->cmdlist;
+ funclist = pevent->funclist;
+ printklist = pevent->printklist;
+
+ pevent->ref_count--;
+ if (pevent->ref_count)
+ return;
+
+ if (pevent->cmdlines) {
+ for (i = 0; i < pevent->cmdline_count; i++)
+ free(pevent->cmdlines[i].comm);
+ free(pevent->cmdlines);
+ }
+
+ while (cmdlist) {
+ cmdnext = cmdlist->next;
+ free(cmdlist->comm);
+ free(cmdlist);
+ cmdlist = cmdnext;
+ }
+
+ if (pevent->func_map) {
+ for (i = 0; i < (int)pevent->func_count; i++) {
+ free(pevent->func_map[i].func);
+ free(pevent->func_map[i].mod);
+ }
+ free(pevent->func_map);
+ }
+
+ while (funclist) {
+ funcnext = funclist->next;
+ free(funclist->func);
+ free(funclist->mod);
+ free(funclist);
+ funclist = funcnext;
+ }
+
+ while (pevent->func_handlers) {
+ func_handler = pevent->func_handlers;
+ pevent->func_handlers = func_handler->next;
+ free_func_handle(func_handler);
+ }
+
+ if (pevent->printk_map) {
+ for (i = 0; i < (int)pevent->printk_count; i++)
+ free(pevent->printk_map[i].printk);
+ free(pevent->printk_map);
+ }
+
+ while (printklist) {
+ printknext = printklist->next;
+ free(printklist->printk);
+ free(printklist);
+ printklist = printknext;
+ }
+
+ for (i = 0; i < pevent->nr_events; i++)
+ pevent_free_format(pevent->events[i]);
+
+ while (pevent->handlers) {
+ handle = pevent->handlers;
+ pevent->handlers = handle->next;
+ free_handler(handle);
+ }
+
+ free(pevent->events);
+ free(pevent->sort_events);
+
+ free(pevent);
+}
+
+void pevent_unref(struct pevent *pevent)
+{
+ pevent_free(pevent);
+}
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
new file mode 100644
index 00000000000..7a3873ff9a4
--- /dev/null
+++ b/tools/lib/traceevent/event-parse.h
@@ -0,0 +1,944 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef _PARSE_EVENTS_H
+#define _PARSE_EVENTS_H
+
+#include <stdbool.h>
+#include <stdarg.h>
+#include <regex.h>
+#include <string.h>
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((unused))
+#endif
+
+/* ----------------------- trace_seq ----------------------- */
+
+
+#ifndef TRACE_SEQ_BUF_SIZE
+#define TRACE_SEQ_BUF_SIZE 4096
+#endif
+
+#ifndef DEBUG_RECORD
+#define DEBUG_RECORD 0
+#endif
+
+struct pevent_record {
+ unsigned long long ts;
+ unsigned long long offset;
+ long long missed_events; /* buffer dropped events before */
+ int record_size; /* size of binary record */
+ int size; /* size of data */
+ void *data;
+ int cpu;
+ int ref_count;
+ int locked; /* Do not free, even if ref_count is zero */
+ void *priv;
+#if DEBUG_RECORD
+ struct pevent_record *prev;
+ struct pevent_record *next;
+ long alloc_addr;
+#endif
+};
+
+enum trace_seq_fail {
+ TRACE_SEQ__GOOD,
+ TRACE_SEQ__BUFFER_POISONED,
+ TRACE_SEQ__MEM_ALLOC_FAILED,
+};
+
+/*
+ * Trace sequences are used to allow a function to call several other functions
+ * to create a string of data to use (up to a max of PAGE_SIZE).
+ */
+
+struct trace_seq {
+ char *buffer;
+ unsigned int buffer_size;
+ unsigned int len;
+ unsigned int readpos;
+ enum trace_seq_fail state;
+};
+
+void trace_seq_init(struct trace_seq *s);
+void trace_seq_reset(struct trace_seq *s);
+void trace_seq_destroy(struct trace_seq *s);
+
+extern int trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+ __attribute__ ((format (printf, 2, 3)));
+extern int trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+ __attribute__ ((format (printf, 2, 0)));
+
+extern int trace_seq_puts(struct trace_seq *s, const char *str);
+extern int trace_seq_putc(struct trace_seq *s, unsigned char c);
+
+extern void trace_seq_terminate(struct trace_seq *s);
+
+extern int trace_seq_do_printf(struct trace_seq *s);
+
+
+/* ----------------------- pevent ----------------------- */
+
+struct pevent;
+struct event_format;
+
+typedef int (*pevent_event_handler_func)(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event,
+ void *context);
+
+typedef int (*pevent_plugin_load_func)(struct pevent *pevent);
+typedef int (*pevent_plugin_unload_func)(struct pevent *pevent);
+
+struct pevent_plugin_option {
+ struct pevent_plugin_option *next;
+ void *handle;
+ char *file;
+ char *name;
+ char *plugin_alias;
+ char *description;
+ char *value;
+ void *priv;
+ int set;
+};
+
+/*
+ * Plugin hooks that can be called:
+ *
+ * PEVENT_PLUGIN_LOADER: (required)
+ * The function name to initialized the plugin.
+ *
+ * int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+ *
+ * PEVENT_PLUGIN_UNLOADER: (optional)
+ * The function called just before unloading
+ *
+ * int PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+ *
+ * PEVENT_PLUGIN_OPTIONS: (optional)
+ * Plugin options that can be set before loading
+ *
+ * struct pevent_plugin_option PEVENT_PLUGIN_OPTIONS[] = {
+ * {
+ * .name = "option-name",
+ * .plugin_alias = "overide-file-name", (optional)
+ * .description = "description of option to show users",
+ * },
+ * {
+ * .name = NULL,
+ * },
+ * };
+ *
+ * Array must end with .name = NULL;
+ *
+ *
+ * .plugin_alias is used to give a shorter name to access
+ * the vairable. Useful if a plugin handles more than one event.
+ *
+ * PEVENT_PLUGIN_ALIAS: (optional)
+ * The name to use for finding options (uses filename if not defined)
+ */
+#define PEVENT_PLUGIN_LOADER pevent_plugin_loader
+#define PEVENT_PLUGIN_UNLOADER pevent_plugin_unloader
+#define PEVENT_PLUGIN_OPTIONS pevent_plugin_options
+#define PEVENT_PLUGIN_ALIAS pevent_plugin_alias
+#define _MAKE_STR(x) #x
+#define MAKE_STR(x) _MAKE_STR(x)
+#define PEVENT_PLUGIN_LOADER_NAME MAKE_STR(PEVENT_PLUGIN_LOADER)
+#define PEVENT_PLUGIN_UNLOADER_NAME MAKE_STR(PEVENT_PLUGIN_UNLOADER)
+#define PEVENT_PLUGIN_OPTIONS_NAME MAKE_STR(PEVENT_PLUGIN_OPTIONS)
+#define PEVENT_PLUGIN_ALIAS_NAME MAKE_STR(PEVENT_PLUGIN_ALIAS)
+
+#define NSECS_PER_SEC 1000000000ULL
+#define NSECS_PER_USEC 1000ULL
+
+enum format_flags {
+ FIELD_IS_ARRAY = 1,
+ FIELD_IS_POINTER = 2,
+ FIELD_IS_SIGNED = 4,
+ FIELD_IS_STRING = 8,
+ FIELD_IS_DYNAMIC = 16,
+ FIELD_IS_LONG = 32,
+ FIELD_IS_FLAG = 64,
+ FIELD_IS_SYMBOLIC = 128,
+};
+
+struct format_field {
+ struct format_field *next;
+ struct event_format *event;
+ char *type;
+ char *name;
+ int offset;
+ int size;
+ unsigned int arraylen;
+ unsigned int elementsize;
+ unsigned long flags;
+};
+
+struct format {
+ int nr_common;
+ int nr_fields;
+ struct format_field *common_fields;
+ struct format_field *fields;
+};
+
+struct print_arg_atom {
+ char *atom;
+};
+
+struct print_arg_string {
+ char *string;
+ int offset;
+};
+
+struct print_arg_bitmask {
+ char *bitmask;
+ int offset;
+};
+
+struct print_arg_field {
+ char *name;
+ struct format_field *field;
+};
+
+struct print_flag_sym {
+ struct print_flag_sym *next;
+ char *value;
+ char *str;
+};
+
+struct print_arg_typecast {
+ char *type;
+ struct print_arg *item;
+};
+
+struct print_arg_flags {
+ struct print_arg *field;
+ char *delim;
+ struct print_flag_sym *flags;
+};
+
+struct print_arg_symbol {
+ struct print_arg *field;
+ struct print_flag_sym *symbols;
+};
+
+struct print_arg_hex {
+ struct print_arg *field;
+ struct print_arg *size;
+};
+
+struct print_arg_dynarray {
+ struct format_field *field;
+ struct print_arg *index;
+};
+
+struct print_arg;
+
+struct print_arg_op {
+ char *op;
+ int prio;
+ struct print_arg *left;
+ struct print_arg *right;
+};
+
+struct pevent_function_handler;
+
+struct print_arg_func {
+ struct pevent_function_handler *func;
+ struct print_arg *args;
+};
+
+enum print_arg_type {
+ PRINT_NULL,
+ PRINT_ATOM,
+ PRINT_FIELD,
+ PRINT_FLAGS,
+ PRINT_SYMBOL,
+ PRINT_HEX,
+ PRINT_TYPE,
+ PRINT_STRING,
+ PRINT_BSTRING,
+ PRINT_DYNAMIC_ARRAY,
+ PRINT_OP,
+ PRINT_FUNC,
+ PRINT_BITMASK,
+};
+
+struct print_arg {
+ struct print_arg *next;
+ enum print_arg_type type;
+ union {
+ struct print_arg_atom atom;
+ struct print_arg_field field;
+ struct print_arg_typecast typecast;
+ struct print_arg_flags flags;
+ struct print_arg_symbol symbol;
+ struct print_arg_hex hex;
+ struct print_arg_func func;
+ struct print_arg_string string;
+ struct print_arg_bitmask bitmask;
+ struct print_arg_op op;
+ struct print_arg_dynarray dynarray;
+ };
+};
+
+struct print_fmt {
+ char *format;
+ struct print_arg *args;
+};
+
+struct event_format {
+ struct pevent *pevent;
+ char *name;
+ int id;
+ int flags;
+ struct format format;
+ struct print_fmt print_fmt;
+ char *system;
+ pevent_event_handler_func handler;
+ void *context;
+};
+
+enum {
+ EVENT_FL_ISFTRACE = 0x01,
+ EVENT_FL_ISPRINT = 0x02,
+ EVENT_FL_ISBPRINT = 0x04,
+ EVENT_FL_ISFUNCENT = 0x10,
+ EVENT_FL_ISFUNCRET = 0x20,
+ EVENT_FL_NOHANDLE = 0x40,
+ EVENT_FL_PRINTRAW = 0x80,
+
+ EVENT_FL_FAILED = 0x80000000
+};
+
+enum event_sort_type {
+ EVENT_SORT_ID,
+ EVENT_SORT_NAME,
+ EVENT_SORT_SYSTEM,
+};
+
+enum event_type {
+ EVENT_ERROR,
+ EVENT_NONE,
+ EVENT_SPACE,
+ EVENT_NEWLINE,
+ EVENT_OP,
+ EVENT_DELIM,
+ EVENT_ITEM,
+ EVENT_DQUOTE,
+ EVENT_SQUOTE,
+};
+
+typedef unsigned long long (*pevent_func_handler)(struct trace_seq *s,
+ unsigned long long *args);
+
+enum pevent_func_arg_type {
+ PEVENT_FUNC_ARG_VOID,
+ PEVENT_FUNC_ARG_INT,
+ PEVENT_FUNC_ARG_LONG,
+ PEVENT_FUNC_ARG_STRING,
+ PEVENT_FUNC_ARG_PTR,
+ PEVENT_FUNC_ARG_MAX_TYPES
+};
+
+enum pevent_flag {
+ PEVENT_NSEC_OUTPUT = 1, /* output in NSECS */
+ PEVENT_DISABLE_SYS_PLUGINS = 1 << 1,
+ PEVENT_DISABLE_PLUGINS = 1 << 2,
+};
+
+#define PEVENT_ERRORS \
+ _PE(MEM_ALLOC_FAILED, "failed to allocate memory"), \
+ _PE(PARSE_EVENT_FAILED, "failed to parse event"), \
+ _PE(READ_ID_FAILED, "failed to read event id"), \
+ _PE(READ_FORMAT_FAILED, "failed to read event format"), \
+ _PE(READ_PRINT_FAILED, "failed to read event print fmt"), \
+ _PE(OLD_FTRACE_ARG_FAILED,"failed to allocate field name for ftrace"),\
+ _PE(INVALID_ARG_TYPE, "invalid argument type"), \
+ _PE(INVALID_EXP_TYPE, "invalid expression type"), \
+ _PE(INVALID_OP_TYPE, "invalid operator type"), \
+ _PE(INVALID_EVENT_NAME, "invalid event name"), \
+ _PE(EVENT_NOT_FOUND, "no event found"), \
+ _PE(SYNTAX_ERROR, "syntax error"), \
+ _PE(ILLEGAL_RVALUE, "illegal rvalue"), \
+ _PE(ILLEGAL_LVALUE, "illegal lvalue for string comparison"), \
+ _PE(INVALID_REGEX, "regex did not compute"), \
+ _PE(ILLEGAL_STRING_CMP, "illegal comparison for string"), \
+ _PE(ILLEGAL_INTEGER_CMP,"illegal comparison for integer"), \
+ _PE(REPARENT_NOT_OP, "cannot reparent other than OP"), \
+ _PE(REPARENT_FAILED, "failed to reparent filter OP"), \
+ _PE(BAD_FILTER_ARG, "bad arg in filter tree"), \
+ _PE(UNEXPECTED_TYPE, "unexpected type (not a value)"), \
+ _PE(ILLEGAL_TOKEN, "illegal token"), \
+ _PE(INVALID_PAREN, "open parenthesis cannot come here"), \
+ _PE(UNBALANCED_PAREN, "unbalanced number of parenthesis"), \
+ _PE(UNKNOWN_TOKEN, "unknown token"), \
+ _PE(FILTER_NOT_FOUND, "no filter found"), \
+ _PE(NOT_A_NUMBER, "must have number field"), \
+ _PE(NO_FILTER, "no filters exists"), \
+ _PE(FILTER_MISS, "record does not match to filter")
+
+#undef _PE
+#define _PE(__code, __str) PEVENT_ERRNO__ ## __code
+enum pevent_errno {
+ PEVENT_ERRNO__SUCCESS = 0,
+ PEVENT_ERRNO__FILTER_MATCH = PEVENT_ERRNO__SUCCESS,
+
+ /*
+ * Choose an arbitrary negative big number not to clash with standard
+ * errno since SUS requires the errno has distinct positive values.
+ * See 'Issue 6' in the link below.
+ *
+ * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html
+ */
+ __PEVENT_ERRNO__START = -100000,
+
+ PEVENT_ERRORS,
+
+ __PEVENT_ERRNO__END,
+};
+#undef _PE
+
+struct plugin_list;
+
+#define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1))
+
+struct plugin_list *traceevent_load_plugins(struct pevent *pevent);
+void traceevent_unload_plugins(struct plugin_list *plugin_list,
+ struct pevent *pevent);
+char **traceevent_plugin_list_options(void);
+void traceevent_plugin_free_options_list(char **list);
+int traceevent_plugin_add_options(const char *name,
+ struct pevent_plugin_option *options);
+void traceevent_plugin_remove_options(struct pevent_plugin_option *options);
+void traceevent_print_plugins(struct trace_seq *s,
+ const char *prefix, const char *suffix,
+ const struct plugin_list *list);
+
+struct cmdline;
+struct cmdline_list;
+struct func_map;
+struct func_list;
+struct event_handler;
+
+struct pevent {
+ int ref_count;
+
+ int header_page_ts_offset;
+ int header_page_ts_size;
+ int header_page_size_offset;
+ int header_page_size_size;
+ int header_page_data_offset;
+ int header_page_data_size;
+ int header_page_overwrite;
+
+ int file_bigendian;
+ int host_bigendian;
+
+ int latency_format;
+
+ int old_format;
+
+ int cpus;
+ int long_size;
+ int page_size;
+
+ struct cmdline *cmdlines;
+ struct cmdline_list *cmdlist;
+ int cmdline_count;
+
+ struct func_map *func_map;
+ struct func_list *funclist;
+ unsigned int func_count;
+
+ struct printk_map *printk_map;
+ struct printk_list *printklist;
+ unsigned int printk_count;
+
+
+ struct event_format **events;
+ int nr_events;
+ struct event_format **sort_events;
+ enum event_sort_type last_type;
+
+ int type_offset;
+ int type_size;
+
+ int pid_offset;
+ int pid_size;
+
+ int pc_offset;
+ int pc_size;
+
+ int flags_offset;
+ int flags_size;
+
+ int ld_offset;
+ int ld_size;
+
+ int print_raw;
+
+ int test_filters;
+
+ int flags;
+
+ struct format_field *bprint_ip_field;
+ struct format_field *bprint_fmt_field;
+ struct format_field *bprint_buf_field;
+
+ struct event_handler *handlers;
+ struct pevent_function_handler *func_handlers;
+
+ /* cache */
+ struct event_format *last_event;
+
+ char *trace_clock;
+};
+
+static inline void pevent_set_flag(struct pevent *pevent, int flag)
+{
+ pevent->flags |= flag;
+}
+
+static inline unsigned short
+__data2host2(struct pevent *pevent, unsigned short data)
+{
+ unsigned short swap;
+
+ if (pevent->host_bigendian == pevent->file_bigendian)
+ return data;
+
+ swap = ((data & 0xffULL) << 8) |
+ ((data & (0xffULL << 8)) >> 8);
+
+ return swap;
+}
+
+static inline unsigned int
+__data2host4(struct pevent *pevent, unsigned int data)
+{
+ unsigned int swap;
+
+ if (pevent->host_bigendian == pevent->file_bigendian)
+ return data;
+
+ swap = ((data & 0xffULL) << 24) |
+ ((data & (0xffULL << 8)) << 8) |
+ ((data & (0xffULL << 16)) >> 8) |
+ ((data & (0xffULL << 24)) >> 24);
+
+ return swap;
+}
+
+static inline unsigned long long
+__data2host8(struct pevent *pevent, unsigned long long data)
+{
+ unsigned long long swap;
+
+ if (pevent->host_bigendian == pevent->file_bigendian)
+ return data;
+
+ swap = ((data & 0xffULL) << 56) |
+ ((data & (0xffULL << 8)) << 40) |
+ ((data & (0xffULL << 16)) << 24) |
+ ((data & (0xffULL << 24)) << 8) |
+ ((data & (0xffULL << 32)) >> 8) |
+ ((data & (0xffULL << 40)) >> 24) |
+ ((data & (0xffULL << 48)) >> 40) |
+ ((data & (0xffULL << 56)) >> 56);
+
+ return swap;
+}
+
+#define data2host2(pevent, ptr) __data2host2(pevent, *(unsigned short *)(ptr))
+#define data2host4(pevent, ptr) __data2host4(pevent, *(unsigned int *)(ptr))
+#define data2host8(pevent, ptr) \
+({ \
+ unsigned long long __val; \
+ \
+ memcpy(&__val, (ptr), sizeof(unsigned long long)); \
+ __data2host8(pevent, __val); \
+})
+
+static inline int traceevent_host_bigendian(void)
+{
+ unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
+ unsigned int val;
+
+ memcpy(&val, str, 4);
+ return val == 0x01020304;
+}
+
+/* taken from kernel/trace/trace.h */
+enum trace_flag_type {
+ TRACE_FLAG_IRQS_OFF = 0x01,
+ TRACE_FLAG_IRQS_NOSUPPORT = 0x02,
+ TRACE_FLAG_NEED_RESCHED = 0x04,
+ TRACE_FLAG_HARDIRQ = 0x08,
+ TRACE_FLAG_SOFTIRQ = 0x10,
+};
+
+int pevent_register_comm(struct pevent *pevent, const char *comm, int pid);
+void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock);
+int pevent_register_function(struct pevent *pevent, char *name,
+ unsigned long long addr, char *mod);
+int pevent_register_print_string(struct pevent *pevent, const char *fmt,
+ unsigned long long addr);
+int pevent_pid_is_registered(struct pevent *pevent, int pid);
+
+void pevent_print_event(struct pevent *pevent, struct trace_seq *s,
+ struct pevent_record *record, bool use_trace_clock);
+
+int pevent_parse_header_page(struct pevent *pevent, char *buf, unsigned long size,
+ int long_size);
+
+enum pevent_errno pevent_parse_event(struct pevent *pevent, const char *buf,
+ unsigned long size, const char *sys);
+enum pevent_errno pevent_parse_format(struct pevent *pevent,
+ struct event_format **eventp,
+ const char *buf,
+ unsigned long size, const char *sys);
+void pevent_free_format(struct event_format *event);
+
+void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event,
+ const char *name, struct pevent_record *record,
+ int *len, int err);
+
+int pevent_get_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct pevent_record *record,
+ unsigned long long *val, int err);
+int pevent_get_common_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct pevent_record *record,
+ unsigned long long *val, int err);
+int pevent_get_any_field_val(struct trace_seq *s, struct event_format *event,
+ const char *name, struct pevent_record *record,
+ unsigned long long *val, int err);
+
+int pevent_print_num_field(struct trace_seq *s, const char *fmt,
+ struct event_format *event, const char *name,
+ struct pevent_record *record, int err);
+
+int pevent_print_func_field(struct trace_seq *s, const char *fmt,
+ struct event_format *event, const char *name,
+ struct pevent_record *record, int err);
+
+int pevent_register_event_handler(struct pevent *pevent, int id,
+ const char *sys_name, const char *event_name,
+ pevent_event_handler_func func, void *context);
+int pevent_unregister_event_handler(struct pevent *pevent, int id,
+ const char *sys_name, const char *event_name,
+ pevent_event_handler_func func, void *context);
+int pevent_register_print_function(struct pevent *pevent,
+ pevent_func_handler func,
+ enum pevent_func_arg_type ret_type,
+ char *name, ...);
+int pevent_unregister_print_function(struct pevent *pevent,
+ pevent_func_handler func, char *name);
+
+struct format_field *pevent_find_common_field(struct event_format *event, const char *name);
+struct format_field *pevent_find_field(struct event_format *event, const char *name);
+struct format_field *pevent_find_any_field(struct event_format *event, const char *name);
+
+const char *pevent_find_function(struct pevent *pevent, unsigned long long addr);
+unsigned long long
+pevent_find_function_address(struct pevent *pevent, unsigned long long addr);
+unsigned long long pevent_read_number(struct pevent *pevent, const void *ptr, int size);
+int pevent_read_number_field(struct format_field *field, const void *data,
+ unsigned long long *value);
+
+struct event_format *pevent_find_event(struct pevent *pevent, int id);
+
+struct event_format *
+pevent_find_event_by_name(struct pevent *pevent, const char *sys, const char *name);
+
+void pevent_data_lat_fmt(struct pevent *pevent,
+ struct trace_seq *s, struct pevent_record *record);
+int pevent_data_type(struct pevent *pevent, struct pevent_record *rec);
+struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type);
+int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec);
+const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid);
+void pevent_event_info(struct trace_seq *s, struct event_format *event,
+ struct pevent_record *record);
+int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum,
+ char *buf, size_t buflen);
+
+struct event_format **pevent_list_events(struct pevent *pevent, enum event_sort_type);
+struct format_field **pevent_event_common_fields(struct event_format *event);
+struct format_field **pevent_event_fields(struct event_format *event);
+
+static inline int pevent_get_cpus(struct pevent *pevent)
+{
+ return pevent->cpus;
+}
+
+static inline void pevent_set_cpus(struct pevent *pevent, int cpus)
+{
+ pevent->cpus = cpus;
+}
+
+static inline int pevent_get_long_size(struct pevent *pevent)
+{
+ return pevent->long_size;
+}
+
+static inline void pevent_set_long_size(struct pevent *pevent, int long_size)
+{
+ pevent->long_size = long_size;
+}
+
+static inline int pevent_get_page_size(struct pevent *pevent)
+{
+ return pevent->page_size;
+}
+
+static inline void pevent_set_page_size(struct pevent *pevent, int _page_size)
+{
+ pevent->page_size = _page_size;
+}
+
+static inline int pevent_is_file_bigendian(struct pevent *pevent)
+{
+ return pevent->file_bigendian;
+}
+
+static inline void pevent_set_file_bigendian(struct pevent *pevent, int endian)
+{
+ pevent->file_bigendian = endian;
+}
+
+static inline int pevent_is_host_bigendian(struct pevent *pevent)
+{
+ return pevent->host_bigendian;
+}
+
+static inline void pevent_set_host_bigendian(struct pevent *pevent, int endian)
+{
+ pevent->host_bigendian = endian;
+}
+
+static inline int pevent_is_latency_format(struct pevent *pevent)
+{
+ return pevent->latency_format;
+}
+
+static inline void pevent_set_latency_format(struct pevent *pevent, int lat)
+{
+ pevent->latency_format = lat;
+}
+
+struct pevent *pevent_alloc(void);
+void pevent_free(struct pevent *pevent);
+void pevent_ref(struct pevent *pevent);
+void pevent_unref(struct pevent *pevent);
+
+/* access to the internal parser */
+void pevent_buffer_init(const char *buf, unsigned long long size);
+enum event_type pevent_read_token(char **tok);
+void pevent_free_token(char *token);
+int pevent_peek_char(void);
+const char *pevent_get_input_buf(void);
+unsigned long long pevent_get_input_buf_ptr(void);
+
+/* for debugging */
+void pevent_print_funcs(struct pevent *pevent);
+void pevent_print_printk(struct pevent *pevent);
+
+/* ----------------------- filtering ----------------------- */
+
+enum filter_boolean_type {
+ FILTER_FALSE,
+ FILTER_TRUE,
+};
+
+enum filter_op_type {
+ FILTER_OP_AND = 1,
+ FILTER_OP_OR,
+ FILTER_OP_NOT,
+};
+
+enum filter_cmp_type {
+ FILTER_CMP_NONE,
+ FILTER_CMP_EQ,
+ FILTER_CMP_NE,
+ FILTER_CMP_GT,
+ FILTER_CMP_LT,
+ FILTER_CMP_GE,
+ FILTER_CMP_LE,
+ FILTER_CMP_MATCH,
+ FILTER_CMP_NOT_MATCH,
+ FILTER_CMP_REGEX,
+ FILTER_CMP_NOT_REGEX,
+};
+
+enum filter_exp_type {
+ FILTER_EXP_NONE,
+ FILTER_EXP_ADD,
+ FILTER_EXP_SUB,
+ FILTER_EXP_MUL,
+ FILTER_EXP_DIV,
+ FILTER_EXP_MOD,
+ FILTER_EXP_RSHIFT,
+ FILTER_EXP_LSHIFT,
+ FILTER_EXP_AND,
+ FILTER_EXP_OR,
+ FILTER_EXP_XOR,
+ FILTER_EXP_NOT,
+};
+
+enum filter_arg_type {
+ FILTER_ARG_NONE,
+ FILTER_ARG_BOOLEAN,
+ FILTER_ARG_VALUE,
+ FILTER_ARG_FIELD,
+ FILTER_ARG_EXP,
+ FILTER_ARG_OP,
+ FILTER_ARG_NUM,
+ FILTER_ARG_STR,
+};
+
+enum filter_value_type {
+ FILTER_NUMBER,
+ FILTER_STRING,
+ FILTER_CHAR
+};
+
+struct fliter_arg;
+
+struct filter_arg_boolean {
+ enum filter_boolean_type value;
+};
+
+struct filter_arg_field {
+ struct format_field *field;
+};
+
+struct filter_arg_value {
+ enum filter_value_type type;
+ union {
+ char *str;
+ unsigned long long val;
+ };
+};
+
+struct filter_arg_op {
+ enum filter_op_type type;
+ struct filter_arg *left;
+ struct filter_arg *right;
+};
+
+struct filter_arg_exp {
+ enum filter_exp_type type;
+ struct filter_arg *left;
+ struct filter_arg *right;
+};
+
+struct filter_arg_num {
+ enum filter_cmp_type type;
+ struct filter_arg *left;
+ struct filter_arg *right;
+};
+
+struct filter_arg_str {
+ enum filter_cmp_type type;
+ struct format_field *field;
+ char *val;
+ char *buffer;
+ regex_t reg;
+};
+
+struct filter_arg {
+ enum filter_arg_type type;
+ union {
+ struct filter_arg_boolean boolean;
+ struct filter_arg_field field;
+ struct filter_arg_value value;
+ struct filter_arg_op op;
+ struct filter_arg_exp exp;
+ struct filter_arg_num num;
+ struct filter_arg_str str;
+ };
+};
+
+struct filter_type {
+ int event_id;
+ struct event_format *event;
+ struct filter_arg *filter;
+};
+
+#define PEVENT_FILTER_ERROR_BUFSZ 1024
+
+struct event_filter {
+ struct pevent *pevent;
+ int filters;
+ struct filter_type *event_filters;
+ char error_buffer[PEVENT_FILTER_ERROR_BUFSZ];
+};
+
+struct event_filter *pevent_filter_alloc(struct pevent *pevent);
+
+/* for backward compatibility */
+#define FILTER_NONE PEVENT_ERRNO__NO_FILTER
+#define FILTER_NOEXIST PEVENT_ERRNO__FILTER_NOT_FOUND
+#define FILTER_MISS PEVENT_ERRNO__FILTER_MISS
+#define FILTER_MATCH PEVENT_ERRNO__FILTER_MATCH
+
+enum filter_trivial_type {
+ FILTER_TRIVIAL_FALSE,
+ FILTER_TRIVIAL_TRUE,
+ FILTER_TRIVIAL_BOTH,
+};
+
+enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
+ const char *filter_str);
+
+enum pevent_errno pevent_filter_match(struct event_filter *filter,
+ struct pevent_record *record);
+
+int pevent_filter_strerror(struct event_filter *filter, enum pevent_errno err,
+ char *buf, size_t buflen);
+
+int pevent_event_filtered(struct event_filter *filter,
+ int event_id);
+
+void pevent_filter_reset(struct event_filter *filter);
+
+int pevent_filter_clear_trivial(struct event_filter *filter,
+ enum filter_trivial_type type);
+
+void pevent_filter_free(struct event_filter *filter);
+
+char *pevent_filter_make_string(struct event_filter *filter, int event_id);
+
+int pevent_filter_remove_event(struct event_filter *filter,
+ int event_id);
+
+int pevent_filter_event_has_trivial(struct event_filter *filter,
+ int event_id,
+ enum filter_trivial_type type);
+
+int pevent_filter_copy(struct event_filter *dest, struct event_filter *source);
+
+int pevent_update_trivial(struct event_filter *dest, struct event_filter *source,
+ enum filter_trivial_type type);
+
+int pevent_filter_compare(struct event_filter *filter1, struct event_filter *filter2);
+
+#endif /* _PARSE_EVENTS_H */
diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c
new file mode 100644
index 00000000000..136162c03af
--- /dev/null
+++ b/tools/lib/traceevent/event-plugin.c
@@ -0,0 +1,416 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <dlfcn.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <dirent.h>
+#include "event-parse.h"
+#include "event-utils.h"
+
+#define LOCAL_PLUGIN_DIR ".traceevent/plugins"
+
+static struct registered_plugin_options {
+ struct registered_plugin_options *next;
+ struct pevent_plugin_option *options;
+} *registered_options;
+
+static struct trace_plugin_options {
+ struct trace_plugin_options *next;
+ char *plugin;
+ char *option;
+ char *value;
+} *trace_plugin_options;
+
+struct plugin_list {
+ struct plugin_list *next;
+ char *name;
+ void *handle;
+};
+
+/**
+ * traceevent_plugin_list_options - get list of plugin options
+ *
+ * Returns an array of char strings that list the currently registered
+ * plugin options in the format of <plugin>:<option>. This list can be
+ * used by toggling the option.
+ *
+ * Returns NULL if there's no options registered. On error it returns
+ * INVALID_PLUGIN_LIST_OPTION
+ *
+ * Must be freed with traceevent_plugin_free_options_list().
+ */
+char **traceevent_plugin_list_options(void)
+{
+ struct registered_plugin_options *reg;
+ struct pevent_plugin_option *op;
+ char **list = NULL;
+ char *name;
+ int count = 0;
+
+ for (reg = registered_options; reg; reg = reg->next) {
+ for (op = reg->options; op->name; op++) {
+ char *alias = op->plugin_alias ? op->plugin_alias : op->file;
+ char **temp = list;
+
+ name = malloc(strlen(op->name) + strlen(alias) + 2);
+ if (!name)
+ goto err;
+
+ sprintf(name, "%s:%s", alias, op->name);
+ list = realloc(list, count + 2);
+ if (!list) {
+ list = temp;
+ free(name);
+ goto err;
+ }
+ list[count++] = name;
+ list[count] = NULL;
+ }
+ }
+ return list;
+
+ err:
+ while (--count >= 0)
+ free(list[count]);
+ free(list);
+
+ return INVALID_PLUGIN_LIST_OPTION;
+}
+
+void traceevent_plugin_free_options_list(char **list)
+{
+ int i;
+
+ if (!list)
+ return;
+
+ if (list == INVALID_PLUGIN_LIST_OPTION)
+ return;
+
+ for (i = 0; list[i]; i++)
+ free(list[i]);
+
+ free(list);
+}
+
+static int
+update_option(const char *file, struct pevent_plugin_option *option)
+{
+ struct trace_plugin_options *op;
+ char *plugin;
+
+ if (option->plugin_alias) {
+ plugin = strdup(option->plugin_alias);
+ if (!plugin)
+ return -1;
+ } else {
+ char *p;
+ plugin = strdup(file);
+ if (!plugin)
+ return -1;
+ p = strstr(plugin, ".");
+ if (p)
+ *p = '\0';
+ }
+
+ /* first look for named options */
+ for (op = trace_plugin_options; op; op = op->next) {
+ if (!op->plugin)
+ continue;
+ if (strcmp(op->plugin, plugin) != 0)
+ continue;
+ if (strcmp(op->option, option->name) != 0)
+ continue;
+
+ option->value = op->value;
+ option->set ^= 1;
+ goto out;
+ }
+
+ /* first look for unnamed options */
+ for (op = trace_plugin_options; op; op = op->next) {
+ if (op->plugin)
+ continue;
+ if (strcmp(op->option, option->name) != 0)
+ continue;
+
+ option->value = op->value;
+ option->set ^= 1;
+ break;
+ }
+
+ out:
+ free(plugin);
+ return 0;
+}
+
+/**
+ * traceevent_plugin_add_options - Add a set of options by a plugin
+ * @name: The name of the plugin adding the options
+ * @options: The set of options being loaded
+ *
+ * Sets the options with the values that have been added by user.
+ */
+int traceevent_plugin_add_options(const char *name,
+ struct pevent_plugin_option *options)
+{
+ struct registered_plugin_options *reg;
+
+ reg = malloc(sizeof(*reg));
+ if (!reg)
+ return -1;
+ reg->next = registered_options;
+ reg->options = options;
+ registered_options = reg;
+
+ while (options->name) {
+ update_option(name, options);
+ options++;
+ }
+ return 0;
+}
+
+/**
+ * traceevent_plugin_remove_options - remove plugin options that were registered
+ * @options: Options to removed that were registered with traceevent_plugin_add_options
+ */
+void traceevent_plugin_remove_options(struct pevent_plugin_option *options)
+{
+ struct registered_plugin_options **last;
+ struct registered_plugin_options *reg;
+
+ for (last = &registered_options; *last; last = &(*last)->next) {
+ if ((*last)->options == options) {
+ reg = *last;
+ *last = reg->next;
+ free(reg);
+ return;
+ }
+ }
+}
+
+/**
+ * traceevent_print_plugins - print out the list of plugins loaded
+ * @s: the trace_seq descripter to write to
+ * @prefix: The prefix string to add before listing the option name
+ * @suffix: The suffix string ot append after the option name
+ * @list: The list of plugins (usually returned by traceevent_load_plugins()
+ *
+ * Writes to the trace_seq @s the list of plugins (files) that is
+ * returned by traceevent_load_plugins(). Use @prefix and @suffix for formating:
+ * @prefix = " ", @suffix = "\n".
+ */
+void traceevent_print_plugins(struct trace_seq *s,
+ const char *prefix, const char *suffix,
+ const struct plugin_list *list)
+{
+ while (list) {
+ trace_seq_printf(s, "%s%s%s", prefix, list->name, suffix);
+ list = list->next;
+ }
+}
+
+static void
+load_plugin(struct pevent *pevent, const char *path,
+ const char *file, void *data)
+{
+ struct plugin_list **plugin_list = data;
+ pevent_plugin_load_func func;
+ struct plugin_list *list;
+ const char *alias;
+ char *plugin;
+ void *handle;
+
+ plugin = malloc(strlen(path) + strlen(file) + 2);
+ if (!plugin) {
+ warning("could not allocate plugin memory\n");
+ return;
+ }
+
+ strcpy(plugin, path);
+ strcat(plugin, "/");
+ strcat(plugin, file);
+
+ handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL);
+ if (!handle) {
+ warning("could not load plugin '%s'\n%s\n",
+ plugin, dlerror());
+ goto out_free;
+ }
+
+ alias = dlsym(handle, PEVENT_PLUGIN_ALIAS_NAME);
+ if (!alias)
+ alias = file;
+
+ func = dlsym(handle, PEVENT_PLUGIN_LOADER_NAME);
+ if (!func) {
+ warning("could not find func '%s' in plugin '%s'\n%s\n",
+ PEVENT_PLUGIN_LOADER_NAME, plugin, dlerror());
+ goto out_free;
+ }
+
+ list = malloc(sizeof(*list));
+ if (!list) {
+ warning("could not allocate plugin memory\n");
+ goto out_free;
+ }
+
+ list->next = *plugin_list;
+ list->handle = handle;
+ list->name = plugin;
+ *plugin_list = list;
+
+ pr_stat("registering plugin: %s", plugin);
+ func(pevent);
+ return;
+
+ out_free:
+ free(plugin);
+}
+
+static void
+load_plugins_dir(struct pevent *pevent, const char *suffix,
+ const char *path,
+ void (*load_plugin)(struct pevent *pevent,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data)
+{
+ struct dirent *dent;
+ struct stat st;
+ DIR *dir;
+ int ret;
+
+ ret = stat(path, &st);
+ if (ret < 0)
+ return;
+
+ if (!S_ISDIR(st.st_mode))
+ return;
+
+ dir = opendir(path);
+ if (!dir)
+ return;
+
+ while ((dent = readdir(dir))) {
+ const char *name = dent->d_name;
+
+ if (strcmp(name, ".") == 0 ||
+ strcmp(name, "..") == 0)
+ continue;
+
+ /* Only load plugins that end in suffix */
+ if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0)
+ continue;
+
+ load_plugin(pevent, path, name, data);
+ }
+
+ closedir(dir);
+}
+
+static void
+load_plugins(struct pevent *pevent, const char *suffix,
+ void (*load_plugin)(struct pevent *pevent,
+ const char *path,
+ const char *name,
+ void *data),
+ void *data)
+{
+ char *home;
+ char *path;
+ char *envdir;
+
+ if (pevent->flags & PEVENT_DISABLE_PLUGINS)
+ return;
+
+ /*
+ * If a system plugin directory was defined,
+ * check that first.
+ */
+#ifdef PLUGIN_DIR
+ if (!(pevent->flags & PEVENT_DISABLE_SYS_PLUGINS))
+ load_plugins_dir(pevent, suffix, PLUGIN_DIR,
+ load_plugin, data);
+#endif
+
+ /*
+ * Next let the environment-set plugin directory
+ * override the system defaults.
+ */
+ envdir = getenv("TRACEEVENT_PLUGIN_DIR");
+ if (envdir)
+ load_plugins_dir(pevent, suffix, envdir, load_plugin, data);
+
+ /*
+ * Now let the home directory override the environment
+ * or system defaults.
+ */
+ home = getenv("HOME");
+ if (!home)
+ return;
+
+ path = malloc(strlen(home) + strlen(LOCAL_PLUGIN_DIR) + 2);
+ if (!path) {
+ warning("could not allocate plugin memory\n");
+ return;
+ }
+
+ strcpy(path, home);
+ strcat(path, "/");
+ strcat(path, LOCAL_PLUGIN_DIR);
+
+ load_plugins_dir(pevent, suffix, path, load_plugin, data);
+
+ free(path);
+}
+
+struct plugin_list*
+traceevent_load_plugins(struct pevent *pevent)
+{
+ struct plugin_list *list = NULL;
+
+ load_plugins(pevent, ".so", load_plugin, &list);
+ return list;
+}
+
+void
+traceevent_unload_plugins(struct plugin_list *plugin_list, struct pevent *pevent)
+{
+ pevent_plugin_unload_func func;
+ struct plugin_list *list;
+
+ while (plugin_list) {
+ list = plugin_list;
+ plugin_list = list->next;
+ func = dlsym(list->handle, PEVENT_PLUGIN_UNLOADER_NAME);
+ if (func)
+ func(pevent);
+ dlclose(list->handle);
+ free(list->name);
+ free(list);
+ }
+}
diff --git a/tools/lib/traceevent/event-utils.h b/tools/lib/traceevent/event-utils.h
new file mode 100644
index 00000000000..d1dc2170e40
--- /dev/null
+++ b/tools/lib/traceevent/event-utils.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef __UTIL_H
+#define __UTIL_H
+
+#include <ctype.h>
+
+/* Can be overridden */
+void warning(const char *fmt, ...);
+void pr_stat(const char *fmt, ...);
+void vpr_stat(const char *fmt, va_list ap);
+
+/* Always available */
+void __warning(const char *fmt, ...);
+void __pr_stat(const char *fmt, ...);
+
+void __vwarning(const char *fmt, ...);
+void __vpr_stat(const char *fmt, ...);
+
+#define min(x, y) ({ \
+ typeof(x) _min1 = (x); \
+ typeof(y) _min2 = (y); \
+ (void) (&_min1 == &_min2); \
+ _min1 < _min2 ? _min1 : _min2; })
+
+static inline char *strim(char *string)
+{
+ char *ret;
+
+ if (!string)
+ return NULL;
+ while (*string) {
+ if (!isspace(*string))
+ break;
+ string++;
+ }
+ ret = string;
+
+ string = ret + strlen(ret) - 1;
+ while (string > ret) {
+ if (!isspace(*string))
+ break;
+ string--;
+ }
+ string[1] = 0;
+
+ return ret;
+}
+
+static inline int has_text(const char *text)
+{
+ if (!text)
+ return 0;
+
+ while (*text) {
+ if (!isspace(*text))
+ return 1;
+ text++;
+ }
+
+ return 0;
+}
+
+#endif
diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c
new file mode 100644
index 00000000000..dcc665228c7
--- /dev/null
+++ b/tools/lib/traceevent/kbuffer-parse.c
@@ -0,0 +1,732 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "kbuffer.h"
+
+#define MISSING_EVENTS (1 << 31)
+#define MISSING_STORED (1 << 30)
+
+#define COMMIT_MASK ((1 << 27) - 1)
+
+enum {
+ KBUFFER_FL_HOST_BIG_ENDIAN = (1<<0),
+ KBUFFER_FL_BIG_ENDIAN = (1<<1),
+ KBUFFER_FL_LONG_8 = (1<<2),
+ KBUFFER_FL_OLD_FORMAT = (1<<3),
+};
+
+#define ENDIAN_MASK (KBUFFER_FL_HOST_BIG_ENDIAN | KBUFFER_FL_BIG_ENDIAN)
+
+/** kbuffer
+ * @timestamp - timestamp of current event
+ * @lost_events - # of lost events between this subbuffer and previous
+ * @flags - special flags of the kbuffer
+ * @subbuffer - pointer to the sub-buffer page
+ * @data - pointer to the start of data on the sub-buffer page
+ * @index - index from @data to the @curr event data
+ * @curr - offset from @data to the start of current event
+ * (includes metadata)
+ * @next - offset from @data to the start of next event
+ * @size - The size of data on @data
+ * @start - The offset from @subbuffer where @data lives
+ *
+ * @read_4 - Function to read 4 raw bytes (may swap)
+ * @read_8 - Function to read 8 raw bytes (may swap)
+ * @read_long - Function to read a long word (4 or 8 bytes with needed swap)
+ */
+struct kbuffer {
+ unsigned long long timestamp;
+ long long lost_events;
+ unsigned long flags;
+ void *subbuffer;
+ void *data;
+ unsigned int index;
+ unsigned int curr;
+ unsigned int next;
+ unsigned int size;
+ unsigned int start;
+
+ unsigned int (*read_4)(void *ptr);
+ unsigned long long (*read_8)(void *ptr);
+ unsigned long long (*read_long)(struct kbuffer *kbuf, void *ptr);
+ int (*next_event)(struct kbuffer *kbuf);
+};
+
+static void *zmalloc(size_t size)
+{
+ return calloc(1, size);
+}
+
+static int host_is_bigendian(void)
+{
+ unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 };
+ unsigned int *ptr;
+
+ ptr = (unsigned int *)str;
+ return *ptr == 0x01020304;
+}
+
+static int do_swap(struct kbuffer *kbuf)
+{
+ return ((kbuf->flags & KBUFFER_FL_HOST_BIG_ENDIAN) + kbuf->flags) &
+ ENDIAN_MASK;
+}
+
+static unsigned long long __read_8(void *ptr)
+{
+ unsigned long long data = *(unsigned long long *)ptr;
+
+ return data;
+}
+
+static unsigned long long __read_8_sw(void *ptr)
+{
+ unsigned long long data = *(unsigned long long *)ptr;
+ unsigned long long swap;
+
+ swap = ((data & 0xffULL) << 56) |
+ ((data & (0xffULL << 8)) << 40) |
+ ((data & (0xffULL << 16)) << 24) |
+ ((data & (0xffULL << 24)) << 8) |
+ ((data & (0xffULL << 32)) >> 8) |
+ ((data & (0xffULL << 40)) >> 24) |
+ ((data & (0xffULL << 48)) >> 40) |
+ ((data & (0xffULL << 56)) >> 56);
+
+ return swap;
+}
+
+static unsigned int __read_4(void *ptr)
+{
+ unsigned int data = *(unsigned int *)ptr;
+
+ return data;
+}
+
+static unsigned int __read_4_sw(void *ptr)
+{
+ unsigned int data = *(unsigned int *)ptr;
+ unsigned int swap;
+
+ swap = ((data & 0xffULL) << 24) |
+ ((data & (0xffULL << 8)) << 8) |
+ ((data & (0xffULL << 16)) >> 8) |
+ ((data & (0xffULL << 24)) >> 24);
+
+ return swap;
+}
+
+static unsigned long long read_8(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_8(ptr);
+}
+
+static unsigned int read_4(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_4(ptr);
+}
+
+static unsigned long long __read_long_8(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_8(ptr);
+}
+
+static unsigned long long __read_long_4(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_4(ptr);
+}
+
+static unsigned long long read_long(struct kbuffer *kbuf, void *ptr)
+{
+ return kbuf->read_long(kbuf, ptr);
+}
+
+static int calc_index(struct kbuffer *kbuf, void *ptr)
+{
+ return (unsigned long)ptr - (unsigned long)kbuf->data;
+}
+
+static int __next_event(struct kbuffer *kbuf);
+
+/**
+ * kbuffer_alloc - allocat a new kbuffer
+ * @size; enum to denote size of word
+ * @endian: enum to denote endianness
+ *
+ * Allocates and returns a new kbuffer.
+ */
+struct kbuffer *
+kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian)
+{
+ struct kbuffer *kbuf;
+ int flags = 0;
+
+ switch (size) {
+ case KBUFFER_LSIZE_4:
+ break;
+ case KBUFFER_LSIZE_8:
+ flags |= KBUFFER_FL_LONG_8;
+ break;
+ default:
+ return NULL;
+ }
+
+ switch (endian) {
+ case KBUFFER_ENDIAN_LITTLE:
+ break;
+ case KBUFFER_ENDIAN_BIG:
+ flags |= KBUFFER_FL_BIG_ENDIAN;
+ break;
+ default:
+ return NULL;
+ }
+
+ kbuf = zmalloc(sizeof(*kbuf));
+ if (!kbuf)
+ return NULL;
+
+ kbuf->flags = flags;
+
+ if (host_is_bigendian())
+ kbuf->flags |= KBUFFER_FL_HOST_BIG_ENDIAN;
+
+ if (do_swap(kbuf)) {
+ kbuf->read_8 = __read_8_sw;
+ kbuf->read_4 = __read_4_sw;
+ } else {
+ kbuf->read_8 = __read_8;
+ kbuf->read_4 = __read_4;
+ }
+
+ if (kbuf->flags & KBUFFER_FL_LONG_8)
+ kbuf->read_long = __read_long_8;
+ else
+ kbuf->read_long = __read_long_4;
+
+ /* May be changed by kbuffer_set_old_format() */
+ kbuf->next_event = __next_event;
+
+ return kbuf;
+}
+
+/** kbuffer_free - free an allocated kbuffer
+ * @kbuf: The kbuffer to free
+ *
+ * Can take NULL as a parameter.
+ */
+void kbuffer_free(struct kbuffer *kbuf)
+{
+ free(kbuf);
+}
+
+static unsigned int type4host(struct kbuffer *kbuf,
+ unsigned int type_len_ts)
+{
+ if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+ return (type_len_ts >> 29) & 3;
+ else
+ return type_len_ts & 3;
+}
+
+static unsigned int len4host(struct kbuffer *kbuf,
+ unsigned int type_len_ts)
+{
+ if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+ return (type_len_ts >> 27) & 7;
+ else
+ return (type_len_ts >> 2) & 7;
+}
+
+static unsigned int type_len4host(struct kbuffer *kbuf,
+ unsigned int type_len_ts)
+{
+ if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+ return (type_len_ts >> 27) & ((1 << 5) - 1);
+ else
+ return type_len_ts & ((1 << 5) - 1);
+}
+
+static unsigned int ts4host(struct kbuffer *kbuf,
+ unsigned int type_len_ts)
+{
+ if (kbuf->flags & KBUFFER_FL_BIG_ENDIAN)
+ return type_len_ts & ((1 << 27) - 1);
+ else
+ return type_len_ts >> 5;
+}
+
+/*
+ * Linux 2.6.30 and earlier (not much ealier) had a different
+ * ring buffer format. It should be obsolete, but we handle it anyway.
+ */
+enum old_ring_buffer_type {
+ OLD_RINGBUF_TYPE_PADDING,
+ OLD_RINGBUF_TYPE_TIME_EXTEND,
+ OLD_RINGBUF_TYPE_TIME_STAMP,
+ OLD_RINGBUF_TYPE_DATA,
+};
+
+static unsigned int old_update_pointers(struct kbuffer *kbuf)
+{
+ unsigned long long extend;
+ unsigned int type_len_ts;
+ unsigned int type;
+ unsigned int len;
+ unsigned int delta;
+ unsigned int length;
+ void *ptr = kbuf->data + kbuf->curr;
+
+ type_len_ts = read_4(kbuf, ptr);
+ ptr += 4;
+
+ type = type4host(kbuf, type_len_ts);
+ len = len4host(kbuf, type_len_ts);
+ delta = ts4host(kbuf, type_len_ts);
+
+ switch (type) {
+ case OLD_RINGBUF_TYPE_PADDING:
+ kbuf->next = kbuf->size;
+ return 0;
+
+ case OLD_RINGBUF_TYPE_TIME_EXTEND:
+ extend = read_4(kbuf, ptr);
+ extend <<= TS_SHIFT;
+ extend += delta;
+ delta = extend;
+ ptr += 4;
+ break;
+
+ case OLD_RINGBUF_TYPE_TIME_STAMP:
+ /* should never happen! */
+ kbuf->curr = kbuf->size;
+ kbuf->next = kbuf->size;
+ kbuf->index = kbuf->size;
+ return -1;
+ default:
+ if (len)
+ length = len * 4;
+ else {
+ length = read_4(kbuf, ptr);
+ length -= 4;
+ ptr += 4;
+ }
+ break;
+ }
+
+ kbuf->timestamp += delta;
+ kbuf->index = calc_index(kbuf, ptr);
+ kbuf->next = kbuf->index + length;
+
+ return type;
+}
+
+static int __old_next_event(struct kbuffer *kbuf)
+{
+ int type;
+
+ do {
+ kbuf->curr = kbuf->next;
+ if (kbuf->next >= kbuf->size)
+ return -1;
+ type = old_update_pointers(kbuf);
+ } while (type == OLD_RINGBUF_TYPE_TIME_EXTEND || type == OLD_RINGBUF_TYPE_PADDING);
+
+ return 0;
+}
+
+static unsigned int
+translate_data(struct kbuffer *kbuf, void *data, void **rptr,
+ unsigned long long *delta, int *length)
+{
+ unsigned long long extend;
+ unsigned int type_len_ts;
+ unsigned int type_len;
+
+ type_len_ts = read_4(kbuf, data);
+ data += 4;
+
+ type_len = type_len4host(kbuf, type_len_ts);
+ *delta = ts4host(kbuf, type_len_ts);
+
+ switch (type_len) {
+ case KBUFFER_TYPE_PADDING:
+ *length = read_4(kbuf, data);
+ data += *length;
+ break;
+
+ case KBUFFER_TYPE_TIME_EXTEND:
+ extend = read_4(kbuf, data);
+ data += 4;
+ extend <<= TS_SHIFT;
+ extend += *delta;
+ *delta = extend;
+ *length = 0;
+ break;
+
+ case KBUFFER_TYPE_TIME_STAMP:
+ data += 12;
+ *length = 0;
+ break;
+ case 0:
+ *length = read_4(kbuf, data) - 4;
+ *length = (*length + 3) & ~3;
+ data += 4;
+ break;
+ default:
+ *length = type_len * 4;
+ break;
+ }
+
+ *rptr = data;
+
+ return type_len;
+}
+
+static unsigned int update_pointers(struct kbuffer *kbuf)
+{
+ unsigned long long delta;
+ unsigned int type_len;
+ int length;
+ void *ptr = kbuf->data + kbuf->curr;
+
+ type_len = translate_data(kbuf, ptr, &ptr, &delta, &length);
+
+ kbuf->timestamp += delta;
+ kbuf->index = calc_index(kbuf, ptr);
+ kbuf->next = kbuf->index + length;
+
+ return type_len;
+}
+
+/**
+ * kbuffer_translate_data - read raw data to get a record
+ * @swap: Set to 1 if bytes in words need to be swapped when read
+ * @data: The raw data to read
+ * @size: Address to store the size of the event data.
+ *
+ * Returns a pointer to the event data. To determine the entire
+ * record size (record metadata + data) just add the difference between
+ * @data and the returned value to @size.
+ */
+void *kbuffer_translate_data(int swap, void *data, unsigned int *size)
+{
+ unsigned long long delta;
+ struct kbuffer kbuf;
+ int type_len;
+ int length;
+ void *ptr;
+
+ if (swap) {
+ kbuf.read_8 = __read_8_sw;
+ kbuf.read_4 = __read_4_sw;
+ kbuf.flags = host_is_bigendian() ? 0 : KBUFFER_FL_BIG_ENDIAN;
+ } else {
+ kbuf.read_8 = __read_8;
+ kbuf.read_4 = __read_4;
+ kbuf.flags = host_is_bigendian() ? KBUFFER_FL_BIG_ENDIAN: 0;
+ }
+
+ type_len = translate_data(&kbuf, data, &ptr, &delta, &length);
+ switch (type_len) {
+ case KBUFFER_TYPE_PADDING:
+ case KBUFFER_TYPE_TIME_EXTEND:
+ case KBUFFER_TYPE_TIME_STAMP:
+ return NULL;
+ };
+
+ *size = length;
+
+ return ptr;
+}
+
+static int __next_event(struct kbuffer *kbuf)
+{
+ int type;
+
+ do {
+ kbuf->curr = kbuf->next;
+ if (kbuf->next >= kbuf->size)
+ return -1;
+ type = update_pointers(kbuf);
+ } while (type == KBUFFER_TYPE_TIME_EXTEND || type == KBUFFER_TYPE_PADDING);
+
+ return 0;
+}
+
+static int next_event(struct kbuffer *kbuf)
+{
+ return kbuf->next_event(kbuf);
+}
+
+/**
+ * kbuffer_next_event - increment the current pointer
+ * @kbuf: The kbuffer to read
+ * @ts: Address to store the next record's timestamp (may be NULL to ignore)
+ *
+ * Increments the pointers into the subbuffer of the kbuffer to point to the
+ * next event so that the next kbuffer_read_event() will return a
+ * new event.
+ *
+ * Returns the data of the next event if a new event exists on the subbuffer,
+ * NULL otherwise.
+ */
+void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts)
+{
+ int ret;
+
+ if (!kbuf || !kbuf->subbuffer)
+ return NULL;
+
+ ret = next_event(kbuf);
+ if (ret < 0)
+ return NULL;
+
+ if (ts)
+ *ts = kbuf->timestamp;
+
+ return kbuf->data + kbuf->index;
+}
+
+/**
+ * kbuffer_load_subbuffer - load a new subbuffer into the kbuffer
+ * @kbuf: The kbuffer to load
+ * @subbuffer: The subbuffer to load into @kbuf.
+ *
+ * Load a new subbuffer (page) into @kbuf. This will reset all
+ * the pointers and update the @kbuf timestamp. The next read will
+ * return the first event on @subbuffer.
+ *
+ * Returns 0 on succes, -1 otherwise.
+ */
+int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer)
+{
+ unsigned long long flags;
+ void *ptr = subbuffer;
+
+ if (!kbuf || !subbuffer)
+ return -1;
+
+ kbuf->subbuffer = subbuffer;
+
+ kbuf->timestamp = read_8(kbuf, ptr);
+ ptr += 8;
+
+ kbuf->curr = 0;
+
+ if (kbuf->flags & KBUFFER_FL_LONG_8)
+ kbuf->start = 16;
+ else
+ kbuf->start = 12;
+
+ kbuf->data = subbuffer + kbuf->start;
+
+ flags = read_long(kbuf, ptr);
+ kbuf->size = (unsigned int)flags & COMMIT_MASK;
+
+ if (flags & MISSING_EVENTS) {
+ if (flags & MISSING_STORED) {
+ ptr = kbuf->data + kbuf->size;
+ kbuf->lost_events = read_long(kbuf, ptr);
+ } else
+ kbuf->lost_events = -1;
+ } else
+ kbuf->lost_events = 0;
+
+ kbuf->index = 0;
+ kbuf->next = 0;
+
+ next_event(kbuf);
+
+ return 0;
+}
+
+/**
+ * kbuffer_read_event - read the next event in the kbuffer subbuffer
+ * @kbuf: The kbuffer to read from
+ * @ts: The address to store the timestamp of the event (may be NULL to ignore)
+ *
+ * Returns a pointer to the data part of the current event.
+ * NULL if no event is left on the subbuffer.
+ */
+void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts)
+{
+ if (!kbuf || !kbuf->subbuffer)
+ return NULL;
+
+ if (kbuf->curr >= kbuf->size)
+ return NULL;
+
+ if (ts)
+ *ts = kbuf->timestamp;
+ return kbuf->data + kbuf->index;
+}
+
+/**
+ * kbuffer_timestamp - Return the timestamp of the current event
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the timestamp of the current (next) event.
+ */
+unsigned long long kbuffer_timestamp(struct kbuffer *kbuf)
+{
+ return kbuf->timestamp;
+}
+
+/**
+ * kbuffer_read_at_offset - read the event that is at offset
+ * @kbuf: The kbuffer to read from
+ * @offset: The offset into the subbuffer
+ * @ts: The address to store the timestamp of the event (may be NULL to ignore)
+ *
+ * The @offset must be an index from the @kbuf subbuffer beginning.
+ * If @offset is bigger than the stored subbuffer, NULL will be returned.
+ *
+ * Returns the data of the record that is at @offset. Note, @offset does
+ * not need to be the start of the record, the offset just needs to be
+ * in the record (or beginning of it).
+ *
+ * Note, the kbuf timestamp and pointers are updated to the
+ * returned record. That is, kbuffer_read_event() will return the same
+ * data and timestamp, and kbuffer_next_event() will increment from
+ * this record.
+ */
+void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset,
+ unsigned long long *ts)
+{
+ void *data;
+
+ if (offset < kbuf->start)
+ offset = 0;
+ else
+ offset -= kbuf->start;
+
+ /* Reset the buffer */
+ kbuffer_load_subbuffer(kbuf, kbuf->subbuffer);
+
+ while (kbuf->curr < offset) {
+ data = kbuffer_next_event(kbuf, ts);
+ if (!data)
+ break;
+ }
+
+ return data;
+}
+
+/**
+ * kbuffer_subbuffer_size - the size of the loaded subbuffer
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the size of the subbuffer. Note, this size is
+ * where the last event resides. The stored subbuffer may actually be
+ * bigger due to padding and such.
+ */
+int kbuffer_subbuffer_size(struct kbuffer *kbuf)
+{
+ return kbuf->size;
+}
+
+/**
+ * kbuffer_curr_index - Return the index of the record
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the index from the start of the data part of
+ * the subbuffer to the current location. Note this is not
+ * from the start of the subbuffer. An index of zero will
+ * point to the first record. Use kbuffer_curr_offset() for
+ * the actually offset (that can be used by kbuffer_read_at_offset())
+ */
+int kbuffer_curr_index(struct kbuffer *kbuf)
+{
+ return kbuf->curr;
+}
+
+/**
+ * kbuffer_curr_offset - Return the offset of the record
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the offset from the start of the subbuffer to the
+ * current location.
+ */
+int kbuffer_curr_offset(struct kbuffer *kbuf)
+{
+ return kbuf->curr + kbuf->start;
+}
+
+/**
+ * kbuffer_event_size - return the size of the event data
+ * @kbuf: The kbuffer to read
+ *
+ * Returns the size of the event data (the payload not counting
+ * the meta data of the record) of the current event.
+ */
+int kbuffer_event_size(struct kbuffer *kbuf)
+{
+ return kbuf->next - kbuf->index;
+}
+
+/**
+ * kbuffer_curr_size - return the size of the entire record
+ * @kbuf: The kbuffer to read
+ *
+ * Returns the size of the entire record (meta data and payload)
+ * of the current event.
+ */
+int kbuffer_curr_size(struct kbuffer *kbuf)
+{
+ return kbuf->next - kbuf->curr;
+}
+
+/**
+ * kbuffer_missed_events - return the # of missed events from last event.
+ * @kbuf: The kbuffer to read from
+ *
+ * Returns the # of missed events (if recorded) before the current
+ * event. Note, only events on the beginning of a subbuffer can
+ * have missed events, all other events within the buffer will be
+ * zero.
+ */
+int kbuffer_missed_events(struct kbuffer *kbuf)
+{
+ /* Only the first event can have missed events */
+ if (kbuf->curr)
+ return 0;
+
+ return kbuf->lost_events;
+}
+
+/**
+ * kbuffer_set_old_forma - set the kbuffer to use the old format parsing
+ * @kbuf: The kbuffer to set
+ *
+ * This is obsolete (or should be). The first kernels to use the
+ * new ring buffer had a slightly different ring buffer format
+ * (2.6.30 and earlier). It is still somewhat supported by kbuffer,
+ * but should not be counted on in the future.
+ */
+void kbuffer_set_old_format(struct kbuffer *kbuf)
+{
+ kbuf->flags |= KBUFFER_FL_OLD_FORMAT;
+
+ kbuf->next_event = __old_next_event;
+}
diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h
new file mode 100644
index 00000000000..c831f64b17a
--- /dev/null
+++ b/tools/lib/traceevent/kbuffer.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2012 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#ifndef _KBUFFER_H
+#define _KBUFFER_H
+
+#ifndef TS_SHIFT
+#define TS_SHIFT 27
+#endif
+
+enum kbuffer_endian {
+ KBUFFER_ENDIAN_BIG,
+ KBUFFER_ENDIAN_LITTLE,
+};
+
+enum kbuffer_long_size {
+ KBUFFER_LSIZE_4,
+ KBUFFER_LSIZE_8,
+};
+
+enum {
+ KBUFFER_TYPE_PADDING = 29,
+ KBUFFER_TYPE_TIME_EXTEND = 30,
+ KBUFFER_TYPE_TIME_STAMP = 31,
+};
+
+struct kbuffer;
+
+struct kbuffer *kbuffer_alloc(enum kbuffer_long_size size, enum kbuffer_endian endian);
+void kbuffer_free(struct kbuffer *kbuf);
+int kbuffer_load_subbuffer(struct kbuffer *kbuf, void *subbuffer);
+void *kbuffer_read_event(struct kbuffer *kbuf, unsigned long long *ts);
+void *kbuffer_next_event(struct kbuffer *kbuf, unsigned long long *ts);
+unsigned long long kbuffer_timestamp(struct kbuffer *kbuf);
+
+void *kbuffer_translate_data(int swap, void *data, unsigned int *size);
+
+void *kbuffer_read_at_offset(struct kbuffer *kbuf, int offset, unsigned long long *ts);
+
+int kbuffer_curr_index(struct kbuffer *kbuf);
+
+int kbuffer_curr_offset(struct kbuffer *kbuf);
+int kbuffer_curr_size(struct kbuffer *kbuf);
+int kbuffer_event_size(struct kbuffer *kbuf);
+int kbuffer_missed_events(struct kbuffer *kbuf);
+int kbuffer_subbuffer_size(struct kbuffer *kbuf);
+
+void kbuffer_set_old_format(struct kbuffer *kbuf);
+
+#endif /* _K_BUFFER_H */
diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c
new file mode 100644
index 00000000000..b50234402fc
--- /dev/null
+++ b/tools/lib/traceevent/parse-filter.c
@@ -0,0 +1,2432 @@
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/types.h>
+
+#include "event-parse.h"
+#include "event-utils.h"
+
+#define COMM "COMM"
+
+static struct format_field comm = {
+ .name = "COMM",
+};
+
+struct event_list {
+ struct event_list *next;
+ struct event_format *event;
+};
+
+static void show_error(char *error_buf, const char *fmt, ...)
+{
+ unsigned long long index;
+ const char *input;
+ va_list ap;
+ int len;
+ int i;
+
+ input = pevent_get_input_buf();
+ index = pevent_get_input_buf_ptr();
+ len = input ? strlen(input) : 0;
+
+ if (len) {
+ strcpy(error_buf, input);
+ error_buf[len] = '\n';
+ for (i = 1; i < len && i < index; i++)
+ error_buf[len+i] = ' ';
+ error_buf[len + i] = '^';
+ error_buf[len + i + 1] = '\n';
+ len += i+2;
+ }
+
+ va_start(ap, fmt);
+ vsnprintf(error_buf + len, PEVENT_FILTER_ERROR_BUFSZ - len, fmt, ap);
+ va_end(ap);
+}
+
+static void free_token(char *token)
+{
+ pevent_free_token(token);
+}
+
+static enum event_type read_token(char **tok)
+{
+ enum event_type type;
+ char *token = NULL;
+
+ do {
+ free_token(token);
+ type = pevent_read_token(&token);
+ } while (type == EVENT_NEWLINE || type == EVENT_SPACE);
+
+ /* If token is = or ! check to see if the next char is ~ */
+ if (token &&
+ (strcmp(token, "=") == 0 || strcmp(token, "!") == 0) &&
+ pevent_peek_char() == '~') {
+ /* append it */
+ *tok = malloc(3);
+ if (*tok == NULL) {
+ free_token(token);
+ return EVENT_ERROR;
+ }
+ sprintf(*tok, "%c%c", *token, '~');
+ free_token(token);
+ /* Now remove the '~' from the buffer */
+ pevent_read_token(&token);
+ free_token(token);
+ } else
+ *tok = token;
+
+ return type;
+}
+
+static int filter_cmp(const void *a, const void *b)
+{
+ const struct filter_type *ea = a;
+ const struct filter_type *eb = b;
+
+ if (ea->event_id < eb->event_id)
+ return -1;
+
+ if (ea->event_id > eb->event_id)
+ return 1;
+
+ return 0;
+}
+
+static struct filter_type *
+find_filter_type(struct event_filter *filter, int id)
+{
+ struct filter_type *filter_type;
+ struct filter_type key;
+
+ key.event_id = id;
+
+ filter_type = bsearch(&key, filter->event_filters,
+ filter->filters,
+ sizeof(*filter->event_filters),
+ filter_cmp);
+
+ return filter_type;
+}
+
+static struct filter_type *
+add_filter_type(struct event_filter *filter, int id)
+{
+ struct filter_type *filter_type;
+ int i;
+
+ filter_type = find_filter_type(filter, id);
+ if (filter_type)
+ return filter_type;
+
+ filter_type = realloc(filter->event_filters,
+ sizeof(*filter->event_filters) *
+ (filter->filters + 1));
+ if (!filter_type)
+ return NULL;
+
+ filter->event_filters = filter_type;
+
+ for (i = 0; i < filter->filters; i++) {
+ if (filter->event_filters[i].event_id > id)
+ break;
+ }
+
+ if (i < filter->filters)
+ memmove(&filter->event_filters[i+1],
+ &filter->event_filters[i],
+ sizeof(*filter->event_filters) *
+ (filter->filters - i));
+
+ filter_type = &filter->event_filters[i];
+ filter_type->event_id = id;
+ filter_type->event = pevent_find_event(filter->pevent, id);
+ filter_type->filter = NULL;
+
+ filter->filters++;
+
+ return filter_type;
+}
+
+/**
+ * pevent_filter_alloc - create a new event filter
+ * @pevent: The pevent that this filter is associated with
+ */
+struct event_filter *pevent_filter_alloc(struct pevent *pevent)
+{
+ struct event_filter *filter;
+
+ filter = malloc(sizeof(*filter));
+ if (filter == NULL)
+ return NULL;
+
+ memset(filter, 0, sizeof(*filter));
+ filter->pevent = pevent;
+ pevent_ref(pevent);
+
+ return filter;
+}
+
+static struct filter_arg *allocate_arg(void)
+{
+ return calloc(1, sizeof(struct filter_arg));
+}
+
+static void free_arg(struct filter_arg *arg)
+{
+ if (!arg)
+ return;
+
+ switch (arg->type) {
+ case FILTER_ARG_NONE:
+ case FILTER_ARG_BOOLEAN:
+ break;
+
+ case FILTER_ARG_NUM:
+ free_arg(arg->num.left);
+ free_arg(arg->num.right);
+ break;
+
+ case FILTER_ARG_EXP:
+ free_arg(arg->exp.left);
+ free_arg(arg->exp.right);
+ break;
+
+ case FILTER_ARG_STR:
+ free(arg->str.val);
+ regfree(&arg->str.reg);
+ free(arg->str.buffer);
+ break;
+
+ case FILTER_ARG_VALUE:
+ if (arg->value.type == FILTER_STRING ||
+ arg->value.type == FILTER_CHAR)
+ free(arg->value.str);
+ break;
+
+ case FILTER_ARG_OP:
+ free_arg(arg->op.left);
+ free_arg(arg->op.right);
+ default:
+ break;
+ }
+
+ free(arg);
+}
+
+static int add_event(struct event_list **events,
+ struct event_format *event)
+{
+ struct event_list *list;
+
+ list = malloc(sizeof(*list));
+ if (list == NULL)
+ return -1;
+
+ list->next = *events;
+ *events = list;
+ list->event = event;
+ return 0;
+}
+
+static int event_match(struct event_format *event,
+ regex_t *sreg, regex_t *ereg)
+{
+ if (sreg) {
+ return !regexec(sreg, event->system, 0, NULL, 0) &&
+ !regexec(ereg, event->name, 0, NULL, 0);
+ }
+
+ return !regexec(ereg, event->system, 0, NULL, 0) ||
+ !regexec(ereg, event->name, 0, NULL, 0);
+}
+
+static enum pevent_errno
+find_event(struct pevent *pevent, struct event_list **events,
+ char *sys_name, char *event_name)
+{
+ struct event_format *event;
+ regex_t ereg;
+ regex_t sreg;
+ int match = 0;
+ int fail = 0;
+ char *reg;
+ int ret;
+ int i;
+
+ if (!event_name) {
+ /* if no name is given, then swap sys and name */
+ event_name = sys_name;
+ sys_name = NULL;
+ }
+
+ reg = malloc(strlen(event_name) + 3);
+ if (reg == NULL)
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+
+ sprintf(reg, "^%s$", event_name);
+
+ ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB);
+ free(reg);
+
+ if (ret)
+ return PEVENT_ERRNO__INVALID_EVENT_NAME;
+
+ if (sys_name) {
+ reg = malloc(strlen(sys_name) + 3);
+ if (reg == NULL) {
+ regfree(&ereg);
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ sprintf(reg, "^%s$", sys_name);
+ ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB);
+ free(reg);
+ if (ret) {
+ regfree(&ereg);
+ return PEVENT_ERRNO__INVALID_EVENT_NAME;
+ }
+ }
+
+ for (i = 0; i < pevent->nr_events; i++) {
+ event = pevent->events[i];
+ if (event_match(event, sys_name ? &sreg : NULL, &ereg)) {
+ match = 1;
+ if (add_event(events, event) < 0) {
+ fail = 1;
+ break;
+ }
+ }
+ }
+
+ regfree(&ereg);
+ if (sys_name)
+ regfree(&sreg);
+
+ if (!match)
+ return PEVENT_ERRNO__EVENT_NOT_FOUND;
+ if (fail)
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+
+ return 0;
+}
+
+static void free_events(struct event_list *events)
+{
+ struct event_list *event;
+
+ while (events) {
+ event = events;
+ events = events->next;
+ free(event);
+ }
+}
+
+static enum pevent_errno
+create_arg_item(struct event_format *event, const char *token,
+ enum event_type type, struct filter_arg **parg, char *error_str)
+{
+ struct format_field *field;
+ struct filter_arg *arg;
+
+ arg = allocate_arg();
+ if (arg == NULL) {
+ show_error(error_str, "failed to allocate filter arg");
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+
+ switch (type) {
+
+ case EVENT_SQUOTE:
+ case EVENT_DQUOTE:
+ arg->type = FILTER_ARG_VALUE;
+ arg->value.type =
+ type == EVENT_DQUOTE ? FILTER_STRING : FILTER_CHAR;
+ arg->value.str = strdup(token);
+ if (!arg->value.str) {
+ free_arg(arg);
+ show_error(error_str, "failed to allocate string filter arg");
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+ break;
+ case EVENT_ITEM:
+ /* if it is a number, then convert it */
+ if (isdigit(token[0])) {
+ arg->type = FILTER_ARG_VALUE;
+ arg->value.type = FILTER_NUMBER;
+ arg->value.val = strtoull(token, NULL, 0);
+ break;
+ }
+ /* Consider this a field */
+ field = pevent_find_any_field(event, token);
+ if (!field) {
+ if (strcmp(token, COMM) != 0) {
+ /* not a field, Make it false */
+ arg->type = FILTER_ARG_BOOLEAN;
+ arg->boolean.value = FILTER_FALSE;
+ break;
+ }
+ /* If token is 'COMM' then it is special */
+ field = &comm;
+ }
+ arg->type = FILTER_ARG_FIELD;
+ arg->field.field = field;
+ break;
+ default:
+ free_arg(arg);
+ show_error(error_str, "expected a value but found %s", token);
+ return PEVENT_ERRNO__UNEXPECTED_TYPE;
+ }
+ *parg = arg;
+ return 0;
+}
+
+static struct filter_arg *
+create_arg_op(enum filter_op_type btype)
+{
+ struct filter_arg *arg;
+
+ arg = allocate_arg();
+ if (!arg)
+ return NULL;
+
+ arg->type = FILTER_ARG_OP;
+ arg->op.type = btype;
+
+ return arg;
+}
+
+static struct filter_arg *
+create_arg_exp(enum filter_exp_type etype)
+{
+ struct filter_arg *arg;
+
+ arg = allocate_arg();
+ if (!arg)
+ return NULL;
+
+ arg->type = FILTER_ARG_EXP;
+ arg->op.type = etype;
+
+ return arg;
+}
+
+static struct filter_arg *
+create_arg_cmp(enum filter_exp_type etype)
+{
+ struct filter_arg *arg;
+
+ arg = allocate_arg();
+ if (!arg)
+ return NULL;
+
+ /* Use NUM and change if necessary */
+ arg->type = FILTER_ARG_NUM;
+ arg->op.type = etype;
+
+ return arg;
+}
+
+static enum pevent_errno
+add_right(struct filter_arg *op, struct filter_arg *arg, char *error_str)
+{
+ struct filter_arg *left;
+ char *str;
+ int op_type;
+ int ret;
+
+ switch (op->type) {
+ case FILTER_ARG_EXP:
+ if (op->exp.right)
+ goto out_fail;
+ op->exp.right = arg;
+ break;
+
+ case FILTER_ARG_OP:
+ if (op->op.right)
+ goto out_fail;
+ op->op.right = arg;
+ break;
+
+ case FILTER_ARG_NUM:
+ if (op->op.right)
+ goto out_fail;
+ /*
+ * The arg must be num, str, or field
+ */
+ switch (arg->type) {
+ case FILTER_ARG_VALUE:
+ case FILTER_ARG_FIELD:
+ break;
+ default:
+ show_error(error_str, "Illegal rvalue");
+ return PEVENT_ERRNO__ILLEGAL_RVALUE;
+ }
+
+ /*
+ * Depending on the type, we may need to
+ * convert this to a string or regex.
+ */
+ switch (arg->value.type) {
+ case FILTER_CHAR:
+ /*
+ * A char should be converted to number if
+ * the string is 1 byte, and the compare
+ * is not a REGEX.
+ */
+ if (strlen(arg->value.str) == 1 &&
+ op->num.type != FILTER_CMP_REGEX &&
+ op->num.type != FILTER_CMP_NOT_REGEX) {
+ arg->value.type = FILTER_NUMBER;
+ goto do_int;
+ }
+ /* fall through */
+ case FILTER_STRING:
+
+ /* convert op to a string arg */
+ op_type = op->num.type;
+ left = op->num.left;
+ str = arg->value.str;
+
+ /* reset the op for the new field */
+ memset(op, 0, sizeof(*op));
+
+ /*
+ * If left arg was a field not found then
+ * NULL the entire op.
+ */
+ if (left->type == FILTER_ARG_BOOLEAN) {
+ free_arg(left);
+ free_arg(arg);
+ op->type = FILTER_ARG_BOOLEAN;
+ op->boolean.value = FILTER_FALSE;
+ break;
+ }
+
+ /* Left arg must be a field */
+ if (left->type != FILTER_ARG_FIELD) {
+ show_error(error_str,
+ "Illegal lvalue for string comparison");
+ return PEVENT_ERRNO__ILLEGAL_LVALUE;
+ }
+
+ /* Make sure this is a valid string compare */
+ switch (op_type) {
+ case FILTER_CMP_EQ:
+ op_type = FILTER_CMP_MATCH;
+ break;
+ case FILTER_CMP_NE:
+ op_type = FILTER_CMP_NOT_MATCH;
+ break;
+
+ case FILTER_CMP_REGEX:
+ case FILTER_CMP_NOT_REGEX:
+ ret = regcomp(&op->str.reg, str, REG_ICASE|REG_NOSUB);
+ if (ret) {
+ show_error(error_str,
+ "RegEx '%s' did not compute",
+ str);
+ return PEVENT_ERRNO__INVALID_REGEX;
+ }
+ break;
+ default:
+ show_error(error_str,
+ "Illegal comparison for string");
+ return PEVENT_ERRNO__ILLEGAL_STRING_CMP;
+ }
+
+ op->type = FILTER_ARG_STR;
+ op->str.type = op_type;
+ op->str.field = left->field.field;
+ op->str.val = strdup(str);
+ if (!op->str.val) {
+ show_error(error_str, "Failed to allocate string filter");
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+ /*
+ * Need a buffer to copy data for tests
+ */
+ op->str.buffer = malloc(op->str.field->size + 1);
+ if (!op->str.buffer) {
+ show_error(error_str, "Failed to allocate string filter");
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+ /* Null terminate this buffer */
+ op->str.buffer[op->str.field->size] = 0;
+
+ /* We no longer have left or right args */
+ free_arg(arg);
+ free_arg(left);
+
+ break;
+
+ case FILTER_NUMBER:
+
+ do_int:
+ switch (op->num.type) {
+ case FILTER_CMP_REGEX:
+ case FILTER_CMP_NOT_REGEX:
+ show_error(error_str,
+ "Op not allowed with integers");
+ return PEVENT_ERRNO__ILLEGAL_INTEGER_CMP;
+
+ default:
+ break;
+ }
+
+ /* numeric compare */
+ op->num.right = arg;
+ break;
+ default:
+ goto out_fail;
+ }
+ break;
+ default:
+ goto out_fail;
+ }
+
+ return 0;
+
+ out_fail:
+ show_error(error_str, "Syntax error");
+ return PEVENT_ERRNO__SYNTAX_ERROR;
+}
+
+static struct filter_arg *
+rotate_op_right(struct filter_arg *a, struct filter_arg *b)
+{
+ struct filter_arg *arg;
+
+ arg = a->op.right;
+ a->op.right = b;
+ return arg;
+}
+
+static enum pevent_errno add_left(struct filter_arg *op, struct filter_arg *arg)
+{
+ switch (op->type) {
+ case FILTER_ARG_EXP:
+ if (arg->type == FILTER_ARG_OP)
+ arg = rotate_op_right(arg, op);
+ op->exp.left = arg;
+ break;
+
+ case FILTER_ARG_OP:
+ op->op.left = arg;
+ break;
+ case FILTER_ARG_NUM:
+ if (arg->type == FILTER_ARG_OP)
+ arg = rotate_op_right(arg, op);
+
+ /* left arg of compares must be a field */
+ if (arg->type != FILTER_ARG_FIELD &&
+ arg->type != FILTER_ARG_BOOLEAN)
+ return PEVENT_ERRNO__INVALID_ARG_TYPE;
+ op->num.left = arg;
+ break;
+ default:
+ return PEVENT_ERRNO__INVALID_ARG_TYPE;
+ }
+ return 0;
+}
+
+enum op_type {
+ OP_NONE,
+ OP_BOOL,
+ OP_NOT,
+ OP_EXP,
+ OP_CMP,
+};
+
+static enum op_type process_op(const char *token,
+ enum filter_op_type *btype,
+ enum filter_cmp_type *ctype,
+ enum filter_exp_type *etype)
+{
+ *btype = FILTER_OP_NOT;
+ *etype = FILTER_EXP_NONE;
+ *ctype = FILTER_CMP_NONE;
+
+ if (strcmp(token, "&&") == 0)
+ *btype = FILTER_OP_AND;
+ else if (strcmp(token, "||") == 0)
+ *btype = FILTER_OP_OR;
+ else if (strcmp(token, "!") == 0)
+ return OP_NOT;
+
+ if (*btype != FILTER_OP_NOT)
+ return OP_BOOL;
+
+ /* Check for value expressions */
+ if (strcmp(token, "+") == 0) {
+ *etype = FILTER_EXP_ADD;
+ } else if (strcmp(token, "-") == 0) {
+ *etype = FILTER_EXP_SUB;
+ } else if (strcmp(token, "*") == 0) {
+ *etype = FILTER_EXP_MUL;
+ } else if (strcmp(token, "/") == 0) {
+ *etype = FILTER_EXP_DIV;
+ } else if (strcmp(token, "%") == 0) {
+ *etype = FILTER_EXP_MOD;
+ } else if (strcmp(token, ">>") == 0) {
+ *etype = FILTER_EXP_RSHIFT;
+ } else if (strcmp(token, "<<") == 0) {
+ *etype = FILTER_EXP_LSHIFT;
+ } else if (strcmp(token, "&") == 0) {
+ *etype = FILTER_EXP_AND;
+ } else if (strcmp(token, "|") == 0) {
+ *etype = FILTER_EXP_OR;
+ } else if (strcmp(token, "^") == 0) {
+ *etype = FILTER_EXP_XOR;
+ } else if (strcmp(token, "~") == 0)
+ *etype = FILTER_EXP_NOT;
+
+ if (*etype != FILTER_EXP_NONE)
+ return OP_EXP;
+
+ /* Check for compares */
+ if (strcmp(token, "==") == 0)
+ *ctype = FILTER_CMP_EQ;
+ else if (strcmp(token, "!=") == 0)
+ *ctype = FILTER_CMP_NE;
+ else if (strcmp(token, "<") == 0)
+ *ctype = FILTER_CMP_LT;
+ else if (strcmp(token, ">") == 0)
+ *ctype = FILTER_CMP_GT;
+ else if (strcmp(token, "<=") == 0)
+ *ctype = FILTER_CMP_LE;
+ else if (strcmp(token, ">=") == 0)
+ *ctype = FILTER_CMP_GE;
+ else if (strcmp(token, "=~") == 0)
+ *ctype = FILTER_CMP_REGEX;
+ else if (strcmp(token, "!~") == 0)
+ *ctype = FILTER_CMP_NOT_REGEX;
+ else
+ return OP_NONE;
+
+ return OP_CMP;
+}
+
+static int check_op_done(struct filter_arg *arg)
+{
+ switch (arg->type) {
+ case FILTER_ARG_EXP:
+ return arg->exp.right != NULL;
+
+ case FILTER_ARG_OP:
+ return arg->op.right != NULL;
+
+ case FILTER_ARG_NUM:
+ return arg->num.right != NULL;
+
+ case FILTER_ARG_STR:
+ /* A string conversion is always done */
+ return 1;
+
+ case FILTER_ARG_BOOLEAN:
+ /* field not found, is ok */
+ return 1;
+
+ default:
+ return 0;
+ }
+}
+
+enum filter_vals {
+ FILTER_VAL_NORM,
+ FILTER_VAL_FALSE,
+ FILTER_VAL_TRUE,
+};
+
+static enum pevent_errno
+reparent_op_arg(struct filter_arg *parent, struct filter_arg *old_child,
+ struct filter_arg *arg, char *error_str)
+{
+ struct filter_arg *other_child;
+ struct filter_arg **ptr;
+
+ if (parent->type != FILTER_ARG_OP &&
+ arg->type != FILTER_ARG_OP) {
+ show_error(error_str, "can not reparent other than OP");
+ return PEVENT_ERRNO__REPARENT_NOT_OP;
+ }
+
+ /* Get the sibling */
+ if (old_child->op.right == arg) {
+ ptr = &old_child->op.right;
+ other_child = old_child->op.left;
+ } else if (old_child->op.left == arg) {
+ ptr = &old_child->op.left;
+ other_child = old_child->op.right;
+ } else {
+ show_error(error_str, "Error in reparent op, find other child");
+ return PEVENT_ERRNO__REPARENT_FAILED;
+ }
+
+ /* Detach arg from old_child */
+ *ptr = NULL;
+
+ /* Check for root */
+ if (parent == old_child) {
+ free_arg(other_child);
+ *parent = *arg;
+ /* Free arg without recussion */
+ free(arg);
+ return 0;
+ }
+
+ if (parent->op.right == old_child)
+ ptr = &parent->op.right;
+ else if (parent->op.left == old_child)
+ ptr = &parent->op.left;
+ else {
+ show_error(error_str, "Error in reparent op");
+ return PEVENT_ERRNO__REPARENT_FAILED;
+ }
+
+ *ptr = arg;
+
+ free_arg(old_child);
+ return 0;
+}
+
+/* Returns either filter_vals (success) or pevent_errno (failfure) */
+static int test_arg(struct filter_arg *parent, struct filter_arg *arg,
+ char *error_str)
+{
+ int lval, rval;
+
+ switch (arg->type) {
+
+ /* bad case */
+ case FILTER_ARG_BOOLEAN:
+ return FILTER_VAL_FALSE + arg->boolean.value;
+
+ /* good cases: */
+ case FILTER_ARG_STR:
+ case FILTER_ARG_VALUE:
+ case FILTER_ARG_FIELD:
+ return FILTER_VAL_NORM;
+
+ case FILTER_ARG_EXP:
+ lval = test_arg(arg, arg->exp.left, error_str);
+ if (lval != FILTER_VAL_NORM)
+ return lval;
+ rval = test_arg(arg, arg->exp.right, error_str);
+ if (rval != FILTER_VAL_NORM)
+ return rval;
+ return FILTER_VAL_NORM;
+
+ case FILTER_ARG_NUM:
+ lval = test_arg(arg, arg->num.left, error_str);
+ if (lval != FILTER_VAL_NORM)
+ return lval;
+ rval = test_arg(arg, arg->num.right, error_str);
+ if (rval != FILTER_VAL_NORM)
+ return rval;
+ return FILTER_VAL_NORM;
+
+ case FILTER_ARG_OP:
+ if (arg->op.type != FILTER_OP_NOT) {
+ lval = test_arg(arg, arg->op.left, error_str);
+ switch (lval) {
+ case FILTER_VAL_NORM:
+ break;
+ case FILTER_VAL_TRUE:
+ if (arg->op.type == FILTER_OP_OR)
+ return FILTER_VAL_TRUE;
+ rval = test_arg(arg, arg->op.right, error_str);
+ if (rval != FILTER_VAL_NORM)
+ return rval;
+
+ return reparent_op_arg(parent, arg, arg->op.right,
+ error_str);
+
+ case FILTER_VAL_FALSE:
+ if (arg->op.type == FILTER_OP_AND)
+ return FILTER_VAL_FALSE;
+ rval = test_arg(arg, arg->op.right, error_str);
+ if (rval != FILTER_VAL_NORM)
+ return rval;
+
+ return reparent_op_arg(parent, arg, arg->op.right,
+ error_str);
+
+ default:
+ return lval;
+ }
+ }
+
+ rval = test_arg(arg, arg->op.right, error_str);
+ switch (rval) {
+ case FILTER_VAL_NORM:
+ default:
+ break;
+
+ case FILTER_VAL_TRUE:
+ if (arg->op.type == FILTER_OP_OR)
+ return FILTER_VAL_TRUE;
+ if (arg->op.type == FILTER_OP_NOT)
+ return FILTER_VAL_FALSE;
+
+ return reparent_op_arg(parent, arg, arg->op.left,
+ error_str);
+
+ case FILTER_VAL_FALSE:
+ if (arg->op.type == FILTER_OP_AND)
+ return FILTER_VAL_FALSE;
+ if (arg->op.type == FILTER_OP_NOT)
+ return FILTER_VAL_TRUE;
+
+ return reparent_op_arg(parent, arg, arg->op.left,
+ error_str);
+ }
+
+ return rval;
+ default:
+ show_error(error_str, "bad arg in filter tree");
+ return PEVENT_ERRNO__BAD_FILTER_ARG;
+ }
+ return FILTER_VAL_NORM;
+}
+
+/* Remove any unknown event fields */
+static int collapse_tree(struct filter_arg *arg,
+ struct filter_arg **arg_collapsed, char *error_str)
+{
+ int ret;
+
+ ret = test_arg(arg, arg, error_str);
+ switch (ret) {
+ case FILTER_VAL_NORM:
+ break;
+
+ case FILTER_VAL_TRUE:
+ case FILTER_VAL_FALSE:
+ free_arg(arg);
+ arg = allocate_arg();
+ if (arg) {
+ arg->type = FILTER_ARG_BOOLEAN;
+ arg->boolean.value = ret == FILTER_VAL_TRUE;
+ } else {
+ show_error(error_str, "Failed to allocate filter arg");
+ ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+ break;
+
+ default:
+ /* test_arg() already set the error_str */
+ free_arg(arg);
+ arg = NULL;
+ break;
+ }
+
+ *arg_collapsed = arg;
+ return ret;
+}
+
+static enum pevent_errno
+process_filter(struct event_format *event, struct filter_arg **parg,
+ char *error_str, int not)
+{
+ enum event_type type;
+ char *token = NULL;
+ struct filter_arg *current_op = NULL;
+ struct filter_arg *current_exp = NULL;
+ struct filter_arg *left_item = NULL;
+ struct filter_arg *arg = NULL;
+ enum op_type op_type;
+ enum filter_op_type btype;
+ enum filter_exp_type etype;
+ enum filter_cmp_type ctype;
+ enum pevent_errno ret;
+
+ *parg = NULL;
+
+ do {
+ free(token);
+ type = read_token(&token);
+ switch (type) {
+ case EVENT_SQUOTE:
+ case EVENT_DQUOTE:
+ case EVENT_ITEM:
+ ret = create_arg_item(event, token, type, &arg, error_str);
+ if (ret < 0)
+ goto fail;
+ if (!left_item)
+ left_item = arg;
+ else if (current_exp) {
+ ret = add_right(current_exp, arg, error_str);
+ if (ret < 0)
+ goto fail;
+ left_item = NULL;
+ /* Not's only one one expression */
+ if (not) {
+ arg = NULL;
+ if (current_op)
+ goto fail_syntax;
+ free(token);
+ *parg = current_exp;
+ return 0;
+ }
+ } else
+ goto fail_syntax;
+ arg = NULL;
+ break;
+
+ case EVENT_DELIM:
+ if (*token == ',') {
+ show_error(error_str, "Illegal token ','");
+ ret = PEVENT_ERRNO__ILLEGAL_TOKEN;
+ goto fail;
+ }
+
+ if (*token == '(') {
+ if (left_item) {
+ show_error(error_str,
+ "Open paren can not come after item");
+ ret = PEVENT_ERRNO__INVALID_PAREN;
+ goto fail;
+ }
+ if (current_exp) {
+ show_error(error_str,
+ "Open paren can not come after expression");
+ ret = PEVENT_ERRNO__INVALID_PAREN;
+ goto fail;
+ }
+
+ ret = process_filter(event, &arg, error_str, 0);
+ if (ret != PEVENT_ERRNO__UNBALANCED_PAREN) {
+ if (ret == 0) {
+ show_error(error_str,
+ "Unbalanced number of '('");
+ ret = PEVENT_ERRNO__UNBALANCED_PAREN;
+ }
+ goto fail;
+ }
+ ret = 0;
+
+ /* A not wants just one expression */
+ if (not) {
+ if (current_op)
+ goto fail_syntax;
+ *parg = arg;
+ return 0;
+ }
+
+ if (current_op)
+ ret = add_right(current_op, arg, error_str);
+ else
+ current_exp = arg;
+
+ if (ret < 0)
+ goto fail;
+
+ } else { /* ')' */
+ if (!current_op && !current_exp)
+ goto fail_syntax;
+
+ /* Make sure everything is finished at this level */
+ if (current_exp && !check_op_done(current_exp))
+ goto fail_syntax;
+ if (current_op && !check_op_done(current_op))
+ goto fail_syntax;
+
+ if (current_op)
+ *parg = current_op;
+ else
+ *parg = current_exp;
+ return PEVENT_ERRNO__UNBALANCED_PAREN;
+ }
+ break;
+
+ case EVENT_OP:
+ op_type = process_op(token, &btype, &ctype, &etype);
+
+ /* All expect a left arg except for NOT */
+ switch (op_type) {
+ case OP_BOOL:
+ /* Logic ops need a left expression */
+ if (!current_exp && !current_op)
+ goto fail_syntax;
+ /* fall through */
+ case OP_NOT:
+ /* logic only processes ops and exp */
+ if (left_item)
+ goto fail_syntax;
+ break;
+ case OP_EXP:
+ case OP_CMP:
+ if (!left_item)
+ goto fail_syntax;
+ break;
+ case OP_NONE:
+ show_error(error_str,
+ "Unknown op token %s", token);
+ ret = PEVENT_ERRNO__UNKNOWN_TOKEN;
+ goto fail;
+ }
+
+ ret = 0;
+ switch (op_type) {
+ case OP_BOOL:
+ arg = create_arg_op(btype);
+ if (arg == NULL)
+ goto fail_alloc;
+ if (current_op)
+ ret = add_left(arg, current_op);
+ else
+ ret = add_left(arg, current_exp);
+ current_op = arg;
+ current_exp = NULL;
+ break;
+
+ case OP_NOT:
+ arg = create_arg_op(btype);
+ if (arg == NULL)
+ goto fail_alloc;
+ if (current_op)
+ ret = add_right(current_op, arg, error_str);
+ if (ret < 0)
+ goto fail;
+ current_exp = arg;
+ ret = process_filter(event, &arg, error_str, 1);
+ if (ret < 0)
+ goto fail;
+ ret = add_right(current_exp, arg, error_str);
+ if (ret < 0)
+ goto fail;
+ break;
+
+ case OP_EXP:
+ case OP_CMP:
+ if (op_type == OP_EXP)
+ arg = create_arg_exp(etype);
+ else
+ arg = create_arg_cmp(ctype);
+ if (arg == NULL)
+ goto fail_alloc;
+
+ if (current_op)
+ ret = add_right(current_op, arg, error_str);
+ if (ret < 0)
+ goto fail;
+ ret = add_left(arg, left_item);
+ if (ret < 0) {
+ arg = NULL;
+ goto fail_syntax;
+ }
+ current_exp = arg;
+ break;
+ default:
+ break;
+ }
+ arg = NULL;
+ if (ret < 0)
+ goto fail_syntax;
+ break;
+ case EVENT_NONE:
+ break;
+ case EVENT_ERROR:
+ goto fail_alloc;
+ default:
+ goto fail_syntax;
+ }
+ } while (type != EVENT_NONE);
+
+ if (!current_op && !current_exp)
+ goto fail_syntax;
+
+ if (!current_op)
+ current_op = current_exp;
+
+ ret = collapse_tree(current_op, parg, error_str);
+ if (ret < 0)
+ goto fail;
+
+ *parg = current_op;
+
+ return 0;
+
+ fail_alloc:
+ show_error(error_str, "failed to allocate filter arg");
+ ret = PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ goto fail;
+ fail_syntax:
+ show_error(error_str, "Syntax error");
+ ret = PEVENT_ERRNO__SYNTAX_ERROR;
+ fail:
+ free_arg(current_op);
+ free_arg(current_exp);
+ free_arg(arg);
+ free(token);
+ return ret;
+}
+
+static enum pevent_errno
+process_event(struct event_format *event, const char *filter_str,
+ struct filter_arg **parg, char *error_str)
+{
+ int ret;
+
+ pevent_buffer_init(filter_str, strlen(filter_str));
+
+ ret = process_filter(event, parg, error_str, 0);
+ if (ret < 0)
+ return ret;
+
+ /* If parg is NULL, then make it into FALSE */
+ if (!*parg) {
+ *parg = allocate_arg();
+ if (*parg == NULL)
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+
+ (*parg)->type = FILTER_ARG_BOOLEAN;
+ (*parg)->boolean.value = FILTER_FALSE;
+ }
+
+ return 0;
+}
+
+static enum pevent_errno
+filter_event(struct event_filter *filter, struct event_format *event,
+ const char *filter_str, char *error_str)
+{
+ struct filter_type *filter_type;
+ struct filter_arg *arg;
+ enum pevent_errno ret;
+
+ if (filter_str) {
+ ret = process_event(event, filter_str, &arg, error_str);
+ if (ret < 0)
+ return ret;
+
+ } else {
+ /* just add a TRUE arg */
+ arg = allocate_arg();
+ if (arg == NULL)
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+
+ arg->type = FILTER_ARG_BOOLEAN;
+ arg->boolean.value = FILTER_TRUE;
+ }
+
+ filter_type = add_filter_type(filter, event->id);
+ if (filter_type == NULL)
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+
+ if (filter_type->filter)
+ free_arg(filter_type->filter);
+ filter_type->filter = arg;
+
+ return 0;
+}
+
+static void filter_init_error_buf(struct event_filter *filter)
+{
+ /* clear buffer to reset show error */
+ pevent_buffer_init("", 0);
+ filter->error_buffer[0] = '\0';
+}
+
+/**
+ * pevent_filter_add_filter_str - add a new filter
+ * @filter: the event filter to add to
+ * @filter_str: the filter string that contains the filter
+ *
+ * Returns 0 if the filter was successfully added or a
+ * negative error code. Use pevent_filter_strerror() to see
+ * actual error message in case of error.
+ */
+enum pevent_errno pevent_filter_add_filter_str(struct event_filter *filter,
+ const char *filter_str)
+{
+ struct pevent *pevent = filter->pevent;
+ struct event_list *event;
+ struct event_list *events = NULL;
+ const char *filter_start;
+ const char *next_event;
+ char *this_event;
+ char *event_name = NULL;
+ char *sys_name = NULL;
+ char *sp;
+ enum pevent_errno rtn = 0; /* PEVENT_ERRNO__SUCCESS */
+ int len;
+ int ret;
+
+ filter_init_error_buf(filter);
+
+ filter_start = strchr(filter_str, ':');
+ if (filter_start)
+ len = filter_start - filter_str;
+ else
+ len = strlen(filter_str);
+
+ do {
+ next_event = strchr(filter_str, ',');
+ if (next_event &&
+ (!filter_start || next_event < filter_start))
+ len = next_event - filter_str;
+ else if (filter_start)
+ len = filter_start - filter_str;
+ else
+ len = strlen(filter_str);
+
+ this_event = malloc(len + 1);
+ if (this_event == NULL) {
+ /* This can only happen when events is NULL, but still */
+ free_events(events);
+ return PEVENT_ERRNO__MEM_ALLOC_FAILED;
+ }
+ memcpy(this_event, filter_str, len);
+ this_event[len] = 0;
+
+ if (next_event)
+ next_event++;
+
+ filter_str = next_event;
+
+ sys_name = strtok_r(this_event, "/", &sp);
+ event_name = strtok_r(NULL, "/", &sp);
+
+ if (!sys_name) {
+ /* This can only happen when events is NULL, but still */
+ free_events(events);
+ free(this_event);
+ return PEVENT_ERRNO__FILTER_NOT_FOUND;
+ }
+
+ /* Find this event */
+ ret = find_event(pevent, &events, strim(sys_name), strim(event_name));
+ if (ret < 0) {
+ free_events(events);
+ free(this_event);
+ return ret;
+ }
+ free(this_event);
+ } while (filter_str);
+
+ /* Skip the ':' */
+ if (filter_start)
+ filter_start++;
+
+ /* filter starts here */
+ for (event = events; event; event = event->next) {
+ ret = filter_event(filter, event->event, filter_start,
+ filter->error_buffer);
+ /* Failures are returned if a parse error happened */
+ if (ret < 0)
+ rtn = ret;
+
+ if (ret >= 0 && pevent->test_filters) {
+ char *test;
+ test = pevent_filter_make_string(filter, event->event->id);
+ if (test) {
+ printf(" '%s: %s'\n", event->event->name, test);
+ free(test);
+ }
+ }
+ }
+
+ free_events(events);
+
+ if (rtn >= 0 && pevent->test_filters)
+ exit(0);
+
+ return rtn;
+}
+
+static void free_filter_type(struct filter_type *filter_type)
+{
+ free_arg(filter_type->filter);
+}
+
+/**
+ * pevent_filter_strerror - fill error message in a buffer
+ * @filter: the event filter contains error
+ * @err: the error code
+ * @buf: the buffer to be filled in
+ * @buflen: the size of the buffer
+ *
+ * Returns 0 if message was filled successfully, -1 if error
+ */
+int pevent_filter_strerror(struct event_filter *filter, enum pevent_errno err,
+ char *buf, size_t buflen)
+{
+ if (err <= __PEVENT_ERRNO__START || err >= __PEVENT_ERRNO__END)
+ return -1;
+
+ if (strlen(filter->error_buffer) > 0) {
+ size_t len = snprintf(buf, buflen, "%s", filter->error_buffer);
+
+ if (len > buflen)
+ return -1;
+ return 0;
+ }
+
+ return pevent_strerror(filter->pevent, err, buf, buflen);
+}
+
+/**
+ * pevent_filter_remove_event - remove a filter for an event
+ * @filter: the event filter to remove from
+ * @event_id: the event to remove a filter for
+ *
+ * Removes the filter saved for an event defined by @event_id
+ * from the @filter.
+ *
+ * Returns 1: if an event was removed
+ * 0: if the event was not found
+ */
+int pevent_filter_remove_event(struct event_filter *filter,
+ int event_id)
+{
+ struct filter_type *filter_type;
+ unsigned long len;
+
+ if (!filter->filters)
+ return 0;
+
+ filter_type = find_filter_type(filter, event_id);
+
+ if (!filter_type)
+ return 0;
+
+ free_filter_type(filter_type);
+
+ /* The filter_type points into the event_filters array */
+ len = (unsigned long)(filter->event_filters + filter->filters) -
+ (unsigned long)(filter_type + 1);
+
+ memmove(filter_type, filter_type + 1, len);
+ filter->filters--;
+
+ memset(&filter->event_filters[filter->filters], 0,
+ sizeof(*filter_type));
+
+ return 1;
+}
+
+/**
+ * pevent_filter_reset - clear all filters in a filter
+ * @filter: the event filter to reset
+ *
+ * Removes all filters from a filter and resets it.
+ */
+void pevent_filter_reset(struct event_filter *filter)
+{
+ int i;
+
+ for (i = 0; i < filter->filters; i++)
+ free_filter_type(&filter->event_filters[i]);
+
+ free(filter->event_filters);
+ filter->filters = 0;
+ filter->event_filters = NULL;
+}
+
+void pevent_filter_free(struct event_filter *filter)
+{
+ pevent_unref(filter->pevent);
+
+ pevent_filter_reset(filter);
+
+ free(filter);
+}
+
+static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg);
+
+static int copy_filter_type(struct event_filter *filter,
+ struct event_filter *source,
+ struct filter_type *filter_type)
+{
+ struct filter_arg *arg;
+ struct event_format *event;
+ const char *sys;
+ const char *name;
+ char *str;
+
+ /* Can't assume that the pevent's are the same */
+ sys = filter_type->event->system;
+ name = filter_type->event->name;
+ event = pevent_find_event_by_name(filter->pevent, sys, name);
+ if (!event)
+ return -1;
+
+ str = arg_to_str(source, filter_type->filter);
+ if (!str)
+ return -1;
+
+ if (strcmp(str, "TRUE") == 0 || strcmp(str, "FALSE") == 0) {
+ /* Add trivial event */
+ arg = allocate_arg();
+ if (arg == NULL)
+ return -1;
+
+ arg->type = FILTER_ARG_BOOLEAN;
+ if (strcmp(str, "TRUE") == 0)
+ arg->boolean.value = 1;
+ else
+ arg->boolean.value = 0;
+
+ filter_type = add_filter_type(filter, event->id);
+ if (filter_type == NULL)
+ return -1;
+
+ filter_type->filter = arg;
+
+ free(str);
+ return 0;
+ }
+
+ filter_event(filter, event, str, NULL);
+ free(str);
+
+ return 0;
+}
+
+/**
+ * pevent_filter_copy - copy a filter using another filter
+ * @dest - the filter to copy to
+ * @source - the filter to copy from
+ *
+ * Returns 0 on success and -1 if not all filters were copied
+ */
+int pevent_filter_copy(struct event_filter *dest, struct event_filter *source)
+{
+ int ret = 0;
+ int i;
+
+ pevent_filter_reset(dest);
+
+ for (i = 0; i < source->filters; i++) {
+ if (copy_filter_type(dest, source, &source->event_filters[i]))
+ ret = -1;
+ }
+ return ret;
+}
+
+
+/**
+ * pevent_update_trivial - update the trivial filters with the given filter
+ * @dest - the filter to update
+ * @source - the filter as the source of the update
+ * @type - the type of trivial filter to update.
+ *
+ * Scan dest for trivial events matching @type to replace with the source.
+ *
+ * Returns 0 on success and -1 if there was a problem updating, but
+ * events may have still been updated on error.
+ */
+int pevent_update_trivial(struct event_filter *dest, struct event_filter *source,
+ enum filter_trivial_type type)
+{
+ struct pevent *src_pevent;
+ struct pevent *dest_pevent;
+ struct event_format *event;
+ struct filter_type *filter_type;
+ struct filter_arg *arg;
+ char *str;
+ int i;
+
+ src_pevent = source->pevent;
+ dest_pevent = dest->pevent;
+
+ /* Do nothing if either of the filters has nothing to filter */
+ if (!dest->filters || !source->filters)
+ return 0;
+
+ for (i = 0; i < dest->filters; i++) {
+ filter_type = &dest->event_filters[i];
+ arg = filter_type->filter;
+ if (arg->type != FILTER_ARG_BOOLEAN)
+ continue;
+ if ((arg->boolean.value && type == FILTER_TRIVIAL_FALSE) ||
+ (!arg->boolean.value && type == FILTER_TRIVIAL_TRUE))
+ continue;
+
+ event = filter_type->event;
+
+ if (src_pevent != dest_pevent) {
+ /* do a look up */
+ event = pevent_find_event_by_name(src_pevent,
+ event->system,
+ event->name);
+ if (!event)
+ return -1;
+ }
+
+ str = pevent_filter_make_string(source, event->id);
+ if (!str)
+ continue;
+
+ /* Don't bother if the filter is trivial too */
+ if (strcmp(str, "TRUE") != 0 && strcmp(str, "FALSE") != 0)
+ filter_event(dest, event, str, NULL);
+ free(str);
+ }
+ return 0;
+}
+
+/**
+ * pevent_filter_clear_trivial - clear TRUE and FALSE filters
+ * @filter: the filter to remove trivial filters from
+ * @type: remove only true, false, or both
+ *
+ * Removes filters that only contain a TRUE or FALES boolean arg.
+ *
+ * Returns 0 on success and -1 if there was a problem.
+ */
+int pevent_filter_clear_trivial(struct event_filter *filter,
+ enum filter_trivial_type type)
+{
+ struct filter_type *filter_type;
+ int count = 0;
+ int *ids = NULL;
+ int i;
+
+ if (!filter->filters)
+ return 0;
+
+ /*
+ * Two steps, first get all ids with trivial filters.
+ * then remove those ids.
+ */
+ for (i = 0; i < filter->filters; i++) {
+ int *new_ids;
+
+ filter_type = &filter->event_filters[i];
+ if (filter_type->filter->type != FILTER_ARG_BOOLEAN)
+ continue;
+ switch (type) {
+ case FILTER_TRIVIAL_FALSE:
+ if (filter_type->filter->boolean.value)
+ continue;
+ case FILTER_TRIVIAL_TRUE:
+ if (!filter_type->filter->boolean.value)
+ continue;
+ default:
+ break;
+ }
+
+ new_ids = realloc(ids, sizeof(*ids) * (count + 1));
+ if (!new_ids) {
+ free(ids);
+ return -1;
+ }
+
+ ids = new_ids;
+ ids[count++] = filter_type->event_id;
+ }
+
+ if (!count)
+ return 0;
+
+ for (i = 0; i < count; i++)
+ pevent_filter_remove_event(filter, ids[i]);
+
+ free(ids);
+ return 0;
+}
+
+/**
+ * pevent_filter_event_has_trivial - return true event contains trivial filter
+ * @filter: the filter with the information
+ * @event_id: the id of the event to test
+ * @type: trivial type to test for (TRUE, FALSE, EITHER)
+ *
+ * Returns 1 if the event contains a matching trivial type
+ * otherwise 0.
+ */
+int pevent_filter_event_has_trivial(struct event_filter *filter,
+ int event_id,
+ enum filter_trivial_type type)
+{
+ struct filter_type *filter_type;
+
+ if (!filter->filters)
+ return 0;
+
+ filter_type = find_filter_type(filter, event_id);
+
+ if (!filter_type)
+ return 0;
+
+ if (filter_type->filter->type != FILTER_ARG_BOOLEAN)
+ return 0;
+
+ switch (type) {
+ case FILTER_TRIVIAL_FALSE:
+ return !filter_type->filter->boolean.value;
+
+ case FILTER_TRIVIAL_TRUE:
+ return filter_type->filter->boolean.value;
+ default:
+ return 1;
+ }
+}
+
+static int test_filter(struct event_format *event, struct filter_arg *arg,
+ struct pevent_record *record, enum pevent_errno *err);
+
+static const char *
+get_comm(struct event_format *event, struct pevent_record *record)
+{
+ const char *comm;
+ int pid;
+
+ pid = pevent_data_pid(event->pevent, record);
+ comm = pevent_data_comm_from_pid(event->pevent, pid);
+ return comm;
+}
+
+static unsigned long long
+get_value(struct event_format *event,
+ struct format_field *field, struct pevent_record *record)
+{
+ unsigned long long val;
+
+ /* Handle our dummy "comm" field */
+ if (field == &comm) {
+ const char *name;
+
+ name = get_comm(event, record);
+ return (unsigned long)name;
+ }
+
+ pevent_read_number_field(field, record->data, &val);
+
+ if (!(field->flags & FIELD_IS_SIGNED))
+ return val;
+
+ switch (field->size) {
+ case 1:
+ return (char)val;
+ case 2:
+ return (short)val;
+ case 4:
+ return (int)val;
+ case 8:
+ return (long long)val;
+ }
+ return val;
+}
+
+static unsigned long long
+get_arg_value(struct event_format *event, struct filter_arg *arg,
+ struct pevent_record *record, enum pevent_errno *err);
+
+static unsigned long long
+get_exp_value(struct event_format *event, struct filter_arg *arg,
+ struct pevent_record *record, enum pevent_errno *err)
+{
+ unsigned long long lval, rval;
+
+ lval = get_arg_value(event, arg->exp.left, record, err);
+ rval = get_arg_value(event, arg->exp.right, record, err);
+
+ if (*err) {
+ /*
+ * There was an error, no need to process anymore.
+ */
+ return 0;
+ }
+
+ switch (arg->exp.type) {
+ case FILTER_EXP_ADD:
+ return lval + rval;
+
+ case FILTER_EXP_SUB:
+ return lval - rval;
+
+ case FILTER_EXP_MUL:
+ return lval * rval;
+
+ case FILTER_EXP_DIV:
+ return lval / rval;
+
+ case FILTER_EXP_MOD:
+ return lval % rval;
+
+ case FILTER_EXP_RSHIFT:
+ return lval >> rval;
+
+ case FILTER_EXP_LSHIFT:
+ return lval << rval;
+
+ case FILTER_EXP_AND:
+ return lval & rval;
+
+ case FILTER_EXP_OR:
+ return lval | rval;
+
+ case FILTER_EXP_XOR:
+ return lval ^ rval;
+
+ case FILTER_EXP_NOT:
+ default:
+ if (!*err)
+ *err = PEVENT_ERRNO__INVALID_EXP_TYPE;
+ }
+ return 0;
+}
+
+static unsigned long long
+get_arg_value(struct event_format *event, struct filter_arg *arg,
+ struct pevent_record *record, enum pevent_errno *err)
+{
+ switch (arg->type) {
+ case FILTER_ARG_FIELD:
+ return get_value(event, arg->field.field, record);
+
+ case FILTER_ARG_VALUE:
+ if (arg->value.type != FILTER_NUMBER) {
+ if (!*err)
+ *err = PEVENT_ERRNO__NOT_A_NUMBER;
+ }
+ return arg->value.val;
+
+ case FILTER_ARG_EXP:
+ return get_exp_value(event, arg, record, err);
+
+ default:
+ if (!*err)
+ *err = PEVENT_ERRNO__INVALID_ARG_TYPE;
+ }
+ return 0;
+}
+
+static int test_num(struct event_format *event, struct filter_arg *arg,
+ struct pevent_record *record, enum pevent_errno *err)
+{
+ unsigned long long lval, rval;
+
+ lval = get_arg_value(event, arg->num.left, record, err);
+ rval = get_arg_value(event, arg->num.right, record, err);
+
+ if (*err) {
+ /*
+ * There was an error, no need to process anymore.
+ */
+ return 0;
+ }
+
+ switch (arg->num.type) {
+ case FILTER_CMP_EQ:
+ return lval == rval;
+
+ case FILTER_CMP_NE:
+ return lval != rval;
+
+ case FILTER_CMP_GT:
+ return lval > rval;
+
+ case FILTER_CMP_LT:
+ return lval < rval;
+
+ case FILTER_CMP_GE:
+ return lval >= rval;
+
+ case FILTER_CMP_LE:
+ return lval <= rval;
+
+ default:
+ if (!*err)
+ *err = PEVENT_ERRNO__ILLEGAL_INTEGER_CMP;
+ return 0;
+ }
+}
+
+static const char *get_field_str(struct filter_arg *arg, struct pevent_record *record)
+{
+ struct event_format *event;
+ struct pevent *pevent;
+ unsigned long long addr;
+ const char *val = NULL;
+ char hex[64];
+
+ /* If the field is not a string convert it */
+ if (arg->str.field->flags & FIELD_IS_STRING) {
+ val = record->data + arg->str.field->offset;
+
+ /*
+ * We need to copy the data since we can't be sure the field
+ * is null terminated.
+ */
+ if (*(val + arg->str.field->size - 1)) {
+ /* copy it */
+ memcpy(arg->str.buffer, val, arg->str.field->size);
+ /* the buffer is already NULL terminated */
+ val = arg->str.buffer;
+ }
+
+ } else {
+ event = arg->str.field->event;
+ pevent = event->pevent;
+ addr = get_value(event, arg->str.field, record);
+
+ if (arg->str.field->flags & (FIELD_IS_POINTER | FIELD_IS_LONG))
+ /* convert to a kernel symbol */
+ val = pevent_find_function(pevent, addr);
+
+ if (val == NULL) {
+ /* just use the hex of the string name */
+ snprintf(hex, 64, "0x%llx", addr);
+ val = hex;
+ }
+ }
+
+ return val;
+}
+
+static int test_str(struct event_format *event, struct filter_arg *arg,
+ struct pevent_record *record, enum pevent_errno *err)
+{
+ const char *val;
+
+ if (arg->str.field == &comm)
+ val = get_comm(event, record);
+ else
+ val = get_field_str(arg, record);
+
+ switch (arg->str.type) {
+ case FILTER_CMP_MATCH:
+ return strcmp(val, arg->str.val) == 0;
+
+ case FILTER_CMP_NOT_MATCH:
+ return strcmp(val, arg->str.val) != 0;
+
+ case FILTER_CMP_REGEX:
+ /* Returns zero on match */
+ return !regexec(&arg->str.reg, val, 0, NULL, 0);
+
+ case FILTER_CMP_NOT_REGEX:
+ return regexec(&arg->str.reg, val, 0, NULL, 0);
+
+ default:
+ if (!*err)
+ *err = PEVENT_ERRNO__ILLEGAL_STRING_CMP;
+ return 0;
+ }
+}
+
+static int test_op(struct event_format *event, struct filter_arg *arg,
+ struct pevent_record *record, enum pevent_errno *err)
+{
+ switch (arg->op.type) {
+ case FILTER_OP_AND:
+ return test_filter(event, arg->op.left, record, err) &&
+ test_filter(event, arg->op.right, record, err);
+
+ case FILTER_OP_OR:
+ return test_filter(event, arg->op.left, record, err) ||
+ test_filter(event, arg->op.right, record, err);
+
+ case FILTER_OP_NOT:
+ return !test_filter(event, arg->op.right, record, err);
+
+ default:
+ if (!*err)
+ *err = PEVENT_ERRNO__INVALID_OP_TYPE;
+ return 0;
+ }
+}
+
+static int test_filter(struct event_format *event, struct filter_arg *arg,
+ struct pevent_record *record, enum pevent_errno *err)
+{
+ if (*err) {
+ /*
+ * There was an error, no need to process anymore.
+ */
+ return 0;
+ }
+
+ switch (arg->type) {
+ case FILTER_ARG_BOOLEAN:
+ /* easy case */
+ return arg->boolean.value;
+
+ case FILTER_ARG_OP:
+ return test_op(event, arg, record, err);
+
+ case FILTER_ARG_NUM:
+ return test_num(event, arg, record, err);
+
+ case FILTER_ARG_STR:
+ return test_str(event, arg, record, err);
+
+ case FILTER_ARG_EXP:
+ case FILTER_ARG_VALUE:
+ case FILTER_ARG_FIELD:
+ /*
+ * Expressions, fields and values evaluate
+ * to true if they return non zero
+ */
+ return !!get_arg_value(event, arg, record, err);
+
+ default:
+ if (!*err)
+ *err = PEVENT_ERRNO__INVALID_ARG_TYPE;
+ return 0;
+ }
+}
+
+/**
+ * pevent_event_filtered - return true if event has filter
+ * @filter: filter struct with filter information
+ * @event_id: event id to test if filter exists
+ *
+ * Returns 1 if filter found for @event_id
+ * otherwise 0;
+ */
+int pevent_event_filtered(struct event_filter *filter, int event_id)
+{
+ struct filter_type *filter_type;
+
+ if (!filter->filters)
+ return 0;
+
+ filter_type = find_filter_type(filter, event_id);
+
+ return filter_type ? 1 : 0;
+}
+
+/**
+ * pevent_filter_match - test if a record matches a filter
+ * @filter: filter struct with filter information
+ * @record: the record to test against the filter
+ *
+ * Returns: match result or error code (prefixed with PEVENT_ERRNO__)
+ * FILTER_MATCH - filter found for event and @record matches
+ * FILTER_MISS - filter found for event and @record does not match
+ * FILTER_NOT_FOUND - no filter found for @record's event
+ * NO_FILTER - if no filters exist
+ * otherwise - error occurred during test
+ */
+enum pevent_errno pevent_filter_match(struct event_filter *filter,
+ struct pevent_record *record)
+{
+ struct pevent *pevent = filter->pevent;
+ struct filter_type *filter_type;
+ int event_id;
+ int ret;
+ enum pevent_errno err = 0;
+
+ filter_init_error_buf(filter);
+
+ if (!filter->filters)
+ return PEVENT_ERRNO__NO_FILTER;
+
+ event_id = pevent_data_type(pevent, record);
+
+ filter_type = find_filter_type(filter, event_id);
+ if (!filter_type)
+ return PEVENT_ERRNO__FILTER_NOT_FOUND;
+
+ ret = test_filter(filter_type->event, filter_type->filter, record, &err);
+ if (err)
+ return err;
+
+ return ret ? PEVENT_ERRNO__FILTER_MATCH : PEVENT_ERRNO__FILTER_MISS;
+}
+
+static char *op_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *str = NULL;
+ char *left = NULL;
+ char *right = NULL;
+ char *op = NULL;
+ int left_val = -1;
+ int right_val = -1;
+ int val;
+
+ switch (arg->op.type) {
+ case FILTER_OP_AND:
+ op = "&&";
+ /* fall through */
+ case FILTER_OP_OR:
+ if (!op)
+ op = "||";
+
+ left = arg_to_str(filter, arg->op.left);
+ right = arg_to_str(filter, arg->op.right);
+ if (!left || !right)
+ break;
+
+ /* Try to consolidate boolean values */
+ if (strcmp(left, "TRUE") == 0)
+ left_val = 1;
+ else if (strcmp(left, "FALSE") == 0)
+ left_val = 0;
+
+ if (strcmp(right, "TRUE") == 0)
+ right_val = 1;
+ else if (strcmp(right, "FALSE") == 0)
+ right_val = 0;
+
+ if (left_val >= 0) {
+ if ((arg->op.type == FILTER_OP_AND && !left_val) ||
+ (arg->op.type == FILTER_OP_OR && left_val)) {
+ /* Just return left value */
+ str = left;
+ left = NULL;
+ break;
+ }
+ if (right_val >= 0) {
+ /* just evaluate this. */
+ val = 0;
+ switch (arg->op.type) {
+ case FILTER_OP_AND:
+ val = left_val && right_val;
+ break;
+ case FILTER_OP_OR:
+ val = left_val || right_val;
+ break;
+ default:
+ break;
+ }
+ asprintf(&str, val ? "TRUE" : "FALSE");
+ break;
+ }
+ }
+ if (right_val >= 0) {
+ if ((arg->op.type == FILTER_OP_AND && !right_val) ||
+ (arg->op.type == FILTER_OP_OR && right_val)) {
+ /* Just return right value */
+ str = right;
+ right = NULL;
+ break;
+ }
+ /* The right value is meaningless */
+ str = left;
+ left = NULL;
+ break;
+ }
+
+ asprintf(&str, "(%s) %s (%s)", left, op, right);
+ break;
+
+ case FILTER_OP_NOT:
+ op = "!";
+ right = arg_to_str(filter, arg->op.right);
+ if (!right)
+ break;
+
+ /* See if we can consolidate */
+ if (strcmp(right, "TRUE") == 0)
+ right_val = 1;
+ else if (strcmp(right, "FALSE") == 0)
+ right_val = 0;
+ if (right_val >= 0) {
+ /* just return the opposite */
+ asprintf(&str, right_val ? "FALSE" : "TRUE");
+ break;
+ }
+ asprintf(&str, "%s(%s)", op, right);
+ break;
+
+ default:
+ /* ?? */
+ break;
+ }
+ free(left);
+ free(right);
+ return str;
+}
+
+static char *val_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *str = NULL;
+
+ asprintf(&str, "%lld", arg->value.val);
+
+ return str;
+}
+
+static char *field_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ return strdup(arg->field.field->name);
+}
+
+static char *exp_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *lstr;
+ char *rstr;
+ char *op;
+ char *str = NULL;
+
+ lstr = arg_to_str(filter, arg->exp.left);
+ rstr = arg_to_str(filter, arg->exp.right);
+ if (!lstr || !rstr)
+ goto out;
+
+ switch (arg->exp.type) {
+ case FILTER_EXP_ADD:
+ op = "+";
+ break;
+ case FILTER_EXP_SUB:
+ op = "-";
+ break;
+ case FILTER_EXP_MUL:
+ op = "*";
+ break;
+ case FILTER_EXP_DIV:
+ op = "/";
+ break;
+ case FILTER_EXP_MOD:
+ op = "%";
+ break;
+ case FILTER_EXP_RSHIFT:
+ op = ">>";
+ break;
+ case FILTER_EXP_LSHIFT:
+ op = "<<";
+ break;
+ case FILTER_EXP_AND:
+ op = "&";
+ break;
+ case FILTER_EXP_OR:
+ op = "|";
+ break;
+ case FILTER_EXP_XOR:
+ op = "^";
+ break;
+ default:
+ op = "[ERROR IN EXPRESSION TYPE]";
+ break;
+ }
+
+ asprintf(&str, "%s %s %s", lstr, op, rstr);
+out:
+ free(lstr);
+ free(rstr);
+
+ return str;
+}
+
+static char *num_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *lstr;
+ char *rstr;
+ char *str = NULL;
+ char *op = NULL;
+
+ lstr = arg_to_str(filter, arg->num.left);
+ rstr = arg_to_str(filter, arg->num.right);
+ if (!lstr || !rstr)
+ goto out;
+
+ switch (arg->num.type) {
+ case FILTER_CMP_EQ:
+ op = "==";
+ /* fall through */
+ case FILTER_CMP_NE:
+ if (!op)
+ op = "!=";
+ /* fall through */
+ case FILTER_CMP_GT:
+ if (!op)
+ op = ">";
+ /* fall through */
+ case FILTER_CMP_LT:
+ if (!op)
+ op = "<";
+ /* fall through */
+ case FILTER_CMP_GE:
+ if (!op)
+ op = ">=";
+ /* fall through */
+ case FILTER_CMP_LE:
+ if (!op)
+ op = "<=";
+
+ asprintf(&str, "%s %s %s", lstr, op, rstr);
+ break;
+
+ default:
+ /* ?? */
+ break;
+ }
+
+out:
+ free(lstr);
+ free(rstr);
+ return str;
+}
+
+static char *str_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *str = NULL;
+ char *op = NULL;
+
+ switch (arg->str.type) {
+ case FILTER_CMP_MATCH:
+ op = "==";
+ /* fall through */
+ case FILTER_CMP_NOT_MATCH:
+ if (!op)
+ op = "!=";
+ /* fall through */
+ case FILTER_CMP_REGEX:
+ if (!op)
+ op = "=~";
+ /* fall through */
+ case FILTER_CMP_NOT_REGEX:
+ if (!op)
+ op = "!~";
+
+ asprintf(&str, "%s %s \"%s\"",
+ arg->str.field->name, op, arg->str.val);
+ break;
+
+ default:
+ /* ?? */
+ break;
+ }
+ return str;
+}
+
+static char *arg_to_str(struct event_filter *filter, struct filter_arg *arg)
+{
+ char *str = NULL;
+
+ switch (arg->type) {
+ case FILTER_ARG_BOOLEAN:
+ asprintf(&str, arg->boolean.value ? "TRUE" : "FALSE");
+ return str;
+
+ case FILTER_ARG_OP:
+ return op_to_str(filter, arg);
+
+ case FILTER_ARG_NUM:
+ return num_to_str(filter, arg);
+
+ case FILTER_ARG_STR:
+ return str_to_str(filter, arg);
+
+ case FILTER_ARG_VALUE:
+ return val_to_str(filter, arg);
+
+ case FILTER_ARG_FIELD:
+ return field_to_str(filter, arg);
+
+ case FILTER_ARG_EXP:
+ return exp_to_str(filter, arg);
+
+ default:
+ /* ?? */
+ return NULL;
+ }
+
+}
+
+/**
+ * pevent_filter_make_string - return a string showing the filter
+ * @filter: filter struct with filter information
+ * @event_id: the event id to return the filter string with
+ *
+ * Returns a string that displays the filter contents.
+ * This string must be freed with free(str).
+ * NULL is returned if no filter is found or allocation failed.
+ */
+char *
+pevent_filter_make_string(struct event_filter *filter, int event_id)
+{
+ struct filter_type *filter_type;
+
+ if (!filter->filters)
+ return NULL;
+
+ filter_type = find_filter_type(filter, event_id);
+
+ if (!filter_type)
+ return NULL;
+
+ return arg_to_str(filter, filter_type->filter);
+}
+
+/**
+ * pevent_filter_compare - compare two filters and return if they are the same
+ * @filter1: Filter to compare with @filter2
+ * @filter2: Filter to compare with @filter1
+ *
+ * Returns:
+ * 1 if the two filters hold the same content.
+ * 0 if they do not.
+ */
+int pevent_filter_compare(struct event_filter *filter1, struct event_filter *filter2)
+{
+ struct filter_type *filter_type1;
+ struct filter_type *filter_type2;
+ char *str1, *str2;
+ int result;
+ int i;
+
+ /* Do the easy checks first */
+ if (filter1->filters != filter2->filters)
+ return 0;
+ if (!filter1->filters && !filter2->filters)
+ return 1;
+
+ /*
+ * Now take a look at each of the events to see if they have the same
+ * filters to them.
+ */
+ for (i = 0; i < filter1->filters; i++) {
+ filter_type1 = &filter1->event_filters[i];
+ filter_type2 = find_filter_type(filter2, filter_type1->event_id);
+ if (!filter_type2)
+ break;
+ if (filter_type1->filter->type != filter_type2->filter->type)
+ break;
+ switch (filter_type1->filter->type) {
+ case FILTER_TRIVIAL_FALSE:
+ case FILTER_TRIVIAL_TRUE:
+ /* trivial types just need the type compared */
+ continue;
+ default:
+ break;
+ }
+ /* The best way to compare complex filters is with strings */
+ str1 = arg_to_str(filter1, filter_type1->filter);
+ str2 = arg_to_str(filter2, filter_type2->filter);
+ if (str1 && str2)
+ result = strcmp(str1, str2) != 0;
+ else
+ /* bail out if allocation fails */
+ result = 1;
+
+ free(str1);
+ free(str2);
+ if (result)
+ break;
+ }
+
+ if (i < filter1->filters)
+ return 0;
+ return 1;
+}
+
diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c
new file mode 100644
index 00000000000..eda07fa31dc
--- /dev/null
+++ b/tools/lib/traceevent/parse-utils.c
@@ -0,0 +1,85 @@
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#define __weak __attribute__((weak))
+
+void __vwarning(const char *fmt, va_list ap)
+{
+ if (errno)
+ perror("trace-cmd");
+ errno = 0;
+
+ fprintf(stderr, " ");
+ vfprintf(stderr, fmt, ap);
+
+ fprintf(stderr, "\n");
+}
+
+void __warning(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ __vwarning(fmt, ap);
+ va_end(ap);
+}
+
+void __weak warning(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ __vwarning(fmt, ap);
+ va_end(ap);
+}
+
+void __vpr_stat(const char *fmt, va_list ap)
+{
+ vprintf(fmt, ap);
+ printf("\n");
+}
+
+void __pr_stat(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ __vpr_stat(fmt, ap);
+ va_end(ap);
+}
+
+void __weak vpr_stat(const char *fmt, va_list ap)
+{
+ __vpr_stat(fmt, ap);
+}
+
+void __weak pr_stat(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ __vpr_stat(fmt, ap);
+ va_end(ap);
+}
diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c
new file mode 100644
index 00000000000..c066b25905f
--- /dev/null
+++ b/tools/lib/traceevent/plugin_cfg80211.c
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <endian.h>
+#include "event-parse.h"
+
+static unsigned long long
+process___le16_to_cpup(struct trace_seq *s,
+ unsigned long long *args)
+{
+ uint16_t *val = (uint16_t *) (unsigned long) args[0];
+ return val ? (long long) le16toh(*val) : 0;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_print_function(pevent,
+ process___le16_to_cpup,
+ PEVENT_FUNC_ARG_INT,
+ "__le16_to_cpup",
+ PEVENT_FUNC_ARG_PTR,
+ PEVENT_FUNC_ARG_VOID);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_print_function(pevent, process___le16_to_cpup,
+ "__le16_to_cpup");
+}
diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c
new file mode 100644
index 00000000000..a00ec190821
--- /dev/null
+++ b/tools/lib/traceevent/plugin_function.c
@@ -0,0 +1,194 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+#include "event-utils.h"
+
+static struct func_stack {
+ int size;
+ char **stack;
+} *fstack;
+
+static int cpus = -1;
+
+#define STK_BLK 10
+
+struct pevent_plugin_option plugin_options[] =
+{
+ {
+ .name = "parent",
+ .plugin_alias = "ftrace",
+ .description =
+ "Print parent of functions for function events",
+ },
+ {
+ .name = "indent",
+ .plugin_alias = "ftrace",
+ .description =
+ "Try to show function call indents, based on parents",
+ .set = 1,
+ },
+ {
+ .name = NULL,
+ }
+};
+
+static struct pevent_plugin_option *ftrace_parent = &plugin_options[0];
+static struct pevent_plugin_option *ftrace_indent = &plugin_options[1];
+
+static void add_child(struct func_stack *stack, const char *child, int pos)
+{
+ int i;
+
+ if (!child)
+ return;
+
+ if (pos < stack->size)
+ free(stack->stack[pos]);
+ else {
+ char **ptr;
+
+ ptr = realloc(stack->stack, sizeof(char *) *
+ (stack->size + STK_BLK));
+ if (!ptr) {
+ warning("could not allocate plugin memory\n");
+ return;
+ }
+
+ stack->stack = ptr;
+
+ for (i = stack->size; i < stack->size + STK_BLK; i++)
+ stack->stack[i] = NULL;
+ stack->size += STK_BLK;
+ }
+
+ stack->stack[pos] = strdup(child);
+}
+
+static int add_and_get_index(const char *parent, const char *child, int cpu)
+{
+ int i;
+
+ if (cpu < 0)
+ return 0;
+
+ if (cpu > cpus) {
+ struct func_stack *ptr;
+
+ ptr = realloc(fstack, sizeof(*fstack) * (cpu + 1));
+ if (!ptr) {
+ warning("could not allocate plugin memory\n");
+ return 0;
+ }
+
+ fstack = ptr;
+
+ /* Account for holes in the cpu count */
+ for (i = cpus + 1; i <= cpu; i++)
+ memset(&fstack[i], 0, sizeof(fstack[i]));
+ cpus = cpu;
+ }
+
+ for (i = 0; i < fstack[cpu].size && fstack[cpu].stack[i]; i++) {
+ if (strcmp(parent, fstack[cpu].stack[i]) == 0) {
+ add_child(&fstack[cpu], child, i+1);
+ return i;
+ }
+ }
+
+ /* Not found */
+ add_child(&fstack[cpu], parent, 0);
+ add_child(&fstack[cpu], child, 1);
+ return 0;
+}
+
+static int function_handler(struct trace_seq *s, struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ struct pevent *pevent = event->pevent;
+ unsigned long long function;
+ unsigned long long pfunction;
+ const char *func;
+ const char *parent;
+ int index;
+
+ if (pevent_get_field_val(s, event, "ip", record, &function, 1))
+ return trace_seq_putc(s, '!');
+
+ func = pevent_find_function(pevent, function);
+
+ if (pevent_get_field_val(s, event, "parent_ip", record, &pfunction, 1))
+ return trace_seq_putc(s, '!');
+
+ parent = pevent_find_function(pevent, pfunction);
+
+ if (parent && ftrace_indent->set)
+ index = add_and_get_index(parent, func, record->cpu);
+
+ trace_seq_printf(s, "%*s", index*3, "");
+
+ if (func)
+ trace_seq_printf(s, "%s", func);
+ else
+ trace_seq_printf(s, "0x%llx", function);
+
+ if (ftrace_parent->set) {
+ trace_seq_printf(s, " <-- ");
+ if (parent)
+ trace_seq_printf(s, "%s", parent);
+ else
+ trace_seq_printf(s, "0x%llx", pfunction);
+ }
+
+ return 0;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_event_handler(pevent, -1, "ftrace", "function",
+ function_handler, NULL);
+
+ traceevent_plugin_add_options("ftrace", plugin_options);
+
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ int i, x;
+
+ pevent_unregister_event_handler(pevent, -1, "ftrace", "function",
+ function_handler, NULL);
+
+ for (i = 0; i <= cpus; i++) {
+ for (x = 0; x < fstack[i].size && fstack[i].stack[x]; x++)
+ free(fstack[i].stack[x]);
+ free(fstack[i].stack);
+ }
+
+ traceevent_plugin_remove_options(plugin_options);
+
+ free(fstack);
+ fstack = NULL;
+ cpus = -1;
+}
diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c
new file mode 100644
index 00000000000..12bf14cc115
--- /dev/null
+++ b/tools/lib/traceevent/plugin_hrtimer.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+static int timer_expire_handler(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ trace_seq_printf(s, "hrtimer=");
+
+ if (pevent_print_num_field(s, "0x%llx", event, "timer",
+ record, 0) == -1)
+ pevent_print_num_field(s, "0x%llx", event, "hrtimer",
+ record, 1);
+
+ trace_seq_printf(s, " now=");
+
+ pevent_print_num_field(s, "%llu", event, "now", record, 1);
+
+ pevent_print_func_field(s, " function=%s", event, "function",
+ record, 0);
+ return 0;
+}
+
+static int timer_start_handler(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ trace_seq_printf(s, "hrtimer=");
+
+ if (pevent_print_num_field(s, "0x%llx", event, "timer",
+ record, 0) == -1)
+ pevent_print_num_field(s, "0x%llx", event, "hrtimer",
+ record, 1);
+
+ pevent_print_func_field(s, " function=%s", event, "function",
+ record, 0);
+
+ trace_seq_printf(s, " expires=");
+ pevent_print_num_field(s, "%llu", event, "expires", record, 1);
+
+ trace_seq_printf(s, " softexpires=");
+ pevent_print_num_field(s, "%llu", event, "softexpires", record, 1);
+ return 0;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_event_handler(pevent, -1,
+ "timer", "hrtimer_expire_entry",
+ timer_expire_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "timer", "hrtimer_start",
+ timer_start_handler, NULL);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_event_handler(pevent, -1,
+ "timer", "hrtimer_expire_entry",
+ timer_expire_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "timer", "hrtimer_start",
+ timer_start_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c
new file mode 100644
index 00000000000..0db714c721b
--- /dev/null
+++ b/tools/lib/traceevent/plugin_jbd2.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+#define MINORBITS 20
+#define MINORMASK ((1U << MINORBITS) - 1)
+
+#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+
+static unsigned long long
+process_jbd2_dev_to_name(struct trace_seq *s,
+ unsigned long long *args)
+{
+ unsigned int dev = args[0];
+
+ trace_seq_printf(s, "%d:%d", MAJOR(dev), MINOR(dev));
+ return 0;
+}
+
+static unsigned long long
+process_jiffies_to_msecs(struct trace_seq *s,
+ unsigned long long *args)
+{
+ unsigned long long jiffies = args[0];
+
+ trace_seq_printf(s, "%lld", jiffies);
+ return jiffies;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_print_function(pevent,
+ process_jbd2_dev_to_name,
+ PEVENT_FUNC_ARG_STRING,
+ "jbd2_dev_to_name",
+ PEVENT_FUNC_ARG_INT,
+ PEVENT_FUNC_ARG_VOID);
+
+ pevent_register_print_function(pevent,
+ process_jiffies_to_msecs,
+ PEVENT_FUNC_ARG_LONG,
+ "jiffies_to_msecs",
+ PEVENT_FUNC_ARG_LONG,
+ PEVENT_FUNC_ARG_VOID);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_print_function(pevent, process_jbd2_dev_to_name,
+ "jbd2_dev_to_name");
+
+ pevent_unregister_print_function(pevent, process_jiffies_to_msecs,
+ "jiffies_to_msecs");
+}
diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c
new file mode 100644
index 00000000000..70650ff48d7
--- /dev/null
+++ b/tools/lib/traceevent/plugin_kmem.c
@@ -0,0 +1,94 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+static int call_site_handler(struct trace_seq *s, struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ struct format_field *field;
+ unsigned long long val, addr;
+ void *data = record->data;
+ const char *func;
+
+ field = pevent_find_field(event, "call_site");
+ if (!field)
+ return 1;
+
+ if (pevent_read_number_field(field, data, &val))
+ return 1;
+
+ func = pevent_find_function(event->pevent, val);
+ if (!func)
+ return 1;
+
+ addr = pevent_find_function_address(event->pevent, val);
+
+ trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr));
+ return 1;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_event_handler(pevent, -1, "kmem", "kfree",
+ call_site_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kmem", "kmalloc",
+ call_site_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kmem", "kmalloc_node",
+ call_site_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kmem", "kmem_cache_alloc",
+ call_site_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kmem",
+ "kmem_cache_alloc_node",
+ call_site_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kmem", "kmem_cache_free",
+ call_site_handler, NULL);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_event_handler(pevent, -1, "kmem", "kfree",
+ call_site_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kmem", "kmalloc",
+ call_site_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kmem", "kmalloc_node",
+ call_site_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_alloc",
+ call_site_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kmem",
+ "kmem_cache_alloc_node",
+ call_site_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_free",
+ call_site_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c
new file mode 100644
index 00000000000..9e0e8c61b43
--- /dev/null
+++ b/tools/lib/traceevent/plugin_kvm.c
@@ -0,0 +1,465 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+
+#include "event-parse.h"
+
+#ifdef HAVE_UDIS86
+
+#include <udis86.h>
+
+static ud_t ud;
+
+static void init_disassembler(void)
+{
+ ud_init(&ud);
+ ud_set_syntax(&ud, UD_SYN_ATT);
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+ int cr0_pe, int eflags_vm,
+ int cs_d, int cs_l)
+{
+ int mode;
+
+ if (!cr0_pe)
+ mode = 16;
+ else if (eflags_vm)
+ mode = 16;
+ else if (cs_l)
+ mode = 64;
+ else if (cs_d)
+ mode = 32;
+ else
+ mode = 16;
+
+ ud_set_pc(&ud, rip);
+ ud_set_mode(&ud, mode);
+ ud_set_input_buffer(&ud, insn, len);
+ ud_disassemble(&ud);
+ return ud_insn_asm(&ud);
+}
+
+#else
+
+static void init_disassembler(void)
+{
+}
+
+static const char *disassemble(unsigned char *insn, int len, uint64_t rip,
+ int cr0_pe, int eflags_vm,
+ int cs_d, int cs_l)
+{
+ static char out[15*3+1];
+ int i;
+
+ for (i = 0; i < len; ++i)
+ sprintf(out + i * 3, "%02x ", insn[i]);
+ out[len*3-1] = '\0';
+ return out;
+}
+
+#endif
+
+
+#define VMX_EXIT_REASONS \
+ _ER(EXCEPTION_NMI, 0) \
+ _ER(EXTERNAL_INTERRUPT, 1) \
+ _ER(TRIPLE_FAULT, 2) \
+ _ER(PENDING_INTERRUPT, 7) \
+ _ER(NMI_WINDOW, 8) \
+ _ER(TASK_SWITCH, 9) \
+ _ER(CPUID, 10) \
+ _ER(HLT, 12) \
+ _ER(INVD, 13) \
+ _ER(INVLPG, 14) \
+ _ER(RDPMC, 15) \
+ _ER(RDTSC, 16) \
+ _ER(VMCALL, 18) \
+ _ER(VMCLEAR, 19) \
+ _ER(VMLAUNCH, 20) \
+ _ER(VMPTRLD, 21) \
+ _ER(VMPTRST, 22) \
+ _ER(VMREAD, 23) \
+ _ER(VMRESUME, 24) \
+ _ER(VMWRITE, 25) \
+ _ER(VMOFF, 26) \
+ _ER(VMON, 27) \
+ _ER(CR_ACCESS, 28) \
+ _ER(DR_ACCESS, 29) \
+ _ER(IO_INSTRUCTION, 30) \
+ _ER(MSR_READ, 31) \
+ _ER(MSR_WRITE, 32) \
+ _ER(MWAIT_INSTRUCTION, 36) \
+ _ER(MONITOR_INSTRUCTION, 39) \
+ _ER(PAUSE_INSTRUCTION, 40) \
+ _ER(MCE_DURING_VMENTRY, 41) \
+ _ER(TPR_BELOW_THRESHOLD, 43) \
+ _ER(APIC_ACCESS, 44) \
+ _ER(EOI_INDUCED, 45) \
+ _ER(EPT_VIOLATION, 48) \
+ _ER(EPT_MISCONFIG, 49) \
+ _ER(INVEPT, 50) \
+ _ER(PREEMPTION_TIMER, 52) \
+ _ER(WBINVD, 54) \
+ _ER(XSETBV, 55) \
+ _ER(APIC_WRITE, 56) \
+ _ER(INVPCID, 58)
+
+#define SVM_EXIT_REASONS \
+ _ER(EXIT_READ_CR0, 0x000) \
+ _ER(EXIT_READ_CR3, 0x003) \
+ _ER(EXIT_READ_CR4, 0x004) \
+ _ER(EXIT_READ_CR8, 0x008) \
+ _ER(EXIT_WRITE_CR0, 0x010) \
+ _ER(EXIT_WRITE_CR3, 0x013) \
+ _ER(EXIT_WRITE_CR4, 0x014) \
+ _ER(EXIT_WRITE_CR8, 0x018) \
+ _ER(EXIT_READ_DR0, 0x020) \
+ _ER(EXIT_READ_DR1, 0x021) \
+ _ER(EXIT_READ_DR2, 0x022) \
+ _ER(EXIT_READ_DR3, 0x023) \
+ _ER(EXIT_READ_DR4, 0x024) \
+ _ER(EXIT_READ_DR5, 0x025) \
+ _ER(EXIT_READ_DR6, 0x026) \
+ _ER(EXIT_READ_DR7, 0x027) \
+ _ER(EXIT_WRITE_DR0, 0x030) \
+ _ER(EXIT_WRITE_DR1, 0x031) \
+ _ER(EXIT_WRITE_DR2, 0x032) \
+ _ER(EXIT_WRITE_DR3, 0x033) \
+ _ER(EXIT_WRITE_DR4, 0x034) \
+ _ER(EXIT_WRITE_DR5, 0x035) \
+ _ER(EXIT_WRITE_DR6, 0x036) \
+ _ER(EXIT_WRITE_DR7, 0x037) \
+ _ER(EXIT_EXCP_BASE, 0x040) \
+ _ER(EXIT_INTR, 0x060) \
+ _ER(EXIT_NMI, 0x061) \
+ _ER(EXIT_SMI, 0x062) \
+ _ER(EXIT_INIT, 0x063) \
+ _ER(EXIT_VINTR, 0x064) \
+ _ER(EXIT_CR0_SEL_WRITE, 0x065) \
+ _ER(EXIT_IDTR_READ, 0x066) \
+ _ER(EXIT_GDTR_READ, 0x067) \
+ _ER(EXIT_LDTR_READ, 0x068) \
+ _ER(EXIT_TR_READ, 0x069) \
+ _ER(EXIT_IDTR_WRITE, 0x06a) \
+ _ER(EXIT_GDTR_WRITE, 0x06b) \
+ _ER(EXIT_LDTR_WRITE, 0x06c) \
+ _ER(EXIT_TR_WRITE, 0x06d) \
+ _ER(EXIT_RDTSC, 0x06e) \
+ _ER(EXIT_RDPMC, 0x06f) \
+ _ER(EXIT_PUSHF, 0x070) \
+ _ER(EXIT_POPF, 0x071) \
+ _ER(EXIT_CPUID, 0x072) \
+ _ER(EXIT_RSM, 0x073) \
+ _ER(EXIT_IRET, 0x074) \
+ _ER(EXIT_SWINT, 0x075) \
+ _ER(EXIT_INVD, 0x076) \
+ _ER(EXIT_PAUSE, 0x077) \
+ _ER(EXIT_HLT, 0x078) \
+ _ER(EXIT_INVLPG, 0x079) \
+ _ER(EXIT_INVLPGA, 0x07a) \
+ _ER(EXIT_IOIO, 0x07b) \
+ _ER(EXIT_MSR, 0x07c) \
+ _ER(EXIT_TASK_SWITCH, 0x07d) \
+ _ER(EXIT_FERR_FREEZE, 0x07e) \
+ _ER(EXIT_SHUTDOWN, 0x07f) \
+ _ER(EXIT_VMRUN, 0x080) \
+ _ER(EXIT_VMMCALL, 0x081) \
+ _ER(EXIT_VMLOAD, 0x082) \
+ _ER(EXIT_VMSAVE, 0x083) \
+ _ER(EXIT_STGI, 0x084) \
+ _ER(EXIT_CLGI, 0x085) \
+ _ER(EXIT_SKINIT, 0x086) \
+ _ER(EXIT_RDTSCP, 0x087) \
+ _ER(EXIT_ICEBP, 0x088) \
+ _ER(EXIT_WBINVD, 0x089) \
+ _ER(EXIT_MONITOR, 0x08a) \
+ _ER(EXIT_MWAIT, 0x08b) \
+ _ER(EXIT_MWAIT_COND, 0x08c) \
+ _ER(EXIT_NPF, 0x400) \
+ _ER(EXIT_ERR, -1)
+
+#define _ER(reason, val) { #reason, val },
+struct str_values {
+ const char *str;
+ int val;
+};
+
+static struct str_values vmx_exit_reasons[] = {
+ VMX_EXIT_REASONS
+ { NULL, -1}
+};
+
+static struct str_values svm_exit_reasons[] = {
+ SVM_EXIT_REASONS
+ { NULL, -1}
+};
+
+static struct isa_exit_reasons {
+ unsigned isa;
+ struct str_values *strings;
+} isa_exit_reasons[] = {
+ { .isa = 1, .strings = vmx_exit_reasons },
+ { .isa = 2, .strings = svm_exit_reasons },
+ { }
+};
+
+static const char *find_exit_reason(unsigned isa, int val)
+{
+ struct str_values *strings = NULL;
+ int i;
+
+ for (i = 0; isa_exit_reasons[i].strings; ++i)
+ if (isa_exit_reasons[i].isa == isa) {
+ strings = isa_exit_reasons[i].strings;
+ break;
+ }
+ if (!strings)
+ return "UNKNOWN-ISA";
+ for (i = 0; strings[i].val >= 0; i++)
+ if (strings[i].val == val)
+ break;
+ if (strings[i].str)
+ return strings[i].str;
+ return "UNKNOWN";
+}
+
+static int kvm_exit_handler(struct trace_seq *s, struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ unsigned long long isa;
+ unsigned long long val;
+ unsigned long long info1 = 0, info2 = 0;
+
+ if (pevent_get_field_val(s, event, "exit_reason", record, &val, 1) < 0)
+ return -1;
+
+ if (pevent_get_field_val(s, event, "isa", record, &isa, 0) < 0)
+ isa = 1;
+
+ trace_seq_printf(s, "reason %s", find_exit_reason(isa, val));
+
+ pevent_print_num_field(s, " rip 0x%lx", event, "guest_rip", record, 1);
+
+ if (pevent_get_field_val(s, event, "info1", record, &info1, 0) >= 0
+ && pevent_get_field_val(s, event, "info2", record, &info2, 0) >= 0)
+ trace_seq_printf(s, " info %llx %llx", info1, info2);
+
+ return 0;
+}
+
+#define KVM_EMUL_INSN_F_CR0_PE (1 << 0)
+#define KVM_EMUL_INSN_F_EFL_VM (1 << 1)
+#define KVM_EMUL_INSN_F_CS_D (1 << 2)
+#define KVM_EMUL_INSN_F_CS_L (1 << 3)
+
+static int kvm_emulate_insn_handler(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ unsigned long long rip, csbase, len, flags, failed;
+ int llen;
+ uint8_t *insn;
+ const char *disasm;
+
+ if (pevent_get_field_val(s, event, "rip", record, &rip, 1) < 0)
+ return -1;
+
+ if (pevent_get_field_val(s, event, "csbase", record, &csbase, 1) < 0)
+ return -1;
+
+ if (pevent_get_field_val(s, event, "len", record, &len, 1) < 0)
+ return -1;
+
+ if (pevent_get_field_val(s, event, "flags", record, &flags, 1) < 0)
+ return -1;
+
+ if (pevent_get_field_val(s, event, "failed", record, &failed, 1) < 0)
+ return -1;
+
+ insn = pevent_get_field_raw(s, event, "insn", record, &llen, 1);
+ if (!insn)
+ return -1;
+
+ disasm = disassemble(insn, len, rip,
+ flags & KVM_EMUL_INSN_F_CR0_PE,
+ flags & KVM_EMUL_INSN_F_EFL_VM,
+ flags & KVM_EMUL_INSN_F_CS_D,
+ flags & KVM_EMUL_INSN_F_CS_L);
+
+ trace_seq_printf(s, "%llx:%llx: %s%s", csbase, rip, disasm,
+ failed ? " FAIL" : "");
+ return 0;
+}
+
+union kvm_mmu_page_role {
+ unsigned word;
+ struct {
+ unsigned glevels:4;
+ unsigned level:4;
+ unsigned quadrant:2;
+ unsigned pad_for_nice_hex_output:6;
+ unsigned direct:1;
+ unsigned access:3;
+ unsigned invalid:1;
+ unsigned cr4_pge:1;
+ unsigned nxe:1;
+ };
+};
+
+static int kvm_mmu_print_role(struct trace_seq *s, struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ unsigned long long val;
+ static const char *access_str[] = {
+ "---", "--x", "w--", "w-x", "-u-", "-ux", "wu-", "wux"
+ };
+ union kvm_mmu_page_role role;
+
+ if (pevent_get_field_val(s, event, "role", record, &val, 1) < 0)
+ return -1;
+
+ role.word = (int)val;
+
+ /*
+ * We can only use the structure if file is of the same
+ * endianess.
+ */
+ if (pevent_is_file_bigendian(event->pevent) ==
+ pevent_is_host_bigendian(event->pevent)) {
+
+ trace_seq_printf(s, "%u/%u q%u%s %s%s %spge %snxe",
+ role.level,
+ role.glevels,
+ role.quadrant,
+ role.direct ? " direct" : "",
+ access_str[role.access],
+ role.invalid ? " invalid" : "",
+ role.cr4_pge ? "" : "!",
+ role.nxe ? "" : "!");
+ } else
+ trace_seq_printf(s, "WORD: %08x", role.word);
+
+ pevent_print_num_field(s, " root %u ", event,
+ "root_count", record, 1);
+
+ if (pevent_get_field_val(s, event, "unsync", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_printf(s, "%s%c", val ? "unsync" : "sync", 0);
+ return 0;
+}
+
+static int kvm_mmu_get_page_handler(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ unsigned long long val;
+
+ if (pevent_get_field_val(s, event, "created", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_printf(s, "%s ", val ? "new" : "existing");
+
+ if (pevent_get_field_val(s, event, "gfn", record, &val, 1) < 0)
+ return -1;
+
+ trace_seq_printf(s, "sp gfn %llx ", val);
+ return kvm_mmu_print_role(s, record, event, context);
+}
+
+#define PT_WRITABLE_SHIFT 1
+#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+
+static unsigned long long
+process_is_writable_pte(struct trace_seq *s, unsigned long long *args)
+{
+ unsigned long pte = args[0];
+ return pte & PT_WRITABLE_MASK;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ init_disassembler();
+
+ pevent_register_event_handler(pevent, -1, "kvm", "kvm_exit",
+ kvm_exit_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
+ kvm_emulate_insn_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
+ kvm_mmu_get_page_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page",
+ kvm_mmu_print_role, NULL);
+
+ pevent_register_event_handler(pevent, -1,
+ "kvmmmu", "kvm_mmu_unsync_page",
+ kvm_mmu_print_role, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page",
+ kvm_mmu_print_role, NULL);
+
+ pevent_register_event_handler(pevent, -1, "kvmmmu",
+ "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+ NULL);
+
+ pevent_register_print_function(pevent,
+ process_is_writable_pte,
+ PEVENT_FUNC_ARG_INT,
+ "is_writable_pte",
+ PEVENT_FUNC_ARG_LONG,
+ PEVENT_FUNC_ARG_VOID);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_exit",
+ kvm_exit_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kvm", "kvm_emulate_insn",
+ kvm_emulate_insn_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page",
+ kvm_mmu_get_page_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page",
+ kvm_mmu_print_role, NULL);
+
+ pevent_unregister_event_handler(pevent, -1,
+ "kvmmmu", "kvm_mmu_unsync_page",
+ kvm_mmu_print_role, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page",
+ kvm_mmu_print_role, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "kvmmmu",
+ "kvm_mmu_prepare_zap_page", kvm_mmu_print_role,
+ NULL);
+
+ pevent_unregister_print_function(pevent, process_is_writable_pte,
+ "is_writable_pte");
+}
diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c
new file mode 100644
index 00000000000..7e15a0f1c2f
--- /dev/null
+++ b/tools/lib/traceevent/plugin_mac80211.c
@@ -0,0 +1,102 @@
+/*
+ * Copyright (C) 2009 Johannes Berg <johannes@sipsolutions.net>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+#define INDENT 65
+
+static void print_string(struct trace_seq *s, struct event_format *event,
+ const char *name, const void *data)
+{
+ struct format_field *f = pevent_find_field(event, name);
+ int offset;
+ int length;
+
+ if (!f) {
+ trace_seq_printf(s, "NOTFOUND:%s", name);
+ return;
+ }
+
+ offset = f->offset;
+ length = f->size;
+
+ if (!strncmp(f->type, "__data_loc", 10)) {
+ unsigned long long v;
+ if (pevent_read_number_field(f, data, &v)) {
+ trace_seq_printf(s, "invalid_data_loc");
+ return;
+ }
+ offset = v & 0xffff;
+ length = v >> 16;
+ }
+
+ trace_seq_printf(s, "%.*s", length, (char *)data + offset);
+}
+
+#define SF(fn) pevent_print_num_field(s, fn ":%d", event, fn, record, 0)
+#define SFX(fn) pevent_print_num_field(s, fn ":%#x", event, fn, record, 0)
+#define SP() trace_seq_putc(s, ' ')
+
+static int drv_bss_info_changed(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ void *data = record->data;
+
+ print_string(s, event, "wiphy_name", data);
+ trace_seq_printf(s, " vif:");
+ print_string(s, event, "vif_name", data);
+ pevent_print_num_field(s, "(%d)", event, "vif_type", record, 1);
+
+ trace_seq_printf(s, "\n%*s", INDENT, "");
+ SF("assoc"); SP();
+ SF("aid"); SP();
+ SF("cts"); SP();
+ SF("shortpre"); SP();
+ SF("shortslot"); SP();
+ SF("dtimper"); SP();
+ trace_seq_printf(s, "\n%*s", INDENT, "");
+ SF("bcnint"); SP();
+ SFX("assoc_cap"); SP();
+ SFX("basic_rates"); SP();
+ SF("enable_beacon");
+ trace_seq_printf(s, "\n%*s", INDENT, "");
+ SF("ht_operation_mode");
+
+ return 0;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_event_handler(pevent, -1, "mac80211",
+ "drv_bss_info_changed",
+ drv_bss_info_changed, NULL);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_event_handler(pevent, -1, "mac80211",
+ "drv_bss_info_changed",
+ drv_bss_info_changed, NULL);
+}
diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c
new file mode 100644
index 00000000000..f1ce6006525
--- /dev/null
+++ b/tools/lib/traceevent/plugin_sched_switch.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (C) 2009, 2010 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "event-parse.h"
+
+static void write_state(struct trace_seq *s, int val)
+{
+ const char states[] = "SDTtZXxW";
+ int found = 0;
+ int i;
+
+ for (i = 0; i < (sizeof(states) - 1); i++) {
+ if (!(val & (1 << i)))
+ continue;
+
+ if (found)
+ trace_seq_putc(s, '|');
+
+ found = 1;
+ trace_seq_putc(s, states[i]);
+ }
+
+ if (!found)
+ trace_seq_putc(s, 'R');
+}
+
+static void write_and_save_comm(struct format_field *field,
+ struct pevent_record *record,
+ struct trace_seq *s, int pid)
+{
+ const char *comm;
+ int len;
+
+ comm = (char *)(record->data + field->offset);
+ len = s->len;
+ trace_seq_printf(s, "%.*s",
+ field->size, comm);
+
+ /* make sure the comm has a \0 at the end. */
+ trace_seq_terminate(s);
+ comm = &s->buffer[len];
+
+ /* Help out the comm to ids. This will handle dups */
+ pevent_register_comm(field->event->pevent, comm, pid);
+}
+
+static int sched_wakeup_handler(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ struct format_field *field;
+ unsigned long long val;
+
+ if (pevent_get_field_val(s, event, "pid", record, &val, 1))
+ return trace_seq_putc(s, '!');
+
+ field = pevent_find_any_field(event, "comm");
+ if (field) {
+ write_and_save_comm(field, record, s, val);
+ trace_seq_putc(s, ':');
+ }
+ trace_seq_printf(s, "%lld", val);
+
+ if (pevent_get_field_val(s, event, "prio", record, &val, 0) == 0)
+ trace_seq_printf(s, " [%lld]", val);
+
+ if (pevent_get_field_val(s, event, "success", record, &val, 1) == 0)
+ trace_seq_printf(s, " success=%lld", val);
+
+ if (pevent_get_field_val(s, event, "target_cpu", record, &val, 0) == 0)
+ trace_seq_printf(s, " CPU:%03llu", val);
+
+ return 0;
+}
+
+static int sched_switch_handler(struct trace_seq *s,
+ struct pevent_record *record,
+ struct event_format *event, void *context)
+{
+ struct format_field *field;
+ unsigned long long val;
+
+ if (pevent_get_field_val(s, event, "prev_pid", record, &val, 1))
+ return trace_seq_putc(s, '!');
+
+ field = pevent_find_any_field(event, "prev_comm");
+ if (field) {
+ write_and_save_comm(field, record, s, val);
+ trace_seq_putc(s, ':');
+ }
+ trace_seq_printf(s, "%lld ", val);
+
+ if (pevent_get_field_val(s, event, "prev_prio", record, &val, 0) == 0)
+ trace_seq_printf(s, "[%lld] ", val);
+
+ if (pevent_get_field_val(s, event, "prev_state", record, &val, 0) == 0)
+ write_state(s, val);
+
+ trace_seq_puts(s, " ==> ");
+
+ if (pevent_get_field_val(s, event, "next_pid", record, &val, 1))
+ return trace_seq_putc(s, '!');
+
+ field = pevent_find_any_field(event, "next_comm");
+ if (field) {
+ write_and_save_comm(field, record, s, val);
+ trace_seq_putc(s, ':');
+ }
+ trace_seq_printf(s, "%lld", val);
+
+ if (pevent_get_field_val(s, event, "next_prio", record, &val, 0) == 0)
+ trace_seq_printf(s, " [%lld]", val);
+
+ return 0;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_event_handler(pevent, -1, "sched", "sched_switch",
+ sched_switch_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "sched", "sched_wakeup",
+ sched_wakeup_handler, NULL);
+
+ pevent_register_event_handler(pevent, -1, "sched", "sched_wakeup_new",
+ sched_wakeup_handler, NULL);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_event_handler(pevent, -1, "sched", "sched_switch",
+ sched_switch_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "sched", "sched_wakeup",
+ sched_wakeup_handler, NULL);
+
+ pevent_unregister_event_handler(pevent, -1, "sched", "sched_wakeup_new",
+ sched_wakeup_handler, NULL);
+}
diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c
new file mode 100644
index 00000000000..eda326fc862
--- /dev/null
+++ b/tools/lib/traceevent/plugin_scsi.c
@@ -0,0 +1,429 @@
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include "event-parse.h"
+
+typedef unsigned long sector_t;
+typedef uint64_t u64;
+typedef unsigned int u32;
+
+/*
+ * SCSI opcodes
+ */
+#define TEST_UNIT_READY 0x00
+#define REZERO_UNIT 0x01
+#define REQUEST_SENSE 0x03
+#define FORMAT_UNIT 0x04
+#define READ_BLOCK_LIMITS 0x05
+#define REASSIGN_BLOCKS 0x07
+#define INITIALIZE_ELEMENT_STATUS 0x07
+#define READ_6 0x08
+#define WRITE_6 0x0a
+#define SEEK_6 0x0b
+#define READ_REVERSE 0x0f
+#define WRITE_FILEMARKS 0x10
+#define SPACE 0x11
+#define INQUIRY 0x12
+#define RECOVER_BUFFERED_DATA 0x14
+#define MODE_SELECT 0x15
+#define RESERVE 0x16
+#define RELEASE 0x17
+#define COPY 0x18
+#define ERASE 0x19
+#define MODE_SENSE 0x1a
+#define START_STOP 0x1b
+#define RECEIVE_DIAGNOSTIC 0x1c
+#define SEND_DIAGNOSTIC 0x1d
+#define ALLOW_MEDIUM_REMOVAL 0x1e
+
+#define READ_FORMAT_CAPACITIES 0x23
+#define SET_WINDOW 0x24
+#define READ_CAPACITY 0x25
+#define READ_10 0x28
+#define WRITE_10 0x2a
+#define SEEK_10 0x2b
+#define POSITION_TO_ELEMENT 0x2b
+#define WRITE_VERIFY 0x2e
+#define VERIFY 0x2f
+#define SEARCH_HIGH 0x30
+#define SEARCH_EQUAL 0x31
+#define SEARCH_LOW 0x32
+#define SET_LIMITS 0x33
+#define PRE_FETCH 0x34
+#define READ_POSITION 0x34
+#define SYNCHRONIZE_CACHE 0x35
+#define LOCK_UNLOCK_CACHE 0x36
+#define READ_DEFECT_DATA 0x37
+#define MEDIUM_SCAN 0x38
+#define COMPARE 0x39
+#define COPY_VERIFY 0x3a
+#define WRITE_BUFFER 0x3b
+#define READ_BUFFER 0x3c
+#define UPDATE_BLOCK 0x3d
+#define READ_LONG 0x3e
+#define WRITE_LONG 0x3f
+#define CHANGE_DEFINITION 0x40
+#define WRITE_SAME 0x41
+#define UNMAP 0x42
+#define READ_TOC 0x43
+#define READ_HEADER 0x44
+#define GET_EVENT_STATUS_NOTIFICATION 0x4a
+#define LOG_SELECT 0x4c
+#define LOG_SENSE 0x4d
+#define XDWRITEREAD_10 0x53
+#define MODE_SELECT_10 0x55
+#define RESERVE_10 0x56
+#define RELEASE_10 0x57
+#define MODE_SENSE_10 0x5a
+#define PERSISTENT_RESERVE_IN 0x5e
+#define PERSISTENT_RESERVE_OUT 0x5f
+#define VARIABLE_LENGTH_CMD 0x7f
+#define REPORT_LUNS 0xa0
+#define SECURITY_PROTOCOL_IN 0xa2
+#define MAINTENANCE_IN 0xa3
+#define MAINTENANCE_OUT 0xa4
+#define MOVE_MEDIUM 0xa5
+#define EXCHANGE_MEDIUM 0xa6
+#define READ_12 0xa8
+#define WRITE_12 0xaa
+#define READ_MEDIA_SERIAL_NUMBER 0xab
+#define WRITE_VERIFY_12 0xae
+#define VERIFY_12 0xaf
+#define SEARCH_HIGH_12 0xb0
+#define SEARCH_EQUAL_12 0xb1
+#define SEARCH_LOW_12 0xb2
+#define SECURITY_PROTOCOL_OUT 0xb5
+#define READ_ELEMENT_STATUS 0xb8
+#define SEND_VOLUME_TAG 0xb6
+#define WRITE_LONG_2 0xea
+#define EXTENDED_COPY 0x83
+#define RECEIVE_COPY_RESULTS 0x84
+#define ACCESS_CONTROL_IN 0x86
+#define ACCESS_CONTROL_OUT 0x87
+#define READ_16 0x88
+#define WRITE_16 0x8a
+#define READ_ATTRIBUTE 0x8c
+#define WRITE_ATTRIBUTE 0x8d
+#define VERIFY_16 0x8f
+#define SYNCHRONIZE_CACHE_16 0x91
+#define WRITE_SAME_16 0x93
+#define SERVICE_ACTION_IN 0x9e
+/* values for service action in */
+#define SAI_READ_CAPACITY_16 0x10
+#define SAI_GET_LBA_STATUS 0x12
+/* values for VARIABLE_LENGTH_CMD service action codes
+ * see spc4r17 Section D.3.5, table D.7 and D.8 */
+#define VLC_SA_RECEIVE_CREDENTIAL 0x1800
+/* values for maintenance in */
+#define MI_REPORT_IDENTIFYING_INFORMATION 0x05
+#define MI_REPORT_TARGET_PGS 0x0a
+#define MI_REPORT_ALIASES 0x0b
+#define MI_REPORT_SUPPORTED_OPERATION_CODES 0x0c
+#define MI_REPORT_SUPPORTED_TASK_MANAGEMENT_FUNCTIONS 0x0d
+#define MI_REPORT_PRIORITY 0x0e
+#define MI_REPORT_TIMESTAMP 0x0f
+#define MI_MANAGEMENT_PROTOCOL_IN 0x10
+/* value for MI_REPORT_TARGET_PGS ext header */
+#define MI_EXT_HDR_PARAM_FMT 0x20
+/* values for maintenance out */
+#define MO_SET_IDENTIFYING_INFORMATION 0x06
+#define MO_SET_TARGET_PGS 0x0a
+#define MO_CHANGE_ALIASES 0x0b
+#define MO_SET_PRIORITY 0x0e
+#define MO_SET_TIMESTAMP 0x0f
+#define MO_MANAGEMENT_PROTOCOL_OUT 0x10
+/* values for variable length command */
+#define XDREAD_32 0x03
+#define XDWRITE_32 0x04
+#define XPWRITE_32 0x06
+#define XDWRITEREAD_32 0x07
+#define READ_32 0x09
+#define VERIFY_32 0x0a
+#define WRITE_32 0x0b
+#define WRITE_SAME_32 0x0d
+
+#define SERVICE_ACTION16(cdb) (cdb[1] & 0x1f)
+#define SERVICE_ACTION32(cdb) ((cdb[8] << 8) | cdb[9])
+
+static const char *
+scsi_trace_misc(struct trace_seq *, unsigned char *, int);
+
+static const char *
+scsi_trace_rw6(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ sector_t lba = 0, txlen = 0;
+
+ lba |= ((cdb[1] & 0x1F) << 16);
+ lba |= (cdb[2] << 8);
+ lba |= cdb[3];
+ txlen = cdb[4];
+
+ trace_seq_printf(p, "lba=%llu txlen=%llu",
+ (unsigned long long)lba, (unsigned long long)txlen);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_rw10(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ sector_t lba = 0, txlen = 0;
+
+ lba |= (cdb[2] << 24);
+ lba |= (cdb[3] << 16);
+ lba |= (cdb[4] << 8);
+ lba |= cdb[5];
+ txlen |= (cdb[7] << 8);
+ txlen |= cdb[8];
+
+ trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+ (unsigned long long)lba, (unsigned long long)txlen,
+ cdb[1] >> 5);
+
+ if (cdb[0] == WRITE_SAME)
+ trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_rw12(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ sector_t lba = 0, txlen = 0;
+
+ lba |= (cdb[2] << 24);
+ lba |= (cdb[3] << 16);
+ lba |= (cdb[4] << 8);
+ lba |= cdb[5];
+ txlen |= (cdb[6] << 24);
+ txlen |= (cdb[7] << 16);
+ txlen |= (cdb[8] << 8);
+ txlen |= cdb[9];
+
+ trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+ (unsigned long long)lba, (unsigned long long)txlen,
+ cdb[1] >> 5);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_rw16(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ sector_t lba = 0, txlen = 0;
+
+ lba |= ((u64)cdb[2] << 56);
+ lba |= ((u64)cdb[3] << 48);
+ lba |= ((u64)cdb[4] << 40);
+ lba |= ((u64)cdb[5] << 32);
+ lba |= (cdb[6] << 24);
+ lba |= (cdb[7] << 16);
+ lba |= (cdb[8] << 8);
+ lba |= cdb[9];
+ txlen |= (cdb[10] << 24);
+ txlen |= (cdb[11] << 16);
+ txlen |= (cdb[12] << 8);
+ txlen |= cdb[13];
+
+ trace_seq_printf(p, "lba=%llu txlen=%llu protect=%u",
+ (unsigned long long)lba, (unsigned long long)txlen,
+ cdb[1] >> 5);
+
+ if (cdb[0] == WRITE_SAME_16)
+ trace_seq_printf(p, " unmap=%u", cdb[1] >> 3 & 1);
+
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_rw32(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len, *cmd;
+ sector_t lba = 0, txlen = 0;
+ u32 ei_lbrt = 0;
+
+ switch (SERVICE_ACTION32(cdb)) {
+ case READ_32:
+ cmd = "READ";
+ break;
+ case VERIFY_32:
+ cmd = "VERIFY";
+ break;
+ case WRITE_32:
+ cmd = "WRITE";
+ break;
+ case WRITE_SAME_32:
+ cmd = "WRITE_SAME";
+ break;
+ default:
+ trace_seq_printf(p, "UNKNOWN");
+ goto out;
+ }
+
+ lba |= ((u64)cdb[12] << 56);
+ lba |= ((u64)cdb[13] << 48);
+ lba |= ((u64)cdb[14] << 40);
+ lba |= ((u64)cdb[15] << 32);
+ lba |= (cdb[16] << 24);
+ lba |= (cdb[17] << 16);
+ lba |= (cdb[18] << 8);
+ lba |= cdb[19];
+ ei_lbrt |= (cdb[20] << 24);
+ ei_lbrt |= (cdb[21] << 16);
+ ei_lbrt |= (cdb[22] << 8);
+ ei_lbrt |= cdb[23];
+ txlen |= (cdb[28] << 24);
+ txlen |= (cdb[29] << 16);
+ txlen |= (cdb[30] << 8);
+ txlen |= cdb[31];
+
+ trace_seq_printf(p, "%s_32 lba=%llu txlen=%llu protect=%u ei_lbrt=%u",
+ cmd, (unsigned long long)lba,
+ (unsigned long long)txlen, cdb[10] >> 5, ei_lbrt);
+
+ if (SERVICE_ACTION32(cdb) == WRITE_SAME_32)
+ trace_seq_printf(p, " unmap=%u", cdb[10] >> 3 & 1);
+
+out:
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_unmap(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+ unsigned int regions = cdb[7] << 8 | cdb[8];
+
+ trace_seq_printf(p, "regions=%u", (regions - 8) / 16);
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_service_action_in(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len, *cmd;
+ sector_t lba = 0;
+ u32 alloc_len = 0;
+
+ switch (SERVICE_ACTION16(cdb)) {
+ case SAI_READ_CAPACITY_16:
+ cmd = "READ_CAPACITY_16";
+ break;
+ case SAI_GET_LBA_STATUS:
+ cmd = "GET_LBA_STATUS";
+ break;
+ default:
+ trace_seq_printf(p, "UNKNOWN");
+ goto out;
+ }
+
+ lba |= ((u64)cdb[2] << 56);
+ lba |= ((u64)cdb[3] << 48);
+ lba |= ((u64)cdb[4] << 40);
+ lba |= ((u64)cdb[5] << 32);
+ lba |= (cdb[6] << 24);
+ lba |= (cdb[7] << 16);
+ lba |= (cdb[8] << 8);
+ lba |= cdb[9];
+ alloc_len |= (cdb[10] << 24);
+ alloc_len |= (cdb[11] << 16);
+ alloc_len |= (cdb[12] << 8);
+ alloc_len |= cdb[13];
+
+ trace_seq_printf(p, "%s lba=%llu alloc_len=%u", cmd,
+ (unsigned long long)lba, alloc_len);
+
+out:
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+static const char *
+scsi_trace_varlen(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ switch (SERVICE_ACTION32(cdb)) {
+ case READ_32:
+ case VERIFY_32:
+ case WRITE_32:
+ case WRITE_SAME_32:
+ return scsi_trace_rw32(p, cdb, len);
+ default:
+ return scsi_trace_misc(p, cdb, len);
+ }
+}
+
+static const char *
+scsi_trace_misc(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ const char *ret = p->buffer + p->len;
+
+ trace_seq_printf(p, "-");
+ trace_seq_putc(p, 0);
+ return ret;
+}
+
+const char *
+scsi_trace_parse_cdb(struct trace_seq *p, unsigned char *cdb, int len)
+{
+ switch (cdb[0]) {
+ case READ_6:
+ case WRITE_6:
+ return scsi_trace_rw6(p, cdb, len);
+ case READ_10:
+ case VERIFY:
+ case WRITE_10:
+ case WRITE_SAME:
+ return scsi_trace_rw10(p, cdb, len);
+ case READ_12:
+ case VERIFY_12:
+ case WRITE_12:
+ return scsi_trace_rw12(p, cdb, len);
+ case READ_16:
+ case VERIFY_16:
+ case WRITE_16:
+ case WRITE_SAME_16:
+ return scsi_trace_rw16(p, cdb, len);
+ case UNMAP:
+ return scsi_trace_unmap(p, cdb, len);
+ case SERVICE_ACTION_IN:
+ return scsi_trace_service_action_in(p, cdb, len);
+ case VARIABLE_LENGTH_CMD:
+ return scsi_trace_varlen(p, cdb, len);
+ default:
+ return scsi_trace_misc(p, cdb, len);
+ }
+}
+
+unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s,
+ unsigned long long *args)
+{
+ scsi_trace_parse_cdb(s, (unsigned char *) (unsigned long) args[1], args[2]);
+ return 0;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_print_function(pevent,
+ process_scsi_trace_parse_cdb,
+ PEVENT_FUNC_ARG_STRING,
+ "scsi_trace_parse_cdb",
+ PEVENT_FUNC_ARG_PTR,
+ PEVENT_FUNC_ARG_PTR,
+ PEVENT_FUNC_ARG_INT,
+ PEVENT_FUNC_ARG_VOID);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_print_function(pevent, process_scsi_trace_parse_cdb,
+ "scsi_trace_parse_cdb");
+}
diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c
new file mode 100644
index 00000000000..3a413eaada6
--- /dev/null
+++ b/tools/lib/traceevent/plugin_xen.c
@@ -0,0 +1,136 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "event-parse.h"
+
+#define __HYPERVISOR_set_trap_table 0
+#define __HYPERVISOR_mmu_update 1
+#define __HYPERVISOR_set_gdt 2
+#define __HYPERVISOR_stack_switch 3
+#define __HYPERVISOR_set_callbacks 4
+#define __HYPERVISOR_fpu_taskswitch 5
+#define __HYPERVISOR_sched_op_compat 6
+#define __HYPERVISOR_dom0_op 7
+#define __HYPERVISOR_set_debugreg 8
+#define __HYPERVISOR_get_debugreg 9
+#define __HYPERVISOR_update_descriptor 10
+#define __HYPERVISOR_memory_op 12
+#define __HYPERVISOR_multicall 13
+#define __HYPERVISOR_update_va_mapping 14
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_event_channel_op_compat 16
+#define __HYPERVISOR_xen_version 17
+#define __HYPERVISOR_console_io 18
+#define __HYPERVISOR_physdev_op_compat 19
+#define __HYPERVISOR_grant_table_op 20
+#define __HYPERVISOR_vm_assist 21
+#define __HYPERVISOR_update_va_mapping_otherdomain 22
+#define __HYPERVISOR_iret 23 /* x86 only */
+#define __HYPERVISOR_vcpu_op 24
+#define __HYPERVISOR_set_segment_base 25 /* x86/64 only */
+#define __HYPERVISOR_mmuext_op 26
+#define __HYPERVISOR_acm_op 27
+#define __HYPERVISOR_nmi_op 28
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_callback_op 30
+#define __HYPERVISOR_xenoprof_op 31
+#define __HYPERVISOR_event_channel_op 32
+#define __HYPERVISOR_physdev_op 33
+#define __HYPERVISOR_hvm_op 34
+#define __HYPERVISOR_tmem_op 38
+
+/* Architecture-specific hypercall definitions. */
+#define __HYPERVISOR_arch_0 48
+#define __HYPERVISOR_arch_1 49
+#define __HYPERVISOR_arch_2 50
+#define __HYPERVISOR_arch_3 51
+#define __HYPERVISOR_arch_4 52
+#define __HYPERVISOR_arch_5 53
+#define __HYPERVISOR_arch_6 54
+#define __HYPERVISOR_arch_7 55
+
+#define N(x) [__HYPERVISOR_##x] = "("#x")"
+static const char *xen_hypercall_names[] = {
+ N(set_trap_table),
+ N(mmu_update),
+ N(set_gdt),
+ N(stack_switch),
+ N(set_callbacks),
+ N(fpu_taskswitch),
+ N(sched_op_compat),
+ N(dom0_op),
+ N(set_debugreg),
+ N(get_debugreg),
+ N(update_descriptor),
+ N(memory_op),
+ N(multicall),
+ N(update_va_mapping),
+ N(set_timer_op),
+ N(event_channel_op_compat),
+ N(xen_version),
+ N(console_io),
+ N(physdev_op_compat),
+ N(grant_table_op),
+ N(vm_assist),
+ N(update_va_mapping_otherdomain),
+ N(iret),
+ N(vcpu_op),
+ N(set_segment_base),
+ N(mmuext_op),
+ N(acm_op),
+ N(nmi_op),
+ N(sched_op),
+ N(callback_op),
+ N(xenoprof_op),
+ N(event_channel_op),
+ N(physdev_op),
+ N(hvm_op),
+
+/* Architecture-specific hypercall definitions. */
+ N(arch_0),
+ N(arch_1),
+ N(arch_2),
+ N(arch_3),
+ N(arch_4),
+ N(arch_5),
+ N(arch_6),
+ N(arch_7),
+};
+#undef N
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+static const char *xen_hypercall_name(unsigned op)
+{
+ if (op < ARRAY_SIZE(xen_hypercall_names) &&
+ xen_hypercall_names[op] != NULL)
+ return xen_hypercall_names[op];
+
+ return "";
+}
+
+unsigned long long process_xen_hypercall_name(struct trace_seq *s,
+ unsigned long long *args)
+{
+ unsigned int op = args[0];
+
+ trace_seq_printf(s, "%s", xen_hypercall_name(op));
+ return 0;
+}
+
+int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
+{
+ pevent_register_print_function(pevent,
+ process_xen_hypercall_name,
+ PEVENT_FUNC_ARG_STRING,
+ "xen_hypercall_name",
+ PEVENT_FUNC_ARG_INT,
+ PEVENT_FUNC_ARG_VOID);
+ return 0;
+}
+
+void PEVENT_PLUGIN_UNLOADER(struct pevent *pevent)
+{
+ pevent_unregister_print_function(pevent, process_xen_hypercall_name,
+ "xen_hypercall_name");
+}
diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c
new file mode 100644
index 00000000000..ec3bd16a548
--- /dev/null
+++ b/tools/lib/traceevent/trace-seq.c
@@ -0,0 +1,249 @@
+/*
+ * Copyright (C) 2009 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation;
+ * version 2.1 of the License (not later!)
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include <asm/bug.h>
+#include "event-parse.h"
+#include "event-utils.h"
+
+/*
+ * The TRACE_SEQ_POISON is to catch the use of using
+ * a trace_seq structure after it was destroyed.
+ */
+#define TRACE_SEQ_POISON ((void *)0xdeadbeef)
+#define TRACE_SEQ_CHECK(s) \
+do { \
+ if (WARN_ONCE((s)->buffer == TRACE_SEQ_POISON, \
+ "Usage of trace_seq after it was destroyed")) \
+ (s)->state = TRACE_SEQ__BUFFER_POISONED; \
+} while (0)
+
+#define TRACE_SEQ_CHECK_RET_N(s, n) \
+do { \
+ TRACE_SEQ_CHECK(s); \
+ if ((s)->state != TRACE_SEQ__GOOD) \
+ return n; \
+} while (0)
+
+#define TRACE_SEQ_CHECK_RET(s) TRACE_SEQ_CHECK_RET_N(s, )
+#define TRACE_SEQ_CHECK_RET0(s) TRACE_SEQ_CHECK_RET_N(s, 0)
+
+/**
+ * trace_seq_init - initialize the trace_seq structure
+ * @s: a pointer to the trace_seq structure to initialize
+ */
+void trace_seq_init(struct trace_seq *s)
+{
+ s->len = 0;
+ s->readpos = 0;
+ s->buffer_size = TRACE_SEQ_BUF_SIZE;
+ s->buffer = malloc(s->buffer_size);
+ if (s->buffer != NULL)
+ s->state = TRACE_SEQ__GOOD;
+ else
+ s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
+}
+
+/**
+ * trace_seq_reset - re-initialize the trace_seq structure
+ * @s: a pointer to the trace_seq structure to reset
+ */
+void trace_seq_reset(struct trace_seq *s)
+{
+ if (!s)
+ return;
+ TRACE_SEQ_CHECK(s);
+ s->len = 0;
+ s->readpos = 0;
+}
+
+/**
+ * trace_seq_destroy - free up memory of a trace_seq
+ * @s: a pointer to the trace_seq to free the buffer
+ *
+ * Only frees the buffer, not the trace_seq struct itself.
+ */
+void trace_seq_destroy(struct trace_seq *s)
+{
+ if (!s)
+ return;
+ TRACE_SEQ_CHECK_RET(s);
+ free(s->buffer);
+ s->buffer = TRACE_SEQ_POISON;
+}
+
+static void expand_buffer(struct trace_seq *s)
+{
+ char *buf;
+
+ buf = realloc(s->buffer, s->buffer_size + TRACE_SEQ_BUF_SIZE);
+ if (WARN_ONCE(!buf, "Can't allocate trace_seq buffer memory")) {
+ s->state = TRACE_SEQ__MEM_ALLOC_FAILED;
+ return;
+ }
+
+ s->buffer = buf;
+ s->buffer_size += TRACE_SEQ_BUF_SIZE;
+}
+
+/**
+ * trace_seq_printf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * It returns 0 if the trace oversizes the buffer's free
+ * space, 1 otherwise.
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
+{
+ va_list ap;
+ int len;
+ int ret;
+
+ try_again:
+ TRACE_SEQ_CHECK_RET0(s);
+
+ len = (s->buffer_size - 1) - s->len;
+
+ va_start(ap, fmt);
+ ret = vsnprintf(s->buffer + s->len, len, fmt, ap);
+ va_end(ap);
+
+ if (ret >= len) {
+ expand_buffer(s);
+ goto try_again;
+ }
+
+ s->len += ret;
+
+ return 1;
+}
+
+/**
+ * trace_seq_vprintf - sequence printing of trace information
+ * @s: trace sequence descriptor
+ * @fmt: printf format string
+ *
+ * The tracer may use either sequence operations or its own
+ * copy to user routines. To simplify formating of a trace
+ * trace_seq_printf is used to store strings into a special
+ * buffer (@s). Then the output may be either used by
+ * the sequencer or pulled into another buffer.
+ */
+int
+trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args)
+{
+ int len;
+ int ret;
+
+ try_again:
+ TRACE_SEQ_CHECK_RET0(s);
+
+ len = (s->buffer_size - 1) - s->len;
+
+ ret = vsnprintf(s->buffer + s->len, len, fmt, args);
+
+ if (ret >= len) {
+ expand_buffer(s);
+ goto try_again;
+ }
+
+ s->len += ret;
+
+ return len;
+}
+
+/**
+ * trace_seq_puts - trace sequence printing of simple string
+ * @s: trace sequence descriptor
+ * @str: simple string to record
+ *
+ * The tracer may use either the sequence operations or its own
+ * copy to user routines. This function records a simple string
+ * into a special buffer (@s) for later retrieval by a sequencer
+ * or other mechanism.
+ */
+int trace_seq_puts(struct trace_seq *s, const char *str)
+{
+ int len;
+
+ TRACE_SEQ_CHECK_RET0(s);
+
+ len = strlen(str);
+
+ while (len > ((s->buffer_size - 1) - s->len))
+ expand_buffer(s);
+
+ TRACE_SEQ_CHECK_RET0(s);
+
+ memcpy(s->buffer + s->len, str, len);
+ s->len += len;
+
+ return len;
+}
+
+int trace_seq_putc(struct trace_seq *s, unsigned char c)
+{
+ TRACE_SEQ_CHECK_RET0(s);
+
+ while (s->len >= (s->buffer_size - 1))
+ expand_buffer(s);
+
+ TRACE_SEQ_CHECK_RET0(s);
+
+ s->buffer[s->len++] = c;
+
+ return 1;
+}
+
+void trace_seq_terminate(struct trace_seq *s)
+{
+ TRACE_SEQ_CHECK_RET(s);
+
+ /* There's always one character left on the buffer */
+ s->buffer[s->len] = 0;
+}
+
+int trace_seq_do_printf(struct trace_seq *s)
+{
+ TRACE_SEQ_CHECK(s);
+
+ switch (s->state) {
+ case TRACE_SEQ__GOOD:
+ return printf("%.*s", s->len, s->buffer);
+ case TRACE_SEQ__BUFFER_POISONED:
+ puts("Usage of trace_seq after it was destroyed");
+ break;
+ case TRACE_SEQ__MEM_ALLOC_FAILED:
+ puts("Can't allocate trace_seq buffer memory");
+ break;
+ }
+ return -1;
+}
diff --git a/tools/net/Makefile b/tools/net/Makefile
new file mode 100644
index 00000000000..ee577ea03ba
--- /dev/null
+++ b/tools/net/Makefile
@@ -0,0 +1,34 @@
+prefix = /usr
+
+CC = gcc
+LEX = flex
+YACC = bison
+
+%.yacc.c: %.y
+ $(YACC) -o $@ -d $<
+
+%.lex.c: %.l
+ $(LEX) -o $@ $<
+
+all : bpf_jit_disasm bpf_dbg bpf_asm
+
+bpf_jit_disasm : CFLAGS = -Wall -O2 -DPACKAGE='bpf_jit_disasm'
+bpf_jit_disasm : LDLIBS = -lopcodes -lbfd -ldl
+bpf_jit_disasm : bpf_jit_disasm.o
+
+bpf_dbg : CFLAGS = -Wall -O2
+bpf_dbg : LDLIBS = -lreadline
+bpf_dbg : bpf_dbg.o
+
+bpf_asm : CFLAGS = -Wall -O2 -I.
+bpf_asm : LDLIBS =
+bpf_asm : bpf_asm.o bpf_exp.yacc.o bpf_exp.lex.o
+bpf_exp.lex.o : bpf_exp.yacc.c
+
+clean :
+ rm -rf *.o bpf_jit_disasm bpf_dbg bpf_asm bpf_exp.yacc.* bpf_exp.lex.*
+
+install :
+ install bpf_jit_disasm $(prefix)/bin/bpf_jit_disasm
+ install bpf_dbg $(prefix)/bin/bpf_dbg
+ install bpf_asm $(prefix)/bin/bpf_asm
diff --git a/tools/net/bpf_asm.c b/tools/net/bpf_asm.c
new file mode 100644
index 00000000000..c15aef097b0
--- /dev/null
+++ b/tools/net/bpf_asm.c
@@ -0,0 +1,52 @@
+/*
+ * Minimal BPF assembler
+ *
+ * Instead of libpcap high-level filter expressions, it can be quite
+ * useful to define filters in low-level BPF assembler (that is kept
+ * close to Steven McCanne and Van Jacobson's original BPF paper).
+ * In particular for BPF JIT implementors, JIT security auditors, or
+ * just for defining BPF expressions that contain extensions which are
+ * not supported by compilers.
+ *
+ * How to get into it:
+ *
+ * 1) read Documentation/networking/filter.txt
+ * 2) Run `bpf_asm [-c] <filter-prog file>` to translate into binary
+ * blob that is loadable with xt_bpf, cls_bpf et al. Note: -c will
+ * pretty print a C-like construct.
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+extern void bpf_asm_compile(FILE *fp, bool cstyle);
+
+int main(int argc, char **argv)
+{
+ FILE *fp = stdin;
+ bool cstyle = false;
+ int i;
+
+ for (i = 1; i < argc; i++) {
+ if (!strncmp("-c", argv[i], 2)) {
+ cstyle = true;
+ continue;
+ }
+
+ fp = fopen(argv[i], "r");
+ if (!fp) {
+ fp = stdin;
+ continue;
+ }
+
+ break;
+ }
+
+ bpf_asm_compile(fp, cstyle);
+
+ return 0;
+}
diff --git a/tools/net/bpf_dbg.c b/tools/net/bpf_dbg.c
new file mode 100644
index 00000000000..9a287bec695
--- /dev/null
+++ b/tools/net/bpf_dbg.c
@@ -0,0 +1,1395 @@
+/*
+ * Minimal BPF debugger
+ *
+ * Minimal BPF debugger that mimics the kernel's engine (w/o extensions)
+ * and allows for single stepping through selected packets from a pcap
+ * with a provided user filter in order to facilitate verification of a
+ * BPF program. Besides others, this is useful to verify BPF programs
+ * before attaching to a live system, and can be used in socket filters,
+ * cls_bpf, xt_bpf, team driver and e.g. PTP code; in particular when a
+ * single more complex BPF program is being used. Reasons for a more
+ * complex BPF program are likely primarily to optimize execution time
+ * for making a verdict when multiple simple BPF programs are combined
+ * into one in order to prevent parsing same headers multiple times.
+ *
+ * More on how to debug BPF opcodes see Documentation/networking/filter.txt
+ * which is the main document on BPF. Mini howto for getting started:
+ *
+ * 1) `./bpf_dbg` to enter the shell (shell cmds denoted with '>'):
+ * 2) > load bpf 6,40 0 0 12,21 0 3 20... (output from `bpf_asm` or
+ * `tcpdump -iem1 -ddd port 22 | tr '\n' ','` to load as filter)
+ * 3) > load pcap foo.pcap
+ * 4) > run <n>/disassemble/dump/quit (self-explanatory)
+ * 5) > breakpoint 2 (sets bp at loaded BPF insns 2, do `run` then;
+ * multiple bps can be set, of course, a call to `breakpoint`
+ * w/o args shows currently loaded bps, `breakpoint reset` for
+ * resetting all breakpoints)
+ * 6) > select 3 (`run` etc will start from the 3rd packet in the pcap)
+ * 7) > step [-<n>, +<n>] (performs single stepping through the BPF)
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <setjmp.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <readline/readline.h>
+#include <readline/history.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <signal.h>
+#include <arpa/inet.h>
+#include <net/ethernet.h>
+
+#define TCPDUMP_MAGIC 0xa1b2c3d4
+
+#define BPF_LDX_B (BPF_LDX | BPF_B)
+#define BPF_LDX_W (BPF_LDX | BPF_W)
+#define BPF_JMP_JA (BPF_JMP | BPF_JA)
+#define BPF_JMP_JEQ (BPF_JMP | BPF_JEQ)
+#define BPF_JMP_JGT (BPF_JMP | BPF_JGT)
+#define BPF_JMP_JGE (BPF_JMP | BPF_JGE)
+#define BPF_JMP_JSET (BPF_JMP | BPF_JSET)
+#define BPF_ALU_ADD (BPF_ALU | BPF_ADD)
+#define BPF_ALU_SUB (BPF_ALU | BPF_SUB)
+#define BPF_ALU_MUL (BPF_ALU | BPF_MUL)
+#define BPF_ALU_DIV (BPF_ALU | BPF_DIV)
+#define BPF_ALU_MOD (BPF_ALU | BPF_MOD)
+#define BPF_ALU_NEG (BPF_ALU | BPF_NEG)
+#define BPF_ALU_AND (BPF_ALU | BPF_AND)
+#define BPF_ALU_OR (BPF_ALU | BPF_OR)
+#define BPF_ALU_XOR (BPF_ALU | BPF_XOR)
+#define BPF_ALU_LSH (BPF_ALU | BPF_LSH)
+#define BPF_ALU_RSH (BPF_ALU | BPF_RSH)
+#define BPF_MISC_TAX (BPF_MISC | BPF_TAX)
+#define BPF_MISC_TXA (BPF_MISC | BPF_TXA)
+#define BPF_LD_B (BPF_LD | BPF_B)
+#define BPF_LD_H (BPF_LD | BPF_H)
+#define BPF_LD_W (BPF_LD | BPF_W)
+
+#ifndef array_size
+# define array_size(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+#ifndef __check_format_printf
+# define __check_format_printf(pos_fmtstr, pos_fmtargs) \
+ __attribute__ ((format (printf, (pos_fmtstr), (pos_fmtargs))))
+#endif
+
+enum {
+ CMD_OK,
+ CMD_ERR,
+ CMD_EX,
+};
+
+struct shell_cmd {
+ const char *name;
+ int (*func)(char *args);
+};
+
+struct pcap_filehdr {
+ uint32_t magic;
+ uint16_t version_major;
+ uint16_t version_minor;
+ int32_t thiszone;
+ uint32_t sigfigs;
+ uint32_t snaplen;
+ uint32_t linktype;
+};
+
+struct pcap_timeval {
+ int32_t tv_sec;
+ int32_t tv_usec;
+};
+
+struct pcap_pkthdr {
+ struct pcap_timeval ts;
+ uint32_t caplen;
+ uint32_t len;
+};
+
+struct bpf_regs {
+ uint32_t A;
+ uint32_t X;
+ uint32_t M[BPF_MEMWORDS];
+ uint32_t R;
+ bool Rs;
+ uint16_t Pc;
+};
+
+static struct sock_filter bpf_image[BPF_MAXINSNS + 1];
+static unsigned int bpf_prog_len = 0;
+
+static int bpf_breakpoints[64];
+static struct bpf_regs bpf_regs[BPF_MAXINSNS + 1];
+static struct bpf_regs bpf_curr;
+static unsigned int bpf_regs_len = 0;
+
+static int pcap_fd = -1;
+static unsigned int pcap_packet = 0;
+static size_t pcap_map_size = 0;
+static char *pcap_ptr_va_start, *pcap_ptr_va_curr;
+
+static const char * const op_table[] = {
+ [BPF_ST] = "st",
+ [BPF_STX] = "stx",
+ [BPF_LD_B] = "ldb",
+ [BPF_LD_H] = "ldh",
+ [BPF_LD_W] = "ld",
+ [BPF_LDX] = "ldx",
+ [BPF_LDX_B] = "ldxb",
+ [BPF_JMP_JA] = "ja",
+ [BPF_JMP_JEQ] = "jeq",
+ [BPF_JMP_JGT] = "jgt",
+ [BPF_JMP_JGE] = "jge",
+ [BPF_JMP_JSET] = "jset",
+ [BPF_ALU_ADD] = "add",
+ [BPF_ALU_SUB] = "sub",
+ [BPF_ALU_MUL] = "mul",
+ [BPF_ALU_DIV] = "div",
+ [BPF_ALU_MOD] = "mod",
+ [BPF_ALU_NEG] = "neg",
+ [BPF_ALU_AND] = "and",
+ [BPF_ALU_OR] = "or",
+ [BPF_ALU_XOR] = "xor",
+ [BPF_ALU_LSH] = "lsh",
+ [BPF_ALU_RSH] = "rsh",
+ [BPF_MISC_TAX] = "tax",
+ [BPF_MISC_TXA] = "txa",
+ [BPF_RET] = "ret",
+};
+
+static __check_format_printf(1, 2) int rl_printf(const char *fmt, ...)
+{
+ int ret;
+ va_list vl;
+
+ va_start(vl, fmt);
+ ret = vfprintf(rl_outstream, fmt, vl);
+ va_end(vl);
+
+ return ret;
+}
+
+static int matches(const char *cmd, const char *pattern)
+{
+ int len = strlen(cmd);
+
+ if (len > strlen(pattern))
+ return -1;
+
+ return memcmp(pattern, cmd, len);
+}
+
+static void hex_dump(const uint8_t *buf, size_t len)
+{
+ int i;
+
+ rl_printf("%3u: ", 0);
+ for (i = 0; i < len; i++) {
+ if (i && !(i % 16))
+ rl_printf("\n%3u: ", i);
+ rl_printf("%02x ", buf[i]);
+ }
+ rl_printf("\n");
+}
+
+static bool bpf_prog_loaded(void)
+{
+ if (bpf_prog_len == 0)
+ rl_printf("no bpf program loaded!\n");
+
+ return bpf_prog_len > 0;
+}
+
+static void bpf_disasm(const struct sock_filter f, unsigned int i)
+{
+ const char *op, *fmt;
+ int val = f.k;
+ char buf[256];
+
+ switch (f.code) {
+ case BPF_RET | BPF_K:
+ op = op_table[BPF_RET];
+ fmt = "#%#x";
+ break;
+ case BPF_RET | BPF_A:
+ op = op_table[BPF_RET];
+ fmt = "a";
+ break;
+ case BPF_RET | BPF_X:
+ op = op_table[BPF_RET];
+ fmt = "x";
+ break;
+ case BPF_MISC_TAX:
+ op = op_table[BPF_MISC_TAX];
+ fmt = "";
+ break;
+ case BPF_MISC_TXA:
+ op = op_table[BPF_MISC_TXA];
+ fmt = "";
+ break;
+ case BPF_ST:
+ op = op_table[BPF_ST];
+ fmt = "M[%d]";
+ break;
+ case BPF_STX:
+ op = op_table[BPF_STX];
+ fmt = "M[%d]";
+ break;
+ case BPF_LD_W | BPF_ABS:
+ op = op_table[BPF_LD_W];
+ fmt = "[%d]";
+ break;
+ case BPF_LD_H | BPF_ABS:
+ op = op_table[BPF_LD_H];
+ fmt = "[%d]";
+ break;
+ case BPF_LD_B | BPF_ABS:
+ op = op_table[BPF_LD_B];
+ fmt = "[%d]";
+ break;
+ case BPF_LD_W | BPF_LEN:
+ op = op_table[BPF_LD_W];
+ fmt = "#len";
+ break;
+ case BPF_LD_W | BPF_IND:
+ op = op_table[BPF_LD_W];
+ fmt = "[x+%d]";
+ break;
+ case BPF_LD_H | BPF_IND:
+ op = op_table[BPF_LD_H];
+ fmt = "[x+%d]";
+ break;
+ case BPF_LD_B | BPF_IND:
+ op = op_table[BPF_LD_B];
+ fmt = "[x+%d]";
+ break;
+ case BPF_LD | BPF_IMM:
+ op = op_table[BPF_LD_W];
+ fmt = "#%#x";
+ break;
+ case BPF_LDX | BPF_IMM:
+ op = op_table[BPF_LDX];
+ fmt = "#%#x";
+ break;
+ case BPF_LDX_B | BPF_MSH:
+ op = op_table[BPF_LDX_B];
+ fmt = "4*([%d]&0xf)";
+ break;
+ case BPF_LD | BPF_MEM:
+ op = op_table[BPF_LD_W];
+ fmt = "M[%d]";
+ break;
+ case BPF_LDX | BPF_MEM:
+ op = op_table[BPF_LDX];
+ fmt = "M[%d]";
+ break;
+ case BPF_JMP_JA:
+ op = op_table[BPF_JMP_JA];
+ fmt = "%d";
+ val = i + 1 + f.k;
+ break;
+ case BPF_JMP_JGT | BPF_X:
+ op = op_table[BPF_JMP_JGT];
+ fmt = "x";
+ break;
+ case BPF_JMP_JGT | BPF_K:
+ op = op_table[BPF_JMP_JGT];
+ fmt = "#%#x";
+ break;
+ case BPF_JMP_JGE | BPF_X:
+ op = op_table[BPF_JMP_JGE];
+ fmt = "x";
+ break;
+ case BPF_JMP_JGE | BPF_K:
+ op = op_table[BPF_JMP_JGE];
+ fmt = "#%#x";
+ break;
+ case BPF_JMP_JEQ | BPF_X:
+ op = op_table[BPF_JMP_JEQ];
+ fmt = "x";
+ break;
+ case BPF_JMP_JEQ | BPF_K:
+ op = op_table[BPF_JMP_JEQ];
+ fmt = "#%#x";
+ break;
+ case BPF_JMP_JSET | BPF_X:
+ op = op_table[BPF_JMP_JSET];
+ fmt = "x";
+ break;
+ case BPF_JMP_JSET | BPF_K:
+ op = op_table[BPF_JMP_JSET];
+ fmt = "#%#x";
+ break;
+ case BPF_ALU_NEG:
+ op = op_table[BPF_ALU_NEG];
+ fmt = "";
+ break;
+ case BPF_ALU_LSH | BPF_X:
+ op = op_table[BPF_ALU_LSH];
+ fmt = "x";
+ break;
+ case BPF_ALU_LSH | BPF_K:
+ op = op_table[BPF_ALU_LSH];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_RSH | BPF_X:
+ op = op_table[BPF_ALU_RSH];
+ fmt = "x";
+ break;
+ case BPF_ALU_RSH | BPF_K:
+ op = op_table[BPF_ALU_RSH];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_ADD | BPF_X:
+ op = op_table[BPF_ALU_ADD];
+ fmt = "x";
+ break;
+ case BPF_ALU_ADD | BPF_K:
+ op = op_table[BPF_ALU_ADD];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_SUB | BPF_X:
+ op = op_table[BPF_ALU_SUB];
+ fmt = "x";
+ break;
+ case BPF_ALU_SUB | BPF_K:
+ op = op_table[BPF_ALU_SUB];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_MUL | BPF_X:
+ op = op_table[BPF_ALU_MUL];
+ fmt = "x";
+ break;
+ case BPF_ALU_MUL | BPF_K:
+ op = op_table[BPF_ALU_MUL];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_DIV | BPF_X:
+ op = op_table[BPF_ALU_DIV];
+ fmt = "x";
+ break;
+ case BPF_ALU_DIV | BPF_K:
+ op = op_table[BPF_ALU_DIV];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_MOD | BPF_X:
+ op = op_table[BPF_ALU_MOD];
+ fmt = "x";
+ break;
+ case BPF_ALU_MOD | BPF_K:
+ op = op_table[BPF_ALU_MOD];
+ fmt = "#%d";
+ break;
+ case BPF_ALU_AND | BPF_X:
+ op = op_table[BPF_ALU_AND];
+ fmt = "x";
+ break;
+ case BPF_ALU_AND | BPF_K:
+ op = op_table[BPF_ALU_AND];
+ fmt = "#%#x";
+ break;
+ case BPF_ALU_OR | BPF_X:
+ op = op_table[BPF_ALU_OR];
+ fmt = "x";
+ break;
+ case BPF_ALU_OR | BPF_K:
+ op = op_table[BPF_ALU_OR];
+ fmt = "#%#x";
+ break;
+ case BPF_ALU_XOR | BPF_X:
+ op = op_table[BPF_ALU_XOR];
+ fmt = "x";
+ break;
+ case BPF_ALU_XOR | BPF_K:
+ op = op_table[BPF_ALU_XOR];
+ fmt = "#%#x";
+ break;
+ default:
+ op = "nosup";
+ fmt = "%#x";
+ val = f.code;
+ break;
+ }
+
+ memset(buf, 0, sizeof(buf));
+ snprintf(buf, sizeof(buf), fmt, val);
+ buf[sizeof(buf) - 1] = 0;
+
+ if ((BPF_CLASS(f.code) == BPF_JMP && BPF_OP(f.code) != BPF_JA))
+ rl_printf("l%d:\t%s %s, l%d, l%d\n", i, op, buf,
+ i + 1 + f.jt, i + 1 + f.jf);
+ else
+ rl_printf("l%d:\t%s %s\n", i, op, buf);
+}
+
+static void bpf_dump_curr(struct bpf_regs *r, struct sock_filter *f)
+{
+ int i, m = 0;
+
+ rl_printf("pc: [%u]\n", r->Pc);
+ rl_printf("code: [%u] jt[%u] jf[%u] k[%u]\n",
+ f->code, f->jt, f->jf, f->k);
+ rl_printf("curr: ");
+ bpf_disasm(*f, r->Pc);
+
+ if (f->jt || f->jf) {
+ rl_printf("jt: ");
+ bpf_disasm(*(f + f->jt + 1), r->Pc + f->jt + 1);
+ rl_printf("jf: ");
+ bpf_disasm(*(f + f->jf + 1), r->Pc + f->jf + 1);
+ }
+
+ rl_printf("A: [%#08x][%u]\n", r->A, r->A);
+ rl_printf("X: [%#08x][%u]\n", r->X, r->X);
+ if (r->Rs)
+ rl_printf("ret: [%#08x][%u]!\n", r->R, r->R);
+
+ for (i = 0; i < BPF_MEMWORDS; i++) {
+ if (r->M[i]) {
+ m++;
+ rl_printf("M[%d]: [%#08x][%u]\n", i, r->M[i], r->M[i]);
+ }
+ }
+ if (m == 0)
+ rl_printf("M[0,%d]: [%#08x][%u]\n", BPF_MEMWORDS - 1, 0, 0);
+}
+
+static void bpf_dump_pkt(uint8_t *pkt, uint32_t pkt_caplen, uint32_t pkt_len)
+{
+ if (pkt_caplen != pkt_len)
+ rl_printf("cap: %u, len: %u\n", pkt_caplen, pkt_len);
+ else
+ rl_printf("len: %u\n", pkt_len);
+
+ hex_dump(pkt, pkt_caplen);
+}
+
+static void bpf_disasm_all(const struct sock_filter *f, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; i < len; i++)
+ bpf_disasm(f[i], i);
+}
+
+static void bpf_dump_all(const struct sock_filter *f, unsigned int len)
+{
+ unsigned int i;
+
+ rl_printf("/* { op, jt, jf, k }, */\n");
+ for (i = 0; i < len; i++)
+ rl_printf("{ %#04x, %2u, %2u, %#010x },\n",
+ f[i].code, f[i].jt, f[i].jf, f[i].k);
+}
+
+static bool bpf_runnable(struct sock_filter *f, unsigned int len)
+{
+ int sock, ret, i;
+ struct sock_fprog bpf = {
+ .filter = f,
+ .len = len,
+ };
+
+ sock = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock < 0) {
+ rl_printf("cannot open socket!\n");
+ return false;
+ }
+ ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
+ close(sock);
+ if (ret < 0) {
+ rl_printf("program not allowed to run by kernel!\n");
+ return false;
+ }
+ for (i = 0; i < len; i++) {
+ if (BPF_CLASS(f[i].code) == BPF_LD &&
+ f[i].k > SKF_AD_OFF) {
+ rl_printf("extensions currently not supported!\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void bpf_reset_breakpoints(void)
+{
+ int i;
+
+ for (i = 0; i < array_size(bpf_breakpoints); i++)
+ bpf_breakpoints[i] = -1;
+}
+
+static void bpf_set_breakpoints(unsigned int where)
+{
+ int i;
+ bool set = false;
+
+ for (i = 0; i < array_size(bpf_breakpoints); i++) {
+ if (bpf_breakpoints[i] == (int) where) {
+ rl_printf("breakpoint already set!\n");
+ set = true;
+ break;
+ }
+
+ if (bpf_breakpoints[i] == -1 && set == false) {
+ bpf_breakpoints[i] = where;
+ set = true;
+ }
+ }
+
+ if (!set)
+ rl_printf("too many breakpoints set, reset first!\n");
+}
+
+static void bpf_dump_breakpoints(void)
+{
+ int i;
+
+ rl_printf("breakpoints: ");
+
+ for (i = 0; i < array_size(bpf_breakpoints); i++) {
+ if (bpf_breakpoints[i] < 0)
+ continue;
+ rl_printf("%d ", bpf_breakpoints[i]);
+ }
+
+ rl_printf("\n");
+}
+
+static void bpf_reset(void)
+{
+ bpf_regs_len = 0;
+
+ memset(bpf_regs, 0, sizeof(bpf_regs));
+ memset(&bpf_curr, 0, sizeof(bpf_curr));
+}
+
+static void bpf_safe_regs(void)
+{
+ memcpy(&bpf_regs[bpf_regs_len++], &bpf_curr, sizeof(bpf_curr));
+}
+
+static bool bpf_restore_regs(int off)
+{
+ unsigned int index = bpf_regs_len - 1 + off;
+
+ if (index == 0) {
+ bpf_reset();
+ return true;
+ } else if (index < bpf_regs_len) {
+ memcpy(&bpf_curr, &bpf_regs[index], sizeof(bpf_curr));
+ bpf_regs_len = index;
+ return true;
+ } else {
+ rl_printf("reached bottom of register history stack!\n");
+ return false;
+ }
+}
+
+static uint32_t extract_u32(uint8_t *pkt, uint32_t off)
+{
+ uint32_t r;
+
+ memcpy(&r, &pkt[off], sizeof(r));
+
+ return ntohl(r);
+}
+
+static uint16_t extract_u16(uint8_t *pkt, uint32_t off)
+{
+ uint16_t r;
+
+ memcpy(&r, &pkt[off], sizeof(r));
+
+ return ntohs(r);
+}
+
+static uint8_t extract_u8(uint8_t *pkt, uint32_t off)
+{
+ return pkt[off];
+}
+
+static void set_return(struct bpf_regs *r)
+{
+ r->R = 0;
+ r->Rs = true;
+}
+
+static void bpf_single_step(struct bpf_regs *r, struct sock_filter *f,
+ uint8_t *pkt, uint32_t pkt_caplen,
+ uint32_t pkt_len)
+{
+ uint32_t K = f->k;
+ int d;
+
+ switch (f->code) {
+ case BPF_RET | BPF_K:
+ r->R = K;
+ r->Rs = true;
+ break;
+ case BPF_RET | BPF_A:
+ r->R = r->A;
+ r->Rs = true;
+ break;
+ case BPF_RET | BPF_X:
+ r->R = r->X;
+ r->Rs = true;
+ break;
+ case BPF_MISC_TAX:
+ r->X = r->A;
+ break;
+ case BPF_MISC_TXA:
+ r->A = r->X;
+ break;
+ case BPF_ST:
+ r->M[K] = r->A;
+ break;
+ case BPF_STX:
+ r->M[K] = r->X;
+ break;
+ case BPF_LD_W | BPF_ABS:
+ d = pkt_caplen - K;
+ if (d >= sizeof(uint32_t))
+ r->A = extract_u32(pkt, K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_H | BPF_ABS:
+ d = pkt_caplen - K;
+ if (d >= sizeof(uint16_t))
+ r->A = extract_u16(pkt, K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_B | BPF_ABS:
+ d = pkt_caplen - K;
+ if (d >= sizeof(uint8_t))
+ r->A = extract_u8(pkt, K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_W | BPF_IND:
+ d = pkt_caplen - (r->X + K);
+ if (d >= sizeof(uint32_t))
+ r->A = extract_u32(pkt, r->X + K);
+ break;
+ case BPF_LD_H | BPF_IND:
+ d = pkt_caplen - (r->X + K);
+ if (d >= sizeof(uint16_t))
+ r->A = extract_u16(pkt, r->X + K);
+ else
+ set_return(r);
+ break;
+ case BPF_LD_B | BPF_IND:
+ d = pkt_caplen - (r->X + K);
+ if (d >= sizeof(uint8_t))
+ r->A = extract_u8(pkt, r->X + K);
+ else
+ set_return(r);
+ break;
+ case BPF_LDX_B | BPF_MSH:
+ d = pkt_caplen - K;
+ if (d >= sizeof(uint8_t)) {
+ r->X = extract_u8(pkt, K);
+ r->X = (r->X & 0xf) << 2;
+ } else
+ set_return(r);
+ break;
+ case BPF_LD_W | BPF_LEN:
+ r->A = pkt_len;
+ break;
+ case BPF_LDX_W | BPF_LEN:
+ r->A = pkt_len;
+ break;
+ case BPF_LD | BPF_IMM:
+ r->A = K;
+ break;
+ case BPF_LDX | BPF_IMM:
+ r->X = K;
+ break;
+ case BPF_LD | BPF_MEM:
+ r->A = r->M[K];
+ break;
+ case BPF_LDX | BPF_MEM:
+ r->X = r->M[K];
+ break;
+ case BPF_JMP_JA:
+ r->Pc += K;
+ break;
+ case BPF_JMP_JGT | BPF_X:
+ r->Pc += r->A > r->X ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JGT | BPF_K:
+ r->Pc += r->A > K ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JGE | BPF_X:
+ r->Pc += r->A >= r->X ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JGE | BPF_K:
+ r->Pc += r->A >= K ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JEQ | BPF_X:
+ r->Pc += r->A == r->X ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JEQ | BPF_K:
+ r->Pc += r->A == K ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JSET | BPF_X:
+ r->Pc += r->A & r->X ? f->jt : f->jf;
+ break;
+ case BPF_JMP_JSET | BPF_K:
+ r->Pc += r->A & K ? f->jt : f->jf;
+ break;
+ case BPF_ALU_NEG:
+ r->A = -r->A;
+ break;
+ case BPF_ALU_LSH | BPF_X:
+ r->A <<= r->X;
+ break;
+ case BPF_ALU_LSH | BPF_K:
+ r->A <<= K;
+ break;
+ case BPF_ALU_RSH | BPF_X:
+ r->A >>= r->X;
+ break;
+ case BPF_ALU_RSH | BPF_K:
+ r->A >>= K;
+ break;
+ case BPF_ALU_ADD | BPF_X:
+ r->A += r->X;
+ break;
+ case BPF_ALU_ADD | BPF_K:
+ r->A += K;
+ break;
+ case BPF_ALU_SUB | BPF_X:
+ r->A -= r->X;
+ break;
+ case BPF_ALU_SUB | BPF_K:
+ r->A -= K;
+ break;
+ case BPF_ALU_MUL | BPF_X:
+ r->A *= r->X;
+ break;
+ case BPF_ALU_MUL | BPF_K:
+ r->A *= K;
+ break;
+ case BPF_ALU_DIV | BPF_X:
+ case BPF_ALU_MOD | BPF_X:
+ if (r->X == 0) {
+ set_return(r);
+ break;
+ }
+ goto do_div;
+ case BPF_ALU_DIV | BPF_K:
+ case BPF_ALU_MOD | BPF_K:
+ if (K == 0) {
+ set_return(r);
+ break;
+ }
+do_div:
+ switch (f->code) {
+ case BPF_ALU_DIV | BPF_X:
+ r->A /= r->X;
+ break;
+ case BPF_ALU_DIV | BPF_K:
+ r->A /= K;
+ break;
+ case BPF_ALU_MOD | BPF_X:
+ r->A %= r->X;
+ break;
+ case BPF_ALU_MOD | BPF_K:
+ r->A %= K;
+ break;
+ }
+ break;
+ case BPF_ALU_AND | BPF_X:
+ r->A &= r->X;
+ break;
+ case BPF_ALU_AND | BPF_K:
+ r->A &= K;
+ break;
+ case BPF_ALU_OR | BPF_X:
+ r->A |= r->X;
+ break;
+ case BPF_ALU_OR | BPF_K:
+ r->A |= K;
+ break;
+ case BPF_ALU_XOR | BPF_X:
+ r->A ^= r->X;
+ break;
+ case BPF_ALU_XOR | BPF_K:
+ r->A ^= K;
+ break;
+ }
+}
+
+static bool bpf_pc_has_breakpoint(uint16_t pc)
+{
+ int i;
+
+ for (i = 0; i < array_size(bpf_breakpoints); i++) {
+ if (bpf_breakpoints[i] < 0)
+ continue;
+ if (bpf_breakpoints[i] == pc)
+ return true;
+ }
+
+ return false;
+}
+
+static bool bpf_handle_breakpoint(struct bpf_regs *r, struct sock_filter *f,
+ uint8_t *pkt, uint32_t pkt_caplen,
+ uint32_t pkt_len)
+{
+ rl_printf("-- register dump --\n");
+ bpf_dump_curr(r, &f[r->Pc]);
+ rl_printf("-- packet dump --\n");
+ bpf_dump_pkt(pkt, pkt_caplen, pkt_len);
+ rl_printf("(breakpoint)\n");
+ return true;
+}
+
+static int bpf_run_all(struct sock_filter *f, uint16_t bpf_len, uint8_t *pkt,
+ uint32_t pkt_caplen, uint32_t pkt_len)
+{
+ bool stop = false;
+
+ while (bpf_curr.Rs == false && stop == false) {
+ bpf_safe_regs();
+
+ if (bpf_pc_has_breakpoint(bpf_curr.Pc))
+ stop = bpf_handle_breakpoint(&bpf_curr, f, pkt,
+ pkt_caplen, pkt_len);
+
+ bpf_single_step(&bpf_curr, &f[bpf_curr.Pc], pkt, pkt_caplen,
+ pkt_len);
+ bpf_curr.Pc++;
+ }
+
+ return stop ? -1 : bpf_curr.R;
+}
+
+static int bpf_run_stepping(struct sock_filter *f, uint16_t bpf_len,
+ uint8_t *pkt, uint32_t pkt_caplen,
+ uint32_t pkt_len, int next)
+{
+ bool stop = false;
+ int i = 1;
+
+ while (bpf_curr.Rs == false && stop == false) {
+ bpf_safe_regs();
+
+ if (i++ == next)
+ stop = bpf_handle_breakpoint(&bpf_curr, f, pkt,
+ pkt_caplen, pkt_len);
+
+ bpf_single_step(&bpf_curr, &f[bpf_curr.Pc], pkt, pkt_caplen,
+ pkt_len);
+ bpf_curr.Pc++;
+ }
+
+ return stop ? -1 : bpf_curr.R;
+}
+
+static bool pcap_loaded(void)
+{
+ if (pcap_fd < 0)
+ rl_printf("no pcap file loaded!\n");
+
+ return pcap_fd >= 0;
+}
+
+static struct pcap_pkthdr *pcap_curr_pkt(void)
+{
+ return (void *) pcap_ptr_va_curr;
+}
+
+static bool pcap_next_pkt(void)
+{
+ struct pcap_pkthdr *hdr = pcap_curr_pkt();
+
+ if (pcap_ptr_va_curr + sizeof(*hdr) -
+ pcap_ptr_va_start >= pcap_map_size)
+ return false;
+ if (hdr->caplen == 0 || hdr->len == 0 || hdr->caplen > hdr->len)
+ return false;
+ if (pcap_ptr_va_curr + sizeof(*hdr) + hdr->caplen -
+ pcap_ptr_va_start >= pcap_map_size)
+ return false;
+
+ pcap_ptr_va_curr += (sizeof(*hdr) + hdr->caplen);
+ return true;
+}
+
+static void pcap_reset_pkt(void)
+{
+ pcap_ptr_va_curr = pcap_ptr_va_start + sizeof(struct pcap_filehdr);
+}
+
+static int try_load_pcap(const char *file)
+{
+ struct pcap_filehdr *hdr;
+ struct stat sb;
+ int ret;
+
+ pcap_fd = open(file, O_RDONLY);
+ if (pcap_fd < 0) {
+ rl_printf("cannot open pcap [%s]!\n", strerror(errno));
+ return CMD_ERR;
+ }
+
+ ret = fstat(pcap_fd, &sb);
+ if (ret < 0) {
+ rl_printf("cannot fstat pcap file!\n");
+ return CMD_ERR;
+ }
+
+ if (!S_ISREG(sb.st_mode)) {
+ rl_printf("not a regular pcap file, duh!\n");
+ return CMD_ERR;
+ }
+
+ pcap_map_size = sb.st_size;
+ if (pcap_map_size <= sizeof(struct pcap_filehdr)) {
+ rl_printf("pcap file too small!\n");
+ return CMD_ERR;
+ }
+
+ pcap_ptr_va_start = mmap(NULL, pcap_map_size, PROT_READ,
+ MAP_SHARED | MAP_LOCKED, pcap_fd, 0);
+ if (pcap_ptr_va_start == MAP_FAILED) {
+ rl_printf("mmap of file failed!");
+ return CMD_ERR;
+ }
+
+ hdr = (void *) pcap_ptr_va_start;
+ if (hdr->magic != TCPDUMP_MAGIC) {
+ rl_printf("wrong pcap magic!\n");
+ return CMD_ERR;
+ }
+
+ pcap_reset_pkt();
+
+ return CMD_OK;
+
+}
+
+static void try_close_pcap(void)
+{
+ if (pcap_fd >= 0) {
+ munmap(pcap_ptr_va_start, pcap_map_size);
+ close(pcap_fd);
+
+ pcap_ptr_va_start = pcap_ptr_va_curr = NULL;
+ pcap_map_size = 0;
+ pcap_packet = 0;
+ pcap_fd = -1;
+ }
+}
+
+static int cmd_load_bpf(char *bpf_string)
+{
+ char sp, *token, separator = ',';
+ unsigned short bpf_len, i = 0;
+ struct sock_filter tmp;
+
+ bpf_prog_len = 0;
+ memset(bpf_image, 0, sizeof(bpf_image));
+
+ if (sscanf(bpf_string, "%hu%c", &bpf_len, &sp) != 2 ||
+ sp != separator || bpf_len > BPF_MAXINSNS || bpf_len == 0) {
+ rl_printf("syntax error in head length encoding!\n");
+ return CMD_ERR;
+ }
+
+ token = bpf_string;
+ while ((token = strchr(token, separator)) && (++token)[0]) {
+ if (i >= bpf_len) {
+ rl_printf("program exceeds encoded length!\n");
+ return CMD_ERR;
+ }
+
+ if (sscanf(token, "%hu %hhu %hhu %u,",
+ &tmp.code, &tmp.jt, &tmp.jf, &tmp.k) != 4) {
+ rl_printf("syntax error at instruction %d!\n", i);
+ return CMD_ERR;
+ }
+
+ bpf_image[i].code = tmp.code;
+ bpf_image[i].jt = tmp.jt;
+ bpf_image[i].jf = tmp.jf;
+ bpf_image[i].k = tmp.k;
+
+ i++;
+ }
+
+ if (i != bpf_len) {
+ rl_printf("syntax error exceeding encoded length!\n");
+ return CMD_ERR;
+ } else
+ bpf_prog_len = bpf_len;
+ if (!bpf_runnable(bpf_image, bpf_prog_len))
+ bpf_prog_len = 0;
+
+ return CMD_OK;
+}
+
+static int cmd_load_pcap(char *file)
+{
+ char *file_trim, *tmp;
+
+ file_trim = strtok_r(file, " ", &tmp);
+ if (file_trim == NULL)
+ return CMD_ERR;
+
+ try_close_pcap();
+
+ return try_load_pcap(file_trim);
+}
+
+static int cmd_load(char *arg)
+{
+ char *subcmd, *cont, *tmp = strdup(arg);
+ int ret = CMD_OK;
+
+ subcmd = strtok_r(tmp, " ", &cont);
+ if (subcmd == NULL)
+ goto out;
+ if (matches(subcmd, "bpf") == 0) {
+ bpf_reset();
+ bpf_reset_breakpoints();
+
+ ret = cmd_load_bpf(cont);
+ } else if (matches(subcmd, "pcap") == 0) {
+ ret = cmd_load_pcap(cont);
+ } else {
+out:
+ rl_printf("bpf <code>: load bpf code\n");
+ rl_printf("pcap <file>: load pcap file\n");
+ ret = CMD_ERR;
+ }
+
+ free(tmp);
+ return ret;
+}
+
+static int cmd_step(char *num)
+{
+ struct pcap_pkthdr *hdr;
+ int steps, ret;
+
+ if (!bpf_prog_loaded() || !pcap_loaded())
+ return CMD_ERR;
+
+ steps = strtol(num, NULL, 10);
+ if (steps == 0 || strlen(num) == 0)
+ steps = 1;
+ if (steps < 0) {
+ if (!bpf_restore_regs(steps))
+ return CMD_ERR;
+ steps = 1;
+ }
+
+ hdr = pcap_curr_pkt();
+ ret = bpf_run_stepping(bpf_image, bpf_prog_len,
+ (uint8_t *) hdr + sizeof(*hdr),
+ hdr->caplen, hdr->len, steps);
+ if (ret >= 0 || bpf_curr.Rs) {
+ bpf_reset();
+ if (!pcap_next_pkt()) {
+ rl_printf("(going back to first packet)\n");
+ pcap_reset_pkt();
+ } else {
+ rl_printf("(next packet)\n");
+ }
+ }
+
+ return CMD_OK;
+}
+
+static int cmd_select(char *num)
+{
+ unsigned int which, i;
+ bool have_next = true;
+
+ if (!pcap_loaded() || strlen(num) == 0)
+ return CMD_ERR;
+
+ which = strtoul(num, NULL, 10);
+ if (which == 0) {
+ rl_printf("packet count starts with 1, clamping!\n");
+ which = 1;
+ }
+
+ pcap_reset_pkt();
+ bpf_reset();
+
+ for (i = 0; i < which && (have_next = pcap_next_pkt()); i++)
+ /* noop */;
+ if (!have_next || pcap_curr_pkt() == NULL) {
+ rl_printf("no packet #%u available!\n", which);
+ pcap_reset_pkt();
+ return CMD_ERR;
+ }
+
+ return CMD_OK;
+}
+
+static int cmd_breakpoint(char *subcmd)
+{
+ if (!bpf_prog_loaded())
+ return CMD_ERR;
+ if (strlen(subcmd) == 0)
+ bpf_dump_breakpoints();
+ else if (matches(subcmd, "reset") == 0)
+ bpf_reset_breakpoints();
+ else {
+ unsigned int where = strtoul(subcmd, NULL, 10);
+
+ if (where < bpf_prog_len) {
+ bpf_set_breakpoints(where);
+ rl_printf("breakpoint at: ");
+ bpf_disasm(bpf_image[where], where);
+ }
+ }
+
+ return CMD_OK;
+}
+
+static int cmd_run(char *num)
+{
+ static uint32_t pass = 0, fail = 0;
+ bool has_limit = true;
+ int pkts = 0, i = 0;
+
+ if (!bpf_prog_loaded() || !pcap_loaded())
+ return CMD_ERR;
+
+ pkts = strtol(num, NULL, 10);
+ if (pkts == 0 || strlen(num) == 0)
+ has_limit = false;
+
+ do {
+ struct pcap_pkthdr *hdr = pcap_curr_pkt();
+ int ret = bpf_run_all(bpf_image, bpf_prog_len,
+ (uint8_t *) hdr + sizeof(*hdr),
+ hdr->caplen, hdr->len);
+ if (ret > 0)
+ pass++;
+ else if (ret == 0)
+ fail++;
+ else
+ return CMD_OK;
+ bpf_reset();
+ } while (pcap_next_pkt() && (!has_limit || (has_limit && ++i < pkts)));
+
+ rl_printf("bpf passes:%u fails:%u\n", pass, fail);
+
+ pcap_reset_pkt();
+ bpf_reset();
+
+ pass = fail = 0;
+ return CMD_OK;
+}
+
+static int cmd_disassemble(char *line_string)
+{
+ bool single_line = false;
+ unsigned long line;
+
+ if (!bpf_prog_loaded())
+ return CMD_ERR;
+ if (strlen(line_string) > 0 &&
+ (line = strtoul(line_string, NULL, 10)) < bpf_prog_len)
+ single_line = true;
+ if (single_line)
+ bpf_disasm(bpf_image[line], line);
+ else
+ bpf_disasm_all(bpf_image, bpf_prog_len);
+
+ return CMD_OK;
+}
+
+static int cmd_dump(char *dontcare)
+{
+ if (!bpf_prog_loaded())
+ return CMD_ERR;
+
+ bpf_dump_all(bpf_image, bpf_prog_len);
+
+ return CMD_OK;
+}
+
+static int cmd_quit(char *dontcare)
+{
+ return CMD_EX;
+}
+
+static const struct shell_cmd cmds[] = {
+ { .name = "load", .func = cmd_load },
+ { .name = "select", .func = cmd_select },
+ { .name = "step", .func = cmd_step },
+ { .name = "run", .func = cmd_run },
+ { .name = "breakpoint", .func = cmd_breakpoint },
+ { .name = "disassemble", .func = cmd_disassemble },
+ { .name = "dump", .func = cmd_dump },
+ { .name = "quit", .func = cmd_quit },
+};
+
+static int execf(char *arg)
+{
+ char *cmd, *cont, *tmp = strdup(arg);
+ int i, ret = 0, len;
+
+ cmd = strtok_r(tmp, " ", &cont);
+ if (cmd == NULL)
+ goto out;
+ len = strlen(cmd);
+ for (i = 0; i < array_size(cmds); i++) {
+ if (len != strlen(cmds[i].name))
+ continue;
+ if (strncmp(cmds[i].name, cmd, len) == 0) {
+ ret = cmds[i].func(cont);
+ break;
+ }
+ }
+out:
+ free(tmp);
+ return ret;
+}
+
+static char *shell_comp_gen(const char *buf, int state)
+{
+ static int list_index, len;
+
+ if (!state) {
+ list_index = 0;
+ len = strlen(buf);
+ }
+
+ for (; list_index < array_size(cmds); ) {
+ const char *name = cmds[list_index].name;
+
+ list_index++;
+ if (strncmp(name, buf, len) == 0)
+ return strdup(name);
+ }
+
+ return NULL;
+}
+
+static char **shell_completion(const char *buf, int start, int end)
+{
+ char **matches = NULL;
+
+ if (start == 0)
+ matches = rl_completion_matches(buf, shell_comp_gen);
+
+ return matches;
+}
+
+static void intr_shell(int sig)
+{
+ if (rl_end)
+ rl_kill_line(-1, 0);
+
+ rl_crlf();
+ rl_refresh_line(0, 0);
+ rl_free_line_state();
+}
+
+static void init_shell(FILE *fin, FILE *fout)
+{
+ char file[128];
+
+ snprintf(file, sizeof(file), "%s/.bpf_dbg_history", getenv("HOME"));
+ read_history(file);
+
+ rl_instream = fin;
+ rl_outstream = fout;
+
+ rl_readline_name = "bpf_dbg";
+ rl_terminal_name = getenv("TERM");
+
+ rl_catch_signals = 0;
+ rl_catch_sigwinch = 1;
+
+ rl_attempted_completion_function = shell_completion;
+
+ rl_bind_key('\t', rl_complete);
+
+ rl_bind_key_in_map('\t', rl_complete, emacs_meta_keymap);
+ rl_bind_key_in_map('\033', rl_complete, emacs_meta_keymap);
+
+ snprintf(file, sizeof(file), "%s/.bpf_dbg_init", getenv("HOME"));
+ rl_read_init_file(file);
+
+ rl_prep_terminal(0);
+ rl_set_signals();
+
+ signal(SIGINT, intr_shell);
+}
+
+static void exit_shell(FILE *fin, FILE *fout)
+{
+ char file[128];
+
+ snprintf(file, sizeof(file), "%s/.bpf_dbg_history", getenv("HOME"));
+ write_history(file);
+
+ clear_history();
+ rl_deprep_terminal();
+
+ try_close_pcap();
+
+ if (fin != stdin)
+ fclose(fin);
+ if (fout != stdout)
+ fclose(fout);
+}
+
+static int run_shell_loop(FILE *fin, FILE *fout)
+{
+ char *buf;
+
+ init_shell(fin, fout);
+
+ while ((buf = readline("> ")) != NULL) {
+ int ret = execf(buf);
+ if (ret == CMD_EX)
+ break;
+ if (ret == CMD_OK && strlen(buf) > 0)
+ add_history(buf);
+
+ free(buf);
+ }
+
+ exit_shell(fin, fout);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ FILE *fin = NULL, *fout = NULL;
+
+ if (argc >= 2)
+ fin = fopen(argv[1], "r");
+ if (argc >= 3)
+ fout = fopen(argv[2], "w");
+
+ return run_shell_loop(fin ? : stdin, fout ? : stdout);
+}
diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l
new file mode 100644
index 00000000000..833a96611da
--- /dev/null
+++ b/tools/net/bpf_exp.l
@@ -0,0 +1,144 @@
+/*
+ * BPF asm code lexer
+ *
+ * This program is free software; you can distribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * Syntax kept close to:
+ *
+ * Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new
+ * architecture for user-level packet capture. In Proceedings of the
+ * USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993
+ * Conference Proceedings (USENIX'93). USENIX Association, Berkeley,
+ * CA, USA, 2-2.
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+%{
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+
+#include "bpf_exp.yacc.h"
+
+extern void yyerror(const char *str);
+
+%}
+
+%option align
+%option ecs
+
+%option nounput
+%option noreject
+%option noinput
+%option noyywrap
+
+%option 8bit
+%option caseless
+%option yylineno
+
+%%
+
+"ldb" { return OP_LDB; }
+"ldh" { return OP_LDH; }
+"ld" { return OP_LD; }
+"ldi" { return OP_LDI; }
+"ldx" { return OP_LDX; }
+"ldxi" { return OP_LDXI; }
+"ldxb" { return OP_LDXB; }
+"st" { return OP_ST; }
+"stx" { return OP_STX; }
+"jmp" { return OP_JMP; }
+"ja" { return OP_JMP; }
+"jeq" { return OP_JEQ; }
+"jneq" { return OP_JNEQ; }
+"jne" { return OP_JNEQ; }
+"jlt" { return OP_JLT; }
+"jle" { return OP_JLE; }
+"jgt" { return OP_JGT; }
+"jge" { return OP_JGE; }
+"jset" { return OP_JSET; }
+"add" { return OP_ADD; }
+"sub" { return OP_SUB; }
+"mul" { return OP_MUL; }
+"div" { return OP_DIV; }
+"mod" { return OP_MOD; }
+"neg" { return OP_NEG; }
+"and" { return OP_AND; }
+"xor" { return OP_XOR; }
+"or" { return OP_OR; }
+"lsh" { return OP_LSH; }
+"rsh" { return OP_RSH; }
+"ret" { return OP_RET; }
+"tax" { return OP_TAX; }
+"txa" { return OP_TXA; }
+
+"#"?("len") { return K_PKT_LEN; }
+"#"?("proto") { return K_PROTO; }
+"#"?("type") { return K_TYPE; }
+"#"?("poff") { return K_POFF; }
+"#"?("ifidx") { return K_IFIDX; }
+"#"?("nla") { return K_NLATTR; }
+"#"?("nlan") { return K_NLATTR_NEST; }
+"#"?("mark") { return K_MARK; }
+"#"?("queue") { return K_QUEUE; }
+"#"?("hatype") { return K_HATYPE; }
+"#"?("rxhash") { return K_RXHASH; }
+"#"?("cpu") { return K_CPU; }
+"#"?("vlan_tci") { return K_VLANT; }
+"#"?("vlan_pr") { return K_VLANP; }
+"#"?("rand") { return K_RAND; }
+
+":" { return ':'; }
+"," { return ','; }
+"#" { return '#'; }
+"%" { return '%'; }
+"[" { return '['; }
+"]" { return ']'; }
+"(" { return '('; }
+")" { return ')'; }
+"x" { return 'x'; }
+"a" { return 'a'; }
+"+" { return '+'; }
+"M" { return 'M'; }
+"*" { return '*'; }
+"&" { return '&'; }
+
+([0][x][a-fA-F0-9]+) {
+ yylval.number = strtoul(yytext, NULL, 16);
+ return number;
+ }
+([0][b][0-1]+) {
+ yylval.number = strtol(yytext + 2, NULL, 2);
+ return number;
+ }
+(([0])|([-+]?[1-9][0-9]*)) {
+ yylval.number = strtol(yytext, NULL, 10);
+ return number;
+ }
+([0][0-9]+) {
+ yylval.number = strtol(yytext + 1, NULL, 8);
+ return number;
+ }
+[a-zA-Z_][a-zA-Z0-9_]+ {
+ yylval.label = strdup(yytext);
+ return label;
+ }
+
+"/*"([^\*]|\*[^/])*"*/" { /* NOP */ }
+";"[^\n]* { /* NOP */ }
+^#.* { /* NOP */ }
+[ \t]+ { /* NOP */ }
+[ \n]+ { /* NOP */ }
+
+. {
+ printf("unknown character \'%s\'", yytext);
+ yyerror("lex unknown character");
+ }
+
+%%
diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y
new file mode 100644
index 00000000000..e6306c51c26
--- /dev/null
+++ b/tools/net/bpf_exp.y
@@ -0,0 +1,771 @@
+/*
+ * BPF asm code parser
+ *
+ * This program is free software; you can distribute it and/or modify
+ * it under the terms of the GNU General Public License as published
+ * by the Free Software Foundation; either version 2 of the License,
+ * or (at your option) any later version.
+ *
+ * Syntax kept close to:
+ *
+ * Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new
+ * architecture for user-level packet capture. In Proceedings of the
+ * USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993
+ * Conference Proceedings (USENIX'93). USENIX Association, Berkeley,
+ * CA, USA, 2-2.
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+%{
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <errno.h>
+#include <assert.h>
+#include <linux/filter.h>
+
+#include "bpf_exp.yacc.h"
+
+enum jmp_type { JTL, JFL, JKL };
+
+extern FILE *yyin;
+extern int yylex(void);
+extern void yyerror(const char *str);
+
+extern void bpf_asm_compile(FILE *fp, bool cstyle);
+static void bpf_set_curr_instr(uint16_t op, uint8_t jt, uint8_t jf, uint32_t k);
+static void bpf_set_curr_label(char *label);
+static void bpf_set_jmp_label(char *label, enum jmp_type type);
+
+%}
+
+%union {
+ char *label;
+ uint32_t number;
+}
+
+%token OP_LDB OP_LDH OP_LD OP_LDX OP_ST OP_STX OP_JMP OP_JEQ OP_JGT OP_JGE
+%token OP_JSET OP_ADD OP_SUB OP_MUL OP_DIV OP_AND OP_OR OP_XOR OP_LSH OP_RSH
+%token OP_RET OP_TAX OP_TXA OP_LDXB OP_MOD OP_NEG OP_JNEQ OP_JLT OP_JLE OP_LDI
+%token OP_LDXI
+
+%token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE
+%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND
+
+%token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%'
+
+%token number label
+
+%type <label> label
+%type <number> number
+
+%%
+
+prog
+ : line
+ | prog line
+ ;
+
+line
+ : instr
+ | labelled_instr
+ ;
+
+labelled_instr
+ : labelled instr
+ ;
+
+instr
+ : ldb
+ | ldh
+ | ld
+ | ldi
+ | ldx
+ | ldxi
+ | st
+ | stx
+ | jmp
+ | jeq
+ | jneq
+ | jlt
+ | jle
+ | jgt
+ | jge
+ | jset
+ | add
+ | sub
+ | mul
+ | div
+ | mod
+ | neg
+ | and
+ | or
+ | xor
+ | lsh
+ | rsh
+ | ret
+ | tax
+ | txa
+ ;
+
+labelled
+ : label ':' { bpf_set_curr_label($1); }
+ ;
+
+ldb
+ : OP_LDB '[' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_IND, 0, 0, $5); }
+ | OP_LDB '[' '%' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_IND, 0, 0, $6); }
+ | OP_LDB '[' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, $3); }
+ | OP_LDB K_PROTO {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PROTOCOL); }
+ | OP_LDB K_TYPE {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PKTTYPE); }
+ | OP_LDB K_IFIDX {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_IFINDEX); }
+ | OP_LDB K_NLATTR {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_NLATTR); }
+ | OP_LDB K_NLATTR_NEST {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_NLATTR_NEST); }
+ | OP_LDB K_MARK {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_MARK); }
+ | OP_LDB K_QUEUE {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_QUEUE); }
+ | OP_LDB K_HATYPE {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_HATYPE); }
+ | OP_LDB K_RXHASH {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RXHASH); }
+ | OP_LDB K_CPU {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_CPU); }
+ | OP_LDB K_VLANT {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_VLAN_TAG); }
+ | OP_LDB K_VLANP {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); }
+ | OP_LDB K_POFF {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+ | OP_LDB K_RAND {
+ bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RANDOM); }
+ ;
+
+ldh
+ : OP_LDH '[' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_IND, 0, 0, $5); }
+ | OP_LDH '[' '%' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_IND, 0, 0, $6); }
+ | OP_LDH '[' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, $3); }
+ | OP_LDH K_PROTO {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PROTOCOL); }
+ | OP_LDH K_TYPE {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PKTTYPE); }
+ | OP_LDH K_IFIDX {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_IFINDEX); }
+ | OP_LDH K_NLATTR {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_NLATTR); }
+ | OP_LDH K_NLATTR_NEST {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_NLATTR_NEST); }
+ | OP_LDH K_MARK {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_MARK); }
+ | OP_LDH K_QUEUE {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_QUEUE); }
+ | OP_LDH K_HATYPE {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_HATYPE); }
+ | OP_LDH K_RXHASH {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RXHASH); }
+ | OP_LDH K_CPU {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_CPU); }
+ | OP_LDH K_VLANT {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_VLAN_TAG); }
+ | OP_LDH K_VLANP {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); }
+ | OP_LDH K_POFF {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+ | OP_LDH K_RAND {
+ bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RANDOM); }
+ ;
+
+ldi
+ : OP_LDI '#' number {
+ bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $3); }
+ | OP_LDI number {
+ bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $2); }
+ ;
+
+ld
+ : OP_LD '#' number {
+ bpf_set_curr_instr(BPF_LD | BPF_IMM, 0, 0, $3); }
+ | OP_LD K_PKT_LEN {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_LEN, 0, 0, 0); }
+ | OP_LD K_PROTO {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PROTOCOL); }
+ | OP_LD K_TYPE {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PKTTYPE); }
+ | OP_LD K_IFIDX {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_IFINDEX); }
+ | OP_LD K_NLATTR {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_NLATTR); }
+ | OP_LD K_NLATTR_NEST {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_NLATTR_NEST); }
+ | OP_LD K_MARK {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_MARK); }
+ | OP_LD K_QUEUE {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_QUEUE); }
+ | OP_LD K_HATYPE {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_HATYPE); }
+ | OP_LD K_RXHASH {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RXHASH); }
+ | OP_LD K_CPU {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_CPU); }
+ | OP_LD K_VLANT {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_VLAN_TAG); }
+ | OP_LD K_VLANP {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); }
+ | OP_LD K_POFF {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_PAY_OFFSET); }
+ | OP_LD K_RAND {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0,
+ SKF_AD_OFF + SKF_AD_RANDOM); }
+ | OP_LD 'M' '[' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); }
+ | OP_LD '[' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_IND, 0, 0, $5); }
+ | OP_LD '[' '%' 'x' '+' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_IND, 0, 0, $6); }
+ | OP_LD '[' number ']' {
+ bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, $3); }
+ ;
+
+ldxi
+ : OP_LDXI '#' number {
+ bpf_set_curr_instr(BPF_LDX | BPF_IMM, 0, 0, $3); }
+ | OP_LDXI number {
+ bpf_set_curr_instr(BPF_LDX | BPF_IMM, 0, 0, $2); }
+ ;
+
+ldx
+ : OP_LDX '#' number {
+ bpf_set_curr_instr(BPF_LDX | BPF_IMM, 0, 0, $3); }
+ | OP_LDX K_PKT_LEN {
+ bpf_set_curr_instr(BPF_LDX | BPF_W | BPF_LEN, 0, 0, 0); }
+ | OP_LDX 'M' '[' number ']' {
+ bpf_set_curr_instr(BPF_LDX | BPF_MEM, 0, 0, $4); }
+ | OP_LDXB number '*' '(' '[' number ']' '&' number ')' {
+ if ($2 != 4 || $9 != 0xf) {
+ fprintf(stderr, "ldxb offset not supported!\n");
+ exit(0);
+ } else {
+ bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
+ | OP_LDX number '*' '(' '[' number ']' '&' number ')' {
+ if ($2 != 4 || $9 != 0xf) {
+ fprintf(stderr, "ldxb offset not supported!\n");
+ exit(0);
+ } else {
+ bpf_set_curr_instr(BPF_LDX | BPF_MSH | BPF_B, 0, 0, $6); } }
+ ;
+
+st
+ : OP_ST 'M' '[' number ']' {
+ bpf_set_curr_instr(BPF_ST, 0, 0, $4); }
+ ;
+
+stx
+ : OP_STX 'M' '[' number ']' {
+ bpf_set_curr_instr(BPF_STX, 0, 0, $4); }
+ ;
+
+jmp
+ : OP_JMP label {
+ bpf_set_jmp_label($2, JKL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JA, 0, 0, 0); }
+ ;
+
+jeq
+ : OP_JEQ '#' number ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, $3); }
+ | OP_JEQ 'x' ',' label ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_jmp_label($6, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ | OP_JEQ '%' 'x' ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ | OP_JEQ '#' number ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, $3); }
+ | OP_JEQ 'x' ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ | OP_JEQ '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ ;
+
+jneq
+ : OP_JNEQ '#' number ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_K, 0, 0, $3); }
+ | OP_JNEQ 'x' ',' label {
+ bpf_set_jmp_label($4, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ | OP_JNEQ '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JEQ | BPF_X, 0, 0, 0); }
+ ;
+
+jlt
+ : OP_JLT '#' number ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_K, 0, 0, $3); }
+ | OP_JLT 'x' ',' label {
+ bpf_set_jmp_label($4, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ | OP_JLT '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ ;
+
+jle
+ : OP_JLE '#' number ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_K, 0, 0, $3); }
+ | OP_JLE 'x' ',' label {
+ bpf_set_jmp_label($4, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ | OP_JLE '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ ;
+
+jgt
+ : OP_JGT '#' number ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_K, 0, 0, $3); }
+ | OP_JGT 'x' ',' label ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_jmp_label($6, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ | OP_JGT '%' 'x' ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ | OP_JGT '#' number ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_K, 0, 0, $3); }
+ | OP_JGT 'x' ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ | OP_JGT '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGT | BPF_X, 0, 0, 0); }
+ ;
+
+jge
+ : OP_JGE '#' number ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_K, 0, 0, $3); }
+ | OP_JGE 'x' ',' label ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_jmp_label($6, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ | OP_JGE '%' 'x' ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ | OP_JGE '#' number ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_K, 0, 0, $3); }
+ | OP_JGE 'x' ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ | OP_JGE '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JGE | BPF_X, 0, 0, 0); }
+ ;
+
+jset
+ : OP_JSET '#' number ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_K, 0, 0, $3); }
+ | OP_JSET 'x' ',' label ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_jmp_label($6, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_X, 0, 0, 0); }
+ | OP_JSET '%' 'x' ',' label ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_jmp_label($7, JFL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_X, 0, 0, 0); }
+ | OP_JSET '#' number ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_K, 0, 0, $3); }
+ | OP_JSET 'x' ',' label {
+ bpf_set_jmp_label($4, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_X, 0, 0, 0); }
+ | OP_JSET '%' 'x' ',' label {
+ bpf_set_jmp_label($5, JTL);
+ bpf_set_curr_instr(BPF_JMP | BPF_JSET | BPF_X, 0, 0, 0); }
+ ;
+
+add
+ : OP_ADD '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_ADD | BPF_K, 0, 0, $3); }
+ | OP_ADD 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_ADD | BPF_X, 0, 0, 0); }
+ | OP_ADD '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_ADD | BPF_X, 0, 0, 0); }
+ ;
+
+sub
+ : OP_SUB '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_SUB | BPF_K, 0, 0, $3); }
+ | OP_SUB 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_SUB | BPF_X, 0, 0, 0); }
+ | OP_SUB '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_SUB | BPF_X, 0, 0, 0); }
+ ;
+
+mul
+ : OP_MUL '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_MUL | BPF_K, 0, 0, $3); }
+ | OP_MUL 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_MUL | BPF_X, 0, 0, 0); }
+ | OP_MUL '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_MUL | BPF_X, 0, 0, 0); }
+ ;
+
+div
+ : OP_DIV '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_DIV | BPF_K, 0, 0, $3); }
+ | OP_DIV 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_DIV | BPF_X, 0, 0, 0); }
+ | OP_DIV '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_DIV | BPF_X, 0, 0, 0); }
+ ;
+
+mod
+ : OP_MOD '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_MOD | BPF_K, 0, 0, $3); }
+ | OP_MOD 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_MOD | BPF_X, 0, 0, 0); }
+ | OP_MOD '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_MOD | BPF_X, 0, 0, 0); }
+ ;
+
+neg
+ : OP_NEG {
+ bpf_set_curr_instr(BPF_ALU | BPF_NEG, 0, 0, 0); }
+ ;
+
+and
+ : OP_AND '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_AND | BPF_K, 0, 0, $3); }
+ | OP_AND 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_AND | BPF_X, 0, 0, 0); }
+ | OP_AND '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_AND | BPF_X, 0, 0, 0); }
+ ;
+
+or
+ : OP_OR '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_OR | BPF_K, 0, 0, $3); }
+ | OP_OR 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_OR | BPF_X, 0, 0, 0); }
+ | OP_OR '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_OR | BPF_X, 0, 0, 0); }
+ ;
+
+xor
+ : OP_XOR '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_XOR | BPF_K, 0, 0, $3); }
+ | OP_XOR 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_XOR | BPF_X, 0, 0, 0); }
+ | OP_XOR '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_XOR | BPF_X, 0, 0, 0); }
+ ;
+
+lsh
+ : OP_LSH '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_LSH | BPF_K, 0, 0, $3); }
+ | OP_LSH 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_LSH | BPF_X, 0, 0, 0); }
+ | OP_LSH '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_LSH | BPF_X, 0, 0, 0); }
+ ;
+
+rsh
+ : OP_RSH '#' number {
+ bpf_set_curr_instr(BPF_ALU | BPF_RSH | BPF_K, 0, 0, $3); }
+ | OP_RSH 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_RSH | BPF_X, 0, 0, 0); }
+ | OP_RSH '%' 'x' {
+ bpf_set_curr_instr(BPF_ALU | BPF_RSH | BPF_X, 0, 0, 0); }
+ ;
+
+ret
+ : OP_RET 'a' {
+ bpf_set_curr_instr(BPF_RET | BPF_A, 0, 0, 0); }
+ | OP_RET '%' 'a' {
+ bpf_set_curr_instr(BPF_RET | BPF_A, 0, 0, 0); }
+ | OP_RET 'x' {
+ bpf_set_curr_instr(BPF_RET | BPF_X, 0, 0, 0); }
+ | OP_RET '%' 'x' {
+ bpf_set_curr_instr(BPF_RET | BPF_X, 0, 0, 0); }
+ | OP_RET '#' number {
+ bpf_set_curr_instr(BPF_RET | BPF_K, 0, 0, $3); }
+ ;
+
+tax
+ : OP_TAX {
+ bpf_set_curr_instr(BPF_MISC | BPF_TAX, 0, 0, 0); }
+ ;
+
+txa
+ : OP_TXA {
+ bpf_set_curr_instr(BPF_MISC | BPF_TXA, 0, 0, 0); }
+ ;
+
+%%
+
+static int curr_instr = 0;
+static struct sock_filter out[BPF_MAXINSNS];
+static char **labels, **labels_jt, **labels_jf, **labels_k;
+
+static void bpf_assert_max(void)
+{
+ if (curr_instr >= BPF_MAXINSNS) {
+ fprintf(stderr, "only max %u insns allowed!\n", BPF_MAXINSNS);
+ exit(0);
+ }
+}
+
+static void bpf_set_curr_instr(uint16_t code, uint8_t jt, uint8_t jf,
+ uint32_t k)
+{
+ bpf_assert_max();
+ out[curr_instr].code = code;
+ out[curr_instr].jt = jt;
+ out[curr_instr].jf = jf;
+ out[curr_instr].k = k;
+ curr_instr++;
+}
+
+static void bpf_set_curr_label(char *label)
+{
+ bpf_assert_max();
+ labels[curr_instr] = label;
+}
+
+static void bpf_set_jmp_label(char *label, enum jmp_type type)
+{
+ bpf_assert_max();
+ switch (type) {
+ case JTL:
+ labels_jt[curr_instr] = label;
+ break;
+ case JFL:
+ labels_jf[curr_instr] = label;
+ break;
+ case JKL:
+ labels_k[curr_instr] = label;
+ break;
+ }
+}
+
+static int bpf_find_insns_offset(const char *label)
+{
+ int i, max = curr_instr, ret = -ENOENT;
+
+ for (i = 0; i < max; i++) {
+ if (labels[i] && !strcmp(label, labels[i])) {
+ ret = i;
+ break;
+ }
+ }
+
+ if (ret == -ENOENT) {
+ fprintf(stderr, "no such label \'%s\'!\n", label);
+ exit(0);
+ }
+
+ return ret;
+}
+
+static void bpf_stage_1_insert_insns(void)
+{
+ yyparse();
+}
+
+static void bpf_reduce_k_jumps(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++) {
+ if (labels_k[i]) {
+ int off = bpf_find_insns_offset(labels_k[i]);
+ out[i].k = (uint32_t) (off - i - 1);
+ }
+ }
+}
+
+static void bpf_reduce_jt_jumps(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++) {
+ if (labels_jt[i]) {
+ int off = bpf_find_insns_offset(labels_jt[i]);
+ out[i].jt = (uint8_t) (off - i -1);
+ }
+ }
+}
+
+static void bpf_reduce_jf_jumps(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++) {
+ if (labels_jf[i]) {
+ int off = bpf_find_insns_offset(labels_jf[i]);
+ out[i].jf = (uint8_t) (off - i - 1);
+ }
+ }
+}
+
+static void bpf_stage_2_reduce_labels(void)
+{
+ bpf_reduce_k_jumps();
+ bpf_reduce_jt_jumps();
+ bpf_reduce_jf_jumps();
+}
+
+static void bpf_pretty_print_c(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++)
+ printf("{ %#04x, %2u, %2u, %#010x },\n", out[i].code,
+ out[i].jt, out[i].jf, out[i].k);
+}
+
+static void bpf_pretty_print(void)
+{
+ int i;
+
+ printf("%u,", curr_instr);
+ for (i = 0; i < curr_instr; i++)
+ printf("%u %u %u %u,", out[i].code,
+ out[i].jt, out[i].jf, out[i].k);
+ printf("\n");
+}
+
+static void bpf_init(void)
+{
+ memset(out, 0, sizeof(out));
+
+ labels = calloc(BPF_MAXINSNS, sizeof(*labels));
+ assert(labels);
+ labels_jt = calloc(BPF_MAXINSNS, sizeof(*labels_jt));
+ assert(labels_jt);
+ labels_jf = calloc(BPF_MAXINSNS, sizeof(*labels_jf));
+ assert(labels_jf);
+ labels_k = calloc(BPF_MAXINSNS, sizeof(*labels_k));
+ assert(labels_k);
+}
+
+static void bpf_destroy_labels(void)
+{
+ int i;
+
+ for (i = 0; i < curr_instr; i++) {
+ free(labels_jf[i]);
+ free(labels_jt[i]);
+ free(labels_k[i]);
+ free(labels[i]);
+ }
+}
+
+static void bpf_destroy(void)
+{
+ bpf_destroy_labels();
+ free(labels_jt);
+ free(labels_jf);
+ free(labels_k);
+ free(labels);
+}
+
+void bpf_asm_compile(FILE *fp, bool cstyle)
+{
+ yyin = fp;
+
+ bpf_init();
+ bpf_stage_1_insert_insns();
+ bpf_stage_2_reduce_labels();
+ bpf_destroy();
+
+ if (cstyle)
+ bpf_pretty_print_c();
+ else
+ bpf_pretty_print();
+
+ if (fp != stdin)
+ fclose(yyin);
+}
+
+void yyerror(const char *str)
+{
+ exit(1);
+}
diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c
new file mode 100644
index 00000000000..c5baf9c591b
--- /dev/null
+++ b/tools/net/bpf_jit_disasm.c
@@ -0,0 +1,197 @@
+/*
+ * Minimal BPF JIT image disassembler
+ *
+ * Disassembles BPF JIT compiler emitted opcodes back to asm insn's for
+ * debugging or verification purposes.
+ *
+ * To get the disassembly of the JIT code, do the following:
+ *
+ * 1) `echo 2 > /proc/sys/net/core/bpf_jit_enable`
+ * 2) Load a BPF filter (e.g. `tcpdump -p -n -s 0 -i eth1 host 192.168.20.0/24`)
+ * 3) Run e.g. `bpf_jit_disasm -o` to read out the last JIT code
+ *
+ * Copyright 2013 Daniel Borkmann <borkmann@redhat.com>
+ * Licensed under the GNU General Public License, version 2.0 (GPLv2)
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <unistd.h>
+#include <string.h>
+#include <bfd.h>
+#include <dis-asm.h>
+#include <sys/klog.h>
+#include <sys/types.h>
+#include <regex.h>
+
+static void get_exec_path(char *tpath, size_t size)
+{
+ char *path;
+ ssize_t len;
+
+ snprintf(tpath, size, "/proc/%d/exe", (int) getpid());
+ tpath[size - 1] = 0;
+
+ path = strdup(tpath);
+ assert(path);
+
+ len = readlink(path, tpath, size);
+ tpath[len] = 0;
+
+ free(path);
+}
+
+static void get_asm_insns(uint8_t *image, size_t len, int opcodes)
+{
+ int count, i, pc = 0;
+ char tpath[256];
+ struct disassemble_info info;
+ disassembler_ftype disassemble;
+ bfd *bfdf;
+
+ memset(tpath, 0, sizeof(tpath));
+ get_exec_path(tpath, sizeof(tpath));
+
+ bfdf = bfd_openr(tpath, NULL);
+ assert(bfdf);
+ assert(bfd_check_format(bfdf, bfd_object));
+
+ init_disassemble_info(&info, stdout, (fprintf_ftype) fprintf);
+ info.arch = bfd_get_arch(bfdf);
+ info.mach = bfd_get_mach(bfdf);
+ info.buffer = image;
+ info.buffer_length = len;
+
+ disassemble_init_for_target(&info);
+
+ disassemble = disassembler(bfdf);
+ assert(disassemble);
+
+ do {
+ printf("%4x:\t", pc);
+
+ count = disassemble(pc, &info);
+
+ if (opcodes) {
+ printf("\n\t");
+ for (i = 0; i < count; ++i)
+ printf("%02x ", (uint8_t) image[pc + i]);
+ }
+ printf("\n");
+
+ pc += count;
+ } while(count > 0 && pc < len);
+
+ bfd_close(bfdf);
+}
+
+static char *get_klog_buff(int *klen)
+{
+ int ret, len = klogctl(10, NULL, 0);
+ char *buff = malloc(len);
+
+ assert(buff && klen);
+ ret = klogctl(3, buff, len);
+ assert(ret >= 0);
+ *klen = ret;
+
+ return buff;
+}
+
+static void put_klog_buff(char *buff)
+{
+ free(buff);
+}
+
+static int get_last_jit_image(char *haystack, size_t hlen,
+ uint8_t *image, size_t ilen)
+{
+ char *ptr, *pptr, *tmp;
+ off_t off = 0;
+ int ret, flen, proglen, pass, ulen = 0;
+ regmatch_t pmatch[1];
+ unsigned long base;
+ regex_t regex;
+
+ if (hlen == 0)
+ return 0;
+
+ ret = regcomp(&regex, "flen=[[:alnum:]]+ proglen=[[:digit:]]+ "
+ "pass=[[:digit:]]+ image=[[:xdigit:]]+", REG_EXTENDED);
+ assert(ret == 0);
+
+ ptr = haystack;
+ while (1) {
+ ret = regexec(&regex, ptr, 1, pmatch, 0);
+ if (ret == 0) {
+ ptr += pmatch[0].rm_eo;
+ off += pmatch[0].rm_eo;
+ assert(off < hlen);
+ } else
+ break;
+ }
+
+ ptr = haystack + off - (pmatch[0].rm_eo - pmatch[0].rm_so);
+ ret = sscanf(ptr, "flen=%d proglen=%d pass=%d image=%lx",
+ &flen, &proglen, &pass, &base);
+ if (ret != 4)
+ return 0;
+
+ tmp = ptr = haystack + off;
+ while ((ptr = strtok(tmp, "\n")) != NULL && ulen < ilen) {
+ tmp = NULL;
+ if (!strstr(ptr, "JIT code"))
+ continue;
+ pptr = ptr;
+ while ((ptr = strstr(pptr, ":")))
+ pptr = ptr + 1;
+ ptr = pptr;
+ do {
+ image[ulen++] = (uint8_t) strtoul(pptr, &pptr, 16);
+ if (ptr == pptr || ulen >= ilen) {
+ ulen--;
+ break;
+ }
+ ptr = pptr;
+ } while (1);
+ }
+
+ assert(ulen == proglen);
+ printf("%d bytes emitted from JIT compiler (pass:%d, flen:%d)\n",
+ proglen, pass, flen);
+ printf("%lx + <x>:\n", base);
+
+ regfree(&regex);
+ return ulen;
+}
+
+int main(int argc, char **argv)
+{
+ int len, klen, opcodes = 0;
+ char *kbuff;
+ static uint8_t image[32768];
+
+ if (argc > 1) {
+ if (!strncmp("-o", argv[argc - 1], 2)) {
+ opcodes = 1;
+ } else {
+ printf("usage: bpf_jit_disasm [-o: show opcodes]\n");
+ exit(0);
+ }
+ }
+
+ bfd_init();
+ memset(image, 0, sizeof(image));
+
+ kbuff = get_klog_buff(&klen);
+
+ len = get_last_jit_image(kbuff, klen, image, sizeof(image));
+ if (len > 0)
+ get_asm_insns(image, len, opcodes);
+
+ put_klog_buff(kbuff);
+
+ return 0;
+}
diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh
new file mode 100755
index 00000000000..06a399ac8b2
--- /dev/null
+++ b/tools/nfsd/inject_fault.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+#
+# Script for easier NFSD fault injection
+
+# Check that debugfs has been mounted
+DEBUGFS=`cat /proc/mounts | grep debugfs`
+if [ "$DEBUGFS" == "" ]; then
+ echo "debugfs does not appear to be mounted!"
+ echo "Please mount debugfs and try again"
+ exit 1
+fi
+
+# Check that the fault injection directory exists
+DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd
+if [ ! -d "$DEBUGDIR" ]; then
+ echo "$DEBUGDIR does not exist"
+ echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION"
+ exit 1
+fi
+
+function help()
+{
+ echo "Usage $0 injection_type [count]"
+ echo ""
+ echo "Injection types are:"
+ ls $DEBUGDIR
+ exit 1
+}
+
+if [ $# == 0 ]; then
+ help
+elif [ ! -f $DEBUGDIR/$1 ]; then
+ help
+elif [ $# != 2 ]; then
+ COUNT=0
+else
+ COUNT=$2
+fi
+
+BEFORE=`mktemp`
+AFTER=`mktemp`
+dmesg > $BEFORE
+echo $COUNT > $DEBUGDIR/$1
+dmesg > $AFTER
+# Capture lines that only exist in the $AFTER file
+diff $BEFORE $AFTER | grep ">"
+rm -f $BEFORE $AFTER
diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
new file mode 100644
index 00000000000..782d86e961b
--- /dev/null
+++ b/tools/perf/.gitignore
@@ -0,0 +1,26 @@
+PERF-CFLAGS
+PERF-GUI-VARS
+PERF-VERSION-FILE
+perf
+perf-help
+perf-record
+perf-report
+perf-stat
+perf-top
+perf*.1
+perf*.xml
+perf*.html
+common-cmds.h
+perf.data
+perf.data.old
+output.svg
+perf-archive
+tags
+TAGS
+cscope*
+config.mak
+config.mak.autogen
+*-bison.*
+*-flex.*
+*.pyc
+*.pyo
diff --git a/tools/perf/CREDITS b/tools/perf/CREDITS
new file mode 100644
index 00000000000..c2ddcb3acbd
--- /dev/null
+++ b/tools/perf/CREDITS
@@ -0,0 +1,30 @@
+Most of the infrastructure that 'perf' uses here has been reused
+from the Git project, as of version:
+
+ 66996ec: Sync with 1.6.2.4
+
+Here is an (incomplete!) list of main contributors to those files
+in util/* and elsewhere:
+
+ Alex Riesen
+ Christian Couder
+ Dmitry Potapov
+ Jeff King
+ Johannes Schindelin
+ Johannes Sixt
+ Junio C Hamano
+ Linus Torvalds
+ Matthias Kestenholz
+ Michal Ostrowski
+ Miklos Vajna
+ Petr Baudis
+ Pierre Habouzit
+ René Scharfe
+ Samuel Tardieu
+ Shawn O. Pearce
+ Steffen Prohaska
+ Steve Haslam
+
+Thanks guys!
+
+The full history of the files can be found in the upstream Git commits.
diff --git a/tools/perf/Documentation/Makefile b/tools/perf/Documentation/Makefile
new file mode 100644
index 00000000000..3ba1c0b0990
--- /dev/null
+++ b/tools/perf/Documentation/Makefile
@@ -0,0 +1,343 @@
+include ../../scripts/Makefile.include
+include ../config/utilities.mak
+
+MAN1_TXT= \
+ $(filter-out $(addsuffix .txt, $(ARTICLES) $(SP_ARTICLES)), \
+ $(wildcard perf-*.txt)) \
+ perf.txt
+MAN5_TXT=
+MAN7_TXT=
+
+MAN_TXT = $(MAN1_TXT) $(MAN5_TXT) $(MAN7_TXT)
+_MAN_XML=$(patsubst %.txt,%.xml,$(MAN_TXT))
+_MAN_HTML=$(patsubst %.txt,%.html,$(MAN_TXT))
+
+MAN_XML=$(addprefix $(OUTPUT),$(_MAN_XML))
+MAN_HTML=$(addprefix $(OUTPUT),$(_MAN_HTML))
+
+ARTICLES =
+# with their own formatting rules.
+SP_ARTICLES =
+API_DOCS = $(patsubst %.txt,%,$(filter-out technical/api-index-skel.txt technical/api-index.txt, $(wildcard technical/api-*.txt)))
+SP_ARTICLES += $(API_DOCS)
+SP_ARTICLES += technical/api-index
+
+_DOC_HTML = $(_MAN_HTML)
+_DOC_HTML+=$(patsubst %,%.html,$(ARTICLES) $(SP_ARTICLES))
+DOC_HTML=$(addprefix $(OUTPUT),$(_DOC_HTML))
+
+_DOC_MAN1=$(patsubst %.txt,%.1,$(MAN1_TXT))
+_DOC_MAN5=$(patsubst %.txt,%.5,$(MAN5_TXT))
+_DOC_MAN7=$(patsubst %.txt,%.7,$(MAN7_TXT))
+
+DOC_MAN1=$(addprefix $(OUTPUT),$(_DOC_MAN1))
+DOC_MAN5=$(addprefix $(OUTPUT),$(_DOC_MAN5))
+DOC_MAN7=$(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+# Make the path relative to DESTDIR, not prefix
+ifndef DESTDIR
+prefix?=$(HOME)
+endif
+bindir?=$(prefix)/bin
+htmldir?=$(prefix)/share/doc/perf-doc
+pdfdir?=$(prefix)/share/doc/perf-doc
+mandir?=$(prefix)/share/man
+man1dir=$(mandir)/man1
+man5dir=$(mandir)/man5
+man7dir=$(mandir)/man7
+
+ASCIIDOC=asciidoc
+ASCIIDOC_EXTRA = --unsafe
+MANPAGE_XSL = manpage-normal.xsl
+XMLTO_EXTRA =
+INSTALL?=install
+RM ?= rm -f
+DOC_REF = origin/man
+HTML_REF = origin/html
+
+infodir?=$(prefix)/share/info
+MAKEINFO=makeinfo
+INSTALL_INFO=install-info
+DOCBOOK2X_TEXI=docbook2x-texi
+DBLATEX=dblatex
+XMLTO=xmlto
+ifndef PERL_PATH
+ PERL_PATH = /usr/bin/perl
+endif
+
+-include ../config.mak.autogen
+-include ../config.mak
+
+_tmp_tool_path := $(call get-executable,$(ASCIIDOC))
+ifeq ($(_tmp_tool_path),)
+ missing_tools = $(ASCIIDOC)
+endif
+
+_tmp_tool_path := $(call get-executable,$(XMLTO))
+ifeq ($(_tmp_tool_path),)
+ missing_tools += $(XMLTO)
+endif
+
+#
+# For asciidoc ...
+# -7.1.2, no extra settings are needed.
+# 8.0-, set ASCIIDOC8.
+#
+
+#
+# For docbook-xsl ...
+# -1.68.1, set ASCIIDOC_NO_ROFF? (based on changelog from 1.73.0)
+# 1.69.0, no extra settings are needed?
+# 1.69.1-1.71.0, set DOCBOOK_SUPPRESS_SP?
+# 1.71.1, no extra settings are needed?
+# 1.72.0, set DOCBOOK_XSL_172.
+# 1.73.0-, set ASCIIDOC_NO_ROFF
+#
+
+#
+# If you had been using DOCBOOK_XSL_172 in an attempt to get rid
+# of 'the ".ft C" problem' in your generated manpages, and you
+# instead ended up with weird characters around callouts, try
+# using ASCIIDOC_NO_ROFF instead (it works fine with ASCIIDOC8).
+#
+
+ifdef ASCIIDOC8
+ASCIIDOC_EXTRA += -a asciidoc7compatible
+endif
+ifdef DOCBOOK_XSL_172
+ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+MANPAGE_XSL = manpage-1.72.xsl
+else
+ ifdef ASCIIDOC_NO_ROFF
+ # docbook-xsl after 1.72 needs the regular XSL, but will not
+ # pass-thru raw roff codes from asciidoc.conf, so turn them off.
+ ASCIIDOC_EXTRA += -a perf-asciidoc-no-roff
+ endif
+endif
+ifdef MAN_BOLD_LITERAL
+XMLTO_EXTRA += -m manpage-bold-literal.xsl
+endif
+ifdef DOCBOOK_SUPPRESS_SP
+XMLTO_EXTRA += -m manpage-suppress-sp.xsl
+endif
+
+SHELL_PATH ?= $(SHELL)
+# Shell quote;
+SHELL_PATH_SQ = $(subst ','\'',$(SHELL_PATH))
+
+#
+# Please note that there is a minor bug in asciidoc.
+# The version after 6.0.3 _will_ include the patch found here:
+# http://marc.theaimsgroup.com/?l=perf&m=111558757202243&w=2
+#
+# Until that version is released you may have to apply the patch
+# yourself - yes, all 6 characters of it!
+#
+
+QUIET_SUBDIR0 = +$(MAKE) -C # space to separate -C and subdir
+QUIET_SUBDIR1 =
+
+ifneq ($(findstring $(MAKEFLAGS),w),w)
+PRINT_DIR = --no-print-directory
+else # "make -w"
+NO_SUBDIR = :
+endif
+
+ifneq ($(findstring $(MAKEFLAGS),s),s)
+ifneq ($(V),1)
+ QUIET_ASCIIDOC = @echo ' ASCIIDOC '$@;
+ QUIET_XMLTO = @echo ' XMLTO '$@;
+ QUIET_DB2TEXI = @echo ' DB2TEXI '$@;
+ QUIET_MAKEINFO = @echo ' MAKEINFO '$@;
+ QUIET_DBLATEX = @echo ' DBLATEX '$@;
+ QUIET_XSLTPROC = @echo ' XSLTPROC '$@;
+ QUIET_GEN = @echo ' GEN '$@;
+ QUIET_STDERR = 2> /dev/null
+ QUIET_SUBDIR0 = +@subdir=
+ QUIET_SUBDIR1 = ;$(NO_SUBDIR) \
+ echo ' SUBDIR ' $$subdir; \
+ $(MAKE) $(PRINT_DIR) -C $$subdir
+ export V
+endif
+endif
+
+all: html man
+
+html: $(DOC_HTML)
+
+$(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7): asciidoc.conf
+
+man: man1 man5 man7
+man1: $(DOC_MAN1)
+man5: $(DOC_MAN5)
+man7: $(DOC_MAN7)
+
+info: $(OUTPUT)perf.info $(OUTPUT)perfman.info
+
+pdf: $(OUTPUT)user-manual.pdf
+
+install: install-man
+
+check-man-tools:
+ifdef missing_tools
+ $(error "You need to install $(missing_tools) for man pages")
+endif
+
+do-install-man: man
+ $(call QUIET_INSTALL, Documentation-man) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(man1dir); \
+# $(INSTALL) -d -m 755 $(DESTDIR)$(man5dir); \
+# $(INSTALL) -d -m 755 $(DESTDIR)$(man7dir); \
+ $(INSTALL) -m 644 $(DOC_MAN1) $(DESTDIR)$(man1dir); \
+# $(INSTALL) -m 644 $(DOC_MAN5) $(DESTDIR)$(man5dir); \
+# $(INSTALL) -m 644 $(DOC_MAN7) $(DESTDIR)$(man7dir)
+
+install-man: check-man-tools man
+
+ifdef missing_tools
+ DO_INSTALL_MAN = $(warning Please install $(missing_tools) to have the man pages installed)
+else
+ DO_INSTALL_MAN = do-install-man
+endif
+
+try-install-man: $(DO_INSTALL_MAN)
+
+install-info: info
+ $(call QUIET_INSTALL, Documentation-info) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(infodir); \
+ $(INSTALL) -m 644 $(OUTPUT)perf.info $(OUTPUT)perfman.info $(DESTDIR)$(infodir); \
+ if test -r $(DESTDIR)$(infodir)/dir; then \
+ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perf.info ;\
+ $(INSTALL_INFO) --info-dir=$(DESTDIR)$(infodir) perfman.info ;\
+ else \
+ echo "No directory found in $(DESTDIR)$(infodir)" >&2 ; \
+ fi
+
+install-pdf: pdf
+ $(call QUIET_INSTALL, Documentation-pdf) \
+ $(INSTALL) -d -m 755 $(DESTDIR)$(pdfdir); \
+ $(INSTALL) -m 644 $(OUTPUT)user-manual.pdf $(DESTDIR)$(pdfdir)
+
+#install-html: html
+# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(DESTDIR)$(htmldir)
+
+
+#
+# Determine "include::" file references in asciidoc files.
+#
+$(OUTPUT)doc.dep : $(wildcard *.txt) build-docdep.perl
+ $(QUIET_GEN)$(RM) $@+ $@ && \
+ $(PERL_PATH) ./build-docdep.perl >$@+ $(QUIET_STDERR) && \
+ mv $@+ $@
+
+-include $(OUPTUT)doc.dep
+
+_cmds_txt = cmds-ancillaryinterrogators.txt \
+ cmds-ancillarymanipulators.txt \
+ cmds-mainporcelain.txt \
+ cmds-plumbinginterrogators.txt \
+ cmds-plumbingmanipulators.txt \
+ cmds-synchingrepositories.txt \
+ cmds-synchelpers.txt \
+ cmds-purehelpers.txt \
+ cmds-foreignscminterface.txt
+cmds_txt=$(addprefix $(OUTPUT),$(_cmds_txt))
+
+$(cmds_txt): $(OUTPUT)cmd-list.made
+
+$(OUTPUT)cmd-list.made: cmd-list.perl ../command-list.txt $(MAN1_TXT)
+ $(QUIET_GEN)$(RM) $@ && \
+ $(PERL_PATH) ./cmd-list.perl ../command-list.txt $(QUIET_STDERR) && \
+ date >$@
+
+CLEAN_FILES = \
+ $(MAN_XML) $(addsuffix +,$(MAN_XML)) \
+ $(MAN_HTML) $(addsuffix +,$(MAN_HTML)) \
+ $(DOC_HTML) $(DOC_MAN1) $(DOC_MAN5) $(DOC_MAN7) \
+ $(OUTPUT)*.texi $(OUTPUT)*.texi+ $(OUTPUT)*.texi++ \
+ $(OUTPUT)perf.info $(OUTPUT)perfman.info \
+ $(OUTPUT)howto-index.txt $(OUTPUT)howto/*.html $(OUTPUT)doc.dep \
+ $(OUTPUT)technical/api-*.html $(OUTPUT)technical/api-index.txt \
+ $(cmds_txt) $(OUTPUT)*.made
+clean:
+ $(call QUIET_CLEAN, Documentation) $(RM) $(CLEAN_FILES)
+
+$(MAN_HTML): $(OUTPUT)%.html : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b xhtml11 -d manpage -f asciidoc.conf \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ mv $@+ $@
+
+$(OUTPUT)%.1 $(OUTPUT)%.5 $(OUTPUT)%.7 : $(OUTPUT)%.xml
+ $(QUIET_XMLTO)$(RM) $@ && \
+ $(XMLTO) -o $(OUTPUT). -m $(MANPAGE_XSL) $(XMLTO_EXTRA) man $<
+
+$(OUTPUT)%.xml : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ $(ASCIIDOC) -b docbook -d manpage -f asciidoc.conf \
+ $(ASCIIDOC_EXTRA) -aperf_version=$(PERF_VERSION) -o $@+ $< && \
+ mv $@+ $@
+
+XSLT = docbook.xsl
+XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css
+
+$(OUTPUT)user-manual.html: $(OUTPUT)user-manual.xml
+ $(QUIET_XSLTPROC)xsltproc $(XSLTOPTS) -o $@ $(XSLT) $<
+
+$(OUTPUT)perf.info: $(OUTPUT)user-manual.texi
+ $(QUIET_MAKEINFO)$(MAKEINFO) --no-split -o $@ $(OUTPUT)user-manual.texi
+
+$(OUTPUT)user-manual.texi: $(OUTPUT)user-manual.xml
+ $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+ $(DOCBOOK2X_TEXI) $(OUTPUT)user-manual.xml --encoding=UTF-8 --to-stdout >$@++ && \
+ $(PERL_PATH) fix-texi.perl <$@++ >$@+ && \
+ rm $@++ && \
+ mv $@+ $@
+
+$(OUTPUT)user-manual.pdf: $(OUTPUT)user-manual.xml
+ $(QUIET_DBLATEX)$(RM) $@+ $@ && \
+ $(DBLATEX) -o $@+ -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \
+ mv $@+ $@
+
+$(OUTPUT)perfman.texi: $(MAN_XML) cat-texi.perl
+ $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+ ($(foreach xml,$(MAN_XML),$(DOCBOOK2X_TEXI) --encoding=UTF-8 \
+ --to-stdout $(xml) &&) true) > $@++ && \
+ $(PERL_PATH) cat-texi.perl $@ <$@++ >$@+ && \
+ rm $@++ && \
+ mv $@+ $@
+
+$(OUTPUT)perfman.info: $(OUTPUT)perfman.texi
+ $(QUIET_MAKEINFO)$(MAKEINFO) --no-split --no-validate $*.texi
+
+$(patsubst %.txt,%.texi,$(MAN_TXT)): %.texi : %.xml
+ $(QUIET_DB2TEXI)$(RM) $@+ $@ && \
+ $(DOCBOOK2X_TEXI) --to-stdout $*.xml >$@+ && \
+ mv $@+ $@
+
+howto-index.txt: howto-index.sh $(wildcard howto/*.txt)
+ $(QUIET_GEN)$(RM) $@+ $@ && \
+ '$(SHELL_PATH_SQ)' ./howto-index.sh $(wildcard howto/*.txt) >$@+ && \
+ mv $@+ $@
+
+$(patsubst %,%.html,$(ARTICLES)) : %.html : %.txt
+ $(QUIET_ASCIIDOC)$(ASCIIDOC) -b xhtml11 $*.txt
+
+WEBDOC_DEST = /pub/software/tools/perf/docs
+
+$(patsubst %.txt,%.html,$(wildcard howto/*.txt)): %.html : %.txt
+ $(QUIET_ASCIIDOC)$(RM) $@+ $@ && \
+ sed -e '1,/^$$/d' $< | $(ASCIIDOC) -b xhtml11 - >$@+ && \
+ mv $@+ $@
+
+# UNIMPLEMENTED
+#install-webdoc : html
+# '$(SHELL_PATH_SQ)' ./install-webdoc.sh $(WEBDOC_DEST)
+
+# quick-install: quick-install-man
+
+# quick-install-man:
+# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(DOC_REF) $(DESTDIR)$(mandir)
+
+#quick-install-html:
+# '$(SHELL_PATH_SQ)' ./install-doc-quick.sh $(HTML_REF) $(DESTDIR)$(htmldir)
diff --git a/tools/perf/Documentation/android.txt b/tools/perf/Documentation/android.txt
new file mode 100644
index 00000000000..8484c3a04a6
--- /dev/null
+++ b/tools/perf/Documentation/android.txt
@@ -0,0 +1,78 @@
+How to compile perf for Android
+=========================================
+
+I. Set the Android NDK environment
+------------------------------------------------
+
+(a). Use the Android NDK
+------------------------------------------------
+1. You need to download and install the Android Native Development Kit (NDK).
+Set the NDK variable to point to the path where you installed the NDK:
+ export NDK=/path/to/android-ndk
+
+2. Set cross-compiling environment variables for NDK toolchain and sysroot.
+For arm:
+ export NDK_TOOLCHAIN=${NDK}/toolchains/arm-linux-androideabi-4.6/prebuilt/linux-x86/bin/arm-linux-androideabi-
+ export NDK_SYSROOT=${NDK}/platforms/android-9/arch-arm
+For x86:
+ export NDK_TOOLCHAIN=${NDK}/toolchains/x86-4.6/prebuilt/linux-x86/bin/i686-linux-android-
+ export NDK_SYSROOT=${NDK}/platforms/android-9/arch-x86
+
+This method is not working for Android NDK versions up to Revision 8b.
+perf uses some bionic enhancements that are not included in these NDK versions.
+You can use method (b) described below instead.
+
+(b). Use the Android source tree
+-----------------------------------------------
+1. Download the master branch of the Android source tree.
+Set the environment for the target you want using:
+ source build/envsetup.sh
+ lunch
+
+2. Build your own NDK sysroot to contain latest bionic changes and set the
+NDK sysroot environment variable.
+ cd ${ANDROID_BUILD_TOP}/ndk
+For arm:
+ ./build/tools/build-ndk-sysroot.sh --abi=arm
+ export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-arm
+For x86:
+ ./build/tools/build-ndk-sysroot.sh --abi=x86
+ export NDK_SYSROOT=${ANDROID_BUILD_TOP}/ndk/build/platforms/android-3/arch-x86
+
+3. Set the NDK toolchain environment variable.
+For arm:
+ export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/arm-linux-androideabi-
+For x86:
+ export NDK_TOOLCHAIN=${ANDROID_TOOLCHAIN}/i686-linux-android-
+
+II. Compile perf for Android
+------------------------------------------------
+You need to run make with the NDK toolchain and sysroot defined above:
+For arm:
+ make ARCH=arm CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}"
+For x86:
+ make ARCH=x86 CROSS_COMPILE=${NDK_TOOLCHAIN} CFLAGS="--sysroot=${NDK_SYSROOT}"
+
+III. Install perf
+-----------------------------------------------
+You need to connect to your Android device/emulator using adb.
+Install perf using:
+ adb push perf /data/perf
+
+If you also want to use perf-archive you need busybox tools for Android.
+For installing perf-archive, you first need to replace #!/bin/bash with #!/system/bin/sh:
+ sed 's/#!\/bin\/bash/#!\/system\/bin\/sh/g' perf-archive >> /tmp/perf-archive
+ chmod +x /tmp/perf-archive
+ adb push /tmp/perf-archive /data/perf-archive
+
+IV. Environment settings for running perf
+------------------------------------------------
+Some perf features need environment variables to run properly.
+You need to set these before running perf on the target:
+ adb shell
+ # PERF_PAGER=cat
+
+IV. Run perf
+------------------------------------------------
+Run perf on your device/emulator to which you previously connected using adb:
+ # ./data/perf
diff --git a/tools/perf/Documentation/asciidoc.conf b/tools/perf/Documentation/asciidoc.conf
new file mode 100644
index 00000000000..356b23a4033
--- /dev/null
+++ b/tools/perf/Documentation/asciidoc.conf
@@ -0,0 +1,91 @@
+## linkperf: macro
+#
+# Usage: linkperf:command[manpage-section]
+#
+# Note, {0} is the manpage section, while {target} is the command.
+#
+# Show PERF link as: <command>(<section>); if section is defined, else just show
+# the command.
+
+[macros]
+(?su)[\\]?(?P<name>linkperf):(?P<target>\S*?)\[(?P<attrlist>.*?)\]=
+
+[attributes]
+asterisk=&#42;
+plus=&#43;
+caret=&#94;
+startsb=&#91;
+endsb=&#93;
+tilde=&#126;
+
+ifdef::backend-docbook[]
+[linkperf-inlinemacro]
+{0%{target}}
+{0#<citerefentry>}
+{0#<refentrytitle>{target}</refentrytitle><manvolnum>{0}</manvolnum>}
+{0#</citerefentry>}
+endif::backend-docbook[]
+
+ifdef::backend-docbook[]
+ifndef::perf-asciidoc-no-roff[]
+# "unbreak" docbook-xsl v1.68 for manpages. v1.69 works with or without this.
+# v1.72 breaks with this because it replaces dots not in roff requests.
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+ifdef::doctype-manpage[]
+&#10;.ft C&#10;
+endif::doctype-manpage[]
+|
+ifdef::doctype-manpage[]
+&#10;.ft&#10;
+endif::doctype-manpage[]
+</literallayout>
+{title#}</example>
+endif::perf-asciidoc-no-roff[]
+
+ifdef::perf-asciidoc-no-roff[]
+ifdef::doctype-manpage[]
+# The following two small workarounds insert a simple paragraph after screen
+[listingblock]
+<example><title>{title}</title>
+<literallayout>
+|
+</literallayout><simpara></simpara>
+{title#}</example>
+
+[verseblock]
+<formalpara{id? id="{id}"}><title>{title}</title><para>
+{title%}<literallayout{id? id="{id}"}>
+{title#}<literallayout>
+|
+</literallayout>
+{title#}</para></formalpara>
+{title%}<simpara></simpara>
+endif::doctype-manpage[]
+endif::perf-asciidoc-no-roff[]
+endif::backend-docbook[]
+
+ifdef::doctype-manpage[]
+ifdef::backend-docbook[]
+[header]
+template::[header-declarations]
+<refentry>
+<refmeta>
+<refentrytitle>{mantitle}</refentrytitle>
+<manvolnum>{manvolnum}</manvolnum>
+<refmiscinfo class="source">perf</refmiscinfo>
+<refmiscinfo class="version">{perf_version}</refmiscinfo>
+<refmiscinfo class="manual">perf Manual</refmiscinfo>
+</refmeta>
+<refnamediv>
+ <refname>{manname}</refname>
+ <refpurpose>{manpurpose}</refpurpose>
+</refnamediv>
+endif::backend-docbook[]
+endif::doctype-manpage[]
+
+ifdef::backend-xhtml11[]
+[linkperf-inlinemacro]
+<a href="{target}.html">{target}{0?({0})}</a>
+endif::backend-xhtml11[]
diff --git a/tools/perf/Documentation/examples.txt b/tools/perf/Documentation/examples.txt
new file mode 100644
index 00000000000..a4e39215648
--- /dev/null
+++ b/tools/perf/Documentation/examples.txt
@@ -0,0 +1,225 @@
+
+ ------------------------------
+ ****** perf by examples ******
+ ------------------------------
+
+[ From an e-mail by Ingo Molnar, http://lkml.org/lkml/2009/8/4/346 ]
+
+
+First, discovery/enumeration of available counters can be done via
+'perf list':
+
+titan:~> perf list
+ [...]
+ kmem:kmalloc [Tracepoint event]
+ kmem:kmem_cache_alloc [Tracepoint event]
+ kmem:kmalloc_node [Tracepoint event]
+ kmem:kmem_cache_alloc_node [Tracepoint event]
+ kmem:kfree [Tracepoint event]
+ kmem:kmem_cache_free [Tracepoint event]
+ kmem:mm_page_free [Tracepoint event]
+ kmem:mm_page_free_batched [Tracepoint event]
+ kmem:mm_page_alloc [Tracepoint event]
+ kmem:mm_page_alloc_zone_locked [Tracepoint event]
+ kmem:mm_page_pcpu_drain [Tracepoint event]
+ kmem:mm_page_alloc_extfrag [Tracepoint event]
+
+Then any (or all) of the above event sources can be activated and
+measured. For example the page alloc/free properties of a 'hackbench
+run' are:
+
+ titan:~> perf stat -e kmem:mm_page_pcpu_drain -e kmem:mm_page_alloc
+ -e kmem:mm_page_free_batched -e kmem:mm_page_free ./hackbench 10
+ Time: 0.575
+
+ Performance counter stats for './hackbench 10':
+
+ 13857 kmem:mm_page_pcpu_drain
+ 27576 kmem:mm_page_alloc
+ 6025 kmem:mm_page_free_batched
+ 20934 kmem:mm_page_free
+
+ 0.613972165 seconds time elapsed
+
+You can observe the statistical properties as well, by using the
+'repeat the workload N times' feature of perf stat:
+
+ titan:~> perf stat --repeat 5 -e kmem:mm_page_pcpu_drain -e
+ kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+ kmem:mm_page_free ./hackbench 10
+ Time: 0.627
+ Time: 0.644
+ Time: 0.564
+ Time: 0.559
+ Time: 0.626
+
+ Performance counter stats for './hackbench 10' (5 runs):
+
+ 12920 kmem:mm_page_pcpu_drain ( +- 3.359% )
+ 25035 kmem:mm_page_alloc ( +- 3.783% )
+ 6104 kmem:mm_page_free_batched ( +- 0.934% )
+ 18376 kmem:mm_page_free ( +- 4.941% )
+
+ 0.643954516 seconds time elapsed ( +- 2.363% )
+
+Furthermore, these tracepoints can be used to sample the workload as
+well. For example the page allocations done by a 'git gc' can be
+captured the following way:
+
+ titan:~/git> perf record -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.267 MB perf.data (~11679 samples) ]
+
+To check which functions generated page allocations:
+
+ titan:~/git> perf report
+ # Samples: 10646
+ #
+ # Overhead Command Shared Object
+ # ........ ............... ..........................
+ #
+ 23.57% git-repack /lib64/libc-2.5.so
+ 21.81% git /lib64/libc-2.5.so
+ 14.59% git ./git
+ 11.79% git-repack ./git
+ 7.12% git /lib64/ld-2.5.so
+ 3.16% git-repack /lib64/libpthread-2.5.so
+ 2.09% git-repack /bin/bash
+ 1.97% rm /lib64/libc-2.5.so
+ 1.39% mv /lib64/ld-2.5.so
+ 1.37% mv /lib64/libc-2.5.so
+ 1.12% git-repack /lib64/ld-2.5.so
+ 0.95% rm /lib64/ld-2.5.so
+ 0.90% git-update-serv /lib64/libc-2.5.so
+ 0.73% git-update-serv /lib64/ld-2.5.so
+ 0.68% perf /lib64/libpthread-2.5.so
+ 0.64% git-repack /usr/lib64/libz.so.1.2.3
+
+Or to see it on a more finegrained level:
+
+titan:~/git> perf report --sort comm,dso,symbol
+# Samples: 10646
+#
+# Overhead Command Shared Object Symbol
+# ........ ............... .......................... ......
+#
+ 9.35% git-repack ./git [.] insert_obj_hash
+ 9.12% git ./git [.] insert_obj_hash
+ 7.31% git /lib64/libc-2.5.so [.] memcpy
+ 6.34% git-repack /lib64/libc-2.5.so [.] _int_malloc
+ 6.24% git-repack /lib64/libc-2.5.so [.] memcpy
+ 5.82% git-repack /lib64/libc-2.5.so [.] __GI___fork
+ 5.47% git /lib64/libc-2.5.so [.] _int_malloc
+ 2.99% git /lib64/libc-2.5.so [.] memset
+
+Furthermore, call-graph sampling can be done too, of page
+allocations - to see precisely what kind of page allocations there
+are:
+
+ titan:~/git> perf record -g -e kmem:mm_page_alloc -c 1 ./git gc
+ Counting objects: 1148, done.
+ Delta compression using up to 2 threads.
+ Compressing objects: 100% (450/450), done.
+ Writing objects: 100% (1148/1148), done.
+ Total 1148 (delta 690), reused 1148 (delta 690)
+ [ perf record: Captured and wrote 0.963 MB perf.data (~42069 samples) ]
+
+ titan:~/git> perf report -g
+ # Samples: 10686
+ #
+ # Overhead Command Shared Object
+ # ........ ............... ..........................
+ #
+ 23.25% git-repack /lib64/libc-2.5.so
+ |
+ |--50.00%-- _int_free
+ |
+ |--37.50%-- __GI___fork
+ | make_child
+ |
+ |--12.50%-- ptmalloc_unlock_all2
+ | make_child
+ |
+ --6.25%-- __GI_strcpy
+ 21.61% git /lib64/libc-2.5.so
+ |
+ |--30.00%-- __GI_read
+ | |
+ | --83.33%-- git_config_from_file
+ | git_config
+ | |
+ [...]
+
+Or you can observe the whole system's page allocations for 10
+seconds:
+
+titan:~/git> perf stat -a -e kmem:mm_page_pcpu_drain -e
+kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+kmem:mm_page_free sleep 10
+
+ Performance counter stats for 'sleep 10':
+
+ 171585 kmem:mm_page_pcpu_drain
+ 322114 kmem:mm_page_alloc
+ 73623 kmem:mm_page_free_batched
+ 254115 kmem:mm_page_free
+
+ 10.000591410 seconds time elapsed
+
+Or observe how fluctuating the page allocations are, via statistical
+analysis done over ten 1-second intervals:
+
+ titan:~/git> perf stat --repeat 10 -a -e kmem:mm_page_pcpu_drain -e
+ kmem:mm_page_alloc -e kmem:mm_page_free_batched -e
+ kmem:mm_page_free sleep 1
+
+ Performance counter stats for 'sleep 1' (10 runs):
+
+ 17254 kmem:mm_page_pcpu_drain ( +- 3.709% )
+ 34394 kmem:mm_page_alloc ( +- 4.617% )
+ 7509 kmem:mm_page_free_batched ( +- 4.820% )
+ 25653 kmem:mm_page_free ( +- 3.672% )
+
+ 1.058135029 seconds time elapsed ( +- 3.089% )
+
+Or you can annotate the recorded 'git gc' run on a per symbol basis
+and check which instructions/source-code generated page allocations:
+
+ titan:~/git> perf annotate __GI___fork
+ ------------------------------------------------
+ Percent | Source code & Disassembly of libc-2.5.so
+ ------------------------------------------------
+ :
+ :
+ : Disassembly of section .plt:
+ : Disassembly of section .text:
+ :
+ : 00000031a2e95560 <__fork>:
+ [...]
+ 0.00 : 31a2e95602: b8 38 00 00 00 mov $0x38,%eax
+ 0.00 : 31a2e95607: 0f 05 syscall
+ 83.42 : 31a2e95609: 48 3d 00 f0 ff ff cmp $0xfffffffffffff000,%rax
+ 0.00 : 31a2e9560f: 0f 87 4d 01 00 00 ja 31a2e95762 <__fork+0x202>
+ 0.00 : 31a2e95615: 85 c0 test %eax,%eax
+
+( this shows that 83.42% of __GI___fork's page allocations come from
+ the 0x38 system call it performs. )
+
+etc. etc. - a lot more is possible. I could list a dozen of
+other different usecases straight away - neither of which is
+possible via /proc/vmstat.
+
+/proc/vmstat is not in the same league really, in terms of
+expressive power of system analysis and performance
+analysis.
+
+All that the above results needed were those new tracepoints
+in include/tracing/events/kmem.h.
+
+ Ingo
+
+
diff --git a/tools/perf/Documentation/jit-interface.txt b/tools/perf/Documentation/jit-interface.txt
new file mode 100644
index 00000000000..a8656f56491
--- /dev/null
+++ b/tools/perf/Documentation/jit-interface.txt
@@ -0,0 +1,15 @@
+perf supports a simple JIT interface to resolve symbols for dynamic code generated
+by a JIT.
+
+The JIT has to write a /tmp/perf-%d.map (%d = pid of process) file
+
+This is a text file.
+
+Each line has the following format, fields separated with spaces:
+
+START SIZE symbolname
+
+START and SIZE are hex numbers without 0x.
+symbolname is the rest of the line, so it could contain special characters.
+
+The ownership of the file has to match the process.
diff --git a/tools/perf/Documentation/manpage-1.72.xsl b/tools/perf/Documentation/manpage-1.72.xsl
new file mode 100644
index 00000000000..b4d315cb8c4
--- /dev/null
+++ b/tools/perf/Documentation/manpage-1.72.xsl
@@ -0,0 +1,14 @@
+<!-- manpage-1.72.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles peculiarities in docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the special values for the roff control characters
+ needed for docbook-xsl 1.72.0 -->
+<xsl:param name="git.docbook.backslash">&#x2593;</xsl:param>
+<xsl:param name="git.docbook.dot" >&#x2302;</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-base.xsl b/tools/perf/Documentation/manpage-base.xsl
new file mode 100644
index 00000000000..a264fa61609
--- /dev/null
+++ b/tools/perf/Documentation/manpage-base.xsl
@@ -0,0 +1,35 @@
+<!-- manpage-base.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- these params silence some output from xmlto -->
+<xsl:param name="man.output.quietly" select="1"/>
+<xsl:param name="refentry.meta.get.quietly" select="1"/>
+
+<!-- convert asciidoc callouts to man page format;
+ git.docbook.backslash and git.docbook.dot params
+ must be supplied by another XSL file or other means -->
+<xsl:template match="co">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB(',
+ substring-after(@id,'-'),')',
+ $git.docbook.backslash,'fR')"/>
+</xsl:template>
+<xsl:template match="calloutlist">
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>sp&#10;</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:text>&#10;</xsl:text>
+</xsl:template>
+<xsl:template match="callout">
+ <xsl:value-of select="concat(
+ $git.docbook.backslash,'fB',
+ substring-after(@arearefs,'-'),
+ '. ',$git.docbook.backslash,'fR')"/>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.dot"/>
+ <xsl:text>br&#10;</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-bold-literal.xsl b/tools/perf/Documentation/manpage-bold-literal.xsl
new file mode 100644
index 00000000000..608eb5df628
--- /dev/null
+++ b/tools/perf/Documentation/manpage-bold-literal.xsl
@@ -0,0 +1,17 @@
+<!-- manpage-bold-literal.xsl:
+ special formatting for manpages rendered from asciidoc+docbook -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- render literal text as bold (instead of plain or monospace);
+ this makes literal text easier to distinguish in manpages
+ viewed on a tty -->
+<xsl:template match="literal">
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fB</xsl:text>
+ <xsl:apply-templates/>
+ <xsl:value-of select="$git.docbook.backslash"/>
+ <xsl:text>fR</xsl:text>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-normal.xsl b/tools/perf/Documentation/manpage-normal.xsl
new file mode 100644
index 00000000000..a48f5b11f3d
--- /dev/null
+++ b/tools/perf/Documentation/manpage-normal.xsl
@@ -0,0 +1,13 @@
+<!-- manpage-normal.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles anything we want to keep away from docbook-xsl 1.72.0 -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<xsl:import href="manpage-base.xsl"/>
+
+<!-- these are the normal values for the roff control characters -->
+<xsl:param name="git.docbook.backslash">\</xsl:param>
+<xsl:param name="git.docbook.dot" >.</xsl:param>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/manpage-suppress-sp.xsl b/tools/perf/Documentation/manpage-suppress-sp.xsl
new file mode 100644
index 00000000000..a63c7632a87
--- /dev/null
+++ b/tools/perf/Documentation/manpage-suppress-sp.xsl
@@ -0,0 +1,21 @@
+<!-- manpage-suppress-sp.xsl:
+ special settings for manpages rendered from asciidoc+docbook
+ handles erroneous, inline .sp in manpage output of some
+ versions of docbook-xsl -->
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+ version="1.0">
+
+<!-- attempt to work around spurious .sp at the tail of the line
+ that some versions of docbook stylesheets seem to add -->
+<xsl:template match="simpara">
+ <xsl:variable name="content">
+ <xsl:apply-templates/>
+ </xsl:variable>
+ <xsl:value-of select="normalize-space($content)"/>
+ <xsl:if test="not(ancestor::authorblurb) and
+ not(ancestor::personblurb)">
+ <xsl:text>&#10;&#10;</xsl:text>
+ </xsl:if>
+</xsl:template>
+
+</xsl:stylesheet>
diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt
new file mode 100644
index 00000000000..e9cd39a92dc
--- /dev/null
+++ b/tools/perf/Documentation/perf-annotate.txt
@@ -0,0 +1,101 @@
+perf-annotate(1)
+================
+
+NAME
+----
+perf-annotate - Read perf.data (created by perf record) and display annotated code
+
+SYNOPSIS
+--------
+[verse]
+'perf annotate' [-i <file> | --input=file] [symbol_name]
+
+DESCRIPTION
+-----------
+This command reads the input file and displays an annotated version of the
+code. If the object file has debug symbols then the source code will be
+displayed alongside assembly code.
+
+If there is no debug info in the object, then annotated assembly is displayed.
+
+OPTIONS
+-------
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-d::
+--dsos=<dso[,dso...]>::
+ Only consider symbols in these dsos.
+-s::
+--symbol=<symbol>::
+ Symbol to annotate.
+
+-f::
+--force::
+ Don't complain, do it.
+
+-v::
+--verbose::
+ Be more verbose. (Show symbol address, etc)
+
+-D::
+--dump-raw-trace::
+ Dump raw trace in ASCII.
+
+-k::
+--vmlinux=<file>::
+ vmlinux pathname.
+
+-m::
+--modules::
+ Load module symbols. WARNING: use only with -k and LIVE kernel.
+
+-l::
+--print-line::
+ Print matching source lines (may be slow).
+
+-P::
+--full-paths::
+ Don't shorten the displayed pathnames.
+
+--stdio:: Use the stdio interface.
+
+--tui:: Use the TUI interface. Use of --tui requires a tty, if one is not
+ present, as when piping to other commands, the stdio interface is
+ used. This interfaces starts by centering on the line with more
+ samples, TAB/UNTAB cycles through the lines with more samples.
+
+--gtk:: Use the GTK interface.
+
+-C::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+ be provided as a comma-separated list with no space: 0,1. Ranges of
+ CPUs are specified with -: 0-2. Default is to report samples on all
+ CPUs.
+
+--asm-raw::
+ Show raw instruction encoding of assembly instructions.
+
+--source::
+ Interleave source code with assembly code. Enabled by default,
+ disable with --no-source.
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--objdump=<path>::
+ Path to objdump binary.
+
+--skip-missing::
+ Skip symbols that cannot be annotated.
+
+--group::
+ Show event group information together
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-archive.txt b/tools/perf/Documentation/perf-archive.txt
new file mode 100644
index 00000000000..ac6ecbb3e66
--- /dev/null
+++ b/tools/perf/Documentation/perf-archive.txt
@@ -0,0 +1,22 @@
+perf-archive(1)
+===============
+
+NAME
+----
+perf-archive - Create archive with object files with build-ids found in perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf archive' [file]
+
+DESCRIPTION
+-----------
+This command runs perf-buildid-list --with-hits, and collects the files with the
+buildids found so that analysis of perf.data contents can be possible on another
+machine.
+
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-buildid-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
new file mode 100644
index 00000000000..4464ad770d5
--- /dev/null
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -0,0 +1,214 @@
+perf-bench(1)
+=============
+
+NAME
+----
+perf-bench - General framework for benchmark suites
+
+SYNOPSIS
+--------
+[verse]
+'perf bench' [<common options>] <subsystem> <suite> [<options>]
+
+DESCRIPTION
+-----------
+This 'perf bench' command is a general framework for benchmark suites.
+
+COMMON OPTIONS
+--------------
+-f::
+--format=::
+Specify format style.
+Current available format styles are:
+
+'default'::
+Default style. This is mainly for human reading.
+---------------------
+% perf bench sched pipe # with no style specified
+(executing 1000000 pipe operations between two tasks)
+ Total time:5.855 sec
+ 5.855061 usecs/op
+ 170792 ops/sec
+---------------------
+
+'simple'::
+This simple style is friendly for automated
+processing by scripts.
+---------------------
+% perf bench --format=simple sched pipe # specified simple
+5.988
+---------------------
+
+SUBSYSTEM
+---------
+
+'sched'::
+ Scheduler and IPC mechanisms.
+
+'mem'::
+ Memory access performance.
+
+'numa'::
+ NUMA scheduling and MM benchmarks.
+
+'futex'::
+ Futex stressing benchmarks.
+
+'all'::
+ All benchmark subsystems.
+
+SUITES FOR 'sched'
+~~~~~~~~~~~~~~~~~~
+*messaging*::
+Suite for evaluating performance of scheduler and IPC mechanisms.
+Based on hackbench by Rusty Russell.
+
+Options of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+-p::
+--pipe::
+Use pipe() instead of socketpair()
+
+-t::
+--thread::
+Be multi thread instead of multi process
+
+-g::
+--group=::
+Specify number of groups
+
+-l::
+--loop=::
+Specify number of loops
+
+Example of *messaging*
+^^^^^^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched messaging # run with default
+options (20 sender and receiver processes per group)
+(10 groups == 400 processes run)
+
+ Total time:0.308 sec
+
+% perf bench sched messaging -t -g 20 # be multi-thread, with 20 groups
+(20 sender and receiver threads per group)
+(20 groups == 800 threads run)
+
+ Total time:0.582 sec
+---------------------
+
+*pipe*::
+Suite for pipe() system call.
+Based on pipe-test-1m.c by Ingo Molnar.
+
+Options of *pipe*
+^^^^^^^^^^^^^^^^^
+-l::
+--loop=::
+Specify number of loops.
+
+Example of *pipe*
+^^^^^^^^^^^^^^^^^
+
+---------------------
+% perf bench sched pipe
+(executing 1000000 pipe operations between two tasks)
+
+ Total time:8.091 sec
+ 8.091833 usecs/op
+ 123581 ops/sec
+
+% perf bench sched pipe -l 1000 # loop 1000
+(executing 1000 pipe operations between two tasks)
+
+ Total time:0.016 sec
+ 16.948000 usecs/op
+ 59004 ops/sec
+---------------------
+
+SUITES FOR 'mem'
+~~~~~~~~~~~~~~~~
+*memcpy*::
+Suite for evaluating performance of simple memory copy in various ways.
+
+Options of *memcpy*
+^^^^^^^^^^^^^^^^^^^
+-l::
+--length::
+Specify length of memory to copy (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-r::
+--routine::
+Specify routine to copy (default: default).
+Available routines are depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-movsq and x86-64-movsb are supported.
+
+-i::
+--iterations::
+Repeat memcpy invocation this number of times.
+
+-c::
+--cycle::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+-o::
+--only-prefault::
+Show only the result with page faults before memcpy.
+
+-n::
+--no-prefault::
+Show only the result without page faults before memcpy.
+
+*memset*::
+Suite for evaluating performance of simple memory set in various ways.
+
+Options of *memset*
+^^^^^^^^^^^^^^^^^^^
+-l::
+--length::
+Specify length of memory to set (default: 1MB).
+Available units are B, KB, MB, GB and TB (case insensitive).
+
+-r::
+--routine::
+Specify routine to set (default: default).
+Available routines are depend on the architecture.
+On x86-64, x86-64-unrolled, x86-64-stosq and x86-64-stosb are supported.
+
+-i::
+--iterations::
+Repeat memset invocation this number of times.
+
+-c::
+--cycle::
+Use perf's cpu-cycles event instead of gettimeofday syscall.
+
+-o::
+--only-prefault::
+Show only the result with page faults before memset.
+
+-n::
+--no-prefault::
+Show only the result without page faults before memset.
+
+SUITES FOR 'numa'
+~~~~~~~~~~~~~~~~~
+*mem*::
+Suite for evaluating NUMA workloads.
+
+SUITES FOR 'futex'
+~~~~~~~~~~~~~~~~~~
+*hash*::
+Suite for evaluating hash tables.
+
+*wake*::
+Suite for evaluating wake calls.
+
+*requeue*::
+Suite for evaluating requeue calls.
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt
new file mode 100644
index 00000000000..fd77d81ea74
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-cache.txt
@@ -0,0 +1,53 @@
+perf-buildid-cache(1)
+=====================
+
+NAME
+----
+perf-buildid-cache - Manage build-id cache.
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-cache <options>'
+
+DESCRIPTION
+-----------
+This command manages the build-id cache. It can add and remove files to/from
+the cache. In the future it should as well purge older entries, set upper
+limits for the space used by the cache, etc.
+
+OPTIONS
+-------
+-a::
+--add=::
+ Add specified file to the cache.
+-k::
+--kcore::
+ Add specified kcore file to the cache. For the current host that is
+ /proc/kcore which requires root permissions to read. Be aware that
+ running 'perf buildid-cache' as root may update root's build-id cache
+ not the user's. Use the -v option to see where the file is created.
+ Note that the copied file contains only code sections not the whole core
+ image. Note also that files "kallsyms" and "modules" must also be in the
+ same directory and are also copied. All 3 files are created with read
+ permissions for root only. kcore will not be added if there is already a
+ kcore in the cache (with the same build-id) that has the same modules at
+ the same addresses. Use the -v option to see if a copy of kcore is
+ actually made.
+-r::
+--remove=::
+ Remove specified file from the cache.
+-M::
+--missing=::
+ List missing build ids in the cache for the specified file.
+-u::
+--update::
+ Update specified file of the cache. It can be used to update kallsyms
+ kernel dso to vmlinux in order to support annotation.
+-v::
+--verbose::
+ Be more verbose.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-buildid-list[1]
diff --git a/tools/perf/Documentation/perf-buildid-list.txt b/tools/perf/Documentation/perf-buildid-list.txt
new file mode 100644
index 00000000000..25c52efcc7f
--- /dev/null
+++ b/tools/perf/Documentation/perf-buildid-list.txt
@@ -0,0 +1,43 @@
+perf-buildid-list(1)
+====================
+
+NAME
+----
+perf-buildid-list - List the buildids in a perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf buildid-list <options>'
+
+DESCRIPTION
+-----------
+This command displays the buildids found in a perf.data file, so that other
+tools can be used to fetch packages with matching symbol tables for use by
+perf report.
+
+It can also be used to show the build id of the running kernel or in an ELF
+file using -i/--input.
+
+OPTIONS
+-------
+-H::
+--with-hits::
+ Show only DSOs with hits.
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+-f::
+--force::
+ Don't do ownership validation.
+-k::
+--kernel::
+ Show running kernel build id.
+-v::
+--verbose::
+ Be more verbose.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-top[1],
+linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt
new file mode 100644
index 00000000000..b3b8abae62b
--- /dev/null
+++ b/tools/perf/Documentation/perf-diff.txt
@@ -0,0 +1,206 @@
+perf-diff(1)
+============
+
+NAME
+----
+perf-diff - Read perf.data files and display the differential profile
+
+SYNOPSIS
+--------
+[verse]
+'perf diff' [baseline file] [data file1] [[data file2] ... ]
+
+DESCRIPTION
+-----------
+This command displays the performance difference amongst two or more perf.data
+files captured via perf record.
+
+If no parameters are passed it will assume perf.data.old and perf.data.
+
+The differential profile is displayed only for events matching both
+specified perf.data files.
+
+OPTIONS
+-------
+-D::
+--dump-raw-trace::
+ Dump raw trace in ASCII.
+
+-m::
+--modules::
+ Load module symbols. WARNING: use only with -k and LIVE kernel
+
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
+
+-C::
+--comms=::
+ Only consider symbols in these comms. CSV that understands
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
+
+-S::
+--symbols=::
+ Only consider these symbols. CSV that understands
+ file://filename entries. This option will affect the percentage
+ of the Baseline/Delta column. See --percentage for more info.
+
+-s::
+--sort=::
+ Sort by key(s): pid, comm, dso, symbol, cpu, parent, srcline.
+ Please see description of --sort in the perf-report man page.
+
+-t::
+--field-separator=::
+
+ Use a special separator character and don't pad with spaces, replacing
+ all occurrences of this separator in symbol names (and other output)
+ with a '.' character, that thus it's the only non valid separator.
+
+-v::
+--verbose::
+ Be verbose, for instance, show the raw counts in addition to the
+ diff.
+
+-f::
+--force::
+ Don't complain, do it.
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-b::
+--baseline-only::
+ Show only items with match in baseline.
+
+-c::
+--compute::
+ Differential computation selection - delta,ratio,wdiff (default is delta).
+ See COMPARISON METHODS section for more info.
+
+-p::
+--period::
+ Show period values for both compared hist entries.
+
+-F::
+--formula::
+ Show formula for given computation.
+
+-o::
+--order::
+ Specify compute sorting column number.
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options.
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+COMPARISON
+----------
+The comparison is governed by the baseline file. The baseline perf.data
+file is iterated for samples. All other perf.data files specified on
+the command line are searched for the baseline sample pair. If the pair
+is found, specified computation is made and result is displayed.
+
+All samples from non-baseline perf.data files, that do not match any
+baseline entry, are displayed with empty space within baseline column
+and possible computation results (delta) in their related column.
+
+Example files samples:
+- file A with samples f1, f2, f3, f4, f6
+- file B with samples f2, f4, f5
+- file C with samples f1, f2, f5
+
+Example output:
+ x - computation takes place for pair
+ b - baseline sample percentage
+
+- perf diff A B C
+
+ baseline/A compute/B compute/C samples
+ ---------------------------------------
+ b x f1
+ b x x f2
+ b f3
+ b x f4
+ b f6
+ x x f5
+
+- perf diff B A C
+
+ baseline/B compute/A compute/C samples
+ ---------------------------------------
+ b x x f2
+ b x f4
+ b x f5
+ x x f1
+ x f3
+ x f6
+
+- perf diff C B A
+
+ baseline/C compute/B compute/A samples
+ ---------------------------------------
+ b x f1
+ b x x f2
+ b x f5
+ x f3
+ x x f4
+ x f6
+
+COMPARISON METHODS
+------------------
+delta
+~~~~~
+If specified the 'Delta' column is displayed with value 'd' computed as:
+
+ d = A->period_percent - B->period_percent
+
+with:
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period_percent being the % of the hist entry period value within
+ single data file
+
+ - with filtering by -C, -d and/or -S, period_percent might be changed
+ relative to how entries are filtered. Use --percentage=absolute to
+ prevent such fluctuation.
+
+ratio
+~~~~~
+If specified the 'Ratio' column is displayed with value 'r' computed as:
+
+ r = A->period / B->period
+
+with:
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period being the hist entry period value
+
+wdiff:WEIGHT-B,WEIGHT-A
+~~~~~~~~~~~~~~~~~~~~~~~
+If specified the 'Weighted diff' column is displayed with value 'd' computed as:
+
+ d = B->period * WEIGHT-A - A->period * WEIGHT-B
+
+ - A/B being matching hist entry from data/baseline file specified
+ (or perf.data/perf.data.old) respectively.
+
+ - period being the hist entry period value
+
+ - WEIGHT-A/WEIGHT-B being user suplied weights in the the '-c' option
+ behind ':' separator like '-c wdiff:1,2'.
+ - WIEGHT-A being the weight of the data file
+ - WIEGHT-B being the weight of the baseline data file
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt
new file mode 100644
index 00000000000..1ceb3700ffb
--- /dev/null
+++ b/tools/perf/Documentation/perf-evlist.txt
@@ -0,0 +1,38 @@
+perf-evlist(1)
+==============
+
+NAME
+----
+perf-evlist - List the event names in a perf.data file
+
+SYNOPSIS
+--------
+[verse]
+'perf evlist <options>'
+
+DESCRIPTION
+-----------
+This command displays the names of events sampled in a perf.data file.
+
+OPTIONS
+-------
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-F::
+--freq=::
+ Show just the sample frequency used for each event.
+
+-v::
+--verbose=::
+ Show all fields.
+
+-g::
+--group::
+ Show event group information.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-list[1],
+linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-help.txt b/tools/perf/Documentation/perf-help.txt
new file mode 100644
index 00000000000..514391818d1
--- /dev/null
+++ b/tools/perf/Documentation/perf-help.txt
@@ -0,0 +1,38 @@
+perf-help(1)
+============
+
+NAME
+----
+perf-help - display help information about perf
+
+SYNOPSIS
+--------
+'perf help' [-a|--all] [COMMAND]
+
+DESCRIPTION
+-----------
+
+With no options and no COMMAND given, the synopsis of the 'perf'
+command and a list of the most commonly used perf commands are printed
+on the standard output.
+
+If the option '--all' or '-a' is given, then all available commands are
+printed on the standard output.
+
+If a perf command is named, a manual page for that command is brought
+up. The 'man' program is used by default for this purpose, but this
+can be overridden by other options or configuration variables.
+
+Note that `perf --help ...` is identical to `perf help ...` because the
+former is internally converted into the latter.
+
+OPTIONS
+-------
+-a::
+--all::
+ Prints all the available commands on the standard output. This
+ option supersedes any other option.
+
+PERF
+----
+Part of the linkperf:perf[1] suite
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
new file mode 100644
index 00000000000..a00a34276c5
--- /dev/null
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -0,0 +1,46 @@
+perf-inject(1)
+==============
+
+NAME
+----
+perf-inject - Filter to augment the events stream with additional information
+
+SYNOPSIS
+--------
+[verse]
+'perf inject <options>'
+
+DESCRIPTION
+-----------
+perf-inject reads a perf-record event stream and repipes it to stdout. At any
+point the processing code can inject other events into the event stream - in
+this case build-ids (-b option) are read and injected as needed into the event
+stream.
+
+Build-ids are just the first user of perf-inject - potentially anything that
+needs userspace processing to augment the events stream with additional
+information could make use of this facility.
+
+OPTIONS
+-------
+-b::
+--build-ids=::
+ Inject build-ids into the output stream
+-v::
+--verbose::
+ Be more verbose.
+-i::
+--input=::
+ Input file name. (default: stdin)
+-o::
+--output=::
+ Output file name. (default: stdout)
+-s::
+--sched-stat::
+ Merge sched_stat and sched_switch for getting events where and how long
+ tasks slept. sched_switch contains a callchain where a task slept and
+ sched_stat contains a timeslice how long a task slept.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
new file mode 100644
index 00000000000..7c8fbbf3f61
--- /dev/null
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -0,0 +1,47 @@
+perf-kmem(1)
+============
+
+NAME
+----
+perf-kmem - Tool to trace/measure kernel memory(slab) properties
+
+SYNOPSIS
+--------
+[verse]
+'perf kmem' {record|stat} [<options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf kmem:
+
+ 'perf kmem record <command>' to record the kmem events
+ of an arbitrary workload.
+
+ 'perf kmem stat' to report kernel memory statistics.
+
+OPTIONS
+-------
+-i <file>::
+--input=<file>::
+ Select the input file (default: perf.data unless stdin is a fifo)
+
+--caller::
+ Show per-callsite statistics
+
+--alloc::
+ Show per-allocation statistics
+
+-s <key[,key2...]>::
+--sort=<key[,key2...]>::
+ Sort the output (default: frag,hit,bytes)
+
+-l <num>::
+--line=<num>::
+ Print n lines only
+
+--raw-ip::
+ Print raw ip instead of symbol
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
new file mode 100644
index 00000000000..52276a6d2b7
--- /dev/null
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -0,0 +1,156 @@
+perf-kvm(1)
+===========
+
+NAME
+----
+perf-kvm - Tool to trace/measure kvm guest os
+
+SYNOPSIS
+--------
+[verse]
+'perf kvm' [--host] [--guest] [--guestmount=<path>
+ [--guestkallsyms=<path> --guestmodules=<path> | --guestvmlinux=<path>]]
+ {top|record|report|diff|buildid-list} [<options>]
+'perf kvm' [--host] [--guest] [--guestkallsyms=<path> --guestmodules=<path>
+ | --guestvmlinux=<path>] {top|record|report|diff|buildid-list|stat} [<options>]
+'perf kvm stat [record|report|live] [<options>]
+
+DESCRIPTION
+-----------
+There are a couple of variants of perf kvm:
+
+ 'perf kvm [options] top <command>' to generates and displays
+ a performance counter profile of guest os in realtime
+ of an arbitrary workload.
+
+ 'perf kvm record <command>' to record the performance counter profile
+ of an arbitrary workload and save it into a perf data file. We set the
+ default behavior of perf kvm as --guest, so if neither --host nor --guest
+ is input, the perf data file name is perf.data.guest. If --host is input,
+ the perf data file name is perf.data.kvm. If you want to record data into
+ perf.data.host, please input --host --no-guest. The behaviors are shown as
+ following:
+ Default('') -> perf.data.guest
+ --host -> perf.data.kvm
+ --guest -> perf.data.guest
+ --host --guest -> perf.data.kvm
+ --host --no-guest -> perf.data.host
+
+ 'perf kvm report' to display the performance counter profile information
+ recorded via perf kvm record.
+
+ 'perf kvm diff' to displays the performance difference amongst two perf.data
+ files captured via perf record.
+
+ 'perf kvm buildid-list' to display the buildids found in a perf data file,
+ so that other tools can be used to fetch packages with matching symbol tables
+ for use by perf report. As buildid is read from /sys/kernel/notes in os, then
+ if you want to list the buildid for guest, please make sure your perf data file
+ was captured with --guestmount in perf kvm record.
+
+ 'perf kvm stat <command>' to run a command and gather performance counter
+ statistics.
+ Especially, perf 'kvm stat record/report' generates a statistical analysis
+ of KVM events. Currently, vmexit, mmio and ioport events are supported.
+ 'perf kvm stat record <command>' records kvm events and the events between
+ start and end <command>.
+ And this command produces a file which contains tracing results of kvm
+ events.
+
+ 'perf kvm stat report' reports statistical data which includes events
+ handled time, samples, and so on.
+
+ 'perf kvm stat live' reports statistical data in a live mode (similar to
+ record + report but with statistical data updated live at a given display
+ rate).
+
+OPTIONS
+-------
+-i::
+--input=<path>::
+ Input file name.
+-o::
+--output=<path>::
+ Output file name.
+--host::
+ Collect host side performance profile.
+--guest::
+ Collect guest side performance profile.
+--guestmount=<path>::
+ Guest os root file system mount directory. Users mounts guest os
+ root directories under <path> by a specific filesystem access method,
+ typically, sshfs. For example, start 2 guest os. The one's pid is 8888
+ and the other's is 9999.
+ #mkdir ~/guestmount; cd ~/guestmount
+ #sshfs -o allow_other,direct_io -p 5551 localhost:/ 8888/
+ #sshfs -o allow_other,direct_io -p 5552 localhost:/ 9999/
+ #perf kvm --host --guest --guestmount=~/guestmount top
+--guestkallsyms=<path>::
+ Guest os /proc/kallsyms file copy. 'perf' kvm' reads it to get guest
+ kernel symbols. Users copy it out from guest os.
+--guestmodules=<path>::
+ Guest os /proc/modules file copy. 'perf' kvm' reads it to get guest
+ kernel module information. Users copy it out from guest os.
+--guestvmlinux=<path>::
+ Guest os kernel vmlinux.
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+STAT REPORT OPTIONS
+-------------------
+--vcpu=<value>::
+ analyze events which occures on this vcpu. (default: all vcpus)
+
+--event=<value>::
+ event to be analyzed. Possible values: vmexit, mmio, ioport.
+ (default: vmexit)
+-k::
+--key=<value>::
+ Sorting key. Possible values: sample (default, sort by samples
+ number), time (sort by average time).
+-p::
+--pid=::
+ Analyze events only for given process ID(s) (comma separated list).
+
+STAT LIVE OPTIONS
+-----------------
+-d::
+--display::
+ Time in seconds between display updates
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs.
+
+-p::
+--pid=::
+ Analyze events only for given process ID(s) (comma separated list).
+
+--vcpu=<value>::
+ analyze events which occures on this vcpu. (default: all vcpus)
+
+
+--event=<value>::
+ event to be analyzed. Possible values: vmexit, mmio, ioport.
+ (default: vmexit)
+
+-k::
+--key=<value>::
+ Sorting key. Possible values: sample (default, sort by samples
+ number), time (sort by average time).
+
+--duration=<value>::
+ Show events other than HLT that take longer than duration usecs.
+
+SEE ALSO
+--------
+linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-diff[1], linkperf:perf-buildid-list[1],
+linkperf:perf-stat[1]
diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt
new file mode 100644
index 00000000000..6fce6a62220
--- /dev/null
+++ b/tools/perf/Documentation/perf-list.txt
@@ -0,0 +1,122 @@
+perf-list(1)
+============
+
+NAME
+----
+perf-list - List all symbolic event types
+
+SYNOPSIS
+--------
+[verse]
+'perf list' [hw|sw|cache|tracepoint|pmu|event_glob]
+
+DESCRIPTION
+-----------
+This command displays the symbolic event types which can be selected in the
+various perf commands with the -e option.
+
+[[EVENT_MODIFIERS]]
+EVENT MODIFIERS
+---------------
+
+Events can optionally have a modifer by appending a colon and one or
+more modifiers. Modifiers allow the user to restrict the events to be
+counted. The following modifiers exist:
+
+ u - user-space counting
+ k - kernel counting
+ h - hypervisor counting
+ G - guest counting (in KVM guests)
+ H - host counting (not in KVM guests)
+ p - precise level
+ S - read sample value (PERF_SAMPLE_READ)
+ D - pin the event to the PMU
+
+The 'p' modifier can be used for specifying how precise the instruction
+address should be. The 'p' modifier can be specified multiple times:
+
+ 0 - SAMPLE_IP can have arbitrary skid
+ 1 - SAMPLE_IP must have constant skid
+ 2 - SAMPLE_IP requested to have 0 skid
+ 3 - SAMPLE_IP must have 0 skid
+
+For Intel systems precise event sampling is implemented with PEBS
+which supports up to precise-level 2.
+
+On AMD systems it is implemented using IBS (up to precise-level 2).
+The precise modifier works with event types 0x76 (cpu-cycles, CPU
+clocks not halted) and 0xC1 (micro-ops retired). Both events map to
+IBS execution sampling (IBS op) with the IBS Op Counter Control bit
+(IbsOpCntCtl) set respectively (see AMD64 Architecture Programmer’s
+Manual Volume 2: System Programming, 13.3 Instruction-Based
+Sampling). Examples to use IBS:
+
+ perf record -a -e cpu-cycles:p ... # use ibs op counting cycles
+ perf record -a -e r076:p ... # same as -e cpu-cycles:p
+ perf record -a -e r0C1:p ... # use ibs op counting micro-ops
+
+RAW HARDWARE EVENT DESCRIPTOR
+-----------------------------
+Even when an event is not available in a symbolic form within perf right now,
+it can be encoded in a per processor specific way.
+
+For instance For x86 CPUs NNN represents the raw register encoding with the
+layout of IA32_PERFEVTSELx MSRs (see [Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide] Figure 30-1 Layout
+of IA32_PERFEVTSELx MSRs) or AMD's PerfEvtSeln (see [AMD64 Architecture Programmer’s Manual Volume 2: System Programming], Page 344,
+Figure 13-7 Performance Event-Select Register (PerfEvtSeln)).
+
+Note: Only the following bit fields can be set in x86 counter
+registers: event, umask, edge, inv, cmask. Esp. guest/host only and
+OS/user mode flags must be setup using <<EVENT_MODIFIERS, EVENT
+MODIFIERS>>.
+
+Example:
+
+If the Intel docs for a QM720 Core i7 describe an event as:
+
+ Event Umask Event Mask
+ Num. Value Mnemonic Description Comment
+
+ A8H 01H LSD.UOPS Counts the number of micro-ops Use cmask=1 and
+ delivered by loop stream detector invert to count
+ cycles
+
+raw encoding of 0x1A8 can be used:
+
+ perf stat -e r1a8 -a sleep 1
+ perf record -e r1a8 ...
+
+You should refer to the processor specific documentation for getting these
+details. Some of them are referenced in the SEE ALSO section below.
+
+OPTIONS
+-------
+
+Without options all known events will be listed.
+
+To limit the list use:
+
+. 'hw' or 'hardware' to list hardware events such as cache-misses, etc.
+
+. 'sw' or 'software' to list software events such as context switches, etc.
+
+. 'cache' or 'hwcache' to list hardware cache events such as L1-dcache-loads, etc.
+
+. 'tracepoint' to list all tracepoint events, alternatively use
+ 'subsys_glob:event_glob' to filter by tracepoint subsystems such as sched,
+ block, etc.
+
+. 'pmu' to print the kernel supplied PMU events.
+
+. If none of the above is matched, it will apply the supplied glob to all
+ events, printing the ones that match.
+
+One or more types can be used at the same time, listing the events for the
+types specified.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1],
+http://www.intel.com/Assets/PDF/manual/253669.pdf[Intel® 64 and IA-32 Architectures Software Developer's Manual Volume 3B: System Programming Guide],
+http://support.amd.com/us/Processor_TechDocs/24593_APM_v2.pdf[AMD64 Architecture Programmer’s Manual Volume 2: System Programming]
diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt
new file mode 100644
index 00000000000..ab25be28c9d
--- /dev/null
+++ b/tools/perf/Documentation/perf-lock.txt
@@ -0,0 +1,66 @@
+perf-lock(1)
+============
+
+NAME
+----
+perf-lock - Analyze lock events
+
+SYNOPSIS
+--------
+[verse]
+'perf lock' {record|report|script|info}
+
+DESCRIPTION
+-----------
+You can analyze various lock behaviours
+and statistics with this 'perf lock' command.
+
+ 'perf lock record <command>' records lock events
+ between start and end <command>. And this command
+ produces the file "perf.data" which contains tracing
+ results of lock events.
+
+ 'perf lock report' reports statistical data.
+
+ 'perf lock script' shows raw lock events.
+
+ 'perf lock info' shows metadata like threads or addresses
+ of lock instances.
+
+COMMON OPTIONS
+--------------
+
+-i::
+--input=<file>::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+ Be more verbose (show symbol address, etc).
+
+-D::
+--dump-raw-trace::
+ Dump raw trace in ASCII.
+
+REPORT OPTIONS
+--------------
+
+-k::
+--key=<value>::
+ Sorting key. Possible values: acquired (default), contended,
+ avg_wait, wait_total, wait_max, wait_min.
+
+INFO OPTIONS
+------------
+
+-t::
+--threads::
+ dump thread list in perf.data
+
+-m::
+--map::
+ dump map of lock instances (address:name table)
+
+SEE ALSO
+--------
+linkperf:perf[1]
diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
new file mode 100644
index 00000000000..1d78a4064da
--- /dev/null
+++ b/tools/perf/Documentation/perf-mem.txt
@@ -0,0 +1,52 @@
+perf-mem(1)
+===========
+
+NAME
+----
+perf-mem - Profile memory accesses
+
+SYNOPSIS
+--------
+[verse]
+'perf mem' [<options>] (record [<command>] | report)
+
+DESCRIPTION
+-----------
+"perf mem -t <TYPE> record" runs a command and gathers memory operation data
+from it, into perf.data. Perf record options are accepted and are passed through.
+
+"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
+right set of options to display a memory access profile.
+
+Note that on Intel systems the memory latency reported is the use-latency,
+not the pure load (or store latency). Use latency includes any pipeline
+queueing delays in addition to the memory subsystem latency.
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+-t::
+--type=::
+ Select the memory operation type: load or store (default: load)
+
+-D::
+--dump-raw-samples=::
+ Dump the raw decoded samples on the screen in a format that is easy to parse with
+ one sample per line.
+
+-x::
+--field-separator::
+ Specify the field separator used when dump raw samples (-D option). By default,
+ The separator is the space character.
+
+-C::
+--cpu-list::
+ Restrict dump of raw samples to those provided via this option. Note that the same
+ option can be passed in record mode. It will be interpreted the same way as perf
+ record.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
new file mode 100644
index 00000000000..1513935c399
--- /dev/null
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -0,0 +1,207 @@
+perf-probe(1)
+=============
+
+NAME
+----
+perf-probe - Define new dynamic tracepoints
+
+SYNOPSIS
+--------
+[verse]
+'perf probe' [options] --add='PROBE' [...]
+or
+'perf probe' [options] PROBE
+or
+'perf probe' [options] --del='[GROUP:]EVENT' [...]
+or
+'perf probe' --list
+or
+'perf probe' [options] --line='LINE'
+or
+'perf probe' [options] --vars='PROBEPOINT'
+
+DESCRIPTION
+-----------
+This command defines dynamic tracepoint events, by symbol and registers
+without debuginfo, or by C expressions (C line numbers, C function names,
+and C local variables) with debuginfo.
+
+
+OPTIONS
+-------
+-k::
+--vmlinux=PATH::
+ Specify vmlinux path which has debuginfo (Dwarf binary).
+
+-m::
+--module=MODNAME|PATH::
+ Specify module name in which perf-probe searches probe points
+ or lines. If a path of module file is passed, perf-probe
+ treat it as an offline module (this means you can add a probe on
+ a module which has not been loaded yet).
+
+-s::
+--source=PATH::
+ Specify path to kernel source.
+
+-v::
+--verbose::
+ Be more verbose (show parsed arguments, etc).
+
+-a::
+--add=::
+ Define a probe event (see PROBE SYNTAX for detail).
+
+-d::
+--del=::
+ Delete probe events. This accepts glob wildcards('*', '?') and character
+ classes(e.g. [a-z], [!A-Z]).
+
+-l::
+--list::
+ List up current probe events.
+
+-L::
+--line=::
+ Show source code lines which can be probed. This needs an argument
+ which specifies a range of the source code. (see LINE SYNTAX for detail)
+
+-V::
+--vars=::
+ Show available local variables at given probe point. The argument
+ syntax is same as PROBE SYNTAX, but NO ARGs.
+
+--externs::
+ (Only for --vars) Show external defined variables in addition to local
+ variables.
+
+-F::
+--funcs::
+ Show available functions in given module or kernel. With -x/--exec,
+ can also list functions in a user space executable / shared library.
+
+--filter=FILTER::
+ (Only for --vars and --funcs) Set filter. FILTER is a combination of glob
+ pattern, see FILTER PATTERN for detail.
+ Default FILTER is "!__k???tab_* & !__crc_*" for --vars, and "!_*"
+ for --funcs.
+ If several filters are specified, only the last filter is used.
+
+-f::
+--force::
+ Forcibly add events with existing name.
+
+-n::
+--dry-run::
+ Dry run. With this option, --add and --del doesn't execute actual
+ adding and removal operations.
+
+--max-probes::
+ Set the maximum number of probe points for an event. Default is 128.
+
+-x::
+--exec=PATH::
+ Specify path to the executable or shared library file for user
+ space tracing. Can also be used with --funcs option.
+
+In absence of -m/-x options, perf probe checks if the first argument after
+the options is an absolute path name. If its an absolute path, perf probe
+uses it as a target module/target user space binary to probe.
+
+PROBE SYNTAX
+------------
+Probe points are defined by following syntax.
+
+ 1) Define event based on function name
+ [EVENT=]FUNC[@SRC][:RLN|+OFFS|%return|;PTN] [ARG ...]
+
+ 2) Define event based on source file with line number
+ [EVENT=]SRC:ALN [ARG ...]
+
+ 3) Define event based on source file with lazy pattern
+ [EVENT=]SRC;PTN [ARG ...]
+
+
+'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. Currently, event group name is set as 'probe'.
+'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function.
+It is also possible to specify a probe point by the source line number or lazy matching by using 'SRC:ALN' or 'SRC;PTN' syntax, where 'SRC' is the source file path, ':ALN' is the line number and ';PTN' is the lazy matching pattern.
+'ARG' specifies the arguments of this probe point, (see PROBE ARGUMENT).
+
+PROBE ARGUMENT
+--------------
+Each probe argument follows below syntax.
+
+ [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
+
+'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
+'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
+
+On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
+
+LINE SYNTAX
+-----------
+Line range is described by following syntax.
+
+ "FUNC[@SRC][:RLN[+NUM|-RLN2]]|SRC[:ALN[+NUM|-ALN2]]"
+
+FUNC specifies the function name of showing lines. 'RLN' is the start line
+number from function entry line, and 'RLN2' is the end line number. As same as
+probe syntax, 'SRC' means the source file path, 'ALN' is start line number,
+and 'ALN2' is end line number in the file. It is also possible to specify how
+many lines to show by using 'NUM'. Moreover, 'FUNC@SRC' combination is good
+for searching a specific function when several functions share same name.
+So, "source.c:100-120" shows lines between 100th to l20th in source.c file. And "func:10+20" shows 20 lines from 10th line of func function.
+
+LAZY MATCHING
+-------------
+ The lazy line matching is similar to glob matching but ignoring spaces in both of pattern and target. So this accepts wildcards('*', '?') and character classes(e.g. [a-z], [!A-Z]).
+
+e.g.
+ 'a=*' can matches 'a=b', 'a = b', 'a == b' and so on.
+
+This provides some sort of flexibility and robustness to probe point definitions against minor code changes. For example, actual 10th line of schedule() can be moved easily by modifying schedule(), but the same line matching 'rq=cpu_rq*' may still exist in the function.)
+
+FILTER PATTERN
+--------------
+ The filter pattern is a glob matching pattern(s) to filter variables.
+ In addition, you can use "!" for specifying filter-out rule. You also can give several rules combined with "&" or "|", and fold those rules as one rule by using "(" ")".
+
+e.g.
+ With --filter "foo* | bar*", perf probe -V shows variables which start with "foo" or "bar".
+ With --filter "!foo* & *bar", perf probe -V shows variables which don't start with "foo" and end with "bar", like "fizzbar". But "foobar" is filtered out.
+
+EXAMPLES
+--------
+Display which lines in schedule() can be probed:
+
+ ./perf probe --line schedule
+
+Add a probe on schedule() function 12th line with recording cpu local variable:
+
+ ./perf probe schedule:12 cpu
+ or
+ ./perf probe --add='schedule:12 cpu'
+
+ this will add one or more probes which has the name start with "schedule".
+
+ Add probes on lines in schedule() function which calls update_rq_clock().
+
+ ./perf probe 'schedule;update_rq_clock*'
+ or
+ ./perf probe --add='schedule;update_rq_clock*'
+
+Delete all probes on schedule().
+
+ ./perf probe --del='schedule*'
+
+Add probes at zfree() function on /bin/zsh
+
+ ./perf probe -x /bin/zsh zfree or ./perf probe /bin/zsh zfree
+
+Add probes at malloc() function on libc
+
+ ./perf probe -x /lib/libc.so.6 malloc or ./perf probe /lib/libc.so.6 malloc
+
+SEE ALSO
+--------
+linkperf:perf-trace[1], linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
new file mode 100644
index 00000000000..d460049cae8
--- /dev/null
+++ b/tools/perf/Documentation/perf-record.txt
@@ -0,0 +1,219 @@
+perf-record(1)
+==============
+
+NAME
+----
+perf-record - Run a command and record its profile into perf.data
+
+SYNOPSIS
+--------
+[verse]
+'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] <command>
+'perf record' [-e <EVENT> | --event=EVENT] [-l] [-a] -- <command> [<options>]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers a performance counter profile
+from it, into perf.data - without displaying anything.
+
+This file can then be inspected later on, using 'perf report'.
+
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+-e::
+--event=::
+ Select the PMU event. Selection can be:
+
+ - a symbolic event name (use 'perf list' to list all events)
+
+ - a raw PMU event (eventsel+umask) in the form of rNNN where NNN is a
+ hexadecimal event descriptor.
+
+ - a hardware breakpoint event in the form of '\mem:addr[:access]'
+ where addr is the address in memory you want to break in.
+ Access is the memory access type (read, write, execute) it can
+ be passed as follows: '\mem:addr[:[r][w][x]]'.
+ If you want to profile read-write accesses in 0x1000, just set
+ 'mem:0x1000:rw'.
+
+--filter=<filter>::
+ Event filter.
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs.
+
+-l::
+ Scale counter values.
+
+-p::
+--pid=::
+ Record events on existing process ID (comma separated list).
+
+-t::
+--tid=::
+ Record events on existing thread ID (comma separated list).
+ This option also disables inheritance by default. Enable it by adding
+ --inherit.
+
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
+
+-r::
+--realtime=::
+ Collect data with this RT SCHED_FIFO priority.
+
+--no-buffering::
+ Collect data without buffering.
+
+-c::
+--count=::
+ Event period to sample.
+
+-o::
+--output=::
+ Output file name.
+
+-i::
+--no-inherit::
+ Child tasks do not inherit counters.
+-F::
+--freq=::
+ Profile at this frequency.
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
+
+-g::
+ Enables call-graph (stack chain/backtrace) recording.
+
+--call-graph::
+ Setup and enable call-graph (stack chain/backtrace) recording,
+ implies -g.
+
+ Allows specifying "fp" (frame pointer) or "dwarf"
+ (DWARF's CFI - Call Frame Information) as the method to collect
+ the information used to show the call graphs.
+
+ In some systems, where binaries are build with gcc
+ --fomit-frame-pointer, using the "fp" method will produce bogus
+ call graphs, using "dwarf", if available (perf tools linked to
+ the libunwind library) should be used instead.
+
+-q::
+--quiet::
+ Don't print any message, useful for scripting.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-s::
+--stat::
+ Per thread counts.
+
+-d::
+--data::
+ Sample addresses.
+
+-T::
+--timestamp::
+ Sample timestamps. Use it with 'perf report -D' to see the timestamps,
+ for instance.
+
+-n::
+--no-samples::
+ Don't sample.
+
+-R::
+--raw-samples::
+Collect raw sample records from all opened counters (default for tracepoint counters).
+
+-C::
+--cpu::
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode with inheritance mode on (default), samples are captured only when
+the thread executes on the designated CPUs. Default is to monitor all CPUs.
+
+-N::
+--no-buildid-cache::
+Do not update the builid cache. This saves some overhead in situations
+where the information in the perf.data file (which includes buildids)
+is sufficient.
+
+-G name,...::
+--cgroup name,...::
+monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line.
+
+-b::
+--branch-any::
+Enable taken branch stack sampling. Any type of taken branch may be sampled.
+This is a shortcut for --branch-filter any. See --branch-filter for more infos.
+
+-j::
+--branch-filter::
+Enable taken branch stack sampling. Each sample captures a series of consecutive
+taken branches. The number of branches captured with each sample depends on the
+underlying hardware, the type of branches of interest, and the executed code.
+It is possible to select the types of branches captured by enabling filters. The
+following filters are defined:
+
+ - any: any type of branches
+ - any_call: any function call or system call
+ - any_ret: any function return or system call return
+ - ind_call: any indirect branch
+ - u: only when the branch target is at the user level
+ - k: only when the branch target is in the kernel
+ - hv: only when the target is at the hypervisor level
+ - in_tx: only when the target is in a hardware transaction
+ - no_tx: only when the target is not in a hardware transaction
+ - abort_tx: only when the target is a hardware transaction abort
+ - cond: conditional branches
+
++
+The option requires at least one branch type among any, any_call, any_ret, ind_call, cond.
+The privilege levels may be omitted, in which case, the privilege levels of the associated
+event are applied to the branch filter. Both kernel (k) and hypervisor (hv) privilege
+levels are subject to permissions. When sampling on multiple events, branch stack sampling
+is enabled for all the sampling events. The sampled branch type is the same for all events.
+The various filters must be specified as a comma separated list: --branch-filter any_ret,u,k
+Note that this feature may not be available on all processors.
+
+--weight::
+Enable weightened sampling. An additional weight is recorded per sample and can be
+displayed with the weight and local_weight sort keys. This currently works for TSX
+abort events and some memory events in precise mode on modern Intel CPUs.
+
+--transaction::
+Record transaction flags for transaction related events.
+
+--per-thread::
+Use per-thread mmaps. By default per-cpu mmaps are created. This option
+overrides that and uses per-thread mmaps. A side-effect of that is that
+inheritance is automatically disabled. --per-thread is ignored with a warning
+if combined with -a or -C options.
+
+-D::
+--delay=::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
new file mode 100644
index 00000000000..d2b59af62bc
--- /dev/null
+++ b/tools/perf/Documentation/perf-report.txt
@@ -0,0 +1,310 @@
+perf-report(1)
+==============
+
+NAME
+----
+perf-report - Read perf.data (created by perf record) and display the profile
+
+SYNOPSIS
+--------
+[verse]
+'perf report' [-i <file> | --input=file]
+
+DESCRIPTION
+-----------
+This command displays the performance counter profile information recorded
+via perf record.
+
+OPTIONS
+-------
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+ Be more verbose. (show symbol address, etc)
+
+-n::
+--show-nr-samples::
+ Show the number of samples for each symbol
+
+--showcpuutilization::
+ Show sample percentage for different cpu modes.
+
+-T::
+--threads::
+ Show per-thread event counters
+-c::
+--comms=::
+ Only consider symbols in these comms. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+-d::
+--dsos=::
+ Only consider symbols in these dsos. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+-S::
+--symbols=::
+ Only consider these symbols. CSV that understands
+ file://filename entries. This option will affect the percentage of
+ the overhead column. See --percentage for more info.
+
+--symbol-filter=::
+ Only show symbols that match (partially) with this filter.
+
+-U::
+--hide-unresolved::
+ Only display entries resolved to a symbol.
+
+-s::
+--sort=::
+ Sort histogram entries by given key(s) - multiple keys can be specified
+ in CSV format. Following sort keys are available:
+ pid, comm, dso, symbol, parent, cpu, srcline, weight, local_weight.
+
+ Each key has following meaning:
+
+ - comm: command (name) of the task which can be read via /proc/<pid>/comm
+ - pid: command and tid of the task
+ - dso: name of library or module executed at the time of sample
+ - symbol: name of function executed at the time of sample
+ - parent: name of function matched to the parent regex filter. Unmatched
+ entries are displayed as "[other]".
+ - cpu: cpu number the task ran at the time of sample
+ - srcline: filename and line number executed at the time of sample. The
+ DWARF debugging info must be provided.
+ - weight: Event specific weight, e.g. memory latency or transaction
+ abort cost. This is the global weight.
+ - local_weight: Local weight version of the weight above.
+ - transaction: Transaction abort flags.
+ - overhead: Overhead percentage of sample
+ - overhead_sys: Overhead percentage of sample running in system mode
+ - overhead_us: Overhead percentage of sample running in user mode
+ - overhead_guest_sys: Overhead percentage of sample running in system mode
+ on guest machine
+ - overhead_guest_us: Overhead percentage of sample running in user mode on
+ guest machine
+ - sample: Number of sample
+ - period: Raw number of event count of sample
+
+ By default, comm, dso and symbol keys are used.
+ (i.e. --sort comm,dso,symbol)
+
+ If --branch-stack option is used, following sort keys are also
+ available:
+ dso_from, dso_to, symbol_from, symbol_to, mispredict.
+
+ - dso_from: name of library or module branched from
+ - dso_to: name of library or module branched to
+ - symbol_from: name of function branched from
+ - symbol_to: name of function branched to
+ - mispredict: "N" for predicted branch, "Y" for mispredicted branch
+ - in_tx: branch in TSX transaction
+ - abort: TSX transaction abort.
+
+ And default sort keys are changed to comm, dso_from, symbol_from, dso_to
+ and symbol_to, see '--branch-stack'.
+
+-F::
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+ By default, every sort keys not specified in -F will be appended
+ automatically.
+
+ If --mem-mode option is used, following sort keys are also available
+ (incompatible with --branch-stack):
+ symbol_daddr, dso_daddr, locked, tlb, mem, snoop, dcacheline.
+
+ - symbol_daddr: name of data symbol being executed on at the time of sample
+ - dso_daddr: name of library or module containing the data being executed
+ on at the time of sample
+ - locked: whether the bus was locked at the time of sample
+ - tlb: type of tlb access for the data at the time of sample
+ - mem: type of memory access for the data at the time of sample
+ - snoop: type of snoop (if any) for the data at the time of sample
+ - dcacheline: the cacheline the data address is on at the time of sample
+
+ And default sort keys are changed to local_weight, mem, sym, dso,
+ symbol_daddr, dso_daddr, snoop, tlb, locked, see '--mem-mode'.
+
+-p::
+--parent=<regex>::
+ A regex filter to identify parent. The parent is a caller of this
+ function and searched through the callchain, thus it requires callchain
+ information recorded. The pattern is in the exteneded regex format and
+ defaults to "\^sys_|^do_page_fault", see '--sort parent'.
+
+-x::
+--exclude-other::
+ Only display entries with parent-match.
+
+-w::
+--column-widths=<width[,width...]>::
+ Force each column width to the provided list, for large terminal
+ readability.
+
+-t::
+--field-separator=::
+ Use a special separator character and don't pad with spaces, replacing
+ all occurrences of this separator in symbol names (and other output)
+ with a '.' character, that thus it's the only non valid separator.
+
+-D::
+--dump-raw-trace::
+ Dump raw trace in ASCII.
+
+-g [type,min[,limit],order[,key]]::
+--call-graph::
+ Display call chains using type, min percent threshold, optional print
+ limit and order.
+ type can be either:
+ - flat: single column, linear exposure of call chains.
+ - graph: use a graph tree, displaying absolute overhead rates.
+ - fractal: like graph, but displays relative rates. Each branch of
+ the tree is considered as a new profiled object. +
+
+ order can be either:
+ - callee: callee based call graph.
+ - caller: inverted caller based call graph.
+
+ key can be:
+ - function: compare on functions
+ - address: compare on individual code addresses
+
+ Default: fractal,0.5,callee,function.
+
+--children::
+ Accumulate callchain of children to parent entry so that then can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires callchains are recorded.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+
+ Default: 127
+
+-G::
+--inverted::
+ alias for inverted caller based call graph.
+
+--ignore-callees=<regex>::
+ Ignore callees of the function(s) matching the given regex.
+ This has the effect of collecting the callers of each such
+ function into one place in the call-graph tree.
+
+--pretty=<key>::
+ Pretty printing style. key: normal, raw
+
+--stdio:: Use the stdio interface.
+
+--tui:: Use the TUI interface, that is integrated with annotate and allows
+ zooming into DSOs or threads, among other features. Use of --tui
+ requires a tty, if one is not present, as when piping to other
+ commands, the stdio interface is used.
+
+--gtk:: Use the GTK2 interface.
+
+-k::
+--vmlinux=<file>::
+ vmlinux pathname
+
+--kallsyms=<file>::
+ kallsyms pathname
+
+-m::
+--modules::
+ Load module symbols. WARNING: This should only be used with -k and
+ a LIVE kernel.
+
+-f::
+--force::
+ Don't complain, do it.
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-C::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+ be provided as a comma-separated list with no space: 0,1. Ranges of
+ CPUs are specified with -: 0-2. Default is to report samples on all
+ CPUs.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--source::
+ Interleave source code with assembly code. Enabled by default,
+ disable with --no-source.
+
+--asm-raw::
+ Show raw instruction encoding of assembly instructions.
+
+--show-total-period:: Show a column with the sum of periods.
+
+-I::
+--show-info::
+ Display extended information about the perf.data file. This adds
+ information which may be very large and thus may clutter the display.
+ It currently includes: cpu and numa topology of the host system.
+
+-b::
+--branch-stack::
+ Use the addresses of sampled taken branches instead of the instruction
+ address to build the histograms. To generate meaningful output, the
+ perf.data file must have been obtained using perf record -b or
+ perf record --branch-filter xxx where xxx is a branch filter option.
+ perf report is able to auto-detect whether a perf.data file contains
+ branch stacks and it will automatically switch to the branch view mode,
+ unless --no-branch-stack is used.
+
+--objdump=<path>::
+ Path to objdump binary.
+
+--group::
+ Show event group information together.
+
+--demangle::
+ Demangle symbol names to human readable form. It's enabled by default,
+ disable with --no-demangle.
+
+--mem-mode::
+ Use the data addresses of samples in addition to instruction addresses
+ to build the histograms. To generate meaningful output, the perf.data
+ file must have been obtained using perf record -d -W and using a
+ special event -e cpu/mem-loads/ or -e cpu/mem-stores/. See
+ 'perf mem' for simpler access.
+
+--percent-limit::
+ Do not show entries which have an overhead under that percent.
+ (Default: 0).
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+--header::
+ Show header information in the perf.data file. This includes
+ various information like hostname, OS and perf version, cpu/mem
+ info, perf command line, event list and so on. Currently only
+ --stdio output supports this feature.
+
+--header-only::
+ Show only perf.data header (forces --stdio).
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt
new file mode 100644
index 00000000000..8ff4df95695
--- /dev/null
+++ b/tools/perf/Documentation/perf-sched.txt
@@ -0,0 +1,55 @@
+perf-sched(1)
+==============
+
+NAME
+----
+perf-sched - Tool to trace/measure scheduler properties (latencies)
+
+SYNOPSIS
+--------
+[verse]
+'perf sched' {record|latency|map|replay|script}
+
+DESCRIPTION
+-----------
+There are five variants of perf sched:
+
+ 'perf sched record <command>' to record the scheduling events
+ of an arbitrary workload.
+
+ 'perf sched latency' to report the per task scheduling latencies
+ and other scheduling properties of the workload.
+
+ 'perf sched script' to see a detailed trace of the workload that
+ was recorded (aliased to 'perf script' for now).
+
+ 'perf sched replay' to simulate the workload that was recorded
+ via perf sched record. (this is done by starting up mockup threads
+ that mimic the workload based on the events in the trace. These
+ threads can then replay the timings (CPU runtime and sleep patterns)
+ of the workload as it occurred when it was recorded - and can repeat
+ it a number of times, measuring its performance.)
+
+ 'perf sched map' to print a textual context-switching outline of
+ workload captured via perf sched record. Columns stand for
+ individual CPUs, and the two-letter shortcuts stand for tasks that
+ are running on a CPU. A '*' denotes the CPU that had the event, and
+ a dot signals an idle CPU.
+
+OPTIONS
+-------
+-i::
+--input=<file>::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-v::
+--verbose::
+ Be more verbose. (show symbol address, etc)
+
+-D::
+--dump-raw-trace=::
+ Display verbose dump of the sched data.
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-script-perl.txt b/tools/perf/Documentation/perf-script-perl.txt
new file mode 100644
index 00000000000..d00bef23134
--- /dev/null
+++ b/tools/perf/Documentation/perf-script-perl.txt
@@ -0,0 +1,216 @@
+perf-script-perl(1)
+==================
+
+NAME
+----
+perf-script-perl - Process trace data with a Perl script
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [-s [Perl]:script[.pl] ]
+
+DESCRIPTION
+-----------
+
+This perf script option is used to process perf script data using perf's
+built-in Perl interpreter. It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Perl script, if any.
+
+STARTER SCRIPTS
+---------------
+
+You can avoid reading the rest of this document by running 'perf script
+-g perl' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/perl for typical examples showing how to
+do basic things like aggregate event data, print results, etc. Also,
+the check-perf-script.pl script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf script is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace. If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_handled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -a -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above option: -a to enable system-wide collection.
+
+The format file for the sched_wakep event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+ field:unsigned short common_type;
+ field:unsigned char common_flags;
+ field:unsigned char common_preempt_count;
+ field:int common_pid;
+
+ field:char comm[TASK_COMM_LEN];
+ field:pid_t pid;
+ field:int prio;
+ field:int success;
+ field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+sub sched::sched_wakeup
+{
+ my ($event_name, $context, $common_cpu, $common_secs,
+ $common_nsecs, $common_pid, $common_comm,
+ $comm, $pid, $prio, $success, $target_cpu) = @_;
+}
+----
+
+The handler function takes the form subsystem::event_name.
+
+The $common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ $event_name the name of the event as text
+ $context an opaque 'cookie' used in calls back into perf
+ $common_cpu the cpu the event occurred on
+ $common_secs the secs portion of the event timestamp
+ $common_nsecs the nsecs portion of the event timestamp
+ $common_pid the pid of the current task
+ $common_comm the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script. The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf script Perl script should start by setting up a Perl module
+search path and 'use'ing a few support modules (see module
+descriptions below):
+
+----
+ use lib "$ENV{'PERF_EXEC_PATH'}/scripts/perl/Perf-Trace-Util/lib";
+ use lib "./Perf-Trace-Util/lib";
+ use Perf::Trace::Core;
+ use Perf::Trace::Context;
+ use Perf::Trace::Util;
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+ sub trace_begin
+ {
+ }
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+sub trace_end
+{
+}
+----
+
+*trace_unhandled*, if defined, is called after for any event that
+ doesn't have a handler explicitly defined for it. The standard set
+ of common arguments are passed into it:
+
+----
+sub trace_unhandled
+{
+ my ($event_name, $context, $common_cpu, $common_secs,
+ $common_nsecs, $common_pid, $common_comm) = @_;
+}
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf script Perl modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various Perf::Trace::* Perl modules. To use the functions and
+variables from the given module, add the corresponding 'use
+Perf::Trace::XXX' line to your perf script script.
+
+Perf::Trace::Core Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields. These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+ flag_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the flag field $field_name of event $event_name
+ symbol_str($event_name, $field_name, $field_value) - returns the string represention corresponding to $field_value for the symbolic field $field_name of event $event_name
+
+Perf::Trace::Context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+Perf::Trace::Context defines a set of functions that can be used to
+access this data in the context of the current event. Each of these
+functions expects a $context variable, which is the same as the
+$context variable passed into every event handler as the second
+argument.
+
+ common_pc($context) - returns common_preempt count for the current event
+ common_flags($context) - returns common_flags for the current event
+ common_lock_depth($context) - returns common_lock_depth for the current event
+
+Perf::Trace::Util Module
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Various utility functions for use with perf script:
+
+ nsecs($secs, $nsecs) - returns total nsecs given secs/nsecs pair
+ nsecs_secs($nsecs) - returns whole secs portion given nsecs
+ nsecs_nsecs($nsecs) - returns nsecs remainder given nsecs
+ nsecs_str($nsecs) - returns printable string in the form secs.nsecs
+ avg($total, $n) - returns average given a sum and a total number of values
+
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-script-python.txt b/tools/perf/Documentation/perf-script-python.txt
new file mode 100644
index 00000000000..9f1f054b843
--- /dev/null
+++ b/tools/perf/Documentation/perf-script-python.txt
@@ -0,0 +1,620 @@
+perf-script-python(1)
+====================
+
+NAME
+----
+perf-script-python - Process trace data with a Python script
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [-s [Python]:script[.py] ]
+
+DESCRIPTION
+-----------
+
+This perf script option is used to process perf script data using perf's
+built-in Python interpreter. It reads and processes the input file and
+displays the results of the trace analysis implemented in the given
+Python script, if any.
+
+A QUICK EXAMPLE
+---------------
+
+This section shows the process, start to finish, of creating a working
+Python script that aggregates and extracts useful information from a
+raw perf script stream. You can avoid reading the rest of this
+document if an example is enough for you; the rest of the document
+provides more details on each step and lists the library functions
+available to script writers.
+
+This example actually details the steps that were used to create the
+'syscall-counts' script you see when you list the available perf script
+scripts via 'perf script -l'. As such, this script also shows how to
+integrate your script into the list of general-purpose 'perf script'
+scripts listed by that command.
+
+The syscall-counts script is a simple script, but demonstrates all the
+basic ideas necessary to create a useful script. Here's an example
+of its output (syscall names are not yet supported, they will appear
+as numbers):
+
+----
+syscall events:
+
+event count
+---------------------------------------- -----------
+sys_write 455067
+sys_getdents 4072
+sys_close 3037
+sys_swapoff 1769
+sys_read 923
+sys_sched_setparam 826
+sys_open 331
+sys_newfstat 326
+sys_mmap 217
+sys_munmap 216
+sys_futex 141
+sys_select 102
+sys_poll 84
+sys_setitimer 12
+sys_writev 8
+15 8
+sys_lseek 7
+sys_rt_sigprocmask 6
+sys_wait4 3
+sys_ioctl 3
+sys_set_robust_list 1
+sys_exit 1
+56 1
+sys_access 1
+----
+
+Basically our task is to keep a per-syscall tally that gets updated
+every time a system call occurs in the system. Our script will do
+that, but first we need to record the data that will be processed by
+that script. Theoretically, there are a couple of ways we could do
+that:
+
+- we could enable every event under the tracing/events/syscalls
+ directory, but this is over 600 syscalls, well beyond the number
+ allowable by perf. These individual syscall events will however be
+ useful if we want to later use the guidance we get from the
+ general-purpose scripts to drill down and get more detail about
+ individual syscalls of interest.
+
+- we can enable the sys_enter and/or sys_exit syscalls found under
+ tracing/events/raw_syscalls. These are called for all syscalls; the
+ 'id' field can be used to distinguish between individual syscall
+ numbers.
+
+For this script, we only need to know that a syscall was entered; we
+don't care how it exited, so we'll use 'perf record' to record only
+the sys_enter events:
+
+----
+# perf record -a -e raw_syscalls:sys_enter
+
+^C[ perf record: Woken up 1 times to write data ]
+[ perf record: Captured and wrote 56.545 MB perf.data (~2470503 samples) ]
+----
+
+The options basically say to collect data for every syscall event
+system-wide and multiplex the per-cpu output into a single stream.
+That single stream will be recorded in a file in the current directory
+called perf.data.
+
+Once we have a perf.data file containing our data, we can use the -g
+'perf script' option to generate a Python script that will contain a
+callback handler for each event type found in the perf.data trace
+stream (for more details, see the STARTER SCRIPTS section).
+
+----
+# perf script -g python
+generated Python script: perf-script.py
+
+The output file created also in the current directory is named
+perf-script.py. Here's the file in its entirety:
+
+# perf script event handlers, generated by perf script -g python
+# Licensed under the terms of the GNU GPL License version 2
+
+# The common_* event handler fields are the most useful fields common to
+# all events. They don't necessarily correspond to the 'common_*' fields
+# in the format files. Those fields not available as handler params can
+# be retrieved using Python functions of the form common_*(context).
+# See the perf-script-python Documentation for the list of available functions.
+
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_begin():
+ print "in trace_begin"
+
+def trace_end():
+ print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+ print_header(event_name, common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm)
+
+ print "id=%d, args=%s\n" % \
+ (id, args),
+
+def trace_unhandled(event_name, context, common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm):
+ print_header(event_name, common_cpu, common_secs, common_nsecs,
+ common_pid, common_comm)
+
+def print_header(event_name, cpu, secs, nsecs, pid, comm):
+ print "%-20s %5u %05u.%09u %8u %-20s " % \
+ (event_name, cpu, secs, nsecs, pid, comm),
+----
+
+At the top is a comment block followed by some import statements and a
+path append which every perf script script should include.
+
+Following that are a couple generated functions, trace_begin() and
+trace_end(), which are called at the beginning and the end of the
+script respectively (for more details, see the SCRIPT_LAYOUT section
+below).
+
+Following those are the 'event handler' functions generated one for
+every event in the 'perf record' output. The handler functions take
+the form subsystem__event_name, and contain named parameters, one for
+each field in the event; in this case, there's only one event,
+raw_syscalls__sys_enter(). (see the EVENT HANDLERS section below for
+more info on event handlers).
+
+The final couple of functions are, like the begin and end functions,
+generated for every script. The first, trace_unhandled(), is called
+every time the script finds an event in the perf.data file that
+doesn't correspond to any event handler in the script. This could
+mean either that the record step recorded event types that it wasn't
+really interested in, or the script was run against a trace file that
+doesn't correspond to the script.
+
+The script generated by -g option simply prints a line for each
+event found in the trace stream i.e. it basically just dumps the event
+and its parameter values to stdout. The print_header() function is
+simply a utility function used for that purpose. Let's rename the
+script and run it to see the default output:
+
+----
+# mv perf-script.py syscall-counts.py
+# perf script -s syscall-counts.py
+
+raw_syscalls__sys_enter 1 00840.847582083 7506 perf id=1, args=
+raw_syscalls__sys_enter 1 00840.847595764 7506 perf id=1, args=
+raw_syscalls__sys_enter 1 00840.847620860 7506 perf id=1, args=
+raw_syscalls__sys_enter 1 00840.847710478 6533 npviewer.bin id=78, args=
+raw_syscalls__sys_enter 1 00840.847719204 6533 npviewer.bin id=142, args=
+raw_syscalls__sys_enter 1 00840.847755445 6533 npviewer.bin id=3, args=
+raw_syscalls__sys_enter 1 00840.847775601 6533 npviewer.bin id=3, args=
+raw_syscalls__sys_enter 1 00840.847781820 6533 npviewer.bin id=3, args=
+.
+.
+.
+----
+
+Of course, for this script, we're not interested in printing every
+trace event, but rather aggregating it in a useful way. So we'll get
+rid of everything to do with printing as well as the trace_begin() and
+trace_unhandled() functions, which we won't be using. That leaves us
+with this minimalistic skeleton:
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+
+def trace_end():
+ print "in trace_end"
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+----
+
+In trace_end(), we'll simply print the results, but first we need to
+generate some results to print. To do that we need to have our
+sys_enter() handler do the necessary tallying until all events have
+been counted. A hash table indexed by syscall id is a good way to
+store that information; every time the sys_enter() handler is called,
+we simply increment a count associated with that hash entry indexed by
+that syscall id:
+
+----
+ syscalls = autodict()
+
+ try:
+ syscalls[id] += 1
+ except TypeError:
+ syscalls[id] = 1
+----
+
+The syscalls 'autodict' object is a special kind of Python dictionary
+(implemented in Core.py) that implements Perl's 'autovivifying' hashes
+in Python i.e. with autovivifying hashes, you can assign nested hash
+values without having to go to the trouble of creating intermediate
+levels if they don't exist e.g syscalls[comm][pid][id] = 1 will create
+the intermediate hash levels and finally assign the value 1 to the
+hash entry for 'id' (because the value being assigned isn't a hash
+object itself, the initial value is assigned in the TypeError
+exception. Well, there may be a better way to do this in Python but
+that's what works for now).
+
+Putting that code into the raw_syscalls__sys_enter() handler, we
+effectively end up with a single-level dictionary keyed on syscall id
+and having the counts we've tallied as values.
+
+The print_syscall_totals() function iterates over the entries in the
+dictionary and displays a line for each entry containing the syscall
+name (the dictonary keys contain the syscall ids, which are passed to
+the Util function syscall_name(), which translates the raw syscall
+numbers to the corresponding syscall name strings). The output is
+displayed after all the events in the trace have been processed, by
+calling the print_syscall_totals() function from the trace_end()
+handler called at the end of script processing.
+
+The final script producing the output shown above is shown in its
+entirety below (syscall_name() helper is not yet available, you can
+only deal with id's for now):
+
+----
+import os
+import sys
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from Util import *
+
+syscalls = autodict()
+
+def trace_end():
+ print_syscall_totals()
+
+def raw_syscalls__sys_enter(event_name, context, common_cpu,
+ common_secs, common_nsecs, common_pid, common_comm,
+ id, args):
+ try:
+ syscalls[id] += 1
+ except TypeError:
+ syscalls[id] = 1
+
+def print_syscall_totals():
+ if for_comm is not None:
+ print "\nsyscall events for %s:\n\n" % (for_comm),
+ else:
+ print "\nsyscall events:\n\n",
+
+ print "%-40s %10s\n" % ("event", "count"),
+ print "%-40s %10s\n" % ("----------------------------------------", \
+ "-----------"),
+
+ for id, val in sorted(syscalls.iteritems(), key = lambda(k, v): (v, k), \
+ reverse = True):
+ print "%-40s %10d\n" % (syscall_name(id), val),
+----
+
+The script can be run just as before:
+
+ # perf script -s syscall-counts.py
+
+So those are the essential steps in writing and running a script. The
+process can be generalized to any tracepoint or set of tracepoints
+you're interested in - basically find the tracepoint(s) you're
+interested in by looking at the list of available events shown by
+'perf list' and/or look in /sys/kernel/debug/tracing events for
+detailed event and field info, record the corresponding trace data
+using 'perf record', passing it the list of interesting events,
+generate a skeleton script using 'perf script -g python' and modify the
+code to aggregate and display it for your particular needs.
+
+After you've done that you may end up with a general-purpose script
+that you want to keep around and have available for future use. By
+writing a couple of very simple shell scripts and putting them in the
+right place, you can have your script listed alongside the other
+scripts listed by the 'perf script -l' command e.g.:
+
+----
+root@tropicana:~# perf script -l
+List of available trace scripts:
+ wakeup-latency system-wide min/max/avg wakeup latency
+ rw-by-file <comm> r/w activity for a program, by file
+ rw-by-pid system-wide r/w activity
+----
+
+A nice side effect of doing this is that you also then capture the
+probably lengthy 'perf record' command needed to record the events for
+the script.
+
+To have the script appear as a 'built-in' script, you write two simple
+scripts, one for recording and one for 'reporting'.
+
+The 'record' script is a shell script with the same base name as your
+script, but with -record appended. The shell script should be put
+into the perf/scripts/python/bin directory in the kernel source tree.
+In that script, you write the 'perf record' command-line needed for
+your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-record
+
+#!/bin/bash
+perf record -a -e raw_syscalls:sys_enter
+----
+
+The 'report' script is also a shell script with the same base name as
+your script, but with -report appended. It should also be located in
+the perf/scripts/python/bin directory. In that script, you write the
+'perf script -s' command-line needed for running your script:
+
+----
+# cat kernel-source/tools/perf/scripts/python/bin/syscall-counts-report
+
+#!/bin/bash
+# description: system-wide syscall counts
+perf script -s ~/libexec/perf-core/scripts/python/syscall-counts.py
+----
+
+Note that the location of the Python script given in the shell script
+is in the libexec/perf-core/scripts/python directory - this is where
+the script will be copied by 'make install' when you install perf.
+For the installation to install your script there, your script needs
+to be located in the perf/scripts/python directory in the kernel
+source tree:
+
+----
+# ls -al kernel-source/tools/perf/scripts/python
+
+root@tropicana:/home/trz/src/tip# ls -al tools/perf/scripts/python
+total 32
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:30 .
+drwxr-xr-x 4 trz trz 4096 2010-01-26 22:29 ..
+drwxr-xr-x 2 trz trz 4096 2010-01-26 22:29 bin
+-rw-r--r-- 1 trz trz 2548 2010-01-26 22:29 check-perf-script.py
+drwxr-xr-x 3 trz trz 4096 2010-01-26 22:49 Perf-Trace-Util
+-rw-r--r-- 1 trz trz 1462 2010-01-26 22:30 syscall-counts.py
+----
+
+Once you've done that (don't forget to do a new 'make install',
+otherwise your script won't show up at run-time), 'perf script -l'
+should show a new entry for your script:
+
+----
+root@tropicana:~# perf script -l
+List of available trace scripts:
+ wakeup-latency system-wide min/max/avg wakeup latency
+ rw-by-file <comm> r/w activity for a program, by file
+ rw-by-pid system-wide r/w activity
+ syscall-counts system-wide syscall counts
+----
+
+You can now perform the record step via 'perf script record':
+
+ # perf script record syscall-counts
+
+and display the output using 'perf script report':
+
+ # perf script report syscall-counts
+
+STARTER SCRIPTS
+---------------
+
+You can quickly get started writing a script for a particular set of
+trace data by generating a skeleton script using 'perf script -g
+python' in the same directory as an existing perf.data trace file.
+That will generate a starter script containing a handler for each of
+the event types in the trace file; it simply prints every available
+field for each event in the trace file.
+
+You can also look at the existing scripts in
+~/libexec/perf-core/scripts/python for typical examples showing how to
+do basic things like aggregate event data, print results, etc. Also,
+the check-perf-script.py script, while not interesting for its results,
+attempts to exercise all of the main scripting features.
+
+EVENT HANDLERS
+--------------
+
+When perf script is invoked using a trace script, a user-defined
+'handler function' is called for each event in the trace. If there's
+no handler function defined for a given event type, the event is
+ignored (or passed to a 'trace_handled' function, see below) and the
+next event is processed.
+
+Most of the event's field values are passed as arguments to the
+handler function; some of the less common ones aren't - those are
+available as calls back into the perf executable (see below).
+
+As an example, the following perf record command can be used to record
+all sched_wakeup events in the system:
+
+ # perf record -a -e sched:sched_wakeup
+
+Traces meant to be processed using a script should be recorded with
+the above option: -a to enable system-wide collection.
+
+The format file for the sched_wakep event defines the following fields
+(see /sys/kernel/debug/tracing/events/sched/sched_wakeup/format):
+
+----
+ format:
+ field:unsigned short common_type;
+ field:unsigned char common_flags;
+ field:unsigned char common_preempt_count;
+ field:int common_pid;
+
+ field:char comm[TASK_COMM_LEN];
+ field:pid_t pid;
+ field:int prio;
+ field:int success;
+ field:int target_cpu;
+----
+
+The handler function for this event would be defined as:
+
+----
+def sched__sched_wakeup(event_name, context, common_cpu, common_secs,
+ common_nsecs, common_pid, common_comm,
+ comm, pid, prio, success, target_cpu):
+ pass
+----
+
+The handler function takes the form subsystem__event_name.
+
+The common_* arguments in the handler's argument list are the set of
+arguments passed to all event handlers; some of the fields correspond
+to the common_* fields in the format file, but some are synthesized,
+and some of the common_* fields aren't common enough to to be passed
+to every event as arguments but are available as library functions.
+
+Here's a brief description of each of the invariant event args:
+
+ event_name the name of the event as text
+ context an opaque 'cookie' used in calls back into perf
+ common_cpu the cpu the event occurred on
+ common_secs the secs portion of the event timestamp
+ common_nsecs the nsecs portion of the event timestamp
+ common_pid the pid of the current task
+ common_comm the name of the current process
+
+All of the remaining fields in the event's format file have
+counterparts as handler function arguments of the same name, as can be
+seen in the example above.
+
+The above provides the basics needed to directly access every field of
+every event in a trace, which covers 90% of what you need to know to
+write a useful trace script. The sections below cover the rest.
+
+SCRIPT LAYOUT
+-------------
+
+Every perf script Python script should start by setting up a Python
+module search path and 'import'ing a few support modules (see module
+descriptions below):
+
+----
+ import os
+ import sys
+
+ sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+ '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+ from perf_trace_context import *
+ from Core import *
+----
+
+The rest of the script can contain handler functions and support
+functions in any order.
+
+Aside from the event handler functions discussed above, every script
+can implement a set of optional functions:
+
+*trace_begin*, if defined, is called before any event is processed and
+gives scripts a chance to do setup tasks:
+
+----
+def trace_begin:
+ pass
+----
+
+*trace_end*, if defined, is called after all events have been
+ processed and gives scripts a chance to do end-of-script tasks, such
+ as display results:
+
+----
+def trace_end:
+ pass
+----
+
+*trace_unhandled*, if defined, is called after for any event that
+ doesn't have a handler explicitly defined for it. The standard set
+ of common arguments are passed into it:
+
+----
+def trace_unhandled(event_name, context, common_cpu, common_secs,
+ common_nsecs, common_pid, common_comm):
+ pass
+----
+
+The remaining sections provide descriptions of each of the available
+built-in perf script Python modules and their associated functions.
+
+AVAILABLE MODULES AND FUNCTIONS
+-------------------------------
+
+The following sections describe the functions and variables available
+via the various perf script Python modules. To use the functions and
+variables from the given module, add the corresponding 'from XXXX
+import' line to your perf script script.
+
+Core.py Module
+~~~~~~~~~~~~~~
+
+These functions provide some essential functions to user scripts.
+
+The *flag_str* and *symbol_str* functions provide human-readable
+strings for flag and symbolic fields. These correspond to the strings
+and values parsed from the 'print fmt' fields of the event format
+files:
+
+ flag_str(event_name, field_name, field_value) - returns the string represention corresponding to field_value for the flag field field_name of event event_name
+ symbol_str(event_name, field_name, field_value) - returns the string represention corresponding to field_value for the symbolic field field_name of event event_name
+
+The *autodict* function returns a special kind of Python
+dictionary that implements Perl's 'autovivifying' hashes in Python
+i.e. with autovivifying hashes, you can assign nested hash values
+without having to go to the trouble of creating intermediate levels if
+they don't exist.
+
+ autodict() - returns an autovivifying dictionary instance
+
+
+perf_trace_context Module
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Some of the 'common' fields in the event format file aren't all that
+common, but need to be made accessible to user scripts nonetheless.
+
+perf_trace_context defines a set of functions that can be used to
+access this data in the context of the current event. Each of these
+functions expects a context variable, which is the same as the
+context variable passed into every event handler as the second
+argument.
+
+ common_pc(context) - returns common_preempt count for the current event
+ common_flags(context) - returns common_flags for the current event
+ common_lock_depth(context) - returns common_lock_depth for the current event
+
+Util.py Module
+~~~~~~~~~~~~~~
+
+Various utility functions for use with perf script:
+
+ nsecs(secs, nsecs) - returns total nsecs given secs/nsecs pair
+ nsecs_secs(nsecs) - returns whole secs portion given nsecs
+ nsecs_nsecs(nsecs) - returns nsecs remainder given nsecs
+ nsecs_str(nsecs) - returns printable string in the form secs.nsecs
+ avg(total, n) - returns average given a sum and a total number of values
+
+SEE ALSO
+--------
+linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
new file mode 100644
index 00000000000..05f9a0a6784
--- /dev/null
+++ b/tools/perf/Documentation/perf-script.txt
@@ -0,0 +1,221 @@
+perf-script(1)
+=============
+
+NAME
+----
+perf-script - Read perf.data (created by perf record) and display trace output
+
+SYNOPSIS
+--------
+[verse]
+'perf script' [<options>]
+'perf script' [<options>] record <script> [<record-options>] <command>
+'perf script' [<options>] report <script> [script-args]
+'perf script' [<options>] <script> <required-script-args> [<record-options>] <command>
+'perf script' [<options>] <top-script> [script-args]
+
+DESCRIPTION
+-----------
+This command reads the input file and displays the trace recorded.
+
+There are several variants of perf script:
+
+ 'perf script' to see a detailed trace of the workload that was
+ recorded.
+
+ You can also run a set of pre-canned scripts that aggregate and
+ summarize the raw trace data in various ways (the list of scripts is
+ available via 'perf script -l'). The following variants allow you to
+ record and run those scripts:
+
+ 'perf script record <script> <command>' to record the events required
+ for 'perf script report'. <script> is the name displayed in the
+ output of 'perf script --list' i.e. the actual script name minus any
+ language extension. If <command> is not specified, the events are
+ recorded using the -a (system-wide) 'perf record' option.
+
+ 'perf script report <script> [args]' to run and display the results
+ of <script>. <script> is the name displayed in the output of 'perf
+ trace --list' i.e. the actual script name minus any language
+ extension. The perf.data output from a previous run of 'perf script
+ record <script>' is used and should be present for this command to
+ succeed. [args] refers to the (mainly optional) args expected by
+ the script.
+
+ 'perf script <script> <required-script-args> <command>' to both
+ record the events required for <script> and to run the <script>
+ using 'live-mode' i.e. without writing anything to disk. <script>
+ is the name displayed in the output of 'perf script --list' i.e. the
+ actual script name minus any language extension. If <command> is
+ not specified, the events are recorded using the -a (system-wide)
+ 'perf record' option. If <script> has any required args, they
+ should be specified before <command>. This mode doesn't allow for
+ optional script args to be specified; if optional script args are
+ desired, they can be specified using separate 'perf script record'
+ and 'perf script report' commands, with the stdout of the record step
+ piped to the stdin of the report script, using the '-o -' and '-i -'
+ options of the corresponding commands.
+
+ 'perf script <top-script>' to both record the events required for
+ <top-script> and to run the <top-script> using 'live-mode'
+ i.e. without writing anything to disk. <top-script> is the name
+ displayed in the output of 'perf script --list' i.e. the actual
+ script name minus any language extension; a <top-script> is defined
+ as any script name ending with the string 'top'.
+
+ [<record-options>] can be passed to the record steps of 'perf script
+ record' and 'live-mode' variants; this isn't possible however for
+ <top-script> 'live-mode' or 'perf script report' variants.
+
+ See the 'SEE ALSO' section for links to language-specific
+ information on how to write and run your own trace scripts.
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+-D::
+--dump-raw-script=::
+ Display verbose dump of the trace data.
+
+-L::
+--Latency=::
+ Show latency attributes (irqs/preemption disabled, etc).
+
+-l::
+--list=::
+ Display a list of available trace scripts.
+
+-s ['lang']::
+--script=::
+ Process trace data with the given script ([lang]:script[.ext]).
+ If the string 'lang' is specified in place of a script name, a
+ list of supported languages will be displayed instead.
+
+-g::
+--gen-script=::
+ Generate perf-script.[ext] starter script for given language,
+ using current perf.data.
+
+-a::
+ Force system-wide collection. Scripts run without a <command>
+ normally use -a by default, while scripts run with a <command>
+ normally don't - this option allows the latter to be run in
+ system-wide mode.
+
+-i::
+--input=::
+ Input file name. (default: perf.data unless stdin is a fifo)
+
+-d::
+--debug-mode::
+ Do various checks like samples ordering and lost events.
+
+-f::
+--fields::
+ Comma separated list of fields to print. Options are:
+ comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline.
+ Field list can be prepended with the type, trace, sw or hw,
+ to indicate to which event type the field list applies.
+ e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
+
+ perf script -f <fields>
+
+ is equivalent to:
+
+ perf script -f trace:<fields> -f sw:<fields> -f hw:<fields>
+
+ i.e., the specified fields apply to all event types if the type string
+ is not given.
+
+ The arguments are processed in the order received. A later usage can
+ reset a prior request. e.g.:
+
+ -f trace: -f comm,tid,time,ip,sym
+
+ The first -f suppresses trace events (field list is ""), but then the
+ second invocation sets the fields to comm,tid,time,ip,sym. In this case a
+ warning is given to the user:
+
+ "Overriding previous field request for all events."
+
+ Alternativey, consider the order:
+
+ -f comm,tid,time,ip,sym -f trace:
+
+ The first -f sets the fields for all events and the second -f
+ suppresses trace events. The user is given a warning message about
+ the override, and the result of the above is that only S/W and H/W
+ events are displayed with the given fields.
+
+ For the 'wildcard' option if a user selected field is invalid for an
+ event type, a message is displayed to the user that the option is
+ ignored for that type. For example:
+
+ $ perf script -f comm,tid,trace
+ 'trace' not valid for hardware events. Ignoring.
+ 'trace' not valid for software events. Ignoring.
+
+ Alternatively, if the type is given an invalid field is specified it
+ is an error. For example:
+
+ perf script -v -f sw:comm,tid,trace
+ 'trace' not valid for software events.
+
+ At this point usage is displayed, and perf-script exits.
+
+ Finally, a user may not set fields to none for all event types.
+ i.e., -f "" is not allowed.
+
+-k::
+--vmlinux=<file>::
+ vmlinux pathname
+
+--kallsyms=<file>::
+ kallsyms pathname
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+
+-G::
+--hide-call-graph::
+ When printing symbols do not display call chain.
+
+-C::
+--cpu:: Only report samples for the list of CPUs provided. Multiple CPUs can
+ be provided as a comma-separated list with no space: 0,1. Ranges of
+ CPUs are specified with -: 0-2. Default is to report samples on all
+ CPUs.
+
+-c::
+--comms=::
+ Only display events for these comms. CSV that understands
+ file://filename entries.
+
+-I::
+--show-info::
+ Display extended information about the perf.data file. This adds
+ information which may be very large and thus may clutter the display.
+ It currently includes: cpu and numa topology of the host system.
+ It can only be used with the perf script report mode.
+
+--show-kernel-path::
+ Try to resolve the path of [kernel.kallsyms]
+
+--show-task-events
+ Display task related events (e.g. FORK, COMM, EXIT).
+
+--show-mmap-events
+ Display mmap related events (e.g. MMAP, MMAP2).
+
+--header
+ Show perf.data header.
+
+--header-only
+ Show only perf.data header.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script-perl[1],
+linkperf:perf-script-python[1]
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
new file mode 100644
index 00000000000..29ee857c09c
--- /dev/null
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -0,0 +1,165 @@
+perf-stat(1)
+============
+
+NAME
+----
+perf-stat - Run a command and gather performance counter statistics
+
+SYNOPSIS
+--------
+[verse]
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] <command>
+'perf stat' [-e <EVENT> | --event=EVENT] [-a] -- <command> [<options>]
+
+DESCRIPTION
+-----------
+This command runs a command and gathers performance counter statistics
+from it.
+
+
+OPTIONS
+-------
+<command>...::
+ Any command you can specify in a shell.
+
+
+-e::
+--event=::
+ Select the PMU event. Selection can be a symbolic event name
+ (use 'perf list' to list all events) or a raw PMU
+ event (eventsel+umask) in the form of rNNN where NNN is a
+ hexadecimal event descriptor.
+
+-i::
+--no-inherit::
+ child tasks do not inherit counters
+-p::
+--pid=<pid>::
+ stat events on existing process id (comma separated list)
+
+-t::
+--tid=<tid>::
+ stat events on existing thread id (comma separated list)
+
+
+-a::
+--all-cpus::
+ system-wide collection from all CPUs
+
+-c::
+--scale::
+ scale/normalize counter values
+
+-r::
+--repeat=<n>::
+ repeat command and print average + stddev (max: 100). 0 means forever.
+
+-B::
+--big-num::
+ print large numbers with thousands' separators according to locale
+
+-C::
+--cpu=::
+Count only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode, this option is ignored. The -a option is still necessary
+to activate system-wide monitoring. Default is to count on all CPUs.
+
+-A::
+--no-aggr::
+Do not aggregate counts across all monitored CPUs in system-wide mode (-a).
+This option is only valid in system-wide mode.
+
+-n::
+--null::
+ null run - don't start any counters
+
+-v::
+--verbose::
+ be more verbose (show counter open errors, etc)
+
+-x SEP::
+--field-separator SEP::
+print counts using a CSV-style output to make it easy to import directly into
+spreadsheets. Columns are separated by the string specified in SEP.
+
+-G name::
+--cgroup name::
+monitor only in the container (cgroup) called "name". This option is available only
+in per-cpu mode. The cgroup filesystem must be mounted. All threads belonging to
+container "name" are monitored when they run on the monitored CPUs. Multiple cgroups
+can be provided. Each cgroup is applied to the corresponding event, i.e., first cgroup
+to first event, second cgroup to second event and so on. It is possible to provide
+an empty cgroup (monitor all the time) using, e.g., -G foo,,bar. Cgroups must have
+corresponding events, i.e., they always refer to events defined earlier on the command
+line.
+
+-o file::
+--output file::
+Print the output into the designated file.
+
+--append::
+Append to the output file designated with the -o option. Ignored if -o is not specified.
+
+--log-fd::
+
+Log output to fd, instead of stderr. Complementary to --output, and mutually exclusive
+with it. --append may be used here. Examples:
+ 3>results perf stat --log-fd 3 -- $cmd
+ 3>>results perf stat --log-fd 3 --append -- $cmd
+
+--pre::
+--post::
+ Pre and post measurement hooks, e.g.:
+
+perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- make -s -j64 O=defconfig-build/ bzImage
+
+-I msecs::
+--interval-print msecs::
+ Print count deltas every N milliseconds (minimum: 100ms)
+ example: perf stat -I 1000 -e cycles -a sleep 5
+
+--per-socket::
+Aggregate counts per processor socket for system-wide mode measurements. This
+is a useful mode to detect imbalance between sockets. To enable this mode,
+use --per-socket in addition to -a. (system-wide). The output includes the
+socket number and the number of online processors on that socket. This is
+useful to gauge the amount of aggregation.
+
+--per-core::
+Aggregate counts per physical processor for system-wide mode measurements. This
+is a useful mode to detect imbalance between physical cores. To enable this mode,
+use --per-core in addition to -a. (system-wide). The output includes the
+core number and the number of online logical processors on that physical processor.
+
+-D msecs::
+--delay msecs::
+After starting the program, wait msecs before measuring. This is useful to
+filter out the startup phase of the program, which is often very different.
+
+-T::
+--transaction::
+
+Print statistics of transactional execution if supported.
+
+EXAMPLES
+--------
+
+$ perf stat -- make -j
+
+ Performance counter stats for 'make -j':
+
+ 8117.370256 task clock ticks # 11.281 CPU utilization factor
+ 678 context switches # 0.000 M/sec
+ 133 CPU migrations # 0.000 M/sec
+ 235724 pagefaults # 0.029 M/sec
+ 24821162526 CPU cycles # 3057.784 M/sec
+ 18687303457 instructions # 2302.138 M/sec
+ 172158895 cache references # 21.209 M/sec
+ 27075259 cache misses # 3.335 M/sec
+
+ Wall-clock time elapsed: 719.554352 msecs
+
+SEE ALSO
+--------
+linkperf:perf-top[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-test.txt b/tools/perf/Documentation/perf-test.txt
new file mode 100644
index 00000000000..d1d3e5121f8
--- /dev/null
+++ b/tools/perf/Documentation/perf-test.txt
@@ -0,0 +1,32 @@
+perf-test(1)
+============
+
+NAME
+----
+perf-test - Runs sanity tests.
+
+SYNOPSIS
+--------
+[verse]
+'perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]'
+
+DESCRIPTION
+-----------
+This command does assorted sanity tests, initially through linked routines but
+also will look for a directory with more tests in the form of scripts.
+
+To get a list of available tests use 'perf test list', specifying a test name
+fragment will show all tests that have it.
+
+To run just specific tests, inform test name fragments or the numbers obtained
+from 'perf test list'.
+
+OPTIONS
+-------
+-s::
+--skip::
+ Tests to skip (comma separater numeric list).
+
+-v::
+--verbose::
+ Be more verbose.
diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt
new file mode 100644
index 00000000000..5e0f986dff3
--- /dev/null
+++ b/tools/perf/Documentation/perf-timechart.txt
@@ -0,0 +1,92 @@
+perf-timechart(1)
+=================
+
+NAME
+----
+perf-timechart - Tool to visualize total system behavior during a workload
+
+SYNOPSIS
+--------
+[verse]
+'perf timechart' [<timechart options>] {record} [<record options>]
+
+DESCRIPTION
+-----------
+There are two variants of perf timechart:
+
+ 'perf timechart record <command>' to record the system level events
+ of an arbitrary workload.
+
+ 'perf timechart' to turn a trace into a Scalable Vector Graphics file,
+ that can be viewed with popular SVG viewers such as 'Inkscape'.
+
+TIMECHART OPTIONS
+-----------------
+-o::
+--output=::
+ Select the output file (default: output.svg)
+-i::
+--input=::
+ Select the input file (default: perf.data unless stdin is a fifo)
+-w::
+--width=::
+ Select the width of the SVG file (default: 1000)
+-P::
+--power-only::
+ Only output the CPU power section of the diagram
+-T::
+--tasks-only::
+ Don't output processor state transitions
+-p::
+--process::
+ Select the processes to display, by name or PID
+
+--symfs=<directory>::
+ Look for files with symbols relative to this directory.
+-n::
+--proc-num::
+ Print task info for at least given number of tasks.
+-t::
+--topology::
+ Sort CPUs according to topology.
+--highlight=<duration_nsecs|task_name>::
+ Highlight tasks (using different color) that run more than given
+ duration or tasks with given name. If number is given it's interpreted
+ as number of nanoseconds. If non-numeric string is given it's
+ interpreted as task name.
+
+RECORD OPTIONS
+--------------
+-P::
+--power-only::
+ Record only power-related events
+-T::
+--tasks-only::
+ Record only tasks-related events
+-g::
+--callchain::
+ Do call-graph (stack chain/backtrace) recording
+
+EXAMPLES
+--------
+
+$ perf timechart record git pull
+
+ [ perf record: Woken up 13 times to write data ]
+ [ perf record: Captured and wrote 4.253 MB perf.data (~185801 samples) ]
+
+$ perf timechart
+
+ Written 10.2 seconds of trace to output.svg.
+
+Record system-wide timechart:
+
+ $ perf timechart record
+
+ then generate timechart and highlight 'gcc' tasks:
+
+ $ perf timechart --highlight gcc
+
+SEE ALSO
+--------
+linkperf:perf-record[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
new file mode 100644
index 00000000000..180ae02137a
--- /dev/null
+++ b/tools/perf/Documentation/perf-top.txt
@@ -0,0 +1,231 @@
+perf-top(1)
+===========
+
+NAME
+----
+perf-top - System profiling tool.
+
+SYNOPSIS
+--------
+[verse]
+'perf top' [-e <EVENT> | --event=EVENT] [<options>]
+
+DESCRIPTION
+-----------
+This command generates and displays a performance counter profile in real time.
+
+
+OPTIONS
+-------
+-a::
+--all-cpus::
+ System-wide collection. (default)
+
+-c <count>::
+--count=<count>::
+ Event period to sample.
+
+-C <cpu-list>::
+--cpu=<cpu>::
+Monitor only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+Default is to monitor all CPUS.
+
+-d <seconds>::
+--delay=<seconds>::
+ Number of seconds to delay between refreshes.
+
+-e <event>::
+--event=<event>::
+ Select the PMU event. Selection can be a symbolic event name
+ (use 'perf list' to list all events) or a raw PMU
+ event (eventsel+umask) in the form of rNNN where NNN is a
+ hexadecimal event descriptor.
+
+-E <entries>::
+--entries=<entries>::
+ Display this many functions.
+
+-f <count>::
+--count-filter=<count>::
+ Only display functions with more events than this.
+
+--group::
+ Put the counters into a counter group.
+
+-F <freq>::
+--freq=<freq>::
+ Profile at this frequency.
+
+-i::
+--inherit::
+ Child tasks do not inherit counters.
+
+-k <path>::
+--vmlinux=<path>::
+ Path to vmlinux. Required for annotation functionality.
+
+-m <pages>::
+--mmap-pages=<pages>::
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
+
+-p <pid>::
+--pid=<pid>::
+ Profile events on existing Process ID (comma separated list).
+
+-t <tid>::
+--tid=<tid>::
+ Profile events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
+
+-r <priority>::
+--realtime=<priority>::
+ Collect data with this RT SCHED_FIFO priority.
+
+--sym-annotate=<symbol>::
+ Annotate this symbol.
+
+-K::
+--hide_kernel_symbols::
+ Hide kernel symbols.
+
+-U::
+--hide_user_symbols::
+ Hide user symbols.
+
+-D::
+--dump-symtab::
+ Dump the symbol table used for profiling.
+
+-v::
+--verbose::
+ Be more verbose (show counter open errors, etc).
+
+-z::
+--zero::
+ Zero history across display updates.
+
+-s::
+--sort::
+ Sort by key(s): pid, comm, dso, symbol, parent, srcline, weight,
+ local_weight, abort, in_tx, transaction, overhead, sample, period.
+ Please see description of --sort in the perf-report man page.
+
+--fields=::
+ Specify output field - multiple keys can be specified in CSV format.
+ Following fields are available:
+ overhead, overhead_sys, overhead_us, overhead_children, sample and period.
+ Also it can contain any sort key(s).
+
+ By default, every sort keys not specified in --field will be appended
+ automatically.
+
+-n::
+--show-nr-samples::
+ Show a column with the number of samples.
+
+--show-total-period::
+ Show a column with the sum of periods.
+
+--dsos::
+ Only consider symbols in these dsos. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
+
+--comms::
+ Only consider symbols in these comms. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
+
+--symbols::
+ Only consider these symbols. This option will affect the
+ percentage of the overhead column. See --percentage for more info.
+
+-M::
+--disassembler-style=:: Set disassembler style for objdump.
+
+--source::
+ Interleave source code with assembly code. Enabled by default,
+ disable with --no-source.
+
+--asm-raw::
+ Show raw instruction encoding of assembly instructions.
+
+-g::
+ Enables call-graph (stack chain/backtrace) recording.
+
+--call-graph::
+ Setup and enable call-graph (stack chain/backtrace) recording,
+ implies -g.
+
+--children::
+ Accumulate callchain of children to parent entry so that then can
+ show up in the output. The output will have a new "Children" column
+ and will be sorted on the data. It requires -g/--call-graph option
+ enabled.
+
+--max-stack::
+ Set the stack depth limit when parsing the callchain, anything
+ beyond the specified depth will be ignored. This is a trade-off
+ between information loss and faster processing especially for
+ workloads that can have a very long callchain stack.
+
+ Default: 127
+
+--ignore-callees=<regex>::
+ Ignore callees of the function(s) matching the given regex.
+ This has the effect of collecting the callers of each such
+ function into one place in the call-graph tree.
+
+--percent-limit::
+ Do not show entries which have an overhead under that percent.
+ (Default: 0).
+
+--percentage::
+ Determine how to display the overhead percentage of filtered entries.
+ Filters can be applied by --comms, --dsos and/or --symbols options and
+ Zoom operations on the TUI (thread, dso, etc).
+
+ "relative" means it's relative to filtered entries only so that the
+ sum of shown entries will be always 100%. "absolute" means it retains
+ the original value before and after the filter is applied.
+
+INTERACTIVE PROMPTING KEYS
+--------------------------
+
+[d]::
+ Display refresh delay.
+
+[e]::
+ Number of entries to display.
+
+[E]::
+ Event to display when multiple counters are active.
+
+[f]::
+ Profile display filter (>= hit count).
+
+[F]::
+ Annotation display filter (>= % of total).
+
+[s]::
+ Annotate symbol.
+
+[S]::
+ Stop annotation, return to full profile display.
+
+[z]::
+ Toggle event count zeroing across display updates.
+
+[qQ]::
+ Quit.
+
+Pressing any unmapped key displays a menu, and prompts for input.
+
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-list[1], linkperf:perf-report[1]
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
new file mode 100644
index 00000000000..fae38d9a44a
--- /dev/null
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -0,0 +1,112 @@
+perf-trace(1)
+=============
+
+NAME
+----
+perf-trace - strace inspired tool
+
+SYNOPSIS
+--------
+[verse]
+'perf trace'
+'perf trace record'
+
+DESCRIPTION
+-----------
+This command will show the events associated with the target, initially
+syscalls, but other system events like pagefaults, task lifetime events,
+scheduling events, etc.
+
+This is a live mode tool in addition to working with perf.data files like
+the other perf tools. Files can be generated using the 'perf record' command
+but the session needs to include the raw_syscalls events (-e 'raw_syscalls:*').
+Alernatively, the 'perf trace record' can be used as a shortcut to
+automatically include the raw_syscalls events when writing events to a file.
+
+The following options apply to perf trace; options to perf trace record are
+found in the perf record man page.
+
+OPTIONS
+-------
+
+-a::
+--all-cpus::
+ System-wide collection from all CPUs.
+
+-e::
+--expr::
+ List of events to show, currently only syscall names.
+ Prefixing with ! shows all syscalls but the ones specified. You may
+ need to escape it.
+
+-o::
+--output=::
+ Output file name.
+
+-p::
+--pid=::
+ Record events on existing process ID (comma separated list).
+
+-t::
+--tid=::
+ Record events on existing thread ID (comma separated list).
+
+-u::
+--uid=::
+ Record events in threads owned by uid. Name or number.
+
+-v::
+--verbose=::
+ Verbosity level.
+
+-i::
+--no-inherit::
+ Child tasks do not inherit counters.
+
+-m::
+--mmap-pages=::
+ Number of mmap data pages (must be a power of two) or size
+ specification with appended unit character - B/K/M/G. The
+ size is rounded up to have nearest pages power of two value.
+
+-C::
+--cpu::
+Collect samples only on the list of CPUs provided. Multiple CPUs can be provided as a
+comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0-2.
+In per-thread mode with inheritance mode on (default), Events are captured only when
+the thread executes on the designated CPUs. Default is to monitor all CPUs.
+
+--duration:
+ Show only events that had a duration greater than N.M ms.
+
+--sched:
+ Accrue thread runtime and provide a summary at the end of the session.
+
+-i
+--input
+ Process events from a given perf data file.
+
+-T
+--time
+ Print full timestamp rather time relative to first sample.
+
+--comm::
+ Show process COMM right beside its ID, on by default, disable with --no-comm.
+
+-s::
+--summary::
+ Show only a summary of syscalls by thread with min, max, and average times
+ (in msec) and relative stddev.
+
+-S::
+--with-summary::
+ Show all syscalls followed by a summary by thread with min, max, and
+ average times (in msec) and relative stddev.
+
+--tool_stats::
+ Show tool stats such as number of times fd->pathname was discovered thru
+ hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
+
+SEE ALSO
+--------
+linkperf:perf-record[1], linkperf:perf-script[1]
diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt
new file mode 100644
index 00000000000..0eeb247dc7d
--- /dev/null
+++ b/tools/perf/Documentation/perf.txt
@@ -0,0 +1,24 @@
+perf(1)
+=======
+
+NAME
+----
+perf - Performance analysis tools for Linux
+
+SYNOPSIS
+--------
+[verse]
+'perf' [--version] [--help] COMMAND [ARGS]
+
+DESCRIPTION
+-----------
+Performance counters for Linux are a new kernel-based subsystem
+that provide a framework for all things performance analysis. It
+covers hardware level (CPU/PMU, Performance Monitoring Unit) features
+and software features (software counters, tracepoints) as well.
+
+SEE ALSO
+--------
+linkperf:perf-stat[1], linkperf:perf-top[1],
+linkperf:perf-record[1], linkperf:perf-report[1],
+linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perfconfig.example b/tools/perf/Documentation/perfconfig.example
new file mode 100644
index 00000000000..767ea2436e1
--- /dev/null
+++ b/tools/perf/Documentation/perfconfig.example
@@ -0,0 +1,29 @@
+[colors]
+
+ # These were the old defaults
+ top = red, lightgray
+ medium = green, lightgray
+ normal = black, lightgray
+ selected = lightgray, magenta
+ code = blue, lightgray
+ addr = magenta, lightgray
+
+[tui]
+
+ # Defaults if linked with libslang
+ report = on
+ annotate = on
+ top = on
+
+[buildid]
+
+ # Default, disable using /dev/null
+ dir = /root/.debug
+
+[annotate]
+
+ # Defaults
+ hide_src_code = false
+ use_offset = true
+ jump_arrows = true
+ show_nr_jumps = false
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
new file mode 100644
index 00000000000..45da209b6ed
--- /dev/null
+++ b/tools/perf/MANIFEST
@@ -0,0 +1,39 @@
+tools/perf
+tools/scripts
+tools/lib/traceevent
+tools/lib/api
+tools/lib/symbol/kallsyms.c
+tools/lib/symbol/kallsyms.h
+tools/include/asm/bug.h
+tools/include/linux/compiler.h
+tools/include/linux/hash.h
+tools/include/linux/export.h
+tools/include/linux/types.h
+include/linux/const.h
+include/linux/perf_event.h
+include/linux/rbtree.h
+include/linux/list.h
+include/linux/hash.h
+include/linux/stringify.h
+lib/rbtree.c
+include/linux/swab.h
+arch/*/include/asm/unistd*.h
+arch/*/include/asm/perf_regs.h
+arch/*/include/uapi/asm/unistd*.h
+arch/*/include/uapi/asm/perf_regs.h
+arch/*/lib/memcpy*.S
+arch/*/lib/memset*.S
+include/linux/poison.h
+include/linux/magic.h
+include/linux/hw_breakpoint.h
+include/linux/rbtree_augmented.h
+include/uapi/linux/perf_event.h
+include/uapi/linux/const.h
+include/uapi/linux/swab.h
+include/uapi/linux/hw_breakpoint.h
+arch/x86/include/asm/svm.h
+arch/x86/include/asm/vmx.h
+arch/x86/include/asm/kvm_host.h
+arch/x86/include/uapi/asm/svm.h
+arch/x86/include/uapi/asm/vmx.h
+arch/x86/include/uapi/asm/kvm.h
diff --git a/tools/perf/Makefile b/tools/perf/Makefile
new file mode 100644
index 00000000000..cb2e5868c8e
--- /dev/null
+++ b/tools/perf/Makefile
@@ -0,0 +1,90 @@
+#
+# This is a simple wrapper Makefile that calls the main Makefile.perf
+# with a -j option to do parallel builds
+#
+# If you want to invoke the perf build in some non-standard way then
+# you can use the 'make -f Makefile.perf' method to invoke it.
+#
+
+#
+# Clear out the built-in rules GNU make defines by default (such as .o targets),
+# so that we pass through all targets to Makefile.perf:
+#
+.SUFFIXES:
+
+#
+# We don't want to pass along options like -j:
+#
+unexport MAKEFLAGS
+
+#
+# Do a parallel build with multiple jobs, based on the number of CPUs online
+# in this system: 'make -j8' on a 8-CPU system, etc.
+#
+# (To override it, run 'make JOBS=1' and similar.)
+#
+ifeq ($(JOBS),)
+ JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null)
+ ifeq ($(JOBS),)
+ JOBS := 1
+ endif
+endif
+
+#
+# Only pass canonical directory names as the output directory:
+#
+ifneq ($(O),)
+ FULL_O := $(shell readlink -f $(O) || echo $(O))
+endif
+
+#
+# Only accept the 'DEBUG' variable from the command line:
+#
+ifeq ("$(origin DEBUG)", "command line")
+ ifeq ($(DEBUG),)
+ override DEBUG = 0
+ else
+ SET_DEBUG = "DEBUG=$(DEBUG)"
+ endif
+else
+ override DEBUG = 0
+endif
+
+define print_msg
+ @printf ' BUILD: Doing '\''make \033[33m-j'$(JOBS)'\033[m'\'' parallel build\n'
+endef
+
+define make
+ @$(MAKE) -f Makefile.perf --no-print-directory -j$(JOBS) O=$(FULL_O) $(SET_DEBUG) $@
+endef
+
+#
+# Needed if no target specified:
+# (Except for tags and TAGS targets. The reason is that the
+# Makefile does not treat tags/TAGS as targets but as files
+# and thus won't rebuilt them once they are in place.)
+#
+all tags TAGS:
+ $(print_msg)
+ $(make)
+
+#
+# The clean target is not really parallel, don't print the jobs info:
+#
+clean:
+ $(make)
+
+#
+# The build-test target is not really parallel, don't print the jobs info:
+#
+build-test:
+ @$(MAKE) -f tests/make --no-print-directory
+
+#
+# All other targets get passed through:
+#
+%:
+ $(print_msg)
+ $(make)
+
+.PHONY: tags TAGS
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
new file mode 100644
index 00000000000..9670a16fa57
--- /dev/null
+++ b/tools/perf/Makefile.perf
@@ -0,0 +1,938 @@
+include ../scripts/Makefile.include
+
+# The default target of this Makefile is...
+all:
+
+include config/utilities.mak
+
+# Define V to have a more verbose compile.
+#
+# Define VF to have a more verbose feature check output.
+#
+# Define O to save output files in a separate directory.
+#
+# Define ARCH as name of target architecture if you want cross-builds.
+#
+# Define CROSS_COMPILE as prefix name of compiler if you want cross-builds.
+#
+# Define NO_LIBPERL to disable perl script extension.
+#
+# Define NO_LIBPYTHON to disable python script extension.
+#
+# Define PYTHON to point to the python binary if the default
+# `python' is not correct; for example: PYTHON=python2
+#
+# Define PYTHON_CONFIG to point to the python-config binary if
+# the default `$(PYTHON)-config' is not correct.
+#
+# Define ASCIIDOC8 if you want to format documentation with AsciiDoc 8
+#
+# Define DOCBOOK_XSL_172 if you want to format man pages with DocBook XSL v1.72.
+#
+# Define LDFLAGS=-static to build a static binary.
+#
+# Define EXTRA_CFLAGS=-m64 or EXTRA_CFLAGS=-m32 as appropriate for cross-builds.
+#
+# Define NO_DWARF if you do not want debug-info analysis feature at all.
+#
+# Define WERROR=0 to disable treating any warnings as errors.
+#
+# Define NO_NEWT if you do not want TUI support. (deprecated)
+#
+# Define NO_SLANG if you do not want TUI support.
+#
+# Define NO_GTK2 if you do not want GTK+ GUI support.
+#
+# Define NO_DEMANGLE if you do not want C++ symbol demangling.
+#
+# Define NO_LIBELF if you do not want libelf dependency (e.g. cross-builds)
+#
+# Define NO_LIBUNWIND if you do not want libunwind dependency for dwarf
+# backtrace post unwind.
+#
+# Define NO_BACKTRACE if you do not want stack backtrace debug feature
+#
+# Define NO_LIBNUMA if you do not want numa perf benchmark
+#
+# Define NO_LIBAUDIT if you do not want libaudit support
+#
+# Define NO_LIBBIONIC if you do not want bionic support
+#
+# Define NO_LIBDW_DWARF_UNWIND if you do not want libdw support
+# for dwarf backtrace post unwind.
+
+ifeq ($(srctree),)
+srctree := $(patsubst %/,%,$(dir $(shell pwd)))
+srctree := $(patsubst %/,%,$(dir $(srctree)))
+#$(info Determined 'srctree' to be $(srctree))
+endif
+
+ifneq ($(objtree),)
+#$(info Determined 'objtree' to be $(objtree))
+endif
+
+ifneq ($(OUTPUT),)
+#$(info Determined 'OUTPUT' to be $(OUTPUT))
+endif
+
+$(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD
+ @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT)
+ @touch $(OUTPUT)PERF-VERSION-FILE
+
+CC = $(CROSS_COMPILE)gcc
+AR = $(CROSS_COMPILE)ar
+PKG_CONFIG = $(CROSS_COMPILE)pkg-config
+
+RM = rm -f
+LN = ln -f
+MKDIR = mkdir
+FIND = find
+INSTALL = install
+FLEX = flex
+BISON = bison
+STRIP = strip
+
+LIB_DIR = $(srctree)/tools/lib/api/
+TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/
+
+# include config/Makefile by default and rule out
+# non-config cases
+config := 1
+
+NON_CONFIG_TARGETS := clean TAGS tags cscope help
+
+ifdef MAKECMDGOALS
+ifeq ($(filter-out $(NON_CONFIG_TARGETS),$(MAKECMDGOALS)),)
+ config := 0
+endif
+endif
+
+ifeq ($(config),1)
+include config/Makefile
+endif
+
+export prefix bindir sharedir sysconfdir DESTDIR
+
+# sparse is architecture-neutral, which means that we need to tell it
+# explicitly what architecture to check for. Fix this up for yours..
+SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__
+
+# Guard against environment variables
+BUILTIN_OBJS =
+LIB_H =
+LIB_OBJS =
+GTK_OBJS =
+PYRF_OBJS =
+SCRIPT_SH =
+
+SCRIPT_SH += perf-archive.sh
+
+grep-libs = $(filter -l%,$(1))
+strip-libs = $(filter-out -l%,$(1))
+
+ifneq ($(OUTPUT),)
+ TE_PATH=$(OUTPUT)
+ifneq ($(subdir),)
+ LIB_PATH=$(OUTPUT)/../lib/api/
+else
+ LIB_PATH=$(OUTPUT)
+endif
+else
+ TE_PATH=$(TRACE_EVENT_DIR)
+ LIB_PATH=$(LIB_DIR)
+endif
+
+LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
+export LIBTRACEEVENT
+
+LIBAPIKFS = $(LIB_PATH)libapikfs.a
+export LIBAPIKFS
+
+# python extension build directories
+PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/
+PYTHON_EXTBUILD_LIB := $(PYTHON_EXTBUILD)lib/
+PYTHON_EXTBUILD_TMP := $(PYTHON_EXTBUILD)tmp/
+export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP
+
+python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so
+
+PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
+PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS)
+
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
+ $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
+ --quiet build_ext; \
+ mkdir -p $(OUTPUT)python && \
+ cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
+#
+# No Perl scripts right now:
+#
+
+SCRIPTS = $(patsubst %.sh,%,$(SCRIPT_SH))
+
+#
+# Single 'perf' binary right now:
+#
+PROGRAMS += $(OUTPUT)perf
+
+# what 'all' will build and 'install' will install, in perfexecdir
+ALL_PROGRAMS = $(PROGRAMS) $(SCRIPTS)
+
+# what 'all' will build but not install in perfexecdir
+OTHER_PROGRAMS = $(OUTPUT)perf
+
+# Set paths to tools early so that they can be used for version tests.
+ifndef SHELL_PATH
+ SHELL_PATH = /bin/sh
+endif
+ifndef PERL_PATH
+ PERL_PATH = /usr/bin/perl
+endif
+
+export PERL_PATH
+
+$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
+ $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+
+$(OUTPUT)util/parse-events-bison.c: util/parse-events.y
+ $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_
+
+$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
+ $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+
+$(OUTPUT)util/pmu-bison.c: util/pmu.y
+ $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_
+
+$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c
+$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c
+
+LIB_FILE=$(OUTPUT)libperf.a
+
+LIB_H += ../lib/symbol/kallsyms.h
+LIB_H += ../../include/uapi/linux/perf_event.h
+LIB_H += ../../include/linux/rbtree.h
+LIB_H += ../../include/linux/list.h
+LIB_H += ../../include/uapi/linux/const.h
+LIB_H += ../include/linux/hash.h
+LIB_H += ../../include/linux/stringify.h
+LIB_H += util/include/linux/bitmap.h
+LIB_H += util/include/linux/bitops.h
+LIB_H += ../include/linux/compiler.h
+LIB_H += util/include/linux/const.h
+LIB_H += util/include/linux/ctype.h
+LIB_H += util/include/linux/kernel.h
+LIB_H += util/include/linux/list.h
+LIB_H += ../include/linux/export.h
+LIB_H += util/include/linux/poison.h
+LIB_H += util/include/linux/rbtree.h
+LIB_H += util/include/linux/rbtree_augmented.h
+LIB_H += util/include/linux/string.h
+LIB_H += ../include/linux/types.h
+LIB_H += util/include/linux/linkage.h
+LIB_H += util/include/asm/asm-offsets.h
+LIB_H += ../include/asm/bug.h
+LIB_H += util/include/asm/byteorder.h
+LIB_H += util/include/asm/hweight.h
+LIB_H += util/include/asm/swab.h
+LIB_H += util/include/asm/system.h
+LIB_H += util/include/asm/uaccess.h
+LIB_H += util/include/dwarf-regs.h
+LIB_H += util/include/asm/dwarf2.h
+LIB_H += util/include/asm/cpufeature.h
+LIB_H += util/include/asm/unistd_32.h
+LIB_H += util/include/asm/unistd_64.h
+LIB_H += perf.h
+LIB_H += util/annotate.h
+LIB_H += util/cache.h
+LIB_H += util/callchain.h
+LIB_H += util/build-id.h
+LIB_H += util/debug.h
+LIB_H += util/pmu.h
+LIB_H += util/event.h
+LIB_H += util/evsel.h
+LIB_H += util/evlist.h
+LIB_H += util/exec_cmd.h
+LIB_H += util/levenshtein.h
+LIB_H += util/machine.h
+LIB_H += util/map.h
+LIB_H += util/parse-options.h
+LIB_H += util/parse-events.h
+LIB_H += util/quote.h
+LIB_H += util/util.h
+LIB_H += util/xyarray.h
+LIB_H += util/header.h
+LIB_H += util/help.h
+LIB_H += util/session.h
+LIB_H += util/strbuf.h
+LIB_H += util/strlist.h
+LIB_H += util/strfilter.h
+LIB_H += util/svghelper.h
+LIB_H += util/tool.h
+LIB_H += util/run-command.h
+LIB_H += util/sigchain.h
+LIB_H += util/dso.h
+LIB_H += util/symbol.h
+LIB_H += util/color.h
+LIB_H += util/values.h
+LIB_H += util/sort.h
+LIB_H += util/hist.h
+LIB_H += util/comm.h
+LIB_H += util/thread.h
+LIB_H += util/thread_map.h
+LIB_H += util/trace-event.h
+LIB_H += util/probe-finder.h
+LIB_H += util/dwarf-aux.h
+LIB_H += util/probe-event.h
+LIB_H += util/pstack.h
+LIB_H += util/cpumap.h
+LIB_H += util/top.h
+LIB_H += $(ARCH_INCLUDE)
+LIB_H += util/cgroup.h
+LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h
+LIB_H += util/target.h
+LIB_H += util/rblist.h
+LIB_H += util/intlist.h
+LIB_H += util/perf_regs.h
+LIB_H += util/unwind.h
+LIB_H += util/vdso.h
+LIB_H += ui/helpline.h
+LIB_H += ui/progress.h
+LIB_H += ui/util.h
+LIB_H += ui/ui.h
+LIB_H += util/data.h
+
+LIB_OBJS += $(OUTPUT)util/abspath.o
+LIB_OBJS += $(OUTPUT)util/alias.o
+LIB_OBJS += $(OUTPUT)util/annotate.o
+LIB_OBJS += $(OUTPUT)util/build-id.o
+LIB_OBJS += $(OUTPUT)util/config.o
+LIB_OBJS += $(OUTPUT)util/ctype.o
+LIB_OBJS += $(OUTPUT)util/pmu.o
+LIB_OBJS += $(OUTPUT)util/environment.o
+LIB_OBJS += $(OUTPUT)util/event.o
+LIB_OBJS += $(OUTPUT)util/evlist.o
+LIB_OBJS += $(OUTPUT)util/evsel.o
+LIB_OBJS += $(OUTPUT)util/exec_cmd.o
+LIB_OBJS += $(OUTPUT)util/help.o
+LIB_OBJS += $(OUTPUT)util/kallsyms.o
+LIB_OBJS += $(OUTPUT)util/levenshtein.o
+LIB_OBJS += $(OUTPUT)util/parse-options.o
+LIB_OBJS += $(OUTPUT)util/parse-events.o
+LIB_OBJS += $(OUTPUT)util/path.o
+LIB_OBJS += $(OUTPUT)util/rbtree.o
+LIB_OBJS += $(OUTPUT)util/bitmap.o
+LIB_OBJS += $(OUTPUT)util/hweight.o
+LIB_OBJS += $(OUTPUT)util/run-command.o
+LIB_OBJS += $(OUTPUT)util/quote.o
+LIB_OBJS += $(OUTPUT)util/strbuf.o
+LIB_OBJS += $(OUTPUT)util/string.o
+LIB_OBJS += $(OUTPUT)util/strlist.o
+LIB_OBJS += $(OUTPUT)util/strfilter.o
+LIB_OBJS += $(OUTPUT)util/top.o
+LIB_OBJS += $(OUTPUT)util/usage.o
+LIB_OBJS += $(OUTPUT)util/wrapper.o
+LIB_OBJS += $(OUTPUT)util/sigchain.o
+LIB_OBJS += $(OUTPUT)util/dso.o
+LIB_OBJS += $(OUTPUT)util/symbol.o
+LIB_OBJS += $(OUTPUT)util/symbol-elf.o
+LIB_OBJS += $(OUTPUT)util/color.o
+LIB_OBJS += $(OUTPUT)util/pager.o
+LIB_OBJS += $(OUTPUT)util/header.o
+LIB_OBJS += $(OUTPUT)util/callchain.o
+LIB_OBJS += $(OUTPUT)util/values.o
+LIB_OBJS += $(OUTPUT)util/debug.o
+LIB_OBJS += $(OUTPUT)util/machine.o
+LIB_OBJS += $(OUTPUT)util/map.o
+LIB_OBJS += $(OUTPUT)util/pstack.o
+LIB_OBJS += $(OUTPUT)util/session.o
+LIB_OBJS += $(OUTPUT)util/comm.o
+LIB_OBJS += $(OUTPUT)util/thread.o
+LIB_OBJS += $(OUTPUT)util/thread_map.o
+LIB_OBJS += $(OUTPUT)util/trace-event-parse.o
+LIB_OBJS += $(OUTPUT)util/parse-events-flex.o
+LIB_OBJS += $(OUTPUT)util/parse-events-bison.o
+LIB_OBJS += $(OUTPUT)util/pmu-flex.o
+LIB_OBJS += $(OUTPUT)util/pmu-bison.o
+LIB_OBJS += $(OUTPUT)util/trace-event-read.o
+LIB_OBJS += $(OUTPUT)util/trace-event-info.o
+LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o
+LIB_OBJS += $(OUTPUT)util/trace-event.o
+LIB_OBJS += $(OUTPUT)util/svghelper.o
+LIB_OBJS += $(OUTPUT)util/sort.o
+LIB_OBJS += $(OUTPUT)util/hist.o
+LIB_OBJS += $(OUTPUT)util/probe-event.o
+LIB_OBJS += $(OUTPUT)util/util.o
+LIB_OBJS += $(OUTPUT)util/xyarray.o
+LIB_OBJS += $(OUTPUT)util/cpumap.o
+LIB_OBJS += $(OUTPUT)util/cgroup.o
+LIB_OBJS += $(OUTPUT)util/target.o
+LIB_OBJS += $(OUTPUT)util/rblist.o
+LIB_OBJS += $(OUTPUT)util/intlist.o
+LIB_OBJS += $(OUTPUT)util/vdso.o
+LIB_OBJS += $(OUTPUT)util/stat.o
+LIB_OBJS += $(OUTPUT)util/record.o
+LIB_OBJS += $(OUTPUT)util/srcline.o
+LIB_OBJS += $(OUTPUT)util/data.o
+
+LIB_OBJS += $(OUTPUT)ui/setup.o
+LIB_OBJS += $(OUTPUT)ui/helpline.o
+LIB_OBJS += $(OUTPUT)ui/progress.o
+LIB_OBJS += $(OUTPUT)ui/util.o
+LIB_OBJS += $(OUTPUT)ui/hist.o
+LIB_OBJS += $(OUTPUT)ui/stdio/hist.o
+
+LIB_OBJS += $(OUTPUT)arch/common.o
+
+LIB_OBJS += $(OUTPUT)tests/parse-events.o
+LIB_OBJS += $(OUTPUT)tests/dso-data.o
+LIB_OBJS += $(OUTPUT)tests/attr.o
+LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o
+LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o
+LIB_OBJS += $(OUTPUT)tests/mmap-basic.o
+LIB_OBJS += $(OUTPUT)tests/perf-record.o
+LIB_OBJS += $(OUTPUT)tests/rdpmc.o
+LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o
+LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o
+LIB_OBJS += $(OUTPUT)tests/pmu.o
+LIB_OBJS += $(OUTPUT)tests/hists_common.o
+LIB_OBJS += $(OUTPUT)tests/hists_link.o
+LIB_OBJS += $(OUTPUT)tests/hists_filter.o
+LIB_OBJS += $(OUTPUT)tests/hists_output.o
+LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o
+LIB_OBJS += $(OUTPUT)tests/python-use.o
+LIB_OBJS += $(OUTPUT)tests/bp_signal.o
+LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o
+LIB_OBJS += $(OUTPUT)tests/task-exit.o
+LIB_OBJS += $(OUTPUT)tests/sw-clock.o
+ifeq ($(ARCH),x86)
+LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o
+endif
+LIB_OBJS += $(OUTPUT)tests/code-reading.o
+LIB_OBJS += $(OUTPUT)tests/sample-parsing.o
+LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o
+ifndef NO_DWARF_UNWIND
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
+LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o
+endif
+endif
+LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o
+LIB_OBJS += $(OUTPUT)tests/thread-mg-share.o
+
+BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o
+BUILTIN_OBJS += $(OUTPUT)builtin-bench.o
+# Benchmark modules
+BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o
+BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o
+ifeq ($(RAW_ARCH),x86_64)
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o
+endif
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o
+BUILTIN_OBJS += $(OUTPUT)bench/mem-memset.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o
+BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o
+
+BUILTIN_OBJS += $(OUTPUT)builtin-diff.o
+BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o
+BUILTIN_OBJS += $(OUTPUT)builtin-help.o
+BUILTIN_OBJS += $(OUTPUT)builtin-sched.o
+BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o
+BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o
+BUILTIN_OBJS += $(OUTPUT)builtin-list.o
+BUILTIN_OBJS += $(OUTPUT)builtin-record.o
+BUILTIN_OBJS += $(OUTPUT)builtin-report.o
+BUILTIN_OBJS += $(OUTPUT)builtin-stat.o
+BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o
+BUILTIN_OBJS += $(OUTPUT)builtin-top.o
+BUILTIN_OBJS += $(OUTPUT)builtin-script.o
+BUILTIN_OBJS += $(OUTPUT)builtin-probe.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o
+BUILTIN_OBJS += $(OUTPUT)builtin-lock.o
+BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o
+BUILTIN_OBJS += $(OUTPUT)builtin-inject.o
+BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o
+BUILTIN_OBJS += $(OUTPUT)builtin-mem.o
+
+PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT)
+
+# We choose to avoid "if .. else if .. else .. endif endif"
+# because maintaining the nesting to match is a pain. If
+# we had "elif" things would have been much nicer...
+
+-include arch/$(ARCH)/Makefile
+
+ifneq ($(OUTPUT),)
+ CFLAGS += -I$(OUTPUT)
+endif
+
+ifdef NO_LIBELF
+EXTLIBS := $(filter-out -lelf,$(EXTLIBS))
+
+# Remove ELF/DWARF dependent codes
+LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS))
+LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS))
+
+BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS))
+
+# Use minimal symbol handling
+LIB_OBJS += $(OUTPUT)util/symbol-minimal.o
+
+else # NO_LIBELF
+ifndef NO_DWARF
+ LIB_OBJS += $(OUTPUT)util/probe-finder.o
+ LIB_OBJS += $(OUTPUT)util/dwarf-aux.o
+endif # NO_DWARF
+endif # NO_LIBELF
+
+ifndef NO_LIBDW_DWARF_UNWIND
+ LIB_OBJS += $(OUTPUT)util/unwind-libdw.o
+ LIB_H += util/unwind-libdw.h
+endif
+
+ifndef NO_LIBUNWIND
+ LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o
+endif
+LIB_OBJS += $(OUTPUT)tests/keep-tracking.o
+
+ifndef NO_LIBAUDIT
+ BUILTIN_OBJS += $(OUTPUT)builtin-trace.o
+endif
+
+ifndef NO_SLANG
+ LIB_OBJS += $(OUTPUT)ui/browser.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/hists.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/map.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o
+ LIB_OBJS += $(OUTPUT)ui/browsers/header.o
+ LIB_OBJS += $(OUTPUT)ui/tui/setup.o
+ LIB_OBJS += $(OUTPUT)ui/tui/util.o
+ LIB_OBJS += $(OUTPUT)ui/tui/helpline.o
+ LIB_OBJS += $(OUTPUT)ui/tui/progress.o
+ LIB_H += ui/tui/tui.h
+ LIB_H += ui/browser.h
+ LIB_H += ui/browsers/map.h
+ LIB_H += ui/keysyms.h
+ LIB_H += ui/libslang.h
+endif
+
+ifndef NO_GTK2
+ ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so
+
+ GTK_OBJS += $(OUTPUT)ui/gtk/browser.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/hists.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/setup.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/util.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/progress.o
+ GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o
+
+install-gtk: $(OUTPUT)libperf-gtk.so
+ $(call QUIET_INSTALL, 'GTK UI') \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(libdir_SQ)'; \
+ $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)'
+endif
+
+ifndef NO_LIBPERL
+ LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o
+ LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o
+endif
+
+ifndef NO_LIBPYTHON
+ LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o
+ LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o
+endif
+
+ifeq ($(NO_PERF_REGS),0)
+ ifeq ($(ARCH),x86)
+ LIB_H += arch/x86/include/perf_regs.h
+ endif
+ LIB_OBJS += $(OUTPUT)util/perf_regs.o
+endif
+
+ifndef NO_LIBNUMA
+ BUILTIN_OBJS += $(OUTPUT)bench/numa.o
+endif
+
+ifdef ASCIIDOC8
+ export ASCIIDOC8
+endif
+
+LIBS = -Wl,--whole-archive $(PERFLIBS) -Wl,--no-whole-archive -Wl,--start-group $(EXTLIBS) -Wl,--end-group
+
+export INSTALL SHELL_PATH
+
+### Build rules
+
+SHELL = $(SHELL_PATH)
+
+all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS)
+
+please_set_SHELL_PATH_to_a_more_modern_shell:
+ @$$(:)
+
+shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell
+
+strip: $(PROGRAMS) $(OUTPUT)perf
+ $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf
+
+$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ $(CFLAGS) -c $(filter %.c,$^) -o $@
+
+$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \
+ $(BUILTIN_OBJS) $(LIBS) -o $@
+
+$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H)
+ $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $<
+
+$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS)
+ $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS)
+
+$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ '-DPERF_MAN_PATH="$(mandir_SQ)"' \
+ '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_HTML_PATH="$(htmldir_SQ)"' \
+ '-DPERF_MAN_PATH="$(mandir_SQ)"' \
+ '-DPERF_INFO_PATH="$(infodir_SQ)"' $<
+
+$(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt
+
+$(OUTPUT)common-cmds.h: $(wildcard Documentation/perf-*.txt)
+ $(QUIET_GEN). util/generate-cmdlist.sh > $@+ && mv $@+ $@
+
+$(SCRIPTS) : % : %.sh
+ $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@'
+
+# These can record PERF_VERSION
+$(OUTPUT)perf.o perf.spec \
+ $(SCRIPTS) \
+ : $(OUTPUT)PERF-VERSION-FILE
+
+.SUFFIXES:
+
+#
+# If a target does not match any of the later rules then prefix it by $(OUTPUT)
+# This makes targets like 'make O=/tmp/perf perf.o' work in a natural way.
+#
+ifneq ($(OUTPUT),)
+%.o: $(OUTPUT)%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+util/%.o: $(OUTPUT)util/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+bench/%.o: $(OUTPUT)bench/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+tests/%.o: $(OUTPUT)tests/%.o
+ @echo " # Redirected target $@ => $(OUTPUT)$@"
+endif
+
+# These two need to be here so that when O= is not used they take precedence
+# over the general rule for .o
+
+$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $<
+
+$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $<
+
+$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $<
+$(OUTPUT)%.o: %.S
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+$(OUTPUT)%.s: %.S
+ $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $<
+
+$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \
+ '-DPREFIX="$(prefix_SQ)"' \
+ $<
+
+$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \
+ $<
+
+$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \
+ -DPYTHONPATH='"$(OUTPUT)python"' \
+ -DPYTHON='"$(PYTHON_WORD)"' \
+ $<
+
+$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $<
+
+$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $<
+
+$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $<
+
+$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $<
+
+$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $<
+
+$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $<
+
+$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $<
+
+$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $<
+
+$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $<
+
+$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS
+ $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $<
+
+$(OUTPUT)perf-%: %.o $(PERFLIBS)
+ $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS)
+
+$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H)
+$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h)
+
+# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So
+# we depend the various files onto their directories.
+DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS)
+DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h
+# no need to add flex objects, because they depend on bison ones
+DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c
+DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c
+
+OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS)))
+
+$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES)
+# In the second step, we make a rule to actually create these directories
+$(OUTPUT_DIRECTORIES):
+ $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null
+
+$(LIB_FILE): $(LIB_OBJS)
+ $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS)
+
+# libtraceevent.a
+TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch])
+
+LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT)
+LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)"
+LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
+
+$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS
+ $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins
+
+$(LIBTRACEEVENT)-clean:
+ $(call QUIET_CLEAN, libtraceevent)
+ @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null
+
+install-traceevent-plugins: $(LIBTRACEEVENT)
+ $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins
+
+LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch])
+
+# if subdir is set, we've been called from above so target has been built
+# already
+$(LIBAPIKFS): $(LIBAPIKFS_SOURCES)
+ifeq ($(subdir),)
+ $(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a
+endif
+
+$(LIBAPIKFS)-clean:
+ifeq ($(subdir),)
+ $(call QUIET_CLEAN, libapikfs)
+ @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null
+endif
+
+help:
+ @echo 'Perf make targets:'
+ @echo ' doc - make *all* documentation (see below)'
+ @echo ' man - make manpage documentation (access with man <foo>)'
+ @echo ' html - make html documentation'
+ @echo ' info - make GNU info documentation (access with info <foo>)'
+ @echo ' pdf - make pdf documentation'
+ @echo ' TAGS - use etags to make tag information for source browsing'
+ @echo ' tags - use ctags to make tag information for source browsing'
+ @echo ' cscope - use cscope to make interactive browsing database'
+ @echo ''
+ @echo 'Perf install targets:'
+ @echo ' NOTE: documentation build requires asciidoc, xmlto packages to be installed'
+ @echo ' HINT: use "prefix" or "DESTDIR" to install to a particular'
+ @echo ' path like "make prefix=/usr/local install install-doc"'
+ @echo ' install - install compiled binaries'
+ @echo ' install-doc - install *all* documentation'
+ @echo ' install-man - install manpage documentation'
+ @echo ' install-html - install html documentation'
+ @echo ' install-info - install GNU info documentation'
+ @echo ' install-pdf - install pdf documentation'
+ @echo ''
+ @echo ' quick-install-doc - alias for quick-install-man'
+ @echo ' quick-install-man - install the documentation quickly'
+ @echo ' quick-install-html - install the html documentation quickly'
+ @echo ''
+ @echo 'Perf maintainer targets:'
+ @echo ' clean - clean all binary objects and build output'
+
+
+DOC_TARGETS := doc man html info pdf
+
+INSTALL_DOC_TARGETS := $(patsubst %,install-%,$(DOC_TARGETS)) try-install-man
+INSTALL_DOC_TARGETS += quick-install-doc quick-install-man quick-install-html
+
+# 'make doc' should call 'make -C Documentation all'
+$(DOC_TARGETS):
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:doc=all)
+
+TAG_FOLDERS= . ../lib/traceevent ../lib/api ../lib/symbol
+TAG_FILES= ../../include/uapi/linux/perf_event.h
+
+TAGS:
+ $(QUIET_GEN)$(RM) TAGS; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs etags -a $(TAG_FILES)
+
+tags:
+ $(QUIET_GEN)$(RM) tags; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs ctags -a $(TAG_FILES)
+
+cscope:
+ $(QUIET_GEN)$(RM) cscope*; \
+ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES)
+
+### Detect prefix changes
+TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\
+ $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ)
+
+$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS
+ @FLAGS='$(TRACK_CFLAGS)'; \
+ if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \
+ echo 1>&2 " FLAGS: * new build flags or prefix"; \
+ echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \
+ fi
+
+### Testing rules
+
+# GNU make supports exporting all variables by "export" without parameters.
+# However, the environment gets quite big, and some programs have problems
+# with that.
+
+check: $(OUTPUT)common-cmds.h
+ if sparse; \
+ then \
+ for i in *.c */*.c; \
+ do \
+ sparse $(CFLAGS) $(SPARSE_FLAGS) $$i || exit; \
+ done; \
+ else \
+ exit 1; \
+ fi
+
+### Installation rules
+
+install-gtk:
+
+install-bin: all install-gtk
+ $(call QUIET_INSTALL, binaries) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
+ $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
+ $(LN) '$(DESTDIR_SQ)$(bindir_SQ)/perf' '$(DESTDIR_SQ)$(bindir_SQ)/trace'
+ $(call QUIET_INSTALL, libexec) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ $(call QUIET_INSTALL, perf-archive) \
+ $(INSTALL) $(OUTPUT)perf-archive -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)'
+ifndef NO_LIBPERL
+ $(call QUIET_INSTALL, perl-scripts) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/perl/*.pl -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'; \
+ $(INSTALL) scripts/perl/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/perl/bin'
+endif
+ifndef NO_LIBPYTHON
+ $(call QUIET_INSTALL, python-scripts) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'; \
+ $(INSTALL) scripts/python/Perf-Trace-Util/lib/Perf/Trace/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/Perf-Trace-Util/lib/Perf/Trace'; \
+ $(INSTALL) scripts/python/*.py -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python'; \
+ $(INSTALL) scripts/python/bin/* -t '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/scripts/python/bin'
+endif
+ $(call QUIET_INSTALL, perf_completion-script) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
+ $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+ $(call QUIET_INSTALL, tests) \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
+ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
+ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
+
+install: install-bin try-install-man install-traceevent-plugins
+
+install-python_ext:
+ $(PYTHON_WORD) util/setup.py --quiet install --root='/$(DESTDIR_SQ)'
+
+# 'make install-doc' should call 'make -C Documentation install'
+$(INSTALL_DOC_TARGETS):
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) $(@:-doc=)
+
+### Cleaning rules
+
+#
+# This is here, not in config/Makefile, because config/Makefile does
+# not get included for the clean target:
+#
+config-clean:
+ $(call QUIET_CLEAN, config)
+ @$(MAKE) -C config/feature-checks clean >/dev/null
+
+clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean
+ $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS)
+ $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf
+ $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex*
+ $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean
+ $(python-clean)
+
+#
+# Trick: if ../../.git does not exist - we are building out of tree for example,
+# then force version regeneration:
+#
+ifeq ($(wildcard ../../.git/HEAD),)
+ GIT-HEAD-PHONY = ../../.git/HEAD
+else
+ GIT-HEAD-PHONY =
+endif
+
+.PHONY: all install clean config-clean strip install-gtk
+.PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
+.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS
+
diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile
new file mode 100644
index 00000000000..09d62153d38
--- /dev/null
+++ b/tools/perf/arch/arm/Makefile
@@ -0,0 +1,14 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
+ifndef NO_LIBUNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
+ifndef NO_LIBDW_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
+endif
+ifndef NO_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
+endif
diff --git a/tools/perf/arch/arm/include/perf_regs.h b/tools/perf/arch/arm/include/perf_regs.h
new file mode 100644
index 00000000000..f619c9c5a4b
--- /dev/null
+++ b/tools/perf/arch/arm/include/perf_regs.h
@@ -0,0 +1,59 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_ARM_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+
+#define PERF_REG_IP PERF_REG_ARM_PC
+#define PERF_REG_SP PERF_REG_ARM_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM_R0:
+ return "r0";
+ case PERF_REG_ARM_R1:
+ return "r1";
+ case PERF_REG_ARM_R2:
+ return "r2";
+ case PERF_REG_ARM_R3:
+ return "r3";
+ case PERF_REG_ARM_R4:
+ return "r4";
+ case PERF_REG_ARM_R5:
+ return "r5";
+ case PERF_REG_ARM_R6:
+ return "r6";
+ case PERF_REG_ARM_R7:
+ return "r7";
+ case PERF_REG_ARM_R8:
+ return "r8";
+ case PERF_REG_ARM_R9:
+ return "r9";
+ case PERF_REG_ARM_R10:
+ return "r10";
+ case PERF_REG_ARM_FP:
+ return "fp";
+ case PERF_REG_ARM_IP:
+ return "ip";
+ case PERF_REG_ARM_SP:
+ return "sp";
+ case PERF_REG_ARM_LR:
+ return "lr";
+ case PERF_REG_ARM_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm/tests/dwarf-unwind.c b/tools/perf/arch/arm/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..9f870d27cb3
--- /dev/null
+++ b/tools/perf/arch/arm/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_ARM_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm/tests/regs_load.S b/tools/perf/arch/arm/tests/regs_load.S
new file mode 100644
index 00000000000..e09e983946f
--- /dev/null
+++ b/tools/perf/arch/arm/tests/regs_load.S
@@ -0,0 +1,58 @@
+#include <linux/linkage.h>
+
+#define R0 0x00
+#define R1 0x08
+#define R2 0x10
+#define R3 0x18
+#define R4 0x20
+#define R5 0x28
+#define R6 0x30
+#define R7 0x38
+#define R8 0x40
+#define R9 0x48
+#define SL 0x50
+#define FP 0x58
+#define IP 0x60
+#define SP 0x68
+#define LR 0x70
+#define PC 0x78
+
+/*
+ * Implementation of void perf_regs_load(u64 *regs);
+ *
+ * This functions fills in the 'regs' buffer from the actual registers values,
+ * in the way the perf built-in unwinding test expects them:
+ * - the PC at the time at the call to this function. Since this function
+ * is called using a bl instruction, the PC value is taken from LR.
+ * The built-in unwinding test then unwinds the call stack from the dwarf
+ * information in unwind__get_entries.
+ *
+ * Notes:
+ * - the 8 bytes stride in the registers offsets comes from the fact
+ * that the registers are stored in an u64 array (u64 *regs),
+ * - the regs buffer needs to be zeroed before the call to this function,
+ * in this case using a calloc in dwarf-unwind.c.
+ */
+
+.text
+.type perf_regs_load,%function
+ENTRY(perf_regs_load)
+ str r0, [r0, #R0]
+ str r1, [r0, #R1]
+ str r2, [r0, #R2]
+ str r3, [r0, #R3]
+ str r4, [r0, #R4]
+ str r5, [r0, #R5]
+ str r6, [r0, #R6]
+ str r7, [r0, #R7]
+ str r8, [r0, #R8]
+ str r9, [r0, #R9]
+ str sl, [r0, #SL]
+ str fp, [r0, #FP]
+ str ip, [r0, #IP]
+ str sp, [r0, #SP]
+ str lr, [r0, #LR]
+ str lr, [r0, #PC] // store pc as lr in order to skip the call
+ // to this function
+ mov pc, lr
+ENDPROC(perf_regs_load)
diff --git a/tools/perf/arch/arm/util/dwarf-regs.c b/tools/perf/arch/arm/util/dwarf-regs.c
new file mode 100644
index 00000000000..33ec5b339da
--- /dev/null
+++ b/tools/perf/arch/arm/util/dwarf-regs.c
@@ -0,0 +1,64 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+};
+
+#define STR(s) #s
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = STR(%r##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+
+/*
+ * Reference:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0040a/IHI0040A_aadwarf.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ REG_DWARFNUM_NAME("%fp", 11),
+ REG_DWARFNUM_NAME("%ip", 12),
+ REG_DWARFNUM_NAME("%sp", 13),
+ REG_DWARFNUM_NAME("%lr", 14),
+ REG_DWARFNUM_NAME("%pc", 15),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - lookup register name from it's DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from it's DWARF register number. If the register is not
+ * found in the table, this returns NULL;
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c
new file mode 100644
index 00000000000..b4176c60117
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libdw.c
@@ -0,0 +1,36 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[PERF_REG_ARM_MAX];
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_ARM_##r); \
+ val; \
+})
+
+ dwarf_regs[0] = REG(R0);
+ dwarf_regs[1] = REG(R1);
+ dwarf_regs[2] = REG(R2);
+ dwarf_regs[3] = REG(R3);
+ dwarf_regs[4] = REG(R4);
+ dwarf_regs[5] = REG(R5);
+ dwarf_regs[6] = REG(R6);
+ dwarf_regs[7] = REG(R7);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(FP);
+ dwarf_regs[12] = REG(IP);
+ dwarf_regs[13] = REG(SP);
+ dwarf_regs[14] = REG(LR);
+ dwarf_regs[15] = REG(PC);
+
+ return dwfl_thread_state_registers(thread, 0, PERF_REG_ARM_MAX,
+ dwarf_regs);
+}
diff --git a/tools/perf/arch/arm/util/unwind-libunwind.c b/tools/perf/arch/arm/util/unwind-libunwind.c
new file mode 100644
index 00000000000..729ed69a666
--- /dev/null
+++ b/tools/perf/arch/arm/util/unwind-libunwind.c
@@ -0,0 +1,48 @@
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_ARM_R0:
+ return PERF_REG_ARM_R0;
+ case UNW_ARM_R1:
+ return PERF_REG_ARM_R1;
+ case UNW_ARM_R2:
+ return PERF_REG_ARM_R2;
+ case UNW_ARM_R3:
+ return PERF_REG_ARM_R3;
+ case UNW_ARM_R4:
+ return PERF_REG_ARM_R4;
+ case UNW_ARM_R5:
+ return PERF_REG_ARM_R5;
+ case UNW_ARM_R6:
+ return PERF_REG_ARM_R6;
+ case UNW_ARM_R7:
+ return PERF_REG_ARM_R7;
+ case UNW_ARM_R8:
+ return PERF_REG_ARM_R8;
+ case UNW_ARM_R9:
+ return PERF_REG_ARM_R9;
+ case UNW_ARM_R10:
+ return PERF_REG_ARM_R10;
+ case UNW_ARM_R11:
+ return PERF_REG_ARM_FP;
+ case UNW_ARM_R12:
+ return PERF_REG_ARM_IP;
+ case UNW_ARM_R13:
+ return PERF_REG_ARM_SP;
+ case UNW_ARM_R14:
+ return PERF_REG_ARM_LR;
+ case UNW_ARM_R15:
+ return PERF_REG_ARM_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile
new file mode 100644
index 00000000000..67e9b3d38e8
--- /dev/null
+++ b/tools/perf/arch/arm64/Makefile
@@ -0,0 +1,7 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
+ifndef NO_LIBUNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
new file mode 100644
index 00000000000..e9441b9e2a3
--- /dev/null
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -0,0 +1,88 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+#define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1)
+#define PERF_REG_IP PERF_REG_ARM64_PC
+#define PERF_REG_SP PERF_REG_ARM64_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_ARM64_X0:
+ return "x0";
+ case PERF_REG_ARM64_X1:
+ return "x1";
+ case PERF_REG_ARM64_X2:
+ return "x2";
+ case PERF_REG_ARM64_X3:
+ return "x3";
+ case PERF_REG_ARM64_X4:
+ return "x4";
+ case PERF_REG_ARM64_X5:
+ return "x5";
+ case PERF_REG_ARM64_X6:
+ return "x6";
+ case PERF_REG_ARM64_X7:
+ return "x7";
+ case PERF_REG_ARM64_X8:
+ return "x8";
+ case PERF_REG_ARM64_X9:
+ return "x9";
+ case PERF_REG_ARM64_X10:
+ return "x10";
+ case PERF_REG_ARM64_X11:
+ return "x11";
+ case PERF_REG_ARM64_X12:
+ return "x12";
+ case PERF_REG_ARM64_X13:
+ return "x13";
+ case PERF_REG_ARM64_X14:
+ return "x14";
+ case PERF_REG_ARM64_X15:
+ return "x15";
+ case PERF_REG_ARM64_X16:
+ return "x16";
+ case PERF_REG_ARM64_X17:
+ return "x17";
+ case PERF_REG_ARM64_X18:
+ return "x18";
+ case PERF_REG_ARM64_X19:
+ return "x19";
+ case PERF_REG_ARM64_X20:
+ return "x20";
+ case PERF_REG_ARM64_X21:
+ return "x21";
+ case PERF_REG_ARM64_X22:
+ return "x22";
+ case PERF_REG_ARM64_X23:
+ return "x23";
+ case PERF_REG_ARM64_X24:
+ return "x24";
+ case PERF_REG_ARM64_X25:
+ return "x25";
+ case PERF_REG_ARM64_X26:
+ return "x26";
+ case PERF_REG_ARM64_X27:
+ return "x27";
+ case PERF_REG_ARM64_X28:
+ return "x28";
+ case PERF_REG_ARM64_X29:
+ return "x29";
+ case PERF_REG_ARM64_SP:
+ return "sp";
+ case PERF_REG_ARM64_LR:
+ return "lr";
+ case PERF_REG_ARM64_PC:
+ return "pc";
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/arm64/util/dwarf-regs.c b/tools/perf/arch/arm64/util/dwarf-regs.c
new file mode 100644
index 00000000000..d49efeb8172
--- /dev/null
+++ b/tools/perf/arch/arm64/util/dwarf-regs.c
@@ -0,0 +1,80 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Will Deacon, ARM Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+};
+
+#define STR(s) #s
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = STR(%x##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+
+/*
+ * Reference:
+ * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0057b/IHI0057B_aadwarf64.pdf
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ GPR_DWARFNUM_NAME(11),
+ GPR_DWARFNUM_NAME(12),
+ GPR_DWARFNUM_NAME(13),
+ GPR_DWARFNUM_NAME(14),
+ GPR_DWARFNUM_NAME(15),
+ GPR_DWARFNUM_NAME(16),
+ GPR_DWARFNUM_NAME(17),
+ GPR_DWARFNUM_NAME(18),
+ GPR_DWARFNUM_NAME(19),
+ GPR_DWARFNUM_NAME(20),
+ GPR_DWARFNUM_NAME(21),
+ GPR_DWARFNUM_NAME(22),
+ GPR_DWARFNUM_NAME(23),
+ GPR_DWARFNUM_NAME(24),
+ GPR_DWARFNUM_NAME(25),
+ GPR_DWARFNUM_NAME(26),
+ GPR_DWARFNUM_NAME(27),
+ GPR_DWARFNUM_NAME(28),
+ GPR_DWARFNUM_NAME(29),
+ REG_DWARFNUM_NAME("%lr", 30),
+ REG_DWARFNUM_NAME("%sp", 31),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - lookup register name from it's DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from it's DWARF register number. If the register is not
+ * found in the table, this returns NULL;
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
diff --git a/tools/perf/arch/arm64/util/unwind-libunwind.c b/tools/perf/arch/arm64/util/unwind-libunwind.c
new file mode 100644
index 00000000000..436ee43859d
--- /dev/null
+++ b/tools/perf/arch/arm64/util/unwind-libunwind.c
@@ -0,0 +1,82 @@
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+
+int libunwind__arch_reg_id(int regnum)
+{
+ switch (regnum) {
+ case UNW_AARCH64_X0:
+ return PERF_REG_ARM64_X0;
+ case UNW_AARCH64_X1:
+ return PERF_REG_ARM64_X1;
+ case UNW_AARCH64_X2:
+ return PERF_REG_ARM64_X2;
+ case UNW_AARCH64_X3:
+ return PERF_REG_ARM64_X3;
+ case UNW_AARCH64_X4:
+ return PERF_REG_ARM64_X4;
+ case UNW_AARCH64_X5:
+ return PERF_REG_ARM64_X5;
+ case UNW_AARCH64_X6:
+ return PERF_REG_ARM64_X6;
+ case UNW_AARCH64_X7:
+ return PERF_REG_ARM64_X7;
+ case UNW_AARCH64_X8:
+ return PERF_REG_ARM64_X8;
+ case UNW_AARCH64_X9:
+ return PERF_REG_ARM64_X9;
+ case UNW_AARCH64_X10:
+ return PERF_REG_ARM64_X10;
+ case UNW_AARCH64_X11:
+ return PERF_REG_ARM64_X11;
+ case UNW_AARCH64_X12:
+ return PERF_REG_ARM64_X12;
+ case UNW_AARCH64_X13:
+ return PERF_REG_ARM64_X13;
+ case UNW_AARCH64_X14:
+ return PERF_REG_ARM64_X14;
+ case UNW_AARCH64_X15:
+ return PERF_REG_ARM64_X15;
+ case UNW_AARCH64_X16:
+ return PERF_REG_ARM64_X16;
+ case UNW_AARCH64_X17:
+ return PERF_REG_ARM64_X17;
+ case UNW_AARCH64_X18:
+ return PERF_REG_ARM64_X18;
+ case UNW_AARCH64_X19:
+ return PERF_REG_ARM64_X19;
+ case UNW_AARCH64_X20:
+ return PERF_REG_ARM64_X20;
+ case UNW_AARCH64_X21:
+ return PERF_REG_ARM64_X21;
+ case UNW_AARCH64_X22:
+ return PERF_REG_ARM64_X22;
+ case UNW_AARCH64_X23:
+ return PERF_REG_ARM64_X23;
+ case UNW_AARCH64_X24:
+ return PERF_REG_ARM64_X24;
+ case UNW_AARCH64_X25:
+ return PERF_REG_ARM64_X25;
+ case UNW_AARCH64_X26:
+ return PERF_REG_ARM64_X26;
+ case UNW_AARCH64_X27:
+ return PERF_REG_ARM64_X27;
+ case UNW_AARCH64_X28:
+ return PERF_REG_ARM64_X28;
+ case UNW_AARCH64_X29:
+ return PERF_REG_ARM64_X29;
+ case UNW_AARCH64_X30:
+ return PERF_REG_ARM64_LR;
+ case UNW_AARCH64_SP:
+ return PERF_REG_ARM64_SP;
+ case UNW_AARCH64_PC:
+ return PERF_REG_ARM64_PC;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return -EINVAL;
+}
diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
new file mode 100644
index 00000000000..42faf369211
--- /dev/null
+++ b/tools/perf/arch/common.c
@@ -0,0 +1,211 @@
+#include <stdio.h>
+#include <sys/utsname.h>
+#include "common.h"
+#include "../util/debug.h"
+
+const char *const arm_triplets[] = {
+ "arm-eabi-",
+ "arm-linux-androideabi-",
+ "arm-unknown-linux-",
+ "arm-unknown-linux-gnu-",
+ "arm-unknown-linux-gnueabi-",
+ NULL
+};
+
+const char *const powerpc_triplets[] = {
+ "powerpc-unknown-linux-gnu-",
+ "powerpc64-unknown-linux-gnu-",
+ NULL
+};
+
+const char *const s390_triplets[] = {
+ "s390-ibm-linux-",
+ NULL
+};
+
+const char *const sh_triplets[] = {
+ "sh-unknown-linux-gnu-",
+ "sh64-unknown-linux-gnu-",
+ NULL
+};
+
+const char *const sparc_triplets[] = {
+ "sparc-unknown-linux-gnu-",
+ "sparc64-unknown-linux-gnu-",
+ NULL
+};
+
+const char *const x86_triplets[] = {
+ "x86_64-pc-linux-gnu-",
+ "x86_64-unknown-linux-gnu-",
+ "i686-pc-linux-gnu-",
+ "i586-pc-linux-gnu-",
+ "i486-pc-linux-gnu-",
+ "i386-pc-linux-gnu-",
+ "i686-linux-android-",
+ "i686-android-linux-",
+ NULL
+};
+
+const char *const mips_triplets[] = {
+ "mips-unknown-linux-gnu-",
+ "mipsel-linux-android-",
+ NULL
+};
+
+static bool lookup_path(char *name)
+{
+ bool found = false;
+ char *path, *tmp;
+ char buf[PATH_MAX];
+ char *env = getenv("PATH");
+
+ if (!env)
+ return false;
+
+ env = strdup(env);
+ if (!env)
+ return false;
+
+ path = strtok_r(env, ":", &tmp);
+ while (path) {
+ scnprintf(buf, sizeof(buf), "%s/%s", path, name);
+ if (access(buf, F_OK) == 0) {
+ found = true;
+ break;
+ }
+ path = strtok_r(NULL, ":", &tmp);
+ }
+ free(env);
+ return found;
+}
+
+static int lookup_triplets(const char *const *triplets, const char *name)
+{
+ int i;
+ char buf[PATH_MAX];
+
+ for (i = 0; triplets[i] != NULL; i++) {
+ scnprintf(buf, sizeof(buf), "%s%s", triplets[i], name);
+ if (lookup_path(buf))
+ return i;
+ }
+ return -1;
+}
+
+/*
+ * Return architecture name in a normalized form.
+ * The conversion logic comes from the Makefile.
+ */
+static const char *normalize_arch(char *arch)
+{
+ if (!strcmp(arch, "x86_64"))
+ return "x86";
+ if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6')
+ return "x86";
+ if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5))
+ return "sparc";
+ if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110"))
+ return "arm";
+ if (!strncmp(arch, "s390", 4))
+ return "s390";
+ if (!strncmp(arch, "parisc", 6))
+ return "parisc";
+ if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3))
+ return "powerpc";
+ if (!strncmp(arch, "mips", 4))
+ return "mips";
+ if (!strncmp(arch, "sh", 2) && isdigit(arch[2]))
+ return "sh";
+
+ return arch;
+}
+
+static int perf_session_env__lookup_binutils_path(struct perf_session_env *env,
+ const char *name,
+ const char **path)
+{
+ int idx;
+ const char *arch, *cross_env;
+ struct utsname uts;
+ const char *const *path_list;
+ char *buf = NULL;
+
+ arch = normalize_arch(env->arch);
+
+ if (uname(&uts) < 0)
+ goto out;
+
+ /*
+ * We don't need to try to find objdump path for native system.
+ * Just use default binutils path (e.g.: "objdump").
+ */
+ if (!strcmp(normalize_arch(uts.machine), arch))
+ goto out;
+
+ cross_env = getenv("CROSS_COMPILE");
+ if (cross_env) {
+ if (asprintf(&buf, "%s%s", cross_env, name) < 0)
+ goto out_error;
+ if (buf[0] == '/') {
+ if (access(buf, F_OK) == 0)
+ goto out;
+ goto out_error;
+ }
+ if (lookup_path(buf))
+ goto out;
+ zfree(&buf);
+ }
+
+ if (!strcmp(arch, "arm"))
+ path_list = arm_triplets;
+ else if (!strcmp(arch, "powerpc"))
+ path_list = powerpc_triplets;
+ else if (!strcmp(arch, "sh"))
+ path_list = sh_triplets;
+ else if (!strcmp(arch, "s390"))
+ path_list = s390_triplets;
+ else if (!strcmp(arch, "sparc"))
+ path_list = sparc_triplets;
+ else if (!strcmp(arch, "x86"))
+ path_list = x86_triplets;
+ else if (!strcmp(arch, "mips"))
+ path_list = mips_triplets;
+ else {
+ ui__error("binutils for %s not supported.\n", arch);
+ goto out_error;
+ }
+
+ idx = lookup_triplets(path_list, name);
+ if (idx < 0) {
+ ui__error("Please install %s for %s.\n"
+ "You can add it to PATH, set CROSS_COMPILE or "
+ "override the default using --%s.\n",
+ name, arch, name);
+ goto out_error;
+ }
+
+ if (asprintf(&buf, "%s%s", path_list[idx], name) < 0)
+ goto out_error;
+
+out:
+ *path = buf;
+ return 0;
+out_error:
+ free(buf);
+ *path = NULL;
+ return -1;
+}
+
+int perf_session_env__lookup_objdump(struct perf_session_env *env)
+{
+ /*
+ * For live mode, env->arch will be NULL and we can use
+ * the native objdump tool.
+ */
+ if (env->arch == NULL)
+ return 0;
+
+ return perf_session_env__lookup_binutils_path(env, "objdump",
+ &objdump_path);
+}
diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h
new file mode 100644
index 00000000000..ede246eda9b
--- /dev/null
+++ b/tools/perf/arch/common.h
@@ -0,0 +1,10 @@
+#ifndef ARCH_PERF_COMMON_H
+#define ARCH_PERF_COMMON_H
+
+#include "../util/session.h"
+
+extern const char *objdump_path;
+
+int perf_session_env__lookup_objdump(struct perf_session_env *env);
+
+#endif /* ARCH_PERF_COMMON_H */
diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile
new file mode 100644
index 00000000000..744e629797b
--- /dev/null
+++ b/tools/perf/arch/powerpc/Makefile
@@ -0,0 +1,5 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
diff --git a/tools/perf/arch/powerpc/util/dwarf-regs.c b/tools/perf/arch/powerpc/util/dwarf-regs.c
new file mode 100644
index 00000000000..733151cdf46
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/dwarf-regs.c
@@ -0,0 +1,88 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Ian Munsie, IBM Corporation.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+
+struct pt_regs_dwarfnum {
+ const char *name;
+ unsigned int dwarfnum;
+};
+
+#define STR(s) #s
+#define REG_DWARFNUM_NAME(r, num) {.name = r, .dwarfnum = num}
+#define GPR_DWARFNUM_NAME(num) \
+ {.name = STR(%gpr##num), .dwarfnum = num}
+#define REG_DWARFNUM_END {.name = NULL, .dwarfnum = 0}
+
+/*
+ * Reference:
+ * http://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html
+ */
+static const struct pt_regs_dwarfnum regdwarfnum_table[] = {
+ GPR_DWARFNUM_NAME(0),
+ GPR_DWARFNUM_NAME(1),
+ GPR_DWARFNUM_NAME(2),
+ GPR_DWARFNUM_NAME(3),
+ GPR_DWARFNUM_NAME(4),
+ GPR_DWARFNUM_NAME(5),
+ GPR_DWARFNUM_NAME(6),
+ GPR_DWARFNUM_NAME(7),
+ GPR_DWARFNUM_NAME(8),
+ GPR_DWARFNUM_NAME(9),
+ GPR_DWARFNUM_NAME(10),
+ GPR_DWARFNUM_NAME(11),
+ GPR_DWARFNUM_NAME(12),
+ GPR_DWARFNUM_NAME(13),
+ GPR_DWARFNUM_NAME(14),
+ GPR_DWARFNUM_NAME(15),
+ GPR_DWARFNUM_NAME(16),
+ GPR_DWARFNUM_NAME(17),
+ GPR_DWARFNUM_NAME(18),
+ GPR_DWARFNUM_NAME(19),
+ GPR_DWARFNUM_NAME(20),
+ GPR_DWARFNUM_NAME(21),
+ GPR_DWARFNUM_NAME(22),
+ GPR_DWARFNUM_NAME(23),
+ GPR_DWARFNUM_NAME(24),
+ GPR_DWARFNUM_NAME(25),
+ GPR_DWARFNUM_NAME(26),
+ GPR_DWARFNUM_NAME(27),
+ GPR_DWARFNUM_NAME(28),
+ GPR_DWARFNUM_NAME(29),
+ GPR_DWARFNUM_NAME(30),
+ GPR_DWARFNUM_NAME(31),
+ REG_DWARFNUM_NAME("%msr", 66),
+ REG_DWARFNUM_NAME("%ctr", 109),
+ REG_DWARFNUM_NAME("%link", 108),
+ REG_DWARFNUM_NAME("%xer", 101),
+ REG_DWARFNUM_NAME("%dar", 119),
+ REG_DWARFNUM_NAME("%dsisr", 118),
+ REG_DWARFNUM_END,
+};
+
+/**
+ * get_arch_regstr() - lookup register name from it's DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from it's DWARF register number. If the register is not
+ * found in the table, this returns NULL;
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ const struct pt_regs_dwarfnum *roff;
+ for (roff = regdwarfnum_table; roff->name != NULL; roff++)
+ if (roff->dwarfnum == n)
+ return roff->name;
+ return NULL;
+}
diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c
new file mode 100644
index 00000000000..2f7073d107f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/header.c
@@ -0,0 +1,36 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../util/header.h"
+
+#define __stringify_1(x) #x
+#define __stringify(x) __stringify_1(x)
+
+#define mfspr(rn) ({unsigned long rval; \
+ asm volatile("mfspr %0," __stringify(rn) \
+ : "=r" (rval)); rval; })
+
+#define SPRN_PVR 0x11F /* Processor Version Register */
+#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF) /* Version field */
+#define PVR_REV(pvr) (((pvr) >> 0) & 0xFFFF) /* Revison field */
+
+int
+get_cpuid(char *buffer, size_t sz)
+{
+ unsigned long pvr;
+ int nb;
+
+ pvr = mfspr(SPRN_PVR);
+
+ nb = scnprintf(buffer, sz, "%lu,%lu$", PVR_VER(pvr), PVR_REV(pvr));
+
+ /* look for end marker to ensure the entire data fit */
+ if (strchr(buffer, '$')) {
+ buffer[nb-1] = '\0';
+ return 0;
+ }
+ return -1;
+}
diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile
new file mode 100644
index 00000000000..15130b50dfe
--- /dev/null
+++ b/tools/perf/arch/s390/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
diff --git a/tools/perf/arch/s390/util/dwarf-regs.c b/tools/perf/arch/s390/util/dwarf-regs.c
new file mode 100644
index 00000000000..0469df02ee6
--- /dev/null
+++ b/tools/perf/arch/s390/util/dwarf-regs.c
@@ -0,0 +1,22 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright IBM Corp. 2010
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>,
+ *
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define NUM_GPRS 16
+
+static const char *gpr_names[NUM_GPRS] = {
+ "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
+ "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
+};
+
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n >= NUM_GPRS) ? NULL : gpr_names[n];
+}
diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile
new file mode 100644
index 00000000000..15130b50dfe
--- /dev/null
+++ b/tools/perf/arch/sh/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
diff --git a/tools/perf/arch/sh/util/dwarf-regs.c b/tools/perf/arch/sh/util/dwarf-regs.c
new file mode 100644
index 00000000000..0d0897f57a1
--- /dev/null
+++ b/tools/perf/arch/sh/util/dwarf-regs.c
@@ -0,0 +1,55 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 Matt Fleming <matt@console-pimps.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+/*
+ * Generic dwarf analysis helpers
+ */
+
+#define SH_MAX_REGS 18
+const char *sh_regs_table[SH_MAX_REGS] = {
+ "r0",
+ "r1",
+ "r2",
+ "r3",
+ "r4",
+ "r5",
+ "r6",
+ "r7",
+ "r8",
+ "r9",
+ "r10",
+ "r11",
+ "r12",
+ "r13",
+ "r14",
+ "r15",
+ "pc",
+ "pr",
+};
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n <= SH_MAX_REGS) ? sh_regs_table[n] : NULL;
+}
diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile
new file mode 100644
index 00000000000..15130b50dfe
--- /dev/null
+++ b/tools/perf/arch/sparc/Makefile
@@ -0,0 +1,4 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
diff --git a/tools/perf/arch/sparc/util/dwarf-regs.c b/tools/perf/arch/sparc/util/dwarf-regs.c
new file mode 100644
index 00000000000..92eda412fed
--- /dev/null
+++ b/tools/perf/arch/sparc/util/dwarf-regs.c
@@ -0,0 +1,43 @@
+/*
+ * Mapping of DWARF debug register numbers into register names.
+ *
+ * Copyright (C) 2010 David S. Miller <davem@davemloft.net>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version
+ * 2 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+#define SPARC_MAX_REGS 96
+
+const char *sparc_regs_table[SPARC_MAX_REGS] = {
+ "%g0", "%g1", "%g2", "%g3", "%g4", "%g5", "%g6", "%g7",
+ "%o0", "%o1", "%o2", "%o3", "%o4", "%o5", "%sp", "%o7",
+ "%l0", "%l1", "%l2", "%l3", "%l4", "%l5", "%l6", "%l7",
+ "%i0", "%i1", "%i2", "%i3", "%i4", "%i5", "%fp", "%i7",
+ "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
+ "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
+ "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
+ "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
+ "%f32", "%f33", "%f34", "%f35", "%f36", "%f37", "%f38", "%f39",
+ "%f40", "%f41", "%f42", "%f43", "%f44", "%f45", "%f46", "%f47",
+ "%f48", "%f49", "%f50", "%f51", "%f52", "%f53", "%f54", "%f55",
+ "%f56", "%f57", "%f58", "%f59", "%f60", "%f61", "%f62", "%f63",
+};
+
+/**
+ * get_arch_regstr() - lookup register name from it's DWARF register number
+ * @n: the DWARF register number
+ *
+ * get_arch_regstr() returns the name of the register in struct
+ * regdwarfnum_table from it's DWARF register number. If the register is not
+ * found in the table, this returns NULL;
+ */
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n <= SPARC_MAX_REGS) ? sparc_regs_table[n] : NULL;
+}
diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile
new file mode 100644
index 00000000000..1641542e363
--- /dev/null
+++ b/tools/perf/arch/x86/Makefile
@@ -0,0 +1,17 @@
+ifndef NO_DWARF
+PERF_HAVE_DWARF_REGS := 1
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o
+endif
+ifndef NO_LIBUNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o
+endif
+ifndef NO_LIBDW_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o
+endif
+ifndef NO_DWARF_UNWIND
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o
+endif
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o
+LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o
+LIB_H += arch/$(ARCH)/util/tsc.h
diff --git a/tools/perf/arch/x86/include/perf_regs.h b/tools/perf/arch/x86/include/perf_regs.h
new file mode 100644
index 00000000000..7df517acfef
--- /dev/null
+++ b/tools/perf/arch/x86/include/perf_regs.h
@@ -0,0 +1,86 @@
+#ifndef ARCH_PERF_REGS_H
+#define ARCH_PERF_REGS_H
+
+#include <stdlib.h>
+#include <linux/types.h>
+#include <asm/perf_regs.h>
+
+void perf_regs_load(u64 *regs);
+
+#ifndef HAVE_ARCH_X86_64_SUPPORT
+#define PERF_REGS_MASK ((1ULL << PERF_REG_X86_32_MAX) - 1)
+#define PERF_REGS_MAX PERF_REG_X86_32_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_32
+#else
+#define REG_NOSUPPORT ((1ULL << PERF_REG_X86_DS) | \
+ (1ULL << PERF_REG_X86_ES) | \
+ (1ULL << PERF_REG_X86_FS) | \
+ (1ULL << PERF_REG_X86_GS))
+#define PERF_REGS_MASK (((1ULL << PERF_REG_X86_64_MAX) - 1) & ~REG_NOSUPPORT)
+#define PERF_REGS_MAX PERF_REG_X86_64_MAX
+#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64
+#endif
+#define PERF_REG_IP PERF_REG_X86_IP
+#define PERF_REG_SP PERF_REG_X86_SP
+
+static inline const char *perf_reg_name(int id)
+{
+ switch (id) {
+ case PERF_REG_X86_AX:
+ return "AX";
+ case PERF_REG_X86_BX:
+ return "BX";
+ case PERF_REG_X86_CX:
+ return "CX";
+ case PERF_REG_X86_DX:
+ return "DX";
+ case PERF_REG_X86_SI:
+ return "SI";
+ case PERF_REG_X86_DI:
+ return "DI";
+ case PERF_REG_X86_BP:
+ return "BP";
+ case PERF_REG_X86_SP:
+ return "SP";
+ case PERF_REG_X86_IP:
+ return "IP";
+ case PERF_REG_X86_FLAGS:
+ return "FLAGS";
+ case PERF_REG_X86_CS:
+ return "CS";
+ case PERF_REG_X86_SS:
+ return "SS";
+ case PERF_REG_X86_DS:
+ return "DS";
+ case PERF_REG_X86_ES:
+ return "ES";
+ case PERF_REG_X86_FS:
+ return "FS";
+ case PERF_REG_X86_GS:
+ return "GS";
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ case PERF_REG_X86_R8:
+ return "R8";
+ case PERF_REG_X86_R9:
+ return "R9";
+ case PERF_REG_X86_R10:
+ return "R10";
+ case PERF_REG_X86_R11:
+ return "R11";
+ case PERF_REG_X86_R12:
+ return "R12";
+ case PERF_REG_X86_R13:
+ return "R13";
+ case PERF_REG_X86_R14:
+ return "R14";
+ case PERF_REG_X86_R15:
+ return "R15";
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
+ default:
+ return NULL;
+ }
+
+ return NULL;
+}
+
+#endif /* ARCH_PERF_REGS_H */
diff --git a/tools/perf/arch/x86/tests/dwarf-unwind.c b/tools/perf/arch/x86/tests/dwarf-unwind.c
new file mode 100644
index 00000000000..9f89f899ccc
--- /dev/null
+++ b/tools/perf/arch/x86/tests/dwarf-unwind.c
@@ -0,0 +1,60 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+ struct thread *thread, u64 *regs)
+{
+ struct stack_dump *stack = &sample->user_stack;
+ struct map *map;
+ unsigned long sp;
+ u64 stack_size, *buf;
+
+ buf = malloc(STACK_SIZE);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ sp = (unsigned long) regs[PERF_REG_X86_SP];
+
+ map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+ if (!map) {
+ pr_debug("failed to get stack map\n");
+ free(buf);
+ return -1;
+ }
+
+ stack_size = map->end - sp;
+ stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+ memcpy(buf, (void *) sp, stack_size);
+ stack->data = (char *) buf;
+ stack->size = stack_size;
+ return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+ struct thread *thread)
+{
+ struct regs_dump *regs = &sample->user_regs;
+ u64 *buf;
+
+ buf = malloc(sizeof(u64) * PERF_REGS_MAX);
+ if (!buf) {
+ pr_debug("failed to allocate sample uregs data\n");
+ return -1;
+ }
+
+ perf_regs_load(buf);
+ regs->abi = PERF_SAMPLE_REGS_ABI;
+ regs->regs = buf;
+ regs->mask = PERF_REGS_MASK;
+
+ return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/x86/tests/regs_load.S b/tools/perf/arch/x86/tests/regs_load.S
new file mode 100644
index 00000000000..60875d5c556
--- /dev/null
+++ b/tools/perf/arch/x86/tests/regs_load.S
@@ -0,0 +1,98 @@
+#include <linux/linkage.h>
+
+#define AX 0
+#define BX 1 * 8
+#define CX 2 * 8
+#define DX 3 * 8
+#define SI 4 * 8
+#define DI 5 * 8
+#define BP 6 * 8
+#define SP 7 * 8
+#define IP 8 * 8
+#define FLAGS 9 * 8
+#define CS 10 * 8
+#define SS 11 * 8
+#define DS 12 * 8
+#define ES 13 * 8
+#define FS 14 * 8
+#define GS 15 * 8
+#define R8 16 * 8
+#define R9 17 * 8
+#define R10 18 * 8
+#define R11 19 * 8
+#define R12 20 * 8
+#define R13 21 * 8
+#define R14 22 * 8
+#define R15 23 * 8
+
+.text
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+ENTRY(perf_regs_load)
+ movq %rax, AX(%rdi)
+ movq %rbx, BX(%rdi)
+ movq %rcx, CX(%rdi)
+ movq %rdx, DX(%rdi)
+ movq %rsi, SI(%rdi)
+ movq %rdi, DI(%rdi)
+ movq %rbp, BP(%rdi)
+
+ leaq 8(%rsp), %rax /* exclude this call. */
+ movq %rax, SP(%rdi)
+
+ movq 0(%rsp), %rax
+ movq %rax, IP(%rdi)
+
+ movq $0, FLAGS(%rdi)
+ movq $0, CS(%rdi)
+ movq $0, SS(%rdi)
+ movq $0, DS(%rdi)
+ movq $0, ES(%rdi)
+ movq $0, FS(%rdi)
+ movq $0, GS(%rdi)
+
+ movq %r8, R8(%rdi)
+ movq %r9, R9(%rdi)
+ movq %r10, R10(%rdi)
+ movq %r11, R11(%rdi)
+ movq %r12, R12(%rdi)
+ movq %r13, R13(%rdi)
+ movq %r14, R14(%rdi)
+ movq %r15, R15(%rdi)
+ ret
+ENDPROC(perf_regs_load)
+#else
+ENTRY(perf_regs_load)
+ push %edi
+ movl 8(%esp), %edi
+ movl %eax, AX(%edi)
+ movl %ebx, BX(%edi)
+ movl %ecx, CX(%edi)
+ movl %edx, DX(%edi)
+ movl %esi, SI(%edi)
+ pop %eax
+ movl %eax, DI(%edi)
+ movl %ebp, BP(%edi)
+
+ leal 4(%esp), %eax /* exclude this call. */
+ movl %eax, SP(%edi)
+
+ movl 0(%esp), %eax
+ movl %eax, IP(%edi)
+
+ movl $0, FLAGS(%edi)
+ movl $0, CS(%edi)
+ movl $0, SS(%edi)
+ movl $0, DS(%edi)
+ movl $0, ES(%edi)
+ movl $0, FS(%edi)
+ movl $0, GS(%edi)
+ ret
+ENDPROC(perf_regs_load)
+#endif
+
+/*
+ * We need to provide note.GNU-stack section, saying that we want
+ * NOT executable stack. Otherwise the final linking will assume that
+ * the ELF stack should not be restricted at all and set it RWX.
+ */
+.section .note.GNU-stack,"",@progbits
diff --git a/tools/perf/arch/x86/util/dwarf-regs.c b/tools/perf/arch/x86/util/dwarf-regs.c
new file mode 100644
index 00000000000..be22dd46323
--- /dev/null
+++ b/tools/perf/arch/x86/util/dwarf-regs.c
@@ -0,0 +1,75 @@
+/*
+ * dwarf-regs.c : Mapping of DWARF debug register numbers into register names.
+ * Extracted from probe-finder.c
+ *
+ * Written by Masami Hiramatsu <mhiramat@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ */
+
+#include <stddef.h>
+#include <dwarf-regs.h>
+
+/*
+ * Generic dwarf analysis helpers
+ */
+
+#define X86_32_MAX_REGS 8
+const char *x86_32_regs_table[X86_32_MAX_REGS] = {
+ "%ax",
+ "%cx",
+ "%dx",
+ "%bx",
+ "$stack", /* Stack address instead of %sp */
+ "%bp",
+ "%si",
+ "%di",
+};
+
+#define X86_64_MAX_REGS 16
+const char *x86_64_regs_table[X86_64_MAX_REGS] = {
+ "%ax",
+ "%dx",
+ "%cx",
+ "%bx",
+ "%si",
+ "%di",
+ "%bp",
+ "%sp",
+ "%r8",
+ "%r9",
+ "%r10",
+ "%r11",
+ "%r12",
+ "%r13",
+ "%r14",
+ "%r15",
+};
+
+/* TODO: switching by dwarf address size */
+#ifdef __x86_64__
+#define ARCH_MAX_REGS X86_64_MAX_REGS
+#define arch_regs_table x86_64_regs_table
+#else
+#define ARCH_MAX_REGS X86_32_MAX_REGS
+#define arch_regs_table x86_32_regs_table
+#endif
+
+/* Return architecture dependent register string (for kprobe-tracer) */
+const char *get_arch_regstr(unsigned int n)
+{
+ return (n <= ARCH_MAX_REGS) ? arch_regs_table[n] : NULL;
+}
diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c
new file mode 100644
index 00000000000..146d12a1cec
--- /dev/null
+++ b/tools/perf/arch/x86/util/header.c
@@ -0,0 +1,59 @@
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "../../util/header.h"
+
+static inline void
+cpuid(unsigned int op, unsigned int *a, unsigned int *b, unsigned int *c,
+ unsigned int *d)
+{
+ __asm__ __volatile__ (".byte 0x53\n\tcpuid\n\t"
+ "movl %%ebx, %%esi\n\t.byte 0x5b"
+ : "=a" (*a),
+ "=S" (*b),
+ "=c" (*c),
+ "=d" (*d)
+ : "a" (op));
+}
+
+int
+get_cpuid(char *buffer, size_t sz)
+{
+ unsigned int a, b, c, d, lvl;
+ int family = -1, model = -1, step = -1;
+ int nb;
+ char vendor[16];
+
+ cpuid(0, &lvl, &b, &c, &d);
+ strncpy(&vendor[0], (char *)(&b), 4);
+ strncpy(&vendor[4], (char *)(&d), 4);
+ strncpy(&vendor[8], (char *)(&c), 4);
+ vendor[12] = '\0';
+
+ if (lvl >= 1) {
+ cpuid(1, &a, &b, &c, &d);
+
+ family = (a >> 8) & 0xf; /* bits 11 - 8 */
+ model = (a >> 4) & 0xf; /* Bits 7 - 4 */
+ step = a & 0xf;
+
+ /* extended family */
+ if (family == 0xf)
+ family += (a >> 20) & 0xff;
+
+ /* extended model */
+ if (family >= 0x6)
+ model += ((a >> 16) & 0xf) << 4;
+ }
+ nb = scnprintf(buffer, sz, "%s,%u,%u,%u$", vendor, family, model, step);
+
+ /* look for end marker to ensure the entire data fit */
+ if (strchr(buffer, '$')) {
+ buffer[nb-1] = '\0';
+ return 0;
+ }
+ return -1;
+}
diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c
new file mode 100644
index 00000000000..40021fa3129
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.c
@@ -0,0 +1,59 @@
+#include <stdbool.h>
+#include <errno.h>
+
+#include <linux/perf_event.h>
+
+#include "../../perf.h"
+#include <linux/types.h>
+#include "../../util/debug.h"
+#include "tsc.h"
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc)
+{
+ u64 t, quot, rem;
+
+ t = ns - tc->time_zero;
+ quot = t / tc->time_mult;
+ rem = t % tc->time_mult;
+ return (quot << tc->time_shift) +
+ (rem << tc->time_shift) / tc->time_mult;
+}
+
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc)
+{
+ u64 quot, rem;
+
+ quot = cyc >> tc->time_shift;
+ rem = cyc & ((1 << tc->time_shift) - 1);
+ return tc->time_zero + quot * tc->time_mult +
+ ((rem * tc->time_mult) >> tc->time_shift);
+}
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc)
+{
+ bool cap_user_time_zero;
+ u32 seq;
+ int i = 0;
+
+ while (1) {
+ seq = pc->lock;
+ rmb();
+ tc->time_mult = pc->time_mult;
+ tc->time_shift = pc->time_shift;
+ tc->time_zero = pc->time_zero;
+ cap_user_time_zero = pc->cap_user_time_zero;
+ rmb();
+ if (pc->lock == seq && !(seq & 1))
+ break;
+ if (++i > 10000) {
+ pr_debug("failed to get perf_event_mmap_page lock\n");
+ return -EINVAL;
+ }
+ }
+
+ if (!cap_user_time_zero)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
diff --git a/tools/perf/arch/x86/util/tsc.h b/tools/perf/arch/x86/util/tsc.h
new file mode 100644
index 00000000000..2affe0366b5
--- /dev/null
+++ b/tools/perf/arch/x86/util/tsc.h
@@ -0,0 +1,20 @@
+#ifndef TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
+#define TOOLS_PERF_ARCH_X86_UTIL_TSC_H__
+
+#include <linux/types.h>
+
+struct perf_tsc_conversion {
+ u16 time_shift;
+ u32 time_mult;
+ u64 time_zero;
+};
+
+struct perf_event_mmap_page;
+
+int perf_read_tsc_conversion(const struct perf_event_mmap_page *pc,
+ struct perf_tsc_conversion *tc);
+
+u64 perf_time_to_tsc(u64 ns, struct perf_tsc_conversion *tc);
+u64 tsc_to_perf_time(u64 cyc, struct perf_tsc_conversion *tc);
+
+#endif /* TOOLS_PERF_ARCH_X86_UTIL_TSC_H__ */
diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c
new file mode 100644
index 00000000000..c4b72176ca8
--- /dev/null
+++ b/tools/perf/arch/x86/util/unwind-libdw.c
@@ -0,0 +1,51 @@
+#include <elfutils/libdwfl.h>
+#include "../../util/unwind-libdw.h"
+#include "../../util/perf_regs.h"
+
+bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg)
+{
+ struct unwind_info *ui = arg;
+ struct regs_dump *user_regs = &ui->sample->user_regs;
+ Dwarf_Word dwarf_regs[17];
+ unsigned nregs;
+
+#define REG(r) ({ \
+ Dwarf_Word val = 0; \
+ perf_reg_value(&val, user_regs, PERF_REG_X86_##r); \
+ val; \
+})
+
+ if (user_regs->abi == PERF_SAMPLE_REGS_ABI_32) {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(CX);
+ dwarf_regs[2] = REG(DX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SP);
+ dwarf_regs[5] = REG(BP);
+ dwarf_regs[6] = REG(SI);
+ dwarf_regs[7] = REG(DI);
+ dwarf_regs[8] = REG(IP);
+ nregs = 9;
+ } else {
+ dwarf_regs[0] = REG(AX);
+ dwarf_regs[1] = REG(DX);
+ dwarf_regs[2] = REG(CX);
+ dwarf_regs[3] = REG(BX);
+ dwarf_regs[4] = REG(SI);
+ dwarf_regs[5] = REG(DI);
+ dwarf_regs[6] = REG(BP);
+ dwarf_regs[7] = REG(SP);
+ dwarf_regs[8] = REG(R8);
+ dwarf_regs[9] = REG(R9);
+ dwarf_regs[10] = REG(R10);
+ dwarf_regs[11] = REG(R11);
+ dwarf_regs[12] = REG(R12);
+ dwarf_regs[13] = REG(R13);
+ dwarf_regs[14] = REG(R14);
+ dwarf_regs[15] = REG(R15);
+ dwarf_regs[16] = REG(IP);
+ nregs = 17;
+ }
+
+ return dwfl_thread_state_registers(thread, 0, nregs, dwarf_regs);
+}
diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c
new file mode 100644
index 00000000000..3261f68c6a7
--- /dev/null
+++ b/tools/perf/arch/x86/util/unwind-libunwind.c
@@ -0,0 +1,111 @@
+
+#include <errno.h>
+#include <libunwind.h>
+#include "perf_regs.h"
+#include "../../util/unwind.h"
+
+#ifdef HAVE_ARCH_X86_64_SUPPORT
+int libunwind__arch_reg_id(int regnum)
+{
+ int id;
+
+ switch (regnum) {
+ case UNW_X86_64_RAX:
+ id = PERF_REG_X86_AX;
+ break;
+ case UNW_X86_64_RDX:
+ id = PERF_REG_X86_DX;
+ break;
+ case UNW_X86_64_RCX:
+ id = PERF_REG_X86_CX;
+ break;
+ case UNW_X86_64_RBX:
+ id = PERF_REG_X86_BX;
+ break;
+ case UNW_X86_64_RSI:
+ id = PERF_REG_X86_SI;
+ break;
+ case UNW_X86_64_RDI:
+ id = PERF_REG_X86_DI;
+ break;
+ case UNW_X86_64_RBP:
+ id = PERF_REG_X86_BP;
+ break;
+ case UNW_X86_64_RSP:
+ id = PERF_REG_X86_SP;
+ break;
+ case UNW_X86_64_R8:
+ id = PERF_REG_X86_R8;
+ break;
+ case UNW_X86_64_R9:
+ id = PERF_REG_X86_R9;
+ break;
+ case UNW_X86_64_R10:
+ id = PERF_REG_X86_R10;
+ break;
+ case UNW_X86_64_R11:
+ id = PERF_REG_X86_R11;
+ break;
+ case UNW_X86_64_R12:
+ id = PERF_REG_X86_R12;
+ break;
+ case UNW_X86_64_R13:
+ id = PERF_REG_X86_R13;
+ break;
+ case UNW_X86_64_R14:
+ id = PERF_REG_X86_R14;
+ break;
+ case UNW_X86_64_R15:
+ id = PERF_REG_X86_R15;
+ break;
+ case UNW_X86_64_RIP:
+ id = PERF_REG_X86_IP;
+ break;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return id;
+}
+#else
+int libunwind__arch_reg_id(int regnum)
+{
+ int id;
+
+ switch (regnum) {
+ case UNW_X86_EAX:
+ id = PERF_REG_X86_AX;
+ break;
+ case UNW_X86_EDX:
+ id = PERF_REG_X86_DX;
+ break;
+ case UNW_X86_ECX:
+ id = PERF_REG_X86_CX;
+ break;
+ case UNW_X86_EBX:
+ id = PERF_REG_X86_BX;
+ break;
+ case UNW_X86_ESI:
+ id = PERF_REG_X86_SI;
+ break;
+ case UNW_X86_EDI:
+ id = PERF_REG_X86_DI;
+ break;
+ case UNW_X86_EBP:
+ id = PERF_REG_X86_BP;
+ break;
+ case UNW_X86_ESP:
+ id = PERF_REG_X86_SP;
+ break;
+ case UNW_X86_EIP:
+ id = PERF_REG_X86_IP;
+ break;
+ default:
+ pr_err("unwind: invalid reg id %d\n", regnum);
+ return -EINVAL;
+ }
+
+ return id;
+}
+#endif /* HAVE_ARCH_X86_64_SUPPORT */
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
new file mode 100644