aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2011-07-21 09:32:40 +0200
committerIngo Molnar <mingo@elte.hu>2011-07-21 09:32:40 +0200
commit40bcea7bbe8fe452a2d272e2ffd3dea281eec9ff (patch)
treeaedb6d02e53e3cf84cc32fd81db84032cee205e1
parent492f73a303b488ffd67097b2351d54aa6e6c7c73 (diff)
parent14a8fd7ceea6915c613746203d6e9a2bf273f16c (diff)
Merge branch 'tip/perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-2.6-trace into perf/core
-rw-r--r--Documentation/trace/kprobetrace.txt9
-rw-r--r--arch/x86/include/asm/perf_event_p4.h33
-rw-r--r--arch/x86/kernel/cpu/perf_event.c7
-rw-r--r--arch/x86/kernel/cpu/perf_event_p4.c135
-rw-r--r--include/linux/ftrace_event.h1
-rw-r--r--kernel/kprobes.c33
-rw-r--r--kernel/trace/ftrace.c95
-rw-r--r--kernel/trace/trace.c92
-rw-r--r--kernel/trace/trace.h50
-rw-r--r--kernel/trace/trace_entries.h3
-rw-r--r--kernel/trace/trace_functions_graph.c2
-rw-r--r--kernel/trace/trace_kprobe.c318
-rw-r--r--kernel/trace/trace_output.c11
-rw-r--r--kernel/watchdog.c2
-rw-r--r--tools/perf/Documentation/perf-probe.txt6
-rw-r--r--tools/perf/Makefile2
-rw-r--r--tools/perf/builtin-probe.c3
-rw-r--r--tools/perf/util/dwarf-aux.c663
-rw-r--r--tools/perf/util/dwarf-aux.h100
-rw-r--r--tools/perf/util/probe-event.c165
-rw-r--r--tools/perf/util/probe-event.h1
-rw-r--r--tools/perf/util/probe-finder.c752
-rw-r--r--tools/perf/util/probe-finder.h43
-rw-r--r--tools/perf/util/string.c19
-rw-r--r--tools/perf/util/trace-event-info.c120
-rw-r--r--tools/perf/util/util.h1
26 files changed, 1663 insertions, 1003 deletions
diff --git a/Documentation/trace/kprobetrace.txt b/Documentation/trace/kprobetrace.txt
index c83bd6b4e6e..d0d0bb9e3e2 100644
--- a/Documentation/trace/kprobetrace.txt
+++ b/Documentation/trace/kprobetrace.txt
@@ -22,14 +22,15 @@ current_tracer. Instead of that, add probe points via
Synopsis of kprobe_events
-------------------------
- p[:[GRP/]EVENT] SYMBOL[+offs]|MEMADDR [FETCHARGS] : Set a probe
- r[:[GRP/]EVENT] SYMBOL[+0] [FETCHARGS] : Set a return probe
+ p[:[GRP/]EVENT] [MOD:]SYM[+offs]|MEMADDR [FETCHARGS] : Set a probe
+ r[:[GRP/]EVENT] [MOD:]SYM[+0] [FETCHARGS] : Set a return probe
-:[GRP/]EVENT : Clear a probe
GRP : Group name. If omitted, use "kprobes" for it.
EVENT : Event name. If omitted, the event name is generated
- based on SYMBOL+offs or MEMADDR.
- SYMBOL[+offs] : Symbol+offset where the probe is inserted.
+ based on SYM+offs or MEMADDR.
+ MOD : Module name which has given SYM.
+ SYM[+offs] : Symbol+offset where the probe is inserted.
MEMADDR : Address where the probe is inserted.
FETCHARGS : Arguments. Each probe can have up to 128 args.
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 56fd9e3abbd..4d86c86178f 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -102,6 +102,14 @@
#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT)
/*
+ * If an event has alias it should be marked
+ * with a special bit. (Don't forget to check
+ * P4_PEBS_CONFIG_MASK and related bits on
+ * modification.)
+ */
+#define P4_CONFIG_ALIASABLE (1 << 9)
+
+/*
* The bits we allow to pass for RAW events
*/
#define P4_CONFIG_MASK_ESCR \
@@ -123,6 +131,31 @@
(p4_config_pack_escr(P4_CONFIG_MASK_ESCR)) | \
(p4_config_pack_cccr(P4_CONFIG_MASK_CCCR))
+/*
+ * In case of event aliasing we need to preserve some
+ * caller bits otherwise the mapping won't be complete.
+ */
+#define P4_CONFIG_EVENT_ALIAS_MASK \
+ (p4_config_pack_escr(P4_CONFIG_MASK_ESCR) | \
+ p4_config_pack_cccr(P4_CCCR_EDGE | \
+ P4_CCCR_THRESHOLD_MASK | \
+ P4_CCCR_COMPLEMENT | \
+ P4_CCCR_COMPARE))
+
+#define P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS \
+ ((P4_CONFIG_HT) | \
+ p4_config_pack_escr(P4_ESCR_T0_OS | \
+ P4_ESCR_T0_USR | \
+ P4_ESCR_T1_OS | \
+ P4_ESCR_T1_USR) | \
+ p4_config_pack_cccr(P4_CCCR_OVF | \
+ P4_CCCR_CASCADE | \
+ P4_CCCR_FORCE_OVF | \
+ P4_CCCR_THREAD_ANY | \
+ P4_CCCR_OVF_PMI_T0 | \
+ P4_CCCR_OVF_PMI_T1 | \
+ P4_CONFIG_ALIASABLE))
+
static inline bool p4_is_event_cascaded(u64 config)
{
u32 cccr = p4_config_unpack_cccr(config);
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index c53d433c3dd..b7a010fce8c 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -274,7 +274,6 @@ struct x86_pmu {
void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
- void (*hw_watchdog_set_attr)(struct perf_event_attr *attr);
int (*hw_config)(struct perf_event *event);
int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
unsigned eventsel;
@@ -360,12 +359,6 @@ static u64 __read_mostly hw_cache_extra_regs
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX];
-void hw_nmi_watchdog_set_attr(struct perf_event_attr *wd_attr)
-{
- if (x86_pmu.hw_watchdog_set_attr)
- x86_pmu.hw_watchdog_set_attr(wd_attr);
-}
-
/*
* Propagate event elapsed time into the generic event.
* Can only be executed on the CPU where the event is active.
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index fb901c5080f..8b7a0c30678 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -570,11 +570,92 @@ static __initconst const u64 p4_hw_cache_event_ids
},
};
+/*
+ * Because of Netburst being quite restricted in now
+ * many same events can run simultaneously, we use
+ * event aliases, ie different events which have the
+ * same functionallity but use non-intersected resources
+ * (ESCR/CCCR/couter registers). This allow us to run
+ * two or more semi-same events together. It is done
+ * transparently to a user space.
+ *
+ * Never set any cusom internal bits such as P4_CONFIG_HT,
+ * P4_CONFIG_ALIASABLE or bits for P4_PEBS_METRIC, they are
+ * either up-to-dated automatically either not appliable
+ * at all.
+ *
+ * And be really carefull choosing aliases!
+ */
+struct p4_event_alias {
+ u64 orig;
+ u64 alter;
+} p4_event_aliases[] = {
+ {
+ /*
+ * Non-halted cycles can be substituted with
+ * non-sleeping cycles (see Intel SDM Vol3b for
+ * details).
+ */
+ .orig =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
+ .alter =
+ p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0)|
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1)|
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2)|
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3)|
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
+ P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3))|
+ p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
+ P4_CCCR_COMPARE),
+ },
+};
+
+static u64 p4_get_alias_event(u64 config)
+{
+ u64 config_match;
+ int i;
+
+ /*
+ * Probably we're lucky and don't have to do
+ * matching over all config bits.
+ */
+ if (!(config & P4_CONFIG_ALIASABLE))
+ return 0;
+
+ config_match = config & P4_CONFIG_EVENT_ALIAS_MASK;
+
+ /*
+ * If an event was previously swapped to the alter config
+ * we should swap it back otherwise contnention on registers
+ * will return back.
+ */
+ for (i = 0; i < ARRAY_SIZE(p4_event_aliases); i++) {
+ if (config_match == p4_event_aliases[i].orig) {
+ config_match = p4_event_aliases[i].alter;
+ break;
+ } else if (config_match == p4_event_aliases[i].alter) {
+ config_match = p4_event_aliases[i].orig;
+ break;
+ }
+ }
+
+ if (i >= ARRAY_SIZE(p4_event_aliases))
+ return 0;
+
+ return config_match |
+ (config & P4_CONFIG_EVENT_ALIAS_IMMUTABLE_BITS);
+}
+
static u64 p4_general_events[PERF_COUNT_HW_MAX] = {
/* non-halted CPU clocks */
[PERF_COUNT_HW_CPU_CYCLES] =
p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) |
- P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)),
+ P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)) |
+ P4_CONFIG_ALIASABLE,
/*
* retired instructions
@@ -719,31 +800,6 @@ static int p4_validate_raw_event(struct perf_event *event)
return 0;
}
-static void p4_hw_watchdog_set_attr(struct perf_event_attr *wd_attr)
-{
- /*
- * Watchdog ticks are special on Netburst, we use
- * that named "non-sleeping" ticks as recommended
- * by Intel SDM Vol3b.
- */
- WARN_ON_ONCE(wd_attr->type != PERF_TYPE_HARDWARE ||
- wd_attr->config != PERF_COUNT_HW_CPU_CYCLES);
-
- wd_attr->type = PERF_TYPE_RAW;
- wd_attr->config =
- p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_EXECUTION_EVENT) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS2) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, NBOGUS3) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS0) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS1) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS2) |
- P4_ESCR_EMASK_BIT(P4_EVENT_EXECUTION_EVENT, BOGUS3)) |
- p4_config_pack_cccr(P4_CCCR_THRESHOLD(15) | P4_CCCR_COMPLEMENT |
- P4_CCCR_COMPARE);
-}
-
static int p4_hw_config(struct perf_event *event)
{
int cpu = get_cpu();
@@ -1159,6 +1215,8 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
struct p4_event_bind *bind;
unsigned int i, thread, num;
int cntr_idx, escr_idx;
+ u64 config_alias;
+ int pass;
bitmap_zero(used_mask, X86_PMC_IDX_MAX);
bitmap_zero(escr_mask, P4_ESCR_MSR_TABLE_SIZE);
@@ -1167,6 +1225,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
hwc = &cpuc->event_list[i]->hw;
thread = p4_ht_thread(cpu);
+ pass = 0;
+
+again:
+ /*
+ * Aliases are swappable so we may hit circular
+ * lock if both original config and alias need
+ * resources (MSR registers) which already busy.
+ */
+ if (pass > 2)
+ goto done;
+
bind = p4_config_get_bind(hwc->config);
escr_idx = p4_get_escr_idx(bind->escr_msr[thread]);
if (unlikely(escr_idx == -1))
@@ -1180,8 +1249,17 @@ static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign
}
cntr_idx = p4_next_cntr(thread, used_mask, bind);
- if (cntr_idx == -1 || test_bit(escr_idx, escr_mask))
- goto done;
+ if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) {
+ /*
+ * Probably an event alias is still available.
+ */
+ config_alias = p4_get_alias_event(hwc->config);
+ if (!config_alias)
+ goto done;
+ hwc->config = config_alias;
+ pass++;
+ goto again;
+ }
p4_pmu_swap_config_ts(hwc, cpu);
if (assign)
@@ -1218,7 +1296,6 @@ static __initconst const struct x86_pmu p4_pmu = {
.cntval_bits = ARCH_P4_CNTRVAL_BITS,
.cntval_mask = ARCH_P4_CNTRVAL_MASK,
.max_period = (1ULL << (ARCH_P4_CNTRVAL_BITS - 1)) - 1,
- .hw_watchdog_set_attr = p4_hw_watchdog_set_attr,
.hw_config = p4_hw_config,
.schedule_events = p4_pmu_schedule_events,
/*
diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index b1e69eefc20..96efa6794ea 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -76,6 +76,7 @@ struct trace_iterator {
struct trace_entry *ent;
unsigned long lost_events;
int leftover;
+ int ent_size;
int cpu;
u64 ts;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 77981813a1e..b30fd54eb98 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -1255,19 +1255,29 @@ static int __kprobes in_kprobes_functions(unsigned long addr)
/*
* If we have a symbol_name argument, look it up and add the offset field
* to it. This way, we can specify a relative address to a symbol.
+ * This returns encoded errors if it fails to look up symbol or invalid
+ * combination of parameters.
*/
static kprobe_opcode_t __kprobes *kprobe_addr(struct kprobe *p)
{
kprobe_opcode_t *addr = p->addr;
+
+ if ((p->symbol_name && p->addr) ||
+ (!p->symbol_name && !p->addr))
+ goto invalid;
+
if (p->symbol_name) {
- if (addr)
- return NULL;
kprobe_lookup_name(p->symbol_name, addr);
+ if (!addr)
+ return ERR_PTR(-ENOENT);
}
- if (!addr)
- return NULL;
- return (kprobe_opcode_t *)(((char *)addr) + p->offset);
+ addr = (kprobe_opcode_t *)(((char *)addr) + p->offset);
+ if (addr)
+ return addr;
+
+invalid:
+ return ERR_PTR(-EINVAL);
}
/* Check passed kprobe is valid and return kprobe in kprobe_table. */
@@ -1311,8 +1321,8 @@ int __kprobes register_kprobe(struct kprobe *p)
kprobe_opcode_t *addr;
addr = kprobe_addr(p);
- if (!addr)
- return -EINVAL;
+ if (IS_ERR(addr))
+ return PTR_ERR(addr);
p->addr = addr;
ret = check_kprobe_rereg(p);
@@ -1335,6 +1345,8 @@ int __kprobes register_kprobe(struct kprobe *p)
*/
probed_mod = __module_text_address((unsigned long) p->addr);
if (probed_mod) {
+ /* Return -ENOENT if fail. */
+ ret = -ENOENT;
/*
* We must hold a refcount of the probed module while updating
* its code to prohibit unexpected unloading.
@@ -1351,6 +1363,7 @@ int __kprobes register_kprobe(struct kprobe *p)
module_put(probed_mod);
goto fail_with_jump_label;
}
+ /* ret will be updated by following code */
}
preempt_enable();
jump_label_unlock();
@@ -1399,7 +1412,7 @@ out:
fail_with_jump_label:
preempt_enable();
jump_label_unlock();
- return -EINVAL;
+ return ret;
}
EXPORT_SYMBOL_GPL(register_kprobe);
@@ -1686,8 +1699,8 @@ int __kprobes register_kretprobe(struct kretprobe *rp)
if (kretprobe_blacklist_size) {
addr = kprobe_addr(&rp->kp);
- if (!addr)
- return -EINVAL;
+ if (IS_ERR(addr))
+ return PTR_ERR(addr);
for (i = 0; kretprobe_blacklist[i].name != NULL; i++) {
if (kretprobe_blacklist[i].addr == addr)
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index a0e246e2cee..c3e4575e782 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -88,6 +88,7 @@ static struct ftrace_ops ftrace_list_end __read_mostly = {
static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
+static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
static struct ftrace_ops global_ops;
@@ -146,9 +147,11 @@ void clear_ftrace_function(void)
{
ftrace_trace_function = ftrace_stub;
__ftrace_trace_function = ftrace_stub;
+ __ftrace_trace_function_delay = ftrace_stub;
ftrace_pid_function = ftrace_stub;
}
+#undef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
/*
* For those archs that do not test ftrace_trace_stop in their
@@ -208,7 +211,12 @@ static void update_ftrace_function(void)
#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
ftrace_trace_function = func;
#else
+#ifdef CONFIG_DYNAMIC_FTRACE
+ /* do not update till all functions have been modified */
+ __ftrace_trace_function_delay = func;
+#else
__ftrace_trace_function = func;
+#endif
ftrace_trace_function = ftrace_test_stop_func;
#endif
}
@@ -1170,8 +1178,14 @@ alloc_and_copy_ftrace_hash(int size_bits, struct ftrace_hash *hash)
return NULL;
}
+static void
+ftrace_hash_rec_disable(struct ftrace_ops *ops, int filter_hash);
+static void
+ftrace_hash_rec_enable(struct ftrace_ops *ops, int filter_hash);
+
static int
-ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
+ftrace_hash_move(struct ftrace_ops *ops, int enable,
+ struct ftrace_hash **dst, struct ftrace_hash *src)
{
struct ftrace_func_entry *entry;
struct hlist_node *tp, *tn;
@@ -1181,9 +1195,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
unsigned long key;
int size = src->count;
int bits = 0;
+ int ret;
int i;
/*
+ * Remove the current set, update the hash and add
+ * them back.
+ */
+ ftrace_hash_rec_disable(ops, enable);
+
+ /*
* If the new source is empty, just free dst and assign it
* the empty_hash.
*/
@@ -1203,9 +1224,10 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
if (bits > FTRACE_HASH_MAX_BITS)
bits = FTRACE_HASH_MAX_BITS;
+ ret = -ENOMEM;
new_hash = alloc_ftrace_hash(bits);
if (!new_hash)
- return -ENOMEM;
+ goto out;
size = 1 << src->size_bits;
for (i = 0; i < size; i++) {
@@ -1224,7 +1246,16 @@ ftrace_hash_move(struct ftrace_hash **dst, struct ftrace_hash *src)
rcu_assign_pointer(*dst, new_hash);
free_ftrace_hash_rcu(old_hash);
- return 0;
+ ret = 0;
+ out:
+ /*
+ * Enable regardless of ret:
+ * On success, we enable the new hash.
+ * On failure, we re-enable the original hash.
+ */
+ ftrace_hash_rec_enable(ops, enable);
+
+ return ret;
}
/*
@@ -1584,6 +1615,12 @@ static int __ftrace_modify_code(void *data)
{
int *command = data;
+ /*
+ * Do not call function tracer while we update the code.
+ * We are in stop machine, no worrying about races.
+ */
+ function_trace_stop++;
+
if (*command & FTRACE_ENABLE_CALLS)
ftrace_replace_code(1);
else if (*command & FTRACE_DISABLE_CALLS)
@@ -1597,6 +1634,18 @@ static int __ftrace_modify_code(void *data)
else if (*command & FTRACE_STOP_FUNC_RET)
ftrace_disable_ftrace_graph_caller();
+#ifndef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST
+ /*
+ * For archs that call ftrace_test_stop_func(), we must
+ * wait till after we update all the function callers
+ * before we update the callback. This keeps different
+ * ops that record different functions from corrupting
+ * each other.
+ */
+ __ftrace_trace_function = __ftrace_trace_function_delay;
+#endif
+ function_trace_stop--;
+
return 0;
}
@@ -2865,7 +2914,11 @@ ftrace_set_regex(struct ftrace_ops *ops, unsigned char *buf, int len,
ftrace_match_records(hash, buf, len);
mutex_lock(&ftrace_lock);
- ret = ftrace_hash_move(orig_hash, hash);
+ ret = ftrace_hash_move(ops, enable, orig_hash, hash);
+ if (!ret && ops->flags & FTRACE_OPS_FL_ENABLED
+ && ftrace_enabled)
+ ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+
mutex_unlock(&ftrace_lock);
mutex_unlock(&ftrace_regex_lock);
@@ -3048,18 +3101,12 @@ ftrace_regex_release(struct inode *inode, struct file *file)
orig_hash = &iter->ops->notrace_hash;
mutex_lock(&ftrace_lock);
- /*
- * Remove the current set, update the hash and add
- * them back.
- */
- ftrace_hash_rec_disable(iter->ops, filter_hash);
- ret = ftrace_hash_move(orig_hash, iter->hash);
- if (!ret) {
- ftrace_hash_rec_enable(iter->ops, filter_hash);
- if (iter->ops->flags & FTRACE_OPS_FL_ENABLED
- && ftrace_enabled)
- ftrace_run_update_code(FTRACE_ENABLE_CALLS);
- }
+ ret = ftrace_hash_move(iter->ops, filter_hash,
+ orig_hash, iter->hash);
+ if (!ret && (iter->ops->flags & FTRACE_OPS_FL_ENABLED)
+ && ftrace_enabled)
+ ftrace_run_update_code(FTRACE_ENABLE_CALLS);
+
mutex_unlock(&ftrace_lock);
}
free_ftrace_hash(iter->hash);
@@ -3338,7 +3385,7 @@ static int ftrace_process_locs(struct module *mod,
{
unsigned long *p;
unsigned long addr;
- unsigned long flags;
+ unsigned long flags = 0; /* Shut up gcc */
mutex_lock(&ftrace_lock);
p = start;
@@ -3356,12 +3403,18 @@ static int ftrace_process_locs(struct module *mod,
}
/*
- * Disable interrupts to prevent interrupts from executing
- * code that is being modified.
+ * We only need to disable interrupts on start up
+ * because we are modifying code that an interrupt
+ * may execute, and the modification is not atomic.
+ * But for modules, nothing runs the code we modify
+ * until we are finished with it, and there's no
+ * reason to cause large interrupt latencies while we do it.
*/
- local_irq_save(flags);
+ if (!mod)
+ local_irq_save(flags);
ftrace_update_code(mod);
- local_irq_restore(flags);
+ if (!mod)
+ local_irq_restore(flags);
mutex_unlock(&ftrace_lock);
return 0;
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index d9c16123f6e..e5df02c69b1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -1248,6 +1248,15 @@ ftrace(struct trace_array *tr, struct trace_array_cpu *data,
}
#ifdef CONFIG_STACKTRACE
+
+#define FTRACE_STACK_MAX_ENTRIES (PAGE_SIZE / sizeof(unsigned long))
+struct ftrace_stack {
+ unsigned long calls[FTRACE_STACK_MAX_ENTRIES];
+};
+
+static DEFINE_PER_CPU(struct ftrace_stack, ftrace_stack);
+static DEFINE_PER_CPU(int, ftrace_stack_reserve);
+
static void __ftrace_trace_stack(struct ring_buffer *buffer,
unsigned long flags,
int skip, int pc, struct pt_regs *regs)
@@ -1256,25 +1265,77 @@ static void __ftrace_trace_stack(struct ring_buffer *buffer,
struct ring_buffer_event *event;
struct stack_entry *entry;
struct stack_trace trace;
+ int use_stack;
+ int size = FTRACE_STACK_ENTRIES;
+
+ trace.nr_entries = 0;
+ trace.skip = skip;
+
+ /*
+ * Since events can happen in NMIs there's no safe way to
+ * use the per cpu ftrace_stacks. We reserve it and if an interrupt
+ * or NMI comes in, it will just have to use the default
+ * FTRACE_STACK_SIZE.
+ */
+ preempt_disable_notrace();
+
+ use_stack = ++__get_cpu_var(ftrace_stack_reserve);
+ /*
+ * We don't need any atomic variables, just a barrier.
+ * If an interrupt comes in, we don't care, because it would
+ * have exited and put the counter back to what we want.
+ * We just need a barrier to keep gcc from moving things
+ * around.
+ */
+ barrier();
+ if (use_stack == 1) {
+ trace.entries = &__get_cpu_var(ftrace_stack).calls[0];
+ trace.max_entries = FTRACE_STACK_MAX_ENTRIES;
+
+ if (regs)
+ save_stack_trace_regs(regs, &trace);
+ else
+ save_stack_trace(&trace);
+
+ if (trace.nr_entries > size)
+ size = trace.nr_entries;
+ } else
+ /* From now on, use_stack is a boolean */
+ use_stack = 0;
+
+ size *= sizeof(unsigned long);
event = trace_buffer_lock_reserve(buffer, TRACE_STACK,
- sizeof(*entry), flags, pc);
+ sizeof(*entry) + size, flags, pc);
if (!event)
- return;
- entry = ring_buffer_event_data(event);
- memset(&entry->caller, 0, sizeof(entry->caller));
+ goto out;
+ entry = ring_buffer_event_data(event);
- trace.nr_entries = 0;
- trace.max_entries = FTRACE_STACK_ENTRIES;
- trace.skip = skip;
- trace.entries = entry->caller;
+ memset(&entry->caller, 0, size);
+
+ if (use_stack)
+ memcpy(&entry->caller, trace.entries,
+ trace.nr_entries * sizeof(unsigned long));
+ else {
+ trace.max_entries = FTRACE_STACK_ENTRIES;
+ trace.entries = entry->caller;
+ if (regs)
+ save_stack_trace_regs(regs, &trace);
+ else
+ save_stack_trace(&trace);
+ }
+
+ entry->size = trace.nr_entries;
- if (regs)
- save_stack_trace_regs(regs, &trace);
- else
- save_stack_trace(&trace);
if (!filter_check_discard(call, entry, buffer, event))
ring_buffer_unlock_commit(buffer, event);
+
+ out:
+ /* Again, don't let gcc optimize things here */
+ barrier();
+ __get_cpu_var(ftrace_stack_reserve)--;
+ preempt_enable_notrace();
+
}
void ftrace_trace_stack_regs(struct ring_buffer *buffer, unsigned long flags,
@@ -1562,7 +1623,12 @@ peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
ftrace_enable_cpu();
- return event ? ring_buffer_event_data(event) : NULL;
+ if (event) {
+ iter->ent_size = ring_buffer_event_length(event);
+ return ring_buffer_event_data(event);
+ }
+ iter->ent_size = 0;
+ return NULL;
}
static struct trace_entry *
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 30a94c26dcb..3f381d0b20a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -278,6 +278,29 @@ struct tracer {
};
+/* Only current can touch trace_recursion */
+#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
+#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
+
+/* Ring buffer has the 10 LSB bits to count */
+#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
+
+/* for function tracing recursion */
+#define TRACE_INTERNAL_BIT (1<<11)
+#define TRACE_GLOBAL_BIT (1<<12)
+/*
+ * Abuse of the trace_recursion.
+ * As we need a way to maintain state if we are tracing the function
+ * graph in irq because we want to trace a particular function that
+ * was called in irq context but we have irq tracing off. Since this
+ * can only be modified by current, we can reuse trace_recursion.
+ */
+#define TRACE_IRQ_BIT (1<<13)
+
+#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
+#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
+#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
+
#define TRACE_PIPE_ALL_CPU -1
int tracer_init(struct tracer *t, struct trace_array *tr);
@@ -516,8 +539,18 @@ static inline int ftrace_graph_addr(unsigned long addr)
return 1;
for (i = 0; i < ftrace_graph_count; i++) {
- if (addr == ftrace_graph_funcs[i])
+ if (addr == ftrace_graph_funcs[i]) {
+ /*
+ * If no irqs are to be traced, but a set_graph_function
+ * is set, and called by an interrupt handler, we still
+ * want to trace it.
+ */
+ if (in_irq())
+ trace_recursion_set(TRACE_IRQ_BIT);
+ else
+ trace_recursion_clear(TRACE_IRQ_BIT);
return 1;
+ }
}
return 0;
@@ -795,19 +828,4 @@ extern const char *__stop___trace_bprintk_fmt[];
FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
#include "trace_entries.h"
-/* Only current can touch trace_recursion */
-#define trace_recursion_inc() do { (current)->trace_recursion++; } while (0)
-#define trace_recursion_dec() do { (current)->trace_recursion--; } while (0)
-
-/* Ring buffer has the 10 LSB bits to count */
-#define trace_recursion_buffer() ((current)->trace_recursion & 0x3ff)
-
-/* for function tracing recursion */
-#define TRACE_INTERNAL_BIT (1<<11)
-#define TRACE_GLOBAL_BIT (1<<12)
-
-#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
-#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
-#define trace_recursion_test(bit) ((current)->trace_recursion & (bit))
-
#endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index e32744c84d9..93365907f21 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -161,7 +161,8 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
TRACE_STACK,
F_STRUCT(
- __array( unsigned long, caller, FTRACE_STACK_ENTRIES )
+ __field( int, size )
+ __dynamic_array(unsigned long, caller )
),
F_printk("\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n"
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index e8d6bb55d71..a7d2a4c653d 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -227,7 +227,7 @@ int __trace_graph_entry(struct trace_array *tr,
static inline int ftrace_graph_ignore_irqs(void)
{
- if (!ftrace_graph_skip_irqs)
+ if (!ftrace_graph_skip_irqs || trace_recursion_test(TRACE_IRQ_BIT))
return 0;
return in_irq();
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 7db7b68c6c3..5fb3697bf0e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -343,6 +343,14 @@ DEFINE_BASIC_FETCH_FUNCS(deref)
DEFINE_FETCH_deref(string)
DEFINE_FETCH_deref(string_size)
+static __kprobes void update_deref_fetch_param(struct deref_fetch_param *data)
+{
+ if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
+ update_deref_fetch_param(data->orig.data);
+ else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
+ update_symbol_cache(data->orig.data);
+}
+
static __kprobes void free_deref_fetch_param(struct deref_fetch_param *data)
{
if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
@@ -377,6 +385,19 @@ DEFINE_BASIC_FETCH_FUNCS(bitfield)
#define fetch_bitfield_string_size NULL
static __kprobes void
+update_bitfield_fetch_param(struct bitfield_fetch_param *data)
+{
+ /*
+ * Don't check the bitfield itself, because this must be the
+ * last fetch function.
+ */
+ if (CHECK_FETCH_FUNCS(deref, data->orig.fn))
+ update_deref_fetch_param(data->orig.data);
+ else if (CHECK_FETCH_FUNCS(symbol, data->orig.fn))
+ update_symbol_cache(data->orig.data);
+}
+
+static __kprobes void
free_bitfield_fetch_param(struct bitfield_fetch_param *data)
{
/*
@@ -389,6 +410,7 @@ free_bi