aboutsummaryrefslogtreecommitdiff
path: root/kernel/trace
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/trace')
-rw-r--r--kernel/trace/blktrace.c16
-rw-r--r--kernel/trace/ftrace.c52
-rw-r--r--kernel/trace/ring_buffer.c24
-rw-r--r--kernel/trace/trace.c38
-rw-r--r--kernel/trace/trace.h41
-rw-r--r--kernel/trace/trace_entries.h6
-rw-r--r--kernel/trace/trace_events.c2
-rw-r--r--kernel/trace/trace_events_filter.c885
-rw-r--r--kernel/trace/trace_kprobe.c111
-rw-r--r--kernel/trace/trace_output.c36
-rw-r--r--kernel/trace/trace_sched_switch.c48
-rw-r--r--kernel/trace/trace_syscalls.c42
12 files changed, 1009 insertions, 292 deletions
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index d95721f3370..cbafed7d4f3 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -1827,21 +1827,5 @@ void blk_fill_rwbs(char *rwbs, u32 rw, int bytes)
rwbs[i] = '\0';
}
-void blk_fill_rwbs_rq(char *rwbs, struct request *rq)
-{
- int rw = rq->cmd_flags & 0x03;
- int bytes;
-
- if (rq->cmd_flags & REQ_DISCARD)
- rw |= REQ_DISCARD;
-
- if (rq->cmd_flags & REQ_SECURE)
- rw |= REQ_SECURE;
-
- bytes = blk_rq_bytes(rq);
-
- blk_fill_rwbs(rwbs, rw, bytes);
-}
-
#endif /* CONFIG_EVENT_TRACING */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f3dadae8388..888b611897d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3328,7 +3328,7 @@ static int start_graph_tracing(void)
/* The cpu_boot init_task->ret_stack will never be freed */
for_each_online_cpu(cpu) {
if (!idle_task(cpu)->ret_stack)
- ftrace_graph_init_task(idle_task(cpu));
+ ftrace_graph_init_idle_task(idle_task(cpu), cpu);
}
do {
@@ -3418,6 +3418,49 @@ void unregister_ftrace_graph(void)
mutex_unlock(&ftrace_lock);
}
+static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+
+static void
+graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+{
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ t->ftrace_timestamp = 0;
+ /* make curr_ret_stack visable before we add the ret_stack */
+ smp_wmb();
+ t->ret_stack = ret_stack;
+}
+
+/*
+ * Allocate a return stack for the idle task. May be the first
+ * time through, or it may be done by CPU hotplug online.
+ */
+void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
+{
+ t->curr_ret_stack = -1;
+ /*
+ * The idle task has no parent, it either has its own
+ * stack or no stack at all.
+ */
+ if (t->ret_stack)
+ WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
+
+ if (ftrace_graph_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = per_cpu(idle_ret_stack, cpu);
+ if (!ret_stack) {
+ ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+ * sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ per_cpu(idle_ret_stack, cpu) = ret_stack;
+ }
+ graph_init_task(t, ret_stack);
+ }
+}
+
/* Allocate a return stack for newly created task */
void ftrace_graph_init_task(struct task_struct *t)
{
@@ -3433,12 +3476,7 @@ void ftrace_graph_init_task(struct task_struct *t)
GFP_KERNEL);
if (!ret_stack)
return;
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
- t->ftrace_timestamp = 0;
- /* make curr_ret_stack visable before we add the ret_stack */
- smp_wmb();
- t->ret_stack = ret_stack;
+ graph_init_task(t, ret_stack);
}
}
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 6ee56b4ad13..d9c8bcafb12 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -5,7 +5,6 @@
*/
#include <linux/ring_buffer.h>
#include <linux/trace_clock.h>
-#include <linux/ftrace_irq.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/uaccess.h>
@@ -1429,6 +1428,17 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size)
}
EXPORT_SYMBOL_GPL(ring_buffer_resize);
+void ring_buffer_change_overwrite(struct ring_buffer *buffer, int val)
+{
+ mutex_lock(&buffer->mutex);
+ if (val)
+ buffer->flags |= RB_FL_OVERWRITE;
+ else
+ buffer->flags &= ~RB_FL_OVERWRITE;
+ mutex_unlock(&buffer->mutex);
+}
+EXPORT_SYMBOL_GPL(ring_buffer_change_overwrite);
+
static inline void *
__rb_data_page_index(struct buffer_data_page *bpage, unsigned index)
{
@@ -2162,11 +2172,19 @@ rb_reserve_next_event(struct ring_buffer *buffer,
if (likely(ts >= cpu_buffer->write_stamp)) {
delta = diff;
if (unlikely(test_time_stamp(delta))) {
+ int local_clock_stable = 1;
+#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
+ local_clock_stable = sched_clock_stable;
+#endif
WARN_ONCE(delta > (1ULL << 59),
- KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n",
+ KERN_WARNING "Delta way too big! %llu ts=%llu write stamp = %llu\n%s",
(unsigned long long)delta,
(unsigned long long)ts,
- (unsigned long long)cpu_buffer->write_stamp);
+ (unsigned long long)cpu_buffer->write_stamp,
+ local_clock_stable ? "" :
+ "If you just came from a suspend/resume,\n"
+ "please switch to the trace global clock:\n"
+ " echo global > /sys/kernel/debug/tracing/trace_clock\n");
add_timestamp = 1;
}
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dc53ecb8058..9541c27c1cf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -41,8 +41,6 @@
#include "trace.h"
#include "trace_output.h"
-#define TRACE_BUFFER_FLAGS (RB_FL_OVERWRITE)
-
/*
* On boot up, the ring buffer is set to the minimum size, so that
* we do not waste memory on systems that are not using tracing.
@@ -340,7 +338,7 @@ static DECLARE_WAIT_QUEUE_HEAD(trace_wait);
/* trace_flags holds trace_options default values */
unsigned long trace_flags = TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |
TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO | TRACE_ITER_SLEEP_TIME |
- TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD;
+ TRACE_ITER_GRAPH_TIME | TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE;
static int trace_stop_count;
static DEFINE_SPINLOCK(tracing_start_lock);
@@ -425,6 +423,7 @@ static const char *trace_options[] = {
"sleep-time",
"graph-time",
"record-cmd",
+ "overwrite",
NULL
};
@@ -780,6 +779,11 @@ __acquires(kernel_lock)
tracing_reset_online_cpus(tr);
current_trace = type;
+
+ /* If we expanded the buffers, make sure the max is expanded too */
+ if (ring_buffer_expanded && type->use_max_tr)
+ ring_buffer_resize(max_tr.buffer, trace_buf_size);
+
/* the test is responsible for initializing and enabling */
pr_info("Testing tracer %s: ", type->name);
ret = type->selftest(type, tr);
@@ -792,6 +796,10 @@ __acquires(kernel_lock)
/* Only reset on passing, to avoid touching corrupted buffers */
tracing_reset_online_cpus(tr);
+ /* Shrink the max buffer again */
+ if (ring_buffer_expanded && type->use_max_tr)
+ ring_buffer_resize(max_tr.buffer, 1);
+
printk(KERN_CONT "PASSED\n");
}
#endif
@@ -1102,7 +1110,6 @@ tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags,
entry->preempt_count = pc & 0xff;
entry->pid = (tsk) ? tsk->pid : 0;
- entry->lock_depth = (tsk) ? tsk->lock_depth : 0;
entry->flags =
#ifdef CONFIG_TRACE_IRQFLAGS_SUPPORT
(irqs_disabled_flags(flags) ? TRACE_FLAG_IRQS_OFF : 0) |
@@ -1749,10 +1756,9 @@ static void print_lat_help_header(struct seq_file *m)
seq_puts(m, "# | / _----=> need-resched \n");
seq_puts(m, "# || / _---=> hardirq/softirq \n");
seq_puts(m, "# ||| / _--=> preempt-depth \n");
- seq_puts(m, "# |||| /_--=> lock-depth \n");
- seq_puts(m, "# |||||/ delay \n");
- seq_puts(m, "# cmd pid |||||| time | caller \n");
- seq_puts(m, "# \\ / |||||| \\ | / \n");
+ seq_puts(m, "# |||| / delay \n");
+ seq_puts(m, "# cmd pid ||||| time | caller \n");
+ seq_puts(m, "# \\ / ||||| \\ | / \n");
}
static void print_func_help_header(struct seq_file *m)
@@ -2529,6 +2535,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
if (mask == TRACE_ITER_RECORD_CMD)
trace_event_enable_cmd_record(enabled);
+
+ if (mask == TRACE_ITER_OVERWRITE)
+ ring_buffer_change_overwrite(global_trace.buffer, enabled);
}
static ssize_t
@@ -2710,6 +2719,10 @@ tracing_ctrl_write(struct file *filp, const char __user *ubuf,
mutex_lock(&trace_types_lock);
if (tracer_enabled ^ val) {
+
+ /* Only need to warn if this is used to change the state */
+ WARN_ONCE(1, "tracing_enabled is deprecated. Use tracing_on");
+
if (val) {
tracer_enabled = 1;
if (current_trace->start)
@@ -4551,9 +4564,11 @@ void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
__init static int tracer_alloc_buffers(void)
{
int ring_buf_size;
+ enum ring_buffer_flags rb_flags;
int i;
int ret = -ENOMEM;
+
if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
goto out;
@@ -4566,12 +4581,13 @@ __init static int tracer_alloc_buffers(void)
else
ring_buf_size = 1;
+ rb_flags = trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
+
cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
cpumask_copy(tracing_cpumask, cpu_all_mask);
/* TODO: make the number of buffers hot pluggable with CPUS */
- global_trace.buffer = ring_buffer_alloc(ring_buf_size,
- TRACE_BUFFER_FLAGS);
+ global_trace.buffer = ring_buffer_alloc(ring_buf_size, rb_flags);
if (!global_trace.buffer) {
printk(KERN_ERR "tracer: failed to allocate ring buffer!\n");
WARN_ON(1);
@@ -4581,7 +4597,7 @@ __init static int tracer_alloc_buffers(void)
#ifdef CONFIG_TRACER_MAX_TRACE
- max_tr.buffer = ring_buffer_alloc(1, TRACE_BUFFER_FLAGS);
+ max_tr.buffer = ring_buffer_alloc(1, rb_flags);
if (!max_tr.buffer) {
printk(KERN_ERR "tracer: failed to allocate max ring buffer!\n");
WARN_ON(1);
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 9021f8c0c0c..5e9dfc6286d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -272,8 +272,8 @@ struct tracer {
/* If you handled the flag setting, return 0 */
int (*set_flag)(u32 old_flags, u32 bit, int set);
struct tracer *next;
- int print_max;
struct tracer_flags *flags;
+ int print_max;
int use_max_tr;
};
@@ -606,6 +606,7 @@ enum trace_iterator_flags {
TRACE_ITER_SLEEP_TIME = 0x40000,
TRACE_ITER_GRAPH_TIME = 0x80000,
TRACE_ITER_RECORD_CMD = 0x100000,
+ TRACE_ITER_OVERWRITE = 0x200000,
};
/*
@@ -661,8 +662,10 @@ struct ftrace_event_field {
};
struct event_filter {
- int n_preds;
- struct filter_pred **preds;
+ int n_preds; /* Number assigned */
+ int a_preds; /* allocated */
+ struct filter_pred *preds;
+ struct filter_pred *root;
char *filter_string;
};
@@ -674,11 +677,23 @@ struct event_subsystem {
int nr_events;
};
+#define FILTER_PRED_INVALID ((unsigned short)-1)
+#define FILTER_PRED_IS_RIGHT (1 << 15)
+#define FILTER_PRED_FOLD (1 << 15)
+
+/*
+ * The max preds is the size of unsigned short with
+ * two flags at the MSBs. One bit is used for both the IS_RIGHT
+ * and FOLD flags. The other is reserved.
+ *
+ * 2^14 preds is way more than enough.
+ */
+#define MAX_FILTER_PRED 16384
+
struct filter_pred;
struct regex;
-typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event,
- int val1, int val2);
+typedef int (*filter_pred_fn_t) (struct filter_pred *pred, void *event);
typedef int (*regex_match_func)(char *str, struct regex *r, int len);
@@ -700,11 +715,23 @@ struct filter_pred {
filter_pred_fn_t fn;
u64 val;
struct regex regex;
- char *field_name;
+ /*
+ * Leaf nodes use field_name, ops is used by AND and OR
+ * nodes. The field_name is always freed when freeing a pred.
+ * We can overload field_name for ops and have it freed
+ * as well.
+ */
+ union {
+ char *field_name;
+ unsigned short *ops;
+ };
int offset;
int not;
int op;
- int pop_n;
+ unsigned short index;
+ unsigned short parent;
+ unsigned short left;
+ unsigned short right;
};
extern struct list_head ftrace_common_fields;
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 6cf223764be..1516cb3ec54 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -109,12 +109,12 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
*/
#define FTRACE_CTX_FIELDS \
__field( unsigned int, prev_pid ) \
+ __field( unsigned int, next_pid ) \
+ __field( unsigned int, next_cpu ) \
__field( unsigned char, prev_prio ) \
__field( unsigned char, prev_state ) \
- __field( unsigned int, next_pid ) \
__field( unsigned char, next_prio ) \
- __field( unsigned char, next_state ) \
- __field( unsigned int, next_cpu )
+ __field( unsigned char, next_state )
FTRACE_ENTRY(context_switch, ctx_switch_entry,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5f499e0438a..e88f74fe1d4 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -116,7 +116,6 @@ static int trace_define_common_fields(void)
__common_field(unsigned char, flags);
__common_field(unsigned char, preempt_count);
__common_field(int, pid);
- __common_field(int, lock_depth);
return ret;
}
@@ -326,6 +325,7 @@ int trace_set_clr_event(const char *system, const char *event, int set)
{
return __ftrace_set_clr_event(NULL, system, event, set);
}
+EXPORT_SYMBOL_GPL(trace_set_clr_event);
/* 128 should be much more than enough */
#define EVENT_BUF_SIZE 127
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 36d40104b17..3249b4f77ef 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -123,9 +123,13 @@ struct filter_parse_state {
} operand;
};
+struct pred_stack {
+ struct filter_pred **preds;
+ int index;
+};
+
#define DEFINE_COMPARISON_PRED(type) \
-static int filter_pred_##type(struct filter_pred *pred, void *event, \
- int val1, int val2) \
+static int filter_pred_##type(struct filter_pred *pred, void *event) \
{ \
type *addr = (type *)(event + pred->offset); \
type val = (type)pred->val; \
@@ -152,8 +156,7 @@ static int filter_pred_##type(struct filter_pred *pred, void *event, \
}
#define DEFINE_EQUALITY_PRED(size) \
-static int filter_pred_##size(struct filter_pred *pred, void *event, \
- int val1, int val2) \
+static int filter_pred_##size(struct filter_pred *pred, void *event) \
{ \
u##size *addr = (u##size *)(event + pred->offset); \
u##size val = (u##size)pred->val; \
@@ -178,23 +181,8 @@ DEFINE_EQUALITY_PRED(32);
DEFINE_EQUALITY_PRED(16);
DEFINE_EQUALITY_PRED(8);
-static int filter_pred_and(struct filter_pred *pred __attribute((unused)),
- void *event __attribute((unused)),
- int val1, int val2)
-{
- return val1 && val2;
-}
-
-static int filter_pred_or(struct filter_pred *pred __attribute((unused)),
- void *event __attribute((unused)),
- int val1, int val2)
-{
- return val1 || val2;
-}
-
/* Filter predicate for fixed sized arrays of characters */
-static int filter_pred_string(struct filter_pred *pred, void *event,
- int val1, int val2)
+static int filter_pred_string(struct filter_pred *pred, void *event)
{
char *addr = (char *)(event + pred->offset);
int cmp, match;
@@ -207,8 +195,7 @@ static int filter_pred_string(struct filter_pred *pred, void *event,
}
/* Filter predicate for char * pointers */
-static int filter_pred_pchar(struct filter_pred *pred, void *event,
- int val1, int val2)
+static int filter_pred_pchar(struct filter_pred *pred, void *event)
{
char **addr = (char **)(event + pred->offset);
int cmp, match;
@@ -231,8 +218,7 @@ static int filter_pred_pchar(struct filter_pred *pred, void *event,
* and add it to the address of the entry, and at last we have
* the address of the string.
*/
-static int filter_pred_strloc(struct filter_pred *pred, void *event,
- int val1, int val2)
+static int filter_pred_strloc(struct filter_pred *pred, void *event)
{
u32 str_item = *(u32 *)(event + pred->offset);
int str_loc = str_item & 0xffff;
@@ -247,8 +233,7 @@ static int filter_pred_strloc(struct filter_pred *pred, void *event,
return match;
}
-static int filter_pred_none(struct filter_pred *pred, void *event,
- int val1, int val2)
+static int filter_pred_none(struct filter_pred *pred, void *event)
{
return 0;
}
@@ -377,32 +362,147 @@ static void filter_build_regex(struct filter_pred *pred)
pred->not ^= not;
}
+enum move_type {
+ MOVE_DOWN,
+ MOVE_UP_FROM_LEFT,
+ MOVE_UP_FROM_RIGHT
+};
+
+static struct filter_pred *
+get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
+ int index, enum move_type *move)
+{
+ if (pred->parent & FILTER_PRED_IS_RIGHT)
+ *move = MOVE_UP_FROM_RIGHT;
+ else
+ *move = MOVE_UP_FROM_LEFT;
+ pred = &preds[pred->parent & ~FILTER_PRED_IS_RIGHT];
+
+ return pred;
+}
+
+/*
+ * A series of AND or ORs where found together. Instead of
+ * climbing up and down the tree branches, an array of the
+ * ops were made in order of checks. We can just move across
+ * the array and short circuit if needed.
+ */
+static int process_ops(struct filter_pred *preds,
+ struct filter_pred *op, void *rec)
+{
+ struct filter_pred *pred;
+ int type;
+ int match;
+ int i;
+
+ /*
+ * Micro-optimization: We set type to true if op
+ * is an OR and false otherwise (AND). Then we
+ * just need to test if the match is equal to
+ * the type, and if it is, we can short circuit the
+ * rest of the checks:
+ *
+ * if ((match && op->op == OP_OR) ||
+ * (!match && op->op == OP_AND))
+ * return match;
+ */
+ type = op->op == OP_OR;
+
+ for (i = 0; i < op->val; i++) {
+ pred = &preds[op->ops[i]];
+ match = pred->fn(pred, rec);
+ if (!!match == type)
+ return match;
+ }
+ return match;
+}
+
/* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct event_filter *filter, void *rec)
{
- int match, top = 0, val1 = 0, val2 = 0;
- int stack[MAX_FILTER_PRED];
+ int match = -1;
+ enum move_type move = MOVE_DOWN;
+ struct filter_pred *preds;
struct filter_pred *pred;
- int i;
+ struct filter_pred *root;
+ int n_preds;
+ int done = 0;
+
+ /* no filter is considered a match */
+ if (!filter)
+ return 1;
+
+ n_preds = filter->n_preds;
+
+ if (!n_preds)
+ return 1;
+
+ /*
+ * n_preds, root and filter->preds are protect with preemption disabled.
+ */
+ preds = rcu_dereference_sched(filter->preds);
+ root = rcu_dereference_sched(filter->root);
+ if (!root)
+ return 1;
+
+ pred = root;
- for (i = 0; i < filter->n_preds; i++) {
- pred = filter->preds[i];
- if (!pred->pop_n) {
- match = pred->fn(pred, rec, val1, val2);
- stack[top++] = match;
+ /* match is currently meaningless */
+ match = -1;
+
+ do {
+ switch (move) {
+ case MOVE_DOWN:
+ /* only AND and OR have children */
+ if (pred->left != FILTER_PRED_INVALID) {
+ /* If ops is set, then it was folded. */
+ if (!pred->ops) {
+ /* keep going to down the left side */
+ pred = &preds[pred->left];
+ continue;
+ }
+ /* We can treat folded ops as a leaf node */
+ match = process_ops(preds, pred, rec);
+ } else
+ match = pred->fn(pred, rec);
+ /* If this pred is the only pred */
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ case MOVE_UP_FROM_LEFT:
+ /*
+ * Check for short circuits.
+ *
+ * Optimization: !!match == (pred->op == OP_OR)
+ * is the same as:
+ * if ((match && pred->op == OP_OR) ||
+ * (!match && pred->op == OP_AND))
+ */
+ if (!!match == (pred->op == OP_OR)) {
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
+ continue;
+ }
+ /* now go down the right side of the tree. */
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ /* We finished this equation. */
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent, &move);
continue;
}
- if (pred->pop_n > top) {
- WARN_ON_ONCE(1);
- return 0;
- }
- val1 = stack[--top];
- val2 = stack[--top];
- match = pred->fn(pred, rec, val1, val2);
- stack[top++] = match;
- }
+ done = 1;
+ } while (!done);
- return stack[--top];
+ return match;
}
EXPORT_SYMBOL_GPL(filter_match_preds);
@@ -414,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos)
static void remove_filter_string(struct event_filter *filter)
{
+ if (!filter)
+ return;
+
kfree(filter->filter_string);
filter->filter_string = NULL;
}
@@ -473,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps,
void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
{
- struct event_filter *filter = call->filter;
+ struct event_filter *filter;
mutex_lock(&event_mutex);
+ filter = call->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
@@ -486,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s)
{
- struct event_filter *filter = system->filter;
+ struct event_filter *filter;
mutex_lock(&event_mutex);
+ filter = system->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
@@ -539,10 +644,58 @@ static void filter_clear_pred(struct filter_pred *pred)
pred->regex.len = 0;
}
-static int filter_set_pred(struct filter_pred *dest,
+static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
+{
+ stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
+ if (!stack->preds)
+ return -ENOMEM;
+ stack->index = n_preds;
+ return 0;
+}
+
+static void __free_pred_stack(struct pred_stack *stack)
+{
+ kfree(stack->preds);
+ stack->index = 0;
+}
+
+static int __push_pred_stack(struct pred_stack *stack,
+ struct filter_pred *pred)
+{
+ int index = stack->index;
+
+ if (WARN_ON(index == 0))
+ return -ENOSPC;
+
+ stack->preds[--index] = pred;
+ stack->index = index;
+ return 0;
+}
+
+static struct filter_pred *
+__pop_pred_stack(struct pred_stack *stack)
+{
+ struct filter_pred *pred;
+ int index = stack->index;
+
+ pred = stack->preds[index++];
+ if (!pred)
+ return NULL;
+
+ stack->index = index;
+ return pred;
+}
+
+static int filter_set_pred(struct event_filter *filter,
+ int idx,
+ struct pred_stack *stack,
struct filter_pred *src,
filter_pred_fn_t fn)
{
+ struct filter_pred *dest = &filter->preds[idx];
+ struct filter_pred *left;
+ struct filter_pred *right;
+
*dest = *src;
if (src->field_name) {
dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
@@ -550,116 +703,140 @@ static int filter_set_pred(struct filter_pred *dest,
return -ENOMEM;
}
dest->fn = fn;
+ dest->index = idx;
- return 0;
+ if (dest->op == OP_OR || dest->op == OP_AND) {
+ right = __pop_pred_stack(stack);
+ left = __pop_pred_stack(stack);
+ if (!left || !right)
+ return -EINVAL;
+ /*
+ * If both children can be folded
+ * and they are the same op as this op or a leaf,
+ * then this op can be folded.
+ */
+ if (left->index & FILTER_PRED_FOLD &&
+ (left->op == dest->op ||
+ left->left == FILTER_PRED_INVALID) &&
+ right->index & FILTER_PRED_FOLD &&
+ (right->op == dest->op ||
+ right->left == FILTER_PRED_INVALID))
+ dest->index |= FILTER_PRED_FOLD;
+
+ dest->left = left->index & ~FILTER_PRED_FOLD;
+ dest->right = right->index & ~FILTER_PRED_FOLD;
+ left->parent = dest->index & ~FILTER_PRED_FOLD;
+ right->parent = dest->index | FILTER_PRED_IS_RIGHT;
+ } else {
+ /*
+ * Make dest->left invalid to be used as a quick
+ * way to know this is a leaf node.
+ */
+ dest->left = FILTER_PRED_INVALID;
+
+ /* All leafs allow folding the parent ops. */
+ dest->index |= FILTER_PRED_FOLD;
+ }
+
+ return __push_pred_stack(stack, dest);
}
-static void filter_disable_preds(struct ftrace_event_call *call)
+static void __free_preds(struct event_filter *filter)
{
- struct event_filter *filter = call->filter;
int i;
- call->flags &= ~TRACE_EVENT_FL_FILTERED;
+ if (filter->preds) {
+ for (i = 0; i < filter->a_preds; i++)
+ kfree(filter->preds[i].field_name);
+ kfree(filter->preds);
+ filter->preds = NULL;
+ }
+ filter->a_preds = 0;
filter->n_preds = 0;
-
- for (i = 0; i < MAX_FILTER_PRED; i++)
- filter->preds[i]->fn = filter_pred_none;
}
-static void __free_preds(struct event_filter *filter)
+static void filter_disable(struct ftrace_event_call *call)
{
- int i;
+ call->flags &= ~TRACE_EVENT_FL_FILTERED;
+}
+static void __free_filter(struct event_filter *filter)
+{
if (!filter)
return;
- for (i = 0; i < MAX_FILTER_PRED; i++) {
- if (filter->preds[i])
- filter_free_pred(filter->preds[i]);
- }
- kfree(filter->preds);
+ __free_preds(filter);
kfree(filter->filter_string);
kfree(filter);
}
+/*
+ * Called when destroying the ftrace_event_call.
+ * The call is being freed, so we do not need to worry about
+ * the call being currently used. This is for module code removing
+ * the tracepoints from within it.
+ */
void destroy_preds(struct ftrace_event_call *call)
{
- __free_preds(call->filter);
+ __free_filter(call->filter);
call->filter = NULL;
- call->flags &= ~TRACE_EVENT_FL_FILTERED;
}
-static struct event_filter *__alloc_preds(void)
+static struct event_filter *__alloc_filter(void)
{
struct event_filter *filter;
+
+ filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+ return filter;
+}
+
+static int __alloc_preds(struct event_filter *filter, int n_preds)
+{
struct filter_pred *pred;
int i;
- filter = kzalloc(sizeof(*filter), GFP_KERNEL);
- if (!filter)
- return ERR_PTR(-ENOMEM);
+ if (filter->preds)
+ __free_preds(filter);
- filter->n_preds = 0;
+ filter->preds =
+ kzalloc(sizeof(*filter->preds) * n_preds, GFP_KERNEL);
- filter->preds = kzalloc(MAX_FILTER_PRED * sizeof(pred), GFP_KERNEL);
if (!filter->preds)
- goto oom;
+ return -ENOMEM;
- for (i = 0; i < MAX_FILTER_PRED; i++) {
- pred = kzalloc(sizeof(*pred), GFP_KERNEL);
- if (!pred)
- goto oom;
+ filter->a_preds = n_preds;
+ filter->n_preds = 0;
+
+ for (i = 0; i < n_preds; i++) {
+ pred = &filter->preds[i];
pred->fn = filter_pred_none;
- filter->preds[i] = pred;
}
- return filter;
-
-oom:
- __free_preds(filter);
- return ERR_PTR(-ENOMEM);
-}
-
-static int init_preds(struct ftrace_event_call *call)
-{
- if (call->filter)
- return 0;
-
- call->flags &= ~TRACE_EVENT_FL_FILTERED;
- call->filter = __alloc_preds();
- if (IS_ERR(call->filter))
- return PTR_ERR(call->filter);
-
return 0;
}
-static int init_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
{
struct ftrace_event_call *call;
- int err;
list_for_each_entry(call, &ftrace_events, list) {
if (strcmp(call->class->system, system->name) != 0)
continue;
- err = init_preds(call);
- if (err)
- return err;
+ filter_disable(call);
+ remove_filter_string(call->filter);
}
-
- return 0;
}
-static void filter_free_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_filters(struct event_subsystem *system)
{
struct ftrace_event_call *call;
list_for_each_entry(call, &ftrace_events, list) {
if (strcmp(call->class->system, system->name) != 0)
continue;
-
- filter_disable_preds(call);
- remove_filter_string(call->filter);
+ __free_filter(call->filter);
+ call->filter = NULL;
}
}
@@ -667,18 +844,19 @@ static int filter_add_pred_fn(struct filter_parse_state *ps,
struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_pred *pred,
+ struct pred_stack *stack,
filter_pred_fn_t fn)
{
int idx, err;
- if (filter->n_preds == MAX_FILTER_PRED) {
+ if (WARN_ON(filter->n_preds == filter->a_preds)) {
parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
return -ENOSPC;
}
idx = filter->n_preds;
- filter_clear_pred(filter->preds[idx]);
- err = filter_set_pred(filter->preds[idx], pred, fn);
+ filter_clear_pred(&filter->preds[idx]);
+ err = filter_set_pred(filter, idx, stack, pred, fn);
if (err)
return err;
@@ -763,6 +941,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
struct ftrace_event_call *call,
struct event_filter *filter,
struct filter_pred *pred,
+ struct pred_stack *stack,
bool dry_run)
{
struct ftrace_event_field *field;
@@ -770,17 +949,12 @@ static int filter_add_pred(struct filter_parse_state *ps,
unsigned long long val;
int ret;
- pred->fn = filter_pred_none;
+ fn = pred->fn = filter_pred_none;
- if (pred->op == OP_AND) {
- pred->pop_n = 2;
- fn = filter_pred_and;
+ if (pred->op == OP_AND)
goto add_pred_fn;
- } else if (pred->op == OP_OR) {
- pred->pop_n = 2;
- fn = filter_pred_or;
+ else if (pred->op == OP_OR)
goto add_pred_fn;
- }
field = find_event_field(call, pred->field_name);
if (!field) {
@@ -829,7 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
add_pred_fn:
if (!dry_run)
- return filter_add_pred_fn(ps, call, filter, pred, fn);
+ return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
return 0;
}
@@ -1187,6 +1361,234 @@ static int check_preds(struct filter_parse_state *ps)
return 0;
}
+static int count_preds(struct filter_parse_state *ps)
+{
+ struct postfix_elt *elt;
+ int n_preds = 0;
+
+ list_for_each_entry(elt, &ps->postfix, list) {
+ if (elt->op == OP_NONE)
+ continue;
+ n_preds++;
+ }
+
+ return n_preds;
+}
+
+/*
+ * The tree is walked at filtering of an event. If the tree is not correctly
+ * built, it may cause an infinite loop. Check here that the tree does
+ * indeed t