aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2011-10-12 17:14:47 +0200
committerIngo Molnar <mingo@elte.hu>2011-10-12 17:14:47 +0200
commit910e94dd0cc5abacebf0bd5ffd859f61b9583857 (patch)
tree7cb33f3f89f5fcdaa4dd4f5f19bb946ff495e9ac /kernel
parent177e2163fe2a5951c1d0531baa4bc1f135c6b7b4 (diff)
parentd696b58ca2c3ca76e784ef89a7e0453d9b7ab187 (diff)
Merge branch 'tip/perf/core' of git://github.com/rostedt/linux into perf/core
Diffstat (limited to 'kernel')
-rw-r--r--kernel/module.c47
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/ftrace.c8
-rw-r--r--kernel/trace/ring_buffer.c70
-rw-r--r--kernel/trace/trace.c173
-rw-r--r--kernel/trace/trace.h16
-rw-r--r--kernel/trace/trace_clock.c12
-rw-r--r--kernel/trace/trace_events_filter.c795
-rw-r--r--kernel/trace/trace_events_filter_test.h50
-rw-r--r--kernel/trace/trace_irqsoff.c4
-rw-r--r--kernel/trace/trace_printk.c19
-rw-r--r--kernel/tracepoint.c169
12 files changed, 913 insertions, 452 deletions
diff --git a/kernel/module.c b/kernel/module.c
index 04379f92f84..93342d992f3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3487,50 +3487,3 @@ void module_layout(struct module *mod,
}
EXPORT_SYMBOL(module_layout);
#endif
-
-#ifdef CONFIG_TRACEPOINTS
-void module_update_tracepoints(void)
-{
- struct module *mod;
-
- mutex_lock(&module_mutex);
- list_for_each_entry(mod, &modules, list)
- if (!mod->taints)
- tracepoint_update_probe_range(mod->tracepoints_ptrs,
- mod->tracepoints_ptrs + mod->num_tracepoints);
- mutex_unlock(&module_mutex);
-}
-
-/*
- * Returns 0 if current not found.
- * Returns 1 if current found.
- */
-int module_get_iter_tracepoints(struct tracepoint_iter *iter)
-{
- struct module *iter_mod;
- int found = 0;
-
- mutex_lock(&module_mutex);
- list_for_each_entry(iter_mod, &modules, list) {
- if (!iter_mod->taints) {
- /*
- * Sorted module list
- */
- if (iter_mod < iter->module)
- continue;
- else if (iter_mod > iter->module)
- iter->tracepoint = NULL;
- found = tracepoint_get_iter_range(&iter->tracepoint,
- iter_mod->tracepoints_ptrs,
- iter_mod->tracepoints_ptrs
- + iter_mod->num_tracepoints);
- if (found) {
- iter->module = iter_mod;
- break;
- }
- }
- }
- mutex_unlock(&module_mutex);
- return found;
-}
-#endif
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 761c510a06c..b384ed512ba 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -15,6 +15,8 @@ ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif
+CFLAGS_trace_events_filter.o := -I$(src)
+
#
# Make the trace clocks available generally: it's infrastructure
# relied on by ptrace for example:
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c3e4575e782..077d8538790 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3863,6 +3863,14 @@ void ftrace_kill(void)
}
/**
+ * Test if ftrace is dead or not.
+ */
+int ftrace_is_dead(void)
+{
+ return ftrace_disabled;
+}
+
+/**
* register_ftrace_function - register a function for profiling
* @ops - ops structure that holds the function for profiling.
*
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 731201bf4ac..acf6b68dc4a 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -488,12 +488,14 @@ struct ring_buffer_per_cpu {
struct buffer_page *reader_page;
unsigned long lost_events;
unsigned long last_overrun;
+ local_t entries_bytes;
local_t commit_overrun;
local_t overrun;
local_t entries;
local_t committing;
local_t commits;
unsigned long read;
+ unsigned long read_bytes;
u64 write_stamp;
u64 read_stamp;
};
@@ -1708,6 +1710,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
* the counters.
*/
local_add(entries, &cpu_buffer->overrun);
+ local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
/*
* The entries will be zeroed out when we move the
@@ -1863,6 +1866,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
+ /* account for padding bytes */
+ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+
/*
* Save the original length to the meta data.
* This will be used by the reader to add lost event
@@ -2054,6 +2060,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (!tail)
tail_page->page->time_stamp = ts;
+ /* account for these added bytes */
+ local_add(length, &cpu_buffer->entries_bytes);
+
return event;
}
@@ -2076,6 +2085,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
unsigned long write_mask =
local_read(&bpage->write) & ~RB_WRITE_MASK;
+ unsigned long event_length = rb_event_length(event);
/*
* This is on the tail page. It is possible that
* a write could come in and move the tail page
@@ -2085,8 +2095,11 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
old_index += write_mask;
new_index += write_mask;
index = local_cmpxchg(&bpage->write, old_index, new_index);
- if (index == old_index)
+ if (index == old_index) {
+ /* update counters */
+ local_sub(event_length, &cpu_buffer->entries_bytes);
return 1;
+ }
}
/* could not discard */
@@ -2661,6 +2674,58 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
}
/**
+ * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
+{
+ unsigned long flags;
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *bpage;
+ unsigned long ret;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ /*
+ * if the tail is on reader_page, oldest time stamp is on the reader
+ * page
+ */
+ if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+ bpage = cpu_buffer->reader_page;
+ else
+ bpage = rb_set_head_page(cpu_buffer);
+ ret = bpage->page->time_stamp;
+ spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
+
+/**
+ * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long ret;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
+
+/**
* ring_buffer_entries_cpu - get the number of entries in a cpu buffer
* @buffer: The ring buffer
* @cpu: The per CPU buffer to get the entries from.
@@ -3527,11 +3592,13 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read = 0;
local_set(&cpu_buffer->commit_overrun, 0);
+ local_set(&cpu_buffer->entries_bytes, 0);
local_set(&cpu_buffer->overrun, 0);
local_set(&cpu_buffer->entries, 0);
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
cpu_buffer->read = 0;
+ cpu_buffer->read_bytes = 0;
cpu_buffer->write_stamp = 0;
cpu_buffer->read_stamp = 0;
@@ -3918,6 +3985,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
} else {
/* update the entry counter */
cpu_buffer->read += rb_page_entries(reader);
+ cpu_buffer->read_bytes += BUF_PAGE_SIZE;
/* swap the pages */
rb_init_page(bpage);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index e5df02c69b1..f86efe90ca4 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -435,6 +435,7 @@ static struct {
} trace_clocks[] = {
{ trace_clock_local, "local" },
{ trace_clock_global, "global" },
+ { trace_clock_counter, "counter" },
};
int trace_clock_id;
@@ -2159,6 +2160,14 @@ void trace_default_header(struct seq_file *m)
}
}
+static void test_ftrace_alive(struct seq_file *m)
+{
+ if (!ftrace_is_dead())
+ return;
+ seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+ seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n");
+}
+
static int s_show(struct seq_file *m, void *v)
{
struct trace_iterator *iter = v;
@@ -2168,6 +2177,7 @@ static int s_show(struct seq_file *m, void *v)
if (iter->tr) {
seq_printf(m, "# tracer: %s\n", iter->trace->name);
seq_puts(m, "#\n");
+ test_ftrace_alive(m);
}
if (iter->trace && iter->trace->print_header)
iter->trace->print_header(m);
@@ -3569,6 +3579,30 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
}
static ssize_t
+tracing_total_entries_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r, cpu;
+ unsigned long size = 0, expanded_size = 0;
+
+ mutex_lock(&trace_types_lock);
+ for_each_tracing_cpu(cpu) {
+ size += tr->entries >> 10;
+ if (!ring_buffer_expanded)
+ expanded_size += trace_buf_size >> 10;
+ }
+ if (ring_buffer_expanded)
+ r = sprintf(buf, "%lu\n", size);
+ else
+ r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
+ mutex_unlock(&trace_types_lock);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
@@ -3594,22 +3628,24 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
return 0;
}
-static int mark_printk(const char *fmt, ...)
-{
- int ret;
- va_list args;
- va_start(args, fmt);
- ret = trace_vprintk(0, fmt, args);
- va_end(args);
- return ret;
-}
-
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
- char *buf;
- size_t written;
+ unsigned long addr = (unsigned long)ubuf;
+ struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
+ struct print_entry *entry;
+ unsigned long irq_flags;
+ struct page *pages[2];
+ int nr_pages = 1;
+ ssize_t written;
+ void *page1;
+ void *page2;
+ int offset;
+ int size;
+ int len;
+ int ret;
if (tracing_disabled)
return -EINVAL;
@@ -3617,28 +3653,81 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (cnt > TRACE_BUF_SIZE)
cnt = TRACE_BUF_SIZE;
- buf = kmalloc(cnt + 2, GFP_KERNEL);
- if (buf == NULL)
- return -ENOMEM;
+ /*
+ * Userspace is injecting traces into the kernel trace buffer.
+ * We want to be as non intrusive as possible.
+ * To do so, we do not want to allocate any special buffers
+ * or take any locks, but instead write the userspace data
+ * straight into the ring buffer.
+ *
+ * First we need to pin the userspace buffer into memory,
+ * which, most likely it is, because it just referenced it.
+ * But there's no guarantee that it is. By using get_user_pages_fast()
+ * and kmap_atomic/kunmap_atomic() we can get access to the
+ * pages directly. We then write the data directly into the
+ * ring buffer.
+ */
+ BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
- if (copy_from_user(buf, ubuf, cnt)) {
- kfree(buf);
- return -EFAULT;
+ /* check if we cross pages */
+ if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
+ nr_pages = 2;
+
+ offset = addr & (PAGE_SIZE - 1);
+ addr &= PAGE_MASK;
+
+ ret = get_user_pages_fast(addr, nr_pages, 0, pages);
+ if (ret < nr_pages) {
+ while (--ret >= 0)
+ put_page(pages[ret]);
+ written = -EFAULT;
+ goto out;
}
- if (buf[cnt-1] != '\n') {
- buf[cnt] = '\n';
- buf[cnt+1] = '\0';
+
+ page1 = kmap_atomic(pages[0]);
+ if (nr_pages == 2)
+ page2 = kmap_atomic(pages[1]);
+
+ local_save_flags(irq_flags);
+ size = sizeof(*entry) + cnt + 2; /* possible \n added */
+ buffer = global_trace.buffer;
+ event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
+ irq_flags, preempt_count());
+ if (!event) {
+ /* Ring buffer disabled, return as if not open for write */
+ written = -EBADF;
+ goto out_unlock;
+ }
+
+ entry = ring_buffer_event_data(event);
+ entry->ip = _THIS_IP_;
+
+ if (nr_pages == 2) {
+ len = PAGE_SIZE - offset;
+ memcpy(&entry->buf, page1 + offset, len);
+ memcpy(&entry->buf[len], page2, cnt - len);
} else
- buf[cnt] = '\0';
+ memcpy(&entry->buf, page1 + offset, cnt);
- written = mark_printk("%s", buf);
- kfree(buf);
- *fpos += written;
+ if (entry->buf[cnt - 1] != '\n') {
+ entry->buf[cnt] = '\n';
+ entry->buf[cnt + 1] = '\0';
+ } else
+ entry->buf[cnt] = '\0';
- /* don't tell userspace we wrote more - it might confuse them */
- if (written > cnt)
- written = cnt;
+ ring_buffer_unlock_commit(buffer, event);
+
+ written = cnt;
+
+ *fpos += written;
+ out_unlock:
+ if (nr_pages == 2)
+ kunmap_atomic(page2);
+ kunmap_atomic(page1);
+ while (nr_pages > 0)
+ put_page(pages[--nr_pages]);
+ out:
return written;
}
@@ -3739,6 +3828,12 @@ static const struct file_operations tracing_entries_fops = {
.llseek = generic_file_llseek,
};
+static const struct file_operations tracing_total_entries_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_total_entries_read,
+ .llseek = generic_file_llseek,
+};
+
static const struct file_operations tracing_free_buffer_fops = {
.write = tracing_free_buffer_write,
.release = tracing_free_buffer_release,
@@ -4026,6 +4121,8 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
struct trace_array *tr = &global_trace;
struct trace_seq *s;
unsigned long cnt;
+ unsigned long long t;
+ unsigned long usec_rem;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
@@ -4042,6 +4139,17 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+ cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
+ trace_seq_printf(s, "bytes: %ld\n", cnt);
+
+ t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
+ usec_rem = do_div(t, USEC_PER_SEC);
+ trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem);
+
+ t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
+ usec_rem = do_div(t, USEC_PER_SEC);
+ trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
+
count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
kfree(s);
@@ -4450,6 +4558,9 @@ static __init int tracer_init_debugfs(void)
trace_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
+ trace_create_file("buffer_total_size_kb", 0444, d_tracer,
+ &global_trace, &tracing_total_entries_fops);
+
trace_create_file("free_buffer", 0644, d_tracer,
&global_trace, &tracing_free_buffer_fops);
@@ -4566,6 +4677,12 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
tracing_off();
+ /* Did function tracer already get disabled? */
+ if (ftrace_is_dead()) {
+ printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+ printk("# MAY BE MISSING FUNCTION EVENTS\n");
+ }
+
if (disable_tracing)
ftrace_kill();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 616846bcfee..092e1f8d18d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -579,11 +579,13 @@ static inline int ftrace_trace_task(struct task_struct *task)
return test_tsk_trace_trace(task);
}
+extern int ftrace_is_dead(void);
#else
static inline int ftrace_trace_task(struct task_struct *task)
{
return 1;
}
+static inline int ftrace_is_dead(void) { return 0; }
#endif
/*
@@ -761,16 +763,10 @@ struct filter_pred {
filter_pred_fn_t fn;
u64 val;
struct regex regex;
- /*
- * Leaf nodes use field_name, ops is used by AND and OR
- * nodes. The field_name is always freed when freeing a pred.
- * We can overload field_name for ops and have it freed
- * as well.
- */
- union {
- char *field_name;
- unsigned short *ops;
- };
+ unsigned short *ops;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ struct ftrace_event_field *field;
+#endif
int offset;
int not;
int op;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 6302747a139..394783531cb 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -113,3 +113,15 @@ u64 notrace trace_clock_global(void)
return now;
}
+
+static atomic64_t trace_counter;
+
+/*
+ * trace_clock_counter(): simply an atomic counter.
+ * Use the trace_counter "counter" for cases where you do not care
+ * about timings, but are interested in strict ordering.
+ */
+u64 notrace trace_clock_counter(void)
+{
+ return atomic64_add_return(1, &trace_counter);
+}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 256764ecccd..816d3d07497 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -381,6 +381,63 @@ get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
return pred;
}
+enum walk_return {
+ WALK_PRED_ABORT,
+ WALK_PRED_PARENT,
+ WALK_PRED_DEFAULT,
+};
+
+typedef int (*filter_pred_walkcb_t) (enum move_type move,
+ struct filter_pred *pred,
+ int *err, void *data);
+
+static int walk_pred_tree(struct filter_pred *preds,
+ struct filter_pred *root,
+ filter_pred_walkcb_t cb, void *data)
+{
+ struct filter_pred *pred = root;
+ enum move_type move = MOVE_DOWN;
+ int done = 0;
+
+ if (!preds)
+ return -EINVAL;
+
+ do {
+ int err = 0, ret;
+
+ ret = cb(move, pred, &err, data);
+ if (ret == WALK_PRED_ABORT)
+ return err;
+ if (ret == WALK_PRED_PARENT)
+ goto get_parent;
+
+ switch (move) {
+ case MOVE_DOWN:
+ if (pred->left != FILTER_PRED_INVALID) {
+ pred = &preds[pred->left];
+ continue;
+ }
+ goto get_parent;
+ case MOVE_UP_FROM_LEFT:
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ get_parent:
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent,
+ &move);
+ continue;
+ }
+ done = 1;
+ } while (!done);
+
+ /* We are fine. */
+ return 0;
+}
+
/*
* A series of AND or ORs where found together. Instead of
* climbing up and down the tree branches, an array of the
@@ -410,99 +467,91 @@ static int process_ops(struct filter_pred *preds,
for (i = 0; i < op->val; i++) {
pred = &preds[op->ops[i]];
- match = pred->fn(pred, rec);
+ if (!WARN_ON_ONCE(!pred->fn))
+ match = pred->fn(pred, rec);
if (!!match == type)
return match;
}
return match;
}
+struct filter_match_preds_data {
+ struct filter_pred *preds;
+ int match;
+ void *rec;
+};
+
+static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ struct filter_match_preds_data *d = data;
+
+ *err = 0;
+ switch (move) {
+ case MOVE_DOWN:
+ /* only AND and OR have children */
+ if (pred->left != FILTER_PRED_INVALID) {
+ /* If ops is set, then it was folded. */
+ if (!pred->ops)
+ return WALK_PRED_DEFAULT;
+ /* We can treat folded ops as a leaf node */
+ d->match = process_ops(d->preds, pred, d->rec);
+ } else {
+ if (!WARN_ON_ONCE(!pred->fn))
+ d->match = pred->fn(pred, d->rec);
+ }
+
+ return WALK_PRED_PARENT;
+ case MOVE_UP_FROM_LEFT:
+ /*
+ * Check for short circuits.
+ *
+ * Optimization: !!match == (pred->op == OP_OR)
+ * is the same as:
+ * if ((match && pred->op == OP_OR) ||
+ * (!match && pred->op == OP_AND))
+ */
+ if (!!d->match == (pred->op == OP_OR))
+ return WALK_PRED_PARENT;
+ break;
+ case MOVE_UP_FROM_RIGHT:
+ break;
+ }
+
+ return WALK_PRED_DEFAULT;
+}
+
/* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct event_filter *filter, void *rec)
{
- int match = -1;
- enum move_type move = MOVE_DOWN;
struct filter_pred *preds;
- struct filter_pred *pred;
struct filter_pred *root;
- int n_preds;
- int done = 0;
+ struct filter_match_preds_data data = {
+ /* match is currently meaningless */
+ .match = -1,
+ .rec = rec,
+ };
+ int n_preds, ret;
/* no filter is considered a match */
if (!filter)
return 1;
n_preds = filter->n_preds;
-
if (!n_preds)
return 1;
/*
* n_preds, root and filter->preds are protect with preemption disabled.
*/
- preds = rcu_dereference_sched(filter->preds);
root = rcu_dereference_sched(filter->root);
if (!root)
return 1;
- pred = root;
-
- /* match is currently meaningless */
- match = -1;
-
- do {
- switch (move) {
- case MOVE_DOWN:
- /* only AND and OR have children */
- if (pred->left != FILTER_PRED_INVALID) {
- /* If ops is set, then it was folded. */
- if (!pred->ops) {
- /* keep going to down the left side */
- pred = &preds[pred->left];
- continue;
- }
- /* We can treat folded ops as a leaf node */
- match = process_ops(preds, pred, rec);
- } else
- match = pred->fn(pred, rec);
- /* If this pred is the only pred */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- case MOVE_UP_FROM_LEFT:
- /*
- * Check for short circuits.
- *
- * Optimization: !!match == (pred->op == OP_OR)
- * is the same as:
- * if ((match && pred->op == OP_OR) ||
- * (!match && pred->op == OP_AND))
- */
- if (!!match == (pred->op == OP_OR)) {
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- /* now go down the right side of the tree. */
- pred = &preds[pred->right];
- move = MOVE_DOWN;
- continue;
- case MOVE_UP_FROM_RIGHT:
- /* We finished this equation. */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- done = 1;
- } while (!done);
-
- return match;
+ data.preds = preds = rcu_dereference_sched(filter->preds);
+ ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data);
+ WARN_ON(ret);
+ return data.match;
}
EXPORT_SYMBOL_GPL(filter_match_preds);
@@ -628,22 +677,6 @@ find_event_field(struct ftrace_event_call *call, char *name)
return __find_event_field(head, name);
}
-static void filter_free_pred(struct filter_pred *pred)
-{
- if (!pred)
- return;
-
- kfree(pred->field_name);
- kfree(pred);
-}
-
-static void filter_clear_pred(struct filter_pred *pred)
-{
- kfree(pred->field_name);
- pred->field_name = NULL;
- pred->regex.len = 0;
-}
-
static int __alloc_pred_stack(struct pred_stack *stack, int n_preds)
{
stack->preds = kzalloc(sizeof(*stack->preds)*(n_preds + 1), GFP_KERNEL);
@@ -689,20 +722,13 @@ __pop_pred_stack(struct pred_stack *stack)
static int filter_set_pred(struct event_filter *filter,
int idx,
struct pred_stack *stack,
- struct filter_pred *src,
- filter_pred_fn_t fn)
+ struct filter_pred *src)
{
struct filter_pred *dest = &filter->preds[idx];
struct filter_pred *left;
struct filter_pred *right;
*dest = *src;
- if (src->field_name) {
- dest->field_name = kstrdup(src->field_name, GFP_KERNEL);
- if (!dest->field_name)
- return -ENOMEM;
- }
- dest->fn = fn;
dest->index = idx;
if (dest->op == OP_OR || dest->op == OP_AND) {
@@ -743,11 +769,7 @@ static int filter_set_pred(struct event_filter *filter,
static void __free_preds(struct event_filter *filter)
{
- int i;
-
if (filter->preds) {
- for (i = 0; i < filter->a_preds; i++)
- kfree(filter->preds[i].field_name);
kfree(filter->preds);
filter->preds = NULL;
}
@@ -840,23 +862,19 @@ static void filter_free_subsystem_filters(struct event_subsystem *system)
}
}
-static int filter_add_pred_fn(struct filter_parse_state *ps,
- struct ftrace_event_call *call,
- struct event_filter *filter,
- struct filter_pred *pred,
- struct pred_stack *stack,
- filter_pred_fn_t fn)
+static int filter_add_pred(struct filter_parse_state *ps,
+ struct event_filter *filter,
+ struct filter_pred *pred,
+ struct pred_stack *stack)
{
- int idx, err;
+ int err;
if (WARN_ON(filter->n_preds == filter->a_preds)) {
parse_error(ps, FILT_ERR_TOO_MANY_PREDS, 0);
return -ENOSPC;
}
- idx = filter->n_preds;
- filter_clear_pred(&filter->preds[idx]);
- err = filter_set_pred(filter, idx, stack, pred, fn);
+ err = filter_set_pred(filter, filter->n_preds, stack, pred);
if (err)
return err;
@@ -937,31 +955,15 @@ static filter_pred_fn_t select_comparison_fn(int op, int field_size,
return fn;
}
-static int filter_add_pred(struct filter_parse_state *ps,
- struct ftrace_event_call *call,
- struct event_filter *filter,
- struct filter_pred *pred,
- struct pred_stack *stack,
- bool dry_run)
+static int init_pred(struct filter_parse_state *ps,
+ struct ftrace_event_field *field,
+ struct filter_pred *pred)
+
{
- struct ftrace_event_field *field;
- filter_pred_fn_t fn;
+ filter_pred_fn_t fn = filter_pred_none;
unsigned long long val;
int ret;
- fn = pred->fn = filter_pred_none;
-
- if (pred->op == OP_AND)
- goto add_pred_fn;
- else if (pred->op == OP_OR)
- goto add_pred_fn;
-
- field = find_event_field(call, pred->field_name);
- if (!field) {
- parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
- return -EINVAL;
- }
-
pred->offset = field->offset;
if (!is_legal_op(field, pred->op)) {
@@ -1001,9 +1003,7 @@ static int filter_add_pred(struct filter_parse_state *ps,
if (pred->op == OP_NE)
pred->not = 1;
-add_pred_fn:
- if (!dry_run)
- return filter_add_pred_fn(ps, call, filter, pred, stack, fn);
+ pred->fn = fn;
return 0;
}
@@ -1302,39 +1302,37 @@ parse_operand:
return 0;
}
-static struct filter_pred *create_pred(int op, char *operand1, char *operand2)
+static struct filter_pred *create_pred(struct filter_parse_state *ps,
+ struct ftrace_event_call *call,
+ int op, char *operand1, char *operand2)
{
- struct filter_pred *pred;
+ struct ftrace_event_field *field;
+ static struct filter_pred pred;
- pred = kzalloc(sizeof(*pred), GFP_KERNEL);
- if (!pred)
- return NULL;
+ memset(&pred, 0, sizeof(pred));
+ pred.op = op;
- pred->field_name = kstrdup(operand1, GFP_KERNEL);
- if (!pred->field_name) {
- kfree(pred);
+ if (op == OP_AND || op == OP_OR)
+ return &pred;
+
+ if (!operand1 || !operand2) {
+ parse_error(ps, FILT_ERR_MISSING_FIELD, 0);
return NULL;
}
- strcpy(pred->regex.pattern, operand2);
- pred->regex.len = strlen(pred->regex.pattern);
-
- pred->op = op;
-
- return pred;
-}
-
-static struct filter_pred *create_logical_pred(int op)
-{
- struct filter_pred *pred;
-
- pred = kzalloc(sizeof(*pred), GFP_KERNEL);
- if (!pred)
+ field = find_event_field(call, operand1);
+ if (!field) {
+ parse_error(ps, FILT_ERR_FIELD_NOT_FOUND, 0);
return NULL;
+ }
- pred->op = op;
+ strcpy(pred.regex.pattern, operand2);
+ pred.regex.len = strlen(pred.regex.pattern);
- return pred;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ pred.field = field;
+#endif
+ return init_pred(ps, field, &pred) ? NULL : &pred;
}
static int check_preds(struct filter_parse_state *ps)
@@ -1375,6 +1373,23 @@ static int count_preds(struct filter_parse_state *ps)
return n_preds;
}
+struct check_pred_data {
+ int count;
+ int max;
+};
+
+static int check_pred_tree_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ struct check_pred_data *d = data;
+
+ if (WARN_ON(d->count++ > d->max)) {
+ *err = -EINVAL;
+ return WALK_PRED_ABORT;
+ }
+ return WALK_PRED_DEFAULT;
+}
+
/*
* The tree is walked at filtering of an event. If the tree is not correctly
* built, it may cause an infinite loop. Check here that the tree does
@@ -1383,107 +1398,76 @@ static int count_preds(struct filter_parse_state *ps)
static int check_pred_tree(struct event_filter *filter,
struct filter_pred *root)
{
- struct filter_pred *preds;
- struct filter_pred *pred;
- enum move_type move = MOVE_DOWN;
- int count = 0;
- int done = 0;
- int max;
-
- /*
- * The max that we can hit a node is three times.
- * Once going down, once coming up from left, and
- * once coming up from right. This is more than enough
- * since leafs are only hit a single time.
- */
- max = 3 * filter->n_preds;
+ struct check_pred_data data = {
+ /*
+ * The max that we can hit a node is three times.
+ * Once going down, once coming up from left, and
+ * once coming up from right. This is more than enough
+ * since leafs are only hit a single time.
+ */
+ .max = 3 * filter->n_preds,
+ .count = 0,
+ };
- preds = filter->preds;
- if (!preds)
- return -EINVAL;
- pred = root;
+ return walk_pred_tree(filter->preds, root,
+ check_pred_tree_cb, &data);
+}
- do {
- if (WARN_ON(count++ > max))
- return -EINVAL;
+static int count_leafs_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ int *count = data;
- switch (move) {
- case MOVE_DOWN:
- if (pred->left != FILTER_PRED_INVALID) {
- pred = &preds[pred->left];
- continue;
- }
- /* A leaf at the root is just a leaf in the tree */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- case MOVE_UP_FROM_LEFT:
- pred = &preds[pred->right];
- move = MOVE_DOWN;
- continue;
- case MOVE_UP_FROM_RIGHT:
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,