aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2011-10-26 17:03:38 +0200
committerLinus Torvalds <torvalds@linux-foundation.org>2011-10-26 17:03:38 +0200
commit7115e3fcf45514db7525a05365b10454ff7f345e (patch)
tree17450e6337d559cc35dae6a7a73abab01ac63f00 /kernel
parent1f6e05171bb5cc32a4d6437ab2269fc21d169ca7 (diff)
parentc752d04066a36ae30b29795f3fa3f536292c1f8c (diff)
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (121 commits) perf symbols: Increase symbol KSYM_NAME_LEN size perf hists browser: Refuse 'a' hotkey on non symbolic views perf ui browser: Use libslang to read keys perf tools: Fix tracing info recording perf hists browser: Elide DSO column when it is set to just one DSO, ditto for threads perf hists: Don't consider filtered entries when calculating column widths perf hists: Don't decay total_period for filtered entries perf hists browser: Honour symbol_conf.show_{nr_samples,total_period} perf hists browser: Do not exit on tab key with single event perf annotate browser: Don't change selection line when returning from callq perf tools: handle endianness of feature bitmap perf tools: Add prelink suggestion to dso update message perf script: Fix unknown feature comment perf hists browser: Apply the dso and thread filters when merging new batches perf hists: Move the dso and thread filters from hist_browser perf ui browser: Honour the xterm colors perf top tui: Give color hints just on the percentage, like on --stdio perf ui browser: Make the colors configurable and change the defaults perf tui: Remove unneeded call to newtCls on startup perf hists: Don't format the percentage on hist_entry__snprintf ... Fix up conflicts in arch/x86/kernel/kprobes.c manually. Ingo's tree did the insane "add volatile to const array", which just doesn't make sense ("volatile const"?). But we could remove the const *and* make the array volatile to make doubly sure that gcc doesn't optimize it away.. Also fix up kernel/trace/ring_buffer.c non-data-conflicts manually: the reader_lock has been turned into a raw lock by the core locking merge, and there was a new user of it introduced in this perf core merge. Make sure that new use also uses the raw accessor functions.
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c101
-rw-r--r--kernel/module.c47
-rw-r--r--kernel/trace/Makefile2
-rw-r--r--kernel/trace/ftrace.c8
-rw-r--r--kernel/trace/ring_buffer.c70
-rw-r--r--kernel/trace/trace.c181
-rw-r--r--kernel/trace/trace.h16
-rw-r--r--kernel/trace/trace_clock.c12
-rw-r--r--kernel/trace/trace_events_filter.c795
-rw-r--r--kernel/trace/trace_events_filter_test.h50
-rw-r--r--kernel/trace/trace_irqsoff.c4
-rw-r--r--kernel/trace/trace_kprobe.c58
-rw-r--r--kernel/trace/trace_printk.c19
-rw-r--r--kernel/tracepoint.c169
-rw-r--r--kernel/watchdog.c7
15 files changed, 1067 insertions, 472 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0f857782d06..d1a1bee3522 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -29,6 +29,7 @@
#include <linux/hardirq.h>
#include <linux/rculist.h>
#include <linux/uaccess.h>
+#include <linux/suspend.h>
#include <linux/syscalls.h>
#include <linux/anon_inodes.h>
#include <linux/kernel_stat.h>
@@ -5758,6 +5759,7 @@ struct pmu *perf_init_event(struct perf_event *event)
pmu = idr_find(&pmu_idr, event->attr.type);
rcu_read_unlock();
if (pmu) {
+ event->pmu = pmu;
ret = pmu->event_init(event);
if (ret)
pmu = ERR_PTR(ret);
@@ -5765,6 +5767,7 @@ struct pmu *perf_init_event(struct perf_event *event)
}
list_for_each_entry_rcu(pmu, &pmus, entry) {
+ event->pmu = pmu;
ret = pmu->event_init(event);
if (!ret)
goto unlock;
@@ -5891,8 +5894,6 @@ done:
return ERR_PTR(err);
}
- event->pmu = pmu;
-
if (!event->parent) {
if (event->attach_state & PERF_ATTACH_TASK)
jump_label_inc(&perf_sched_events);
@@ -6852,7 +6853,7 @@ static void __cpuinit perf_event_init_cpu(int cpu)
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
mutex_lock(&swhash->hlist_mutex);
- if (swhash->hlist_refcount > 0) {
+ if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) {
struct swevent_hlist *hlist;
hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu));
@@ -6941,7 +6942,14 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
{
unsigned int cpu = (long)hcpu;
- switch (action & ~CPU_TASKS_FROZEN) {
+ /*
+ * Ignore suspend/resume action, the perf_pm_notifier will
+ * take care of that.
+ */
+ if (action & CPU_TASKS_FROZEN)
+ return NOTIFY_OK;
+
+ switch (action) {
case CPU_UP_PREPARE:
case CPU_DOWN_FAILED:
@@ -6960,6 +6968,90 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
return NOTIFY_OK;
}
+static void perf_pm_resume_cpu(void *unused)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+ int idx;
+
+ idx = srcu_read_lock(&pmus_srcu);
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ ctx = cpuctx->task_ctx;
+
+ perf_ctx_lock(cpuctx, ctx);
+ perf_pmu_disable(cpuctx->ctx.pmu);
+
+ cpu_ctx_sched_out(cpuctx, EVENT_ALL);
+ if (ctx)
+ ctx_sched_out(ctx, cpuctx, EVENT_ALL);
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, ctx);
+ }
+ srcu_read_unlock(&pmus_srcu, idx);
+}
+
+static void perf_pm_suspend_cpu(void *unused)
+{
+ struct perf_cpu_context *cpuctx;
+ struct perf_event_context *ctx;
+ struct pmu *pmu;
+ int idx;
+
+ idx = srcu_read_lock(&pmus_srcu);
+ list_for_each_entry_rcu(pmu, &pmus, entry) {
+ cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ ctx = cpuctx->task_ctx;
+
+ perf_ctx_lock(cpuctx, ctx);
+ perf_pmu_disable(cpuctx->ctx.pmu);
+
+ perf_event_sched_in(cpuctx, ctx, current);
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+ perf_ctx_unlock(cpuctx, ctx);
+ }
+ srcu_read_unlock(&pmus_srcu, idx);
+}
+
+static int perf_resume(void)
+{
+ get_online_cpus();
+ smp_call_function(perf_pm_resume_cpu, NULL, 1);
+ put_online_cpus();
+
+ return NOTIFY_OK;
+}
+
+static int perf_suspend(void)
+{
+ get_online_cpus();
+ smp_call_function(perf_pm_suspend_cpu, NULL, 1);
+ put_online_cpus();
+
+ return NOTIFY_OK;
+}
+
+static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr)
+{
+ switch (action) {
+ case PM_POST_HIBERNATION:
+ case PM_POST_SUSPEND:
+ return perf_resume();
+ case PM_HIBERNATION_PREPARE:
+ case PM_SUSPEND_PREPARE:
+ return perf_suspend();
+ default:
+ return NOTIFY_DONE;
+ }
+}
+
+static struct notifier_block perf_pm_notifier = {
+ .notifier_call = perf_pm,
+};
+
void __init perf_event_init(void)
{
int ret;
@@ -6974,6 +7066,7 @@ void __init perf_event_init(void)
perf_tp_register();
perf_cpu_notifier(perf_cpu_notify);
register_reboot_notifier(&perf_reboot_notifier);
+ register_pm_notifier(&perf_pm_notifier);
ret = init_hw_breakpoint();
WARN(ret, "hw_breakpoint initialization failed with: %d", ret);
diff --git a/kernel/module.c b/kernel/module.c
index 04379f92f84..93342d992f3 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3487,50 +3487,3 @@ void module_layout(struct module *mod,
}
EXPORT_SYMBOL(module_layout);
#endif
-
-#ifdef CONFIG_TRACEPOINTS
-void module_update_tracepoints(void)
-{
- struct module *mod;
-
- mutex_lock(&module_mutex);
- list_for_each_entry(mod, &modules, list)
- if (!mod->taints)
- tracepoint_update_probe_range(mod->tracepoints_ptrs,
- mod->tracepoints_ptrs + mod->num_tracepoints);
- mutex_unlock(&module_mutex);
-}
-
-/*
- * Returns 0 if current not found.
- * Returns 1 if current found.
- */
-int module_get_iter_tracepoints(struct tracepoint_iter *iter)
-{
- struct module *iter_mod;
- int found = 0;
-
- mutex_lock(&module_mutex);
- list_for_each_entry(iter_mod, &modules, list) {
- if (!iter_mod->taints) {
- /*
- * Sorted module list
- */
- if (iter_mod < iter->module)
- continue;
- else if (iter_mod > iter->module)
- iter->tracepoint = NULL;
- found = tracepoint_get_iter_range(&iter->tracepoint,
- iter_mod->tracepoints_ptrs,
- iter_mod->tracepoints_ptrs
- + iter_mod->num_tracepoints);
- if (found) {
- iter->module = iter_mod;
- break;
- }
- }
- }
- mutex_unlock(&module_mutex);
- return found;
-}
-#endif
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index f49405f842f..5f39a07fe5e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -15,6 +15,8 @@ ifdef CONFIG_TRACING_BRANCHES
KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING
endif
+CFLAGS_trace_events_filter.o := -I$(src)
+
#
# Make the trace clocks available generally: it's infrastructure
# relied on by ptrace for example:
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index c3e4575e782..077d8538790 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3863,6 +3863,14 @@ void ftrace_kill(void)
}
/**
+ * Test if ftrace is dead or not.
+ */
+int ftrace_is_dead(void)
+{
+ return ftrace_disabled;
+}
+
+/**
* register_ftrace_function - register a function for profiling
* @ops - ops structure that holds the function for profiling.
*
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index f2f821acc59..f5b7b5c1195 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -488,12 +488,14 @@ struct ring_buffer_per_cpu {
struct buffer_page *reader_page;
unsigned long lost_events;
unsigned long last_overrun;
+ local_t entries_bytes;
local_t commit_overrun;
local_t overrun;
local_t entries;
local_t committing;
local_t commits;
unsigned long read;
+ unsigned long read_bytes;
u64 write_stamp;
u64 read_stamp;
};
@@ -1708,6 +1710,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer,
* the counters.
*/
local_add(entries, &cpu_buffer->overrun);
+ local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes);
/*
* The entries will be zeroed out when we move the
@@ -1863,6 +1866,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer,
event = __rb_page_index(tail_page, tail);
kmemcheck_annotate_bitfield(event, bitfield);
+ /* account for padding bytes */
+ local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes);
+
/*
* Save the original length to the meta data.
* This will be used by the reader to add lost event
@@ -2054,6 +2060,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
if (!tail)
tail_page->page->time_stamp = ts;
+ /* account for these added bytes */
+ local_add(length, &cpu_buffer->entries_bytes);
+
return event;
}
@@ -2076,6 +2085,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) {
unsigned long write_mask =
local_read(&bpage->write) & ~RB_WRITE_MASK;
+ unsigned long event_length = rb_event_length(event);
/*
* This is on the tail page. It is possible that
* a write could come in and move the tail page
@@ -2085,8 +2095,11 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
old_index += write_mask;
new_index += write_mask;
index = local_cmpxchg(&bpage->write, old_index, new_index);
- if (index == old_index)
+ if (index == old_index) {
+ /* update counters */
+ local_sub(event_length, &cpu_buffer->entries_bytes);
return 1;
+ }
}
/* could not discard */
@@ -2661,6 +2674,58 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer)
}
/**
+ * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
+{
+ unsigned long flags;
+ struct ring_buffer_per_cpu *cpu_buffer;
+ struct buffer_page *bpage;
+ unsigned long ret;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+ /*
+ * if the tail is on reader_page, oldest time stamp is on the reader
+ * page
+ */
+ if (cpu_buffer->tail_page == cpu_buffer->reader_page)
+ bpage = cpu_buffer->reader_page;
+ else
+ bpage = rb_set_head_page(cpu_buffer);
+ ret = bpage->page->time_stamp;
+ raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts);
+
+/**
+ * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer
+ * @buffer: The ring buffer
+ * @cpu: The per CPU buffer to read from.
+ */
+unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu)
+{
+ struct ring_buffer_per_cpu *cpu_buffer;
+ unsigned long ret;
+
+ if (!cpumask_test_cpu(cpu, buffer->cpumask))
+ return 0;
+
+ cpu_buffer = buffer->buffers[cpu];
+ ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes;
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu);
+
+/**
* ring_buffer_entries_cpu - get the number of entries in a cpu buffer
* @buffer: The ring buffer
* @cpu: The per CPU buffer to get the entries from.
@@ -3527,11 +3592,13 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer)
cpu_buffer->reader_page->read = 0;
local_set(&cpu_buffer->commit_overrun, 0);
+ local_set(&cpu_buffer->entries_bytes, 0);
local_set(&cpu_buffer->overrun, 0);
local_set(&cpu_buffer->entries, 0);
local_set(&cpu_buffer->committing, 0);
local_set(&cpu_buffer->commits, 0);
cpu_buffer->read = 0;
+ cpu_buffer->read_bytes = 0;
cpu_buffer->write_stamp = 0;
cpu_buffer->read_stamp = 0;
@@ -3918,6 +3985,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
} else {
/* update the entry counter */
cpu_buffer->read += rb_page_entries(reader);
+ cpu_buffer->read_bytes += BUF_PAGE_SIZE;
/* swap the pages */
rb_init_page(bpage);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 0c8bdeeb358..f2bd275bb60 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -435,6 +435,7 @@ static struct {
} trace_clocks[] = {
{ trace_clock_local, "local" },
{ trace_clock_global, "global" },
+ { trace_clock_counter, "counter" },
};
int trace_clock_id;
@@ -2159,6 +2160,14 @@ void trace_default_header(struct seq_file *m)
}
}
+static void test_ftrace_alive(struct seq_file *m)
+{
+ if (!ftrace_is_dead())
+ return;
+ seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+ seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n");
+}
+
static int s_show(struct seq_file *m, void *v)
{
struct trace_iterator *iter = v;
@@ -2168,6 +2177,7 @@ static int s_show(struct seq_file *m, void *v)
if (iter->tr) {
seq_printf(m, "# tracer: %s\n", iter->trace->name);
seq_puts(m, "#\n");
+ test_ftrace_alive(m);
}
if (iter->trace && iter->trace->print_header)
iter->trace->print_header(m);
@@ -2710,9 +2720,9 @@ static const char readme_msg[] =
"# cat /sys/kernel/debug/tracing/trace_options\n"
"noprint-parent nosym-offset nosym-addr noverbose\n"
"# echo print-parent > /sys/kernel/debug/tracing/trace_options\n"
- "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n"
+ "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n"
"# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n"
- "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n"
+ "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n"
;
static ssize_t
@@ -3569,6 +3579,30 @@ tracing_entries_write(struct file *filp, const char __user *ubuf,
}
static ssize_t
+tracing_total_entries_read(struct file *filp, char __user *ubuf,
+ size_t cnt, loff_t *ppos)
+{
+ struct trace_array *tr = filp->private_data;
+ char buf[64];
+ int r, cpu;
+ unsigned long size = 0, expanded_size = 0;
+
+ mutex_lock(&trace_types_lock);
+ for_each_tracing_cpu(cpu) {
+ size += tr->entries >> 10;
+ if (!ring_buffer_expanded)
+ expanded_size += trace_buf_size >> 10;
+ }
+ if (ring_buffer_expanded)
+ r = sprintf(buf, "%lu\n", size);
+ else
+ r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
+ mutex_unlock(&trace_types_lock);
+
+ return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
+}
+
+static ssize_t
tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *ppos)
{
@@ -3594,22 +3628,24 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp)
return 0;
}
-static int mark_printk(const char *fmt, ...)
-{
- int ret;
- va_list args;
- va_start(args, fmt);
- ret = trace_vprintk(0, fmt, args);
- va_end(args);
- return ret;
-}
-
static ssize_t
tracing_mark_write(struct file *filp, const char __user *ubuf,
size_t cnt, loff_t *fpos)
{
- char *buf;
- size_t written;
+ unsigned long addr = (unsigned long)ubuf;
+ struct ring_buffer_event *event;
+ struct ring_buffer *buffer;
+ struct print_entry *entry;
+ unsigned long irq_flags;
+ struct page *pages[2];
+ int nr_pages = 1;
+ ssize_t written;
+ void *page1;
+ void *page2;
+ int offset;
+ int size;
+ int len;
+ int ret;
if (tracing_disabled)
return -EINVAL;
@@ -3617,28 +3653,81 @@ tracing_mark_write(struct file *filp, const char __user *ubuf,
if (cnt > TRACE_BUF_SIZE)
cnt = TRACE_BUF_SIZE;
- buf = kmalloc(cnt + 2, GFP_KERNEL);
- if (buf == NULL)
- return -ENOMEM;
+ /*
+ * Userspace is injecting traces into the kernel trace buffer.
+ * We want to be as non intrusive as possible.
+ * To do so, we do not want to allocate any special buffers
+ * or take any locks, but instead write the userspace data
+ * straight into the ring buffer.
+ *
+ * First we need to pin the userspace buffer into memory,
+ * which, most likely it is, because it just referenced it.
+ * But there's no guarantee that it is. By using get_user_pages_fast()
+ * and kmap_atomic/kunmap_atomic() we can get access to the
+ * pages directly. We then write the data directly into the
+ * ring buffer.
+ */
+ BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE);
- if (copy_from_user(buf, ubuf, cnt)) {
- kfree(buf);
- return -EFAULT;
+ /* check if we cross pages */
+ if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK))
+ nr_pages = 2;
+
+ offset = addr & (PAGE_SIZE - 1);
+ addr &= PAGE_MASK;
+
+ ret = get_user_pages_fast(addr, nr_pages, 0, pages);
+ if (ret < nr_pages) {
+ while (--ret >= 0)
+ put_page(pages[ret]);
+ written = -EFAULT;
+ goto out;
+ }
+
+ page1 = kmap_atomic(pages[0]);
+ if (nr_pages == 2)
+ page2 = kmap_atomic(pages[1]);
+
+ local_save_flags(irq_flags);
+ size = sizeof(*entry) + cnt + 2; /* possible \n added */
+ buffer = global_trace.buffer;
+ event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
+ irq_flags, preempt_count());
+ if (!event) {
+ /* Ring buffer disabled, return as if not open for write */
+ written = -EBADF;
+ goto out_unlock;
}
- if (buf[cnt-1] != '\n') {
- buf[cnt] = '\n';
- buf[cnt+1] = '\0';
+
+ entry = ring_buffer_event_data(event);
+ entry->ip = _THIS_IP_;
+
+ if (nr_pages == 2) {
+ len = PAGE_SIZE - offset;
+ memcpy(&entry->buf, page1 + offset, len);
+ memcpy(&entry->buf[len], page2, cnt - len);
} else
- buf[cnt] = '\0';
+ memcpy(&entry->buf, page1 + offset, cnt);
- written = mark_printk("%s", buf);
- kfree(buf);
- *fpos += written;
+ if (entry->buf[cnt - 1] != '\n') {
+ entry->buf[cnt] = '\n';
+ entry->buf[cnt + 1] = '\0';
+ } else
+ entry->buf[cnt] = '\0';
+
+ ring_buffer_unlock_commit(buffer, event);
- /* don't tell userspace we wrote more - it might confuse them */
- if (written > cnt)
- written = cnt;
+ written = cnt;
+ *fpos += written;
+
+ out_unlock:
+ if (nr_pages == 2)
+ kunmap_atomic(page2);
+ kunmap_atomic(page1);
+ while (nr_pages > 0)
+ put_page(pages[--nr_pages]);
+ out:
return written;
}
@@ -3739,6 +3828,12 @@ static const struct file_operations tracing_entries_fops = {
.llseek = generic_file_llseek,
};
+static const struct file_operations tracing_total_entries_fops = {
+ .open = tracing_open_generic,
+ .read = tracing_total_entries_read,
+ .llseek = generic_file_llseek,
+};
+
static const struct file_operations tracing_free_buffer_fops = {
.write = tracing_free_buffer_write,
.release = tracing_free_buffer_release,
@@ -3808,8 +3903,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if (info->read < PAGE_SIZE)
goto read;
- info->read = 0;
-
trace_access_lock(info->cpu);
ret = ring_buffer_read_page(info->tr->buffer,
&info->spare,
@@ -3819,6 +3912,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf,
if (ret < 0)
return 0;
+ info->read = 0;
+
read:
size = PAGE_SIZE - info->read;
if (size > count)
@@ -4026,6 +4121,8 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
struct trace_array *tr = &global_trace;
struct trace_seq *s;
unsigned long cnt;
+ unsigned long long t;
+ unsigned long usec_rem;
s = kmalloc(sizeof(*s), GFP_KERNEL);
if (!s)
@@ -4042,6 +4139,17 @@ tracing_stats_read(struct file *filp, char __user *ubuf,
cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu);
trace_seq_printf(s, "commit overrun: %ld\n", cnt);
+ cnt = ring_buffer_bytes_cpu(tr->buffer, cpu);
+ trace_seq_printf(s, "bytes: %ld\n", cnt);
+
+ t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu));
+ usec_rem = do_div(t, USEC_PER_SEC);
+ trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem);
+
+ t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu));
+ usec_rem = do_div(t, USEC_PER_SEC);
+ trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
+
count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len);
kfree(s);
@@ -4450,6 +4558,9 @@ static __init int tracer_init_debugfs(void)
trace_create_file("buffer_size_kb", 0644, d_tracer,
&global_trace, &tracing_entries_fops);
+ trace_create_file("buffer_total_size_kb", 0444, d_tracer,
+ &global_trace, &tracing_total_entries_fops);
+
trace_create_file("free_buffer", 0644, d_tracer,
&global_trace, &tracing_free_buffer_fops);
@@ -4566,6 +4677,12 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode)
tracing_off();
+ /* Did function tracer already get disabled? */
+ if (ftrace_is_dead()) {
+ printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
+ printk("# MAY BE MISSING FUNCTION EVENTS\n");
+ }
+
if (disable_tracing)
ftrace_kill();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 616846bcfee..092e1f8d18d 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -579,11 +579,13 @@ static inline int ftrace_trace_task(struct task_struct *task)
return test_tsk_trace_trace(task);
}
+extern int ftrace_is_dead(void);
#else
static inline int ftrace_trace_task(struct task_struct *task)
{
return 1;
}
+static inline int ftrace_is_dead(void) { return 0; }
#endif
/*
@@ -761,16 +763,10 @@ struct filter_pred {
filter_pred_fn_t fn;
u64 val;
struct regex regex;
- /*
- * Leaf nodes use field_name, ops is used by AND and OR
- * nodes. The field_name is always freed when freeing a pred.
- * We can overload field_name for ops and have it freed
- * as well.
- */
- union {
- char *field_name;
- unsigned short *ops;
- };
+ unsigned short *ops;
+#ifdef CONFIG_FTRACE_STARTUP_TEST
+ struct ftrace_event_field *field;
+#endif
int offset;
int not;
int op;
diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c
index 6302747a139..394783531cb 100644
--- a/kernel/trace/trace_clock.c
+++ b/kernel/trace/trace_clock.c
@@ -113,3 +113,15 @@ u64 notrace trace_clock_global(void)
return now;
}
+
+static atomic64_t trace_counter;
+
+/*
+ * trace_clock_counter(): simply an atomic counter.
+ * Use the trace_counter "counter" for cases where you do not care
+ * about timings, but are interested in strict ordering.
+ */
+u64 notrace trace_clock_counter(void)
+{
+ return atomic64_add_return(1, &trace_counter);
+}
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 256764ecccd..816d3d07497 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -381,6 +381,63 @@ get_pred_parent(struct filter_pred *pred, struct filter_pred *preds,
return pred;
}
+enum walk_return {
+ WALK_PRED_ABORT,
+ WALK_PRED_PARENT,
+ WALK_PRED_DEFAULT,
+};
+
+typedef int (*filter_pred_walkcb_t) (enum move_type move,
+ struct filter_pred *pred,
+ int *err, void *data);
+
+static int walk_pred_tree(struct filter_pred *preds,
+ struct filter_pred *root,
+ filter_pred_walkcb_t cb, void *data)
+{
+ struct filter_pred *pred = root;
+ enum move_type move = MOVE_DOWN;
+ int done = 0;
+
+ if (!preds)
+ return -EINVAL;
+
+ do {
+ int err = 0, ret;
+
+ ret = cb(move, pred, &err, data);
+ if (ret == WALK_PRED_ABORT)
+ return err;
+ if (ret == WALK_PRED_PARENT)
+ goto get_parent;
+
+ switch (move) {
+ case MOVE_DOWN:
+ if (pred->left != FILTER_PRED_INVALID) {
+ pred = &preds[pred->left];
+ continue;
+ }
+ goto get_parent;
+ case MOVE_UP_FROM_LEFT:
+ pred = &preds[pred->right];
+ move = MOVE_DOWN;
+ continue;
+ case MOVE_UP_FROM_RIGHT:
+ get_parent:
+ if (pred == root)
+ break;
+ pred = get_pred_parent(pred, preds,
+ pred->parent,
+ &move);
+ continue;
+ }
+ done = 1;
+ } while (!done);
+
+ /* We are fine. */
+ return 0;
+}
+
/*
* A series of AND or ORs where found together. Instead of
* climbing up and down the tree branches, an array of the
@@ -410,99 +467,91 @@ static int process_ops(struct filter_pred *preds,
for (i = 0; i < op->val; i++) {
pred = &preds[op->ops[i]];
- match = pred->fn(pred, rec);
+ if (!WARN_ON_ONCE(!pred->fn))
+ match = pred->fn(pred, rec);
if (!!match == type)
return match;
}
return match;
}
+struct filter_match_preds_data {
+ struct filter_pred *preds;
+ int match;
+ void *rec;
+};
+
+static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred,
+ int *err, void *data)
+{
+ struct filter_match_preds_data *d = data;
+
+ *err = 0;
+ switch (move) {
+ case MOVE_DOWN:
+ /* only AND and OR have children */
+ if (pred->left != FILTER_PRED_INVALID) {
+ /* If ops is set, then it was folded. */
+ if (!pred->ops)
+ return WALK_PRED_DEFAULT;
+ /* We can treat folded ops as a leaf node */
+ d->match = process_ops(d->preds, pred, d->rec);
+ } else {
+ if (!WARN_ON_ONCE(!pred->fn))
+ d->match = pred->fn(pred, d->rec);
+ }
+
+ return WALK_PRED_PARENT;
+ case MOVE_UP_FROM_LEFT:
+ /*
+ * Check for short circuits.
+ *
+ * Optimization: !!match == (pred->op == OP_OR)
+ * is the same as:
+ * if ((match && pred->op == OP_OR) ||
+ * (!match && pred->op == OP_AND))
+ */
+ if (!!d->match == (pred->op == OP_OR))
+ return WALK_PRED_PARENT;
+ break;
+ case MOVE_UP_FROM_RIGHT:
+ break;
+ }
+
+ return WALK_PRED_DEFAULT;
+}
+
/* return 1 if event matches, 0 otherwise (discard) */
int filter_match_preds(struct event_filter *filter, void *rec)
{
- int match = -1;
- enum move_type move = MOVE_DOWN;
struct filter_pred *preds;
- struct filter_pred *pred;
struct filter_pred *root;
- int n_preds;
- int done = 0;
+ struct filter_match_preds_data data = {
+ /* match is currently meaningless */
+ .match = -1,
+ .rec = rec,
+ };
+ int n_preds, ret;
/* no filter is considered a match */
if (!filter)
return 1;
n_preds = filter->n_preds;
-
if (!n_preds)
return 1;
/*
* n_preds, root and filter->preds are protect with preemption disabled.
*/
- preds = rcu_dereference_sched(filter->preds);
root = rcu_dereference_sched(filter->root);
if (!root)
return 1;
- pred = root;
-
- /* match is currently meaningless */
- match = -1;
-
- do {
- switch (move) {
- case MOVE_DOWN:
- /* only AND and OR have children */
- if (pred->left != FILTER_PRED_INVALID) {
- /* If ops is set, then it was folded. */
- if (!pred->ops) {
- /* keep going to down the left side */
- pred = &preds[pred->left];
- continue;
- }
- /* We can treat folded ops as a leaf node */
- match = process_ops(preds, pred, rec);
- } else
- match = pred->fn(pred, rec);
- /* If this pred is the only pred */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- case MOVE_UP_FROM_LEFT:
- /*
- * Check for short circuits.
- *
- * Optimization: !!match == (pred->op == OP_OR)
- * is the same as:
- * if ((match && pred->op == OP_OR) ||
- * (!match && pred->op == OP_AND))
- */
- if (!!match == (pred->op == OP_OR)) {
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- /* now go down the right side of the tree. */
- pred = &preds[pred->right];
- move = MOVE_DOWN;
- continue;
- case MOVE_UP_FROM_RIGHT:
- /* We finished this equation. */
- if (pred == root)
- break;
- pred = get_pred_parent(pred, preds,
- pred->parent, &move);
- continue;
- }
- done = 1;
- } while (!done);
-
- return match;
+ data.preds = preds = rcu_dereference_sched(filter->preds);
+ ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data);
+ WARN_ON(ret);
+ return data.match;
}
EXPORT_SYMBOL_GPL(filter_match_preds);
@@ -628,22 +677,6 @@ find_event_field(struct ftrace_event_call *call, char *name)
return __find_event_field(head, name);
}
-static void filter_free_pred(struct filter_pred *pred)
-{
- if (!pred)
- return;
-
- kfree(pred->field_name);
- kfree(pred);
-}
-
-static void filter_clear_pred(struct filter_pred *pred)
-{
- kfree(pred->field_name);
- pred->field_name = NULL;
- pred->regex.len = 0;
-}
-
static int __alloc