diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-26 17:03:38 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-10-26 17:03:38 +0200 |
commit | 7115e3fcf45514db7525a05365b10454ff7f345e (patch) | |
tree | 17450e6337d559cc35dae6a7a73abab01ac63f00 /kernel | |
parent | 1f6e05171bb5cc32a4d6437ab2269fc21d169ca7 (diff) | |
parent | c752d04066a36ae30b29795f3fa3f536292c1f8c (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (121 commits)
perf symbols: Increase symbol KSYM_NAME_LEN size
perf hists browser: Refuse 'a' hotkey on non symbolic views
perf ui browser: Use libslang to read keys
perf tools: Fix tracing info recording
perf hists browser: Elide DSO column when it is set to just one DSO, ditto for threads
perf hists: Don't consider filtered entries when calculating column widths
perf hists: Don't decay total_period for filtered entries
perf hists browser: Honour symbol_conf.show_{nr_samples,total_period}
perf hists browser: Do not exit on tab key with single event
perf annotate browser: Don't change selection line when returning from callq
perf tools: handle endianness of feature bitmap
perf tools: Add prelink suggestion to dso update message
perf script: Fix unknown feature comment
perf hists browser: Apply the dso and thread filters when merging new batches
perf hists: Move the dso and thread filters from hist_browser
perf ui browser: Honour the xterm colors
perf top tui: Give color hints just on the percentage, like on --stdio
perf ui browser: Make the colors configurable and change the defaults
perf tui: Remove unneeded call to newtCls on startup
perf hists: Don't format the percentage on hist_entry__snprintf
...
Fix up conflicts in arch/x86/kernel/kprobes.c manually.
Ingo's tree did the insane "add volatile to const array", which just
doesn't make sense ("volatile const"?). But we could remove the const
*and* make the array volatile to make doubly sure that gcc doesn't
optimize it away..
Also fix up kernel/trace/ring_buffer.c non-data-conflicts manually: the
reader_lock has been turned into a raw lock by the core locking merge,
and there was a new user of it introduced in this perf core merge. Make
sure that new use also uses the raw accessor functions.
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/events/core.c | 101 | ||||
-rw-r--r-- | kernel/module.c | 47 | ||||
-rw-r--r-- | kernel/trace/Makefile | 2 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 8 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 70 | ||||
-rw-r--r-- | kernel/trace/trace.c | 181 | ||||
-rw-r--r-- | kernel/trace/trace.h | 16 | ||||
-rw-r--r-- | kernel/trace/trace_clock.c | 12 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 795 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter_test.h | 50 | ||||
-rw-r--r-- | kernel/trace/trace_irqsoff.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 58 | ||||
-rw-r--r-- | kernel/trace/trace_printk.c | 19 | ||||
-rw-r--r-- | kernel/tracepoint.c | 169 | ||||
-rw-r--r-- | kernel/watchdog.c | 7 |
15 files changed, 1067 insertions, 472 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 0f857782d06..d1a1bee3522 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -29,6 +29,7 @@ #include <linux/hardirq.h> #include <linux/rculist.h> #include <linux/uaccess.h> +#include <linux/suspend.h> #include <linux/syscalls.h> #include <linux/anon_inodes.h> #include <linux/kernel_stat.h> @@ -5758,6 +5759,7 @@ struct pmu *perf_init_event(struct perf_event *event) pmu = idr_find(&pmu_idr, event->attr.type); rcu_read_unlock(); if (pmu) { + event->pmu = pmu; ret = pmu->event_init(event); if (ret) pmu = ERR_PTR(ret); @@ -5765,6 +5767,7 @@ struct pmu *perf_init_event(struct perf_event *event) } list_for_each_entry_rcu(pmu, &pmus, entry) { + event->pmu = pmu; ret = pmu->event_init(event); if (!ret) goto unlock; @@ -5891,8 +5894,6 @@ done: return ERR_PTR(err); } - event->pmu = pmu; - if (!event->parent) { if (event->attach_state & PERF_ATTACH_TASK) jump_label_inc(&perf_sched_events); @@ -6852,7 +6853,7 @@ static void __cpuinit perf_event_init_cpu(int cpu) struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); mutex_lock(&swhash->hlist_mutex); - if (swhash->hlist_refcount > 0) { + if (swhash->hlist_refcount > 0 && !swhash->swevent_hlist) { struct swevent_hlist *hlist; hlist = kzalloc_node(sizeof(*hlist), GFP_KERNEL, cpu_to_node(cpu)); @@ -6941,7 +6942,14 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) { unsigned int cpu = (long)hcpu; - switch (action & ~CPU_TASKS_FROZEN) { + /* + * Ignore suspend/resume action, the perf_pm_notifier will + * take care of that. + */ + if (action & CPU_TASKS_FROZEN) + return NOTIFY_OK; + + switch (action) { case CPU_UP_PREPARE: case CPU_DOWN_FAILED: @@ -6960,6 +6968,90 @@ perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu) return NOTIFY_OK; } +static void perf_pm_resume_cpu(void *unused) +{ + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; + struct pmu *pmu; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + ctx = cpuctx->task_ctx; + + perf_ctx_lock(cpuctx, ctx); + perf_pmu_disable(cpuctx->ctx.pmu); + + cpu_ctx_sched_out(cpuctx, EVENT_ALL); + if (ctx) + ctx_sched_out(ctx, cpuctx, EVENT_ALL); + + perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, ctx); + } + srcu_read_unlock(&pmus_srcu, idx); +} + +static void perf_pm_suspend_cpu(void *unused) +{ + struct perf_cpu_context *cpuctx; + struct perf_event_context *ctx; + struct pmu *pmu; + int idx; + + idx = srcu_read_lock(&pmus_srcu); + list_for_each_entry_rcu(pmu, &pmus, entry) { + cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + ctx = cpuctx->task_ctx; + + perf_ctx_lock(cpuctx, ctx); + perf_pmu_disable(cpuctx->ctx.pmu); + + perf_event_sched_in(cpuctx, ctx, current); + + perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, ctx); + } + srcu_read_unlock(&pmus_srcu, idx); +} + +static int perf_resume(void) +{ + get_online_cpus(); + smp_call_function(perf_pm_resume_cpu, NULL, 1); + put_online_cpus(); + + return NOTIFY_OK; +} + +static int perf_suspend(void) +{ + get_online_cpus(); + smp_call_function(perf_pm_suspend_cpu, NULL, 1); + put_online_cpus(); + + return NOTIFY_OK; +} + +static int perf_pm(struct notifier_block *self, unsigned long action, void *ptr) +{ + switch (action) { + case PM_POST_HIBERNATION: + case PM_POST_SUSPEND: + return perf_resume(); + case PM_HIBERNATION_PREPARE: + case PM_SUSPEND_PREPARE: + return perf_suspend(); + default: + return NOTIFY_DONE; + } +} + +static struct notifier_block perf_pm_notifier = { + .notifier_call = perf_pm, +}; + void __init perf_event_init(void) { int ret; @@ -6974,6 +7066,7 @@ void __init perf_event_init(void) perf_tp_register(); perf_cpu_notifier(perf_cpu_notify); register_reboot_notifier(&perf_reboot_notifier); + register_pm_notifier(&perf_pm_notifier); ret = init_hw_breakpoint(); WARN(ret, "hw_breakpoint initialization failed with: %d", ret); diff --git a/kernel/module.c b/kernel/module.c index 04379f92f84..93342d992f3 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3487,50 +3487,3 @@ void module_layout(struct module *mod, } EXPORT_SYMBOL(module_layout); #endif - -#ifdef CONFIG_TRACEPOINTS -void module_update_tracepoints(void) -{ - struct module *mod; - - mutex_lock(&module_mutex); - list_for_each_entry(mod, &modules, list) - if (!mod->taints) - tracepoint_update_probe_range(mod->tracepoints_ptrs, - mod->tracepoints_ptrs + mod->num_tracepoints); - mutex_unlock(&module_mutex); -} - -/* - * Returns 0 if current not found. - * Returns 1 if current found. - */ -int module_get_iter_tracepoints(struct tracepoint_iter *iter) -{ - struct module *iter_mod; - int found = 0; - - mutex_lock(&module_mutex); - list_for_each_entry(iter_mod, &modules, list) { - if (!iter_mod->taints) { - /* - * Sorted module list - */ - if (iter_mod < iter->module) - continue; - else if (iter_mod > iter->module) - iter->tracepoint = NULL; - found = tracepoint_get_iter_range(&iter->tracepoint, - iter_mod->tracepoints_ptrs, - iter_mod->tracepoints_ptrs - + iter_mod->num_tracepoints); - if (found) { - iter->module = iter_mod; - break; - } - } - } - mutex_unlock(&module_mutex); - return found; -} -#endif diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index f49405f842f..5f39a07fe5e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -15,6 +15,8 @@ ifdef CONFIG_TRACING_BRANCHES KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING endif +CFLAGS_trace_events_filter.o := -I$(src) + # # Make the trace clocks available generally: it's infrastructure # relied on by ptrace for example: diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index c3e4575e782..077d8538790 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -3863,6 +3863,14 @@ void ftrace_kill(void) } /** + * Test if ftrace is dead or not. + */ +int ftrace_is_dead(void) +{ + return ftrace_disabled; +} + +/** * register_ftrace_function - register a function for profiling * @ops - ops structure that holds the function for profiling. * diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index f2f821acc59..f5b7b5c1195 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -488,12 +488,14 @@ struct ring_buffer_per_cpu { struct buffer_page *reader_page; unsigned long lost_events; unsigned long last_overrun; + local_t entries_bytes; local_t commit_overrun; local_t overrun; local_t entries; local_t committing; local_t commits; unsigned long read; + unsigned long read_bytes; u64 write_stamp; u64 read_stamp; }; @@ -1708,6 +1710,7 @@ rb_handle_head_page(struct ring_buffer_per_cpu *cpu_buffer, * the counters. */ local_add(entries, &cpu_buffer->overrun); + local_sub(BUF_PAGE_SIZE, &cpu_buffer->entries_bytes); /* * The entries will be zeroed out when we move the @@ -1863,6 +1866,9 @@ rb_reset_tail(struct ring_buffer_per_cpu *cpu_buffer, event = __rb_page_index(tail_page, tail); kmemcheck_annotate_bitfield(event, bitfield); + /* account for padding bytes */ + local_add(BUF_PAGE_SIZE - tail, &cpu_buffer->entries_bytes); + /* * Save the original length to the meta data. * This will be used by the reader to add lost event @@ -2054,6 +2060,9 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer, if (!tail) tail_page->page->time_stamp = ts; + /* account for these added bytes */ + local_add(length, &cpu_buffer->entries_bytes); + return event; } @@ -2076,6 +2085,7 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, if (bpage->page == (void *)addr && rb_page_write(bpage) == old_index) { unsigned long write_mask = local_read(&bpage->write) & ~RB_WRITE_MASK; + unsigned long event_length = rb_event_length(event); /* * This is on the tail page. It is possible that * a write could come in and move the tail page @@ -2085,8 +2095,11 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer, old_index += write_mask; new_index += write_mask; index = local_cmpxchg(&bpage->write, old_index, new_index); - if (index == old_index) + if (index == old_index) { + /* update counters */ + local_sub(event_length, &cpu_buffer->entries_bytes); return 1; + } } /* could not discard */ @@ -2661,6 +2674,58 @@ rb_num_of_entries(struct ring_buffer_per_cpu *cpu_buffer) } /** + * ring_buffer_oldest_event_ts - get the oldest event timestamp from the buffer + * @buffer: The ring buffer + * @cpu: The per CPU buffer to read from. + */ +unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu) +{ + unsigned long flags; + struct ring_buffer_per_cpu *cpu_buffer; + struct buffer_page *bpage; + unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); + /* + * if the tail is on reader_page, oldest time stamp is on the reader + * page + */ + if (cpu_buffer->tail_page == cpu_buffer->reader_page) + bpage = cpu_buffer->reader_page; + else + bpage = rb_set_head_page(cpu_buffer); + ret = bpage->page->time_stamp; + raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags); + + return ret; +} +EXPORT_SYMBOL_GPL(ring_buffer_oldest_event_ts); + +/** + * ring_buffer_bytes_cpu - get the number of bytes consumed in a cpu buffer + * @buffer: The ring buffer + * @cpu: The per CPU buffer to read from. + */ +unsigned long ring_buffer_bytes_cpu(struct ring_buffer *buffer, int cpu) +{ + struct ring_buffer_per_cpu *cpu_buffer; + unsigned long ret; + + if (!cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; + + cpu_buffer = buffer->buffers[cpu]; + ret = local_read(&cpu_buffer->entries_bytes) - cpu_buffer->read_bytes; + + return ret; +} +EXPORT_SYMBOL_GPL(ring_buffer_bytes_cpu); + +/** * ring_buffer_entries_cpu - get the number of entries in a cpu buffer * @buffer: The ring buffer * @cpu: The per CPU buffer to get the entries from. @@ -3527,11 +3592,13 @@ rb_reset_cpu(struct ring_buffer_per_cpu *cpu_buffer) cpu_buffer->reader_page->read = 0; local_set(&cpu_buffer->commit_overrun, 0); + local_set(&cpu_buffer->entries_bytes, 0); local_set(&cpu_buffer->overrun, 0); local_set(&cpu_buffer->entries, 0); local_set(&cpu_buffer->committing, 0); local_set(&cpu_buffer->commits, 0); cpu_buffer->read = 0; + cpu_buffer->read_bytes = 0; cpu_buffer->write_stamp = 0; cpu_buffer->read_stamp = 0; @@ -3918,6 +3985,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer, } else { /* update the entry counter */ cpu_buffer->read += rb_page_entries(reader); + cpu_buffer->read_bytes += BUF_PAGE_SIZE; /* swap the pages */ rb_init_page(bpage); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 0c8bdeeb358..f2bd275bb60 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -435,6 +435,7 @@ static struct { } trace_clocks[] = { { trace_clock_local, "local" }, { trace_clock_global, "global" }, + { trace_clock_counter, "counter" }, }; int trace_clock_id; @@ -2159,6 +2160,14 @@ void trace_default_header(struct seq_file *m) } } +static void test_ftrace_alive(struct seq_file *m) +{ + if (!ftrace_is_dead()) + return; + seq_printf(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"); + seq_printf(m, "# MAY BE MISSING FUNCTION EVENTS\n"); +} + static int s_show(struct seq_file *m, void *v) { struct trace_iterator *iter = v; @@ -2168,6 +2177,7 @@ static int s_show(struct seq_file *m, void *v) if (iter->tr) { seq_printf(m, "# tracer: %s\n", iter->trace->name); seq_puts(m, "#\n"); + test_ftrace_alive(m); } if (iter->trace && iter->trace->print_header) iter->trace->print_header(m); @@ -2710,9 +2720,9 @@ static const char readme_msg[] = "# cat /sys/kernel/debug/tracing/trace_options\n" "noprint-parent nosym-offset nosym-addr noverbose\n" "# echo print-parent > /sys/kernel/debug/tracing/trace_options\n" - "# echo 1 > /sys/kernel/debug/tracing/tracing_enabled\n" + "# echo 1 > /sys/kernel/debug/tracing/tracing_on\n" "# cat /sys/kernel/debug/tracing/trace > /tmp/trace.txt\n" - "# echo 0 > /sys/kernel/debug/tracing/tracing_enabled\n" + "# echo 0 > /sys/kernel/debug/tracing/tracing_on\n" ; static ssize_t @@ -3569,6 +3579,30 @@ tracing_entries_write(struct file *filp, const char __user *ubuf, } static ssize_t +tracing_total_entries_read(struct file *filp, char __user *ubuf, + size_t cnt, loff_t *ppos) +{ + struct trace_array *tr = filp->private_data; + char buf[64]; + int r, cpu; + unsigned long size = 0, expanded_size = 0; + + mutex_lock(&trace_types_lock); + for_each_tracing_cpu(cpu) { + size += tr->entries >> 10; + if (!ring_buffer_expanded) + expanded_size += trace_buf_size >> 10; + } + if (ring_buffer_expanded) + r = sprintf(buf, "%lu\n", size); + else + r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size); + mutex_unlock(&trace_types_lock); + + return simple_read_from_buffer(ubuf, cnt, ppos, buf, r); +} + +static ssize_t tracing_free_buffer_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos) { @@ -3594,22 +3628,24 @@ tracing_free_buffer_release(struct inode *inode, struct file *filp) return 0; } -static int mark_printk(const char *fmt, ...) -{ - int ret; - va_list args; - va_start(args, fmt); - ret = trace_vprintk(0, fmt, args); - va_end(args); - return ret; -} - static ssize_t tracing_mark_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *fpos) { - char *buf; - size_t written; + unsigned long addr = (unsigned long)ubuf; + struct ring_buffer_event *event; + struct ring_buffer *buffer; + struct print_entry *entry; + unsigned long irq_flags; + struct page *pages[2]; + int nr_pages = 1; + ssize_t written; + void *page1; + void *page2; + int offset; + int size; + int len; + int ret; if (tracing_disabled) return -EINVAL; @@ -3617,28 +3653,81 @@ tracing_mark_write(struct file *filp, const char __user *ubuf, if (cnt > TRACE_BUF_SIZE) cnt = TRACE_BUF_SIZE; - buf = kmalloc(cnt + 2, GFP_KERNEL); - if (buf == NULL) - return -ENOMEM; + /* + * Userspace is injecting traces into the kernel trace buffer. + * We want to be as non intrusive as possible. + * To do so, we do not want to allocate any special buffers + * or take any locks, but instead write the userspace data + * straight into the ring buffer. + * + * First we need to pin the userspace buffer into memory, + * which, most likely it is, because it just referenced it. + * But there's no guarantee that it is. By using get_user_pages_fast() + * and kmap_atomic/kunmap_atomic() we can get access to the + * pages directly. We then write the data directly into the + * ring buffer. + */ + BUILD_BUG_ON(TRACE_BUF_SIZE >= PAGE_SIZE); - if (copy_from_user(buf, ubuf, cnt)) { - kfree(buf); - return -EFAULT; + /* check if we cross pages */ + if ((addr & PAGE_MASK) != ((addr + cnt) & PAGE_MASK)) + nr_pages = 2; + + offset = addr & (PAGE_SIZE - 1); + addr &= PAGE_MASK; + + ret = get_user_pages_fast(addr, nr_pages, 0, pages); + if (ret < nr_pages) { + while (--ret >= 0) + put_page(pages[ret]); + written = -EFAULT; + goto out; + } + + page1 = kmap_atomic(pages[0]); + if (nr_pages == 2) + page2 = kmap_atomic(pages[1]); + + local_save_flags(irq_flags); + size = sizeof(*entry) + cnt + 2; /* possible \n added */ + buffer = global_trace.buffer; + event = trace_buffer_lock_reserve(buffer, TRACE_PRINT, size, + irq_flags, preempt_count()); + if (!event) { + /* Ring buffer disabled, return as if not open for write */ + written = -EBADF; + goto out_unlock; } - if (buf[cnt-1] != '\n') { - buf[cnt] = '\n'; - buf[cnt+1] = '\0'; + + entry = ring_buffer_event_data(event); + entry->ip = _THIS_IP_; + + if (nr_pages == 2) { + len = PAGE_SIZE - offset; + memcpy(&entry->buf, page1 + offset, len); + memcpy(&entry->buf[len], page2, cnt - len); } else - buf[cnt] = '\0'; + memcpy(&entry->buf, page1 + offset, cnt); - written = mark_printk("%s", buf); - kfree(buf); - *fpos += written; + if (entry->buf[cnt - 1] != '\n') { + entry->buf[cnt] = '\n'; + entry->buf[cnt + 1] = '\0'; + } else + entry->buf[cnt] = '\0'; + + ring_buffer_unlock_commit(buffer, event); - /* don't tell userspace we wrote more - it might confuse them */ - if (written > cnt) - written = cnt; + written = cnt; + *fpos += written; + + out_unlock: + if (nr_pages == 2) + kunmap_atomic(page2); + kunmap_atomic(page1); + while (nr_pages > 0) + put_page(pages[--nr_pages]); + out: return written; } @@ -3739,6 +3828,12 @@ static const struct file_operations tracing_entries_fops = { .llseek = generic_file_llseek, }; +static const struct file_operations tracing_total_entries_fops = { + .open = tracing_open_generic, + .read = tracing_total_entries_read, + .llseek = generic_file_llseek, +}; + static const struct file_operations tracing_free_buffer_fops = { .write = tracing_free_buffer_write, .release = tracing_free_buffer_release, @@ -3808,8 +3903,6 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (info->read < PAGE_SIZE) goto read; - info->read = 0; - trace_access_lock(info->cpu); ret = ring_buffer_read_page(info->tr->buffer, &info->spare, @@ -3819,6 +3912,8 @@ tracing_buffers_read(struct file *filp, char __user *ubuf, if (ret < 0) return 0; + info->read = 0; + read: size = PAGE_SIZE - info->read; if (size > count) @@ -4026,6 +4121,8 @@ tracing_stats_read(struct file *filp, char __user *ubuf, struct trace_array *tr = &global_trace; struct trace_seq *s; unsigned long cnt; + unsigned long long t; + unsigned long usec_rem; s = kmalloc(sizeof(*s), GFP_KERNEL); if (!s) @@ -4042,6 +4139,17 @@ tracing_stats_read(struct file *filp, char __user *ubuf, cnt = ring_buffer_commit_overrun_cpu(tr->buffer, cpu); trace_seq_printf(s, "commit overrun: %ld\n", cnt); + cnt = ring_buffer_bytes_cpu(tr->buffer, cpu); + trace_seq_printf(s, "bytes: %ld\n", cnt); + + t = ns2usecs(ring_buffer_oldest_event_ts(tr->buffer, cpu)); + usec_rem = do_div(t, USEC_PER_SEC); + trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n", t, usec_rem); + + t = ns2usecs(ring_buffer_time_stamp(tr->buffer, cpu)); + usec_rem = do_div(t, USEC_PER_SEC); + trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem); + count = simple_read_from_buffer(ubuf, count, ppos, s->buffer, s->len); kfree(s); @@ -4450,6 +4558,9 @@ static __init int tracer_init_debugfs(void) trace_create_file("buffer_size_kb", 0644, d_tracer, &global_trace, &tracing_entries_fops); + trace_create_file("buffer_total_size_kb", 0444, d_tracer, + &global_trace, &tracing_total_entries_fops); + trace_create_file("free_buffer", 0644, d_tracer, &global_trace, &tracing_free_buffer_fops); @@ -4566,6 +4677,12 @@ __ftrace_dump(bool disable_tracing, enum ftrace_dump_mode oops_dump_mode) tracing_off(); + /* Did function tracer already get disabled? */ + if (ftrace_is_dead()) { + printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n"); + printk("# MAY BE MISSING FUNCTION EVENTS\n"); + } + if (disable_tracing) ftrace_kill(); diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index 616846bcfee..092e1f8d18d 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -579,11 +579,13 @@ static inline int ftrace_trace_task(struct task_struct *task) return test_tsk_trace_trace(task); } +extern int ftrace_is_dead(void); #else static inline int ftrace_trace_task(struct task_struct *task) { return 1; } +static inline int ftrace_is_dead(void) { return 0; } #endif /* @@ -761,16 +763,10 @@ struct filter_pred { filter_pred_fn_t fn; u64 val; struct regex regex; - /* - * Leaf nodes use field_name, ops is used by AND and OR - * nodes. The field_name is always freed when freeing a pred. - * We can overload field_name for ops and have it freed - * as well. - */ - union { - char *field_name; - unsigned short *ops; - }; + unsigned short *ops; +#ifdef CONFIG_FTRACE_STARTUP_TEST + struct ftrace_event_field *field; +#endif int offset; int not; int op; diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 6302747a139..394783531cb 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -113,3 +113,15 @@ u64 notrace trace_clock_global(void) return now; } + +static atomic64_t trace_counter; + +/* + * trace_clock_counter(): simply an atomic counter. + * Use the trace_counter "counter" for cases where you do not care + * about timings, but are interested in strict ordering. + */ +u64 notrace trace_clock_counter(void) +{ + return atomic64_add_return(1, &trace_counter); +} diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 256764ecccd..816d3d07497 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -381,6 +381,63 @@ get_pred_parent(struct filter_pred *pred, struct filter_pred *preds, return pred; } +enum walk_return { + WALK_PRED_ABORT, + WALK_PRED_PARENT, + WALK_PRED_DEFAULT, +}; + +typedef int (*filter_pred_walkcb_t) (enum move_type move, + struct filter_pred *pred, + int *err, void *data); + +static int walk_pred_tree(struct filter_pred *preds, + struct filter_pred *root, + filter_pred_walkcb_t cb, void *data) +{ + struct filter_pred *pred = root; + enum move_type move = MOVE_DOWN; + int done = 0; + + if (!preds) + return -EINVAL; + + do { + int err = 0, ret; + + ret = cb(move, pred, &err, data); + if (ret == WALK_PRED_ABORT) + return err; + if (ret == WALK_PRED_PARENT) + goto get_parent; + + switch (move) { + case MOVE_DOWN: + if (pred->left != FILTER_PRED_INVALID) { + pred = &preds[pred->left]; + continue; + } + goto get_parent; + case MOVE_UP_FROM_LEFT: + pred = &preds[pred->right]; + move = MOVE_DOWN; + continue; + case MOVE_UP_FROM_RIGHT: + get_parent: + if (pred == root) + break; + pred = get_pred_parent(pred, preds, + pred->parent, + &move); + continue; + } + done = 1; + } while (!done); + + /* We are fine. */ + return 0; +} + /* * A series of AND or ORs where found together. Instead of * climbing up and down the tree branches, an array of the @@ -410,99 +467,91 @@ static int process_ops(struct filter_pred *preds, for (i = 0; i < op->val; i++) { pred = &preds[op->ops[i]]; - match = pred->fn(pred, rec); + if (!WARN_ON_ONCE(!pred->fn)) + match = pred->fn(pred, rec); if (!!match == type) return match; } return match; } +struct filter_match_preds_data { + struct filter_pred *preds; + int match; + void *rec; +}; + +static int filter_match_preds_cb(enum move_type move, struct filter_pred *pred, + int *err, void *data) +{ + struct filter_match_preds_data *d = data; + + *err = 0; + switch (move) { + case MOVE_DOWN: + /* only AND and OR have children */ + if (pred->left != FILTER_PRED_INVALID) { + /* If ops is set, then it was folded. */ + if (!pred->ops) + return WALK_PRED_DEFAULT; + /* We can treat folded ops as a leaf node */ + d->match = process_ops(d->preds, pred, d->rec); + } else { + if (!WARN_ON_ONCE(!pred->fn)) + d->match = pred->fn(pred, d->rec); + } + + return WALK_PRED_PARENT; + case MOVE_UP_FROM_LEFT: + /* + * Check for short circuits. + * + * Optimization: !!match == (pred->op == OP_OR) + * is the same as: + * if ((match && pred->op == OP_OR) || + * (!match && pred->op == OP_AND)) + */ + if (!!d->match == (pred->op == OP_OR)) + return WALK_PRED_PARENT; + break; + case MOVE_UP_FROM_RIGHT: + break; + } + + return WALK_PRED_DEFAULT; +} + /* return 1 if event matches, 0 otherwise (discard) */ int filter_match_preds(struct event_filter *filter, void *rec) { - int match = -1; - enum move_type move = MOVE_DOWN; struct filter_pred *preds; - struct filter_pred *pred; struct filter_pred *root; - int n_preds; - int done = 0; + struct filter_match_preds_data data = { + /* match is currently meaningless */ + .match = -1, + .rec = rec, + }; + int n_preds, ret; /* no filter is considered a match */ if (!filter) return 1; n_preds = filter->n_preds; - if (!n_preds) return 1; /* * n_preds, root and filter->preds are protect with preemption disabled. */ - preds = rcu_dereference_sched(filter->preds); root = rcu_dereference_sched(filter->root); if (!root) return 1; - pred = root; - - /* match is currently meaningless */ - match = -1; - - do { - switch (move) { - case MOVE_DOWN: - /* only AND and OR have children */ - if (pred->left != FILTER_PRED_INVALID) { - /* If ops is set, then it was folded. */ - if (!pred->ops) { - /* keep going to down the left side */ - pred = &preds[pred->left]; - continue; - } - /* We can treat folded ops as a leaf node */ - match = process_ops(preds, pred, rec); - } else - match = pred->fn(pred, rec); - /* If this pred is the only pred */ - if (pred == root) - break; - pred = get_pred_parent(pred, preds, - pred->parent, &move); - continue; - case MOVE_UP_FROM_LEFT: - /* - * Check for short circuits. - * - * Optimization: !!match == (pred->op == OP_OR) - * is the same as: - * if ((match && pred->op == OP_OR) || - * (!match && pred->op == OP_AND)) - */ - if (!!match == (pred->op == OP_OR)) { - if (pred == root) - break; - pred = get_pred_parent(pred, preds, - pred->parent, &move); - continue; - } - /* now go down the right side of the tree. */ - pred = &preds[pred->right]; - move = MOVE_DOWN; - continue; - case MOVE_UP_FROM_RIGHT: - /* We finished this equation. */ - if (pred == root) - break; - pred = get_pred_parent(pred, preds, - pred->parent, &move); - continue; - } - done = 1; - } while (!done); - - return match; + data.preds = preds = rcu_dereference_sched(filter->preds); + ret = walk_pred_tree(preds, root, filter_match_preds_cb, &data); + WARN_ON(ret); + return data.match; } EXPORT_SYMBOL_GPL(filter_match_preds); @@ -628,22 +677,6 @@ find_event_field(struct ftrace_event_call *call, char *name) return __find_event_field(head, name); } -static void filter_free_pred(struct filter_pred *pred) -{ - if (!pred) - return; - - kfree(pred->field_name); - kfree(pred); -} - -static void filter_clear_pred(struct filter_pred *pred) -{ - kfree(pred->field_name); - pred->field_name = NULL; - pred->regex.len = 0; -} - static int __alloc |