diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-22 16:44:39 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2011-07-22 16:44:39 -0700 |
commit | 4d4abdcb1dee03a4f9d6d2021622ed07e14dfd17 (patch) | |
tree | 4ed4c74b70240451065165fda5fb2059f8c6b1e5 /kernel | |
parent | 0342cbcfced2ee937d7c8e1c63f3d3082da7c7dc (diff) | |
parent | 7fcfd1abd6480d3b9ef17f5759c175e036e835cf (diff) |
Merge branch 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'perf-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: (123 commits)
perf: Remove the nmi parameter from the oprofile_perf backend
x86, perf: Make copy_from_user_nmi() a library function
perf: Remove perf_event_attr::type check
x86, perf: P4 PMU - Fix typos in comments and style cleanup
perf tools: Make test use the preset debugfs path
perf tools: Add automated tests for events parsing
perf tools: De-opt the parse_events function
perf script: Fix display of IP address for non-callchain path
perf tools: Fix endian conversion reading event attr from file header
perf tools: Add missing 'node' alias to the hw_cache[] array
perf probe: Support adding probes on offline kernel modules
perf probe: Add probed module in front of function
perf probe: Introduce debuginfo to encapsulate dwarf information
perf-probe: Move dwarf library routines to dwarf-aux.{c, h}
perf probe: Remove redundant dwarf functions
perf probe: Move strtailcmp to string.c
perf probe: Rename DIE_FIND_CB_FOUND to DIE_FIND_CB_END
tracing/kprobe: Update symbol reference when loading module
tracing/kprobes: Support module init function probing
kprobes: Return -ENOENT if probe point doesn't exist
...
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/async.c | 12 | ||||
-rw-r--r-- | kernel/events/Makefile | 2 | ||||
-rw-r--r-- | kernel/events/core.c | 938 | ||||
-rw-r--r-- | kernel/events/hw_breakpoint.c | 10 | ||||
-rw-r--r-- | kernel/events/internal.h | 96 | ||||
-rw-r--r-- | kernel/events/ring_buffer.c | 380 | ||||
-rw-r--r-- | kernel/kprobes.c | 33 | ||||
-rw-r--r-- | kernel/sched.c | 2 | ||||
-rw-r--r-- | kernel/stacktrace.c | 12 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 157 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 66 | ||||
-rw-r--r-- | kernel/trace/ring_buffer_benchmark.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace.c | 326 | ||||
-rw-r--r-- | kernel/trace/trace.h | 61 | ||||
-rw-r--r-- | kernel/trace/trace_entries.h | 3 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 139 | ||||
-rw-r--r-- | kernel/trace/trace_events_filter.c | 6 | ||||
-rw-r--r-- | kernel/trace/trace_functions.c | 3 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 225 | ||||
-rw-r--r-- | kernel/trace/trace_irqsoff.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_kprobe.c | 324 | ||||
-rw-r--r-- | kernel/trace/trace_output.c | 11 | ||||
-rw-r--r-- | kernel/trace/trace_sched_wakeup.c | 4 | ||||
-rw-r--r-- | kernel/trace/trace_stack.c | 13 | ||||
-rw-r--r-- | kernel/watchdog.c | 8 |
25 files changed, 1655 insertions, 1182 deletions
diff --git a/kernel/async.c b/kernel/async.c index cd9dbb913c7..d5fe7af0de2 100644 --- a/kernel/async.c +++ b/kernel/async.c @@ -49,12 +49,13 @@ asynchronous and synchronous parts of the kernel. */ #include <linux/async.h> +#include <linux/atomic.h> +#include <linux/ktime.h> #include <linux/module.h> #include <linux/wait.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/workqueue.h> -#include <asm/atomic.h> static async_cookie_t next_cookie = 1; @@ -128,7 +129,8 @@ static void async_run_entry_fn(struct work_struct *work) /* 2) run (and print duration) */ if (initcall_debug && system_state == SYSTEM_BOOTING) { - printk("calling %lli_%pF @ %i\n", (long long)entry->cookie, + printk(KERN_DEBUG "calling %lli_%pF @ %i\n", + (long long)entry->cookie, entry->func, task_pid_nr(current)); calltime = ktime_get(); } @@ -136,7 +138,7 @@ static void async_run_entry_fn(struct work_struct *work) if (initcall_debug && system_state == SYSTEM_BOOTING) { rettime = ktime_get(); delta = ktime_sub(rettime, calltime); - printk("initcall %lli_%pF returned 0 after %lld usecs\n", + printk(KERN_DEBUG "initcall %lli_%pF returned 0 after %lld usecs\n", (long long)entry->cookie, entry->func, (long long)ktime_to_ns(delta) >> 10); @@ -270,7 +272,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, ktime_t starttime, delta, endtime; if (initcall_debug && system_state == SYSTEM_BOOTING) { - printk("async_waiting @ %i\n", task_pid_nr(current)); + printk(KERN_DEBUG "async_waiting @ %i\n", task_pid_nr(current)); starttime = ktime_get(); } @@ -280,7 +282,7 @@ void async_synchronize_cookie_domain(async_cookie_t cookie, endtime = ktime_get(); delta = ktime_sub(endtime, starttime); - printk("async_continuing @ %i after %lli usec\n", + printk(KERN_DEBUG "async_continuing @ %i after %lli usec\n", task_pid_nr(current), (long long)ktime_to_ns(delta) >> 10); } diff --git a/kernel/events/Makefile b/kernel/events/Makefile index 1ce23d3d839..89e5e8aa4c3 100644 --- a/kernel/events/Makefile +++ b/kernel/events/Makefile @@ -2,5 +2,5 @@ ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_core.o = -pg endif -obj-y := core.o +obj-y := core.o ring_buffer.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o diff --git a/kernel/events/core.c b/kernel/events/core.c index 9efe7108cca..b8785e26ee1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -36,6 +36,8 @@ #include <linux/ftrace_event.h> #include <linux/hw_breakpoint.h> +#include "internal.h" + #include <asm/irq_regs.h> struct remote_function_call { @@ -200,6 +202,22 @@ __get_cpu_context(struct perf_event_context *ctx) return this_cpu_ptr(ctx->pmu->pmu_cpu_context); } +static void perf_ctx_lock(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + raw_spin_lock(&cpuctx->ctx.lock); + if (ctx) + raw_spin_lock(&ctx->lock); +} + +static void perf_ctx_unlock(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx) +{ + if (ctx) + raw_spin_unlock(&ctx->lock); + raw_spin_unlock(&cpuctx->ctx.lock); +} + #ifdef CONFIG_CGROUP_PERF /* @@ -340,11 +358,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) rcu_read_lock(); list_for_each_entry_rcu(pmu, &pmus, entry) { - cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - perf_pmu_disable(cpuctx->ctx.pmu); - /* * perf_cgroup_events says at least one * context on this CPU has cgroup events. @@ -353,6 +368,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode) * events for a context. */ if (cpuctx->ctx.nr_cgroups > 0) { + perf_ctx_lock(cpuctx, cpuctx->task_ctx); + perf_pmu_disable(cpuctx->ctx.pmu); if (mode & PERF_CGROUP_SWOUT) { cpu_ctx_sched_out(cpuctx, EVENT_ALL); @@ -372,9 +389,9 @@ void perf_cgroup_switch(struct task_struct *task, int mode) cpuctx->cgrp = perf_cgroup_from_task(task); cpu_ctx_sched_in(cpuctx, EVENT_ALL, task); } + perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } - - perf_pmu_enable(cpuctx->ctx.pmu); } rcu_read_unlock(); @@ -731,6 +748,7 @@ static u64 perf_event_time(struct perf_event *event) /* * Update the total_time_enabled and total_time_running fields for a event. + * The caller of this function needs to hold the ctx->lock. */ static void update_event_times(struct perf_event *event) { @@ -1105,6 +1123,10 @@ static int __perf_remove_from_context(void *info) raw_spin_lock(&ctx->lock); event_sched_out(event, cpuctx, ctx); list_del_event(event, ctx); + if (!ctx->nr_events && cpuctx->task_ctx == ctx) { + ctx->is_active = 0; + cpuctx->task_ctx = NULL; + } raw_spin_unlock(&ctx->lock); return 0; @@ -1454,8 +1476,24 @@ static void add_event_to_ctx(struct perf_event *event, event->tstamp_stopped = tstamp; } -static void perf_event_context_sched_in(struct perf_event_context *ctx, - struct task_struct *tsk); +static void task_ctx_sched_out(struct perf_event_context *ctx); +static void +ctx_sched_in(struct perf_event_context *ctx, + struct perf_cpu_context *cpuctx, + enum event_type_t event_type, + struct task_struct *task); + +static void perf_event_sched_in(struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, + struct task_struct *task) +{ + cpu_ctx_sched_in(cpuctx, EVENT_PINNED, task); + if (ctx) + ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); + cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); + if (ctx) + ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); +} /* * Cross CPU call to install and enable a performance event @@ -1466,20 +1504,37 @@ static int __perf_install_in_context(void *info) { struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; - struct perf_event *leader = event->group_leader; struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); - int err; + struct perf_event_context *task_ctx = cpuctx->task_ctx; + struct task_struct *task = current; + + perf_ctx_lock(cpuctx, task_ctx); + perf_pmu_disable(cpuctx->ctx.pmu); /* - * In case we're installing a new context to an already running task, - * could also happen before perf_event_task_sched_in() on architectures - * which do context switches with IRQs enabled. + * If there was an active task_ctx schedule it out. */ - if (ctx->task && !cpuctx->task_ctx) - perf_event_context_sched_in(ctx, ctx->task); + if (task_ctx) + task_ctx_sched_out(task_ctx); + + /* + * If the context we're installing events in is not the + * active task_ctx, flip them. + */ + if (ctx->task && task_ctx != ctx) { + if (task_ctx) + raw_spin_unlock(&task_ctx->lock); + raw_spin_lock(&ctx->lock); + task_ctx = ctx; + } + + if (task_ctx) { + cpuctx->task_ctx = task_ctx; + task = task_ctx->task; + } + + cpu_ctx_sched_out(cpuctx, EVENT_ALL); - raw_spin_lock(&ctx->lock); - ctx->is_active = 1; update_context_time(ctx); /* * update cgrp time only if current cgrp @@ -1490,43 +1545,13 @@ static int __perf_install_in_context(void *info) add_event_to_ctx(event, ctx); - if (!event_filter_match(event)) - goto unlock; - - /* - * Don't put the event on if it is disabled or if - * it is in a group and the group isn't on. - */ - if (event->state != PERF_EVENT_STATE_INACTIVE || - (leader != event && leader->state != PERF_EVENT_STATE_ACTIVE)) - goto unlock; - /* - * An exclusive event can't go on if there are already active - * hardware events, and no hardware event can go on if there - * is already an exclusive event on. + * Schedule everything back in */ - if (!group_can_go_on(event, cpuctx, 1)) - err = -EEXIST; - else - err = event_sched_in(event, cpuctx, ctx); - - if (err) { - /* - * This event couldn't go on. If it is in a group - * then we have to pull the whole group off. - * If the event group is pinned then put it in error state. - */ - if (leader != event) - group_sched_out(leader, cpuctx, ctx); - if (leader->attr.pinned) { - update_group_times(leader); - leader->state = PERF_EVENT_STATE_ERROR; - } - } + perf_event_sched_in(cpuctx, task_ctx, task); -unlock: - raw_spin_unlock(&ctx->lock); + perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, task_ctx); return 0; } @@ -1739,7 +1764,7 @@ out: raw_spin_unlock_irq(&ctx->lock); } -static int perf_event_refresh(struct perf_event *event, int refresh) +int perf_event_refresh(struct perf_event *event, int refresh) { /* * not supported on inherited events @@ -1752,36 +1777,35 @@ static int perf_event_refresh(struct perf_event *event, int refresh) return 0; } +EXPORT_SYMBOL_GPL(perf_event_refresh); static void ctx_sched_out(struct perf_event_context *ctx, struct perf_cpu_context *cpuctx, enum event_type_t event_type) { struct perf_event *event; + int is_active = ctx->is_active; - raw_spin_lock(&ctx->lock); - perf_pmu_disable(ctx->pmu); - ctx->is_active = 0; + ctx->is_active &= ~event_type; if (likely(!ctx->nr_events)) - goto out; + return; + update_context_time(ctx); update_cgrp_time_from_cpuctx(cpuctx); - if (!ctx->nr_active) - goto out; + return; - if (event_type & EVENT_PINNED) { + perf_pmu_disable(ctx->pmu); + if ((is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) { list_for_each_entry(event, &ctx->pinned_groups, group_entry) group_sched_out(event, cpuctx, ctx); } - if (event_type & EVENT_FLEXIBLE) { + if ((is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) { list_for_each_entry(event, &ctx->flexible_groups, group_entry) group_sched_out(event, cpuctx, ctx); } -out: perf_pmu_enable(ctx->pmu); - raw_spin_unlock(&ctx->lock); } /* @@ -1929,8 +1953,10 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn, rcu_read_unlock(); if (do_switch) { + raw_spin_lock(&ctx->lock); ctx_sched_out(ctx, cpuctx, EVENT_ALL); cpuctx->task_ctx = NULL; + raw_spin_unlock(&ctx->lock); } } @@ -1965,8 +1991,7 @@ void __perf_event_task_sched_out(struct task_struct *task, perf_cgroup_sched_out(task); } -static void task_ctx_sched_out(struct perf_event_context *ctx, - enum event_type_t event_type) +static void task_ctx_sched_out(struct perf_event_context *ctx) { struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); @@ -1976,7 +2001,7 @@ static void task_ctx_sched_out(struct perf_event_context *ctx, if (WARN_ON_ONCE(ctx != cpuctx->task_ctx)) return; - ctx_sched_out(ctx, cpuctx, event_type); + ctx_sched_out(ctx, cpuctx, EVENT_ALL); cpuctx->task_ctx = NULL; } @@ -2055,11 +2080,11 @@ ctx_sched_in(struct perf_event_context *ctx, struct task_struct *task) { u64 now; + int is_active = ctx->is_active; - raw_spin_lock(&ctx->lock); - ctx->is_active = 1; + ctx->is_active |= event_type; if (likely(!ctx->nr_events)) - goto out; + return; now = perf_clock(); ctx->timestamp = now; @@ -2068,15 +2093,12 @@ ctx_sched_in(struct perf_event_context *ctx, * First go through the list and put on any pinned groups * in order to give them the best chance of going on. */ - if (event_type & EVENT_PINNED) + if (!(is_active & EVENT_PINNED) && (event_type & EVENT_PINNED)) ctx_pinned_sched_in(ctx, cpuctx); /* Then walk through the lower prio flexible groups */ - if (event_type & EVENT_FLEXIBLE) + if (!(is_active & EVENT_FLEXIBLE) && (event_type & EVENT_FLEXIBLE)) ctx_flexible_sched_in(ctx, cpuctx); - -out: - raw_spin_unlock(&ctx->lock); } static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, @@ -2088,19 +2110,6 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx, ctx_sched_in(ctx, cpuctx, event_type, task); } -static void task_ctx_sched_in(struct perf_event_context *ctx, - enum event_type_t event_type) -{ - struct perf_cpu_context *cpuctx; - - cpuctx = __get_cpu_context(ctx); - if (cpuctx->task_ctx == ctx) - return; - - ctx_sched_in(ctx, cpuctx, event_type, NULL); - cpuctx->task_ctx = ctx; -} - static void perf_event_context_sched_in(struct perf_event_context *ctx, struct task_struct *task) { @@ -2110,6 +2119,7 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, if (cpuctx->task_ctx == ctx) return; + perf_ctx_lock(cpuctx, ctx); perf_pmu_disable(ctx->pmu); /* * We want to keep the following priority order: @@ -2118,18 +2128,18 @@ static void perf_event_context_sched_in(struct perf_event_context *ctx, */ cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); - ctx_sched_in(ctx, cpuctx, EVENT_PINNED, task); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, task); - ctx_sched_in(ctx, cpuctx, EVENT_FLEXIBLE, task); + perf_event_sched_in(cpuctx, ctx, task); cpuctx->task_ctx = ctx; + perf_pmu_enable(ctx->pmu); + perf_ctx_unlock(cpuctx, ctx); + /* * Since these rotations are per-cpu, we need to ensure the * cpu-context we got scheduled on is actually rotating. */ perf_pmu_rotate_start(ctx->pmu); - perf_pmu_enable(ctx->pmu); } /* @@ -2269,7 +2279,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) u64 interrupts, now; s64 delta; - raw_spin_lock(&ctx->lock); list_for_each_entry_rcu(event, &ctx->event_list, event_entry) { if (event->state != PERF_EVENT_STATE_ACTIVE) continue; @@ -2301,7 +2310,6 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) if (delta > 0) perf_adjust_period(event, period, delta); } - raw_spin_unlock(&ctx->lock); } /* @@ -2309,16 +2317,12 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period) */ static void rotate_ctx(struct perf_event_context *ctx) { - raw_spin_lock(&ctx->lock); - /* * Rotate the first entry last of non-pinned groups. Rotation might be * disabled by the inheritance code. */ if (!ctx->rotate_disable) list_rotate_left(&ctx->flexible_groups); - - raw_spin_unlock(&ctx->lock); } /* @@ -2345,6 +2349,7 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) rotate = 1; } + perf_ctx_lock(cpuctx, cpuctx->task_ctx); perf_pmu_disable(cpuctx->ctx.pmu); perf_ctx_adjust_freq(&cpuctx->ctx, interval); if (ctx) @@ -2355,21 +2360,20 @@ static void perf_rotate_context(struct perf_cpu_context *cpuctx) cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE); if (ctx) - task_ctx_sched_out(ctx, EVENT_FLEXIBLE); + ctx_sched_out(ctx, cpuctx, EVENT_FLEXIBLE); rotate_ctx(&cpuctx->ctx); if (ctx) rotate_ctx(ctx); - cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE, current); - if (ctx) - task_ctx_sched_in(ctx, EVENT_FLEXIBLE); + perf_event_sched_in(cpuctx, ctx, current); done: if (remove) list_del_init(&cpuctx->rotation_list); perf_pmu_enable(cpuctx->ctx.pmu); + perf_ctx_unlock(cpuctx, cpuctx->task_ctx); } void perf_event_task_tick(void) @@ -2424,9 +2428,9 @@ static void perf_event_enable_on_exec(struct perf_event_context *ctx) * in. */ perf_cgroup_sched_out(current); - task_ctx_sched_out(ctx, EVENT_ALL); raw_spin_lock(&ctx->lock); + task_ctx_sched_out(ctx); list_for_each_entry(event, &ctx->pinned_groups, group_entry) { ret = event_enable_on_exec(event, ctx); @@ -2835,16 +2839,12 @@ retry: unclone_ctx(ctx); ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); - } - - if (!ctx) { + } else { ctx = alloc_perf_context(pmu, task); err = -ENOMEM; if (!ctx) goto errout; - get_ctx(ctx); - err = 0; mutex_lock(&task->perf_event_mutex); /* @@ -2856,14 +2856,14 @@ retry: else if (task->perf_event_ctxp[ctxn]) err = -EAGAIN; else { + get_ctx(ctx); ++ctx->pin_count; rcu_assign_pointer(task->perf_event_ctxp[ctxn], ctx); } mutex_unlock(&task->perf_event_mutex); if (unlikely(err)) { - put_task_struct(task); - kfree(ctx); + put_ctx(ctx); if (err == -EAGAIN) goto retry; @@ -2890,7 +2890,7 @@ static void free_event_rcu(struct rcu_head *head) kfree(event); } -static void perf_buffer_put(struct perf_buffer *buffer); +static void ring_buffer_put(struct ring_buffer *rb); static void free_event(struct perf_event *event) { @@ -2913,9 +2913,9 @@ static void free_event(struct perf_event *event) } } - if (event->buffer) { - perf_buffer_put(event->buffer); - event->buffer = NULL; + if (event->rb) { + ring_buffer_put(event->rb); + event->rb = NULL; } if (is_cgroup_event(event)) @@ -2934,12 +2934,6 @@ int perf_event_release_kernel(struct perf_event *event) { struct perf_event_context *ctx = event->ctx; - /* - * Remove from the PMU, can't get re-enabled since we got - * here because the last ref went. - */ - perf_event_disable(event); - WARN_ON_ONCE(ctx->parent_ctx); /* * There are two ways this annotation is useful: @@ -2956,8 +2950,8 @@ int perf_event_release_kernel(struct perf_event *event) mutex_lock_nested(&ctx->mutex, SINGLE_DEPTH_NESTING); raw_spin_lock_irq(&ctx->lock); perf_group_detach(event); - list_del_event(event, ctx); raw_spin_unlock_irq(&ctx->lock); + perf_remove_from_context(event); mutex_unlock(&ctx->mutex); free_event(event); @@ -3149,13 +3143,13 @@ perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos) static unsigned int perf_poll(struct file *file, poll_table *wait) { struct perf_event *event = file->private_data; - struct perf_buffer *buffer; + struct ring_buffer *rb; unsigned int events = POLL_HUP; rcu_read_lock(); - buffer = rcu_dereference(event->buffer); - if (buffer) - events = atomic_xchg(&buffer->poll, 0); + rb = rcu_dereference(event->rb); + if (rb) + events = atomic_xchg(&rb->poll, 0); rcu_read_unlock(); poll_wait(file, &event->waitq, wait); @@ -3358,6 +3352,18 @@ static int perf_event_index(struct perf_event *event) return event->hw.idx + 1 - PERF_EVENT_INDEX_OFFSET; } +static void calc_timer_values(struct perf_event *event, + u64 *running, + u64 *enabled) +{ + u64 now, ctx_time; + + now = perf_clock(); + ctx_time = event->shadow_ctx_time + now; + *enabled = ctx_time - event->tstamp_enabled; + *running = ctx_time - event->tstamp_running; +} + /* * Callers need to ensure there can be no nesting of this function, otherwise * the seqlock logic goes bad. We can not serialize this because the arch @@ -3366,14 +3372,25 @@ static int perf_event_index(struct perf_event *event) void perf_event_update_userpage(struct perf_event *event) { struct perf_event_mmap_page *userpg; - struct perf_buffer *buffer; + struct ring_buffer *rb; + u64 enabled, running; rcu_read_lock(); - buffer = rcu_dereference(event->buffer); - if (!buffer) + /* + * compute total_time_enabled, total_time_running + * based on snapshot values taken when the event + * was last scheduled in. + * + * we cannot simply called update_context_time() + * because of locking issue as we can be called in + * NMI context + */ + calc_timer_values(event, &enabled, &running); + rb = rcu_dereference(event->rb); + if (!rb) goto unlock; - userpg = buffer->user_page; + userpg = rb->user_page; /* * Disable preemption so as to not let the corresponding user-space @@ -3387,10 +3404,10 @@ void perf_event_update_userpage(struct perf_event *event) if (event->state == PERF_EVENT_STATE_ACTIVE) userpg->offset -= local64_read(&event->hw.prev_count); - userpg->time_enabled = event->total_time_enabled + + userpg->time_enabled = enabled + atomic64_read(&event->child_total_time_enabled); - userpg->time_running = event->total_time_running + + userpg->time_running = running + atomic64_read(&event->child_total_time_running); barrier(); @@ -3400,220 +3417,10 @@ unlock: rcu_read_unlock(); } -static unsigned long perf_data_size(struct perf_buffer *buffer); - -static void -perf_buffer_init(struct perf_buffer *buffer, long watermark, int flags) -{ - long max_size = perf_data_size(buffer); - - if (watermark) - buffer->watermark = min(max_size, watermark); - - if (!buffer->watermark) - buffer->watermark = max_size / 2; - - if (flags & PERF_BUFFER_WRITABLE) - buffer->writable = 1; - - atomic_set(&buffer->refcount, 1); -} - -#ifndef CONFIG_PERF_USE_VMALLOC - -/* - * Back perf_mmap() with regular GFP_KERNEL-0 pages. - */ - -static struct page * -perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) -{ - if (pgoff > buffer->nr_pages) - return NULL; - - if (pgoff == 0) - return virt_to_page(buffer->user_page); - - return virt_to_page(buffer->data_pages[pgoff - 1]); -} - -static void *perf_mmap_alloc_page(int cpu) -{ - struct page *page; - int node; - - node = (cpu == -1) ? cpu : cpu_to_node(cpu); - page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0); - if (!page) - return NULL; - - return page_address(page); -} - -static struct perf_buffer * -perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) -{ - struct perf_buffer *buffer; - unsigned long size; - int i; - - size = sizeof(struct perf_buffer); - size += nr_pages * sizeof(void *); - - buffer = kzalloc(size, GFP_KERNEL); - if (!buffer) - goto fail; - - buffer->user_page = perf_mmap_alloc_page(cpu); - if (!buffer->user_page) - goto fail_user_page; - - for (i = 0; i < nr_pages; i++) { - buffer->data_pages[i] = perf_mmap_alloc_page(cpu); - if (!buffer->data_pages[i]) - goto fail_data_pages; - } - - buffer->nr_pages = nr_pages; - - perf_buffer_init(buffer, watermark, flags); - - return buffer; - -fail_data_pages: - for (i--; i >= 0; i--) - free_page((unsigned long)buffer->data_pages[i]); - - free_page((unsigned long)buffer->user_page); - -fail_user_page: - kfree(buffer); - -fail: - return NULL; -} - -static void perf_mmap_free_page(unsigned long addr) -{ - struct page *page = virt_to_page((void *)addr); - - page->mapping = NULL; - __free_page(page); -} - -static void perf_buffer_free(struct perf_buffer *buffer) -{ - int i; - - perf_mmap_free_page((unsigned long)buffer->user_page); - for (i = 0; i < buffer->nr_pages; i++) - perf_mmap_free_page((unsigned long)buffer->data_pages[i]); - kfree(buffer); -} - -static inline int page_order(struct perf_buffer *buffer) -{ - return 0; -} - -#else - -/* - * Back perf_mmap() with vmalloc memory. - * - * Required for architectures that have d-cache aliasing issues. - */ - -static inline int page_order(struct perf_buffer *buffer) -{ - return buffer->page_order; -} - -static struct page * -perf_mmap_to_page(struct perf_buffer *buffer, unsigned long pgoff) -{ - if (pgoff > (1UL << page_order(buffer))) - return NULL; - - return vmalloc_to_page((void *)buffer->user_page + pgoff * PAGE_SIZE); -} - -static void perf_mmap_unmark_page(void *addr) -{ - struct page *page = vmalloc_to_page(addr); - - page->mapping = NULL; -} - -static void perf_buffer_free_work(struct work_struct *work) -{ - struct perf_buffer *buffer; - void *base; - int i, nr; - - buffer = container_of(work, struct perf_buffer, work); - nr = 1 << page_order(buffer); - - base = buffer->user_page; - for (i = 0; i < nr + 1; i++) - perf_mmap_unmark_page(base + (i * PAGE_SIZE)); - - vfree(base); - kfree(buffer); -} - -static void perf_buffer_free(struct perf_buffer *buffer) -{ - schedule_work(&buffer->work); -} - -static struct perf_buffer * -perf_buffer_alloc(int nr_pages, long watermark, int cpu, int flags) -{ - struct perf_buffer *buffer; - unsigned long size; - void *all_buf; - - size = sizeof(struct perf_buffer); - size += sizeof(void *); - - buffer = kzalloc(size, GFP_KERNEL); - if (!buffer) - goto fail; - - INIT_WORK(&buffer->work, perf_buffer_free_work); - - all_buf = vmalloc_user((nr_pages + 1) * PAGE_SIZE); - if (!all_buf) - goto fail_all_buf; - - buffer->user_page = all_buf; - buffer->data_pages[0] = all_buf + PAGE_SIZE; - buffer->page_order = ilog2(nr_pages); - buffer->nr_pages = 1; - - perf_buffer_init(buffer, watermark, flags); - - return buffer; - -fail_all_buf: - kfree(buffer); - -fail: - return NULL; -} - -#endif - -static unsigned long perf_data_size(struct perf_buffer *buffer) -{ - return buffer->nr_pages << (PAGE_SHIFT + page_order(buffer)); -} - static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) { struct perf_event *event = vma->vm_file->private_data; - struct perf_buffer *buffer; + struct ring_buffer *rb; int ret = VM_FAULT_SIGBUS; if (vmf->flags & FAULT_FLAG_MKWRITE) { @@ -3623,14 +3430,14 @@ static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) } rcu_read_lock(); - buffer = rcu_dereference(event->buffer); - if (!buffer) + rb = rcu_dereference(event->rb); + if (!rb) goto unlock; if (vmf->pgoff && (vmf->flags & FAULT_FLAG_WRITE)) goto unlock; - vmf->page = perf_mmap_to_page(buffer, vmf->pgoff); + vmf->page = perf_mmap_to_page(rb, vmf->pgoff); if (!vmf->page) goto unlock; @@ -3645,35 +3452,35 @@ unlock: return ret; } -static void perf_buffer_free_rcu(struct rcu_head *rcu_head) +static void rb_free_rcu(struct rcu_head *rcu_head) { - struct perf_buffer *buffer; + struct ring_buffer *rb; - buffer = container_of(rcu_head, struct perf_buffer, rcu_head); - perf_buffer_free(buffer); + rb = container_of(rcu_head, struct ring_buffer, rcu_head); + rb_free(rb); } -static struct perf_buffer *perf_buffer_get(struct perf_event *event) +static struct ring_buffer *ring_buffer_get(struct perf_event *event) { - struc |