aboutsummaryrefslogtreecommitdiff
path: root/kernel/perf_event.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/perf_event.c')
-rw-r--r--kernel/perf_event.c2357
1 files changed, 1409 insertions, 948 deletions
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index fc512684423..1ec3916ffef 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -31,24 +31,18 @@
#include <linux/kernel_stat.h>
#include <linux/perf_event.h>
#include <linux/ftrace_event.h>
-#include <linux/hw_breakpoint.h>
#include <asm/irq_regs.h>
-/*
- * Each CPU has a list of per CPU events:
- */
-static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
-
-int perf_max_events __read_mostly = 1;
-static int perf_reserved_percpu __read_mostly;
-static int perf_overcommit __read_mostly = 1;
-
static atomic_t nr_events __read_mostly;
static atomic_t nr_mmap_events __read_mostly;
static atomic_t nr_comm_events __read_mostly;
static atomic_t nr_task_events __read_mostly;
+static LIST_HEAD(pmus);
+static DEFINE_MUTEX(pmus_lock);
+static struct srcu_struct pmus_srcu;
+
/*
* perf event paranoia level:
* -1 - not paranoid at all
@@ -67,22 +61,6 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000;
static atomic64_t perf_event_id;
-/*
- * Lock for (sysadmin-configurable) event reservations:
- */
-static DEFINE_SPINLOCK(perf_resource_lock);
-
-/*
- * Architecture provided APIs - weak aliases:
- */
-extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event)
-{
- return NULL;
-}
-
-void __weak hw_perf_disable(void) { barrier(); }
-void __weak hw_perf_enable(void) { barrier(); }
-
void __weak perf_event_print_debug(void) { }
extern __weak const char *perf_pmu_name(void)
@@ -90,18 +68,36 @@ extern __weak const char *perf_pmu_name(void)
return "pmu";
}
-static DEFINE_PER_CPU(int, perf_disable_count);
+void perf_pmu_disable(struct pmu *pmu)
+{
+ int *count = this_cpu_ptr(pmu->pmu_disable_count);
+ if (!(*count)++)
+ pmu->pmu_disable(pmu);
+}
-void perf_disable(void)
+void perf_pmu_enable(struct pmu *pmu)
{
- if (!__get_cpu_var(perf_disable_count)++)
- hw_perf_disable();
+ int *count = this_cpu_ptr(pmu->pmu_disable_count);
+ if (!--(*count))
+ pmu->pmu_enable(pmu);
}
-void perf_enable(void)
+static DEFINE_PER_CPU(struct list_head, rotation_list);
+
+/*
+ * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
+ * because they're strictly cpu affine and rotate_start is called with IRQs
+ * disabled, while rotate_context is called from IRQ context.
+ */
+static void perf_pmu_rotate_start(struct pmu *pmu)
{
- if (!--__get_cpu_var(perf_disable_count))
- hw_perf_enable();
+ struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ struct list_head *head = &__get_cpu_var(rotation_list);
+
+ WARN_ON(!irqs_disabled());
+
+ if (list_empty(&cpuctx->rotation_list))
+ list_add(&cpuctx->rotation_list, head);
}
static void get_ctx(struct perf_event_context *ctx)
@@ -156,13 +152,13 @@ static u64 primary_event_id(struct perf_event *event)
* the context could get moved to another task.
*/
static struct perf_event_context *
-perf_lock_task_context(struct task_struct *task, unsigned long *flags)
+perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags)
{
struct perf_event_context *ctx;
rcu_read_lock();
- retry:
- ctx = rcu_dereference(task->perf_event_ctxp);
+retry:
+ ctx = rcu_dereference(task->perf_event_ctxp[ctxn]);
if (ctx) {
/*
* If this context is a clone of another, it might
@@ -175,7 +171,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
* can't get swapped on us any more.
*/
raw_spin_lock_irqsave(&ctx->lock, *flags);
- if (ctx != rcu_dereference(task->perf_event_ctxp)) {
+ if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) {
raw_spin_unlock_irqrestore(&ctx->lock, *flags);
goto retry;
}
@@ -194,12 +190,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags)
* can't get swapped to another task. This also increments its
* reference count so that the context can't get freed.
*/
-static struct perf_event_context *perf_pin_task_context(struct task_struct *task)
+static struct perf_event_context *
+perf_pin_task_context(struct task_struct *task, int ctxn)
{
struct perf_event_context *ctx;
unsigned long flags;
- ctx = perf_lock_task_context(task, &flags);
+ ctx = perf_lock_task_context(task, ctxn, &flags);
if (ctx) {
++ctx->pin_count;
raw_spin_unlock_irqrestore(&ctx->lock, flags);
@@ -307,6 +304,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
}
list_add_rcu(&event->event_entry, &ctx->event_list);
+ if (!ctx->nr_events)
+ perf_pmu_rotate_start(ctx->pmu);
ctx->nr_events++;
if (event->attr.inherit_stat)
ctx->nr_stat++;
@@ -441,7 +440,7 @@ event_sched_out(struct perf_event *event,
event->state = PERF_EVENT_STATE_OFF;
}
event->tstamp_stopped = ctx->time;
- event->pmu->disable(event);
+ event->pmu->del(event, 0);
event->oncpu = -1;
if (!is_software_event(event))
@@ -471,6 +470,12 @@ group_sched_out(struct perf_event *group_event,
cpuctx->exclusive = 0;
}
+static inline struct perf_cpu_context *
+__get_cpu_context(struct perf_event_context *ctx)
+{
+ return this_cpu_ptr(ctx->pmu->pmu_cpu_context);
+}
+
/*
* Cross CPU call to remove a performance event
*
@@ -479,9 +484,9 @@ group_sched_out(struct perf_event *group_event,
*/
static void __perf_event_remove_from_context(void *info)
{
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
/*
* If this is a task context, we need to check whether it is
@@ -492,27 +497,11 @@ static void __perf_event_remove_from_context(void *info)
return;
raw_spin_lock(&ctx->lock);
- /*
- * Protect the list operation against NMI by disabling the
- * events on a global level.
- */
- perf_disable();
event_sched_out(event, cpuctx, ctx);
list_del_event(event, ctx);
- if (!ctx->task) {
- /*
- * Allow more per task events with respect to the
- * reservation:
- */
- cpuctx->max_pertask =
- min(perf_max_events - ctx->nr_events,
- perf_max_events - perf_reserved_percpu);
- }
-
- perf_enable();
raw_spin_unlock(&ctx->lock);
}
@@ -577,8 +566,8 @@ retry:
static void __perf_event_disable(void *info)
{
struct perf_event *event = info;
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
/*
* If this is a per-task event, need to check whether this
@@ -633,7 +622,7 @@ void perf_event_disable(struct perf_event *event)
return;
}
- retry:
+retry:
task_oncpu_function_call(task, __perf_event_disable, event);
raw_spin_lock_irq(&ctx->lock);
@@ -672,7 +661,7 @@ event_sched_in(struct perf_event *event,
*/
smp_wmb();
- if (event->pmu->enable(event)) {
+ if (event->pmu->add(event, PERF_EF_START)) {
event->state = PERF_EVENT_STATE_INACTIVE;
event->oncpu = -1;
return -EAGAIN;
@@ -696,22 +685,15 @@ group_sched_in(struct perf_event *group_event,
struct perf_event_context *ctx)
{
struct perf_event *event, *partial_group = NULL;
- const struct pmu *pmu = group_event->pmu;
- bool txn = false;
+ struct pmu *pmu = group_event->pmu;
if (group_event->state == PERF_EVENT_STATE_OFF)
return 0;
- /* Check if group transaction availabe */
- if (pmu->start_txn)
- txn = true;
-
- if (txn)
- pmu->start_txn(pmu);
+ pmu->start_txn(pmu);
if (event_sched_in(group_event, cpuctx, ctx)) {
- if (txn)
- pmu->cancel_txn(pmu);
+ pmu->cancel_txn(pmu);
return -EAGAIN;
}
@@ -725,7 +707,7 @@ group_sched_in(struct perf_event *group_event,
}
}
- if (!txn || !pmu->commit_txn(pmu))
+ if (!pmu->commit_txn(pmu))
return 0;
group_error:
@@ -740,8 +722,7 @@ group_error:
}
event_sched_out(group_event, cpuctx, ctx);
- if (txn)
- pmu->cancel_txn(pmu);
+ pmu->cancel_txn(pmu);
return -EAGAIN;
}
@@ -794,10 +775,10 @@ static void add_event_to_ctx(struct perf_event *event,
*/
static void __perf_install_in_context(void *info)
{
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
struct perf_event *leader = event->group_leader;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;
/*
@@ -817,12 +798,6 @@ static void __perf_install_in_context(void *info)
ctx->is_active = 1;
update_context_time(ctx);
- /*
- * Protect the list operation against NMI by disabling the
- * events on a global level. NOP for non NMI based events.
- */
- perf_disable();
-
add_event_to_ctx(event, ctx);
if (event->cpu != -1 && event->cpu != smp_processor_id())
@@ -860,12 +835,7 @@ static void __perf_install_in_context(void *info)
}
}
- if (!err && !ctx->task && cpuctx->max_pertask)
- cpuctx->max_pertask--;
-
- unlock:
- perf_enable();
-
+unlock:
raw_spin_unlock(&ctx->lock);
}
@@ -888,6 +858,8 @@ perf_install_in_context(struct perf_event_context *ctx,
{
struct task_struct *task = ctx->task;
+ event->ctx = ctx;
+
if (!task) {
/*
* Per cpu events are installed via an smp call and
@@ -936,10 +908,12 @@ static void __perf_event_mark_enabled(struct perf_event *event,
event->state = PERF_EVENT_STATE_INACTIVE;
event->tstamp_enabled = ctx->time - event->total_time_enabled;
- list_for_each_entry(sub, &event->sibling_list, group_entry)
- if (sub->state >= PERF_EVENT_STATE_INACTIVE)
+ list_for_each_entry(sub, &event->sibling_list, group_entry) {
+ if (sub->state >= PERF_EVENT_STATE_INACTIVE) {
sub->tstamp_enabled =
ctx->time - sub->total_time_enabled;
+ }
+ }
}
/*
@@ -948,9 +922,9 @@ static void __perf_event_mark_enabled(struct perf_event *event,
static void __perf_event_enable(void *info)
{
struct perf_event *event = info;
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event_context *ctx = event->ctx;
struct perf_event *leader = event->group_leader;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
int err;
/*
@@ -984,12 +958,10 @@ static void __perf_event_enable(void *info)
if (!group_can_go_on(event, cpuctx, 1)) {
err = -EEXIST;
} else {
- perf_disable();
if (event == leader)
err = group_sched_in(event, cpuctx, ctx);
else
err = event_sched_in(event, cpuctx, ctx);
- perf_enable();
}
if (err) {
@@ -1005,7 +977,7 @@ static void __perf_event_enable(void *info)
}
}
- unlock:
+unlock:
raw_spin_unlock(&ctx->lock);
}
@@ -1046,7 +1018,7 @@ void perf_event_enable(struct perf_event *event)
if (event->state == PERF_EVENT_STATE_ERROR)
event->state = PERF_EVENT_STATE_OFF;
- retry:
+retry:
raw_spin_unlock_irq(&ctx->lock);
task_oncpu_function_call(task, __perf_event_enable, event);
@@ -1066,7 +1038,7 @@ void perf_event_enable(struct perf_event *event)
if (event->state == PERF_EVENT_STATE_OFF)
__perf_event_mark_enabled(event, ctx);
- out:
+out:
raw_spin_unlock_irq(&ctx->lock);
}
@@ -1097,26 +1069,26 @@ static void ctx_sched_out(struct perf_event_context *ctx,
struct perf_event *event;
raw_spin_lock(&ctx->lock);
+ perf_pmu_disable(ctx->pmu);
ctx->is_active = 0;
if (likely(!ctx->nr_events))
goto out;
update_context_time(ctx);
- perf_disable();
if (!ctx->nr_active)
- goto out_enable;
+ goto out;
- if (event_type & EVENT_PINNED)
+ if (event_type & EVENT_PINNED) {
list_for_each_entry(event, &ctx->pinned_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
+ }
- if (event_type & EVENT_FLEXIBLE)
+ if (event_type & EVENT_FLEXIBLE) {
list_for_each_entry(event, &ctx->flexible_groups, group_entry)
group_sched_out(event, cpuctx, ctx);
-
- out_enable:
- perf_enable();
- out:
+ }
+out:
+ perf_pmu_enable(ctx->pmu);
raw_spin_unlock(&ctx->lock);
}
@@ -1214,34 +1186,25 @@ static void perf_event_sync_stat(struct perf_event_context *ctx,
}
}
-/*
- * Called from scheduler to remove the events of the current task,
- * with interrupts disabled.
- *
- * We stop each event and update the event value in event->count.
- *
- * This does not protect us against NMI, but disable()
- * sets the disabled bit in the control field of event _before_
- * accessing the event control register. If a NMI hits, then it will
- * not restart the event.
- */
-void perf_event_task_sched_out(struct task_struct *task,
- struct task_struct *next)
+void perf_event_context_sched_out(struct task_struct *task, int ctxn,
+ struct task_struct *next)
{
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- struct perf_event_context *ctx = task->perf_event_ctxp;
+ struct perf_event_context *ctx = task->perf_event_ctxp[ctxn];
struct perf_event_context *next_ctx;
struct perf_event_context *parent;
+ struct perf_cpu_context *cpuctx;
int do_switch = 1;
- perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+ if (likely(!ctx))
+ return;
- if (likely(!ctx || !cpuctx->task_ctx))
+ cpuctx = __get_cpu_context(ctx);
+ if (!cpuctx->task_ctx)
return;
rcu_read_lock();
parent = rcu_dereference(ctx->parent_ctx);
- next_ctx = next->perf_event_ctxp;
+ next_ctx = next->perf_event_ctxp[ctxn];
if (parent && next_ctx &&
rcu_dereference(next_ctx->parent_ctx) == parent) {
/*
@@ -1260,8 +1223,8 @@ void perf_event_task_sched_out(struct task_struct *task,
* XXX do we need a memory barrier of sorts
* wrt to rcu_dereference() of perf_event_ctxp
*/
- task->perf_event_ctxp = next_ctx;
- next->perf_event_ctxp = ctx;
+ task->perf_event_ctxp[ctxn] = next_ctx;
+ next->perf_event_ctxp[ctxn] = ctx;
ctx->task = next;
next_ctx->task = task;
do_switch = 0;
@@ -1279,10 +1242,35 @@ void perf_event_task_sched_out(struct task_struct *task,
}
}
+#define for_each_task_context_nr(ctxn) \
+ for ((ctxn) = 0; (ctxn) < perf_nr_task_contexts; (ctxn)++)
+
+/*
+ * Called from scheduler to remove the events of the current task,
+ * with interrupts disabled.
+ *
+ * We stop each event and update the event value in event->count.
+ *
+ * This does not protect us against NMI, but disable()
+ * sets the disabled bit in the control field of event _before_
+ * accessing the event control register. If a NMI hits, then it will
+ * not restart the event.
+ */
+void perf_event_task_sched_out(struct task_struct *task,
+ struct task_struct *next)
+{
+ int ctxn;
+
+ perf_sw_event(PERF_COUNT_SW_CONTEXT_SWITCHES, 1, 1, NULL, 0);
+
+ for_each_task_context_nr(ctxn)
+ perf_event_context_sched_out(task, ctxn, next);
+}
+
static void task_ctx_sched_out(struct perf_event_context *ctx,
enum event_type_t event_type)
{
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
if (!cpuctx->task_ctx)
return;
@@ -1355,9 +1343,10 @@ ctx_flexible_sched_in(struct perf_event_context *ctx,
if (event->cpu != -1 && event->cpu != smp_processor_id())
continue;
- if (group_can_go_on(event, cpuctx, can_add_hw))
+ if (group_can_go_on(event, cpuctx, can_add_hw)) {
if (group_sched_in(event, cpuctx, ctx))
can_add_hw = 0;
+ }
}
}
@@ -1373,8 +1362,6 @@ ctx_sched_in(struct perf_event_context *ctx,
ctx->timestamp = perf_clock();
- perf_disable();
-
/*
* First go through the list and put on any pinned groups
* in order to give them the best chance of going on.
@@ -1386,8 +1373,7 @@ ctx_sched_in(struct perf_event_context *ctx,
if (event_type & EVENT_FLEXIBLE)
ctx_flexible_sched_in(ctx, cpuctx);
- perf_enable();
- out:
+out:
raw_spin_unlock(&ctx->lock);
}
@@ -1399,43 +1385,28 @@ static void cpu_ctx_sched_in(struct perf_cpu_context *cpuctx,
ctx_sched_in(ctx, cpuctx, event_type);
}
-static void task_ctx_sched_in(struct task_struct *task,
+static void task_ctx_sched_in(struct perf_event_context *ctx,
enum event_type_t event_type)
{
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- struct perf_event_context *ctx = task->perf_event_ctxp;
+ struct perf_cpu_context *cpuctx;
- if (likely(!ctx))
- return;
+ cpuctx = __get_cpu_context(ctx);
if (cpuctx->task_ctx == ctx)
return;
+
ctx_sched_in(ctx, cpuctx, event_type);
cpuctx->task_ctx = ctx;
}
-/*
- * Called from scheduler to add the events of the current task
- * with interrupts disabled.
- *
- * We restore the event value and then enable it.
- *
- * This does not protect us against NMI, but enable()
- * sets the enabled bit in the control field of event _before_
- * accessing the event control register. If a NMI hits, then it will
- * keep the event running.
- */
-void perf_event_task_sched_in(struct task_struct *task)
-{
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
- struct perf_event_context *ctx = task->perf_event_ctxp;
- if (likely(!ctx))
- return;
+void perf_event_context_sched_in(struct perf_event_context *ctx)
+{
+ struct perf_cpu_context *cpuctx;
+ cpuctx = __get_cpu_context(ctx);
if (cpuctx->task_ctx == ctx)
return;
- perf_disable();
-
+ perf_pmu_disable(ctx->pmu);
/*
* We want to keep the following priority order:
* cpu pinned (that don't need to move), task pinned,
@@ -1449,7 +1420,37 @@ void perf_event_task_sched_in(struct task_struct *task)
cpuctx->task_ctx = ctx;
- perf_enable();
+ /*
+ * Since these rotations are per-cpu, we need to ensure the
+ * cpu-context we got scheduled on is actually rotating.
+ */
+ perf_pmu_rotate_start(ctx->pmu);
+ perf_pmu_enable(ctx->pmu);
+}
+
+/*
+ * Called from scheduler to add the events of the current task
+ * with interrupts disabled.
+ *
+ * We restore the event value and then enable it.
+ *
+ * This does not protect us against NMI, but enable()
+ * sets the enabled bit in the control field of event _before_
+ * accessing the event control register. If a NMI hits, then it will
+ * keep the event running.
+ */
+void perf_event_task_sched_in(struct task_struct *task)
+{
+ struct perf_event_context *ctx;
+ int ctxn;
+
+ for_each_task_context_nr(ctxn) {
+ ctx = task->perf_event_ctxp[ctxn];
+ if (likely(!ctx))
+ continue;
+
+ perf_event_context_sched_in(ctx);
+ }
}
#define MAX_INTERRUPTS (~0ULL)
@@ -1529,22 +1530,6 @@ do { \
return div64_u64(dividend, divisor);
}
-static void perf_event_stop(struct perf_event *event)
-{
- if (!event->pmu->stop)
- return event->pmu->disable(event);
-
- return event->pmu->stop(event);
-}
-
-static int perf_event_start(struct perf_event *event)
-{
- if (!event->pmu->start)
- return event->pmu->enable(event);
-
- return event->pmu->start(event);
-}
-
static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
{
struct hw_perf_event *hwc = &event->hw;
@@ -1564,15 +1549,13 @@ static void perf_adjust_period(struct perf_event *event, u64 nsec, u64 count)
hwc->sample_period = sample_period;
if (local64_read(&hwc->period_left) > 8*sample_period) {
- perf_disable();
- perf_event_stop(event);
+ event->pmu->stop(event, PERF_EF_UPDATE);
local64_set(&hwc->period_left, 0);
- perf_event_start(event);
- perf_enable();
+ event->pmu->start(event, PERF_EF_RELOAD);
}
}
-static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
+static void perf_ctx_adjust_freq(struct perf_event_context *ctx, u64 period)
{
struct perf_event *event;
struct hw_perf_event *hwc;
@@ -1597,23 +1580,19 @@ static void perf_ctx_adjust_freq(struct perf_event_context *ctx)
*/
if (interrupts == MAX_INTERRUPTS) {
perf_log_throttle(event, 1);
- perf_disable();
- event->pmu->unthrottle(event);
- perf_enable();
+ event->pmu->start(event, 0);
}
if (!event->attr.freq || !event->attr.sample_freq)
continue;
- perf_disable();
event->pmu->read(event);
now = local64_read(&event->count);
delta = now - hwc->freq_count_stamp;
hwc->freq_count_stamp = now;
if (delta > 0)
- perf_adjust_period(event, TICK_NSEC, delta);
- perf_enable();
+ perf_adjust_period(event, period, delta);
}
raw_spin_unlock(&ctx->lock);
}
@@ -1631,32 +1610,38 @@ static void rotate_ctx(struct perf_event_context *ctx)
raw_spin_unlock(&ctx->lock);
}
-void perf_event_task_tick(struct task_struct *curr)
+/*
+ * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized
+ * because they're strictly cpu affine and rotate_start is called with IRQs
+ * disabled, while rotate_context is called from IRQ context.
+ */
+static void perf_rotate_context(struct perf_cpu_context *cpuctx)
{
- struct perf_cpu_context *cpuctx;
- struct perf_event_context *ctx;
- int rotate = 0;
-
- if (!atomic_read(&nr_events))
- return;
+ u64 interval = (u64)cpuctx->jiffies_interval * TICK_NSEC;
+ struct perf_event_context *ctx = NULL;
+ int rotate = 0, remove = 1;
- cpuctx = &__get_cpu_var(perf_cpu_context);
- if (cpuctx->ctx.nr_events &&
- cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
- rotate = 1;
+ if (cpuctx->ctx.nr_events) {
+ remove = 0;
+ if (cpuctx->ctx.nr_events != cpuctx->ctx.nr_active)
+ rotate = 1;
+ }
- ctx = curr->perf_event_ctxp;
- if (ctx && ctx->nr_events && ctx->nr_events != ctx->nr_active)
- rotate = 1;
+ ctx = cpuctx->task_ctx;
+ if (ctx && ctx->nr_events) {
+ remove = 0;
+ if (ctx->nr_events != ctx->nr_active)
+ rotate = 1;
+ }
- perf_ctx_adjust_freq(&cpuctx->ctx);
+ perf_pmu_disable(cpuctx->ctx.pmu);
+ perf_ctx_adjust_freq(&cpuctx->ctx, interval);
if (ctx)
- perf_ctx_adjust_freq(ctx);
+ perf_ctx_adjust_freq(ctx, interval);
if (!rotate)
- return;
+ goto done;
- perf_disable();
cpu_ctx_sched_out(cpuctx, EVENT_FLEXIBLE);
if (ctx)
task_ctx_sched_out(ctx, EVENT_FLEXIBLE);
@@ -1667,8 +1652,27 @@ void perf_event_task_tick(struct task_struct *curr)
cpu_ctx_sched_in(cpuctx, EVENT_FLEXIBLE);
if (ctx)
- task_ctx_sched_in(curr, EVENT_FLEXIBLE);
- perf_enable();
+ task_ctx_sched_in(ctx, EVENT_FLEXIBLE);
+
+done:
+ if (remove)
+ list_del_init(&cpuctx->rotation_list);
+
+ perf_pmu_enable(cpuctx->ctx.pmu);
+}
+
+void perf_event_task_tick(void)
+{
+ struct list_head *head = &__get_cpu_var(rotation_list);
+ struct perf_cpu_context *cpuctx, *tmp;
+
+ WARN_ON(!irqs_disabled());
+
+ list_for_each_entry_safe(cpuctx, tmp, head, rotation_list) {
+ if (cpuctx->jiffies_interval == 1 ||
+ !(jiffies % cpuctx->jiffies_interval))
+ perf_rotate_context(cpuctx);
+ }
}
static int event_enable_on_exec(struct perf_event *event,
@@ -1690,20 +1694,18 @@ static int event_enable_on_exec(struct perf_event *event,
* Enable all of a task's events that have been marked enable-on-exec.
* This expects task == current.
*/
-static void perf_event_enable_on_exec(struct task_struct *task)
+static void perf_event_enable_on_exec(struct perf_event_context *ctx)
{
- struct perf_event_context *ctx;
struct perf_event *event;
unsigned long flags;
int enabled = 0;
int ret;
local_irq_save(flags);
- ctx = task->perf_event_ctxp;
if (!ctx || !ctx->nr_events)
goto out;
- __perf_event_task_sched_out(ctx);
+ task_ctx_sched_out(ctx, EVENT_ALL);
raw_spin_lock(&ctx->lock);
@@ -1727,8 +1729,8 @@ static void perf_event_enable_on_exec(struct task_struct *task)
raw_spin_unlock(&ctx->lock);
- perf_event_task_sched_in(task);
- out:
+ perf_event_context_sched_in(ctx);
+out:
local_irq_restore(flags);
}
@@ -1737,9 +1739,9 @@ static void perf_event_enable_on_exec(struct task_struct *task)
*/
static void __perf_event_read(void *info)
{
- struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
struct perf_event *event = info;
struct perf_event_context *ctx = event->ctx;
+ struct perf_cpu_context *cpuctx = __get_cpu_context(ctx);
/*
* If this is a task context, we need to check whether it is
@@ -1787,11 +1789,219 @@ static u64 perf_event_read(struct perf_event *event)
}
/*
- * Initialize the perf_event context in a task_struct:
+ * Callchain support
*/
+
+struct callchain_cpus_entries {
+ struct rcu_head rcu_head;
+ struct perf_callchain_entry *cpu_entries[0];
+};
+
+static DEFINE_PER_CPU(int, callchain_recursion[PERF_NR_CONTEXTS]);
+static atomic_t nr_callchain_events;
+static DEFINE_MUTEX(callchain_mutex);
+struct callchain_cpus_entries *callchain_cpus_entries;
+
+
+__weak void perf_callchain_kernel(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+}
+
+__weak void perf_callchain_user(struct perf_callchain_entry *entry,
+ struct pt_regs *regs)
+{
+}
+
+static void release_callchain_buffers_rcu(struct rcu_head *head)
+{
+ struct callchain_cpus_entries *entries;
+ int cpu;
+
+ entries = container_of(head, struct callchain_cpus_entries, rcu_head);
+
+ for_each_possible_cpu(cpu)
+ kfree(entries->cpu_entries[cpu]);
+
+ kfree(entries);
+}
+
+static void release_callchain_buffers(void)
+{
+ struct callchain_cpus_entries *entries;
+
+ entries = callchain_cpus_entries;
+ rcu_assign_pointer(callchain_cpus_entries, NULL);
+ call_rcu(&entries->rcu_head, release_callchain_buffers_rcu);
+}
+
+static int alloc_callchain_buffers(void)
+{
+ int cpu;
+ int size;
+ struct callchain_cpus_entries *entries;
+
+ /*
+ * We can't use the percpu allocation API for data that can be
+ * accessed from NMI. Use a temporary manual per cpu allocation
+ * until that gets sorted out.
+ */
+ size = sizeof(*entries) + sizeof(struct perf_callchain_entry *) *
+ num_possible_cpus();
+
+ entries = kzalloc(size, GFP_KERNEL);
+ if (!entries)
+ return -ENOMEM;
+
+ size = sizeof(struct perf_callchain_entry) * PERF_NR_CONTEXTS;
+
+ for_each_possible_cpu(cpu) {
+ entries->cpu_entries[cpu] = kmalloc_node(size, GFP_KERNEL,
+ cpu_to_node(cpu));
+ if (!entries->cpu_entries[cpu])
+ goto fail;
+ }
+
+ rcu_assign_pointer(callchain_cpus_entries, entries);
+
+ return 0;
+
+fail:
+ for_each_possible_cpu(cpu)
+ kfree(entries->cpu_entries[cpu]);
+ kfree(entries);
+
+ return -ENOMEM;
+}
+
+static int get_callchain_buffers(void)
+{
+ int err = 0;
+ int count;
+
+ mutex_lock(&callchain_mutex);
+
+ count = atomic_inc_return(&nr_callchain_events);
+ if (WARN_ON_ONCE(count < 1)) {
+ err = -EINVAL;
+ goto exit;
+ }
+
+ if (count > 1) {
+ /* If the allocation failed, give up */
+ if (!callchain_cpus_entries)
+ err = -ENOMEM;
+ goto exit;
+ }
+
+ err = alloc_callchain_buffers();
+ if (err)
+ release_callchain_buffers();
+exit:
+ mutex_unlock(&callchain_mutex);
+
+ return err;
+}
+
+static void put_callchain_buffers(void)
+{
+ if (atomic_dec_and_mutex_lock(&nr_callchain_events, &callchain_mutex)) {
+ release_callchain_buffers();
+ mutex_unlock(&callchain_mutex);
+ }
+}
+
+static int get_recursion_context(int *recursion)
+{
+ int rctx;
+
+ if (in_nmi())
+ rctx = 3;
+ else if (in_irq())
+ rctx = 2;
+ else if (in_softirq())
+ rctx = 1;
+ else
+ rctx = 0;
+
+ if (recursion[rctx])
+ return -1;
+
+ recursion[rctx]++;
+ barrier();
+
+ return rctx;
+}
+
+static inline void put_recursion_context(int *recursion, int rctx)
+{
+ barrier();
+ recursion[rctx]--;
+}
+
+static struct perf_callchain_entry *get_callchain_entry(int *rctx)
+{
+ int cpu;
+ struct callchain_cpus_entries *entries;
+
+ *rctx = get_recursion_context(__get_cpu_var(callchain_recursion));
+ if (*rctx == -1)
+ return NULL;
+
+ entries = rcu_dereference(callchain_cpus_entries);
+ if (!entries)
+ return NULL;
+
+ cpu = smp_processor_id();
+
+ return &entries->cpu_entries[cpu][*rctx];
+}
+
static void
-__perf_event_init_context(struct perf_event_context *ctx,
- struct task_struct *task)
+put_callchain_entry(int rctx)
+{
+ put_recursion_context(__get_cpu_var(callchain_recursion), rctx);
+}
+
+static struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
+{
+ int rctx;
+ struct perf_callchain_entry *entry;
+
+
+ entry = get_callchain_entry(&rctx);
+ if (rctx == -1)
+ return NULL;
+
+ if (!entry)
+ goto exit_put;
+
+ entry->nr = 0;
+
+ if (!user_mode(regs)) {
+ perf_callchain_store(entry, PERF_CONTEXT_KERNEL);
+ perf_callchain_kernel(entry, regs);
+ if (current->mm)
+ regs = task_pt_regs(current);
+ else
+ regs = NULL;
+ }
+
+ if (regs) {
+ perf_callchain_store(entry, PERF_CONTEXT_USER);
+ perf_callchain_user(entry, regs);
+ }
+
+exit_put:
+ put_callchain_entry(rctx);
+
+ return entry;
+}
+
+/*
+ * Initialize the perf_event context in a task_struct:
+ */
+static void __perf_event_init_context(struct perf_event_context *ctx)
{
raw_spin_lock_init(&ctx->lock);
mutex_init(&ctx->mutex);
@@ -1799,45 +2009,38 @@ __perf_event_init_context(struct perf_event_context *ctx,
INIT_LIST_HEAD(&ctx->flexible_groups);
INIT_LIST_HEAD(&ctx->event_list);
atomic_set(&ctx->refcount, 1);
- ctx->task = task;
}
-static struct perf_event_context *find_get_context(pid_t pid, int cpu)
+static struct perf_event_context *
+alloc_perf_context(struct pmu *pmu, struct task_struct *task)
{
struct perf_event_context *ctx;
- struct perf_cpu_context *cpuctx;
- struct task_struct *task;
- unsigned long flags;
- int err;
-
- if (pid == -1 && cpu != -1) {
- /* Must be root to operate on a CPU event: */
- if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
- return ERR_PTR(-EACCES);
- if (cpu < 0 || cpu >= nr_cpumask_bits)
- return ERR_PTR(-EINVAL);
+ ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+ if (!ctx)
+ return NULL;
- /*
- * We could be clever and allow to attach a event to an
- * offline CPU and activate it when the CPU comes up, but
- * that's for later.
- */
- if (!cpu_online(cpu))
- return ERR_PTR(-ENODEV);
+ __perf_event_init_context(ctx);
+ if (task) {
+ ctx->task = task;
+ get_task_struct(task);
+ }
+ ctx->pmu = pmu;
- cpuctx = &per_cpu(perf_cpu_context, cpu);
- ctx = &cpuctx->ctx;
- get_ctx(ctx);
+ return ctx;
+}
- return ctx;
- }
+static struct task_struct *
+find_lively_task_by_vpid(pid_t vpid)
+{
+ struct task_struct *task;
+ int err;
rcu_read_lock();
- if (!pid)
+ if (!vpid)
task = current;
else
- task = find_task_by_vpid(pid);
+ task = find_task_by_vpid(vpid);
if (task)
get_task_struct(task);
rcu_read_unlock();
@@ -1857,35 +2060,79 @@ static struct perf_event_context *find_get_context(pid_t pid, int cpu)
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto errout;
- retry:
- ctx = perf_lock_task_context(task, &flags);
+ return task;
+errout:
+ put_task_struct(task);
+ return ERR_PTR(err);
+
+}
+
+static struct perf_event_context *
+find_get_context(struct pmu *pmu, struct task_struct *task, int cpu)
+{
+ struct perf_event_context *ctx;
+ struct perf_cpu_context *cpuctx;
+ unsigned long flags;
+ int ctxn, err;
+
+ if (!task && cpu != -1) {
+ /* Must be root to operate on a CPU event: */
+ if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
+ return ERR_PTR(-EACCES);
+
+ if (cpu < 0 || cpu >= nr_cpumask_bits)
+ return ERR_PTR(-EINVAL);
+
+ /*
+ * We could be clever and allow to attach a event to an
+ * offline CPU and activate it when the CPU comes up, but
+ * that's for later.
+ */
+ if (!cpu_online(cpu))
+ return ERR_PTR(-ENODEV);
+
+ cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+ ctx = &cpuctx->ctx;
+ get_ctx(ctx);
+
+ return ctx;
+ }
+
+ err = -EINVAL;
+ ctxn = pmu->task_ctx_nr;
+ if (ctxn < 0)
+ goto errout;
+
+retry:
+ ctx = perf_lock_task_context(task, ctxn, &flags);
if (ctx) {
unclone_ctx(ctx);
raw_spin_unlock_irqrestore(&ctx->lock, flags);
}
if (!ctx) {
- ctx = kzalloc(sizeof(struct perf_event_context), GFP_KERNEL);
+ ctx = alloc_perf_context(pmu, task);
err = -ENOMEM;
if (!ctx)
goto errout;
- __perf_event_init_context(ctx, task);
+
get_ctx(ctx);
- if (cmpxchg(&task->perf_event_ctxp, NULL, ctx)) {
+
+ if (cmpxchg(&task->perf_event_ctxp[ctxn], NULL, ctx)) {
/*
* We raced with some other task; use
* the context they set.
*/
+ put_task_struct(task);
kfree(ctx);
goto retry;