diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/Makefile | 2 | ||||
-rw-r--r-- | kernel/exit.c | 4 | ||||
-rw-r--r-- | kernel/hw_breakpoint.c | 67 | ||||
-rw-r--r-- | kernel/jump_label.c | 429 | ||||
-rw-r--r-- | kernel/kprobes.c | 26 | ||||
-rw-r--r-- | kernel/module.c | 6 | ||||
-rw-r--r-- | kernel/perf_event.c | 2355 | ||||
-rw-r--r-- | kernel/sched.c | 2 | ||||
-rw-r--r-- | kernel/trace/ftrace.c | 123 | ||||
-rw-r--r-- | kernel/trace/ring_buffer.c | 21 | ||||
-rw-r--r-- | kernel/trace/trace_event_perf.c | 28 | ||||
-rw-r--r-- | kernel/trace/trace_events.c | 55 | ||||
-rw-r--r-- | kernel/trace/trace_functions_graph.c | 126 | ||||
-rw-r--r-- | kernel/trace/trace_workqueue.c | 10 | ||||
-rw-r--r-- | kernel/tracepoint.c | 14 | ||||
-rw-r--r-- | kernel/watchdog.c | 41 |
16 files changed, 2206 insertions, 1103 deletions
diff --git a/kernel/Makefile b/kernel/Makefile index 0b72d1a74be..d52b473c99a 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -10,7 +10,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \ notifier.o ksysfs.o pm_qos_params.o sched_clock.o cred.o \ - async.o range.o + async.o range.o jump_label.o obj-$(CONFIG_HAVE_EARLY_RES) += early_res.o obj-y += groups.o diff --git a/kernel/exit.c b/kernel/exit.c index 03120229db2..e2bdf37f9fd 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -149,9 +149,7 @@ static void delayed_put_task_struct(struct rcu_head *rhp) { struct task_struct *tsk = container_of(rhp, struct task_struct, rcu); -#ifdef CONFIG_PERF_EVENTS - WARN_ON_ONCE(tsk->perf_event_ctxp); -#endif + perf_event_delayed_put(tsk); trace_sched_process_free(tsk); put_task_struct(tsk); } diff --git a/kernel/hw_breakpoint.c b/kernel/hw_breakpoint.c index c7c2aed9e2d..3b714e839c1 100644 --- a/kernel/hw_breakpoint.c +++ b/kernel/hw_breakpoint.c @@ -433,8 +433,7 @@ register_user_hw_breakpoint(struct perf_event_attr *attr, perf_overflow_handler_t triggered, struct task_struct *tsk) { - return perf_event_create_kernel_counter(attr, -1, task_pid_vnr(tsk), - triggered); + return perf_event_create_kernel_counter(attr, -1, tsk, triggered); } EXPORT_SYMBOL_GPL(register_user_hw_breakpoint); @@ -516,7 +515,7 @@ register_wide_hw_breakpoint(struct perf_event_attr *attr, get_online_cpus(); for_each_online_cpu(cpu) { pevent = per_cpu_ptr(cpu_events, cpu); - bp = perf_event_create_kernel_counter(attr, cpu, -1, triggered); + bp = perf_event_create_kernel_counter(attr, cpu, NULL, triggered); *pevent = bp; @@ -566,6 +565,61 @@ static struct notifier_block hw_breakpoint_exceptions_nb = { .priority = 0x7fffffff }; +static void bp_perf_event_destroy(struct perf_event *event) +{ + release_bp_slot(event); +} + +static int hw_breakpoint_event_init(struct perf_event *bp) +{ + int err; + + if (bp->attr.type != PERF_TYPE_BREAKPOINT) + return -ENOENT; + + err = register_perf_hw_breakpoint(bp); + if (err) + return err; + + bp->destroy = bp_perf_event_destroy; + + return 0; +} + +static int hw_breakpoint_add(struct perf_event *bp, int flags) +{ + if (!(flags & PERF_EF_START)) + bp->hw.state = PERF_HES_STOPPED; + + return arch_install_hw_breakpoint(bp); +} + +static void hw_breakpoint_del(struct perf_event *bp, int flags) +{ + arch_uninstall_hw_breakpoint(bp); +} + +static void hw_breakpoint_start(struct perf_event *bp, int flags) +{ + bp->hw.state = 0; +} + +static void hw_breakpoint_stop(struct perf_event *bp, int flags) +{ + bp->hw.state = PERF_HES_STOPPED; +} + +static struct pmu perf_breakpoint = { + .task_ctx_nr = perf_sw_context, /* could eventually get its own */ + + .event_init = hw_breakpoint_event_init, + .add = hw_breakpoint_add, + .del = hw_breakpoint_del, + .start = hw_breakpoint_start, + .stop = hw_breakpoint_stop, + .read = hw_breakpoint_pmu_read, +}; + static int __init init_hw_breakpoint(void) { unsigned int **task_bp_pinned; @@ -587,6 +641,8 @@ static int __init init_hw_breakpoint(void) constraints_initialized = 1; + perf_pmu_register(&perf_breakpoint); + return register_die_notifier(&hw_breakpoint_exceptions_nb); err_alloc: @@ -602,8 +658,3 @@ static int __init init_hw_breakpoint(void) core_initcall(init_hw_breakpoint); -struct pmu perf_ops_bp = { - .enable = arch_install_hw_breakpoint, - .disable = arch_uninstall_hw_breakpoint, - .read = hw_breakpoint_pmu_read, -}; diff --git a/kernel/jump_label.c b/kernel/jump_label.c new file mode 100644 index 00000000000..7be868bf25c --- /dev/null +++ b/kernel/jump_label.c @@ -0,0 +1,429 @@ +/* + * jump label support + * + * Copyright (C) 2009 Jason Baron <jbaron@redhat.com> + * + */ +#include <linux/jump_label.h> +#include <linux/memory.h> +#include <linux/uaccess.h> +#include <linux/module.h> +#include <linux/list.h> +#include <linux/jhash.h> +#include <linux/slab.h> +#include <linux/sort.h> +#include <linux/err.h> + +#ifdef HAVE_JUMP_LABEL + +#define JUMP_LABEL_HASH_BITS 6 +#define JUMP_LABEL_TABLE_SIZE (1 << JUMP_LABEL_HASH_BITS) +static struct hlist_head jump_label_table[JUMP_LABEL_TABLE_SIZE]; + +/* mutex to protect coming/going of the the jump_label table */ +static DEFINE_MUTEX(jump_label_mutex); + +struct jump_label_entry { + struct hlist_node hlist; + struct jump_entry *table; + int nr_entries; + /* hang modules off here */ + struct hlist_head modules; + unsigned long key; +}; + +struct jump_label_module_entry { + struct hlist_node hlist; + struct jump_entry *table; + int nr_entries; + struct module *mod; +}; + +static int jump_label_cmp(const void *a, const void *b) +{ + const struct jump_entry *jea = a; + const struct jump_entry *jeb = b; + + if (jea->key < jeb->key) + return -1; + + if (jea->key > jeb->key) + return 1; + + return 0; +} + +static void +sort_jump_label_entries(struct jump_entry *start, struct jump_entry *stop) +{ + unsigned long size; + + size = (((unsigned long)stop - (unsigned long)start) + / sizeof(struct jump_entry)); + sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); +} + +static struct jump_label_entry *get_jump_label_entry(jump_label_t key) +{ + struct hlist_head *head; + struct hlist_node *node; + struct jump_label_entry *e; + u32 hash = jhash((void *)&key, sizeof(jump_label_t), 0); + + head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; + hlist_for_each_entry(e, node, head, hlist) { + if (key == e->key) + return e; + } + return NULL; +} + +static struct jump_label_entry * +add_jump_label_entry(jump_label_t key, int nr_entries, struct jump_entry *table) +{ + struct hlist_head *head; + struct jump_label_entry *e; + u32 hash; + + e = get_jump_label_entry(key); + if (e) + return ERR_PTR(-EEXIST); + + e = kmalloc(sizeof(struct jump_label_entry), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + + hash = jhash((void *)&key, sizeof(jump_label_t), 0); + head = &jump_label_table[hash & (JUMP_LABEL_TABLE_SIZE - 1)]; + e->key = key; + e->table = table; + e->nr_entries = nr_entries; + INIT_HLIST_HEAD(&(e->modules)); + hlist_add_head(&e->hlist, head); + return e; +} + +static int +build_jump_label_hashtable(struct jump_entry *start, struct jump_entry *stop) +{ + struct jump_entry *iter, *iter_begin; + struct jump_label_entry *entry; + int count; + + sort_jump_label_entries(start, stop); + iter = start; + while (iter < stop) { + entry = get_jump_label_entry(iter->key); + if (!entry) { + iter_begin = iter; + count = 0; + while ((iter < stop) && + (iter->key == iter_begin->key)) { + iter++; + count++; + } + entry = add_jump_label_entry(iter_begin->key, + count, iter_begin); + if (IS_ERR(entry)) + return PTR_ERR(entry); + } else { + WARN_ONCE(1, KERN_ERR "build_jump_hashtable: unexpected entry!\n"); + return -1; + } + } + return 0; +} + +/*** + * jump_label_update - update jump label text + * @key - key value associated with a a jump label + * @type - enum set to JUMP_LABEL_ENABLE or JUMP_LABEL_DISABLE + * + * Will enable/disable the jump for jump label @key, depending on the + * value of @type. + * + */ + +void jump_label_update(unsigned long key, enum jump_label_type type) +{ + struct jump_entry *iter; + struct jump_label_entry *entry; + struct hlist_node *module_node; + struct jump_label_module_entry *e_module; + int count; + + mutex_lock(&jump_label_mutex); + entry = get_jump_label_entry((jump_label_t)key); + if (entry) { + count = entry->nr_entries; + iter = entry->table; + while (count--) { + if (kernel_text_address(iter->code)) + arch_jump_label_transform(iter, type); + iter++; + } + /* eanble/disable jump labels in modules */ + hlist_for_each_entry(e_module, module_node, &(entry->modules), + hlist) { + count = e_module->nr_entries; + iter = e_module->table; + while (count--) { + if (kernel_text_address(iter->code)) + arch_jump_label_transform(iter, type); + iter++; + } + } + } + mutex_unlock(&jump_label_mutex); +} + +static int addr_conflict(struct jump_entry *entry, void *start, void *end) +{ + if (entry->code <= (unsigned long)end && + entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start) + return 1; + + return 0; +} + +#ifdef CONFIG_MODULES + +static int module_conflict(void *start, void *end) +{ + struct hlist_head *head; + struct hlist_node *node, *node_next, *module_node, *module_node_next; + struct jump_label_entry *e; + struct jump_label_module_entry *e_module; + struct jump_entry *iter; + int i, count; + int conflict = 0; + + for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { + head = &jump_label_table[i]; + hlist_for_each_entry_safe(e, node, node_next, head, hlist) { + hlist_for_each_entry_safe(e_module, module_node, + module_node_next, + &(e->modules), hlist) { + count = e_module->nr_entries; + iter = e_module->table; + while (count--) { + if (addr_conflict(iter, start, end)) { + conflict = 1; + goto out; + } + iter++; + } + } + } + } +out: + return conflict; +} + +#endif + +/*** + * jump_label_text_reserved - check if addr range is reserved + * @start: start text addr + * @end: end text addr + * + * checks if the text addr located between @start and @end + * overlaps with any of the jump label patch addresses. Code + * that wants to modify kernel text should first verify that + * it does not overlap with any of the jump label addresses. + * + * returns 1 if there is an overlap, 0 otherwise + */ +int jump_label_text_reserved(void *start, void *end) +{ + struct jump_entry *iter; + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __start___jump_table; + int conflict = 0; + + mutex_lock(&jump_label_mutex); + iter = iter_start; + while (iter < iter_stop) { + if (addr_conflict(iter, start, end)) { + conflict = 1; + goto out; + } + iter++; + } + + /* now check modules */ +#ifdef CONFIG_MODULES + conflict = module_conflict(start, end); +#endif +out: + mutex_unlock(&jump_label_mutex); + return conflict; +} + +static __init int init_jump_label(void) +{ + int ret; + struct jump_entry *iter_start = __start___jump_table; + struct jump_entry *iter_stop = __stop___jump_table; + struct jump_entry *iter; + + mutex_lock(&jump_label_mutex); + ret = build_jump_label_hashtable(__start___jump_table, + __stop___jump_table); + iter = iter_start; + while (iter < iter_stop) { + arch_jump_label_text_poke_early(iter->code); + iter++; + } + mutex_unlock(&jump_label_mutex); + return ret; +} +early_initcall(init_jump_label); + +#ifdef CONFIG_MODULES + +static struct jump_label_module_entry * +add_jump_label_module_entry(struct jump_label_entry *entry, + struct jump_entry *iter_begin, + int count, struct module *mod) +{ + struct jump_label_module_entry *e; + + e = kmalloc(sizeof(struct jump_label_module_entry), GFP_KERNEL); + if (!e) + return ERR_PTR(-ENOMEM); + e->mod = mod; + e->nr_entries = count; + e->table = iter_begin; + hlist_add_head(&e->hlist, &entry->modules); + return e; +} + +static int add_jump_label_module(struct module *mod) +{ + struct jump_entry *iter, *iter_begin; + struct jump_label_entry *entry; + struct jump_label_module_entry *module_entry; + int count; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return 0; + + sort_jump_label_entries(mod->jump_entries, + mod->jump_entries + mod->num_jump_entries); + iter = mod->jump_entries; + while (iter < mod->jump_entries + mod->num_jump_entries) { + entry = get_jump_label_entry(iter->key); + iter_begin = iter; + count = 0; + while ((iter < mod->jump_entries + mod->num_jump_entries) && + (iter->key == iter_begin->key)) { + iter++; + count++; + } + if (!entry) { + entry = add_jump_label_entry(iter_begin->key, 0, NULL); + if (IS_ERR(entry)) + return PTR_ERR(entry); + } + module_entry = add_jump_label_module_entry(entry, iter_begin, + count, mod); + if (IS_ERR(module_entry)) + return PTR_ERR(module_entry); + } + return 0; +} + +static void remove_jump_label_module(struct module *mod) +{ + struct hlist_head *head; + struct hlist_node *node, *node_next, *module_node, *module_node_next; + struct jump_label_entry *e; + struct jump_label_module_entry *e_module; + int i; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + for (i = 0; i < JUMP_LABEL_TABLE_SIZE; i++) { + head = &jump_label_table[i]; + hlist_for_each_entry_safe(e, node, node_next, head, hlist) { + hlist_for_each_entry_safe(e_module, module_node, + module_node_next, + &(e->modules), hlist) { + if (e_module->mod == mod) { + hlist_del(&e_module->hlist); + kfree(e_module); + } + } + if (hlist_empty(&e->modules) && (e->nr_entries == 0)) { + hlist_del(&e->hlist); + kfree(e); + } + } + } +} + +static int +jump_label_module_notify(struct notifier_block *self, unsigned long val, + void *data) +{ + struct module *mod = data; + int ret = 0; + + switch (val) { + case MODULE_STATE_COMING: + mutex_lock(&jump_label_mutex); + ret = add_jump_label_module(mod); + if (ret) + remove_jump_label_module(mod); + mutex_unlock(&jump_label_mutex); + break; + case MODULE_STATE_GOING: + mutex_lock(&jump_label_mutex); + remove_jump_label_module(mod); + mutex_unlock(&jump_label_mutex); + break; + } + return ret; +} + +/*** + * apply_jump_label_nops - patch module jump labels with arch_get_jump_label_nop() + * @mod: module to patch + * + * Allow for run-time selection of the optimal nops. Before the module + * loads patch these with arch_get_jump_label_nop(), which is specified by + * the arch specific jump label code. + */ +void jump_label_apply_nops(struct module *mod) +{ + struct jump_entry *iter; + + /* if the module doesn't have jump label entries, just return */ + if (!mod->num_jump_entries) + return; + + iter = mod->jump_entries; + while (iter < mod->jump_entries + mod->num_jump_entries) { + arch_jump_label_text_poke_early(iter->code); + iter++; + } +} + +struct notifier_block jump_label_module_nb = { + .notifier_call = jump_label_module_notify, + .priority = 0, +}; + +static __init int init_jump_label_module(void) +{ + return register_module_notifier(&jump_label_module_nb); +} +early_initcall(init_jump_label_module); + +#endif /* CONFIG_MODULES */ + +#endif diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 282035f3ae9..ec4210c6501 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -47,6 +47,7 @@ #include <linux/memory.h> #include <linux/ftrace.h> #include <linux/cpu.h> +#include <linux/jump_label.h> #include <asm-generic/sections.h> #include <asm/cacheflush.h> @@ -399,7 +400,7 @@ static inline int kprobe_optready(struct kprobe *p) * Return an optimized kprobe whose optimizing code replaces * instructions including addr (exclude breakpoint). */ -struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) +static struct kprobe *__kprobes get_optimized_kprobe(unsigned long addr) { int i; struct kprobe *p = NULL; @@ -831,6 +832,7 @@ void __kprobes recycle_rp_inst(struct kretprobe_instance *ri, void __kprobes kretprobe_hash_lock(struct task_struct *tsk, struct hlist_head **head, unsigned long *flags) +__acquires(hlist_lock) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); spinlock_t *hlist_lock; @@ -842,6 +844,7 @@ void __kprobes kretprobe_hash_lock(struct task_struct *tsk, static void __kprobes kretprobe_table_lock(unsigned long hash, unsigned long *flags) +__acquires(hlist_lock) { spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); spin_lock_irqsave(hlist_lock, *flags); @@ -849,6 +852,7 @@ static void __kprobes kretprobe_table_lock(unsigned long hash, void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, unsigned long *flags) +__releases(hlist_lock) { unsigned long hash = hash_ptr(tsk, KPROBE_HASH_BITS); spinlock_t *hlist_lock; @@ -857,7 +861,9 @@ void __kprobes kretprobe_hash_unlock(struct task_struct *tsk, spin_unlock_irqrestore(hlist_lock, *flags); } -void __kprobes kretprobe_table_unlock(unsigned long hash, unsigned long *flags) +static void __kprobes kretprobe_table_unlock(unsigned long hash, + unsigned long *flags) +__releases(hlist_lock) { spinlock_t *hlist_lock = kretprobe_table_lock_ptr(hash); spin_unlock_irqrestore(hlist_lock, *flags); @@ -1141,7 +1147,8 @@ int __kprobes register_kprobe(struct kprobe *p) preempt_disable(); if (!kernel_text_address((unsigned long) p->addr) || in_kprobes_functions((unsigned long) p->addr) || - ftrace_text_reserved(p->addr, p->addr)) { + ftrace_text_reserved(p->addr, p->addr) || + jump_label_text_reserved(p->addr, p->addr)) { preempt_enable(); return -EINVAL; } @@ -1339,18 +1346,19 @@ int __kprobes register_jprobes(struct jprobe **jps, int num) if (num <= 0) return -EINVAL; for (i = 0; i < num; i++) { - unsigned long addr; + unsigned long addr, offset; jp = jps[i]; addr = arch_deref_entry_point(jp->entry); - if (!kernel_text_address(addr)) - ret = -EINVAL; - else { - /* Todo: Verify probepoint is a function entry point */ + /* Verify probepoint is a function entry point */ + if (kallsyms_lookup_size_offset(addr, NULL, &offset) && + offset == 0) { jp->kp.pre_handler = setjmp_pre_handler; jp->kp.break_handler = longjmp_break_handler; ret = register_kprobe(&jp->kp); - } + } else + ret = -EINVAL; + if (ret < 0) { if (i > 0) unregister_jprobes(jps, i); diff --git a/kernel/module.c b/kernel/module.c index d0b5f8db11b..eba134157ef 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -55,6 +55,7 @@ #include <linux/async.h> #include <linux/percpu.h> #include <linux/kmemleak.h> +#include <linux/jump_label.h> #define CREATE_TRACE_POINTS #include <trace/events/module.h> @@ -2308,6 +2309,11 @@ static void find_module_sections(struct module *mod, struct load_info *info) sizeof(*mod->tracepoints), &mod->num_tracepoints); #endif +#ifdef HAVE_JUMP_LABEL + mod->jump_entries = section_objs(info, "__jump_table", + sizeof(*mod->jump_entries), + &mod->num_jump_entries); +#endif #ifdef CONFIG_EVENT_TRACING mod->trace_events = section_objs(info, "_ftrace_events", sizeof(*mod->trace_events), diff --git a/kernel/perf_event.c b/kernel/perf_event.c index db5b5606468..64507eaa2d9 100644 --- a/kernel/perf_event.c +++ b/kernel/perf_event.c @@ -31,24 +31,18 @@ #include <linux/kernel_stat.h> #include <linux/perf_event.h> #include <linux/ftrace_event.h> -#include <linux/hw_breakpoint.h> #include <asm/irq_regs.h> -/* - * Each CPU has a list of per CPU events: - */ -static DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context); - -int perf_max_events __read_mostly = 1; -static int perf_reserved_percpu __read_mostly; -static int perf_overcommit __read_mostly = 1; - static atomic_t nr_events __read_mostly; static atomic_t nr_mmap_events __read_mostly; static atomic_t nr_comm_events __read_mostly; static atomic_t nr_task_events __read_mostly; +static LIST_HEAD(pmus); +static DEFINE_MUTEX(pmus_lock); +static struct srcu_struct pmus_srcu; + /* * perf event paranoia level: * -1 - not paranoid at all @@ -67,36 +61,38 @@ int sysctl_perf_event_sample_rate __read_mostly = 100000; static atomic64_t perf_event_id; -/* - * Lock for (sysadmin-configurable) event reservations: - */ -static DEFINE_SPINLOCK(perf_resource_lock); +void __weak perf_event_print_debug(void) { } -/* - * Architecture provided APIs - weak aliases: - */ -extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) +void perf_pmu_disable(struct pmu *pmu) { - return NULL; + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!(*count)++) + pmu->pmu_disable(pmu); } -void __weak hw_perf_disable(void) { barrier(); } -void __weak hw_perf_enable(void) { barrier(); } - -void __weak perf_event_print_debug(void) { } - -static DEFINE_PER_CPU(int, perf_disable_count); - -void perf_disable(void) +void perf_pmu_enable(struct pmu *pmu) { - if (!__get_cpu_var(perf_disable_count)++) - hw_perf_disable(); + int *count = this_cpu_ptr(pmu->pmu_disable_count); + if (!--(*count)) + pmu->pmu_enable(pmu); } -void perf_enable(void) +static DEFINE_PER_CPU(struct list_head, rotation_list); + +/* + * perf_pmu_rotate_start() and perf_rotate_context() are fully serialized + * because they're strictly cpu affine and rotate_start is called with IRQs + * disabled, while rotate_context is called from IRQ context. + */ +static void perf_pmu_rotate_start(struct pmu *pmu) { - if (!--__get_cpu_var(perf_disable_count)) - hw_perf_enable(); + struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); + struct list_head *head = &__get_cpu_var(rotation_list); + + WARN_ON(!irqs_disabled()); + + if (list_empty(&cpuctx->rotation_list)) + list_add(&cpuctx->rotation_list, head); } static void get_ctx(struct perf_event_context *ctx) @@ -151,13 +147,13 @@ static u64 primary_event_id(struct perf_event *event) * the context could get moved to another task. */ static struct perf_event_context * -perf_lock_task_context(struct task_struct *task, unsigned long *flags) +perf_lock_task_context(struct task_struct *task, int ctxn, unsigned long *flags) { struct perf_event_context *ctx; rcu_read_lock(); - retry: - ctx = rcu_dereference(task->perf_event_ctxp); +retry: + ctx = rcu_dereference(task->perf_event_ctxp[ctxn]); if (ctx) { /* * If this context is a clone of another, it might @@ -170,7 +166,7 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) * can't get swapped on us any more. */ raw_spin_lock_irqsave(&ctx->lock, *flags); - if (ctx != rcu_dereference(task->perf_event_ctxp)) { + if (ctx != rcu_dereference(task->perf_event_ctxp[ctxn])) { raw_spin_unlock_irqrestore(&ctx->lock, *flags); goto retry; } @@ -189,12 +185,13 @@ perf_lock_task_context(struct task_struct *task, unsigned long *flags) * can't get swapped to another task. This also increments its * reference count so that the context can't get freed. */ -static struct perf_event_context *perf_pin_task_context(struct task_struct *task) +static struct perf_event_context * +perf_pin_task_context(struct task_struct *task, int ctxn) { struct perf_event_context *ctx; unsigned long flags; - ctx = perf_lock_task_context(task, &flags); + ctx = perf_lock_task_context(task, ctxn, &flags); if (ctx) { ++ctx->pin_count; raw_spin_unlock_irqrestore(&ctx->lock, flags); @@ -302,6 +299,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx) } list_add_rcu(&event->event_entry, &ctx->event_list); + if (!ctx->nr_events) + perf_pmu_rotate_start(ctx->pmu); ctx->nr_events++; if (event->attr.inherit_stat) ctx->nr_stat++; @@ -436,7 +435,7 @@ event_sched_out(struct perf_event *event, event->state = PERF_EVENT_STATE_OFF; } event->tstamp_stopped = ctx->time; - event->pmu->disable(event); + event->pmu->del(event, 0); event->oncpu = -1; if (!is_software_event(event)) @@ -466,6 +465,12 @@ group_sched_out(struct perf_event *group_event, cpuctx->exclusive = 0; } +static inline struct perf_cpu_context * +__get_cpu_context(struct perf_event_context *ctx) +{ + return this_cpu_ptr(ctx->pmu->pmu_cpu_context); +} + /* * Cross CPU call to remove a performance event * @@ -474,9 +479,9 @@ group_sched_out(struct perf_event *group_event, */ static void __perf_event_remove_from_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a task context, we need to check whether it is @@ -487,27 +492,11 @@ static void __perf_event_remove_from_context(void *info) return; raw_spin_lock(&ctx->lock); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. - */ - perf_disable(); event_sched_out(event, cpuctx, ctx); list_del_event(event, ctx); - if (!ctx->task) { - /* - * Allow more per task events with respect to the - * reservation: - */ - cpuctx->max_pertask = - min(perf_max_events - ctx->nr_events, - perf_max_events - perf_reserved_percpu); - } - - perf_enable(); raw_spin_unlock(&ctx->lock); } @@ -572,8 +561,8 @@ retry: static void __perf_event_disable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); /* * If this is a per-task event, need to check whether this @@ -628,7 +617,7 @@ void perf_event_disable(struct perf_event *event) return; } - retry: +retry: task_oncpu_function_call(task, __perf_event_disable, event); raw_spin_lock_irq(&ctx->lock); @@ -667,7 +656,7 @@ event_sched_in(struct perf_event *event, */ smp_wmb(); - if (event->pmu->enable(event)) { + if (event->pmu->add(event, PERF_EF_START)) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; return -EAGAIN; @@ -691,22 +680,15 @@ group_sched_in(struct perf_event *group_event, struct perf_event_context *ctx) { struct perf_event *event, *partial_group = NULL; - const struct pmu *pmu = group_event->pmu; - bool txn = false; + struct pmu *pmu = group_event->pmu; if (group_event->state == PERF_EVENT_STATE_OFF) return 0; - /* Check if group transaction availabe */ - if (pmu->start_txn) - txn = true; - - if (txn) - pmu->start_txn(pmu); + pmu->start_txn(pmu); if (event_sched_in(group_event, cpuctx, ctx)) { - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -720,7 +702,7 @@ group_sched_in(struct perf_event *group_event, } } - if (!txn || !pmu->commit_txn(pmu)) + if (!pmu->commit_txn(pmu)) return 0; group_error: @@ -735,8 +717,7 @@ group_error: } event_sched_out(group_event, cpuctx, ctx); - if (txn) - pmu->cancel_txn(pmu); + pmu->cancel_txn(pmu); return -EAGAIN; } @@ -789,10 +770,10 @@ static void add_event_to_ctx(struct perf_event *event, */ static void __perf_install_in_context(void *info) { - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event *event = info; struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); int err; /* @@ -812,12 +793,6 @@ static void __perf_install_in_context(void *info) ctx->is_active = 1; update_context_time(ctx); - /* - * Protect the list operation against NMI by disabling the - * events on a global level. NOP for non NMI based events. - */ - perf_disable(); - add_event_to_ctx(event, ctx); if (event->cpu != -1 && event->cpu != smp_processor_id()) @@ -855,12 +830,7 @@ static void __perf_install_in_context(void *info) } } - if (!err && !ctx->task && cpuctx->max_pertask) - cpuctx->max_pertask--; - - unlock: - perf_enable(); - +unlock: raw_spin_unlock(&ctx->lock); } @@ -883,6 +853,8 @@ perf_install_in_context(struct perf_event_context *ctx, { struct task_struct *task = ctx->task; + event->ctx = ctx; + if (!task) { /* * Per cpu events are installed via an smp call and @@ -931,10 +903,12 @@ static void __perf_event_mark_enabled(struct perf_event *event, event->state = PERF_EVENT_STATE_INACTIVE; event->tstamp_enabled = ctx->time - event->total_time_enabled; - list_for_each_entry(sub, &event->sibling_list, group_entry) - if (sub->state >= PERF_EVENT_STATE_INACTIVE) + list_for_each_entry(sub, &event->sibling_list, group_entry) { + if (sub->state >= PERF_EVENT_STATE_INACTIVE) { sub->tstamp_enabled = ctx->time - sub->total_time_enabled; + } + } } /* @@ -943,9 +917,9 @@ static void __perf_event_mark_enabled(struct perf_event *event, static void __perf_event_enable(void *info) { struct perf_event *event = info; - struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); struct perf_event_context *ctx = event->ctx; struct perf_event *leader = event->group_leader; + struct perf_cpu_context *cpuctx = __get_cpu_context(ctx); |