aboutsummaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorSteven Rostedt <srostedt@redhat.com>2010-05-21 11:49:57 -0400
committerSteven Rostedt <rostedt@goodmis.org>2010-05-21 11:49:57 -0400
commitff5f149b6aec8edbfa3698721667acd043009a33 (patch)
treed052553eb296dfee3f01b1cb2b717cb7ccf3127a /kernel
parentf0218b3e9974f06014b61be8987159f4a20e011e (diff)
parent580d607cd666dfabfc1c7b0fb08c8ac690c7c87f (diff)
Merge branch 'perf/core' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip into trace/tip/tracing/core-7
Conflicts: include/linux/ftrace_event.h include/trace/ftrace.h kernel/trace/trace_event_perf.c kernel/trace/trace_kprobe.c kernel/trace/trace_syscalls.c Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c17
-rw-r--r--kernel/cgroup.c62
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/kexec.c6
-rw-r--r--kernel/lockdep.c84
-rw-r--r--kernel/lockdep_internals.h72
-rw-r--r--kernel/lockdep_proc.c58
-rw-r--r--kernel/perf_event.c389
-rw-r--r--kernel/profile.c4
-rw-r--r--kernel/ptrace.c11
-rw-r--r--kernel/rcupdate.c30
-rw-r--r--kernel/rcutiny.c35
-rw-r--r--kernel/rcutiny_plugin.h39
-rw-r--r--kernel/rcutorture.c2
-rw-r--r--kernel/rcutree.c131
-rw-r--r--kernel/rcutree.h2
-rw-r--r--kernel/rcutree_plugin.h69
-rw-r--r--kernel/rcutree_trace.c4
-rw-r--r--kernel/sched.c2
-rw-r--r--kernel/sched_debug.c2
-rw-r--r--kernel/softirq.c2
-rw-r--r--kernel/stop_machine.c5
-rw-r--r--kernel/trace/trace_event_perf.c190
-rw-r--r--kernel/trace/trace_kprobe.c17
-rw-r--r--kernel/trace/trace_syscalls.c17
25 files changed, 762 insertions, 489 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 24f8c81fc48..e4c0e1fee9b 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -353,17 +353,18 @@ restart:
void acct_exit_ns(struct pid_namespace *ns)
{
- struct bsd_acct_struct *acct;
+ struct bsd_acct_struct *acct = ns->bacct;
- spin_lock(&acct_lock);
- acct = ns->bacct;
- if (acct != NULL) {
- if (acct->file != NULL)
- acct_file_reopen(acct, NULL, NULL);
+ if (acct == NULL)
+ return;
- kfree(acct);
- }
+ del_timer_sync(&acct->timer);
+ spin_lock(&acct_lock);
+ if (acct->file != NULL)
+ acct_file_reopen(acct, NULL, NULL);
spin_unlock(&acct_lock);
+
+ kfree(acct);
}
/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4a07d057a26..e9ec642932e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1646,7 +1646,9 @@ static inline struct cftype *__d_cft(struct dentry *dentry)
int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
{
char *start;
- struct dentry *dentry = rcu_dereference(cgrp->dentry);
+ struct dentry *dentry = rcu_dereference_check(cgrp->dentry,
+ rcu_read_lock_held() ||
+ cgroup_lock_is_held());
if (!dentry || cgrp == dummytop) {
/*
@@ -1662,13 +1664,17 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen)
*--start = '\0';
for (;;) {
int len = dentry->d_name.len;
+
if ((start -= len) < buf)
return -ENAMETOOLONG;
- memcpy(start, cgrp->dentry->d_name.name, len);
+ memcpy(start, dentry->d_name.name, len);
cgrp = cgrp->parent;
if (!cgrp)
break;
- dentry = rcu_dereference(cgrp->dentry);
+
+ dentry = rcu_dereference_check(cgrp->dentry,
+ rcu_read_lock_held() ||
+ cgroup_lock_is_held());
if (!cgrp->parent)
continue;
if (--start < buf)
@@ -4429,7 +4435,15 @@ __setup("cgroup_disable=", cgroup_disable);
*/
unsigned short css_id(struct cgroup_subsys_state *css)
{
- struct css_id *cssid = rcu_dereference(css->id);
+ struct css_id *cssid;
+
+ /*
+ * This css_id() can return correct value when somone has refcnt
+ * on this or this is under rcu_read_lock(). Once css->id is allocated,
+ * it's unchanged until freed.
+ */
+ cssid = rcu_dereference_check(css->id,
+ rcu_read_lock_held() || atomic_read(&css->refcnt));
if (cssid)
return cssid->id;
@@ -4439,7 +4453,10 @@ EXPORT_SYMBOL_GPL(css_id);
unsigned short css_depth(struct cgroup_subsys_state *css)
{
- struct css_id *cssid = rcu_dereference(css->id);
+ struct css_id *cssid;
+
+ cssid = rcu_dereference_check(css->id,
+ rcu_read_lock_held() || atomic_read(&css->refcnt));
if (cssid)
return cssid->depth;
@@ -4447,15 +4464,36 @@ unsigned short css_depth(struct cgroup_subsys_state *css)
}
EXPORT_SYMBOL_GPL(css_depth);
+/**
+ * css_is_ancestor - test "root" css is an ancestor of "child"
+ * @child: the css to be tested.
+ * @root: the css supporsed to be an ancestor of the child.
+ *
+ * Returns true if "root" is an ancestor of "child" in its hierarchy. Because
+ * this function reads css->id, this use rcu_dereference() and rcu_read_lock().
+ * But, considering usual usage, the csses should be valid objects after test.
+ * Assuming that the caller will do some action to the child if this returns
+ * returns true, the caller must take "child";s reference count.
+ * If "child" is valid object and this returns true, "root" is valid, too.
+ */
+
bool css_is_ancestor(struct cgroup_subsys_state *child,
const struct cgroup_subsys_state *root)
{
- struct css_id *child_id = rcu_dereference(child->id);
- struct css_id *root_id = rcu_dereference(root->id);
+ struct css_id *child_id;
+ struct css_id *root_id;
+ bool ret = true;
- if (!child_id || !root_id || (child_id->depth < root_id->depth))
- return false;
- return child_id->stack[root_id->depth] == root_id->id;
+ rcu_read_lock();
+ child_id = rcu_dereference(child->id);
+ root_id = rcu_dereference(root->id);
+ if (!child_id
+ || !root_id
+ || (child_id->depth < root_id->depth)
+ || (child_id->stack[root_id->depth] != root_id->id))
+ ret = false;
+ rcu_read_unlock();
+ return ret;
}
static void __free_css_id_cb(struct rcu_head *head)
@@ -4555,13 +4593,13 @@ static int alloc_css_id(struct cgroup_subsys *ss, struct cgroup *parent,
{
int subsys_id, i, depth = 0;
struct cgroup_subsys_state *parent_css, *child_css;
- struct css_id *child_id, *parent_id = NULL;
+ struct css_id *child_id, *parent_id;
subsys_id = ss->subsys_id;
parent_css = parent->subsys[subsys_id];
child_css = child->subsys[subsys_id];
- depth = css_depth(parent_css) + 1;
parent_id = parent_css->id;
+ depth = parent_id->depth;
child_id = get_new_cssid(ss, depth);
if (IS_ERR(child_id))
diff --git a/kernel/fork.c b/kernel/fork.c
index 5d3592deaf7..4d57d9e3a6e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1111,7 +1111,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->memcg_batch.do_batch = 0;
p->memcg_batch.memcg = NULL;
#endif
- p->stack_start = stack_start;
/* Perform scheduler related setup. Assign this task to a CPU. */
sched_fork(p, clone_flags);
diff --git a/kernel/kexec.c b/kernel/kexec.c
index 87ebe8adc47..474a84715ea 100644
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -1134,11 +1134,9 @@ int crash_shrink_memory(unsigned long new_size)
free_reserved_phys_range(end, crashk_res.end);
- if (start == end) {
- crashk_res.end = end;
+ if (start == end)
release_resource(&crashk_res);
- } else
- crashk_res.end = end - 1;
+ crashk_res.end = end - 1;
unlock:
mutex_unlock(&kexec_mutex);
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
index e9c759f06c1..ec21304856d 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -431,20 +431,7 @@ static struct stack_trace lockdep_init_trace = {
/*
* Various lockdep statistics:
*/
-atomic_t chain_lookup_hits;
-atomic_t chain_lookup_misses;
-atomic_t hardirqs_on_events;
-atomic_t hardirqs_off_events;
-atomic_t redundant_hardirqs_on;
-atomic_t redundant_hardirqs_off;
-atomic_t softirqs_on_events;
-atomic_t softirqs_off_events;
-atomic_t redundant_softirqs_on;
-atomic_t redundant_softirqs_off;
-atomic_t nr_unused_locks;
-atomic_t nr_cyclic_checks;
-atomic_t nr_find_usage_forwards_checks;
-atomic_t nr_find_usage_backwards_checks;
+DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
#endif
/*
@@ -748,7 +735,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
return NULL;
}
class = lock_classes + nr_lock_classes++;
- debug_atomic_inc(&nr_unused_locks);
+ debug_atomic_inc(nr_unused_locks);
class->key = key;
class->name = lock->name;
class->subclass = subclass;
@@ -818,7 +805,8 @@ static struct lock_list *alloc_list_entry(void)
* Add a new dependency to the head of the list:
*/
static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
- struct list_head *head, unsigned long ip, int distance)
+ struct list_head *head, unsigned long ip,
+ int distance, struct stack_trace *trace)
{
struct lock_list *entry;
/*
@@ -829,11 +817,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
if (!entry)
return 0;
- if (!save_trace(&entry->trace))
- return 0;
-
entry->class = this;
entry->distance = distance;
+ entry->trace = *trace;
/*
* Since we never remove from the dependency list, the list can
* be walked lockless by other CPUs, it's only allocation
@@ -1205,7 +1191,7 @@ check_noncircular(struct lock_list *root, struct lock_class *target,
{
int result;
- debug_atomic_inc(&nr_cyclic_checks);
+ debug_atomic_inc(nr_cyclic_checks);
result = __bfs_forwards(root, target, class_equal, target_entry);
@@ -1242,7 +1228,7 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
{
int result;
- debug_atomic_inc(&nr_find_usage_forwards_checks);
+ debug_atomic_inc(nr_find_usage_forwards_checks);
result = __bfs_forwards(root, (void *)bit, usage_match, target_entry);
@@ -1265,7 +1251,7 @@ find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit,
{
int result;
- debug_atomic_inc(&nr_find_usage_backwards_checks);
+ debug_atomic_inc(nr_find_usage_backwards_checks);
result = __bfs_backwards(root, (void *)bit, usage_match, target_entry);
@@ -1635,12 +1621,20 @@ check_deadlock(struct task_struct *curr, struct held_lock *next,
*/
static int
check_prev_add(struct task_struct *curr, struct held_lock *prev,
- struct held_lock *next, int distance)
+ struct held_lock *next, int distance, int trylock_loop)
{
struct lock_list *entry;
int ret;
struct lock_list this;
struct lock_list *uninitialized_var(target_entry);
+ /*
+ * Static variable, serialized by the graph_lock().
+ *
+ * We use this static variable to save the stack trace in case
+ * we call into this function multiple times due to encountering
+ * trylocks in the held lock stack.
+ */
+ static struct stack_trace trace;
/*
* Prove that the new <prev> -> <next> dependency would not
@@ -1688,20 +1682,23 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
}
}
+ if (!trylock_loop && !save_trace(&trace))
+ return 0;
+
/*
* Ok, all validations passed, add the new lock
* to the previous lock's dependency list:
*/
ret = add_lock_to_list(hlock_class(prev), hlock_class(next),
&hlock_class(prev)->locks_after,
- next->acquire_ip, distance);
+ next->acquire_ip, distance, &trace);
if (!ret)
return 0;
ret = add_lock_to_list(hlock_class(next), hlock_class(prev),
&hlock_class(next)->locks_before,
- next->acquire_ip, distance);
+ next->acquire_ip, distance, &trace);
if (!ret)
return 0;
@@ -1731,6 +1728,7 @@ static int
check_prevs_add(struct task_struct *curr, struct held_lock *next)
{
int depth = curr->lockdep_depth;
+ int trylock_loop = 0;
struct held_lock *hlock;
/*
@@ -1756,7 +1754,8 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
* added:
*/
if (hlock->read != 2) {
- if (!check_prev_add(curr, hlock, next, distance))
+ if (!check_prev_add(curr, hlock, next,
+ distance, trylock_loop))
return 0;
/*
* Stop after the first non-trylock entry,
@@ -1779,6 +1778,7 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next)
if (curr->held_locks[depth].irq_context !=
curr->held_locks[depth-1].irq_context)
break;
+ trylock_loop = 1;
}
return 1;
out_bug:
@@ -1825,7 +1825,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
list_for_each_entry(chain, hash_head, entry) {
if (chain->chain_key == chain_key) {
cache_hit:
- debug_atomic_inc(&chain_lookup_hits);
+ debug_atomic_inc(chain_lookup_hits);
if (very_verbose(class))
printk("\nhash chain already cached, key: "
"%016Lx tail class: [%p] %s\n",
@@ -1890,7 +1890,7 @@ cache_hit:
chain_hlocks[chain->base + j] = class - lock_classes;
}
list_add_tail_rcu(&chain->entry, hash_head);
- debug_atomic_inc(&chain_lookup_misses);
+ debug_atomic_inc(chain_lookup_misses);
inc_chains();
return 1;
@@ -2311,7 +2311,12 @@ void trace_hardirqs_on_caller(unsigned long ip)
return;
if (unlikely(curr->hardirqs_enabled)) {
- debug_atomic_inc(&redundant_hardirqs_on);
+ /*
+ * Neither irq nor preemption are disabled here
+ * so this is racy by nature but loosing one hit
+ * in a stat is not a big deal.
+ */
+ __debug_atomic_inc(redundant_hardirqs_on);
return;
}
/* we'll do an OFF -> ON transition: */
@@ -2338,7 +2343,7 @@ void trace_hardirqs_on_caller(unsigned long ip)
curr->hardirq_enable_ip = ip;
curr->hardirq_enable_event = ++curr->irq_events;
- debug_atomic_inc(&hardirqs_on_events);
+ debug_atomic_inc(hardirqs_on_events);
}
EXPORT_SYMBOL(trace_hardirqs_on_caller);
@@ -2370,9 +2375,9 @@ void trace_hardirqs_off_caller(unsigned long ip)
curr->hardirqs_enabled = 0;
curr->hardirq_disable_ip = ip;
curr->hardirq_disable_event = ++curr->irq_events;
- debug_atomic_inc(&hardirqs_off_events);
+ debug_atomic_inc(hardirqs_off_events);
} else
- debug_atomic_inc(&redundant_hardirqs_off);
+ debug_atomic_inc(redundant_hardirqs_off);
}
EXPORT_SYMBOL(trace_hardirqs_off_caller);
@@ -2396,7 +2401,7 @@ void trace_softirqs_on(unsigned long ip)
return;
if (curr->softirqs_enabled) {
- debug_atomic_inc(&redundant_softirqs_on);
+ debug_atomic_inc(redundant_softirqs_on);
return;
}
@@ -2406,7 +2411,7 @@ void trace_softirqs_on(unsigned long ip)
curr->softirqs_enabled = 1;
curr->softirq_enable_ip = ip;
curr->softirq_enable_event = ++curr->irq_events;
- debug_atomic_inc(&softirqs_on_events);
+ debug_atomic_inc(softirqs_on_events);
/*
* We are going to turn softirqs on, so set the
* usage bit for all held locks, if hardirqs are
@@ -2436,10 +2441,10 @@ void trace_softirqs_off(unsigned long ip)
curr->softirqs_enabled = 0;
curr->softirq_disable_ip = ip;
curr->softirq_disable_event = ++curr->irq_events;
- debug_atomic_inc(&softirqs_off_events);
+ debug_atomic_inc(softirqs_off_events);
DEBUG_LOCKS_WARN_ON(!softirq_count());
} else
- debug_atomic_inc(&redundant_softirqs_off);
+ debug_atomic_inc(redundant_softirqs_off);
}
static void __lockdep_trace_alloc(gfp_t gfp_mask, unsigned long flags)
@@ -2644,7 +2649,7 @@ static int mark_lock(struct task_struct *curr, struct held_lock *this,
return 0;
break;
case LOCK_USED:
- debug_atomic_dec(&nr_unused_locks);
+ debug_atomic_dec(nr_unused_locks);
break;
default:
if (!debug_locks_off_graph_unlock())
@@ -2750,7 +2755,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass,
if (!class)
return 0;
}
- debug_atomic_inc((atomic_t *)&class->ops);
+ atomic_inc((atomic_t *)&class->ops);
if (very_verbose(class)) {
printk("\nacquire class [%p] %s", class->key, class->name);
if (class->name_version > 1)
@@ -3801,8 +3806,11 @@ void lockdep_rcu_dereference(const char *file, const int line)
{
struct task_struct *curr = current;
+#ifndef CONFIG_PROVE_RCU_REPEATEDLY
if (!debug_locks_off())
return;
+#endif /* #ifdef CONFIG_PROVE_RCU_REPEATEDLY */
+ /* Note: the following can be executed concurrently, so be careful. */
printk("\n===================================================\n");
printk( "[ INFO: suspicious rcu_dereference_check() usage. ]\n");
printk( "---------------------------------------------------\n");
diff --git a/kernel/lockdep_internals.h b/kernel/lockdep_internals.h
index a2ee95ad131..4f560cfedc8 100644
--- a/kernel/lockdep_internals.h
+++ b/kernel/lockdep_internals.h
@@ -110,30 +110,60 @@ lockdep_count_backward_deps(struct lock_class *class)
#endif
#ifdef CONFIG_DEBUG_LOCKDEP
+
+#include <asm/local.h>
/*
- * Various lockdep statistics:
+ * Various lockdep statistics.
+ * We want them per cpu as they are often accessed in fast path
+ * and we want to avoid too much cache bouncing.
*/
-extern atomic_t chain_lookup_hits;
-extern atomic_t chain_lookup_misses;
-extern atomic_t hardirqs_on_events;
-extern atomic_t hardirqs_off_events;
-extern atomic_t redundant_hardirqs_on;
-extern atomic_t redundant_hardirqs_off;
-extern atomic_t softirqs_on_events;
-extern atomic_t softirqs_off_events;
-extern atomic_t redundant_softirqs_on;
-extern atomic_t redundant_softirqs_off;
-extern atomic_t nr_unused_locks;
-extern atomic_t nr_cyclic_checks;
-extern atomic_t nr_cyclic_check_recursions;
-extern atomic_t nr_find_usage_forwards_checks;
-extern atomic_t nr_find_usage_forwards_recursions;
-extern atomic_t nr_find_usage_backwards_checks;
-extern atomic_t nr_find_usage_backwards_recursions;
-# define debug_atomic_inc(ptr) atomic_inc(ptr)
-# define debug_atomic_dec(ptr) atomic_dec(ptr)
-# define debug_atomic_read(ptr) atomic_read(ptr)
+struct lockdep_stats {
+ int chain_lookup_hits;
+ int chain_lookup_misses;
+ int hardirqs_on_events;
+ int hardirqs_off_events;
+ int redundant_hardirqs_on;
+ int redundant_hardirqs_off;
+ int softirqs_on_events;
+ int softirqs_off_events;
+ int redundant_softirqs_on;
+ int redundant_softirqs_off;
+ int nr_unused_locks;
+ int nr_cyclic_checks;
+ int nr_cyclic_check_recursions;
+ int nr_find_usage_forwards_checks;
+ int nr_find_usage_forwards_recursions;
+ int nr_find_usage_backwards_checks;
+ int nr_find_usage_backwards_recursions;
+};
+
+DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats);
+
+#define __debug_atomic_inc(ptr) \
+ this_cpu_inc(lockdep_stats.ptr);
+
+#define debug_atomic_inc(ptr) { \
+ WARN_ON_ONCE(!irqs_disabled()); \
+ __this_cpu_inc(lockdep_stats.ptr); \
+}
+
+#define debug_atomic_dec(ptr) { \
+ WARN_ON_ONCE(!irqs_disabled()); \
+ __this_cpu_dec(lockdep_stats.ptr); \
+}
+
+#define debug_atomic_read(ptr) ({ \
+ struct lockdep_stats *__cpu_lockdep_stats; \
+ unsigned long long __total = 0; \
+ int __cpu; \
+ for_each_possible_cpu(__cpu) { \
+ __cpu_lockdep_stats = &per_cpu(lockdep_stats, __cpu); \
+ __total += __cpu_lockdep_stats->ptr; \
+ } \
+ __total; \
+})
#else
+# define __debug_atomic_inc(ptr) do { } while (0)
# define debug_atomic_inc(ptr) do { } while (0)
# define debug_atomic_dec(ptr) do { } while (0)
# define debug_atomic_read(ptr) 0
diff --git a/kernel/lockdep_proc.c b/kernel/lockdep_proc.c
index d4aba4f3584..59b76c8ce9d 100644
--- a/kernel/lockdep_proc.c
+++ b/kernel/lockdep_proc.c
@@ -184,34 +184,34 @@ static const struct file_operations proc_lockdep_chains_operations = {
static void lockdep_stats_debug_show(struct seq_file *m)
{
#ifdef CONFIG_DEBUG_LOCKDEP
- unsigned int hi1 = debug_atomic_read(&hardirqs_on_events),
- hi2 = debug_atomic_read(&hardirqs_off_events),
- hr1 = debug_atomic_read(&redundant_hardirqs_on),
- hr2 = debug_atomic_read(&redundant_hardirqs_off),
- si1 = debug_atomic_read(&softirqs_on_events),
- si2 = debug_atomic_read(&softirqs_off_events),
- sr1 = debug_atomic_read(&redundant_softirqs_on),
- sr2 = debug_atomic_read(&redundant_softirqs_off);
-
- seq_printf(m, " chain lookup misses: %11u\n",
- debug_atomic_read(&chain_lookup_misses));
- seq_printf(m, " chain lookup hits: %11u\n",
- debug_atomic_read(&chain_lookup_hits));
- seq_printf(m, " cyclic checks: %11u\n",
- debug_atomic_read(&nr_cyclic_checks));
- seq_printf(m, " find-mask forwards checks: %11u\n",
- debug_atomic_read(&nr_find_usage_forwards_checks));
- seq_printf(m, " find-mask backwards checks: %11u\n",
- debug_atomic_read(&nr_find_usage_backwards_checks));
-
- seq_printf(m, " hardirq on events: %11u\n", hi1);
- seq_printf(m, " hardirq off events: %11u\n", hi2);
- seq_printf(m, " redundant hardirq ons: %11u\n", hr1);
- seq_printf(m, " redundant hardirq offs: %11u\n", hr2);
- seq_printf(m, " softirq on events: %11u\n", si1);
- seq_printf(m, " softirq off events: %11u\n", si2);
- seq_printf(m, " redundant softirq ons: %11u\n", sr1);
- seq_printf(m, " redundant softirq offs: %11u\n", sr2);
+ unsigned long long hi1 = debug_atomic_read(hardirqs_on_events),
+ hi2 = debug_atomic_read(hardirqs_off_events),
+ hr1 = debug_atomic_read(redundant_hardirqs_on),
+ hr2 = debug_atomic_read(redundant_hardirqs_off),
+ si1 = debug_atomic_read(softirqs_on_events),
+ si2 = debug_atomic_read(softirqs_off_events),
+ sr1 = debug_atomic_read(redundant_softirqs_on),
+ sr2 = debug_atomic_read(redundant_softirqs_off);
+
+ seq_printf(m, " chain lookup misses: %11llu\n",
+ debug_atomic_read(chain_lookup_misses));
+ seq_printf(m, " chain lookup hits: %11llu\n",
+ debug_atomic_read(chain_lookup_hits));
+ seq_printf(m, " cyclic checks: %11llu\n",
+ debug_atomic_read(nr_cyclic_checks));
+ seq_printf(m, " find-mask forwards checks: %11llu\n",
+ debug_atomic_read(nr_find_usage_forwards_checks));
+ seq_printf(m, " find-mask backwards checks: %11llu\n",
+ debug_atomic_read(nr_find_usage_backwards_checks));
+
+ seq_printf(m, " hardirq on events: %11llu\n", hi1);
+ seq_printf(m, " hardirq off events: %11llu\n", hi2);
+ seq_printf(m, " redundant hardirq ons: %11llu\n", hr1);
+ seq_printf(m, " redundant hardirq offs: %11llu\n", hr2);
+ seq_printf(m, " softirq on events: %11llu\n", si1);
+ seq_printf(m, " softirq off events: %11llu\n", si2);
+ seq_printf(m, " redundant softirq ons: %11llu\n", sr1);
+ seq_printf(m, " redundant softirq offs: %11llu\n", sr2);
#endif
}
@@ -263,7 +263,7 @@ static int lockdep_stats_show(struct seq_file *m, void *v)
#endif
}
#ifdef CONFIG_DEBUG_LOCKDEP
- DEBUG_LOCKS_WARN_ON(debug_atomic_read(&nr_unused_locks) != nr_unused);
+ DEBUG_LOCKS_WARN_ON(debug_atomic_read(nr_unused_locks) != nr_unused);
#endif
seq_printf(m, " lock-classes: %11lu [max: %lu]\n",
nr_lock_classes, MAX_LOCKDEP_KEYS);
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a4fa381db3c..e099650cd24 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2297,11 +2297,6 @@ unlock:
rcu_read_unlock();
}
-static unsigned long perf_data_size(struct perf_mmap_data *data)
-{
- return data->nr_pages << (PAGE_SHIFT + data->data_order);
-}
-
#ifndef CONFIG_PERF_USE_VMALLOC
/*
@@ -2320,6 +2315,19 @@ perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
return virt_to_page(data->data_pages[pgoff - 1]);
}
+static void *perf_mmap_alloc_page(int cpu)
+{
+ struct page *page;
+ int node;
+
+ node = (cpu == -1) ? cpu : cpu_to_node(cpu);
+ page = alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
+ if (!page)
+ return NULL;
+
+ return page_address(page);
+}
+
static struct perf_mmap_data *
perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
{
@@ -2336,17 +2344,16 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
if (!data)
goto fail;
- data->user_page = (void *)get_zeroed_page(GFP_KERNEL);
+ data->user_page = perf_mmap_alloc_page(event->cpu);
if (!data->user_page)
goto fail_user_page;
for (i = 0; i < nr_pages; i++) {
- data->data_pages[i] = (void *)get_zeroed_page(GFP_KERNEL);
+ data->data_pages[i] = perf_mmap_alloc_page(event->cpu);
if (!data->data_pages[i])
goto fail_data_pages;
}
- data->data_order = 0;
data->nr_pages = nr_pages;
return data;
@@ -2382,6 +2389,11 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
kfree(data);
}
+static inline int page_order(struct perf_mmap_data *data)
+{
+ return 0;
+}
+
#else
/*
@@ -2390,10 +2402,15 @@ static void perf_mmap_data_free(struct perf_mmap_data *data)
* Required for architectures that have d-cache aliasing issues.
*/
+static inline int page_order(struct perf_mmap_data *data)
+{
+ return data->page_order;
+}
+
static struct page *
perf_mmap_to_page(struct perf_mmap_data *data, unsigned long pgoff)
{
- if (pgoff > (1UL << data->data_order))
+ if (pgoff > (1UL << page_order(data)))
return NULL;
return vmalloc_to_page((void *)data->user_page + pgoff * PAGE_SIZE);
@@ -2413,7 +2430,7 @@ static void perf_mmap_data_free_work(struct work_struct *work)
int i, nr;
data = container_of(work, struct perf_mmap_data, work);
- nr = 1 << data->data_order;
+ nr = 1 << page_order(data);
base = data->user_page;
for (i = 0; i < nr + 1; i++)
@@ -2452,7 +2469,7 @@ perf_mmap_data_alloc(struct perf_event *event, int nr_pages)
data->user_page = all_buf;
data->data_pages[0] = all_buf + PAGE_SIZE;
- data->data_order = ilog2(nr_pages);
+ data->page_order = ilog2(nr_pages);
data->nr_pages = 1;
return data;
@@ -2466,6 +2483,11 @@ fail:
#endif
+static unsigned long perf_data_size(struct perf_mmap_data *data)
+{
+ return data->nr_pages << (PAGE_SHIFT + page_order(data));
+}
+
static int perf_mmap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct perf_event *event = vma->vm_file->private_data;
@@ -2506,8 +2528,6 @@ perf_mmap_data_init(struct perf_event *event, struct perf_mmap_data *data)
{
long max_size = perf_data_size(data);
- atomic_set(&data->lock, -1);
-
if (event->attr.watermark) {
data->watermark = min_t(long, max_size,
event->attr.wakeup_watermark);
@@ -2580,6 +2600,14 @@ static int perf_mmap(struct file *file, struct vm_area_struct *vma)
long user_extra, extra;
int ret = 0;
+ /*
+ * Don't allow mmap() of inherited per-task counters. This would
+ * create a performance issue due to all children writing to the
+ * same buffer.
+ */
+ if (event->cpu == -1 && event->attr.inherit)
+ return -EINVAL;
+
if (!(vma->vm_flags & VM_SHARED))
return -EINVAL;
@@ -2885,120 +2913,80 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
}
/*
- * Curious locking construct.
- *
* We need to ensure a later event_id doesn't publish a head when a former
- * event_id isn't done writing. However since we need to deal with NMIs we
+ * event isn't done writing. However since we need to deal with NMIs we
* cannot fully serialize things.
*
- * What we do is serialize between CPUs so we only have to deal with NMI
- * nesting on a single CPU.
- *
* We only publish the head (and generate a wakeup) when the outer-most
- * event_id completes.
+ * event completes.
*/
-static void perf_output_lock(struct perf_output_handle *handle)
+static void perf_output_get_handle(struct perf_output_handle *handle)
{
struct perf_mmap_data *data = handle->data;
- int cur, cpu = get_cpu();
-
- handle->locked = 0;
-
- for (;;) {
- cur = atomic_cmpxchg(&data->lock, -1, cpu);
- if (cur == -1) {
- handle->locked = 1;
- break;
- }
- if (cur == cpu)
- break;
- cpu_relax();
- }
+ preempt_disable();
+ local_inc(&data->nest);
+ handle->wakeup = local_read(&data->wakeup);
}
-static void perf_output_unlock(struct perf_output_handle *handle)
+static void perf_output_put_handle(struct perf_output_handle *handle)
{
struct perf_mmap_data *data = handle->data;
unsigned long head;
- int cpu;
-
- data->done_head = data->head;
-
- if (!handle->locked)
- goto out;
again:
- /*
- * The xchg implies a full barrier that ensures all writes are done
- * before we publish the new head, matched by a rmb() in userspace when
- * reading this position.
- */
- while ((head = atomic_long_xchg(&data->done_head, 0)))
- data->user_page->data_head = head;
+ head = local_read(&data->head);
/*
- * NMI can happen here, which means we can miss a done_head update.
+ * IRQ/NMI can happen here, which means we can miss a head update.
*/
- cpu = atomic_xchg(&data->lock, -1);
- WARN_ON_ONCE(cpu != smp_processor_id());
+ if (!local_dec_and_test(&data->nest))
+ goto out;
/*
- * Therefore we have to validate we did not indeed do so.
+ * Publish the known good head. Rely on the full barrier implied
+ * by atomic_dec_and_test() order the data->head read and this
+ * write.
*/
- if (unlikely(atomic_long_read(&data->done_head))) {
- /*
- * Since we had it locked, we can lock it again.
- */
- while (atomic_cmpxchg(&data->lock, -1, cpu) != -1)
- cpu_relax();
+ data->user_page->data_head = head;
+ /*
+ * Now check if we missed an update, rely on the (compiler)
+ * barrier in atomic_dec_and_test() to re-read data->head.
+ */
+ if (unlikely(head != local_read(&data->head))) {
+ local_inc(&data->nest);
goto again;
}
- if (atomic_xchg(&data->wakeup, 0))
+ if (handle->wakeup != local_read(&data->wakeup))
perf_output_wakeup(handle);
-out:
- put_cpu();
+
+ out:
+ preempt_enable();
}
-void perf_output_copy(struct perf_output_handle *handle,
+__always_inline void perf_output_copy(struct perf_output_handle *handle,
const void *buf, unsigned int len)
{
- unsigned int pages_mask;