From ff6f05416ece2caec1a7a1f8180d6598e0ab9272 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 16:19:25 +1100
Subject: perf_counter: Fix return value from dummy hw_perf_counter_init

Impact: fix oops-causing bug

Currently, if you try to use perf_counters on an architecture that has
no hardware support, and you select an event that doesn't map to any of
the defined software counters, you get an oops rather than an error.
This is because the dummy hw_perf_counter_init returns ERR_PTR(-EINVAL)
but the caller (perf_counter_alloc) only tests for NULL.

This makes the dummy hw_perf_counter_init return NULL instead.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 kernel/perf_counter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 37f771691f9..4be1a8d872b 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -41,7 +41,7 @@ static DEFINE_MUTEX(perf_resource_mutex);
 extern __weak const struct hw_perf_counter_ops *
 hw_perf_counter_init(struct perf_counter *counter)
 {
-	return ERR_PTR(-EINVAL);
+	return NULL;
 }
 
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
-- 
cgit v1.2.3-18-g5258


From 9abf8a08bc8f18a3b125f834f00e2e71b49c15d2 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 16:26:43 +1100
Subject: perf_counter: Fix the cpu_clock software counter

Impact: bug fix

Currently if you do (e.g.) timec -e -1 ls, it will report 0 for the
value of the cpu_clock counter.  The reason is that the core assumes
that a counter's count field is up-to-date when the counter is inactive,
and doesn't call the counter's read function.  However, the cpu_clock
counter code only updates the count in the read function.

This fixes it by making both the read and disable functions update the
count.  It also makes the counter ignore time passing while the counter
is disabled, by making the enable function update the hw.prev_count field.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 kernel/perf_counter.c | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 4be1a8d872b..b7a027a2ef0 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -928,18 +928,32 @@ static const struct file_operations perf_fops = {
 
 static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
 {
+	int cpu = raw_smp_processor_id();
+
+	atomic64_set(&counter->hw.prev_count, cpu_clock(cpu));
 	return 0;
 }
 
+static void cpu_clock_perf_counter_update(struct perf_counter *counter)
+{
+	int cpu = raw_smp_processor_id();
+	s64 prev;
+	u64 now;
+
+	now = cpu_clock(cpu);
+	prev = atomic64_read(&counter->hw.prev_count);
+	atomic64_set(&counter->hw.prev_count, now);
+	atomic64_add(now - prev, &counter->count);
+}
+
 static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
 {
+	cpu_clock_perf_counter_update(counter);
 }
 
 static void cpu_clock_perf_counter_read(struct perf_counter *counter)
 {
-	int cpu = raw_smp_processor_id();
-
-	atomic64_set(&counter->count, cpu_clock(cpu));
+	cpu_clock_perf_counter_update(counter);
 }
 
 static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
-- 
cgit v1.2.3-18-g5258


From 3cbed429a9ccdb7a243f733b1056fe5c39e9004c Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 16:43:42 +1100
Subject: perf_counter: Add optional hw_perf_group_sched_in arch function

Impact: extend perf_counter infrastructure

This adds an optional hw_perf_group_sched_in() arch function that enables
a whole group of counters in one go.  It returns 1 if it added the group
successfully, 0 if it did nothing (and therefore the core needs to add
the counters individually), or a negative number if an error occurred.
It should add all the counters and enable any software counters in the
group, or else add none of them and return an error.

There are a couple of related changes/improvements in the group handling
here:

* As an optimization, group_sched_out() and group_sched_in() now check the
  state of the group leader: group_sched_out() does nothing if the leader
  is not active, and group_sched_in() does nothing if the leader is
  disabled (off).

* We now call hw_perf_save_disable/hw_perf_restore around the complete
  set of counter enable/disable calls in __perf_counter_sched_in/out,
  to give the arch code the opportunity to defer updating the hardware
  state until the hw_perf_restore call if it wants.

* We no longer stop adding groups after we get to a group that has more
  than one counter.  We will ultimately add an option for a group to be
  exclusive.  The current code doesn't really implement exclusive groups
  anyway, since a group could end up going on with other counters that
  get added before it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 kernel/perf_counter.c | 31 ++++++++++++++++++++++++++-----
 1 file changed, 26 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index b7a027a2ef0..9ad11e44d9a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -47,6 +47,12 @@ hw_perf_counter_init(struct perf_counter *counter)
 u64 __weak hw_perf_save_disable(void)		{ return 0; }
 void __weak hw_perf_restore(u64 ctrl)		{ barrier(); }
 void __weak hw_perf_counter_setup(void)		{ barrier(); }
+int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
+	       struct perf_cpu_context *cpuctx,
+	       struct perf_counter_context *ctx, int cpu)
+{
+	return 0;
+}
 
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
@@ -341,6 +347,9 @@ group_sched_out(struct perf_counter *group_counter,
 {
 	struct perf_counter *counter;
 
+	if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
+		return;
+
 	counter_sched_out(group_counter, cpuctx, ctx);
 
 	/*
@@ -354,15 +363,18 @@ void __perf_counter_sched_out(struct perf_counter_context *ctx,
 			      struct perf_cpu_context *cpuctx)
 {
 	struct perf_counter *counter;
+	u64 flags;
 
 	if (likely(!ctx->nr_counters))
 		return;
 
 	spin_lock(&ctx->lock);
+	flags = hw_perf_save_disable();
 	if (ctx->nr_active) {
 		list_for_each_entry(counter, &ctx->counter_list, list_entry)
 			group_sched_out(counter, cpuctx, ctx);
 	}
+	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
 }
 
@@ -402,7 +414,14 @@ group_sched_in(struct perf_counter *group_counter,
 	       int cpu)
 {
 	struct perf_counter *counter, *partial_group;
-	int ret = 0;
+	int ret;
+
+	if (group_counter->state == PERF_COUNTER_STATE_OFF)
+		return 0;
+
+	ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
+	if (ret)
+		return ret < 0 ? ret : 0;
 
 	if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
 		return -EAGAIN;
@@ -415,10 +434,9 @@ group_sched_in(struct perf_counter *group_counter,
 			partial_group = counter;
 			goto group_error;
 		}
-		ret = -EAGAIN;
 	}
 
-	return ret;
+	return 0;
 
 group_error:
 	/*
@@ -440,11 +458,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 			struct perf_cpu_context *cpuctx, int cpu)
 {
 	struct perf_counter *counter;
+	u64 flags;
 
 	if (likely(!ctx->nr_counters))
 		return;
 
 	spin_lock(&ctx->lock);
+	flags = hw_perf_save_disable();
 	list_for_each_entry(counter, &ctx->counter_list, list_entry) {
 		/*
 		 * Listen to the 'cpu' scheduling filter constraint
@@ -454,12 +474,13 @@ __perf_counter_sched_in(struct perf_counter_context *ctx,
 			continue;
 
 		/*
-		 * If we scheduled in a group atomically and
-		 * exclusively, break out:
+		 * If we scheduled in a group atomically and exclusively,
+		 * or if this group can't go on, break out:
 		 */
 		if (group_sched_in(counter, cpuctx, ctx, cpu))
 			break;
 	}
+	hw_perf_restore(flags);
 	spin_unlock(&ctx->lock);
 }
 
-- 
cgit v1.2.3-18-g5258


From 4eb96fcfe07b7f2a05577e57533840f8e26bea53 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 9 Jan 2009 17:24:34 +1100
Subject: perf_counter: Add dummy perf_counter_print_debug function

Impact: minimize requirements on architectures

Currently, an architecture just enabling CONFIG_PERF_COUNTERS but not
providing any extra functions will fail to build with
perf_counter_print_debug being undefined, since we don't provide an
empty dummy definition like we do with the hw_perf_* functions.

This provides an empty dummy perf_counter_print_debug() to make it
easier for architectures to turn on CONFIG_PERF_COUNTERS.

Signed-off-by: Paul Mackerras <paulus@samba.org>
---
 kernel/perf_counter.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index 9ad11e44d9a..4c0dccb756a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -54,6 +54,8 @@ int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
 	return 0;
 }
 
+void __weak perf_counter_print_debug(void)	{ }
+
 static void
 list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
 {
-- 
cgit v1.2.3-18-g5258