aboutsummaryrefslogtreecommitdiff
path: root/drivers/oprofile/cpu_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/oprofile/cpu_buffer.c')
-rw-r--r--drivers/oprofile/cpu_buffer.c157
1 files changed, 74 insertions, 83 deletions
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 242257b1944..8aa73fac6ad 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -21,7 +21,6 @@
#include <linux/sched.h>
#include <linux/oprofile.h>
-#include <linux/vmalloc.h>
#include <linux/errno.h>
#include "event_buffer.h"
@@ -31,24 +30,8 @@
#define OP_BUFFER_FLAGS 0
-/*
- * Read and write access is using spin locking. Thus, writing to the
- * buffer by NMI handler (x86) could occur also during critical
- * sections when reading the buffer. To avoid this, there are 2
- * buffers for independent read and write access. Read access is in
- * process context only, write access only in the NMI handler. If the
- * read buffer runs empty, both buffers are swapped atomically. There
- * is potentially a small window during swapping where the buffers are
- * disabled and samples could be lost.
- *
- * Using 2 buffers is a little bit overhead, but the solution is clear
- * and does not require changes in the ring buffer implementation. It
- * can be changed to a single buffer solution when the ring buffer
- * access is implemented as non-locking atomic code.
- */
-static struct ring_buffer *op_ring_buffer_read;
-static struct ring_buffer *op_ring_buffer_write;
-DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
+static struct ring_buffer *op_ring_buffer;
+DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);
static void wq_sync_buffer(struct work_struct *work);
@@ -62,20 +45,16 @@ unsigned long oprofile_get_cpu_buffer_size(void)
void oprofile_cpu_buffer_inc_smpl_lost(void)
{
- struct oprofile_cpu_buffer *cpu_buf
- = &__get_cpu_var(cpu_buffer);
+ struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
cpu_buf->sample_lost_overflow++;
}
void free_cpu_buffers(void)
{
- if (op_ring_buffer_read)
- ring_buffer_free(op_ring_buffer_read);
- op_ring_buffer_read = NULL;
- if (op_ring_buffer_write)
- ring_buffer_free(op_ring_buffer_write);
- op_ring_buffer_write = NULL;
+ if (op_ring_buffer)
+ ring_buffer_free(op_ring_buffer);
+ op_ring_buffer = NULL;
}
#define RB_EVENT_HDR_SIZE 4
@@ -88,15 +67,12 @@ int alloc_cpu_buffers(void)
unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
RB_EVENT_HDR_SIZE);
- op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
- if (!op_ring_buffer_read)
- goto fail;
- op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
- if (!op_ring_buffer_write)
+ op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
+ if (!op_ring_buffer)
goto fail;
for_each_possible_cpu(i) {
- struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);
+ struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
b->last_task = NULL;
b->last_is_kernel = -1;
@@ -123,7 +99,7 @@ void start_cpu_work(void)
work_enabled = 1;
for_each_online_cpu(i) {
- struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);
+ struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
/*
* Spread the work by 1 jiffy per cpu so they dont all
@@ -135,17 +111,19 @@ void start_cpu_work(void)
void end_cpu_work(void)
{
- int i;
-
work_enabled = 0;
+}
+
+void flush_cpu_work(void)
+{
+ int i;
for_each_online_cpu(i) {
- struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);
+ struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
- cancel_delayed_work(&b->work);
+ /* these works are per-cpu, no need for flush_sync */
+ flush_delayed_work(&b->work);
}
-
- flush_scheduled_work();
}
/*
@@ -164,16 +142,11 @@ struct op_sample
*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
{
entry->event = ring_buffer_lock_reserve
- (op_ring_buffer_write, sizeof(struct op_sample) +
+ (op_ring_buffer, sizeof(struct op_sample) +
size * sizeof(entry->sample->data[0]));
- if (entry->event)
- entry->sample = ring_buffer_event_data(entry->event);
- else
- entry->sample = NULL;
-
- if (!entry->sample)
+ if (!entry->event)
return NULL;
-
+ entry->sample = ring_buffer_event_data(entry->event);
entry->size = size;
entry->data = entry->sample->data;
@@ -182,25 +155,16 @@ struct op_sample
int op_cpu_buffer_write_commit(struct op_entry *entry)
{
- return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event);
+ return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
}
struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
struct ring_buffer_event *e;
- e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
- if (e)
- goto event;
- if (ring_buffer_swap_cpu(op_ring_buffer_read,
- op_ring_buffer_write,
- cpu))
+ e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL);
+ if (!e)
return NULL;
- e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
- if (e)
- goto event;
- return NULL;
-event:
entry->event = e;
entry->sample = ring_buffer_event_data(e);
entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
@@ -211,8 +175,7 @@ event:
unsigned long op_cpu_buffer_entries(int cpu)
{
- return ring_buffer_entries_cpu(op_ring_buffer_read, cpu)
- + ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
+ return ring_buffer_entries_cpu(op_ring_buffer, cpu);
}
static int
@@ -295,8 +258,10 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
*/
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
- unsigned long backtrace, int is_kernel, unsigned long event)
+ unsigned long backtrace, int is_kernel, unsigned long event,
+ struct task_struct *task)
{
+ struct task_struct *tsk = task ? task : current;
cpu_buf->sample_received++;
if (pc == ESCAPE_CODE) {
@@ -304,7 +269,7 @@ log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
return 0;
}
- if (op_add_code(cpu_buf, backtrace, is_kernel, current))
+ if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
goto fail;
if (op_add_sample(cpu_buf, pc, event))
@@ -329,16 +294,17 @@ static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
static inline void
__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
- unsigned long event, int is_kernel)
+ unsigned long event, int is_kernel,
+ struct task_struct *task)
{
- struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+ struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
unsigned long backtrace = oprofile_backtrace_depth;
/*
* if log_sample() fail we can't backtrace since we lost the
* source of this event
*/
- if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event))
+ if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task))
/* failed */
return;
@@ -350,18 +316,33 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
oprofile_end_trace(cpu_buf);
}
+void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
+ unsigned long event, int is_kernel,
+ struct task_struct *task)
+{
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
+}
+
void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
unsigned long event, int is_kernel)
{
- __oprofile_add_ext_sample(pc, regs, event, is_kernel);
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}
void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
- int is_kernel = !user_mode(regs);
- unsigned long pc = profile_pc(regs);
+ int is_kernel;
+ unsigned long pc;
+
+ if (likely(regs)) {
+ is_kernel = !user_mode(regs);
+ pc = profile_pc(regs);
+ } else {
+ is_kernel = 0; /* This value will not be used */
+ pc = ESCAPE_CODE; /* as this causes an early return. */
+ }
- __oprofile_add_ext_sample(pc, regs, event, is_kernel);
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}
/*
@@ -376,7 +357,7 @@ oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs,
{
struct op_sample *sample;
int is_kernel = !user_mode(regs);
- struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+ struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
cpu_buf->sample_received++;
@@ -407,6 +388,21 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val)
return op_cpu_buffer_add_data(entry, val);
}
+int oprofile_add_data64(struct op_entry *entry, u64 val)
+{
+ if (!entry->event)
+ return 0;
+ if (op_cpu_buffer_get_size(entry) < 2)
+ /*
+ * the function returns 0 to indicate a too small
+ * buffer, even if there is some space left
+ */
+ return 0;
+ if (!op_cpu_buffer_add_data(entry, (u32)val))
+ return 0;
+ return op_cpu_buffer_add_data(entry, (u32)(val >> 32));
+}
+
int oprofile_write_commit(struct op_entry *entry)
{
if (!entry->event)
@@ -416,13 +412,13 @@ int oprofile_write_commit(struct op_entry *entry)
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
- struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
- log_sample(cpu_buf, pc, 0, is_kernel, event);
+ struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
+ log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}
void oprofile_add_trace(unsigned long pc)
{
- struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
+ struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
if (!cpu_buf->tracing)
return;
@@ -455,14 +451,9 @@ static void wq_sync_buffer(struct work_struct *work)
{
struct oprofile_cpu_buffer *b =
container_of(work, struct oprofile_cpu_buffer, work.work);
- if (b->cpu != smp_processor_id()) {
- printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
- smp_processor_id(), b->cpu);
-
- if (!cpu_online(b->cpu)) {
- cancel_delayed_work(&b->work);
- return;
- }
+ if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) {
+ cancel_delayed_work(&b->work);
+ return;
}
sync_buffer(b->cpu);