about | summary | refs | log | tree | commit | diff
path: root/drivers/oprofile/cpu_buffer.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/oprofile/cpu_buffer.c')
-rw-r--r-- drivers/oprofile/cpu_buffer.c | 122
1 files changed, 50 insertions, 72 deletions
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 166b67ea622..8aa73fac6ad 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -30,23 +30,7 @@
#define OP_BUFFER_FLAGS 0
-/*
- * Read and write access is using spin locking. Thus, writing to the
- * buffer by NMI handler (x86) could occur also during critical
- * sections when reading the buffer. To avoid this, there are 2
- * buffers for independent read and write access. Read access is in
- * process context only, write access only in the NMI handler. If the
- * read buffer runs empty, both buffers are swapped atomically. There
- * is potentially a small window during swapping where the buffers are
- * disabled and samples could be lost.
- *
- * Using 2 buffers is a little bit overhead, but the solution is clear
- * and does not require changes in the ring buffer implementation. It
- * can be changed to a single buffer solution when the ring buffer
- * access is implemented as non-locking atomic code.
- */
-static struct ring_buffer *op_ring_buffer_read;
-static struct ring_buffer *op_ring_buffer_write;
+static struct ring_buffer *op_ring_buffer;
DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer);
static void wq_sync_buffer(struct work_struct *work);
@@ -68,12 +52,9 @@ void oprofile_cpu_buffer_inc_smpl_lost(void)
void free_cpu_buffers(void)
{
- if (op_ring_buffer_read)
- ring_buffer_free(op_ring_buffer_read);
- op_ring_buffer_read = NULL;
- if (op_ring_buffer_write)
- ring_buffer_free(op_ring_buffer_write);
- op_ring_buffer_write = NULL;
+ if (op_ring_buffer)
+ ring_buffer_free(op_ring_buffer);
+ op_ring_buffer = NULL;
}
#define RB_EVENT_HDR_SIZE 4
@@ -86,11 +67,8 @@ int alloc_cpu_buffers(void)
unsigned long byte_size = buffer_size * (sizeof(struct op_sample) +
RB_EVENT_HDR_SIZE);
- op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
- if (!op_ring_buffer_read)
- goto fail;
- op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
- if (!op_ring_buffer_write)
+ op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS);
+ if (!op_ring_buffer)
goto fail;
for_each_possible_cpu(i) {
@@ -133,17 +111,19 @@ void start_cpu_work(void)
void end_cpu_work(void)
{
- int i;
-
work_enabled = 0;
+}
+
+void flush_cpu_work(void)
+{
+ int i;
for_each_online_cpu(i) {
struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i);
- cancel_delayed_work(&b->work);
+ /* these works are per-cpu, no need for flush_sync */
+ flush_delayed_work(&b->work);
}
-
- flush_scheduled_work();
}
/*
@@ -162,16 +142,11 @@ struct op_sample
*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size)
{
entry->event = ring_buffer_lock_reserve
- (op_ring_buffer_write, sizeof(struct op_sample) +
+ (op_ring_buffer, sizeof(struct op_sample) +
size * sizeof(entry->sample->data[0]));
- if (entry->event)
- entry->sample = ring_buffer_event_data(entry->event);
- else
- entry->sample = NULL;
-
- if (!entry->sample)
+ if (!entry->event)
return NULL;
-
+ entry->sample = ring_buffer_event_data(entry->event);
entry->size = size;
entry->data = entry->sample->data;
@@ -180,25 +155,16 @@ struct op_sample
int op_cpu_buffer_write_commit(struct op_entry *entry)
{
- return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event);
+ return ring_buffer_unlock_commit(op_ring_buffer, entry->event);
}
struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu)
{
struct ring_buffer_event *e;
- e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
- if (e)
- goto event;
- if (ring_buffer_swap_cpu(op_ring_buffer_read,
- op_ring_buffer_write,
- cpu))
+ e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL);
+ if (!e)
return NULL;
- e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
- if (e)
- goto event;
- return NULL;
-event:
entry->event = e;
entry->sample = ring_buffer_event_data(e);
entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample))
@@ -209,8 +175,7 @@ event:
unsigned long op_cpu_buffer_entries(int cpu)
{
- return ring_buffer_entries_cpu(op_ring_buffer_read, cpu)
- + ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
+ return ring_buffer_entries_cpu(op_ring_buffer, cpu);
}
static int
@@ -293,8 +258,10 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf,
*/
static int
log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
- unsigned long backtrace, int is_kernel, unsigned long event)
+ unsigned long backtrace, int is_kernel, unsigned long event,
+ struct task_struct *task)
{
+ struct task_struct *tsk = task ? task : current;
cpu_buf->sample_received++;
if (pc == ESCAPE_CODE) {
@@ -302,7 +269,7 @@ log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
return 0;
}
- if (op_add_code(cpu_buf, backtrace, is_kernel, current))
+ if (op_add_code(cpu_buf, backtrace, is_kernel, tsk))
goto fail;
if (op_add_sample(cpu_buf, pc, event))
@@ -327,7 +294,8 @@ static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf)
static inline void
__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
- unsigned long event, int is_kernel)
+ unsigned long event, int is_kernel,
+ struct task_struct *task)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
unsigned long backtrace = oprofile_backtrace_depth;
@@ -336,7 +304,7 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
* if log_sample() fail we can't backtrace since we lost the
* source of this event
*/
- if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event))
+ if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task))
/* failed */
return;
@@ -348,18 +316,33 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
oprofile_end_trace(cpu_buf);
}
+void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs,
+ unsigned long event, int is_kernel,
+ struct task_struct *task)
+{
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, task);
+}
+
void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
unsigned long event, int is_kernel)
{
- __oprofile_add_ext_sample(pc, regs, event, is_kernel);
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}
void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
{
- int is_kernel = !user_mode(regs);
- unsigned long pc = profile_pc(regs);
+ int is_kernel;
+ unsigned long pc;
+
+ if (likely(regs)) {
+ is_kernel = !user_mode(regs);
+ pc = profile_pc(regs);
+ } else {
+ is_kernel = 0; /* This value will not be used */
+ pc = ESCAPE_CODE; /* as this causes an early return. */
+ }
- __oprofile_add_ext_sample(pc, regs, event, is_kernel);
+ __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL);
}
/*
@@ -430,7 +413,7 @@ int oprofile_write_commit(struct op_entry *entry)
void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event)
{
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer);
- log_sample(cpu_buf, pc, 0, is_kernel, event);
+ log_sample(cpu_buf, pc, 0, is_kernel, event, NULL);
}
void oprofile_add_trace(unsigned long pc)
@@ -468,14 +451,9 @@ static void wq_sync_buffer(struct work_struct *work)
{
struct oprofile_cpu_buffer *b =
container_of(work, struct oprofile_cpu_buffer, work.work);
- if (b->cpu != smp_processor_id()) {
- printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n",
- smp_processor_id(), b->cpu);
-
- if (!cpu_online(b->cpu)) {
- cancel_delayed_work(&b->work);
- return;
- }
+ if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) {
+ cancel_delayed_work(&b->work);
+ return;
}
sync_buffer(b->cpu);