diff options
Diffstat (limited to 'drivers/oprofile/cpu_buffer.c')
| -rw-r--r-- | drivers/oprofile/cpu_buffer.c | 157 |
1 files changed, 74 insertions, 83 deletions
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 242257b1944..8aa73fac6ad 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -21,7 +21,6 @@ #include <linux/sched.h> #include <linux/oprofile.h> -#include <linux/vmalloc.h> #include <linux/errno.h> #include "event_buffer.h" @@ -31,24 +30,8 @@ #define OP_BUFFER_FLAGS 0 -/* - * Read and write access is using spin locking. Thus, writing to the - * buffer by NMI handler (x86) could occur also during critical - * sections when reading the buffer. To avoid this, there are 2 - * buffers for independent read and write access. Read access is in - * process context only, write access only in the NMI handler. If the - * read buffer runs empty, both buffers are swapped atomically. There - * is potentially a small window during swapping where the buffers are - * disabled and samples could be lost. - * - * Using 2 buffers is a little bit overhead, but the solution is clear - * and does not require changes in the ring buffer implementation. It - * can be changed to a single buffer solution when the ring buffer - * access is implemented as non-locking atomic code. - */ -static struct ring_buffer *op_ring_buffer_read; -static struct ring_buffer *op_ring_buffer_write; -DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); +static struct ring_buffer *op_ring_buffer; +DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer); static void wq_sync_buffer(struct work_struct *work); @@ -62,20 +45,16 @@ unsigned long oprofile_get_cpu_buffer_size(void) void oprofile_cpu_buffer_inc_smpl_lost(void) { - struct oprofile_cpu_buffer *cpu_buf - = &__get_cpu_var(cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); cpu_buf->sample_lost_overflow++; } void free_cpu_buffers(void) { - if (op_ring_buffer_read) - ring_buffer_free(op_ring_buffer_read); - op_ring_buffer_read = NULL; - if (op_ring_buffer_write) - ring_buffer_free(op_ring_buffer_write); - op_ring_buffer_write = NULL; + if (op_ring_buffer) + ring_buffer_free(op_ring_buffer); + op_ring_buffer = NULL; } #define RB_EVENT_HDR_SIZE 4 @@ -88,15 +67,12 @@ int alloc_cpu_buffers(void) unsigned long byte_size = buffer_size * (sizeof(struct op_sample) + RB_EVENT_HDR_SIZE); - op_ring_buffer_read = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); - if (!op_ring_buffer_read) - goto fail; - op_ring_buffer_write = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); - if (!op_ring_buffer_write) + op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); + if (!op_ring_buffer) goto fail; for_each_possible_cpu(i) { - struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); + struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i); b->last_task = NULL; b->last_is_kernel = -1; @@ -123,7 +99,7 @@ void start_cpu_work(void) work_enabled = 1; for_each_online_cpu(i) { - struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); + struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i); /* * Spread the work by 1 jiffy per cpu so they dont all @@ -135,17 +111,19 @@ void start_cpu_work(void) void end_cpu_work(void) { - int i; - work_enabled = 0; +} + +void flush_cpu_work(void) +{ + int i; for_each_online_cpu(i) { - struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); + struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i); - cancel_delayed_work(&b->work); + /* these works are per-cpu, no need for flush_sync */ + flush_delayed_work(&b->work); } - - flush_scheduled_work(); } /* @@ -164,16 +142,11 @@ struct op_sample *op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size) { entry->event = ring_buffer_lock_reserve - (op_ring_buffer_write, sizeof(struct op_sample) + + (op_ring_buffer, sizeof(struct op_sample) + size * sizeof(entry->sample->data[0])); - if (entry->event) - entry->sample = ring_buffer_event_data(entry->event); - else - entry->sample = NULL; - - if (!entry->sample) + if (!entry->event) return NULL; - + entry->sample = ring_buffer_event_data(entry->event); entry->size = size; entry->data = entry->sample->data; @@ -182,25 +155,16 @@ struct op_sample int op_cpu_buffer_write_commit(struct op_entry *entry) { - return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event); + return ring_buffer_unlock_commit(op_ring_buffer, entry->event); } struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) { struct ring_buffer_event *e; - e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); - if (e) - goto event; - if (ring_buffer_swap_cpu(op_ring_buffer_read, - op_ring_buffer_write, - cpu)) + e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL); + if (!e) return NULL; - e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL); - if (e) - goto event; - return NULL; -event: entry->event = e; entry->sample = ring_buffer_event_data(e); entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample)) @@ -211,8 +175,7 @@ event: unsigned long op_cpu_buffer_entries(int cpu) { - return ring_buffer_entries_cpu(op_ring_buffer_read, cpu) - + ring_buffer_entries_cpu(op_ring_buffer_write, cpu); + return ring_buffer_entries_cpu(op_ring_buffer, cpu); } static int @@ -295,8 +258,10 @@ op_add_sample(struct oprofile_cpu_buffer *cpu_buf, */ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, - unsigned long backtrace, int is_kernel, unsigned long event) + unsigned long backtrace, int is_kernel, unsigned long event, + struct task_struct *task) { + struct task_struct *tsk = task ? task : current; cpu_buf->sample_received++; if (pc == ESCAPE_CODE) { @@ -304,7 +269,7 @@ log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, return 0; } - if (op_add_code(cpu_buf, backtrace, is_kernel, current)) + if (op_add_code(cpu_buf, backtrace, is_kernel, tsk)) goto fail; if (op_add_sample(cpu_buf, pc, event)) @@ -329,16 +294,17 @@ static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) static inline void __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, - unsigned long event, int is_kernel) + unsigned long event, int is_kernel, + struct task_struct *task) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); unsigned long backtrace = oprofile_backtrace_depth; /* * if log_sample() fail we can't backtrace since we lost the * source of this event */ - if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event)) + if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task)) /* failed */ return; @@ -350,18 +316,33 @@ __oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, oprofile_end_trace(cpu_buf); } +void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel, + struct task_struct *task) +{ + __oprofile_add_ext_sample(pc, regs, event, is_kernel, task); +} + void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, unsigned long event, int is_kernel) { - __oprofile_add_ext_sample(pc, regs, event, is_kernel); + __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL); } void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) { - int is_kernel = !user_mode(regs); - unsigned long pc = profile_pc(regs); + int is_kernel; + unsigned long pc; + + if (likely(regs)) { + is_kernel = !user_mode(regs); + pc = profile_pc(regs); + } else { + is_kernel = 0; /* This value will not be used */ + pc = ESCAPE_CODE; /* as this causes an early return. */ + } - __oprofile_add_ext_sample(pc, regs, event, is_kernel); + __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL); } /* @@ -376,7 +357,7 @@ oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, { struct op_sample *sample; int is_kernel = !user_mode(regs); - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); cpu_buf->sample_received++; @@ -407,6 +388,21 @@ int oprofile_add_data(struct op_entry *entry, unsigned long val) return op_cpu_buffer_add_data(entry, val); } +int oprofile_add_data64(struct op_entry *entry, u64 val) +{ + if (!entry->event) + return 0; + if (op_cpu_buffer_get_size(entry) < 2) + /* + * the function returns 0 to indicate a too small + * buffer, even if there is some space left + */ + return 0; + if (!op_cpu_buffer_add_data(entry, (u32)val)) + return 0; + return op_cpu_buffer_add_data(entry, (u32)(val >> 32)); +} + int oprofile_write_commit(struct op_entry *entry) { if (!entry->event) @@ -416,13 +412,13 @@ int oprofile_write_commit(struct op_entry *entry) void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); - log_sample(cpu_buf, pc, 0, is_kernel, event); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + log_sample(cpu_buf, pc, 0, is_kernel, event, NULL); } void oprofile_add_trace(unsigned long pc) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); if (!cpu_buf->tracing) return; @@ -455,14 +451,9 @@ static void wq_sync_buffer(struct work_struct *work) { struct oprofile_cpu_buffer *b = container_of(work, struct oprofile_cpu_buffer, work.work); - if (b->cpu != smp_processor_id()) { - printk(KERN_DEBUG "WQ on CPU%d, prefer CPU%d\n", - smp_processor_id(), b->cpu); - - if (!cpu_online(b->cpu)) { - cancel_delayed_work(&b->work); - return; - } + if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) { + cancel_delayed_work(&b->work); + return; } sync_buffer(b->cpu); |
