diff options
Diffstat (limited to 'drivers/oprofile')
| -rw-r--r-- | drivers/oprofile/buffer_sync.c | 338 | ||||
| -rw-r--r-- | drivers/oprofile/buffer_sync.h | 4 | ||||
| -rw-r--r-- | drivers/oprofile/cpu_buffer.c | 422 | ||||
| -rw-r--r-- | drivers/oprofile/cpu_buffer.h | 88 | ||||
| -rw-r--r-- | drivers/oprofile/event_buffer.c | 84 | ||||
| -rw-r--r-- | drivers/oprofile/event_buffer.h | 19 | ||||
| -rw-r--r-- | drivers/oprofile/nmi_timer_int.c | 176 | ||||
| -rw-r--r-- | drivers/oprofile/oprof.c | 150 | ||||
| -rw-r--r-- | drivers/oprofile/oprof.h | 35 | ||||
| -rw-r--r-- | drivers/oprofile/oprofile_files.c | 136 | ||||
| -rw-r--r-- | drivers/oprofile/oprofile_perf.c | 327 | ||||
| -rw-r--r-- | drivers/oprofile/oprofile_stats.c | 54 | ||||
| -rw-r--r-- | drivers/oprofile/oprofile_stats.h | 14 | ||||
| -rw-r--r-- | drivers/oprofile/oprofilefs.c | 194 | ||||
| -rw-r--r-- | drivers/oprofile/timer_int.c | 100 |
15 files changed, 1571 insertions, 570 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index 9304c455507..d93b2b6b1f7 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -1,10 +1,12 @@ /** * @file buffer_sync.c * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon <levon@movementarian.org> + * @author Barry Kasindorf + * @author Robert Richter <robert.richter@amd.com> * * This is the core of the buffer management. Each * CPU buffer is processed and entered into the @@ -28,19 +30,19 @@ #include <linux/fs.h> #include <linux/oprofile.h> #include <linux/sched.h> +#include <linux/gfp.h> #include "oprofile_stats.h" #include "event_buffer.h" #include "cpu_buffer.h" #include "buffer_sync.h" - + static LIST_HEAD(dying_tasks); static LIST_HEAD(dead_tasks); -static cpumask_t marked_cpus = CPU_MASK_NONE; +static cpumask_var_t marked_cpus; static DEFINE_SPINLOCK(task_mortuary); static void process_task_mortuary(void); - /* Take ownership of the task struct and place it on the * list for processing. Only after two full buffer syncs * does the task eventually get freed, because by then @@ -48,10 +50,11 @@ static void process_task_mortuary(void); * Can be invoked from softirq via RCU callback due to * call_rcu() of the task struct, hence the _irqsave. */ -static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) +static int +task_free_notify(struct notifier_block *self, unsigned long val, void *data) { unsigned long flags; - struct task_struct * task = data; + struct task_struct *task = data; spin_lock_irqsave(&task_mortuary, flags); list_add(&task->tasks, &dying_tasks); spin_unlock_irqrestore(&task_mortuary, flags); @@ -62,13 +65,14 @@ static int task_free_notify(struct notifier_block * self, unsigned long val, voi /* The task is on its way out. A sync of the buffer means we can catch * any remaining samples for this task. */ -static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data) +static int +task_exit_notify(struct notifier_block *self, unsigned long val, void *data) { /* To avoid latency problems, we only process the current CPU, * hoping that most samples for the task are on this CPU */ sync_buffer(raw_smp_processor_id()); - return 0; + return 0; } @@ -77,11 +81,12 @@ static int task_exit_notify(struct notifier_block * self, unsigned long val, voi * we don't lose any. This does not have to be exact, it's a QoI issue * only. */ -static int munmap_notify(struct notifier_block * self, unsigned long val, void * data) +static int +munmap_notify(struct notifier_block *self, unsigned long val, void *data) { unsigned long addr = (unsigned long)data; - struct mm_struct * mm = current->mm; - struct vm_area_struct * mpnt; + struct mm_struct *mm = current->mm; + struct vm_area_struct *mpnt; down_read(&mm->mmap_sem); @@ -99,11 +104,12 @@ static int munmap_notify(struct notifier_block * self, unsigned long val, void * return 0; } - + /* We need to be told about new modules so we don't attribute to a previously * loaded module, or drop the samples on the floor. */ -static int module_load_notify(struct notifier_block * self, unsigned long val, void * data) +static int +module_load_notify(struct notifier_block *self, unsigned long val, void *data) { #ifdef CONFIG_MODULES if (val != MODULE_STATE_COMING) @@ -118,7 +124,7 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, v return 0; } - + static struct notifier_block task_free_nb = { .notifier_call = task_free_notify, }; @@ -135,21 +141,19 @@ static struct notifier_block module_load_nb = { .notifier_call = module_load_notify, }; - -static void end_sync(void) +static void free_all_tasks(void) { - end_cpu_work(); /* make sure we don't leak task structs */ process_task_mortuary(); process_task_mortuary(); } - int sync_start(void) { int err; - start_cpu_work(); + if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL)) + return -ENOMEM; err = task_handoff_register(&task_free_nb); if (err) @@ -164,6 +168,8 @@ int sync_start(void) if (err) goto out4; + start_cpu_work(); + out: return err; out4: @@ -172,19 +178,26 @@ out3: profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); out2: task_handoff_unregister(&task_free_nb); + free_all_tasks(); out1: - end_sync(); + free_cpumask_var(marked_cpus); goto out; } void sync_stop(void) { + end_cpu_work(); unregister_module_notifier(&module_load_nb); profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); task_handoff_unregister(&task_free_nb); - end_sync(); + barrier(); /* do all of the above first */ + + flush_cpu_work(); + + free_all_tasks(); + free_cpumask_var(marked_cpus); } @@ -196,36 +209,25 @@ static inline unsigned long fast_get_dcookie(struct path *path) { unsigned long cookie; - if (path->dentry->d_cookie) + if (path->dentry->d_flags & DCACHE_COOKIE) return (unsigned long)path->dentry; get_dcookie(path, &cookie); return cookie; } -/* Look up the dcookie for the task's first VM_EXECUTABLE mapping, +/* Look up the dcookie for the task's mm->exe_file, * which corresponds loosely to "application name". This is * not strictly necessary but allows oprofile to associate * shared-library samples with particular applications */ -static unsigned long get_exec_dcookie(struct mm_struct * mm) +static unsigned long get_exec_dcookie(struct mm_struct *mm) { unsigned long cookie = NO_COOKIE; - struct vm_area_struct * vma; - - if (!mm) - goto out; - - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (!vma->vm_file) - continue; - if (!(vma->vm_flags & VM_EXECUTABLE)) - continue; - cookie = fast_get_dcookie(&vma->vm_file->f_path); - break; - } -out: + if (mm && mm->exe_file) + cookie = fast_get_dcookie(&mm->exe_file->f_path); + return cookie; } @@ -235,13 +237,14 @@ out: * sure to do this lookup before a mm->mmap modification happens so * we don't lose track. */ -static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset) +static unsigned long +lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset) { unsigned long cookie = NO_COOKIE; - struct vm_area_struct * vma; + struct vm_area_struct *vma; for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { - + if (addr < vma->vm_start || addr >= vma->vm_end) continue; @@ -263,9 +266,8 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o return cookie; } - static unsigned long last_cookie = INVALID_COOKIE; - + static void add_cpu_switch(int i) { add_event_entry(ESCAPE_CODE); @@ -278,16 +280,16 @@ static void add_kernel_ctx_switch(unsigned int in_kernel) { add_event_entry(ESCAPE_CODE); if (in_kernel) - add_event_entry(KERNEL_ENTER_SWITCH_CODE); + add_event_entry(KERNEL_ENTER_SWITCH_CODE); else - add_event_entry(KERNEL_EXIT_SWITCH_CODE); + add_event_entry(KERNEL_EXIT_SWITCH_CODE); } - + static void -add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) +add_user_ctx_switch(struct task_struct const *task, unsigned long cookie) { add_event_entry(ESCAPE_CODE); - add_event_entry(CTX_SWITCH_CODE); + add_event_entry(CTX_SWITCH_CODE); add_event_entry(task->pid); add_event_entry(cookie); /* Another code for daemon back-compat */ @@ -296,7 +298,7 @@ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) add_event_entry(task->tgid); } - + static void add_cookie_switch(unsigned long cookie) { add_event_entry(ESCAPE_CODE); @@ -304,28 +306,82 @@ static void add_cookie_switch(unsigned long cookie) add_event_entry(cookie); } - + static void add_trace_begin(void) { add_event_entry(ESCAPE_CODE); add_event_entry(TRACE_BEGIN_CODE); } +static void add_data(struct op_entry *entry, struct mm_struct *mm) +{ + unsigned long code, pc, val; + unsigned long cookie; + off_t offset; + + if (!op_cpu_buffer_get_data(entry, &code)) + return; + if (!op_cpu_buffer_get_data(entry, &pc)) + return; + if (!op_cpu_buffer_get_size(entry)) + return; + + if (mm) { + cookie = lookup_dcookie(mm, pc, &offset); + + if (cookie == NO_COOKIE) + offset = pc; + if (cookie == INVALID_COOKIE) { + atomic_inc(&oprofile_stats.sample_lost_no_mapping); + offset = pc; + } + if (cookie != last_cookie) { + add_cookie_switch(cookie); + last_cookie = cookie; + } + } else + offset = pc; + + add_event_entry(ESCAPE_CODE); + add_event_entry(code); + add_event_entry(offset); /* Offset from Dcookie */ + + while (op_cpu_buffer_get_data(entry, &val)) + add_event_entry(val); +} -static void add_sample_entry(unsigned long offset, unsigned long event) +static inline void add_sample_entry(unsigned long offset, unsigned long event) { add_event_entry(offset); add_event_entry(event); } -static int add_us_sample(struct mm_struct * mm, struct op_sample * s) +/* + * Add a sample to the global event buffer. If possible the + * sample is converted into a persistent dentry/offset pair + * for later lookup from userspace. Return 0 on failure. + */ +static int +add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) { unsigned long cookie; off_t offset; - - cookie = lookup_dcookie(mm, s->eip, &offset); - + + if (in_kernel) { + add_sample_entry(s->eip, s->event); + return 1; + } + + /* add userspace sample */ + + if (!mm) { + atomic_inc(&oprofile_stats.sample_lost_no_mm); + return 0; + } + + cookie = lookup_dcookie(mm, s->eip, &offset); + if (cookie == INVALID_COOKIE) { atomic_inc(&oprofile_stats.sample_lost_no_mapping); return 0; @@ -341,27 +397,8 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s) return 1; } - -/* Add a sample to the global event buffer. If possible the - * sample is converted into a persistent dentry/offset pair - * for later lookup from userspace. - */ -static int -add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) -{ - if (in_kernel) { - add_sample_entry(s->eip, s->event); - return 1; - } else if (mm) { - return add_us_sample(mm, s); - } else { - atomic_inc(&oprofile_stats.sample_lost_no_mm); - } - return 0; -} - -static void release_mm(struct mm_struct * mm) +static void release_mm(struct mm_struct *mm) { if (!mm) return; @@ -370,9 +407,9 @@ static void release_mm(struct mm_struct * mm) } -static struct mm_struct * take_tasks_mm(struct task_struct * task) +static struct mm_struct *take_tasks_mm(struct task_struct *task) { - struct mm_struct * mm = get_task_mm(task); + struct mm_struct *mm = get_task_mm(task); if (mm) down_read(&mm->mmap_sem); return mm; @@ -383,46 +420,6 @@ static inline int is_code(unsigned long val) { return val == ESCAPE_CODE; } - - -/* "acquire" as many cpu buffer slots as we can */ -static unsigned long get_slots(struct oprofile_cpu_buffer * b) -{ - unsigned long head = b->head_pos; - unsigned long tail = b->tail_pos; - - /* - * Subtle. This resets the persistent last_task - * and in_kernel values used for switching notes. - * BUT, there is a small window between reading - * head_pos, and this call, that means samples - * can appear at the new head position, but not - * be prefixed with the notes for switching - * kernel mode or a task switch. This small hole - * can lead to mis-attribution or samples where - * we don't know if it's in the kernel or not, - * at the start of an event buffer. - */ - cpu_buffer_reset(b); - - if (head >= tail) - return head - tail; - - return head + (b->buffer_size - tail); -} - - -static void increment_tail(struct oprofile_cpu_buffer * b) -{ - unsigned long new_tail = b->tail_pos + 1; - - rmb(); - - if (new_tail < b->buffer_size) - b->tail_pos = new_tail; - else - b->tail_pos = 0; -} /* Move tasks along towards death. Any tasks on dead_tasks @@ -435,8 +432,8 @@ static void process_task_mortuary(void) { unsigned long flags; LIST_HEAD(local_dead_tasks); - struct task_struct * task; - struct task_struct * ttask; + struct task_struct *task; + struct task_struct *ttask; spin_lock_irqsave(&task_mortuary, flags); @@ -456,10 +453,10 @@ static void mark_done(int cpu) { int i; - cpu_set(cpu, marked_cpus); + cpumask_set_cpu(cpu, marked_cpus); for_each_online_cpu(i) { - if (!cpu_isset(i, marked_cpus)) + if (!cpumask_test_cpu(i, marked_cpus)) return; } @@ -468,7 +465,7 @@ static void mark_done(int cpu) */ process_task_mortuary(); - cpus_clear(marked_cpus); + cpumask_clear(marked_cpus); } @@ -491,59 +488,72 @@ typedef enum { */ void sync_buffer(int cpu) { - struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu); struct mm_struct *mm = NULL; - struct task_struct * new; + struct mm_struct *oldmm; + unsigned long val; + struct task_struct *new; unsigned long cookie = 0; int in_kernel = 1; - unsigned int i; sync_buffer_state state = sb_buffer_start; + unsigned int i; unsigned long available; + unsigned long flags; + struct op_entry entry; + struct op_sample *sample; mutex_lock(&buffer_mutex); - - add_cpu_switch(cpu); - /* Remember, only we can modify tail_pos */ + add_cpu_switch(cpu); - available = get_slots(cpu_buf); + op_cpu_buffer_reset(cpu); + available = op_cpu_buffer_entries(cpu); for (i = 0; i < available; ++i) { - struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; - - if (is_code(s->eip)) { - if (s->event <= CPU_IS_KERNEL) { + sample = op_cpu_buffer_read_entry(&entry, cpu); + if (!sample) + break; + + if (is_code(sample->eip)) { + flags = sample->event; + if (flags & TRACE_BEGIN) { + state = sb_bt_start; + add_trace_begin(); + } + if (flags & KERNEL_CTX_SWITCH) { /* kernel/userspace switch */ - in_kernel = s->event; + in_kernel = flags & IS_KERNEL; if (state == sb_buffer_start) state = sb_sample_start; - add_kernel_ctx_switch(s->event); - } else if (s->event == CPU_TRACE_BEGIN) { - state = sb_bt_start; - add_trace_begin(); - } else { - struct mm_struct * oldmm = mm; - + add_kernel_ctx_switch(flags & IS_KERNEL); + } + if (flags & USER_CTX_SWITCH + && op_cpu_buffer_get_data(&entry, &val)) { /* userspace context switch */ - new = (struct task_struct *)s->event; - + new = (struct task_struct *)val; + oldmm = mm; release_mm(oldmm); mm = take_tasks_mm(new); if (mm != oldmm) cookie = get_exec_dcookie(mm); add_user_ctx_switch(new, cookie); } - } else { - if (state >= sb_bt_start && - !add_sample(mm, s, in_kernel)) { - if (state == sb_bt_start) { - state = sb_bt_ignore; - atomic_inc(&oprofile_stats.bt_lost_no_mapping); - } - } + if (op_cpu_buffer_get_size(&entry)) + add_data(&entry, mm); + continue; } - increment_tail(cpu_buf); + if (state < sb_bt_start) + /* ignore sample */ + continue; + + if (add_sample(mm, sample, in_kernel)) + continue; + + /* ignore backtraces if failed to add a sample */ + if (state == sb_bt_start) { + state = sb_bt_ignore; + atomic_inc(&oprofile_stats.bt_lost_no_mapping); + } } release_mm(mm); @@ -551,3 +561,27 @@ void sync_buffer(int cpu) mutex_unlock(&buffer_mutex); } + +/* The function can be used to add a buffer worth of data directly to + * the kernel buffer. The buffer is assumed to be a circular buffer. + * Take the entries from index start and end at index end, wrapping + * at max_entries. + */ +void oprofile_put_buff(unsigned long *buf, unsigned int start, + unsigned int stop, unsigned int max) +{ + int i; + + i = start; + + mutex_lock(&buffer_mutex); + while (i != stop) { + add_event_entry(buf[i++]); + + if (i >= max) + i = 0; + } + + mutex_unlock(&buffer_mutex); +} + diff --git a/drivers/oprofile/buffer_sync.h b/drivers/oprofile/buffer_sync.h index 08866f6a96a..3110732c183 100644 --- a/drivers/oprofile/buffer_sync.h +++ b/drivers/oprofile/buffer_sync.h @@ -9,13 +9,13 @@ #ifndef OPROFILE_BUFFER_SYNC_H #define OPROFILE_BUFFER_SYNC_H - + /* add the necessary profiling hooks */ int sync_start(void); /* remove the hooks */ void sync_stop(void); - + /* sync the given CPU's buffer */ void sync_buffer(int cpu); diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c index 2450b3a393f..8aa73fac6ad 100644 --- a/drivers/oprofile/cpu_buffer.c +++ b/drivers/oprofile/cpu_buffer.c @@ -1,10 +1,12 @@ /** * @file cpu_buffer.c * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon <levon@movementarian.org> + * @author Barry Kasindorf <barry.kasindorf@amd.com> + * @author Robert Richter <robert.richter@amd.com> * * Each CPU has a local buffer that stores PC value/event * pairs. We also log context switches when we notice them. @@ -19,49 +21,63 @@ #include <linux/sched.h> #include <linux/oprofile.h> -#include <linux/vmalloc.h> #include <linux/errno.h> - + #include "event_buffer.h" #include "cpu_buffer.h" #include "buffer_sync.h" #include "oprof.h" -DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); +#define OP_BUFFER_FLAGS 0 + +static struct ring_buffer *op_ring_buffer; +DEFINE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer); static void wq_sync_buffer(struct work_struct *work); #define DEFAULT_TIMER_EXPIRE (HZ / 10) static int work_enabled; +unsigned long oprofile_get_cpu_buffer_size(void) +{ + return oprofile_cpu_buffer_size; +} + +void oprofile_cpu_buffer_inc_smpl_lost(void) +{ + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + + cpu_buf->sample_lost_overflow++; +} + void free_cpu_buffers(void) { - int i; - - for_each_online_cpu(i) - vfree(per_cpu(cpu_buffer, i).buffer); + if (op_ring_buffer) + ring_buffer_free(op_ring_buffer); + op_ring_buffer = NULL; } +#define RB_EVENT_HDR_SIZE 4 + int alloc_cpu_buffers(void) { int i; - - unsigned long buffer_size = fs_cpu_buffer_size; - - for_each_online_cpu(i) { - struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); - - b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size, - cpu_to_node(i)); - if (!b->buffer) - goto fail; - + + unsigned long buffer_size = oprofile_cpu_buffer_size; + unsigned long byte_size = buffer_size * (sizeof(struct op_sample) + + RB_EVENT_HDR_SIZE); + + op_ring_buffer = ring_buffer_alloc(byte_size, OP_BUFFER_FLAGS); + if (!op_ring_buffer) + goto fail; + + for_each_possible_cpu(i) { + struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i); + b->last_task = NULL; b->last_is_kernel = -1; b->tracing = 0; b->buffer_size = buffer_size; - b->tail_pos = 0; - b->head_pos = 0; b->sample_received = 0; b->sample_lost_overflow = 0; b->backtrace_aborted = 0; @@ -83,7 +99,7 @@ void start_cpu_work(void) work_enabled = 1; for_each_online_cpu(i) { - struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); + struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i); /* * Spread the work by 1 jiffy per cpu so they dont all @@ -95,86 +111,157 @@ void start_cpu_work(void) void end_cpu_work(void) { - int i; - work_enabled = 0; +} + +void flush_cpu_work(void) +{ + int i; for_each_online_cpu(i) { - struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i); + struct oprofile_cpu_buffer *b = &per_cpu(op_cpu_buffer, i); - cancel_delayed_work(&b->work); + /* these works are per-cpu, no need for flush_sync */ + flush_delayed_work(&b->work); } - - flush_scheduled_work(); } -/* Resets the cpu buffer to a sane state. */ -void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf) +/* + * This function prepares the cpu buffer to write a sample. + * + * Struct op_entry is used during operations on the ring buffer while + * struct op_sample contains the data that is stored in the ring + * buffer. Struct entry can be uninitialized. The function reserves a + * data array that is specified by size. Use + * op_cpu_buffer_write_commit() after preparing the sample. In case of + * errors a null pointer is returned, otherwise the pointer to the + * sample. + * + */ +struct op_sample +*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size) { - /* reset these to invalid values; the next sample - * collected will populate the buffer with proper - * values to initialize the buffer - */ - cpu_buf->last_is_kernel = -1; - cpu_buf->last_task = NULL; + entry->event = ring_buffer_lock_reserve + (op_ring_buffer, sizeof(struct op_sample) + + size * sizeof(entry->sample->data[0])); + if (!entry->event) + return NULL; + entry->sample = ring_buffer_event_data(entry->event); + entry->size = size; + entry->data = entry->sample->data; + + return entry->sample; } -/* compute number of available slots in cpu_buffer queue */ -static unsigned long nr_available_slots(struct oprofile_cpu_buffer const * b) +int op_cpu_buffer_write_commit(struct op_entry *entry) { - unsigned long head = b->head_pos; - unsigned long tail = b->tail_pos; + return ring_buffer_unlock_commit(op_ring_buffer, entry->event); +} - if (tail > head) - return (tail - head) - 1; +struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu) +{ + struct ring_buffer_event *e; + e = ring_buffer_consume(op_ring_buffer, cpu, NULL, NULL); + if (!e) + return NULL; + + entry->event = e; + entry->sample = ring_buffer_event_data(e); + entry->size = (ring_buffer_event_length(e) - sizeof(struct op_sample)) + / sizeof(entry->sample->data[0]); + entry->data = entry->sample->data; + return entry->sample; +} - return tail + (b->buffer_size - head) - 1; +unsigned long op_cpu_buffer_entries(int cpu) +{ + return ring_buffer_entries_cpu(op_ring_buffer, cpu); } -static void increment_head(struct oprofile_cpu_buffer * b) +static int +op_add_code(struct oprofile_cpu_buffer *cpu_buf, unsigned long backtrace, + int is_kernel, struct task_struct *task) { - unsigned long new_head = b->head_pos + 1; + struct op_entry entry; + struct op_sample *sample; + unsigned long flags; + int size; - /* Ensure anything written to the slot before we - * increment is visible */ - wmb(); + flags = 0; - if (new_head < b->buffer_size) - b->head_pos = new_head; + if (backtrace) + flags |= TRACE_BEGIN; + + /* notice a switch from user->kernel or vice versa */ + is_kernel = !!is_kernel; + if (cpu_buf->last_is_kernel != is_kernel) { + cpu_buf->last_is_kernel = is_kernel; + flags |= KERNEL_CTX_SWITCH; + if (is_kernel) + flags |= IS_KERNEL; + } + + /* notice a task switch */ + if (cpu_buf->last_task != task) { + cpu_buf->last_task = task; + flags |= USER_CTX_SWITCH; + } + + if (!flags) + /* nothing to do */ + return 0; + + if (flags & USER_CTX_SWITCH) + size = 1; else - b->head_pos = 0; -} + size = 0; -static inline void -add_sample(struct oprofile_cpu_buffer * cpu_buf, - unsigned long pc, unsigned long event) -{ - struct op_sample * entry = &cpu_buf->buffer[cpu_buf->head_pos]; - entry->eip = pc; - entry->event = event; - increment_head(cpu_buf); + sample = op_cpu_buffer_write_reserve(&entry, size); + if (!sample) + return -ENOMEM; + + sample->eip = ESCAPE_CODE; + sample->event = flags; + + if (size) + op_cpu_buffer_add_data(&entry, (unsigned long)task); + + op_cpu_buffer_write_commit(&entry); + + return 0; } -static inline void -add_code(struct oprofile_cpu_buffer * buffer, unsigned long value) +static inline int +op_add_sample(struct oprofile_cpu_buffer *cpu_buf, + unsigned long pc, unsigned long event) { - add_sample(buffer, ESCAPE_CODE, value); + struct op_entry entry; + struct op_sample *sample; + + sample = op_cpu_buffer_write_reserve(&entry, 0); + if (!sample) + return -ENOMEM; + + sample->eip = pc; + sample->event = event; + + return op_cpu_buffer_write_commit(&entry); } -/* This must be safe from any context. It's safe writing here - * because of the head/tail separation of the writer and reader - * of the CPU buffer. +/* + * This must be safe from any context. * * is_kernel is needed because on some architectures you cannot * tell if you are in kernel or user space simply by looking at * pc. We tag this in the buffer by generating kernel enter/exit * events whenever is_kernel changes */ -static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, - int is_kernel, unsigned long event) +static int +log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc, + unsigned long backtrace, int is_kernel, unsigned long event, + struct task_struct *task) { - struct task_struct * task; - + struct task_struct *tsk = task ? task : current; cpu_buf->sample_received++; if (pc == ESCAPE_CODE) { @@ -182,104 +269,175 @@ static int log_sample(struct oprofile_cpu_buffer * cpu_buf, unsigned long pc, return 0; } - if (nr_available_slots(cpu_buf) < 3) { - cpu_buf->sample_lost_overflow++; - return 0; - } - - is_kernel = !!is_kernel; - - task = current; + if (op_add_code(cpu_buf, backtrace, is_kernel, tsk)) + goto fail; - /* notice a switch from user->kernel or vice versa */ - if (cpu_buf->last_is_kernel != is_kernel) { - cpu_buf->last_is_kernel = is_kernel; - add_code(cpu_buf, is_kernel); - } + if (op_add_sample(cpu_buf, pc, event)) + goto fail; - /* notice a task switch */ - if (cpu_buf->last_task != task) { - cpu_buf->last_task = task; - add_code(cpu_buf, (unsigned long)task); - } - - add_sample(cpu_buf, pc, event); return 1; + +fail: + cpu_buf->sample_lost_overflow++; + return 0; } -static int oprofile_begin_trace(struct oprofile_cpu_buffer * cpu_buf) +static inline void oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf) { - if (nr_available_slots(cpu_buf) < 4) { - cpu_buf->sample_lost_overflow++; - return 0; - } - - add_code(cpu_buf, CPU_TRACE_BEGIN); cpu_buf->tracing = 1; - return 1; } -static void oprofile_end_trace(struct oprofile_cpu_buffer * cpu_buf) +static inline void oprofile_end_trace(struct oprofile_cpu_buffer *cpu_buf) { cpu_buf->tracing = 0; } -void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, - unsigned long event, int is_kernel) +static inline void +__oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel, + struct task_struct *task) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + unsigned long backtrace = oprofile_backtrace_depth; - if (!backtrace_depth) { - log_sample(cpu_buf, pc, is_kernel, event); + /* + * if log_sample() fail we can't backtrace since we lost the + * source of this event + */ + if (!log_sample(cpu_buf, pc, backtrace, is_kernel, event, task)) + /* failed */ return; - } - if (!oprofile_begin_trace(cpu_buf)) + if (!backtrace) return; - /* if log_sample() fail we can't backtrace since we lost the source - * of this event */ - if (log_sample(cpu_buf, pc, is_kernel, event)) - oprofile_ops.backtrace(regs, backtrace_depth); + oprofile_begin_trace(cpu_buf); + oprofile_ops.backtrace(regs, backtrace); oprofile_end_trace(cpu_buf); } +void oprofile_add_ext_hw_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel, + struct task_struct *task) +{ + __oprofile_add_ext_sample(pc, regs, event, is_kernel, task); +} + +void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs, + unsigned long event, int is_kernel) +{ + __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL); +} + void oprofile_add_sample(struct pt_regs * const regs, unsigned long event) { + int is_kernel; + unsigned long pc; + + if (likely(regs)) { + is_kernel = !user_mode(regs); + pc = profile_pc(regs); + } else { + is_kernel = 0; /* This value will not be used */ + pc = ESCAPE_CODE; /* as this causes an early return. */ + } + + __oprofile_add_ext_sample(pc, regs, event, is_kernel, NULL); +} + +/* + * Add samples with data to the ring buffer. + * + * Use oprofile_add_data(&entry, val) to add data and + * oprofile_write_commit(&entry) to commit the sample. + */ +void +oprofile_write_reserve(struct op_entry *entry, struct pt_regs * const regs, + unsigned long pc, int code, int size) +{ + struct op_sample *sample; int is_kernel = !user_mode(regs); - unsigned long pc = profile_pc(regs); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + + cpu_buf->sample_received++; + + /* no backtraces for samples with data */ + if (op_add_code(cpu_buf, 0, is_kernel, current)) + goto fail; + + sample = op_cpu_buffer_write_reserve(entry, size + 2); + if (!sample) + goto fail; + sample->eip = ESCAPE_CODE; + sample->event = 0; /* no flags */ - oprofile_add_ext_sample(pc, regs, event, is_kernel); + op_cpu_buffer_add_data(entry, code); + op_cpu_buffer_add_data(entry, pc); + + return; + +fail: + entry->event = NULL; + cpu_buf->sample_lost_overflow++; +} + +int oprofile_add_data(struct op_entry *entry, unsigned long val) +{ + if (!entry->event) + return 0; + return op_cpu_buffer_add_data(entry, val); +} + +int oprofile_add_data64(struct op_entry *entry, u64 val) +{ + if (!entry->event) + return 0; + if (op_cpu_buffer_get_size(entry) < 2) + /* + * the function returns 0 to indicate a too small + * buffer, even if there is some space left + */ + return 0; + if (!op_cpu_buffer_add_data(entry, (u32)val)) + return 0; + return op_cpu_buffer_add_data(entry, (u32)(val >> 32)); +} + +int oprofile_write_commit(struct op_entry *entry) +{ + if (!entry->event) + return -EINVAL; + return op_cpu_buffer_write_commit(entry); } void oprofile_add_pc(unsigned long pc, int is_kernel, unsigned long event) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); - log_sample(cpu_buf, pc, is_kernel, event); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); + log_sample(cpu_buf, pc, 0, is_kernel, event, NULL); } void oprofile_add_trace(unsigned long pc) { - struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer); + struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(op_cpu_buffer); if (!cpu_buf->tracing) return; - if (nr_available_slots(cpu_buf) < 1) { - cpu_buf->tracing = 0; - cpu_buf->sample_lost_overflow++; - return; - } + /* + * broken frame can give an eip with the same value as an + * escape code, abort the trace if we get it + */ + if (pc == ESCAPE_CODE) + goto fail; - /* broken frame can give an eip with the same value as an escape code, - * abort the trace if we get it */ - if (pc == ESCAPE_CODE) { - cpu_buf->tracing = 0; - cpu_buf->backtrace_aborted++; - return; - } + if (op_add_sample(cpu_buf, pc, 0)) + goto fail; - add_sample(cpu_buf, pc, 0); + return; +fail: + cpu_buf->tracing = 0; + cpu_buf->backtrace_aborted++; + return; } /* @@ -291,11 +449,11 @@ void oprofile_add_trace(unsigned long pc) */ static void wq_sync_buffer(struct work_struct *work) { - struct oprofile_cpu_buffer * b = + struct oprofile_cpu_buffer *b = container_of(work, struct oprofile_cpu_buffer, work.work); - if (b->cpu != smp_processor_id()) { - printk("WQ on CPU%d, prefer CPU%d\n", - smp_processor_id(), b->cpu); + if (b->cpu != smp_processor_id() && !cpu_online(b->cpu)) { + cancel_delayed_work(&b->work); + return; } sync_buffer(b->cpu); diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h index c3e366b5226..e1d097e250a 100644 --- a/drivers/oprofile/cpu_buffer.h +++ b/drivers/oprofile/cpu_buffer.h @@ -1,10 +1,11 @@ /** * @file cpu_buffer.h * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon <levon@movementarian.org> + * @author Robert Richter <robert.richter@amd.com> */ #ifndef OPROFILE_CPU_BUFFER_H @@ -15,14 +16,16 @@ #include <linux/workqueue.h> #include <linux/cache.h> #include <linux/sched.h> - +#include <linux/ring_buffer.h> + struct task_struct; - + int alloc_cpu_buffers(void); void free_cpu_buffers(void); void start_cpu_work(void); void end_cpu_work(void); +void flush_cpu_work(void); /* CPU buffer is composed of such entries (which are * also used for context switch notes) @@ -30,16 +33,16 @@ void end_cpu_work(void); struct op_sample { unsigned long eip; unsigned long event; + unsigned long data[0]; }; - + +struct op_entry; + struct oprofile_cpu_buffer { - volatile unsigned long head_pos; - volatile unsigned long tail_pos; unsigned long buffer_size; - struct task_struct * last_task; + struct task_struct *last_task; int last_is_kernel; int tracing; - struct op_sample * buffer; unsigned long sample_received; unsigned long sample_lost_overflow; unsigned long backtrace_aborted; @@ -48,12 +51,71 @@ struct oprofile_cpu_buffer { struct delayed_work work; }; -DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer); +DECLARE_PER_CPU(struct oprofile_cpu_buffer, op_cpu_buffer); + +/* + * Resets the cpu buffer to a sane state. + * + * reset these to invalid values; the next sample collected will + * populate the buffer with proper values to initialize the buffer + */ +static inline void op_cpu_buffer_reset(int cpu) +{ + struct oprofile_cpu_buffer *cpu_buf = &per_cpu(op_cpu_buffer, cpu); + + cpu_buf->last_is_kernel = -1; + cpu_buf->last_task = NULL; +} + +/* + * op_cpu_buffer_add_data() and op_cpu_buffer_write_commit() may be + * called only if op_cpu_buffer_write_reserve() did not return NULL or + * entry->event != NULL, otherwise entry->size or entry->event will be + * used uninitialized. + */ + +struct op_sample +*op_cpu_buffer_write_reserve(struct op_entry *entry, unsigned long size); +int op_cpu_buffer_write_commit(struct op_entry *entry); +struct op_sample *op_cpu_buffer_read_entry(struct op_entry *entry, int cpu); +unsigned long op_cpu_buffer_entries(int cpu); + +/* returns the remaining free size of data in the entry */ +static inline +int op_cpu_buffer_add_data(struct op_entry *entry, unsigned long val) +{ + if (!entry->size) + return 0; + *entry->data = val; + entry->size--; + entry->data++; + return entry->size; +} + +/* returns the size of data in the entry */ +static inline +int op_cpu_buffer_get_size(struct op_entry *entry) +{ + return entry->size; +} -void cpu_buffer_reset(struct oprofile_cpu_buffer * cpu_buf); +/* returns 0 if empty or the size of data including the current value */ +static inline +int op_cpu_buffer_get_data(struct op_entry *entry, unsigned long *val) +{ + int size = entry->size; + if (!size) + return 0; + *val = *entry->data; + entry->size--; + entry->data++; + return size; +} -/* transient events for the CPU buffer -> event buffer */ -#define CPU_IS_KERNEL 1 -#define CPU_TRACE_BEGIN 2 +/* extra data flags */ +#define KERNEL_CTX_SWITCH (1UL << 0) +#define IS_KERNEL (1UL << 1) +#define TRACE_BEGIN (1UL << 2) +#define USER_CTX_SWITCH (1UL << 3) #endif /* OPROFILE_CPU_BUFFER_H */ diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index e7fbac52993..c0cc4e7ff02 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -19,28 +19,39 @@ #include <linux/dcookies.h> #include <linux/fs.h> #include <asm/uaccess.h> - + #include "oprof.h" #include "event_buffer.h" #include "oprofile_stats.h" DEFINE_MUTEX(buffer_mutex); - + static unsigned long buffer_opened; static DECLARE_WAIT_QUEUE_HEAD(buffer_wait); -static unsigned long * event_buffer; +static unsigned long *event_buffer; static unsigned long buffer_size; static unsigned long buffer_watershed; static size_t buffer_pos; /* atomic_t because wait_event checks it outside of buffer_mutex */ static atomic_t buffer_ready = ATOMIC_INIT(0); -/* Add an entry to the event buffer. When we - * get near to the end we wake up the process - * sleeping on the read() of the file. +/* + * Add an entry to the event buffer. When we get near to the end we + * wake up the process sleeping on the read() of the file. To protect + * the event_buffer this function may only be called when buffer_mutex + * is set. */ void add_event_entry(unsigned long value) { + /* + * This shouldn't happen since all workqueues or handlers are + * canceled or flushed before the event buffer is freed. + */ + if (!event_buffer) { + WARN_ON_ONCE(1); + return; + } + if (buffer_pos == buffer_size) { atomic_inc(&oprofile_stats.event_lost_overflow); return; @@ -66,44 +77,46 @@ void wake_up_buffer_waiter(void) mutex_unlock(&buffer_mutex); } - + int alloc_event_buffer(void) { - int err = -ENOMEM; unsigned long flags; - spin_lock_irqsave(&oprofilefs_lock, flags); - buffer_size = fs_buffer_size; - buffer_watershed = fs_buffer_watershed; - spin_unlock_irqrestore(&oprofilefs_lock, flags); - + raw_spin_lock_irqsave(&oprofilefs_lock, flags); + buffer_size = oprofile_buffer_size; + buffer_watershed = oprofile_buffer_watershed; + raw_spin_unlock_irqrestore(&oprofilefs_lock, flags); + if (buffer_watershed >= buffer_size) return -EINVAL; - + + buffer_pos = 0; event_buffer = vmalloc(sizeof(unsigned long) * buffer_size); if (!event_buffer) - goto out; + return -ENOMEM; - err = 0; -out: - return err; + return 0; } void free_event_buffer(void) { + mutex_lock(&buffer_mutex); vfree(event_buffer); + buffer_pos = 0; + event_buffer = NULL; + mutex_unlock(&buffer_mutex); } - -static int event_buffer_open(struct inode * inode, struct file * file) + +static int event_buffer_open(struct inode *inode, struct file *file) { int err = -EPERM; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (test_and_set_bit(0, &buffer_opened)) + if (test_and_set_bit_lock(0, &buffer_opened)) return -EBUSY; /* Register as a user of dcookies @@ -114,38 +127,38 @@ static int event_buffer_open(struct inode * inode, struct file * file) file->private_data = dcookie_register(); if (!file->private_data) goto out; - + if ((err = oprofile_setup())) goto fail; /* NB: the actual start happens from userspace * echo 1 >/dev/oprofile/enable */ - - return 0; + + return nonseekable_open(inode, file); fail: dcookie_unregister(file->private_data); out: - clear_bit(0, &buffer_opened); + __clear_bit_unlock(0, &buffer_opened); return err; } -static int event_buffer_release(struct inode * inode, struct file * file) +static int event_buffer_release(struct inode *inode, struct file *file) { oprofile_stop(); oprofile_shutdown(); dcookie_unregister(file->private_data); buffer_pos = 0; atomic_set(&buffer_ready, 0); - clear_bit(0, &buffer_opened); + __clear_bit_unlock(0, &buffer_opened); return 0; } -static ssize_t event_buffer_read(struct file * file, char __user * buf, - size_t count, loff_t * offset) +static ssize_t event_buffer_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) { int retval = -EINVAL; size_t const max = buffer_size * sizeof(unsigned long); @@ -165,25 +178,32 @@ static ssize_t event_buffer_read(struct file * file, char __user * buf, mutex_lock(&buffer_mutex); + /* May happen if the buffer is freed during pending reads. */ + if (!event_buffer) { + retval = -EINTR; + goto out; + } + atomic_set(&buffer_ready, 0); retval = -EFAULT; count = buffer_pos * sizeof(unsigned long); - + if (copy_to_user(buf, event_buffer, count)) goto out; retval = count; buffer_pos = 0; - + out: mutex_unlock(&buffer_mutex); return retval; } - + const struct file_operations event_buffer_fops = { .open = event_buffer_open, .release = event_buffer_release, .read = event_buffer_read, + .llseek = no_llseek, }; diff --git a/drivers/oprofile/event_buffer.h b/drivers/oprofile/event_buffer.h index 5076ed1ebd8..a8d5bb3cba8 100644 --- a/drivers/oprofile/event_buffer.h +++ b/drivers/oprofile/event_buffer.h @@ -10,13 +10,20 @@ #ifndef EVENT_BUFFER_H #define EVENT_BUFFER_H -#include <linux/types.h> -#include <asm/mutex.h> - +#include <linux/types.h> +#include <linux/mutex.h> + int alloc_event_buffer(void); void free_event_buffer(void); - + +/** + * Add data to the event buffer. + * The data passed is free-form, but typically consists of + * file offsets, dcookies, context information, and ESCAPE codes. + */ +void add_event_entry(unsigned long data); + /* wake up the process sleeping on the event file */ void wake_up_buffer_waiter(void); @@ -24,10 +31,10 @@ void wake_up_buffer_waiter(void); #define NO_COOKIE 0UL extern const struct file_operations event_buffer_fops; - + /* mutex between sync_cpu_buffers() and the * file reading code. */ extern struct mutex buffer_mutex; - + #endif /* EVENT_BUFFER_H */ diff --git a/drivers/oprofile/nmi_timer_int.c b/drivers/oprofile/nmi_timer_int.c new file mode 100644 index 00000000000..9559829fb23 --- /dev/null +++ b/drivers/oprofile/nmi_timer_int.c @@ -0,0 +1,176 @@ +/** + * @file nmi_timer_int.c + * + * @remark Copyright 2011 Advanced Micro Devices, Inc. + * + * @author Robert Richter <robert.richter@amd.com> + */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/errno.h> +#include <linux/oprofile.h> +#include <linux/perf_event.h> + +#ifdef CONFIG_OPROFILE_NMI_TIMER + +static DEFINE_PER_CPU(struct perf_event *, nmi_timer_events); +static int ctr_running; + +static struct perf_event_attr nmi_timer_attr = { + .type = PERF_TYPE_HARDWARE, + .config = PERF_COUNT_HW_CPU_CYCLES, + .size = sizeof(struct perf_event_attr), + .pinned = 1, + .disabled = 1, +}; + +static void nmi_timer_callback(struct perf_event *event, + struct perf_sample_data *data, + struct pt_regs *regs) +{ + event->hw.interrupts = 0; /* don't throttle interrupts */ + oprofile_add_sample(regs, 0); +} + +static int nmi_timer_start_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (!event) { + event = perf_event_create_kernel_counter(&nmi_timer_attr, cpu, NULL, + nmi_timer_callback, NULL); + if (IS_ERR(event)) + return PTR_ERR(event); + per_cpu(nmi_timer_events, cpu) = event; + } + + if (event && ctr_running) + perf_event_enable(event); + + return 0; +} + +static void nmi_timer_stop_cpu(int cpu) +{ + struct perf_event *event = per_cpu(nmi_timer_events, cpu); + + if (event && ctr_running) + perf_event_disable(event); +} + +static int nmi_timer_cpu_notifier(struct notifier_block *b, unsigned long action, + void *data) +{ + int cpu = (unsigned long)data; + switch (action) { + case CPU_DOWN_FAILED: + case CPU_ONLINE: + nmi_timer_start_cpu(cpu); + break; + case CPU_DOWN_PREPARE: + nmi_timer_stop_cpu(cpu); + break; + } + return NOTIFY_DONE; +} + +static struct notifier_block nmi_timer_cpu_nb = { + .notifier_call = nmi_timer_cpu_notifier +}; + +static int nmi_timer_start(void) +{ + int cpu; + + get_online_cpus(); + ctr_running = 1; + for_each_online_cpu(cpu) + nmi_timer_start_cpu(cpu); + put_online_cpus(); + + return 0; +} + +static void nmi_timer_stop(void) +{ + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) + nmi_timer_stop_cpu(cpu); + ctr_running = 0; + put_online_cpus(); +} + +static void nmi_timer_shutdown(void) +{ + struct perf_event *event; + int cpu; + + cpu_notifier_register_begin(); + __unregister_cpu_notifier(&nmi_timer_cpu_nb); + for_each_possible_cpu(cpu) { + event = per_cpu(nmi_timer_events, cpu); + if (!event) + continue; + perf_event_disable(event); + per_cpu(nmi_timer_events, cpu) = NULL; + perf_event_release_kernel(event); + } + + cpu_notifier_register_done(); +} + +static int nmi_timer_setup(void) +{ + int cpu, err; + u64 period; + + /* clock cycles per tick: */ + period = (u64)cpu_khz * 1000; + do_div(period, HZ); + nmi_timer_attr.sample_period = period; + + cpu_notifier_register_begin(); + err = __register_cpu_notifier(&nmi_timer_cpu_nb); + if (err) + goto out; + + /* can't attach events to offline cpus: */ + for_each_online_cpu(cpu) { + err = nmi_timer_start_cpu(cpu); + if (err) { + cpu_notifier_register_done(); + nmi_timer_shutdown(); + return err; + } + } + +out: + cpu_notifier_register_done(); + return err; +} + +int __init op_nmi_timer_init(struct oprofile_operations *ops) +{ + int err = 0; + + err = nmi_timer_setup(); + if (err) + return err; + nmi_timer_shutdown(); /* only check, don't alloc */ + + ops->create_files = NULL; + ops->setup = nmi_timer_setup; + ops->shutdown = nmi_timer_shutdown; + ops->start = nmi_timer_start; + ops->stop = nmi_timer_stop; + ops->cpu_type = "timer"; + + printk(KERN_INFO "oprofile: using NMI timer interrupt.\n"); + + return 0; +} + +#endif diff --git a/drivers/oprofile/oprof.c b/drivers/oprofile/oprof.c index 2c645170f06..ed2c3ec0702 100644 --- a/drivers/oprofile/oprof.c +++ b/drivers/oprofile/oprof.c @@ -12,18 +12,20 @@ #include <linux/init.h> #include <linux/oprofile.h> #include <linux/moduleparam.h> -#include <asm/mutex.h> +#include <linux/workqueue.h> +#include <linux/time.h> +#include <linux/mutex.h> #include "oprof.h" #include "event_buffer.h" #include "cpu_buffer.h" #include "buffer_sync.h" #include "oprofile_stats.h" - + struct oprofile_operations oprofile_ops; unsigned long oprofile_started; -unsigned long backtrace_depth; +unsigned long oprofile_backtrace_depth; static unsigned long is_setup; static DEFINE_MUTEX(start_mutex); @@ -36,7 +38,7 @@ static int timer = 0; int oprofile_setup(void) { int err; - + mutex_lock(&start_mutex); if ((err = alloc_cpu_buffers())) @@ -44,10 +46,10 @@ int oprofile_setup(void) if ((err = alloc_event_buffer())) goto out1; - + if (oprofile_ops.setup && (err = oprofile_ops.setup())) goto out2; - + /* Note even though this starts part of the * profiling overhead, it's necessary to prevent * us missing task deaths and eventually oopsing @@ -74,7 +76,7 @@ post_sync: is_setup = 1; mutex_unlock(&start_mutex); return 0; - + out3: if (oprofile_ops.shutdown) oprofile_ops.shutdown(); @@ -87,34 +89,99 @@ out: return err; } +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static void switch_worker(struct work_struct *work); +static DECLARE_DELAYED_WORK(switch_work, switch_worker); + +static void start_switch_worker(void) +{ + if (oprofile_ops.switch_events) + schedule_delayed_work(&switch_work, oprofile_time_slice); +} + +static void stop_switch_worker(void) +{ + cancel_delayed_work_sync(&switch_work); +} + +static void switch_worker(struct work_struct *work) +{ + if (oprofile_ops.switch_events()) + return; + + atomic_inc(&oprofile_stats.multiplex_counter); + start_switch_worker(); +} + +/* User inputs in ms, converts to jiffies */ +int oprofile_set_timeout(unsigned long val_msec) +{ + int err = 0; + unsigned long time_slice; + + mutex_lock(&start_mutex); + + if (oprofile_started) { + err = -EBUSY; + goto out; + } + + if (!oprofile_ops.switch_events) { + err = -EINVAL; + goto out; + } + + time_slice = msecs_to_jiffies(val_msec); + if (time_slice == MAX_JIFFY_OFFSET) { + err = -EINVAL; + goto out; + } + + oprofile_time_slice = time_slice; + +out: + mutex_unlock(&start_mutex); + return err; + +} + +#else + +static inline void start_switch_worker(void) { } +static inline void stop_switch_worker(void) { } + +#endif /* Actually start profiling (echo 1>/dev/oprofile/enable) */ int oprofile_start(void) { int err = -EINVAL; - + mutex_lock(&start_mutex); - + if (!is_setup) goto out; - err = 0; - + err = 0; + if (oprofile_started) goto out; - + oprofile_reset_stats(); if ((err = oprofile_ops.start())) goto out; + start_switch_worker(); + oprofile_started = 1; out: mutex_unlock(&start_mutex); return err; } - + /* echo 0>/dev/oprofile/enable */ void oprofile_stop(void) { @@ -123,6 +190,9 @@ void oprofile_stop(void) goto out; oprofile_ops.stop(); oprofile_started = 0; + + stop_switch_worker(); + /* wake up the daemon to read what remains */ wake_up_buffer_waiter(); out: @@ -155,62 +225,62 @@ post_sync: mutex_unlock(&start_mutex); } - -int oprofile_set_backtrace(unsigned long val) +int oprofile_set_ulong(unsigned long *addr, unsigned long val) { - int err = 0; + int err = -EBUSY; mutex_lock(&start_mutex); - - if (oprofile_started) { - err = -EBUSY; - goto out; - } - - if (!oprofile_ops.backtrace) { - err = -EINVAL; - goto out; + if (!oprofile_started) { + *addr = val; + err = 0; } - - backtrace_depth = val; - -out: mutex_unlock(&start_mutex); + return err; } +static int timer_mode; + static int __init oprofile_init(void) { int err; + /* always init architecture to setup backtrace support */ + timer_mode = 0; err = oprofile_arch_init(&oprofile_ops); - - if (err < 0 || timer) { - printk(KERN_INFO "oprofile: using timer interrupt.\n"); - oprofile_timer_init(&oprofile_ops); + if (!err) { + if (!timer && !oprofilefs_register()) + return 0; + oprofile_arch_exit(); } - err = oprofilefs_register(); - if (err) - oprofile_arch_exit(); + /* setup timer mode: */ + timer_mode = 1; + /* no nmi timer mode if oprofile.timer is set */ + if (timer || op_nmi_timer_init(&oprofile_ops)) { + err = oprofile_timer_init(&oprofile_ops); + if (err) + return err; + } - return err; + return oprofilefs_register(); } static void __exit oprofile_exit(void) { oprofilefs_unregister(); - oprofile_arch_exit(); + if (!timer_mode) + oprofile_arch_exit(); } - + module_init(oprofile_init); module_exit(oprofile_exit); module_param_named(timer, timer, int, 0644); MODULE_PARM_DESC(timer, "force use of timer interrupt"); - + MODULE_LICENSE("GPL"); MODULE_AUTHOR("John Levon <levon@movementarian.org>"); MODULE_DESCRIPTION("OProfile system profiler"); diff --git a/drivers/oprofile/oprof.h b/drivers/oprofile/oprof.h index 18323650806..d5412060ab0 100644 --- a/drivers/oprofile/oprof.h +++ b/drivers/oprofile/oprof.h @@ -11,7 +11,7 @@ #define OPROF_H int oprofile_setup(void); -void oprofile_shutdown(void); +void oprofile_shutdown(void); int oprofilefs_register(void); void oprofilefs_unregister(void); @@ -20,20 +20,31 @@ int oprofile_start(void); void oprofile_stop(void); struct oprofile_operations; - -extern unsigned long fs_buffer_size; -extern unsigned long fs_cpu_buffer_size; -extern unsigned long fs_buffer_watershed; + +extern unsigned long oprofile_buffer_size; +extern unsigned long oprofile_cpu_buffer_size; +extern unsigned long oprofile_buffer_watershed; +extern unsigned long oprofile_time_slice; + extern struct oprofile_operations oprofile_ops; extern unsigned long oprofile_started; -extern unsigned long backtrace_depth; - -struct super_block; +extern unsigned long oprofile_backtrace_depth; + struct dentry; -void oprofile_create_files(struct super_block * sb, struct dentry * root); -void oprofile_timer_init(struct oprofile_operations * ops); +void oprofile_create_files(struct dentry *root); +int oprofile_timer_init(struct oprofile_operations *ops); +#ifdef CONFIG_OPROFILE_NMI_TIMER +int op_nmi_timer_init(struct oprofile_operations *ops); +#else +static inline int op_nmi_timer_init(struct oprofile_operations *ops) +{ + return -ENODEV; +} +#endif + + +int oprofile_set_ulong(unsigned long *addr, unsigned long val); +int oprofile_set_timeout(unsigned long time); -int oprofile_set_backtrace(unsigned long depth); - #endif /* OPROF_H */ diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c index ef953ba5ab6..ee2cfce358b 100644 --- a/drivers/oprofile/oprofile_files.c +++ b/drivers/oprofile/oprofile_files.c @@ -9,22 +9,34 @@ #include <linux/fs.h> #include <linux/oprofile.h> +#include <linux/jiffies.h> #include "event_buffer.h" #include "oprofile_stats.h" #include "oprof.h" - -unsigned long fs_buffer_size = 131072; -unsigned long fs_cpu_buffer_size = 8192; -unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */ -static ssize_t depth_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +#define BUFFER_SIZE_DEFAULT 131072 +#define CPU_BUFFER_SIZE_DEFAULT 8192 +#define BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */ +#define TIME_SLICE_DEFAULT 1 + +unsigned long oprofile_buffer_size; +unsigned long oprofile_cpu_buffer_size; +unsigned long oprofile_buffer_watershed; +unsigned long oprofile_time_slice; + +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + +static ssize_t timeout_read(struct file *file, char __user *buf, + size_t count, loff_t *offset) { - return oprofilefs_ulong_to_user(backtrace_depth, buf, count, offset); + return oprofilefs_ulong_to_user(jiffies_to_msecs(oprofile_time_slice), + buf, count, offset); } -static ssize_t depth_write(struct file * file, char const __user * buf, size_t count, loff_t * offset) +static ssize_t timeout_write(struct file *file, char const __user *buf, + size_t count, loff_t *offset) { unsigned long val; int retval; @@ -33,24 +45,64 @@ static ssize_t depth_write(struct file * file, char const __user * buf, size_t c return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + retval = oprofile_set_timeout(val); + if (retval) return retval; + return count; +} + + +static const struct file_operations timeout_fops = { + .read = timeout_read, + .write = timeout_write, + .llseek = default_llseek, +}; + +#endif + + +static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset) +{ + return oprofilefs_ulong_to_user(oprofile_backtrace_depth, buf, count, + offset); +} + + +static ssize_t depth_write(struct file *file, char const __user *buf, size_t count, loff_t *offset) +{ + unsigned long val; + int retval; + + if (*offset) + return -EINVAL; - retval = oprofile_set_backtrace(val); + if (!oprofile_ops.backtrace) + return -EINVAL; + retval = oprofilefs_ulong_from_user(&val, buf, count); + if (retval <= 0) + return retval; + + retval = oprofile_set_ulong(&oprofile_backtrace_depth, val); if (retval) return retval; + return count; } static const struct file_operations depth_fops = { .read = depth_read, - .write = depth_write + .write = depth_write, + .llseek = default_llseek, }; - -static ssize_t pointer_size_read(struct file * file, char __user * buf, size_t count, loff_t * offset) + +static ssize_t pointer_size_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { return oprofilefs_ulong_to_user(sizeof(void *), buf, count, offset); } @@ -58,27 +110,29 @@ static ssize_t pointer_size_read(struct file * file, char __user * buf, size_t c static const struct file_operations pointer_size_fops = { .read = pointer_size_read, + .llseek = default_llseek, }; -static ssize_t cpu_type_read(struct file * file, char __user * buf, size_t count, loff_t * offset) +static ssize_t cpu_type_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { return oprofilefs_str_to_user(oprofile_ops.cpu_type, buf, count, offset); } - - + + static const struct file_operations cpu_type_fops = { .read = cpu_type_read, + .llseek = default_llseek, }; - - -static ssize_t enable_read(struct file * file, char __user * buf, size_t count, loff_t * offset) + + +static ssize_t enable_read(struct file *file, char __user *buf, size_t count, loff_t *offset) { return oprofilefs_ulong_to_user(oprofile_started, buf, count, offset); } -static ssize_t enable_write(struct file * file, char const __user * buf, size_t count, loff_t * offset) +static ssize_t enable_write(struct file *file, char const __user *buf, size_t count, loff_t *offset) { unsigned long val; int retval; @@ -87,9 +141,10 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t return -EINVAL; retval = oprofilefs_ulong_from_user(&val, buf, count); - if (retval) + if (retval <= 0) return retval; - + + retval = 0; if (val) retval = oprofile_start(); else @@ -100,14 +155,15 @@ static ssize_t enable_write(struct file * file, char const __user * buf, size_t return count; } - + static const struct file_operations enable_fops = { .read = enable_read, .write = enable_write, + .llseek = default_llseek, }; -static ssize_t dump_write(struct file * file, char const __user * buf, size_t count, loff_t * offset) +static ssize_t dump_write(struct file *file, char const __user *buf, size_t count, loff_t *offset) { wake_up_buffer_waiter(); return count; @@ -116,20 +172,30 @@ static ssize_t dump_write(struct file * file, char const __user * buf, size_t co static const struct file_operations dump_fops = { .write = dump_write, + .llseek = noop_llseek, }; - -void oprofile_create_files(struct super_block * sb, struct dentry * root) + +void oprofile_create_files(struct dentry *root) { - oprofilefs_create_file(sb, root, "enable", &enable_fops); - oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666); - oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops); - oprofilefs_create_ulong(sb, root, "buffer_size", &fs_buffer_size); - oprofilefs_create_ulong(sb, root, "buffer_watershed", &fs_buffer_watershed); - oprofilefs_create_ulong(sb, root, "cpu_buffer_size", &fs_cpu_buffer_size); - oprofilefs_create_file(sb, root, "cpu_type", &cpu_type_fops); - oprofilefs_create_file(sb, root, "backtrace_depth", &depth_fops); - oprofilefs_create_file(sb, root, "pointer_size", &pointer_size_fops); - oprofile_create_stats_files(sb, root); + /* reinitialize default values */ + oprofile_buffer_size = BUFFER_SIZE_DEFAULT; + oprofile_cpu_buffer_size = CPU_BUFFER_SIZE_DEFAULT; + oprofile_buffer_watershed = BUFFER_WATERSHED_DEFAULT; + oprofile_time_slice = msecs_to_jiffies(TIME_SLICE_DEFAULT); + + oprofilefs_create_file(root, "enable", &enable_fops); + oprofilefs_create_file_perm(root, "dump", &dump_fops, 0666); + oprofilefs_create_file(root, "buffer", &event_buffer_fops); + oprofilefs_create_ulong(root, "buffer_size", &oprofile_buffer_size); + oprofilefs_create_ulong(root, "buffer_watershed", &oprofile_buffer_watershed); + oprofilefs_create_ulong(root, "cpu_buffer_size", &oprofile_cpu_buffer_size); + oprofilefs_create_file(root, "cpu_type", &cpu_type_fops); + oprofilefs_create_file(root, "backtrace_depth", &depth_fops); + oprofilefs_create_file(root, "pointer_size", &pointer_size_fops); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + oprofilefs_create_file(root, "time_slice", &timeout_fops); +#endif + oprofile_create_stats_files(root); if (oprofile_ops.create_files) - oprofile_ops.create_files(sb, root); + oprofile_ops.create_files(root); } diff --git a/drivers/oprofile/oprofile_perf.c b/drivers/oprofile/oprofile_perf.c new file mode 100644 index 00000000000..d5b2732b1b8 --- /dev/null +++ b/drivers/oprofile/oprofile_perf.c @@ -0,0 +1,327 @@ +/* + * Copyright 2010 ARM Ltd. + * Copyright 2012 Advanced Micro Devices, Inc., Robert Richter + * + * Perf-events backend for OProfile. + */ +#include <linux/perf_event.h> +#include <linux/platform_device.h> +#include <linux/oprofile.h> +#include <linux/slab.h> + +/* + * Per performance monitor configuration as set via oprofilefs. + */ +struct op_counter_config { + unsigned long count; + unsigned long enabled; + unsigned long event; + unsigned long unit_mask; + unsigned long kernel; + unsigned long user; + struct perf_event_attr attr; +}; + +static int oprofile_perf_enabled; +static DEFINE_MUTEX(oprofile_perf_mutex); + +static struct op_counter_config *counter_config; +static DEFINE_PER_CPU(struct perf_event **, perf_events); +static int num_counters; + +/* + * Overflow callback for oprofile. + */ +static void op_overflow_handler(struct perf_event *event, + struct perf_sample_data *data, struct pt_regs *regs) +{ + int id; + u32 cpu = smp_processor_id(); + + for (id = 0; id < num_counters; ++id) + if (per_cpu(perf_events, cpu)[id] == event) + break; + + if (id != num_counters) + oprofile_add_sample(regs, id); + else + pr_warning("oprofile: ignoring spurious overflow " + "on cpu %u\n", cpu); +} + +/* + * Called by oprofile_perf_setup to create perf attributes to mirror the oprofile + * settings in counter_config. Attributes are created as `pinned' events and + * so are permanently scheduled on the PMU. + */ +static void op_perf_setup(void) +{ + int i; + u32 size = sizeof(struct perf_event_attr); + struct perf_event_attr *attr; + + for (i = 0; i < num_counters; ++i) { + attr = &counter_config[i].attr; + memset(attr, 0, size); + attr->type = PERF_TYPE_RAW; + attr->size = size; + attr->config = counter_config[i].event; + attr->sample_period = counter_config[i].count; + attr->pinned = 1; + } +} + +static int op_create_counter(int cpu, int event) +{ + struct perf_event *pevent; + + if (!counter_config[event].enabled || per_cpu(perf_events, cpu)[event]) + return 0; + + pevent = perf_event_create_kernel_counter(&counter_config[event].attr, + cpu, NULL, + op_overflow_handler, NULL); + + if (IS_ERR(pevent)) + return PTR_ERR(pevent); + + if (pevent->state != PERF_EVENT_STATE_ACTIVE) { + perf_event_release_kernel(pevent); + pr_warning("oprofile: failed to enable event %d " + "on CPU %d\n", event, cpu); + return -EBUSY; + } + + per_cpu(perf_events, cpu)[event] = pevent; + + return 0; +} + +static void op_destroy_counter(int cpu, int event) +{ + struct perf_event *pevent = per_cpu(perf_events, cpu)[event]; + + if (pevent) { + perf_event_release_kernel(pevent); + per_cpu(perf_events, cpu)[event] = NULL; + } +} + +/* + * Called by oprofile_perf_start to create active perf events based on the + * perviously configured attributes. + */ +static int op_perf_start(void) +{ + int cpu, event, ret = 0; + + for_each_online_cpu(cpu) { + for (event = 0; event < num_counters; ++event) { + ret = op_create_counter(cpu, event); + if (ret) + return ret; + } + } + + return ret; +} + +/* + * Called by oprofile_perf_stop at the end of a profiling run. + */ +static void op_perf_stop(void) +{ + int cpu, event; + + for_each_online_cpu(cpu) + for (event = 0; event < num_counters; ++event) + op_destroy_counter(cpu, event); +} + +static int oprofile_perf_create_files(struct dentry *root) +{ + unsigned int i; + + for (i = 0; i < num_counters; i++) { + struct dentry *dir; + char buf[4]; + + snprintf(buf, sizeof buf, "%d", i); + dir = oprofilefs_mkdir(root, buf); + oprofilefs_create_ulong(dir, "enabled", &counter_config[i].enabled); + oprofilefs_create_ulong(dir, "event", &counter_config[i].event); + oprofilefs_create_ulong(dir, "count", &counter_config[i].count); + oprofilefs_create_ulong(dir, "unit_mask", &counter_config[i].unit_mask); + oprofilefs_create_ulong(dir, "kernel", &counter_config[i].kernel); + oprofilefs_create_ulong(dir, "user", &counter_config[i].user); + } + + return 0; +} + +static int oprofile_perf_setup(void) +{ + raw_spin_lock(&oprofilefs_lock); + op_perf_setup(); + raw_spin_unlock(&oprofilefs_lock); + return 0; +} + +static int oprofile_perf_start(void) +{ + int ret = -EBUSY; + + mutex_lock(&oprofile_perf_mutex); + if (!oprofile_perf_enabled) { + ret = 0; + op_perf_start(); + oprofile_perf_enabled = 1; + } + mutex_unlock(&oprofile_perf_mutex); + return ret; +} + +static void oprofile_perf_stop(void) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled) + op_perf_stop(); + oprofile_perf_enabled = 0; + mutex_unlock(&oprofile_perf_mutex); +} + +#ifdef CONFIG_PM + +static int oprofile_perf_suspend(struct platform_device *dev, pm_message_t state) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled) + op_perf_stop(); + mutex_unlock(&oprofile_perf_mutex); + return 0; +} + +static int oprofile_perf_resume(struct platform_device *dev) +{ + mutex_lock(&oprofile_perf_mutex); + if (oprofile_perf_enabled && op_perf_start()) + oprofile_perf_enabled = 0; + mutex_unlock(&oprofile_perf_mutex); + return 0; +} + +static struct platform_driver oprofile_driver = { + .driver = { + .name = "oprofile-perf", + }, + .resume = oprofile_perf_resume, + .suspend = oprofile_perf_suspend, +}; + +static struct platform_device *oprofile_pdev; + +static int __init init_driverfs(void) +{ + int ret; + + ret = platform_driver_register(&oprofile_driver); + if (ret) + return ret; + + oprofile_pdev = platform_device_register_simple( + oprofile_driver.driver.name, 0, NULL, 0); + if (IS_ERR(oprofile_pdev)) { + ret = PTR_ERR(oprofile_pdev); + platform_driver_unregister(&oprofile_driver); + } + + return ret; +} + +static void exit_driverfs(void) +{ + platform_device_unregister(oprofile_pdev); + platform_driver_unregister(&oprofile_driver); +} + +#else + +static inline int init_driverfs(void) { return 0; } +static inline void exit_driverfs(void) { } + +#endif /* CONFIG_PM */ + +void oprofile_perf_exit(void) +{ + int cpu, id; + struct perf_event *event; + + for_each_possible_cpu(cpu) { + for (id = 0; id < num_counters; ++id) { + event = per_cpu(perf_events, cpu)[id]; + if (event) + perf_event_release_kernel(event); + } + + kfree(per_cpu(perf_events, cpu)); + } + + kfree(counter_config); + exit_driverfs(); +} + +int __init oprofile_perf_init(struct oprofile_operations *ops) +{ + int cpu, ret = 0; + + ret = init_driverfs(); + if (ret) + return ret; + + num_counters = perf_num_counters(); + if (num_counters <= 0) { + pr_info("oprofile: no performance counters\n"); + ret = -ENODEV; + goto out; + } + + counter_config = kcalloc(num_counters, + sizeof(struct op_counter_config), GFP_KERNEL); + + if (!counter_config) { + pr_info("oprofile: failed to allocate %d " + "counters\n", num_counters); + ret = -ENOMEM; + num_counters = 0; + goto out; + } + + for_each_possible_cpu(cpu) { + per_cpu(perf_events, cpu) = kcalloc(num_counters, + sizeof(struct perf_event *), GFP_KERNEL); + if (!per_cpu(perf_events, cpu)) { + pr_info("oprofile: failed to allocate %d perf events " + "for cpu %d\n", num_counters, cpu); + ret = -ENOMEM; + goto out; + } + } + + ops->create_files = oprofile_perf_create_files; + ops->setup = oprofile_perf_setup; + ops->start = oprofile_perf_start; + ops->stop = oprofile_perf_stop; + ops->shutdown = oprofile_perf_stop; + ops->cpu_type = op_name_from_perf_id(); + + if (!ops->cpu_type) + ret = -ENODEV; + else + pr_info("oprofile: using %s\n", ops->cpu_type); + +out: + if (ret) + oprofile_perf_exit(); + + return ret; +} diff --git a/drivers/oprofile/oprofile_stats.c b/drivers/oprofile/oprofile_stats.c index f99b28e7b79..59659cea458 100644 --- a/drivers/oprofile/oprofile_stats.c +++ b/drivers/oprofile/oprofile_stats.c @@ -11,68 +11,74 @@ #include <linux/smp.h> #include <linux/cpumask.h> #include <linux/threads.h> - + #include "oprofile_stats.h" #include "cpu_buffer.h" - + struct oprofile_stat_struct oprofile_stats; - + void oprofile_reset_stats(void) { - struct oprofile_cpu_buffer * cpu_buf; + struct oprofile_cpu_buffer *cpu_buf; int i; - + for_each_possible_cpu(i) { - cpu_buf = &per_cpu(cpu_buffer, i); + cpu_buf = &per_cpu(op_cpu_buffer, i); cpu_buf->sample_received = 0; cpu_buf->sample_lost_overflow = 0; cpu_buf->backtrace_aborted = 0; cpu_buf->sample_invalid_eip = 0; } - + atomic_set(&oprofile_stats.sample_lost_no_mm, 0); atomic_set(&oprofile_stats.sample_lost_no_mapping, 0); atomic_set(&oprofile_stats.event_lost_overflow, 0); + atomic_set(&oprofile_stats.bt_lost_no_mapping, 0); + atomic_set(&oprofile_stats.multiplex_counter, 0); } -void oprofile_create_stats_files(struct super_block * sb, struct dentry * root) +void oprofile_create_stats_files(struct dentry *root) { - struct oprofile_cpu_buffer * cpu_buf; - struct dentry * cpudir; - struct dentry * dir; + struct oprofile_cpu_buffer *cpu_buf; + struct dentry *cpudir; + struct dentry *dir; char buf[10]; int i; - dir = oprofilefs_mkdir(sb, root, "stats"); + dir = oprofilefs_mkdir(root, "stats"); if (!dir) return; for_each_possible_cpu(i) { - cpu_buf = &per_cpu(cpu_buffer, i); + cpu_buf = &per_cpu(op_cpu_buffer, i); snprintf(buf, 10, "cpu%d", i); - cpudir = oprofilefs_mkdir(sb, dir, buf); - + cpudir = oprofilefs_mkdir(dir, buf); + /* Strictly speaking access to these ulongs is racy, * but we can't simply lock them, and they are * informational only. */ - oprofilefs_create_ro_ulong(sb, cpudir, "sample_received", + oprofilefs_create_ro_ulong(cpudir, "sample_received", &cpu_buf->sample_received); - oprofilefs_create_ro_ulong(sb, cpudir, "sample_lost_overflow", + oprofilefs_create_ro_ulong(cpudir, "sample_lost_overflow", &cpu_buf->sample_lost_overflow); - oprofilefs_create_ro_ulong(sb, cpudir, "backtrace_aborted", + oprofilefs_create_ro_ulong(cpudir, "backtrace_aborted", &cpu_buf->backtrace_aborted); - oprofilefs_create_ro_ulong(sb, cpudir, "sample_invalid_eip", + oprofilefs_create_ro_ulong(cpudir, "sample_invalid_eip", &cpu_buf->sample_invalid_eip); } - - oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mm", + + oprofilefs_create_ro_atomic(dir, "sample_lost_no_mm", &oprofile_stats.sample_lost_no_mm); - oprofilefs_create_ro_atomic(sb, dir, "sample_lost_no_mapping", + oprofilefs_create_ro_atomic(dir, "sample_lost_no_mapping", &oprofile_stats.sample_lost_no_mapping); - oprofilefs_create_ro_atomic(sb, dir, "event_lost_overflow", + oprofilefs_create_ro_atomic(dir, "event_lost_overflow", &oprofile_stats.event_lost_overflow); - oprofilefs_create_ro_atomic(sb, dir, "bt_lost_no_mapping", + oprofilefs_create_ro_atomic(dir, "bt_lost_no_mapping", &oprofile_stats.bt_lost_no_mapping); +#ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX + oprofilefs_create_ro_atomic(dir, "multiplex_counter", + &oprofile_stats.multiplex_counter); +#endif } diff --git a/drivers/oprofile/oprofile_stats.h b/drivers/oprofile/oprofile_stats.h index 6d755a633f1..1fc622bd183 100644 --- a/drivers/oprofile/oprofile_stats.h +++ b/drivers/oprofile/oprofile_stats.h @@ -10,24 +10,24 @@ #ifndef OPROFILE_STATS_H #define OPROFILE_STATS_H -#include <asm/atomic.h> - +#include <linux/atomic.h> + struct oprofile_stat_struct { atomic_t sample_lost_no_mm; atomic_t sample_lost_no_mapping; atomic_t bt_lost_no_mapping; atomic_t event_lost_overflow; + atomic_t multiplex_counter; }; extern struct oprofile_stat_struct oprofile_stats; - + /* reset all stats to zero */ void oprofile_reset_stats(void); - -struct super_block; + struct dentry; - + /* create the stats/ dir */ -void oprofile_create_stats_files(struct super_block * sb, struct dentry * root); +void oprofile_create_stats_files(struct dentry *root); #endif /* OPROFILE_STATS_H */ diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 8543cb26cf3..3f493459378 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -21,30 +21,28 @@ #define OPROFILEFS_MAGIC 0x6f70726f -DEFINE_SPINLOCK(oprofilefs_lock); +DEFINE_RAW_SPINLOCK(oprofilefs_lock); -static struct inode * oprofilefs_get_inode(struct super_block * sb, int mode) +static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode) { - struct inode * inode = new_inode(sb); + struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; - inode->i_uid = 0; - inode->i_gid = 0; - inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } return inode; } -static struct super_operations s_ops = { +static const struct super_operations s_ops = { .statfs = simple_statfs, .drop_inode = generic_delete_inode, }; -ssize_t oprofilefs_str_to_user(char const * str, char __user * buf, size_t count, loff_t * offset) +ssize_t oprofilefs_str_to_user(char const *str, char __user *buf, size_t count, loff_t *offset) { return simple_read_from_buffer(buf, count, offset, str, strlen(str)); } @@ -52,7 +50,7 @@ ssize_t oprofilefs_str_to_user(char const * str, char __user * buf, size_t count #define TMPBUFSIZE 50 -ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user * buf, size_t count, loff_t * offset) +ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user *buf, size_t count, loff_t *offset) { char tmpbuf[TMPBUFSIZE]; size_t maxlen = snprintf(tmpbuf, TMPBUFSIZE, "%lu\n", val); @@ -62,7 +60,14 @@ ssize_t oprofilefs_ulong_to_user(unsigned long val, char __user * buf, size_t co } -int oprofilefs_ulong_from_user(unsigned long * val, char const __user * buf, size_t count) +/* + * Note: If oprofilefs_ulong_from_user() returns 0, then *val remains + * unchanged and might be uninitialized. This follows write syscall + * implementation when count is zero: "If count is zero ... [and if] + * no errors are detected, 0 will be returned without causing any + * other effect." (man 2 write) + */ +int oprofilefs_ulong_from_user(unsigned long *val, char const __user *buf, size_t count) { char tmpbuf[TMPBUFSIZE]; unsigned long flags; @@ -78,173 +83,161 @@ int oprofilefs_ulong_from_user(unsigned long * val, char const __user * buf, siz if (copy_from_user(tmpbuf, buf, count)) return -EFAULT; - spin_lock_irqsave(&oprofilefs_lock, flags); + raw_spin_lock_irqsave(&oprofilefs_lock, flags); *val = simple_strtoul(tmpbuf, NULL, 0); - spin_unlock_irqrestore(&oprofilefs_lock, flags); - return 0; + raw_spin_unlock_irqrestore(&oprofilefs_lock, flags); + return count; } -static ssize_t ulong_read_file(struct file * file, char __user * buf, size_t count, loff_t * offset) +static ssize_t ulong_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset) { - unsigned long * val = file->private_data; + unsigned long *val = file->private_data; return oprofilefs_ulong_to_user(*val, buf, count, offset); } -static ssize_t ulong_write_file(struct file * file, char const __user * buf, size_t count, loff_t * offset) +static ssize_t ulong_write_file(struct file *file, char const __user *buf, size_t count, loff_t *offset) { - unsigned long * value = file->private_data; + unsigned long value; int retval; if (*offset) return -EINVAL; - retval = oprofilefs_ulong_from_user(value, buf, count); + retval = oprofilefs_ulong_from_user(&value, buf, count); + if (retval <= 0) + return retval; + retval = oprofile_set_ulong(file->private_data, value); if (retval) return retval; - return count; -} - -static int default_open(struct inode * inode, struct file * filp) -{ - if (inode->i_private) - filp->private_data = inode->i_private; - return 0; + return count; } static const struct file_operations ulong_fops = { .read = ulong_read_file, .write = ulong_write_file, - .open = default_open, + .open = simple_open, + .llseek = default_llseek, }; static const struct file_operations ulong_ro_fops = { .read = ulong_read_file, - .open = default_open, + .open = simple_open, + .llseek = default_llseek, }; -static struct dentry * __oprofilefs_create_file(struct super_block * sb, - struct dentry * root, char const * name, const struct file_operations * fops, - int perm) +static int __oprofilefs_create_file(struct dentry *root, char const *name, + const struct file_operations *fops, int perm, void *priv) { - struct dentry * dentry; - struct inode * inode; + struct dentry *dentry; + struct inode *inode; + mutex_lock(&root->d_inode->i_mutex); dentry = d_alloc_name(root, name); - if (!dentry) - return NULL; - inode = oprofilefs_get_inode(sb, S_IFREG | perm); + if (!dentry) { + mutex_unlock(&root->d_inode->i_mutex); + return -ENOMEM; + } + inode = oprofilefs_get_inode(root->d_sb, S_IFREG | perm); if (!inode) { dput(dentry); - return NULL; + mutex_unlock(&root->d_inode->i_mutex); + return -ENOMEM; } inode->i_fop = fops; + inode->i_private = priv; d_add(dentry, inode); - return dentry; + mutex_unlock(&root->d_inode->i_mutex); + return 0; } -int oprofilefs_create_ulong(struct super_block * sb, struct dentry * root, - char const * name, unsigned long * val) +int oprofilefs_create_ulong(struct dentry *root, + char const *name, unsigned long *val) { - struct dentry * d = __oprofilefs_create_file(sb, root, name, - &ulong_fops, 0644); - if (!d) - return -EFAULT; - - d->d_inode->i_private = val; - return 0; + return __oprofilefs_create_file(root, name, + &ulong_fops, 0644, val); } -int oprofilefs_create_ro_ulong(struct super_block * sb, struct dentry * root, - char const * name, unsigned long * val) +int oprofilefs_create_ro_ulong(struct dentry *root, + char const *name, unsigned long *val) { - struct dentry * d = __oprofilefs_create_file(sb, root, name, - &ulong_ro_fops, 0444); - if (!d) - return -EFAULT; - - d->d_inode->i_private = val; - return 0; + return __oprofilefs_create_file(root, name, + &ulong_ro_fops, 0444, val); } -static ssize_t atomic_read_file(struct file * file, char __user * buf, size_t count, loff_t * offset) +static ssize_t atomic_read_file(struct file *file, char __user *buf, size_t count, loff_t *offset) { - atomic_t * val = file->private_data; + atomic_t *val = file->private_data; return oprofilefs_ulong_to_user(atomic_read(val), buf, count, offset); } - + static const struct file_operations atomic_ro_fops = { .read = atomic_read_file, - .open = default_open, + .open = simple_open, + .llseek = default_llseek, }; - -int oprofilefs_create_ro_atomic(struct super_block * sb, struct dentry * root, - char const * name, atomic_t * val) -{ - struct dentry * d = __oprofilefs_create_file(sb, root, name, - &atomic_ro_fops, 0444); - if (!d) - return -EFAULT; - d->d_inode->i_private = val; - return 0; +int oprofilefs_create_ro_atomic(struct dentry *root, + char const *name, atomic_t *val) +{ + return __oprofilefs_create_file(root, name, + &atomic_ro_fops, 0444, val); } - -int oprofilefs_create_file(struct super_block * sb, struct dentry * root, - char const * name, const struct file_operations * fops) + +int oprofilefs_create_file(struct dentry *root, + char const *name, const struct file_operations *fops) { - if (!__oprofilefs_create_file(sb, root, name, fops, 0644)) - return -EFAULT; - return 0; + return __oprofilefs_create_file(root, name, fops, 0644, NULL); } -int oprofilefs_create_file_perm(struct super_block * sb, struct dentry * root, - char const * name, const struct file_operations * fops, int perm) +int oprofilefs_create_file_perm(struct dentry *root, + char const *name, const struct file_operations *fops, int perm) { - if (!__oprofilefs_create_file(sb, root, name, fops, perm)) - return -EFAULT; - return 0; + return __oprofilefs_create_file(root, name, fops, perm, NULL); } -struct dentry * oprofilefs_mkdir(struct super_block * sb, - struct dentry * root, char const * name) +struct dentry *oprofilefs_mkdir(struct dentry *parent, char const *name) { - struct dentry * dentry; - struct inode * inode; + struct dentry *dentry; + struct inode *inode; - dentry = d_alloc_name(root, name); - if (!dentry) + mutex_lock(&parent->d_inode->i_mutex); + dentry = d_alloc_name(parent, name); + if (!dentry) { + mutex_unlock(&parent->d_inode->i_mutex); return NULL; - inode = oprofilefs_get_inode(sb, S_IFDIR | 0755); + } + inode = oprofilefs_get_inode(parent->d_sb, S_IFDIR | 0755); if (!inode) { dput(dentry); + mutex_unlock(&parent->d_inode->i_mutex); return NULL; } inode->i_op = &simple_dir_inode_operations; inode->i_fop = &simple_dir_operations; d_add(dentry, inode); + mutex_unlock(&parent->d_inode->i_mutex); return dentry; } -static int oprofilefs_fill_super(struct super_block * sb, void * data, int silent) +static int oprofilefs_fill_super(struct super_block *sb, void *data, int silent) { - struct inode * root_inode; - struct dentry * root_dentry; + struct inode *root_inode; sb->s_blocksize = PAGE_CACHE_SIZE; sb->s_blocksize_bits = PAGE_CACHE_SHIFT; @@ -257,34 +250,31 @@ static int oprofilefs_fill_super(struct super_block * sb, void * data, int silen return -ENOMEM; root_inode->i_op = &simple_dir_inode_operations; root_inode->i_fop = &simple_dir_operations; - root_dentry = d_alloc_root(root_inode); - if (!root_dentry) { - iput(root_inode); + sb->s_root = d_make_root(root_inode); + if (!sb->s_root) return -ENOMEM; - } - - sb->s_root = root_dentry; - oprofile_create_files(sb, root_dentry); + oprofile_create_files(sb->s_root); // FIXME: verify kill_litter_super removes our dentries return 0; } -static int oprofilefs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, struct vfsmount *mnt) +static struct dentry *oprofilefs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) { - return get_sb_single(fs_type, flags, data, oprofilefs_fill_super, mnt); + return mount_single(fs_type, flags, data, oprofilefs_fill_super); } static struct file_system_type oprofilefs_type = { .owner = THIS_MODULE, .name = "oprofilefs", - .get_sb = oprofilefs_get_sb, + .mount = oprofilefs_mount, .kill_sb = kill_litter_super, }; +MODULE_ALIAS_FS("oprofilefs"); int __init oprofilefs_register(void) diff --git a/drivers/oprofile/timer_int.c b/drivers/oprofile/timer_int.c index 710a45f0d73..61be1d9c16c 100644 --- a/drivers/oprofile/timer_int.c +++ b/drivers/oprofile/timer_int.c @@ -13,34 +13,108 @@ #include <linux/oprofile.h> #include <linux/profile.h> #include <linux/init.h> +#include <linux/cpu.h> +#include <linux/hrtimer.h> +#include <asm/irq_regs.h> #include <asm/ptrace.h> #include "oprof.h" -static int timer_notify(struct pt_regs *regs) +static DEFINE_PER_CPU(struct hrtimer, oprofile_hrtimer); +static int ctr_running; + +static enum hrtimer_restart oprofile_hrtimer_notify(struct hrtimer *hrtimer) +{ + oprofile_add_sample(get_irq_regs(), 0); + hrtimer_forward_now(hrtimer, ns_to_ktime(TICK_NSEC)); + return HRTIMER_RESTART; +} + +static void __oprofile_hrtimer_start(void *unused) +{ + struct hrtimer *hrtimer = &__get_cpu_var(oprofile_hrtimer); + + if (!ctr_running) + return; + + hrtimer_init(hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + hrtimer->function = oprofile_hrtimer_notify; + + hrtimer_start(hrtimer, ns_to_ktime(TICK_NSEC), + HRTIMER_MODE_REL_PINNED); +} + +static int oprofile_hrtimer_start(void) { - oprofile_add_sample(regs, 0); + get_online_cpus(); + ctr_running = 1; + on_each_cpu(__oprofile_hrtimer_start, NULL, 1); + put_online_cpus(); return 0; } -static int timer_start(void) +static void __oprofile_hrtimer_stop(int cpu) { - return register_timer_hook(timer_notify); + struct hrtimer *hrtimer = &per_cpu(oprofile_hrtimer, cpu); + + if (!ctr_running) + return; + + hrtimer_cancel(hrtimer); } +static void oprofile_hrtimer_stop(void) +{ + int cpu; + + get_online_cpus(); + for_each_online_cpu(cpu) + __oprofile_hrtimer_stop(cpu); + ctr_running = 0; + put_online_cpus(); +} -static void timer_stop(void) +static int oprofile_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) { - unregister_timer_hook(timer_notify); + long cpu = (long) hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + smp_call_function_single(cpu, __oprofile_hrtimer_start, + NULL, 1); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + __oprofile_hrtimer_stop(cpu); + break; + } + return NOTIFY_OK; } +static struct notifier_block __refdata oprofile_cpu_notifier = { + .notifier_call = oprofile_cpu_notify, +}; -void __init oprofile_timer_init(struct oprofile_operations * ops) +static int oprofile_hrtimer_setup(void) { - ops->create_files = NULL; - ops->setup = NULL; - ops->shutdown = NULL; - ops->start = timer_start; - ops->stop = timer_stop; - ops->cpu_type = "timer"; + return register_hotcpu_notifier(&oprofile_cpu_notifier); +} + +static void oprofile_hrtimer_shutdown(void) +{ + unregister_hotcpu_notifier(&oprofile_cpu_notifier); +} + +int oprofile_timer_init(struct oprofile_operations *ops) +{ + ops->create_files = NULL; + ops->setup = oprofile_hrtimer_setup; + ops->shutdown = oprofile_hrtimer_shutdown; + ops->start = oprofile_hrtimer_start; + ops->stop = oprofile_hrtimer_stop; + ops->cpu_type = "timer"; + printk(KERN_INFO "oprofile: using timer interrupt.\n"); + return 0; } |
