diff options
Diffstat (limited to 'drivers/oprofile/buffer_sync.c')
| -rw-r--r-- | drivers/oprofile/buffer_sync.c | 338 |
1 files changed, 186 insertions, 152 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c index b07ba2a1411..d93b2b6b1f7 100644 --- a/drivers/oprofile/buffer_sync.c +++ b/drivers/oprofile/buffer_sync.c @@ -1,10 +1,12 @@ /** * @file buffer_sync.c * - * @remark Copyright 2002 OProfile authors + * @remark Copyright 2002-2009 OProfile authors * @remark Read the file COPYING * * @author John Levon <levon@movementarian.org> + * @author Barry Kasindorf + * @author Robert Richter <robert.richter@amd.com> * * This is the core of the buffer management. Each * CPU buffer is processed and entered into the @@ -28,19 +30,19 @@ #include <linux/fs.h> #include <linux/oprofile.h> #include <linux/sched.h> +#include <linux/gfp.h> #include "oprofile_stats.h" #include "event_buffer.h" #include "cpu_buffer.h" #include "buffer_sync.h" - + static LIST_HEAD(dying_tasks); static LIST_HEAD(dead_tasks); -static cpumask_t marked_cpus = CPU_MASK_NONE; +static cpumask_var_t marked_cpus; static DEFINE_SPINLOCK(task_mortuary); static void process_task_mortuary(void); - /* Take ownership of the task struct and place it on the * list for processing. Only after two full buffer syncs * does the task eventually get freed, because by then @@ -48,10 +50,11 @@ static void process_task_mortuary(void); * Can be invoked from softirq via RCU callback due to * call_rcu() of the task struct, hence the _irqsave. */ -static int task_free_notify(struct notifier_block * self, unsigned long val, void * data) +static int +task_free_notify(struct notifier_block *self, unsigned long val, void *data) { unsigned long flags; - struct task_struct * task = data; + struct task_struct *task = data; spin_lock_irqsave(&task_mortuary, flags); list_add(&task->tasks, &dying_tasks); spin_unlock_irqrestore(&task_mortuary, flags); @@ -62,13 +65,14 @@ static int task_free_notify(struct notifier_block * self, unsigned long val, voi /* The task is on its way out. A sync of the buffer means we can catch * any remaining samples for this task. */ -static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data) +static int +task_exit_notify(struct notifier_block *self, unsigned long val, void *data) { /* To avoid latency problems, we only process the current CPU, * hoping that most samples for the task are on this CPU */ sync_buffer(raw_smp_processor_id()); - return 0; + return 0; } @@ -77,11 +81,12 @@ static int task_exit_notify(struct notifier_block * self, unsigned long val, voi * we don't lose any. This does not have to be exact, it's a QoI issue * only. */ -static int munmap_notify(struct notifier_block * self, unsigned long val, void * data) +static int +munmap_notify(struct notifier_block *self, unsigned long val, void *data) { unsigned long addr = (unsigned long)data; - struct mm_struct * mm = current->mm; - struct vm_area_struct * mpnt; + struct mm_struct *mm = current->mm; + struct vm_area_struct *mpnt; down_read(&mm->mmap_sem); @@ -99,11 +104,12 @@ static int munmap_notify(struct notifier_block * self, unsigned long val, void * return 0; } - + /* We need to be told about new modules so we don't attribute to a previously * loaded module, or drop the samples on the floor. */ -static int module_load_notify(struct notifier_block * self, unsigned long val, void * data) +static int +module_load_notify(struct notifier_block *self, unsigned long val, void *data) { #ifdef CONFIG_MODULES if (val != MODULE_STATE_COMING) @@ -118,7 +124,7 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, v return 0; } - + static struct notifier_block task_free_nb = { .notifier_call = task_free_notify, }; @@ -135,21 +141,19 @@ static struct notifier_block module_load_nb = { .notifier_call = module_load_notify, }; - -static void end_sync(void) +static void free_all_tasks(void) { - end_cpu_work(); /* make sure we don't leak task structs */ process_task_mortuary(); process_task_mortuary(); } - int sync_start(void) { int err; - start_cpu_work(); + if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL)) + return -ENOMEM; err = task_handoff_register(&task_free_nb); if (err) @@ -164,6 +168,8 @@ int sync_start(void) if (err) goto out4; + start_cpu_work(); + out: return err; out4: @@ -172,19 +178,26 @@ out3: profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); out2: task_handoff_unregister(&task_free_nb); + free_all_tasks(); out1: - end_sync(); + free_cpumask_var(marked_cpus); goto out; } void sync_stop(void) { + end_cpu_work(); unregister_module_notifier(&module_load_nb); profile_event_unregister(PROFILE_MUNMAP, &munmap_nb); profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb); task_handoff_unregister(&task_free_nb); - end_sync(); + barrier(); /* do all of the above first */ + + flush_cpu_work(); + + free_all_tasks(); + free_cpumask_var(marked_cpus); } @@ -196,36 +209,25 @@ static inline unsigned long fast_get_dcookie(struct path *path) { unsigned long cookie; - if (path->dentry->d_cookie) + if (path->dentry->d_flags & DCACHE_COOKIE) return (unsigned long)path->dentry; get_dcookie(path, &cookie); return cookie; } -/* Look up the dcookie for the task's first VM_EXECUTABLE mapping, +/* Look up the dcookie for the task's mm->exe_file, * which corresponds loosely to "application name". This is * not strictly necessary but allows oprofile to associate * shared-library samples with particular applications */ -static unsigned long get_exec_dcookie(struct mm_struct * mm) +static unsigned long get_exec_dcookie(struct mm_struct *mm) { unsigned long cookie = NO_COOKIE; - struct vm_area_struct * vma; - - if (!mm) - goto out; - - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (!vma->vm_file) - continue; - if (!(vma->vm_flags & VM_EXECUTABLE)) - continue; - cookie = fast_get_dcookie(&vma->vm_file->f_path); - break; - } -out: + if (mm && mm->exe_file) + cookie = fast_get_dcookie(&mm->exe_file->f_path); + return cookie; } @@ -235,13 +237,14 @@ out: * sure to do this lookup before a mm->mmap modification happens so * we don't lose track. */ -static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset) +static unsigned long +lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset) { unsigned long cookie = NO_COOKIE; - struct vm_area_struct * vma; + struct vm_area_struct *vma; for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) { - + if (addr < vma->vm_start || addr >= vma->vm_end) continue; @@ -263,9 +266,8 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o return cookie; } - static unsigned long last_cookie = INVALID_COOKIE; - + static void add_cpu_switch(int i) { add_event_entry(ESCAPE_CODE); @@ -278,16 +280,16 @@ static void add_kernel_ctx_switch(unsigned int in_kernel) { add_event_entry(ESCAPE_CODE); if (in_kernel) - add_event_entry(KERNEL_ENTER_SWITCH_CODE); + add_event_entry(KERNEL_ENTER_SWITCH_CODE); else - add_event_entry(KERNEL_EXIT_SWITCH_CODE); + add_event_entry(KERNEL_EXIT_SWITCH_CODE); } - + static void -add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) +add_user_ctx_switch(struct task_struct const *task, unsigned long cookie) { add_event_entry(ESCAPE_CODE); - add_event_entry(CTX_SWITCH_CODE); + add_event_entry(CTX_SWITCH_CODE); add_event_entry(task->pid); add_event_entry(cookie); /* Another code for daemon back-compat */ @@ -296,7 +298,7 @@ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie) add_event_entry(task->tgid); } - + static void add_cookie_switch(unsigned long cookie) { add_event_entry(ESCAPE_CODE); @@ -304,28 +306,82 @@ static void add_cookie_switch(unsigned long cookie) add_event_entry(cookie); } - + static void add_trace_begin(void) { add_event_entry(ESCAPE_CODE); add_event_entry(TRACE_BEGIN_CODE); } +static void add_data(struct op_entry *entry, struct mm_struct *mm) +{ + unsigned long code, pc, val; + unsigned long cookie; + off_t offset; + + if (!op_cpu_buffer_get_data(entry, &code)) + return; + if (!op_cpu_buffer_get_data(entry, &pc)) + return; + if (!op_cpu_buffer_get_size(entry)) + return; + + if (mm) { + cookie = lookup_dcookie(mm, pc, &offset); + + if (cookie == NO_COOKIE) + offset = pc; + if (cookie == INVALID_COOKIE) { + atomic_inc(&oprofile_stats.sample_lost_no_mapping); + offset = pc; + } + if (cookie != last_cookie) { + add_cookie_switch(cookie); + last_cookie = cookie; + } + } else + offset = pc; + + add_event_entry(ESCAPE_CODE); + add_event_entry(code); + add_event_entry(offset); /* Offset from Dcookie */ + + while (op_cpu_buffer_get_data(entry, &val)) + add_event_entry(val); +} -static void add_sample_entry(unsigned long offset, unsigned long event) +static inline void add_sample_entry(unsigned long offset, unsigned long event) { add_event_entry(offset); add_event_entry(event); } -static int add_us_sample(struct mm_struct * mm, struct op_sample * s) +/* + * Add a sample to the global event buffer. If possible the + * sample is converted into a persistent dentry/offset pair + * for later lookup from userspace. Return 0 on failure. + */ +static int +add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel) { unsigned long cookie; off_t offset; - - cookie = lookup_dcookie(mm, s->eip, &offset); - + + if (in_kernel) { + add_sample_entry(s->eip, s->event); + return 1; + } + + /* add userspace sample */ + + if (!mm) { + atomic_inc(&oprofile_stats.sample_lost_no_mm); + return 0; + } + + cookie = lookup_dcookie(mm, s->eip, &offset); + if (cookie == INVALID_COOKIE) { atomic_inc(&oprofile_stats.sample_lost_no_mapping); return 0; @@ -341,27 +397,8 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s) return 1; } - -/* Add a sample to the global event buffer. If possible the - * sample is converted into a persistent dentry/offset pair - * for later lookup from userspace. - */ -static int -add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel) -{ - if (in_kernel) { - add_sample_entry(s->eip, s->event); - return 1; - } else if (mm) { - return add_us_sample(mm, s); - } else { - atomic_inc(&oprofile_stats.sample_lost_no_mm); - } - return 0; -} - -static void release_mm(struct mm_struct * mm) +static void release_mm(struct mm_struct *mm) { if (!mm) return; @@ -370,9 +407,9 @@ static void release_mm(struct mm_struct * mm) } -static struct mm_struct * take_tasks_mm(struct task_struct * task) +static struct mm_struct *take_tasks_mm(struct task_struct *task) { - struct mm_struct * mm = get_task_mm(task); + struct mm_struct *mm = get_task_mm(task); if (mm) down_read(&mm->mmap_sem); return mm; @@ -383,46 +420,6 @@ static inline int is_code(unsigned long val) { return val == ESCAPE_CODE; } - - -/* "acquire" as many cpu buffer slots as we can */ -static unsigned long get_slots(struct oprofile_cpu_buffer * b) -{ - unsigned long head = b->head_pos; - unsigned long tail = b->tail_pos; - - /* - * Subtle. This resets the persistent last_task - * and in_kernel values used for switching notes. - * BUT, there is a small window between reading - * head_pos, and this call, that means samples - * can appear at the new head position, but not - * be prefixed with the notes for switching - * kernel mode or a task switch. This small hole - * can lead to mis-attribution or samples where - * we don't know if it's in the kernel or not, - * at the start of an event buffer. - */ - cpu_buffer_reset(b); - - if (head >= tail) - return head - tail; - - return head + (b->buffer_size - tail); -} - - -static void increment_tail(struct oprofile_cpu_buffer * b) -{ - unsigned long new_tail = b->tail_pos + 1; - - rmb(); - - if (new_tail < b->buffer_size) - b->tail_pos = new_tail; - else - b->tail_pos = 0; -} /* Move tasks along towards death. Any tasks on dead_tasks @@ -435,8 +432,8 @@ static void process_task_mortuary(void) { unsigned long flags; LIST_HEAD(local_dead_tasks); - struct task_struct * task; - struct task_struct * ttask; + struct task_struct *task; + struct task_struct *ttask; spin_lock_irqsave(&task_mortuary, flags); @@ -456,10 +453,10 @@ static void mark_done(int cpu) { int i; - cpu_set(cpu, marked_cpus); + cpumask_set_cpu(cpu, marked_cpus); for_each_online_cpu(i) { - if (!cpu_isset(i, marked_cpus)) + if (!cpumask_test_cpu(i, marked_cpus)) return; } @@ -468,7 +465,7 @@ static void mark_done(int cpu) */ process_task_mortuary(); - cpus_clear(marked_cpus); + cpumask_clear(marked_cpus); } @@ -491,59 +488,72 @@ typedef enum { */ void sync_buffer(int cpu) { - struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu]; struct mm_struct *mm = NULL; - struct task_struct * new; + struct mm_struct *oldmm; + unsigned long val; + struct task_struct *new; unsigned long cookie = 0; int in_kernel = 1; - unsigned int i; sync_buffer_state state = sb_buffer_start; + unsigned int i; unsigned long available; + unsigned long flags; + struct op_entry entry; + struct op_sample *sample; mutex_lock(&buffer_mutex); - - add_cpu_switch(cpu); - /* Remember, only we can modify tail_pos */ + add_cpu_switch(cpu); - available = get_slots(cpu_buf); + op_cpu_buffer_reset(cpu); + available = op_cpu_buffer_entries(cpu); for (i = 0; i < available; ++i) { - struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos]; - - if (is_code(s->eip)) { - if (s->event <= CPU_IS_KERNEL) { + sample = op_cpu_buffer_read_entry(&entry, cpu); + if (!sample) + break; + + if (is_code(sample->eip)) { + flags = sample->event; + if (flags & TRACE_BEGIN) { + state = sb_bt_start; + add_trace_begin(); + } + if (flags & KERNEL_CTX_SWITCH) { /* kernel/userspace switch */ - in_kernel = s->event; + in_kernel = flags & IS_KERNEL; if (state == sb_buffer_start) state = sb_sample_start; - add_kernel_ctx_switch(s->event); - } else if (s->event == CPU_TRACE_BEGIN) { - state = sb_bt_start; - add_trace_begin(); - } else { - struct mm_struct * oldmm = mm; - + add_kernel_ctx_switch(flags & IS_KERNEL); + } + if (flags & USER_CTX_SWITCH + && op_cpu_buffer_get_data(&entry, &val)) { /* userspace context switch */ - new = (struct task_struct *)s->event; - + new = (struct task_struct *)val; + oldmm = mm; release_mm(oldmm); mm = take_tasks_mm(new); if (mm != oldmm) cookie = get_exec_dcookie(mm); add_user_ctx_switch(new, cookie); } - } else { - if (state >= sb_bt_start && - !add_sample(mm, s, in_kernel)) { - if (state == sb_bt_start) { - state = sb_bt_ignore; - atomic_inc(&oprofile_stats.bt_lost_no_mapping); - } - } + if (op_cpu_buffer_get_size(&entry)) + add_data(&entry, mm); + continue; } - increment_tail(cpu_buf); + if (state < sb_bt_start) + /* ignore sample */ + continue; + + if (add_sample(mm, sample, in_kernel)) + continue; + + /* ignore backtraces if failed to add a sample */ + if (state == sb_bt_start) { + state = sb_bt_ignore; + atomic_inc(&oprofile_stats.bt_lost_no_mapping); + } } release_mm(mm); @@ -551,3 +561,27 @@ void sync_buffer(int cpu) mutex_unlock(&buffer_mutex); } + +/* The function can be used to add a buffer worth of data directly to + * the kernel buffer. The buffer is assumed to be a circular buffer. + * Take the entries from index start and end at index end, wrapping + * at max_entries. + */ +void oprofile_put_buff(unsigned long *buf, unsigned int start, + unsigned int stop, unsigned int max) +{ + int i; + + i = start; + + mutex_lock(&buffer_mutex); + while (i != stop) { + add_event_entry(buf[i++]); + + if (i >= max) + i = 0; + } + + mutex_unlock(&buffer_mutex); +} + |
