aboutsummaryrefslogtreecommitdiff
path: root/drivers/oprofile/buffer_sync.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/oprofile/buffer_sync.c')
-rw-r--r--drivers/oprofile/buffer_sync.c338
1 files changed, 186 insertions, 152 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index b07ba2a1411..d93b2b6b1f7 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -1,10 +1,12 @@
/**
* @file buffer_sync.c
*
- * @remark Copyright 2002 OProfile authors
+ * @remark Copyright 2002-2009 OProfile authors
* @remark Read the file COPYING
*
* @author John Levon <levon@movementarian.org>
+ * @author Barry Kasindorf
+ * @author Robert Richter <robert.richter@amd.com>
*
* This is the core of the buffer management. Each
* CPU buffer is processed and entered into the
@@ -28,19 +30,19 @@
#include <linux/fs.h>
#include <linux/oprofile.h>
#include <linux/sched.h>
+#include <linux/gfp.h>
#include "oprofile_stats.h"
#include "event_buffer.h"
#include "cpu_buffer.h"
#include "buffer_sync.h"
-
+
static LIST_HEAD(dying_tasks);
static LIST_HEAD(dead_tasks);
-static cpumask_t marked_cpus = CPU_MASK_NONE;
+static cpumask_var_t marked_cpus;
static DEFINE_SPINLOCK(task_mortuary);
static void process_task_mortuary(void);
-
/* Take ownership of the task struct and place it on the
* list for processing. Only after two full buffer syncs
* does the task eventually get freed, because by then
@@ -48,10 +50,11 @@ static void process_task_mortuary(void);
* Can be invoked from softirq via RCU callback due to
* call_rcu() of the task struct, hence the _irqsave.
*/
-static int task_free_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+task_free_notify(struct notifier_block *self, unsigned long val, void *data)
{
unsigned long flags;
- struct task_struct * task = data;
+ struct task_struct *task = data;
spin_lock_irqsave(&task_mortuary, flags);
list_add(&task->tasks, &dying_tasks);
spin_unlock_irqrestore(&task_mortuary, flags);
@@ -62,13 +65,14 @@ static int task_free_notify(struct notifier_block * self, unsigned long val, voi
/* The task is on its way out. A sync of the buffer means we can catch
* any remaining samples for this task.
*/
-static int task_exit_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+task_exit_notify(struct notifier_block *self, unsigned long val, void *data)
{
/* To avoid latency problems, we only process the current CPU,
* hoping that most samples for the task are on this CPU
*/
sync_buffer(raw_smp_processor_id());
- return 0;
+ return 0;
}
@@ -77,11 +81,12 @@ static int task_exit_notify(struct notifier_block * self, unsigned long val, voi
* we don't lose any. This does not have to be exact, it's a QoI issue
* only.
*/
-static int munmap_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+munmap_notify(struct notifier_block *self, unsigned long val, void *data)
{
unsigned long addr = (unsigned long)data;
- struct mm_struct * mm = current->mm;
- struct vm_area_struct * mpnt;
+ struct mm_struct *mm = current->mm;
+ struct vm_area_struct *mpnt;
down_read(&mm->mmap_sem);
@@ -99,11 +104,12 @@ static int munmap_notify(struct notifier_block * self, unsigned long val, void *
return 0;
}
-
+
/* We need to be told about new modules so we don't attribute to a previously
* loaded module, or drop the samples on the floor.
*/
-static int module_load_notify(struct notifier_block * self, unsigned long val, void * data)
+static int
+module_load_notify(struct notifier_block *self, unsigned long val, void *data)
{
#ifdef CONFIG_MODULES
if (val != MODULE_STATE_COMING)
@@ -118,7 +124,7 @@ static int module_load_notify(struct notifier_block * self, unsigned long val, v
return 0;
}
-
+
static struct notifier_block task_free_nb = {
.notifier_call = task_free_notify,
};
@@ -135,21 +141,19 @@ static struct notifier_block module_load_nb = {
.notifier_call = module_load_notify,
};
-
-static void end_sync(void)
+static void free_all_tasks(void)
{
- end_cpu_work();
/* make sure we don't leak task structs */
process_task_mortuary();
process_task_mortuary();
}
-
int sync_start(void)
{
int err;
- start_cpu_work();
+ if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
+ return -ENOMEM;
err = task_handoff_register(&task_free_nb);
if (err)
@@ -164,6 +168,8 @@ int sync_start(void)
if (err)
goto out4;
+ start_cpu_work();
+
out:
return err;
out4:
@@ -172,19 +178,26 @@ out3:
profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
out2:
task_handoff_unregister(&task_free_nb);
+ free_all_tasks();
out1:
- end_sync();
+ free_cpumask_var(marked_cpus);
goto out;
}
void sync_stop(void)
{
+ end_cpu_work();
unregister_module_notifier(&module_load_nb);
profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
task_handoff_unregister(&task_free_nb);
- end_sync();
+ barrier(); /* do all of the above first */
+
+ flush_cpu_work();
+
+ free_all_tasks();
+ free_cpumask_var(marked_cpus);
}
@@ -196,36 +209,25 @@ static inline unsigned long fast_get_dcookie(struct path *path)
{
unsigned long cookie;
- if (path->dentry->d_cookie)
+ if (path->dentry->d_flags & DCACHE_COOKIE)
return (unsigned long)path->dentry;
get_dcookie(path, &cookie);
return cookie;
}
-/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+/* Look up the dcookie for the task's mm->exe_file,
* which corresponds loosely to "application name". This is
* not strictly necessary but allows oprofile to associate
* shared-library samples with particular applications
*/
-static unsigned long get_exec_dcookie(struct mm_struct * mm)
+static unsigned long get_exec_dcookie(struct mm_struct *mm)
{
unsigned long cookie = NO_COOKIE;
- struct vm_area_struct * vma;
-
- if (!mm)
- goto out;
-
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (!vma->vm_file)
- continue;
- if (!(vma->vm_flags & VM_EXECUTABLE))
- continue;
- cookie = fast_get_dcookie(&vma->vm_file->f_path);
- break;
- }
-out:
+ if (mm && mm->exe_file)
+ cookie = fast_get_dcookie(&mm->exe_file->f_path);
+
return cookie;
}
@@ -235,13 +237,14 @@ out:
* sure to do this lookup before a mm->mmap modification happens so
* we don't lose track.
*/
-static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, off_t * offset)
+static unsigned long
+lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset)
{
unsigned long cookie = NO_COOKIE;
- struct vm_area_struct * vma;
+ struct vm_area_struct *vma;
for (vma = find_vma(mm, addr); vma; vma = vma->vm_next) {
-
+
if (addr < vma->vm_start || addr >= vma->vm_end)
continue;
@@ -263,9 +266,8 @@ static unsigned long lookup_dcookie(struct mm_struct * mm, unsigned long addr, o
return cookie;
}
-
static unsigned long last_cookie = INVALID_COOKIE;
-
+
static void add_cpu_switch(int i)
{
add_event_entry(ESCAPE_CODE);
@@ -278,16 +280,16 @@ static void add_kernel_ctx_switch(unsigned int in_kernel)
{
add_event_entry(ESCAPE_CODE);
if (in_kernel)
- add_event_entry(KERNEL_ENTER_SWITCH_CODE);
+ add_event_entry(KERNEL_ENTER_SWITCH_CODE);
else
- add_event_entry(KERNEL_EXIT_SWITCH_CODE);
+ add_event_entry(KERNEL_EXIT_SWITCH_CODE);
}
-
+
static void
-add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
+add_user_ctx_switch(struct task_struct const *task, unsigned long cookie)
{
add_event_entry(ESCAPE_CODE);
- add_event_entry(CTX_SWITCH_CODE);
+ add_event_entry(CTX_SWITCH_CODE);
add_event_entry(task->pid);
add_event_entry(cookie);
/* Another code for daemon back-compat */
@@ -296,7 +298,7 @@ add_user_ctx_switch(struct task_struct const * task, unsigned long cookie)
add_event_entry(task->tgid);
}
-
+
static void add_cookie_switch(unsigned long cookie)
{
add_event_entry(ESCAPE_CODE);
@@ -304,28 +306,82 @@ static void add_cookie_switch(unsigned long cookie)
add_event_entry(cookie);
}
-
+
static void add_trace_begin(void)
{
add_event_entry(ESCAPE_CODE);
add_event_entry(TRACE_BEGIN_CODE);
}
+static void add_data(struct op_entry *entry, struct mm_struct *mm)
+{
+ unsigned long code, pc, val;
+ unsigned long cookie;
+ off_t offset;
+
+ if (!op_cpu_buffer_get_data(entry, &code))
+ return;
+ if (!op_cpu_buffer_get_data(entry, &pc))
+ return;
+ if (!op_cpu_buffer_get_size(entry))
+ return;
+
+ if (mm) {
+ cookie = lookup_dcookie(mm, pc, &offset);
+
+ if (cookie == NO_COOKIE)
+ offset = pc;
+ if (cookie == INVALID_COOKIE) {
+ atomic_inc(&oprofile_stats.sample_lost_no_mapping);
+ offset = pc;
+ }
+ if (cookie != last_cookie) {
+ add_cookie_switch(cookie);
+ last_cookie = cookie;
+ }
+ } else
+ offset = pc;
+
+ add_event_entry(ESCAPE_CODE);
+ add_event_entry(code);
+ add_event_entry(offset); /* Offset from Dcookie */
+
+ while (op_cpu_buffer_get_data(entry, &val))
+ add_event_entry(val);
+}
-static void add_sample_entry(unsigned long offset, unsigned long event)
+static inline void add_sample_entry(unsigned long offset, unsigned long event)
{
add_event_entry(offset);
add_event_entry(event);
}
-static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
+/*
+ * Add a sample to the global event buffer. If possible the
+ * sample is converted into a persistent dentry/offset pair
+ * for later lookup from userspace. Return 0 on failure.
+ */
+static int
+add_sample(struct mm_struct *mm, struct op_sample *s, int in_kernel)
{
unsigned long cookie;
off_t offset;
-
- cookie = lookup_dcookie(mm, s->eip, &offset);
-
+
+ if (in_kernel) {
+ add_sample_entry(s->eip, s->event);
+ return 1;
+ }
+
+ /* add userspace sample */
+
+ if (!mm) {
+ atomic_inc(&oprofile_stats.sample_lost_no_mm);
+ return 0;
+ }
+
+ cookie = lookup_dcookie(mm, s->eip, &offset);
+
if (cookie == INVALID_COOKIE) {
atomic_inc(&oprofile_stats.sample_lost_no_mapping);
return 0;
@@ -341,27 +397,8 @@ static int add_us_sample(struct mm_struct * mm, struct op_sample * s)
return 1;
}
-
-/* Add a sample to the global event buffer. If possible the
- * sample is converted into a persistent dentry/offset pair
- * for later lookup from userspace.
- */
-static int
-add_sample(struct mm_struct * mm, struct op_sample * s, int in_kernel)
-{
- if (in_kernel) {
- add_sample_entry(s->eip, s->event);
- return 1;
- } else if (mm) {
- return add_us_sample(mm, s);
- } else {
- atomic_inc(&oprofile_stats.sample_lost_no_mm);
- }
- return 0;
-}
-
-static void release_mm(struct mm_struct * mm)
+static void release_mm(struct mm_struct *mm)
{
if (!mm)
return;
@@ -370,9 +407,9 @@ static void release_mm(struct mm_struct * mm)
}
-static struct mm_struct * take_tasks_mm(struct task_struct * task)
+static struct mm_struct *take_tasks_mm(struct task_struct *task)
{
- struct mm_struct * mm = get_task_mm(task);
+ struct mm_struct *mm = get_task_mm(task);
if (mm)
down_read(&mm->mmap_sem);
return mm;
@@ -383,46 +420,6 @@ static inline int is_code(unsigned long val)
{
return val == ESCAPE_CODE;
}
-
-
-/* "acquire" as many cpu buffer slots as we can */
-static unsigned long get_slots(struct oprofile_cpu_buffer * b)
-{
- unsigned long head = b->head_pos;
- unsigned long tail = b->tail_pos;
-
- /*
- * Subtle. This resets the persistent last_task
- * and in_kernel values used for switching notes.
- * BUT, there is a small window between reading
- * head_pos, and this call, that means samples
- * can appear at the new head position, but not
- * be prefixed with the notes for switching
- * kernel mode or a task switch. This small hole
- * can lead to mis-attribution or samples where
- * we don't know if it's in the kernel or not,
- * at the start of an event buffer.
- */
- cpu_buffer_reset(b);
-
- if (head >= tail)
- return head - tail;
-
- return head + (b->buffer_size - tail);
-}
-
-
-static void increment_tail(struct oprofile_cpu_buffer * b)
-{
- unsigned long new_tail = b->tail_pos + 1;
-
- rmb();
-
- if (new_tail < b->buffer_size)
- b->tail_pos = new_tail;
- else
- b->tail_pos = 0;
-}
/* Move tasks along towards death. Any tasks on dead_tasks
@@ -435,8 +432,8 @@ static void process_task_mortuary(void)
{
unsigned long flags;
LIST_HEAD(local_dead_tasks);
- struct task_struct * task;
- struct task_struct * ttask;
+ struct task_struct *task;
+ struct task_struct *ttask;
spin_lock_irqsave(&task_mortuary, flags);
@@ -456,10 +453,10 @@ static void mark_done(int cpu)
{
int i;
- cpu_set(cpu, marked_cpus);
+ cpumask_set_cpu(cpu, marked_cpus);
for_each_online_cpu(i) {
- if (!cpu_isset(i, marked_cpus))
+ if (!cpumask_test_cpu(i, marked_cpus))
return;
}
@@ -468,7 +465,7 @@ static void mark_done(int cpu)
*/
process_task_mortuary();
- cpus_clear(marked_cpus);
+ cpumask_clear(marked_cpus);
}
@@ -491,59 +488,72 @@ typedef enum {
*/
void sync_buffer(int cpu)
{
- struct oprofile_cpu_buffer * cpu_buf = &cpu_buffer[cpu];
struct mm_struct *mm = NULL;
- struct task_struct * new;
+ struct mm_struct *oldmm;
+ unsigned long val;
+ struct task_struct *new;
unsigned long cookie = 0;
int in_kernel = 1;
- unsigned int i;
sync_buffer_state state = sb_buffer_start;
+ unsigned int i;
unsigned long available;
+ unsigned long flags;
+ struct op_entry entry;
+ struct op_sample *sample;
mutex_lock(&buffer_mutex);
-
- add_cpu_switch(cpu);
- /* Remember, only we can modify tail_pos */
+ add_cpu_switch(cpu);
- available = get_slots(cpu_buf);
+ op_cpu_buffer_reset(cpu);
+ available = op_cpu_buffer_entries(cpu);
for (i = 0; i < available; ++i) {
- struct op_sample * s = &cpu_buf->buffer[cpu_buf->tail_pos];
-
- if (is_code(s->eip)) {
- if (s->event <= CPU_IS_KERNEL) {
+ sample = op_cpu_buffer_read_entry(&entry, cpu);
+ if (!sample)
+ break;
+
+ if (is_code(sample->eip)) {
+ flags = sample->event;
+ if (flags & TRACE_BEGIN) {
+ state = sb_bt_start;
+ add_trace_begin();
+ }
+ if (flags & KERNEL_CTX_SWITCH) {
/* kernel/userspace switch */
- in_kernel = s->event;
+ in_kernel = flags & IS_KERNEL;
if (state == sb_buffer_start)
state = sb_sample_start;
- add_kernel_ctx_switch(s->event);
- } else if (s->event == CPU_TRACE_BEGIN) {
- state = sb_bt_start;
- add_trace_begin();
- } else {
- struct mm_struct * oldmm = mm;
-
+ add_kernel_ctx_switch(flags & IS_KERNEL);
+ }
+ if (flags & USER_CTX_SWITCH
+ && op_cpu_buffer_get_data(&entry, &val)) {
/* userspace context switch */
- new = (struct task_struct *)s->event;
-
+ new = (struct task_struct *)val;
+ oldmm = mm;
release_mm(oldmm);
mm = take_tasks_mm(new);
if (mm != oldmm)
cookie = get_exec_dcookie(mm);
add_user_ctx_switch(new, cookie);
}
- } else {
- if (state >= sb_bt_start &&
- !add_sample(mm, s, in_kernel)) {
- if (state == sb_bt_start) {
- state = sb_bt_ignore;
- atomic_inc(&oprofile_stats.bt_lost_no_mapping);
- }
- }
+ if (op_cpu_buffer_get_size(&entry))
+ add_data(&entry, mm);
+ continue;
}
- increment_tail(cpu_buf);
+ if (state < sb_bt_start)
+ /* ignore sample */
+ continue;
+
+ if (add_sample(mm, sample, in_kernel))
+ continue;
+
+ /* ignore backtraces if failed to add a sample */
+ if (state == sb_bt_start) {
+ state = sb_bt_ignore;
+ atomic_inc(&oprofile_stats.bt_lost_no_mapping);
+ }
}
release_mm(mm);
@@ -551,3 +561,27 @@ void sync_buffer(int cpu)
mutex_unlock(&buffer_mutex);
}
+
+/* The function can be used to add a buffer worth of data directly to
+ * the kernel buffer. The buffer is assumed to be a circular buffer.
+ * Take the entries from index start and end at index end, wrapping
+ * at max_entries.
+ */
+void oprofile_put_buff(unsigned long *buf, unsigned int start,
+ unsigned int stop, unsigned int max)
+{
+ int i;
+
+ i = start;
+
+ mutex_lock(&buffer_mutex);
+ while (i != stop) {
+ add_event_entry(buf[i++]);
+
+ if (i >= max)
+ i = 0;
+ }
+
+ mutex_unlock(&buffer_mutex);
+}
+