aboutsummaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
Diffstat (limited to 'drivers')
-rw-r--r--drivers/oprofile/buffer_sync.c117
-rw-r--r--drivers/oprofile/cpu_buffer.c197
-rw-r--r--drivers/oprofile/cpu_buffer.h69
-rw-r--r--drivers/oprofile/oprofile_files.c15
4 files changed, 218 insertions, 180 deletions
diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
index b55cd23ffde..737bd948482 100644
--- a/drivers/oprofile/buffer_sync.c
+++ b/drivers/oprofile/buffer_sync.c
@@ -268,18 +268,6 @@ lookup_dcookie(struct mm_struct *mm, unsigned long addr, off_t *offset)
return cookie;
}
-static void increment_tail(struct oprofile_cpu_buffer *b)
-{
- unsigned long new_tail = b->tail_pos + 1;
-
- rmb(); /* be sure fifo pointers are synchromized */
-
- if (new_tail < b->buffer_size)
- b->tail_pos = new_tail;
- else
- b->tail_pos = 0;
-}
-
static unsigned long last_cookie = INVALID_COOKIE;
static void add_cpu_switch(int i)
@@ -331,28 +319,25 @@ static void add_trace_begin(void)
#define IBS_FETCH_CODE_SIZE 2
#define IBS_OP_CODE_SIZE 5
-#define IBS_EIP(offset) \
- (((struct op_sample *)&cpu_buf->buffer[(offset)])->eip)
-#define IBS_EVENT(offset) \
- (((struct op_sample *)&cpu_buf->buffer[(offset)])->event)
/*
* Add IBS fetch and op entries to event buffer
*/
-static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
- struct mm_struct *mm)
+static void add_ibs_begin(int cpu, int code, struct mm_struct *mm)
{
unsigned long rip;
int i, count;
unsigned long ibs_cookie = 0;
off_t offset;
+ struct op_sample *sample;
- increment_tail(cpu_buf); /* move to RIP entry */
-
- rip = IBS_EIP(cpu_buf->tail_pos);
+ sample = cpu_buffer_read_entry(cpu);
+ if (!sample)
+ goto Error;
+ rip = sample->eip;
#ifdef __LP64__
- rip += IBS_EVENT(cpu_buf->tail_pos) << 32;
+ rip += sample->event << 32;
#endif
if (mm) {
@@ -376,8 +361,8 @@ static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
add_event_entry(offset); /* Offset from Dcookie */
/* we send the Dcookie offset, but send the raw Linear Add also*/
- add_event_entry(IBS_EIP(cpu_buf->tail_pos));
- add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
+ add_event_entry(sample->eip);
+ add_event_entry(sample->event);
if (code == IBS_FETCH_CODE)
count = IBS_FETCH_CODE_SIZE; /*IBS FETCH is 2 int64s*/
@@ -385,10 +370,17 @@ static void add_ibs_begin(struct oprofile_cpu_buffer *cpu_buf, int code,
count = IBS_OP_CODE_SIZE; /*IBS OP is 5 int64s*/
for (i = 0; i < count; i++) {
- increment_tail(cpu_buf);
- add_event_entry(IBS_EIP(cpu_buf->tail_pos));
- add_event_entry(IBS_EVENT(cpu_buf->tail_pos));
+ sample = cpu_buffer_read_entry(cpu);
+ if (!sample)
+ goto Error;
+ add_event_entry(sample->eip);
+ add_event_entry(sample->event);
}
+
+ return;
+
+Error:
+ return;
}
#endif
@@ -466,33 +458,6 @@ static inline int is_code(unsigned long val)
}
-/* "acquire" as many cpu buffer slots as we can */
-static unsigned long get_slots(struct oprofile_cpu_buffer *b)
-{
- unsigned long head = b->head_pos;
- unsigned long tail = b->tail_pos;
-
- /*
- * Subtle. This resets the persistent last_task
- * and in_kernel values used for switching notes.
- * BUT, there is a small window between reading
- * head_pos, and this call, that means samples
- * can appear at the new head position, but not
- * be prefixed with the notes for switching
- * kernel mode or a task switch. This small hole
- * can lead to mis-attribution or samples where
- * we don't know if it's in the kernel or not,
- * at the start of an event buffer.
- */
- cpu_buffer_reset(b);
-
- if (head >= tail)
- return head - tail;
-
- return head + (b->buffer_size - tail);
-}
-
-
/* Move tasks along towards death. Any tasks on dead_tasks
* will definitely have no remaining references in any
* CPU buffers at this point, because we use two lists,
@@ -559,61 +524,61 @@ typedef enum {
*/
void sync_buffer(int cpu)
{
- struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
struct mm_struct *mm = NULL;
+ struct mm_struct *oldmm;
struct task_struct *new;
unsigned long cookie = 0;
int in_kernel = 1;
sync_buffer_state state = sb_buffer_start;
-#ifndef CONFIG_OPROFILE_IBS
unsigned int i;
unsigned long available;
-#endif
mutex_lock(&buffer_mutex);
add_cpu_switch(cpu);
- /* Remember, only we can modify tail_pos */
-
-#ifndef CONFIG_OPROFILE_IBS
- available = get_slots(cpu_buf);
+ cpu_buffer_reset(cpu);
+ available = cpu_buffer_entries(cpu);
for (i = 0; i < available; ++i) {
-#else
- while (get_slots(cpu_buf)) {
-#endif
- struct op_sample *s = &cpu_buf->buffer[cpu_buf->tail_pos];
+ struct op_sample *s = cpu_buffer_read_entry(cpu);
+ if (!s)
+ break;
if (is_code(s->eip)) {
- if (s->event <= CPU_IS_KERNEL) {
+ switch (s->event) {
+ case 0:
+ case CPU_IS_KERNEL:
/* kernel/userspace switch */
in_kernel = s->event;
if (state == sb_buffer_start)
state = sb_sample_start;
add_kernel_ctx_switch(s->event);
- } else if (s->event == CPU_TRACE_BEGIN) {
+ break;
+ case CPU_TRACE_BEGIN:
state = sb_bt_start;
add_trace_begin();
+ break;
#ifdef CONFIG_OPROFILE_IBS
- } else if (s->event == IBS_FETCH_BEGIN) {
+ case IBS_FETCH_BEGIN:
state = sb_bt_start;
- add_ibs_begin(cpu_buf, IBS_FETCH_CODE, mm);
- } else if (s->event == IBS_OP_BEGIN) {
+ add_ibs_begin(cpu, IBS_FETCH_CODE, mm);
+ break;
+ case IBS_OP_BEGIN:
state = sb_bt_start;
- add_ibs_begin(cpu_buf, IBS_OP_CODE, mm);
+ add_ibs_begin(cpu, IBS_OP_CODE, mm);
+ break;
#endif
- } else {
- struct mm_struct *oldmm = mm;
-
+ default:
/* userspace context switch */
+ oldmm = mm;
new = (struct task_struct *)s->event;
-
release_mm(oldmm);
mm = take_tasks_mm(new);
if (mm != oldmm)
cookie = get_exec_dcookie(mm);
add_user_ctx_switch(new, cookie);
+ break;
}
} else if (state >= sb_bt_start &&
!add_sample(mm, s, in_kernel)) {
@@ -622,8 +587,6 @@ void sync_buffer(int cpu)
atomic_inc(&oprofile_stats.bt_lost_no_mapping);
}
}
-
- increment_tail(cpu_buf);
}
release_mm(mm);
diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
index 01d38e78cde..61090969158 100644
--- a/drivers/oprofile/cpu_buffer.c
+++ b/drivers/oprofile/cpu_buffer.c
@@ -28,6 +28,25 @@
#include "buffer_sync.h"
#include "oprof.h"
+#define OP_BUFFER_FLAGS 0
+
+/*
+ * Read and write access is using spin locking. Thus, writing to the
+ * buffer by NMI handler (x86) could occur also during critical
+ * sections when reading the buffer. To avoid this, there are 2
+ * buffers for independent read and write access. Read access is in
+ * process context only, write access only in the NMI handler. If the
+ * read buffer runs empty, both buffers are swapped atomically. There
+ * is potentially a small window during swapping where the buffers are
+ * disabled and samples could be lost.
+ *
+ * Using 2 buffers is a little bit overhead, but the solution is clear
+ * and does not require changes in the ring buffer implementation. It
+ * can be changed to a single buffer solution when the ring buffer
+ * access is implemented as non-locking atomic code.
+ */
+struct ring_buffer *op_ring_buffer_read;
+struct ring_buffer *op_ring_buffer_write;
DEFINE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
static void wq_sync_buffer(struct work_struct *work);
@@ -37,12 +56,12 @@ static int work_enabled;
void free_cpu_buffers(void)
{
- int i;
-
- for_each_possible_cpu(i) {
- vfree(per_cpu(cpu_buffer, i).buffer);
- per_cpu(cpu_buffer, i).buffer = NULL;
- }
+ if (op_ring_buffer_read)
+ ring_buffer_free(op_ring_buffer_read);
+ op_ring_buffer_read = NULL;
+ if (op_ring_buffer_write)
+ ring_buffer_free(op_ring_buffer_write);
+ op_ring_buffer_write = NULL;
}
unsigned long oprofile_get_cpu_buffer_size(void)
@@ -64,14 +83,16 @@ int alloc_cpu_buffers(void)
unsigned long buffer_size = fs_cpu_buffer_size;
+ op_ring_buffer_read = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
+ if (!op_ring_buffer_read)
+ goto fail;
+ op_ring_buffer_write = ring_buffer_alloc(buffer_size, OP_BUFFER_FLAGS);
+ if (!op_ring_buffer_write)
+ goto fail;
+
for_each_possible_cpu(i) {
struct oprofile_cpu_buffer *b = &per_cpu(cpu_buffer, i);
- b->buffer = vmalloc_node(sizeof(struct op_sample) * buffer_size,
- cpu_to_node(i));
- if (!b->buffer)
- goto fail;
-
b->last_task = NULL;
b->last_is_kernel = -1;
b->tracing = 0;
@@ -124,57 +145,31 @@ void end_cpu_work(void)
flush_scheduled_work();
}
-/* Resets the cpu buffer to a sane state. */
-void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf)
-{
- /* reset these to invalid values; the next sample
- * collected will populate the buffer with proper
- * values to initialize the buffer
- */
- cpu_buf->last_is_kernel = -1;
- cpu_buf->last_task = NULL;
-}
-
-/* compute number of available slots in cpu_buffer queue */
-static unsigned long nr_available_slots(struct oprofile_cpu_buffer const *b)
+static inline int
+add_sample(struct oprofile_cpu_buffer *cpu_buf,
+ unsigned long pc, unsigned long event)
{
- unsigned long head = b->head_pos;
- unsigned long tail = b->tail_pos;
+ struct op_entry entry;
+ int ret;
- if (tail > head)
- return (tail - head) - 1;
+ ret = cpu_buffer_write_entry(&entry);
+ if (ret)
+ return ret;
- return tail + (b->buffer_size - head) - 1;
-}
+ entry.sample->eip = pc;
+ entry.sample->event = event;
-static void increment_head(struct oprofile_cpu_buffer *b)
-{
- unsigned long new_head = b->head_pos + 1;
-
- /* Ensure anything written to the slot before we
- * increment is visible */
- wmb();
-
- if (new_head < b->buffer_size)
- b->head_pos = new_head;
- else
- b->head_pos = 0;
-}
+ ret = cpu_buffer_write_commit(&entry);
+ if (ret)
+ return ret;
-static inline void
-add_sample(struct oprofile_cpu_buffer *cpu_buf,
- unsigned long pc, unsigned long event)
-{
- struct op_sample *entry = &cpu_buf->buffer[cpu_buf->head_pos];
- entry->eip = pc;
- entry->event = event;
- increment_head(cpu_buf);
+ return 0;
}
-static inline void
+static inline int
add_code(struct oprofile_cpu_buffer *buffer, unsigned long value)
{
- add_sample(buffer, ESCAPE_CODE, value);
+ return add_sample(buffer, ESCAPE_CODE, value);
}
/* This must be safe from any context. It's safe writing here
@@ -198,11 +193,6 @@ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
return 0;
}
- if (nr_available_slots(cpu_buf) < 3) {
- cpu_buf->sample_lost_overflow++;
- return 0;
- }
-
is_kernel = !!is_kernel;
task = current;
@@ -210,26 +200,29 @@ static int log_sample(struct oprofile_cpu_buffer *cpu_buf, unsigned long pc,
/* notice a switch from user->kernel or vice versa */
if (cpu_buf->last_is_kernel != is_kernel) {
cpu_buf->last_is_kernel = is_kernel;
- add_code(cpu_buf, is_kernel);
+ if (add_code(cpu_buf, is_kernel))
+ goto fail;
}
/* notice a task switch */
if (cpu_buf->last_task != task) {
cpu_buf->last_task = task;
- add_code(cpu_buf, (unsigned long)task);
+ if (add_code(cpu_buf, (unsigned long)task))
+ goto fail;
}
- add_sample(cpu_buf, pc, event);
+ if (add_sample(cpu_buf, pc, event))
+ goto fail;
+
return 1;
+
+fail:
+ cpu_buf->sample_lost_overflow++;
+ return 0;
}
static int oprofile_begin_trace(struct oprofile_cpu_buffer *cpu_buf)
{
- if (nr_available_slots(cpu_buf) < 4) {
- cpu_buf->sample_lost_overflow++;
- return 0;
- }
-
add_code(cpu_buf, CPU_TRACE_BEGIN);
cpu_buf->tracing = 1;
return 1;
@@ -253,8 +246,10 @@ void oprofile_add_ext_sample(unsigned long pc, struct pt_regs * const regs,
if (!oprofile_begin_trace(cpu_buf))
return;
- /* if log_sample() fail we can't backtrace since we lost the source
- * of this event */
+ /*
+ * if log_sample() fail we can't backtrace since we lost the
+ * source of this event
+ */
if (log_sample(cpu_buf, pc, is_kernel, event))
oprofile_ops.backtrace(regs, backtrace_depth);
oprofile_end_trace(cpu_buf);
@@ -272,49 +267,55 @@ void oprofile_add_sample(struct pt_regs * const regs, unsigned long event)
#define MAX_IBS_SAMPLE_SIZE 14
-void oprofile_add_ibs_sample(struct pt_regs *const regs,
- unsigned int *const ibs_sample, int ibs_code)
+void oprofile_add_ibs_sample(struct pt_regs * const regs,
+ unsigned int * const ibs_sample, int ibs_code)
{
int is_kernel = !user_mode(regs);
struct oprofile_cpu_buffer *cpu_buf = &__get_cpu_var(cpu_buffer);
struct task_struct *task;
+ int fail = 0;
cpu_buf->sample_received++;
- if (nr_available_slots(cpu_buf) < MAX_IBS_SAMPLE_SIZE) {
- /* we can't backtrace since we lost the source of this event */
- cpu_buf->sample_lost_overflow++;
- return;
- }
-
/* notice a switch from user->kernel or vice versa */
if (cpu_buf->last_is_kernel != is_kernel) {
+ if (add_code(cpu_buf, is_kernel))
+ goto fail;
cpu_buf->last_is_kernel = is_kernel;
- add_code(cpu_buf, is_kernel);
}
/* notice a task switch */
if (!is_kernel) {
task = current;
if (cpu_buf->last_task != task) {
+ if (add_code(cpu_buf, (unsigned long)task))
+ goto fail;
cpu_buf->last_task = task;
- add_code(cpu_buf, (unsigned long)task);
}
}
- add_code(cpu_buf, ibs_code);
- add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]);
- add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]);
- add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]);
+ fail = fail || add_code(cpu_buf, ibs_code);
+ fail = fail || add_sample(cpu_buf, ibs_sample[0], ibs_sample[1]);
+ fail = fail || add_sample(cpu_buf, ibs_sample[2], ibs_sample[3]);
+ fail = fail || add_sample(cpu_buf, ibs_sample[4], ibs_sample[5]);
if (ibs_code == IBS_OP_BEGIN) {
- add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]);
- add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]);
- add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]);
+ fail = fail || add_sample(cpu_buf, ibs_sample[6], ibs_sample[7]);
+ fail = fail || add_sample(cpu_buf, ibs_sample[8], ibs_sample[9]);
+ fail = fail || add_sample(cpu_buf, ibs_sample[10], ibs_sample[11]);
}
+ if (fail)
+ goto fail;
+
if (backtrace_depth)
oprofile_ops.backtrace(regs, backtrace_depth);
+
+ return;
+
+fail:
+ cpu_buf->sample_lost_overflow++;
+ return;
}
#endif
@@ -332,21 +333,21 @@ void oprofile_add_trace(unsigned long pc)
if (!cpu_buf->tracing)
return;
- if (nr_available_slots(cpu_buf) < 1) {
- cpu_buf->tracing = 0;
- cpu_buf->sample_lost_overflow++;
- return;
- }
+ /*
+ * broken frame can give an eip with the same value as an
+ * escape code, abort the trace if we get it
+ */
+ if (pc == ESCAPE_CODE)
+ goto fail;
- /* broken frame can give an eip with the same value as an escape code,
- * abort the trace if we get it */
- if (pc == ESCAPE_CODE) {
- cpu_buf->tracing = 0;
- cpu_buf->backtrace_aborted++;
- return;
- }
+ if (add_sample(cpu_buf, pc, 0))
+ goto fail;
- add_sample(cpu_buf, pc, 0);
+ return;
+fail:
+ cpu_buf->tracing = 0;
+ cpu_buf->backtrace_aborted++;
+ return;
}
/*
diff --git a/drivers/oprofile/cpu_buffer.h b/drivers/oprofile/cpu_buffer.h
index d3cc26264db..aacb0f0bc56 100644
--- a/drivers/oprofile/cpu_buffer.h
+++ b/drivers/oprofile/cpu_buffer.h
@@ -15,6 +15,7 @@
#include <linux/workqueue.h>
#include <linux/cache.h>
#include <linux/sched.h>
+#include <linux/ring_buffer.h>
struct task_struct;
@@ -32,6 +33,12 @@ struct op_sample {
unsigned long event;
};
+struct op_entry {
+ struct ring_buffer_event *event;
+ struct op_sample *sample;
+ unsigned long irq_flags;
+};
+
struct oprofile_cpu_buffer {
volatile unsigned long head_pos;
volatile unsigned long tail_pos;
@@ -39,7 +46,6 @@ struct oprofile_cpu_buffer {
struct task_struct *last_task;
int last_is_kernel;
int tracing;
- struct op_sample *buffer;
unsigned long sample_received;
unsigned long sample_lost_overflow;
unsigned long backtrace_aborted;
@@ -48,9 +54,68 @@ struct oprofile_cpu_buffer {
struct delayed_work work;
};
+extern struct ring_buffer *op_ring_buffer_read;
+extern struct ring_buffer *op_ring_buffer_write;
DECLARE_PER_CPU(struct oprofile_cpu_buffer, cpu_buffer);
-void cpu_buffer_reset(struct oprofile_cpu_buffer *cpu_buf);
+/*
+ * Resets the cpu buffer to a sane state.
+ *
+ * reset these to invalid values; the next sample collected will
+ * populate the buffer with proper values to initialize the buffer
+ */
+static inline void cpu_buffer_reset(int cpu)
+{
+ struct oprofile_cpu_buffer *cpu_buf = &per_cpu(cpu_buffer, cpu);
+
+ cpu_buf->last_is_kernel = -1;
+ cpu_buf->last_task = NULL;
+}
+
+static inline int cpu_buffer_write_entry(struct op_entry *entry)
+{
+ entry->event = ring_buffer_lock_reserve(op_ring_buffer_write,
+ sizeof(struct op_sample),
+ &entry->irq_flags);
+ if (entry->event)
+ entry->sample = ring_buffer_event_data(entry->event);
+ else
+ entry->sample = NULL;
+
+ if (!entry->sample)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static inline int cpu_buffer_write_commit(struct op_entry *entry)
+{
+ return ring_buffer_unlock_commit(op_ring_buffer_write, entry->event,
+ entry->irq_flags);
+}
+
+static inline struct op_sample *cpu_buffer_read_entry(int cpu)
+{
+ struct ring_buffer_event *e;
+ e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
+ if (e)
+ return ring_buffer_event_data(e);
+ if (ring_buffer_swap_cpu(op_ring_buffer_read,
+ op_ring_buffer_write,
+ cpu))
+ return NULL;
+ e = ring_buffer_consume(op_ring_buffer_read, cpu, NULL);
+ if (e)
+ return ring_buffer_event_data(e);
+ return NULL;
+}
+
+/* "acquire" as many cpu buffer slots as we can */
+static inline unsigned long cpu_buffer_entries(int cpu)
+{
+ return ring_buffer_entries_cpu(op_ring_buffer_read, cpu)
+ + ring_buffer_entries_cpu(op_ring_buffer_write, cpu);
+}
/* transient events for the CPU buffer -> event buffer */
#define CPU_IS_KERNEL 1
diff --git a/drivers/oprofile/oprofile_files.c b/drivers/oprofile/oprofile_files.c
index cc106d503ac..d8201998b0b 100644
--- a/drivers/oprofile/oprofile_files.c
+++ b/drivers/oprofile/oprofile_files.c
@@ -14,9 +14,13 @@
#include "oprofile_stats.h"
#include "oprof.h"
-unsigned long fs_buffer_size = 131072;
-unsigned long fs_cpu_buffer_size = 8192;
-unsigned long fs_buffer_watershed = 32768; /* FIXME: tune */
+#define FS_BUFFER_SIZE_DEFAULT 131072
+#define FS_CPU_BUFFER_SIZE_DEFAULT 8192
+#define FS_BUFFER_WATERSHED_DEFAULT 32768 /* FIXME: tune */
+
+unsigned long fs_buffer_size;
+unsigned long fs_cpu_buffer_size;
+unsigned long fs_buffer_watershed;
static ssize_t depth_read(struct file *file, char __user *buf, size_t count, loff_t *offset)
{
@@ -120,6 +124,11 @@ static const struct file_operations dump_fops = {
void oprofile_create_files(struct super_block *sb, struct dentry *root)
{
+ /* reinitialize default values */
+ fs_buffer_size = FS_BUFFER_SIZE_DEFAULT;
+ fs_cpu_buffer_size = FS_CPU_BUFFER_SIZE_DEFAULT;
+ fs_buffer_watershed = FS_BUFFER_WATERSHED_DEFAULT;
+
oprofilefs_create_file(sb, root, "enable", &enable_fops);
oprofilefs_create_file_perm(sb, root, "dump", &dump_fops, 0666);
oprofilefs_create_file(sb, root, "buffer", &event_buffer_fops);