Diffstat (limited to 'arch/powerpc/oprofile/cell')
-rw-r--r--  arch/powerpc/oprofile/cell/pr_util.h        13
-rw-r--r--  arch/powerpc/oprofile/cell/spu_profiler.c   59
-rw-r--r--  arch/powerpc/oprofile/cell/spu_task_sync.c  16
-rw-r--r--  arch/powerpc/oprofile/cell/vma_map.c         3
4 files changed, 61 insertions, 30 deletions
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h
index 628009c0195..964b93974d8 100644
--- a/arch/powerpc/oprofile/cell/pr_util.h
+++ b/arch/powerpc/oprofile/cell/pr_util.h
@@ -30,6 +30,10 @@
extern struct delayed_work spu_work;
extern int spu_prof_running;
+#define TRACE_ARRAY_SIZE 1024
+
+extern spinlock_t oprof_spu_smpl_arry_lck;
+
struct spu_overlay_info { /* map of sections within an SPU overlay */
unsigned int vma; /* SPU virtual memory address from elf */
unsigned int size; /* size of section from elf */
@@ -79,7 +83,7 @@ struct spu_buffer {
* the vma-to-fileoffset map.
*/
struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu,
- u64 objectid);
+ unsigned long objectid);
unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map,
unsigned int vma, const struct spu *aSpu,
int *grd_val);
@@ -89,10 +93,11 @@ void vma_map_free(struct vma_to_fileoffset_map *map);
* Entry point for SPU profiling.
* cycles_reset is the SPU_CYCLES count value specified by the user.
*/
-int start_spu_profiling(unsigned int cycles_reset);
-
-void stop_spu_profiling(void);
+int start_spu_profiling_cycles(unsigned int cycles_reset);
+void start_spu_profiling_events(void);
+void stop_spu_profiling_cycles(void);
+void stop_spu_profiling_events(void);
/* add the necessary profiling hooks */
int spu_sync_start(void);
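The pr_util.h changes above split the single start/stop pair into separate cycles and events entry points. As a rough sketch of how a caller could dispatch between the two variants (hypothetical helper names and a hypothetical profiling_cycles flag; the real call sites live in the cell OProfile model code, which is not part of this diff):

#include "pr_util.h"

/* Hypothetical dispatch helpers -- not part of this patch. */
static int cell_spu_start(int profiling_cycles, unsigned int cycles_reset)
{
	if (profiling_cycles)
		return start_spu_profiling_cycles(cycles_reset);

	start_spu_profiling_events();
	return 0;
}

static void cell_spu_stop(int profiling_cycles)
{
	if (profiling_cycles)
		stop_spu_profiling_cycles();
	else
		stop_spu_profiling_events();
}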
diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c
index dd499c3e9da..b129d007e7f 100644
--- a/arch/powerpc/oprofile/cell/spu_profiler.c
+++ b/arch/powerpc/oprofile/cell/spu_profiler.c
@@ -16,13 +16,24 @@
#include <linux/smp.h>
#include <linux/slab.h>
#include <asm/cell-pmu.h>
+#include <asm/time.h>
#include "pr_util.h"
-#define TRACE_ARRAY_SIZE 1024
#define SCALE_SHIFT 14
static u32 *samples;
+/* spu_prof_running is a flag used to indicate if spu profiling is enabled
+ * or not. It is set by the routines start_spu_profiling_cycles() and
+ * start_spu_profiling_events(). The flag is cleared by the routines
+ * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These
+ * routines are called via global_start() and global_stop() which are called in
+ * op_powerpc_start() and op_powerpc_stop(). These routines are called once
+ * per system as a result of the user starting/stopping oprofile. Hence, only
+ * one CPU per user at a time will be changing the value of spu_prof_running.
+ * In general, OProfile does not protect against multiple users trying
+ * to run OProfile at the same time.
+ */
int spu_prof_running;
static unsigned int profiling_interval;
@@ -31,8 +42,8 @@ static unsigned int profiling_interval;
#define SPU_PC_MASK 0xFFFF
-static DEFINE_SPINLOCK(sample_array_lock);
-unsigned long sample_array_lock_flags;
+DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck);
+unsigned long oprof_spu_smpl_arry_lck_flags;
void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset)
{
@@ -49,7 +60,7 @@ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_rese
* of precision. This is close enough for the purpose at hand.
*
* The value of the timeout should be small enough that the hw
- * trace buffer will not get more then about 1/3 full for the
+ * trace buffer will not get more than about 1/3 full for the
* maximum user specified (the LFSR value) hw sampling frequency.
* This is to ensure the trace buffer will never fill even if the
* kernel thread scheduling varies under a heavy system load.
@@ -145,13 +156,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
* sample array must be loaded and then processed for a given
* cpu. The sample array is not per cpu.
*/
- spin_lock_irqsave(&sample_array_lock,
- sample_array_lock_flags);
+ spin_lock_irqsave(&oprof_spu_smpl_arry_lck,
+ oprof_spu_smpl_arry_lck_flags);
num_samples = cell_spu_pc_collection(cpu);
if (num_samples == 0) {
- spin_unlock_irqrestore(&sample_array_lock,
- sample_array_lock_flags);
+ spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+ oprof_spu_smpl_arry_lck_flags);
continue;
}
@@ -162,8 +173,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
num_samples);
}
- spin_unlock_irqrestore(&sample_array_lock,
- sample_array_lock_flags);
+ spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck,
+ oprof_spu_smpl_arry_lck_flags);
}
smp_wmb(); /* insure spu event buffer updates are written */
@@ -182,13 +193,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer)
static struct hrtimer timer;
/*
- * Entry point for SPU profiling.
+ * Entry point for SPU cycle profiling.
* NOTE: SPU profiling is done system-wide, not per-CPU.
*
* cycles_reset is the count value specified by the user when
* setting up OProfile to count SPU_CYCLES.
*/
-int start_spu_profiling(unsigned int cycles_reset)
+int start_spu_profiling_cycles(unsigned int cycles_reset)
{
ktime_t kt;
@@ -212,10 +223,30 @@ int start_spu_profiling(unsigned int cycles_reset)
return 0;
}
-void stop_spu_profiling(void)
+/*
+ * Entry point for SPU event profiling.
+ * NOTE: SPU profiling is done system-wide, not per-CPU.
+ *
+ * The SPU events to be counted are configured through the OProfile
+ * driver, so no cycles_reset value is needed for event profiling.
+ */
+void start_spu_profiling_events(void)
+{
+ spu_prof_running = 1;
+ schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE);
+
+ return;
+}
+
+void stop_spu_profiling_cycles(void)
{
spu_prof_running = 0;
hrtimer_cancel(&timer);
kfree(samples);
- pr_debug("SPU_PROF: stop_spu_profiling issued\n");
+ pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n");
+}
+
+void stop_spu_profiling_events(void)
+{
+ spu_prof_running = 0;
}
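The comment block added above makes spu_prof_running a system-wide flag, and the sample-array lock is renamed to oprof_spu_smpl_arry_lck and exported through pr_util.h so code outside spu_profiler.c can take it. A minimal sketch of how a consumer such as the delayed-work handler behind start_spu_profiling_events() might use the exported lock (the function and its locals here are illustrative, not taken from this patch; cell_spu_pc_collection() is assumed to come from asm/cell-pmu.h as used above):

#include <linux/spinlock.h>
#include <asm/cell-pmu.h>
#include "pr_util.h"

/* Illustrative only: serialize access to the shared (not per-cpu)
 * sample array in the same way profile_spus() does in the hunks above.
 */
static void spu_evnt_sample_one_cpu(int cpu)
{
	unsigned long flags;
	int num_samples;

	if (!spu_prof_running)
		return;

	spin_lock_irqsave(&oprof_spu_smpl_arry_lck, flags);

	num_samples = cell_spu_pc_collection(cpu);
	if (num_samples) {
		/* hand the samples to spu_sync_buffer() while the
		 * lock is still held, as profile_spus() does above
		 */
	}

	spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, flags);
}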
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 6b793aeda72..28f1af2db1f 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -26,6 +26,7 @@
#include <linux/notifier.h>
#include <linux/numa.h>
#include <linux/oprofile.h>
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include "pr_util.h"
@@ -303,7 +304,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
return cookie;
}
-/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+/* Look up the dcookie for the task's mm->exe_file,
* which corresponds loosely to "application name". Also, determine
* the offset for the SPU ELF object. If computed offset is
* non-zero, it implies an embedded SPU object; otherwise, it's a
@@ -320,7 +321,6 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
{
unsigned long app_cookie = 0;
unsigned int my_offset = 0;
- struct file *app = NULL;
struct vm_area_struct *vma;
struct mm_struct *mm = spu->mm;
@@ -329,16 +329,10 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
down_read(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- if (!vma->vm_file)
- continue;
- if (!(vma->vm_flags & VM_EXECUTABLE))
- continue;
- app_cookie = fast_get_dcookie(&vma->vm_file->f_path);
+ if (mm->exe_file) {
+ app_cookie = fast_get_dcookie(&mm->exe_file->f_path);
pr_debug("got dcookie for %s\n",
- vma->vm_file->f_dentry->d_name.name);
- app = vma->vm_file;
- break;
+ mm->exe_file->f_dentry->d_name.name);
}
for (vma = mm->mmap; vma; vma = vma->vm_next) {
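The hunk above drops the walk over VM_EXECUTABLE vmas and reads the application dcookie straight from mm->exe_file. The same pattern in isolation, as a hedged sketch (hypothetical helper name; in get_exec_dcookie_and_offset() the lookup happens inline while mm->mmap_sem is already held for the later vma walk, fast_get_dcookie() is the static helper shown earlier in this file, and the includes already present in spu_task_sync.c are assumed):

static unsigned long app_dcookie_from_mm(struct mm_struct *mm)
{
	unsigned long cookie = 0;

	down_read(&mm->mmap_sem);
	if (mm->exe_file)
		cookie = fast_get_dcookie(&mm->exe_file->f_path);
	up_read(&mm->mmap_sem);

	return cookie;
}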
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
index 258fa4411e9..c579b16845d 100644
--- a/arch/powerpc/oprofile/cell/vma_map.c
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -20,6 +20,7 @@
#include <linux/string.h>
#include <linux/uaccess.h>
#include <linux/elf.h>
+#include <linux/slab.h>
#include "pr_util.h"
@@ -185,7 +186,7 @@ struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
goto fail;
if (shdr_str.sh_type != SHT_STRTAB)
- goto fail;;
+ goto fail;
for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
if (copy_from_user(&sym, spu_elf_start +