diff options
Diffstat (limited to 'arch/powerpc/oprofile/cell')
| -rw-r--r-- | arch/powerpc/oprofile/cell/pr_util.h | 13 | ||||
| -rw-r--r-- | arch/powerpc/oprofile/cell/spu_profiler.c | 59 | ||||
| -rw-r--r-- | arch/powerpc/oprofile/cell/spu_task_sync.c | 16 | ||||
| -rw-r--r-- | arch/powerpc/oprofile/cell/vma_map.c | 3 |
4 files changed, 61 insertions, 30 deletions
diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h index 628009c0195..964b93974d8 100644 --- a/arch/powerpc/oprofile/cell/pr_util.h +++ b/arch/powerpc/oprofile/cell/pr_util.h @@ -30,6 +30,10 @@ extern struct delayed_work spu_work; extern int spu_prof_running; +#define TRACE_ARRAY_SIZE 1024 + +extern spinlock_t oprof_spu_smpl_arry_lck; + struct spu_overlay_info { /* map of sections within an SPU overlay */ unsigned int vma; /* SPU virtual memory address from elf */ unsigned int size; /* size of section from elf */ @@ -79,7 +83,7 @@ struct spu_buffer { * the vma-to-fileoffset map. */ struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu, - u64 objectid); + unsigned long objectid); unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map, unsigned int vma, const struct spu *aSpu, int *grd_val); @@ -89,10 +93,11 @@ void vma_map_free(struct vma_to_fileoffset_map *map); * Entry point for SPU profiling. * cycles_reset is the SPU_CYCLES count value specified by the user. */ -int start_spu_profiling(unsigned int cycles_reset); - -void stop_spu_profiling(void); +int start_spu_profiling_cycles(unsigned int cycles_reset); +void start_spu_profiling_events(void); +void stop_spu_profiling_cycles(void); +void stop_spu_profiling_events(void); /* add the necessary profiling hooks */ int spu_sync_start(void); diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c index dd499c3e9da..b129d007e7f 100644 --- a/arch/powerpc/oprofile/cell/spu_profiler.c +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -16,13 +16,24 @@ #include <linux/smp.h> #include <linux/slab.h> #include <asm/cell-pmu.h> +#include <asm/time.h> #include "pr_util.h" -#define TRACE_ARRAY_SIZE 1024 #define SCALE_SHIFT 14 static u32 *samples; +/* spu_prof_running is a flag used to indicate if spu profiling is enabled + * or not. It is set by the routines start_spu_profiling_cycles() and + * start_spu_profiling_events(). The flag is cleared by the routines + * stop_spu_profiling_cycles() and stop_spu_profiling_events(). These + * routines are called via global_start() and global_stop() which are called in + * op_powerpc_start() and op_powerpc_stop(). These routines are called once + * per system as a result of the user starting/stopping oprofile. Hence, only + * one CPU per user at a time will be changing the value of spu_prof_running. + * In general, OProfile does not protect against multiple users trying to run + * OProfile at a time. + */ int spu_prof_running; static unsigned int profiling_interval; @@ -31,8 +42,8 @@ static unsigned int profiling_interval; #define SPU_PC_MASK 0xFFFF -static DEFINE_SPINLOCK(sample_array_lock); -unsigned long sample_array_lock_flags; +DEFINE_SPINLOCK(oprof_spu_smpl_arry_lck); +unsigned long oprof_spu_smpl_arry_lck_flags; void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) { @@ -49,7 +60,7 @@ void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_rese * of precision. This is close enough for the purpose at hand. * * The value of the timeout should be small enough that the hw - * trace buffer will not get more then about 1/3 full for the + * trace buffer will not get more than about 1/3 full for the * maximum user specified (the LFSR value) hw sampling frequency. * This is to ensure the trace buffer will never fill even if the * kernel thread scheduling varies under a heavy system load. @@ -145,13 +156,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer) * sample array must be loaded and then processed for a given * cpu. The sample array is not per cpu. */ - spin_lock_irqsave(&sample_array_lock, - sample_array_lock_flags); + spin_lock_irqsave(&oprof_spu_smpl_arry_lck, + oprof_spu_smpl_arry_lck_flags); num_samples = cell_spu_pc_collection(cpu); if (num_samples == 0) { - spin_unlock_irqrestore(&sample_array_lock, - sample_array_lock_flags); + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, + oprof_spu_smpl_arry_lck_flags); continue; } @@ -162,8 +173,8 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer) num_samples); } - spin_unlock_irqrestore(&sample_array_lock, - sample_array_lock_flags); + spin_unlock_irqrestore(&oprof_spu_smpl_arry_lck, + oprof_spu_smpl_arry_lck_flags); } smp_wmb(); /* insure spu event buffer updates are written */ @@ -182,13 +193,13 @@ static enum hrtimer_restart profile_spus(struct hrtimer *timer) static struct hrtimer timer; /* - * Entry point for SPU profiling. + * Entry point for SPU cycle profiling. * NOTE: SPU profiling is done system-wide, not per-CPU. * * cycles_reset is the count value specified by the user when * setting up OProfile to count SPU_CYCLES. */ -int start_spu_profiling(unsigned int cycles_reset) +int start_spu_profiling_cycles(unsigned int cycles_reset) { ktime_t kt; @@ -212,10 +223,30 @@ int start_spu_profiling(unsigned int cycles_reset) return 0; } -void stop_spu_profiling(void) +/* + * Entry point for SPU event profiling. + * NOTE: SPU profiling is done system-wide, not per-CPU. + * + * cycles_reset is the count value specified by the user when + * setting up OProfile to count SPU_CYCLES. + */ +void start_spu_profiling_events(void) +{ + spu_prof_running = 1; + schedule_delayed_work(&spu_work, DEFAULT_TIMER_EXPIRE); + + return; +} + +void stop_spu_profiling_cycles(void) { spu_prof_running = 0; hrtimer_cancel(&timer); kfree(samples); - pr_debug("SPU_PROF: stop_spu_profiling issued\n"); + pr_debug("SPU_PROF: stop_spu_profiling_cycles issued\n"); +} + +void stop_spu_profiling_events(void) +{ + spu_prof_running = 0; } diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c index 6b793aeda72..28f1af2db1f 100644 --- a/arch/powerpc/oprofile/cell/spu_task_sync.c +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -26,6 +26,7 @@ #include <linux/notifier.h> #include <linux/numa.h> #include <linux/oprofile.h> +#include <linux/slab.h> #include <linux/spinlock.h> #include "pr_util.h" @@ -303,7 +304,7 @@ static inline unsigned long fast_get_dcookie(struct path *path) return cookie; } -/* Look up the dcookie for the task's first VM_EXECUTABLE mapping, +/* Look up the dcookie for the task's mm->exe_file, * which corresponds loosely to "application name". Also, determine * the offset for the SPU ELF object. If computed offset is * non-zero, it implies an embedded SPU object; otherwise, it's a @@ -320,7 +321,6 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, { unsigned long app_cookie = 0; unsigned int my_offset = 0; - struct file *app = NULL; struct vm_area_struct *vma; struct mm_struct *mm = spu->mm; @@ -329,16 +329,10 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp, down_read(&mm->mmap_sem); - for (vma = mm->mmap; vma; vma = vma->vm_next) { - if (!vma->vm_file) - continue; - if (!(vma->vm_flags & VM_EXECUTABLE)) - continue; - app_cookie = fast_get_dcookie(&vma->vm_file->f_path); + if (mm->exe_file) { + app_cookie = fast_get_dcookie(&mm->exe_file->f_path); pr_debug("got dcookie for %s\n", - vma->vm_file->f_dentry->d_name.name); - app = vma->vm_file; - break; + mm->exe_file->f_dentry->d_name.name); } for (vma = mm->mmap; vma; vma = vma->vm_next) { diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c index 258fa4411e9..c579b16845d 100644 --- a/arch/powerpc/oprofile/cell/vma_map.c +++ b/arch/powerpc/oprofile/cell/vma_map.c @@ -20,6 +20,7 @@ #include <linux/string.h> #include <linux/uaccess.h> #include <linux/elf.h> +#include <linux/slab.h> #include "pr_util.h" @@ -185,7 +186,7 @@ struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu, goto fail; if (shdr_str.sh_type != SHT_STRTAB) - goto fail;; + goto fail; for (j = 0; j < shdr.sh_size / sizeof (sym); j++) { if (copy_from_user(&sym, spu_elf_start + |
