diff options
55 files changed, 4303 insertions, 914 deletions
diff --git a/arch/powerpc/configs/cell_defconfig b/arch/powerpc/configs/cell_defconfig index 74f83f4a4e5..d9ac24e8de1 100644 --- a/arch/powerpc/configs/cell_defconfig +++ b/arch/powerpc/configs/cell_defconfig @@ -1455,7 +1455,8 @@ CONFIG_HAS_DMA=y # Instrumentation Support # CONFIG_PROFILING=y -CONFIG_OPROFILE=y +CONFIG_OPROFILE=m +CONFIG_OPROFILE_CELL=y # CONFIG_KPROBES is not set # diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c index d3f2080d2ee..37658ea417f 100644 --- a/arch/powerpc/kernel/crash.c +++ b/arch/powerpc/kernel/crash.c @@ -219,6 +219,72 @@ void crash_kexec_secondary(struct pt_regs *regs) cpus_in_sr = CPU_MASK_NONE; } #endif +#ifdef CONFIG_SPU_BASE + +#include <asm/spu.h> +#include <asm/spu_priv1.h> + +struct crash_spu_info { + struct spu *spu; + u32 saved_spu_runcntl_RW; + u32 saved_spu_status_R; + u32 saved_spu_npc_RW; + u64 saved_mfc_sr1_RW; + u64 saved_mfc_dar; + u64 saved_mfc_dsisr; +}; + +#define CRASH_NUM_SPUS 16 /* Enough for current hardware */ +static struct crash_spu_info crash_spu_info[CRASH_NUM_SPUS]; + +static void crash_kexec_stop_spus(void) +{ + struct spu *spu; + int i; + u64 tmp; + + for (i = 0; i < CRASH_NUM_SPUS; i++) { + if (!crash_spu_info[i].spu) + continue; + + spu = crash_spu_info[i].spu; + + crash_spu_info[i].saved_spu_runcntl_RW = + in_be32(&spu->problem->spu_runcntl_RW); + crash_spu_info[i].saved_spu_status_R = + in_be32(&spu->problem->spu_status_R); + crash_spu_info[i].saved_spu_npc_RW = + in_be32(&spu->problem->spu_npc_RW); + + crash_spu_info[i].saved_mfc_dar = spu_mfc_dar_get(spu); + crash_spu_info[i].saved_mfc_dsisr = spu_mfc_dsisr_get(spu); + tmp = spu_mfc_sr1_get(spu); + crash_spu_info[i].saved_mfc_sr1_RW = tmp; + + tmp &= ~MFC_STATE1_MASTER_RUN_CONTROL_MASK; + spu_mfc_sr1_set(spu, tmp); + + __delay(200); + } +} + +void crash_register_spus(struct list_head *list) +{ + struct spu *spu; + + list_for_each_entry(spu, list, full_list) { + if (WARN_ON(spu->number >= CRASH_NUM_SPUS)) + continue; + + crash_spu_info[spu->number].spu = spu; + } +} + +#else +static inline void crash_kexec_stop_spus(void) +{ +} +#endif /* CONFIG_SPU_BASE */ void default_machine_crash_shutdown(struct pt_regs *regs) { @@ -254,6 +320,7 @@ void default_machine_crash_shutdown(struct pt_regs *regs) crash_save_cpu(regs, crashing_cpu); crash_kexec_prepare_cpus(crashing_cpu); cpu_set(crashing_cpu, cpus_in_crash); + crash_kexec_stop_spus(); if (ppc_md.kexec_cpu_down) ppc_md.kexec_cpu_down(1, 0); } diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index e5df167f782..727a6699f2f 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -122,6 +122,7 @@ extern struct timezone sys_tz; static long timezone_offset; unsigned long ppc_proc_freq; +EXPORT_SYMBOL(ppc_proc_freq); unsigned long ppc_tb_freq; static u64 tb_last_jiffy __cacheline_aligned_in_smp; diff --git a/arch/powerpc/oprofile/Kconfig b/arch/powerpc/oprofile/Kconfig index eb2dece76a5..7089e79689b 100644 --- a/arch/powerpc/oprofile/Kconfig +++ b/arch/powerpc/oprofile/Kconfig @@ -15,3 +15,10 @@ config OPROFILE If unsure, say N. +config OPROFILE_CELL + bool "OProfile for Cell Broadband Engine" + depends on (SPU_FS = y && OPROFILE = m) || (SPU_FS = y && OPROFILE = y) || (SPU_FS = m && OPROFILE = m) + default y + help + Profiling of Cell BE SPUs requires special support enabled + by this option. diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index 4b5f9528218..c5f64c3bd66 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile @@ -11,7 +11,9 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ timer_int.o ) oprofile-y := $(DRIVER_OBJS) common.o backtrace.o -oprofile-$(CONFIG_PPC_CELL_NATIVE) += op_model_cell.o +oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \ + cell/spu_profiler.o cell/vma_map.o \ + cell/spu_task_sync.o oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o oprofile-$(CONFIG_6xx) += op_model_7450.o diff --git a/arch/powerpc/oprofile/cell/pr_util.h b/arch/powerpc/oprofile/cell/pr_util.h new file mode 100644 index 00000000000..e5704f00c8b --- /dev/null +++ b/arch/powerpc/oprofile/cell/pr_util.h @@ -0,0 +1,97 @@ + /* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Author: Maynard Johnson <maynardj@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef PR_UTIL_H +#define PR_UTIL_H + +#include <linux/cpumask.h> +#include <linux/oprofile.h> +#include <asm/cell-pmu.h> +#include <asm/spu.h> + +#include "../../platforms/cell/cbe_regs.h" + +/* Defines used for sync_start */ +#define SKIP_GENERIC_SYNC 0 +#define SYNC_START_ERROR -1 +#define DO_GENERIC_SYNC 1 + +struct spu_overlay_info { /* map of sections within an SPU overlay */ + unsigned int vma; /* SPU virtual memory address from elf */ + unsigned int size; /* size of section from elf */ + unsigned int offset; /* offset of section into elf file */ + unsigned int buf; +}; + +struct vma_to_fileoffset_map { /* map of sections within an SPU program */ + struct vma_to_fileoffset_map *next; /* list pointer */ + unsigned int vma; /* SPU virtual memory address from elf */ + unsigned int size; /* size of section from elf */ + unsigned int offset; /* offset of section into elf file */ + unsigned int guard_ptr; + unsigned int guard_val; + /* + * The guard pointer is an entry in the _ovly_buf_table, + * computed using ovly.buf as the index into the table. Since + * ovly.buf values begin at '1' to reference the first (or 0th) + * entry in the _ovly_buf_table, the computation subtracts 1 + * from ovly.buf. + * The guard value is stored in the _ovly_buf_table entry and + * is an index (starting at 1) back to the _ovly_table entry + * that is pointing at this _ovly_buf_table entry. So, for + * example, for an overlay scenario with one overlay segment + * and two overlay sections: + * - Section 1 points to the first entry of the + * _ovly_buf_table, which contains a guard value + * of '1', referencing the first (index=0) entry of + * _ovly_table. + * - Section 2 points to the second entry of the + * _ovly_buf_table, which contains a guard value + * of '2', referencing the second (index=1) entry of + * _ovly_table. + */ + +}; + +/* The three functions below are for maintaining and accessing + * the vma-to-fileoffset map. + */ +struct vma_to_fileoffset_map *create_vma_map(const struct spu *spu, + u64 objectid); +unsigned int vma_map_lookup(struct vma_to_fileoffset_map *map, + unsigned int vma, const struct spu *aSpu, + int *grd_val); +void vma_map_free(struct vma_to_fileoffset_map *map); + +/* + * Entry point for SPU profiling. + * cycles_reset is the SPU_CYCLES count value specified by the user. + */ +int start_spu_profiling(unsigned int cycles_reset); + +void stop_spu_profiling(void); + + +/* add the necessary profiling hooks */ +int spu_sync_start(void); + +/* remove the hooks */ +int spu_sync_stop(void); + +/* Record SPU program counter samples to the oprofile event buffer. */ +void spu_sync_buffer(int spu_num, unsigned int *samples, + int num_samples); + +void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset); + +#endif /* PR_UTIL_H */ diff --git a/arch/powerpc/oprofile/cell/spu_profiler.c b/arch/powerpc/oprofile/cell/spu_profiler.c new file mode 100644 index 00000000000..380d7e21753 --- /dev/null +++ b/arch/powerpc/oprofile/cell/spu_profiler.c @@ -0,0 +1,221 @@ +/* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Authors: Maynard Johnson <maynardj@us.ibm.com> + * Carl Love <carll@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/hrtimer.h> +#include <linux/smp.h> +#include <linux/slab.h> +#include <asm/cell-pmu.h> +#include "pr_util.h" + +#define TRACE_ARRAY_SIZE 1024 +#define SCALE_SHIFT 14 + +static u32 *samples; + +static int spu_prof_running; +static unsigned int profiling_interval; + +#define NUM_SPU_BITS_TRBUF 16 +#define SPUS_PER_TB_ENTRY 4 +#define SPUS_PER_NODE 8 + +#define SPU_PC_MASK 0xFFFF + +static DEFINE_SPINLOCK(sample_array_lock); +unsigned long sample_array_lock_flags; + +void set_spu_profiling_frequency(unsigned int freq_khz, unsigned int cycles_reset) +{ + unsigned long ns_per_cyc; + + if (!freq_khz) + freq_khz = ppc_proc_freq/1000; + + /* To calculate a timeout in nanoseconds, the basic + * formula is ns = cycles_reset * (NSEC_PER_SEC / cpu frequency). + * To avoid floating point math, we use the scale math + * technique as described in linux/jiffies.h. We use + * a scale factor of SCALE_SHIFT, which provides 4 decimal places + * of precision. This is close enough for the purpose at hand. + * + * The value of the timeout should be small enough that the hw + * trace buffer will not get more then about 1/3 full for the + * maximum user specified (the LFSR value) hw sampling frequency. + * This is to ensure the trace buffer will never fill even if the + * kernel thread scheduling varies under a heavy system load. + */ + + ns_per_cyc = (USEC_PER_SEC << SCALE_SHIFT)/freq_khz; + profiling_interval = (ns_per_cyc * cycles_reset) >> SCALE_SHIFT; + +} + +/* + * Extract SPU PC from trace buffer entry + */ +static void spu_pc_extract(int cpu, int entry) +{ + /* the trace buffer is 128 bits */ + u64 trace_buffer[2]; + u64 spu_mask; + int spu; + + spu_mask = SPU_PC_MASK; + + /* Each SPU PC is 16 bits; hence, four spus in each of + * the two 64-bit buffer entries that make up the + * 128-bit trace_buffer entry. Process two 64-bit values + * simultaneously. + * trace[0] SPU PC contents are: 0 1 2 3 + * trace[1] SPU PC contents are: 4 5 6 7 + */ + + cbe_read_trace_buffer(cpu, trace_buffer); + + for (spu = SPUS_PER_TB_ENTRY-1; spu >= 0; spu--) { + /* spu PC trace entry is upper 16 bits of the + * 18 bit SPU program counter + */ + samples[spu * TRACE_ARRAY_SIZE + entry] + = (spu_mask & trace_buffer[0]) << 2; + samples[(spu + SPUS_PER_TB_ENTRY) * TRACE_ARRAY_SIZE + entry] + = (spu_mask & trace_buffer[1]) << 2; + + trace_buffer[0] = trace_buffer[0] >> NUM_SPU_BITS_TRBUF; + trace_buffer[1] = trace_buffer[1] >> NUM_SPU_BITS_TRBUF; + } +} + +static int cell_spu_pc_collection(int cpu) +{ + u32 trace_addr; + int entry; + + /* process the collected SPU PC for the node */ + + entry = 0; + + trace_addr = cbe_read_pm(cpu, trace_address); + while (!(trace_addr & CBE_PM_TRACE_BUF_EMPTY)) { + /* there is data in the trace buffer to process */ + spu_pc_extract(cpu, entry); + + entry++; + + if (entry >= TRACE_ARRAY_SIZE) + /* spu_samples is full */ + break; + + trace_addr = cbe_read_pm(cpu, trace_address); + } + + return entry; +} + + +static enum hrtimer_restart profile_spus(struct hrtimer *timer) +{ + ktime_t kt; + int cpu, node, k, num_samples, spu_num; + + if (!spu_prof_running) + goto stop; + + for_each_online_cpu(cpu) { + if (cbe_get_hw_thread_id(cpu)) + continue; + + node = cbe_cpu_to_node(cpu); + + /* There should only be one kernel thread at a time processing + * the samples. In the very unlikely case that the processing + * is taking a very long time and multiple kernel threads are + * started to process the samples. Make sure only one kernel + * thread is working on the samples array at a time. The + * sample array must be loaded and then processed for a given + * cpu. The sample array is not per cpu. + */ + spin_lock_irqsave(&sample_array_lock, + sample_array_lock_flags); + num_samples = cell_spu_pc_collection(cpu); + + if (num_samples == 0) { + spin_unlock_irqrestore(&sample_array_lock, + sample_array_lock_flags); + continue; + } + + for (k = 0; k < SPUS_PER_NODE; k++) { + spu_num = k + (node * SPUS_PER_NODE); + spu_sync_buffer(spu_num, + samples + (k * TRACE_ARRAY_SIZE), + num_samples); + } + + spin_unlock_irqrestore(&sample_array_lock, + sample_array_lock_flags); + + } + smp_wmb(); /* insure spu event buffer updates are written */ + /* don't want events intermingled... */ + + kt = ktime_set(0, profiling_interval); + if (!spu_prof_running) + goto stop; + hrtimer_forward(timer, timer->base->get_time(), kt); + return HRTIMER_RESTART; + + stop: + printk(KERN_INFO "SPU_PROF: spu-prof timer ending\n"); + return HRTIMER_NORESTART; +} + +static struct hrtimer timer; +/* + * Entry point for SPU profiling. + * NOTE: SPU profiling is done system-wide, not per-CPU. + * + * cycles_reset is the count value specified by the user when + * setting up OProfile to count SPU_CYCLES. + */ +int start_spu_profiling(unsigned int cycles_reset) +{ + ktime_t kt; + + pr_debug("timer resolution: %lu\n", TICK_NSEC); + kt = ktime_set(0, profiling_interval); + hrtimer_init(&timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); + timer.expires = kt; + timer.function = profile_spus; + + /* Allocate arrays for collecting SPU PC samples */ + samples = kzalloc(SPUS_PER_NODE * + TRACE_ARRAY_SIZE * sizeof(u32), GFP_KERNEL); + + if (!samples) + return -ENOMEM; + + spu_prof_running = 1; + hrtimer_start(&timer, kt, HRTIMER_MODE_REL); + + return 0; +} + +void stop_spu_profiling(void) +{ + spu_prof_running = 0; + hrtimer_cancel(&timer); + kfree(samples); + pr_debug("SPU_PROF: stop_spu_profiling issued\n"); +} diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c new file mode 100644 index 00000000000..133665754a7 --- /dev/null +++ b/arch/powerpc/oprofile/cell/spu_task_sync.c @@ -0,0 +1,484 @@ +/* + * Cell Broadband Engine OProfile Support + * + * (C) Copyright IBM Corporation 2006 + * + * Author: Maynard Johnson <maynardj@us.ibm.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* The purpose of this file is to handle SPU event task switching + * and to record SPU context information into the OProfile + * event buffer. + * + * Additionally, the spu_sync_buffer function is provided as a helper + * for recoding actual SPU program counter samples to the event buffer. + */ +#include <linux/dcookies.h> +#include <linux/kref.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/notifier.h> +#include <linux/numa.h> +#include <linux/oprofile.h> +#include <linux/spinlock.h> +#include "pr_util.h" + +#define RELEASE_ALL 9999 + +static DEFINE_SPINLOCK(buffer_lock); +static DEFINE_SPINLOCK(cache_lock); +static int num_spu_nodes; +int spu_prof_num_nodes; +int last_guard_val[MAX_NUMNODES * 8]; + +/* Container for caching information about an active SPU task. */ +struct cached_info { + struct vma_to_fileoffset_map *map; + struct spu *the_spu; /* needed to access pointer to local_store */ + struct kref cache_ref; +}; + +static struct cached_info *spu_info[MAX_NUMNODES * 8]; + +static void destroy_cached_info(struct kref *kref) +{ + struct cached_info *info; + + info = container_of(kref, struct cached_info, cache_ref); + vma_map_free(info->map); + kfree(info); + module_put(THIS_MODULE); +} + +/* Return the cached_info for the passed SPU number. + * ATTENTION: Callers are responsible for obtaining the + * cache_lock if needed prior to invoking this function. + */ +static struct cached_info *get_cached_info(struct spu *the_spu, int spu_num) +{ + struct kref *ref; + struct cached_info *ret_info; + + if (spu_num >= num_spu_nodes) { + printk(KERN_ERR "SPU_PROF: " + "%s, line %d: Invalid index %d into spu info cache\n", + __FUNCTION__, __LINE__, spu_num); + ret_info = NULL; + goto out; + } + if (!spu_info[spu_num] && the_spu) { + ref = spu_get_profile_private_kref(the_spu->ctx); + if (ref) { + spu_info[spu_num] = container_of(ref, struct cached_info, cache_ref); + kref_get(&spu_info[spu_num]->cache_ref); + } + } + + ret_info = spu_info[spu_num]; + out: + return ret_info; +} + + +/* Looks for cached info for the passed spu. If not found, the + * cached info is created for the passed spu. + * Returns 0 for success; otherwise, -1 for error. + */ +static int +prepare_cached_spu_info(struct spu *spu, unsigned long objectId) +{ + unsigned long flags; + struct vma_to_fileoffset_map *new_map; + int retval = 0; + struct cached_info *info; + + /* We won't bother getting cache_lock here since + * don't do anything with the cached_info that's returned. + */ + info = get_cached_info(spu, spu->number); + + if (info) { + pr_debug("Found cached SPU info.\n"); + goto out; + } + + /* Create cached_info and set spu_info[spu->number] to point to it. + * spu->number is a system-wide value, not a per-node value. + */ + info = kzalloc(sizeof(struct cached_info), GFP_KERNEL); + if (!info) { + printk(KERN_ERR "SPU_PROF: " + "%s, line %d: create vma_map failed\n", + __FUNCTION__, __LINE__); + retval = -ENOMEM; + goto err_alloc; + } + new_map = create_vma_map(spu, objectId); + if (!new_map) { + printk(KERN_ERR "SPU_PROF: " + "%s, line %d: create vma_map failed\n", + __FUNCTION__, __LINE__); + retval = -ENOMEM; + goto err_alloc; + } + + pr_debug("Created vma_map\n"); + info->map = new_map; + info->the_spu = spu; + kref_init(&info->cache_ref); + spin_lock_irqsave(&cache_lock, flags); + spu_info[spu->number] = info; + /* Increment count before passing off ref to SPUFS. */ + kref_get(&info->cache_ref); + + /* We increment the module refcount here since SPUFS is + * responsible for the final destruction of the cached_info, + * and it must be able to access the destroy_cached_info() + * function defined in the OProfile module. We decrement + * the module refcount in destroy_cached_info. + */ + try_module_get(THIS_MODULE); + spu_set_profile_private_kref(spu->ctx, &info->cache_ref, + destroy_cached_info); + spin_unlock_irqrestore(&cache_lock, flags); + goto out; + +err_alloc: + kfree(info); +out: + return retval; +} + +/* + * NOTE: The caller is responsible for locking the + * cache_lock prior to calling this function. + */ +static int release_cached_info(int spu_index) +{ + int index, end; + + if (spu_index == RELEASE_ALL) { + end = num_spu_nodes; + index = 0; + } else { + if (spu_index >= num_spu_nodes) { + printk(KERN_ERR "SPU_PROF: " + "%s, line %d: " + "Invalid index %d into spu info cache\n", + __FUNCTION__, __LINE__, spu_index); + goto out; + } + end = spu_index + 1; + index = spu_index; + } + for (; index < end; index++) { + if (spu_info[index]) { + kref_put(&spu_info[index]->cache_ref, + destroy_cached_info); + spu_info[index] = NULL; + } + } + +out: + return 0; +} + +/* The source code for fast_get_dcookie was "borrowed" + * from drivers/oprofile/buffer_sync.c. + */ + +/* Optimisation. We can manage without taking the dcookie sem + * because we cannot reach this code without at least one + * dcookie user still being registered (namely, the reader + * of the event buffer). + */ +static inline unsigned long fast_get_dcookie(struct dentry *dentry, + struct vfsmount *vfsmnt) +{ + unsigned long cookie; + + if (dentry->d_cookie) + return (unsigned long)dentry; + get_dcookie(dentry, vfsmnt, &cookie); + return cookie; +} + +/* Look up the dcookie for the task's first VM_EXECUTABLE mapping, + * which corresponds loosely to "application name". Also, determine + * the offset for |