Diffstat (limited to 'arch/powerpc/oprofile')
-rw-r--r--   arch/powerpc/oprofile/Makefile               |    8
-rw-r--r--   arch/powerpc/oprofile/backtrace.c            |    2
-rw-r--r--   arch/powerpc/oprofile/cell/spu_task_sync.c   |   16
-rw-r--r--   arch/powerpc/oprofile/cell/vma_map.c         |    3
-rw-r--r--   arch/powerpc/oprofile/common.c               |   34
-rw-r--r--   arch/powerpc/oprofile/op_model_7450.c        |   23
-rw-r--r--   arch/powerpc/oprofile/op_model_cell.c        |   21
-rw-r--r--   arch/powerpc/oprofile/op_model_fsl_emb.c     |   61
-rw-r--r--   arch/powerpc/oprofile/op_model_pa6t.c        |    1
-rw-r--r--   arch/powerpc/oprofile/op_model_power4.c      |  143
-rw-r--r--   arch/powerpc/oprofile/op_model_rs64.c        |    2
11 files changed, 219 insertions, 95 deletions
diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile
index 2ef6b0dddd8..751ec7bd501 100644
--- a/arch/powerpc/oprofile/Makefile
+++ b/arch/powerpc/oprofile/Makefile
@@ -1,6 +1,6 @@
-ifeq ($(CONFIG_PPC64),y)
-EXTRA_CFLAGS += -mno-minimal-toc
-endif
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
 
 obj-$(CONFIG_OPROFILE) += oprofile.o
 
@@ -14,6 +14,6 @@ oprofile-y := $(DRIVER_OBJS) common.o backtrace.o
 oprofile-$(CONFIG_OPROFILE_CELL) += op_model_cell.o \
 		cell/spu_profiler.o cell/vma_map.o \
 		cell/spu_task_sync.o
-oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
+oprofile-$(CONFIG_PPC_BOOK3S_64) += op_model_rs64.o op_model_power4.o op_model_pa6t.o
 oprofile-$(CONFIG_FSL_EMB_PERFMON) += op_model_fsl_emb.o
 oprofile-$(CONFIG_6xx) += op_model_7450.o
diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c
index b4278cfd1f8..f75301f2c85 100644
--- a/arch/powerpc/oprofile/backtrace.c
+++ b/arch/powerpc/oprofile/backtrace.c
@@ -105,7 +105,7 @@ void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth)
 		}
 	} else {
 #ifdef CONFIG_PPC64
-		if (!test_thread_flag(TIF_32BIT)) {
+		if (!is_32bit_task()) {
 			while (depth--) {
 				sp = user_getsp64(sp, first_frame);
 				if (!sp)
diff --git a/arch/powerpc/oprofile/cell/spu_task_sync.c b/arch/powerpc/oprofile/cell/spu_task_sync.c
index 6b793aeda72..28f1af2db1f 100644
--- a/arch/powerpc/oprofile/cell/spu_task_sync.c
+++ b/arch/powerpc/oprofile/cell/spu_task_sync.c
@@ -26,6 +26,7 @@
 #include <linux/notifier.h>
 #include <linux/numa.h>
 #include <linux/oprofile.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include "pr_util.h"
 
@@ -303,7 +304,7 @@ static inline unsigned long fast_get_dcookie(struct path *path)
 	return cookie;
 }
 
-/* Look up the dcookie for the task's first VM_EXECUTABLE mapping,
+/* Look up the dcookie for the task's mm->exe_file,
  * which corresponds loosely to "application name". Also, determine
  * the offset for the SPU ELF object.  If computed offset is
  * non-zero, it implies an embedded SPU object; otherwise, it's a
@@ -320,7 +321,6 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 {
 	unsigned long app_cookie = 0;
 	unsigned int my_offset = 0;
-	struct file *app = NULL;
 	struct vm_area_struct *vma;
 	struct mm_struct *mm = spu->mm;
 
@@ -329,16 +329,10 @@ get_exec_dcookie_and_offset(struct spu *spu, unsigned int *offsetp,
 
 	down_read(&mm->mmap_sem);
 
-	for (vma = mm->mmap; vma; vma = vma->vm_next) {
-		if (!vma->vm_file)
-			continue;
-		if (!(vma->vm_flags & VM_EXECUTABLE))
-			continue;
-		app_cookie = fast_get_dcookie(&vma->vm_file->f_path);
+	if (mm->exe_file) {
+		app_cookie = fast_get_dcookie(&mm->exe_file->f_path);
 		pr_debug("got dcookie for %s\n",
-			 vma->vm_file->f_dentry->d_name.name);
-		app = vma->vm_file;
-		break;
+			 mm->exe_file->f_dentry->d_name.name);
 	}
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
diff --git a/arch/powerpc/oprofile/cell/vma_map.c b/arch/powerpc/oprofile/cell/vma_map.c
index 258fa4411e9..c579b16845d 100644
--- a/arch/powerpc/oprofile/cell/vma_map.c
+++ b/arch/powerpc/oprofile/cell/vma_map.c
@@ -20,6 +20,7 @@
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/elf.h>
+#include <linux/slab.h>
 #include "pr_util.h"
 
 
@@ -185,7 +186,7 @@ struct vma_to_fileoffset_map *create_vma_map(const struct spu *aSpu,
 			goto fail;
 
 		if (shdr_str.sh_type != SHT_STRTAB)
-			goto fail;;
+			goto fail;
 
 		for (j = 0; j < shdr.sh_size / sizeof (sym); j++) {
 			if (copy_from_user(&sym, spu_elf_start +
diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c
index 21f16edf6c8..c77348c5d46 100644
--- a/arch/powerpc/oprofile/common.c
+++ b/arch/powerpc/oprofile/common.c
@@ -18,7 +18,6 @@
 #include <linux/smp.h>
 #include <linux/errno.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/pmc.h>
 #include <asm/cputable.h>
 #include <asm/oprofile_impl.h>
@@ -120,7 +119,7 @@ static void op_powerpc_stop(void)
 		model->global_stop();
 }
 
-static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
+static int op_powerpc_create_files(struct dentry *root)
 {
 	int i;
 
@@ -129,9 +128,9 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
 	 * There is one mmcr0, mmcr1 and mmcra for setting the events for
 	 * all of the counters.
 	 */
-	oprofilefs_create_ulong(sb, root, "mmcr0", &sys.mmcr0);
-	oprofilefs_create_ulong(sb, root, "mmcr1", &sys.mmcr1);
-	oprofilefs_create_ulong(sb, root, "mmcra", &sys.mmcra);
+	oprofilefs_create_ulong(root, "mmcr0", &sys.mmcr0);
+	oprofilefs_create_ulong(root, "mmcr1", &sys.mmcr1);
+	oprofilefs_create_ulong(root, "mmcra", &sys.mmcra);
 #ifdef CONFIG_OPROFILE_CELL
 	/* create a file the user tool can check to see what level of profiling
 	 * support exits with this kernel. Initialize bit mask to indicate
@@ -143,7 +142,7 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
 	 * If the file does not exist, then the kernel only supports SPU
 	 * cycle profiling, PPU event and cycle profiling.
 	 */
-	oprofilefs_create_ulong(sb, root, "cell_support", &sys.cell_support);
+	oprofilefs_create_ulong(root, "cell_support", &sys.cell_support);
 	sys.cell_support = 0x1; /* Note, the user OProfile tool must check
 				 * that this bit is set before attempting to
 				 * user SPU event profiling.  Older kernels
@@ -161,11 +160,11 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
 		char buf[4];
 
 		snprintf(buf, sizeof buf, "%d", i);
-		dir = oprofilefs_mkdir(sb, root, buf);
+		dir = oprofilefs_mkdir(root, buf);
 
-		oprofilefs_create_ulong(sb, dir, "enabled", &ctr[i].enabled);
-		oprofilefs_create_ulong(sb, dir, "event", &ctr[i].event);
-		oprofilefs_create_ulong(sb, dir, "count", &ctr[i].count);
+		oprofilefs_create_ulong(dir, "enabled", &ctr[i].enabled);
+		oprofilefs_create_ulong(dir, "event", &ctr[i].event);
+		oprofilefs_create_ulong(dir, "count", &ctr[i].count);
 
 		/*
 		 * Classic PowerPC doesn't support per-counter
@@ -174,14 +173,14 @@ static int op_powerpc_create_files(struct super_block *sb, struct dentry *root)
 		 * Book-E style performance monitors, we do
 		 * support them.
 		 */
-		oprofilefs_create_ulong(sb, dir, "kernel", &ctr[i].kernel);
-		oprofilefs_create_ulong(sb, dir, "user", &ctr[i].user);
+		oprofilefs_create_ulong(dir, "kernel", &ctr[i].kernel);
+		oprofilefs_create_ulong(dir, "user", &ctr[i].user);
 
-		oprofilefs_create_ulong(sb, dir, "unit_mask", &ctr[i].unit_mask);
+		oprofilefs_create_ulong(dir, "unit_mask", &ctr[i].unit_mask);
 	}
 
-	oprofilefs_create_ulong(sb, root, "enable_kernel", &sys.enable_kernel);
-	oprofilefs_create_ulong(sb, root, "enable_user", &sys.enable_user);
+	oprofilefs_create_ulong(root, "enable_kernel", &sys.enable_kernel);
+	oprofilefs_create_ulong(root, "enable_user", &sys.enable_user);
 
 	/* Default to tracing both kernel and user */
 	sys.enable_kernel = 1;
@@ -195,11 +194,8 @@ int __init oprofile_arch_init(struct oprofile_operations *ops)
 	if (!cur_cpu_spec->oprofile_cpu_type)
 		return -ENODEV;
 
-	if (firmware_has_feature(FW_FEATURE_ISERIES))
-		return -ENODEV;
-
 	switch (cur_cpu_spec->oprofile_type) {
-#ifdef CONFIG_PPC64
+#ifdef CONFIG_PPC_BOOK3S_64
 #ifdef CONFIG_OPROFILE_CELL
 	case PPC_OPROFILE_CELL:
 		if (firmware_has_feature(FW_FEATURE_LPAR))
diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c
index cc599eb8768..d29b6e4e5e7 100644
--- a/arch/powerpc/oprofile/op_model_7450.c
+++ b/arch/powerpc/oprofile/op_model_7450.c
@@ -16,10 +16,8 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/cputable.h>
 #include <asm/page.h>
@@ -29,7 +27,7 @@
 static unsigned long reset_value[OP_MAX_COUNTER];
 
 static int oprofile_running;
-static u32 mmcr0_val, mmcr1_val, mmcr2_val;
+static u32 mmcr0_val, mmcr1_val, mmcr2_val, num_pmcs;
 
 #define MMCR0_PMC1_SHIFT	6
 #define MMCR0_PMC2_SHIFT	0
@@ -88,13 +86,12 @@ static int fsl7450_cpu_setup(struct op_counter_config *ctr)
 
 	mtspr(SPRN_MMCR0, mmcr0_val);
 	mtspr(SPRN_MMCR1, mmcr1_val);
-	mtspr(SPRN_MMCR2, mmcr2_val);
+	if (num_pmcs > 4)
+		mtspr(SPRN_MMCR2, mmcr2_val);
 
 	return 0;
 }
 
-#define NUM_CTRS 6
-
 /* Configures the global settings for the countes on all CPUs. */
 static int fsl7450_reg_setup(struct op_counter_config *ctr,
 			     struct op_system_config *sys,
@@ -102,12 +99,13 @@ static int fsl7450_reg_setup(struct op_counter_config *ctr,
 {
 	int i;
 
+	num_pmcs = num_ctrs;
 	/* Our counters count up, and "count" refers to
 	 * how much before the next interrupt, and we interrupt
 	 * on overflow. So we calculate the starting value
 	 * which will give us "count" until overflow.
 	 * Then we set the events on the enabled counters */
-	for (i = 0; i < NUM_CTRS; ++i)
+	for (i = 0; i < num_ctrs; ++i)
 		reset_value[i] = 0x80000000UL - ctr[i].count;
 
 	/* Set events for Counters 1 & 2 */
@@ -123,9 +121,10 @@ static int fsl7450_reg_setup(struct op_counter_config *ctr,
 
 	/* Set events for Counters 3-6 */
 	mmcr1_val = mmcr1_event3(ctr[2].event)
-		| mmcr1_event4(ctr[3].event)
-		| mmcr1_event5(ctr[4].event)
-		| mmcr1_event6(ctr[5].event);
+		| mmcr1_event4(ctr[3].event);
+	if (num_ctrs > 4)
+		mmcr1_val |= mmcr1_event5(ctr[4].event)
+			| mmcr1_event6(ctr[5].event);
 
 	mmcr2_val = 0;
 
@@ -139,7 +138,7 @@ static int fsl7450_start(struct op_counter_config *ctr)
 
 	mtmsr(mfmsr() | MSR_PMM);
 
-	for (i = 0; i < NUM_CTRS; ++i) {
+	for (i = 0; i < num_pmcs; ++i) {
 		if (ctr[i].enabled)
 			classic_ctr_write(i, reset_value[i]);
 		else
@@ -184,7 +183,7 @@ static void fsl7450_handle_interrupt(struct pt_regs *regs,
 	pc = mfspr(SPRN_SIAR);
 	is_kernel = is_kernel_addr(pc);
 
-	for (i = 0; i < NUM_CTRS; ++i) {
+	for (i = 0; i < num_pmcs; ++i) {
 		val = classic_ctr_read(i);
 		if (val < 0) {
 			if (oprofile_running && ctr[i].enabled) {
diff --git a/arch/powerpc/oprofile/op_model_cell.c b/arch/powerpc/oprofile/op_model_cell.c
index ae06c6236d9..863d89386f6 100644
--- a/arch/powerpc/oprofile/op_model_cell.c
+++ b/arch/powerpc/oprofile/op_model_cell.c
@@ -16,7 +16,6 @@
 
 #include <linux/cpufreq.h>
 #include <linux/delay.h>
-#include <linux/init.h>
 #include <linux/jiffies.h>
 #include <linux/kthread.h>
 #include <linux/oprofile.h>
@@ -34,7 +33,6 @@
 #include <asm/ptrace.h>
 #include <asm/reg.h>
 #include <asm/rtas.h>
-#include <asm/system.h>
 #include <asm/cell-regs.h>
 
 #include "../platforms/cell/interrupt.h"
@@ -67,7 +65,7 @@
 
 #define MAX_SPU_COUNT 0xFFFFFF	/* maximum 24 bit LFSR value */
 
-/* Minumum HW interval timer setting to send value to trace buffer is 10 cycle.
+/* Minimum HW interval timer setting to send value to trace buffer is 10 cycle.
  * To configure counter to send value every N cycles set counter to
  * 2^32 - 1 - N.
  */
@@ -248,7 +246,7 @@ static int pm_rtas_activate_signals(u32 node, u32 count)
 	 * There is no debug setup required for the cycles event.
 	 * Note that only events in the same group can be used.
 	 * Otherwise, there will be conflicts in correctly routing
-	 * the signals on the debug bus.  It is the responsiblity
+	 * the signals on the debug bus.  It is the responsibility
 	 * of the OProfile user tool to check the events are in
 	 * the same group.
 	 */
@@ -1077,7 +1075,7 @@ static int calculate_lfsr(int n)
 		index = ENTRIES-1;
 
 	/* make sure index is valid */
-	if ((index > ENTRIES) || (index < 0))
+	if ((index >= ENTRIES) || (index < 0))
 		index = ENTRIES-1;
 
 	return initial_lfsr[index];
@@ -1123,8 +1121,7 @@ oprof_cpufreq_notify(struct notifier_block *nb, unsigned long val, void *data)
 	int ret = 0;
 	struct cpufreq_freqs *frq = data;
 	if ((val == CPUFREQ_PRECHANGE && frq->old < frq->new) ||
-	    (val == CPUFREQ_POSTCHANGE && frq->old > frq->new) ||
-	    (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE))
+	    (val == CPUFREQ_POSTCHANGE && frq->old > frq->new))
 		set_spu_profiling_frequency(frq->new, spu_cycle_reset);
 	return ret;
 }
@@ -1469,8 +1466,8 @@ static int cell_global_start(struct op_counter_config *ctr)
  * The pm_interval register is setup to write the SPU PC value into the
  * trace buffer at the maximum rate possible.  The trace buffer is configured
  * to store the PCs, wrapping when it is full.  The performance counter is
- * intialized to the max hardware count minus the number of events, N, between
- * samples.  Once the N events have occured, a HW counter overflow occurs
+ * initialized to the max hardware count minus the number of events, N, between
+ * samples.  Once the N events have occurred, a HW counter overflow occurs
  * causing the generation of a HW counter interrupt which also stops the
  * writing of the SPU PC values to the trace buffer.  Hence the last PC
  * written to the trace buffer is the SPU PC that we want.  Unfortunately,
@@ -1594,7 +1591,7 @@ static void cell_handle_interrupt_spu(struct pt_regs *regs,
 	 * to a latch. The new values (interrupt setting bits, reset
 	 * counter value etc.) are not copied to the actual registers
 	 * until the performance monitor is enabled.  In order to get
-	 * this to work as desired, the permormance monitor needs to
+	 * this to work as desired, the performance monitor needs to
 	 * be disabled while writing to the latches.  This is a
 	 * HW design issue.
 	 */
@@ -1656,7 +1653,7 @@ static void cell_handle_interrupt_ppu(struct pt_regs *regs,
 	 * The counters were frozen by the interrupt.
 	 * Reenable the interrupt and restart the counters.
 	 * If there was a race between the interrupt handler and
-	 * the virtual counter routine.  The virutal counter
+	 * the virtual counter routine.  The virtual counter
 	 * routine may have cleared the interrupts.  Hence must
 	 * use the virt_cntr_inter_mask to re-enable the interrupts.
 	 */
@@ -1668,7 +1665,7 @@ static void cell_handle_interrupt_ppu(struct pt_regs *regs,
 	 * to a latch. The new values (interrupt setting bits, reset
 	 * counter value etc.) are not copied to the actual registers
 	 * until the performance monitor is enabled.  In order to get
-	 * this to work as desired, the permormance monitor needs to
+	 * this to work as desired, the performance monitor needs to
 	 * be disabled while writing to the latches.  This is a
 	 * HW design issue.
 	 */
diff --git a/arch/powerpc/oprofile/op_model_fsl_emb.c b/arch/powerpc/oprofile/op_model_fsl_emb.c
index 91596f6ba1f..14cf86fddda 100644
--- a/arch/powerpc/oprofile/op_model_fsl_emb.c
+++ b/arch/powerpc/oprofile/op_model_fsl_emb.c
@@ -2,7 +2,7 @@
  * Freescale Embedded oprofile support, based on ppc64 oprofile support
  * Copyright (C) 2004 Anton Blanchard <anton@au.ibm.com>, IBM
  *
- * Copyright (c) 2004 Freescale Semiconductor, Inc
+ * Copyright (c) 2004, 2010 Freescale Semiconductor, Inc
  *
  * Author: Andy Fleming
  * Maintainer: Kumar Gala <galak@kernel.crashing.org>
@@ -14,10 +14,8 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/cputable.h>
 #include <asm/reg_fsl_emb.h>
@@ -47,6 +45,12 @@ static inline u32 get_pmlca(int ctr)
 	case 3:
 		pmlca = mfpmr(PMRN_PMLCA3);
 		break;
+	case 4:
+		pmlca = mfpmr(PMRN_PMLCA4);
+		break;
+	case 5:
+		pmlca = mfpmr(PMRN_PMLCA5);
+		break;
 	default:
 		panic("Bad ctr number\n");
 	}
@@ -69,6 +73,12 @@ static inline void set_pmlca(int ctr, u32 pmlca)
 	case 3:
 		mtpmr(PMRN_PMLCA3, pmlca);
 		break;
+	case 4:
+		mtpmr(PMRN_PMLCA4, pmlca);
+		break;
+	case 5:
+		mtpmr(PMRN_PMLCA5, pmlca);
+		break;
 	default:
 		panic("Bad ctr number\n");
 	}
@@ -85,6 +95,10 @@ static inline unsigned int ctr_read(unsigned int i)
 		return mfpmr(PMRN_PMC2);
 	case 3:
 		return mfpmr(PMRN_PMC3);
+	case 4:
+		return mfpmr(PMRN_PMC4);
+	case 5:
+		return mfpmr(PMRN_PMC5);
 	default:
 		return 0;
 	}
@@ -105,6 +119,12 @@ static inline void ctr_write(unsigned int i, unsigned int val)
 	case 3:
 		mtpmr(PMRN_PMC3, val);
 		break;
+	case 4:
+		mtpmr(PMRN_PMC4, val);
+		break;
+	case 5:
+		mtpmr(PMRN_PMC5, val);
+		break;
 	default:
 		break;
 	}
@@ -134,6 +154,14 @@ static void init_pmc_stop(int ctr)
 		mtpmr(PMRN_PMLCA3, pmlca);
 		mtpmr(PMRN_PMLCB3, pmlcb);
 		break;
+	case 4:
+		mtpmr(PMRN_PMLCA4, pmlca);
+		mtpmr(PMRN_PMLCB4, pmlcb);
+		break;
+	case 5:
+		mtpmr(PMRN_PMLCA5, pmlca);
+		mtpmr(PMRN_PMLCB5, pmlcb);
+		break;
 	default:
 		panic("Bad ctr number!\n");
 	}
@@ -228,20 +256,6 @@ static void pmc_stop_ctrs(void)
 	mtpmr(PMRN_PMGC0, pmgc0);
 }
 
-static void dump_pmcs(void)
-{
-	printk("pmgc0: %x\n", mfpmr(PMRN_PMGC0));
-	printk("pmc\t\tpmlca\t\tpmlcb\n");
-	printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC0),
-			mfpmr(PMRN_PMLCA0), mfpmr(PMRN_PMLCB0));
-	printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC1),
-			mfpmr(PMRN_PMLCA1), mfpmr(PMRN_PMLCB1));
-	printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC2),
-			mfpmr(PMRN_PMLCA2), mfpmr(PMRN_PMLCB2));
-	printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC3),
-			mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3));
-}
-
 static int fsl_emb_cpu_setup(struct op_counter_config *ctr)
 {
 	int i;
@@ -335,9 +349,6 @@ static void fsl_emb_handle_interrupt(struct pt_regs *regs,
 	int val;
 	int i;
 
-	/* set the PMM bit (see comment below) */
-	mtmsr(mfmsr() | MSR_PMM);
-
 	pc = regs->nip;
 	is_kernel = is_kernel_addr(pc);
 
@@ -354,9 +365,13 @@ static void fsl_emb_handle_interrupt(struct pt_regs *regs,
 	}
 
 	/* The freeze bit was set by the interrupt. */
-	/* Clear the freeze bit, and reenable the interrupt.
-	 * The counters won't actually start until the rfi clears
-	 * the PMM bit */
+	/* Clear the freeze bit, and reenable the interrupt.  The
+	 * counters won't actually start until the rfi clears the PMM
+	 * bit.  The PMM bit should not be set until after the interrupt
+	 * is cleared to avoid it getting lost in some hypervisor
+	 * environments.
+	 */
+	mtmsr(mfmsr() | MSR_PMM);
 	pmc_start_ctrs(1);
 }
 
diff --git a/arch/powerpc/oprofile/op_model_pa6t.c b/arch/powerpc/oprofile/op_model_pa6t.c
index 42f778dff91..a114a7c22d4 100644
--- a/arch/powerpc/oprofile/op_model_pa6t.c
+++ b/arch/powerpc/oprofile/op_model_pa6t.c
@@ -22,7 +22,6 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <linux/percpu.h>
 #include <asm/processor.h>
diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c
index 3e3d91f536e..962fe7b3e3f 100644
--- a/arch/powerpc/oprofile/op_model_power4.c
+++ b/arch/powerpc/oprofile/op_model_power4.c
@@ -10,11 +10,9 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/firmware.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/cputable.h>
 #include <asm/rtas.h>
@@ -22,15 +20,78 @@
 #include <asm/reg.h>
 
 #define dbg(args...)
+#define OPROFILE_PM_PMCSEL_MSK      0xffULL
+#define OPROFILE_PM_UNIT_SHIFT      60
+#define OPROFILE_PM_UNIT_MSK        0xfULL
+#define OPROFILE_MAX_PMC_NUM        3
+#define OPROFILE_PMSEL_FIELD_WIDTH  8
+#define OPROFILE_UNIT_FIELD_WIDTH   4
+#define MMCRA_SIAR_VALID_MASK       0x10000000ULL
 
 static unsigned long reset_value[OP_MAX_COUNTER];
 
 static int oprofile_running;
+static int use_slot_nums;
 
 /* mmcr values are set in power4_reg_setup, used in power4_cpu_setup */
 static u32 mmcr0_val;
 static u64 mmcr1_val;
 static u64 mmcra_val;
+static u32 cntr_marked_events;
+
+static int power7_marked_instr_event(u64 mmcr1)
+{
+	u64 psel, unit;
+	int pmc, cntr_marked_events = 0;
+
+	/* Given the MMCR1 value, look at the field for each counter to
+	 * determine if it is a marked event.  Code based on the function
+	 * power7_marked_instr_event() in file arch/powerpc/perf/power7-pmu.c.
+	 */
+	for (pmc = 0; pmc < 4; pmc++) {
+		psel = mmcr1 & (OPROFILE_PM_PMCSEL_MSK
+				<< (OPROFILE_MAX_PMC_NUM - pmc)
+				* OPROFILE_PMSEL_FIELD_WIDTH);
+		psel = (psel >> ((OPROFILE_MAX_PMC_NUM - pmc)
+				 * OPROFILE_PMSEL_FIELD_WIDTH)) & ~1ULL;
+		unit = mmcr1 & (OPROFILE_PM_UNIT_MSK
+				<< (OPROFILE_PM_UNIT_SHIFT
+				    - (pmc * OPROFILE_PMSEL_FIELD_WIDTH )));
+		unit = unit >> (OPROFILE_PM_UNIT_SHIFT
+				- (pmc * OPROFILE_PMSEL_FIELD_WIDTH));
+
+		switch (psel >> 4) {
+		case 2:
+			cntr_marked_events |= (pmc == 1 || pmc == 3) << pmc;
+			break;
+		case 3:
+			if (psel == 0x3c) {
+				cntr_marked_events |= (pmc == 0) << pmc;
+				break;
+			}
+
+			if (psel == 0x3e) {
+				cntr_marked_events |= (pmc != 1) << pmc;
+				break;
+			}
+
+			cntr_marked_events |= 1 << pmc;
+			break;
+		case 4:
+		case 5:
+			cntr_marked_events |= (unit == 0xd) << pmc;
+			break;
+		case 6:
+			if (psel == 0x64)
+				cntr_marked_events |= (pmc >= 2) << pmc;
+			break;
+		case 8:
+			cntr_marked_events |= (unit == 0xd) << pmc;
+			break;
+		}
+	}
+	return cntr_marked_events;
+}
 
 static int power4_reg_setup(struct op_counter_config *ctr,
 			    struct op_system_config *sys,
@@ -47,6 +108,23 @@ static int power4_reg_setup(struct op_counter_config *ctr,
 	mmcr1_val = sys->mmcr1;
 	mmcra_val = sys->mmcra;
 
+	/* Power 7+ and newer architectures:
+	 * Determine which counter events in the group (the group of events is
+	 * specified by the bit settings in the MMCR1 register) are marked
+	 * events for use in the interrupt handler.  Do the calculation once
+	 * before OProfile starts.  Information is used in the interrupt
+	 * handler.  Starting with Power 7+ we only record the sample for
+	 * marked events if the SIAR valid bit is set.  For non marked events
+	 * the sample is always recorded.
+	 */
+	if (pvr_version_is(PVR_POWER7p))
+		cntr_marked_events = power7_marked_instr_event(mmcr1_val);
+	else
+		cntr_marked_events = 0; /* For older processors, set the bit map
+					 * to zero so the sample will always be
+					 * be recorded.
+					 */
+
 	for (i = 0; i < cur_cpu_spec->num_pmcs; ++i)
 		reset_value[i] = 0x80000000UL - ctr[i].count;
 
@@ -61,6 +139,12 @@ static int power4_reg_setup(struct op_counter_config *ctr,
 	else
 		mmcr0_val |= MMCR0_PROBLEM_DISABLE;
 
+	if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) ||
+	    pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) ||
+	    pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX) ||
+	    pvr_version_is(PVR_POWER5) || pvr_version_is(PVR_POWER5p))
+		use_slot_nums = 1;
+
 	return 0;
 }
 
@@ -78,9 +162,9 @@ extern void ppc_enable_pmcs(void);
  */
 static inline int mmcra_must_set_sample(void)
 {
-	if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) ||
-	    __is_processor(PV_970) || __is_processor(PV_970FX) ||
-	    __is_processor(PV_970MP) || __is_processor(PV_970GX))
+	if (pvr_version_is(PVR_POWER4) || pvr_version_is(PVR_POWER4p) ||
+	    pvr_version_is(PVR_970) || pvr_version_is(PVR_970FX) ||
+	    pvr_version_is(PVR_970MP) || pvr_version_is(PVR_970GX))
 		return 1;
 
 	return 0;
@@ -200,13 +284,13 @@ static unsigned long get_pc(struct pt_regs *regs)
 	unsigned long mmcra;
 	unsigned long slot;
 
-	/* Cant do much about it */
+	/* Can't do much about it */
 	if (!cur_cpu_spec->oprofile_mmcra_sihv)
 		return pc;
 
 	mmcra = mfspr(SPRN_MMCRA);
 
-	if (mmcra & MMCRA_SAMPLE_ENABLE) {
+	if (use_slot_nums && (mmcra & MMCRA_SAMPLE_ENABLE)) {
 		slot = ((mmcra & MMCRA_SLOT) >> MMCRA_SLOT_SHIFT);
 		if (slot > 1)
 			pc += 4 * (slot - 1);
@@ -254,6 +338,28 @@ static int get_kernel(unsigned long pc, unsigned long mmcra)
 	return is_kernel;
 }
 
+static bool pmc_overflow(unsigned long val)
+{
+	if ((int)val < 0)
+		return true;
+
+	/*
+	 * Events on POWER7 can roll back if a speculative event doesn't
+	 * eventually complete. Unfortunately in some rare cases they will
+	 * raise a performance monitor exception. We need to catch this to
+	 * ensure we reset the PMC. In all cases the PMC will be 256 or less
+	 * cycles from overflow.
+	 *
+	 * We only do this if the first pass fails to find any overflowing
+	 * PMCs because a user might set a period of less than 256 and we
+	 * don't want to mistakenly reset them.
+	 */
+	if (pvr_version_is(PVR_POWER7) && ((0x80000000 - val) <= 256))
+		return true;
+
+	return false;
+}
+
 static void power4_handle_interrupt(struct pt_regs *regs,
 				    struct op_counter_config *ctr)
 {
@@ -263,6 +369,7 @@ static void power4_handle_interrupt(struct pt_regs *regs,
 	int i;
 	unsigned int mmcr0;
 	unsigned long mmcra;
+	bool siar_valid = false;
 
 	mmcra = mfspr(SPRN_MMCRA);
 
@@ -272,11 +379,29 @@ static void power4_handle_interrupt(struct pt_regs *regs,
 	/* set the PMM bit (see comment below) */
 	mtmsrd(mfmsr() | MSR_PMM);
 
+	/* Check that the SIAR valid bit in MMCRA is set to 1. */
+	if ((mmcra & MMCRA_SIAR_VALID_MASK) == MMCRA_SIAR_VALID_MASK)
+		siar_valid = true;
+
 	for (i = 0; i < cur_cpu_spec->num_pmcs; ++i) {
 		val = classic_ctr_read(i);
-		if (val < 0) {
+		if (pmc_overflow(val)) {
 			if (oprofile_running && ctr[i].enabled) {
-				oprofile_add_ext_sample(pc, regs, i, is_kernel);
+				/* Power 7+ and newer architectures:
+				 * If the event is a marked event, then only
+				 * save the sample if the SIAR valid bit is
+				 * set.  If the event is not marked, then
+				 * always save the sample.
+				 * Note, the Sample enable bit in the MMCRA
+				 * register must be set to 1 if the group
+				 * contains a marked event.
+				 */
+				if ((siar_valid &&
+				     (cntr_marked_events & (1 << i)))
+				    || !(cntr_marked_events & (1 << i)))
+					oprofile_add_ext_sample(pc, regs, i,
+								is_kernel);
+
 				classic_ctr_write(i, reset_value[i]);
 			} else {
 				classic_ctr_write(i, 0);
diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c
index a20afe45d93..7e5b8ed3a1b 100644
--- a/arch/powerpc/oprofile/op_model_rs64.c
+++ b/arch/powerpc/oprofile/op_model_rs64.c
@@ -8,10 +8,8 @@
  */
 
 #include <linux/oprofile.h>
-#include <linux/init.h>
 #include <linux/smp.h>
 #include <asm/ptrace.h>
-#include <asm/system.h>
 #include <asm/processor.h>
 #include <asm/cputable.h>
 #include <asm/oprofile_impl.h>
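
Note on the POWER7+ change in op_model_power4.c above: the handler records a counter's sample unless the counter is programmed with a marked event and the SIAR valid bit in MMCRA is clear. The following standalone C sketch distills that rule for reading purposes only; it is not part of the patch, and the helper name should_record_sample is hypothetical.

#include <stdbool.h>
#include <stdint.h>

#define MMCRA_SIAR_VALID_MASK 0x10000000ULL	/* same mask as the patch */

/* cntr_marked_events: bit i set => counter i counts a marked event
 * (precomputed once from MMCR1 by power7_marked_instr_event()). */
static bool should_record_sample(uint32_t cntr_marked_events, int i,
				 uint64_t mmcra)
{
	bool siar_valid = (mmcra & MMCRA_SIAR_VALID_MASK) != 0;

	/* Non-marked events are always recorded; marked events only
	 * when the sampled-instruction address (SIAR) is valid. */
	if (cntr_marked_events & (1u << i))
		return siar_valid;
	return true;
}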
