diff options
Diffstat (limited to 'arch/sh/kernel/cpu')
-rw-r--r-- | arch/sh/kernel/cpu/Makefile | 1 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/init.c | 28 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh2a/fpu.c | 27 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh3/entry.S | 33 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/Makefile | 8 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/fpu.c | 28 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4/perf_event.c | 253 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4a/Makefile | 1 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4a/clock-sh7724.c | 2 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4a/perf_event.c | 269 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4a/setup-sh7724.c | 264 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4a/setup-shx3.c | 45 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh4a/smp-shx3.c | 37 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/sh5/entry.S | 2 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/shmobile/cpuidle.c | 42 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/shmobile/pm.c | 117 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/shmobile/pm_runtime.c | 17 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/shmobile/sleep.S | 344 | ||||
-rw-r--r-- | arch/sh/kernel/cpu/ubc.S | 59 |
19 files changed, 1202 insertions, 375 deletions
diff --git a/arch/sh/kernel/cpu/Makefile b/arch/sh/kernel/cpu/Makefile index 3d6b9312dc4..d97c803719e 100644 --- a/arch/sh/kernel/cpu/Makefile +++ b/arch/sh/kernel/cpu/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_ARCH_SHMOBILE) += shmobile/ # Common interfaces. -obj-$(CONFIG_UBC_WAKEUP) += ubc.o obj-$(CONFIG_SH_ADC) += adc.o obj-$(CONFIG_SH_CLK_CPG) += clock-cpg.o diff --git a/arch/sh/kernel/cpu/init.c b/arch/sh/kernel/cpu/init.c index e932ebef473..89b4b76c0d7 100644 --- a/arch/sh/kernel/cpu/init.c +++ b/arch/sh/kernel/cpu/init.c @@ -75,16 +75,11 @@ static void __init expmask_init(void) /* * Future proofing. * - * Disable support for slottable sleep instruction - * and non-nop instructions in the rte delay slot. + * Disable support for slottable sleep instruction, non-nop + * instructions in the rte delay slot, and associative writes to + * the memory-mapped cache array. */ - expmask &= ~(EXPMASK_RTEDS | EXPMASK_BRDSSLP); - - /* - * Enable associative writes to the memory-mapped cache array - * until the cache flush ops have been rewritten. - */ - expmask |= EXPMASK_MMCAW; + expmask &= ~(EXPMASK_RTEDS | EXPMASK_BRDSSLP | EXPMASK_MMCAW); __raw_writel(expmask, EXPMASK); ctrl_barrier(); @@ -311,12 +306,12 @@ asmlinkage void __init sh_cpu_init(void) if (fpu_disabled) { printk("FPU Disabled\n"); current_cpu_data.flags &= ~CPU_HAS_FPU; - disable_fpu(); } /* FPU initialization */ + disable_fpu(); if ((current_cpu_data.flags & CPU_HAS_FPU)) { - clear_thread_flag(TIF_USEDFPU); + current_thread_info()->status &= ~TS_USEDFPU; clear_used_math(); } @@ -338,17 +333,6 @@ asmlinkage void __init sh_cpu_init(void) } #endif - /* - * Some brain-damaged loaders decided it would be a good idea to put - * the UBC to sleep. This causes some issues when it comes to things - * like PTRACE_SINGLESTEP or doing hardware watchpoints in GDB. So .. - * we wake it up and hope that all is well. - */ -#ifdef CONFIG_SUPERH32 - if (raw_smp_processor_id() == 0) - ubc_wakeup(); -#endif - speculative_execution_init(); expmask_init(); } diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 6df2fb98eb3..d395ce5740e 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -25,14 +25,12 @@ /* * Save FPU registers onto task structure. - * Assume called with FPU enabled (SR.FD=0). */ void -save_fpu(struct task_struct *tsk, struct pt_regs *regs) +save_fpu(struct task_struct *tsk) { unsigned long dummy; - clear_tsk_thread_flag(tsk, TIF_USEDFPU); enable_fpu(); asm volatile("sts.l fpul, @-%0\n\t" "sts.l fpscr, @-%0\n\t" @@ -60,7 +58,6 @@ save_fpu(struct task_struct *tsk, struct pt_regs *regs) : "memory"); disable_fpu(); - release_fpu(regs); } static void @@ -598,31 +595,31 @@ BUILD_TRAP_HANDLER(fpu_error) struct task_struct *tsk = current; TRAP_HANDLER_DECL; - save_fpu(tsk, regs); + __unlazy_fpu(tsk, regs); if (ieee_fpe_handler(regs)) { tsk->thread.fpu.hard.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); grab_fpu(regs); restore_fpu(tsk); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; return; } force_sig(SIGFPE, tsk); } -BUILD_TRAP_HANDLER(fpu_state_restore) +void fpu_state_restore(struct pt_regs *regs) { struct task_struct *tsk = current; - TRAP_HANDLER_DECL; grab_fpu(regs); - if (!user_mode(regs)) { + if (unlikely(!user_mode(regs))) { printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); + BUG(); return; } - if (used_math()) { + if (likely(used_math())) { /* Using the FPU again. */ restore_fpu(tsk); } else { @@ -630,5 +627,13 @@ BUILD_TRAP_HANDLER(fpu_state_restore) fpu_init(); set_used_math(); } - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; + tsk->fpu_counter++; +} + +BUILD_TRAP_HANDLER(fpu_state_restore) +{ + TRAP_HANDLER_DECL; + + fpu_state_restore(regs); } diff --git a/arch/sh/kernel/cpu/sh3/entry.S b/arch/sh/kernel/cpu/sh3/entry.S index bb407ef0b91..3f7e2a22c7c 100644 --- a/arch/sh/kernel/cpu/sh3/entry.S +++ b/arch/sh/kernel/cpu/sh3/entry.S @@ -297,41 +297,8 @@ ENTRY(vbr_base) ! .balign 256,0,256 general_exception: -#ifndef CONFIG_CPU_SUBTYPE_SHX3 bra handle_exception sts pr, k3 ! save original pr value in k3 -#else - mov.l 1f, k4 - mov.l @k4, k4 - - ! Is EXPEVT larger than 0x800? - mov #0x8, k0 - shll8 k0 - cmp/hs k0, k4 - bf 0f - - ! then add 0x580 (k2 is 0xd80 or 0xda0) - mov #0x58, k0 - shll2 k0 - shll2 k0 - add k0, k4 -0: - ! Setup stack and save DSP context (k0 contains original r15 on return) - bsr prepare_stack - nop - - ! Save registers / Switch to bank 0 - mov k4, k2 ! keep vector in k2 - mov.l 1f, k4 ! SR bits to clear in k4 - bsr save_regs ! needs original pr value in k3 - nop - - bra handle_exception_special - nop - - .align 2 -1: .long EXPEVT -#endif ! prepare_stack() ! - roll back gRB diff --git a/arch/sh/kernel/cpu/sh4/Makefile b/arch/sh/kernel/cpu/sh4/Makefile index 203b18347b8..3a1dbc70983 100644 --- a/arch/sh/kernel/cpu/sh4/Makefile +++ b/arch/sh/kernel/cpu/sh4/Makefile @@ -9,6 +9,11 @@ obj-$(CONFIG_HIBERNATION) += $(addprefix ../sh3/, swsusp.o) obj-$(CONFIG_SH_FPU) += fpu.o softfloat.o obj-$(CONFIG_SH_STORE_QUEUES) += sq.o +# Perf events +perf-$(CONFIG_CPU_SUBTYPE_SH7750) := perf_event.o +perf-$(CONFIG_CPU_SUBTYPE_SH7750S) := perf_event.o +perf-$(CONFIG_CPU_SUBTYPE_SH7091) := perf_event.o + # CPU subtype setup obj-$(CONFIG_CPU_SUBTYPE_SH7750) += setup-sh7750.o obj-$(CONFIG_CPU_SUBTYPE_SH7750R) += setup-sh7750.o @@ -27,4 +32,5 @@ endif # Additional clocks by subtype clock-$(CONFIG_CPU_SUBTYPE_SH4_202) += clock-sh4-202.o -obj-y += $(clock-y) +obj-y += $(clock-y) +obj-$(CONFIG_PERF_EVENTS) += $(perf-y) diff --git a/arch/sh/kernel/cpu/sh4/fpu.c b/arch/sh/kernel/cpu/sh4/fpu.c index e3ea5411da6..e97857aec8a 100644 --- a/arch/sh/kernel/cpu/sh4/fpu.c +++ b/arch/sh/kernel/cpu/sh4/fpu.c @@ -41,13 +41,11 @@ static unsigned int fpu_exception_flags; /* * Save FPU registers onto task structure. - * Assume called with FPU enabled (SR.FD=0). */ -void save_fpu(struct task_struct *tsk, struct pt_regs *regs) +void save_fpu(struct task_struct *tsk) { unsigned long dummy; - clear_tsk_thread_flag(tsk, TIF_USEDFPU); enable_fpu(); asm volatile ("sts.l fpul, @-%0\n\t" "sts.l fpscr, @-%0\n\t" @@ -92,7 +90,6 @@ void save_fpu(struct task_struct *tsk, struct pt_regs *regs) :"memory"); disable_fpu(); - release_fpu(regs); } static void restore_fpu(struct task_struct *tsk) @@ -285,7 +282,6 @@ static int ieee_fpe_handler(struct pt_regs *regs) /* fcnvsd */ struct task_struct *tsk = current; - save_fpu(tsk, regs); if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)) /* FPU error */ denormal_to_double(&tsk->thread.fpu.hard, @@ -462,7 +458,7 @@ BUILD_TRAP_HANDLER(fpu_error) struct task_struct *tsk = current; TRAP_HANDLER_DECL; - save_fpu(tsk, regs); + __unlazy_fpu(tsk, regs); fpu_exception_flags = 0; if (ieee_fpe_handler(regs)) { tsk->thread.fpu.hard.fpscr &= @@ -473,7 +469,7 @@ BUILD_TRAP_HANDLER(fpu_error) tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10); grab_fpu(regs); restore_fpu(tsk); - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) & (fpu_exception_flags >> 2)) == 0) { return; @@ -483,18 +479,18 @@ BUILD_TRAP_HANDLER(fpu_error) force_sig(SIGFPE, tsk); } -BUILD_TRAP_HANDLER(fpu_state_restore) +void fpu_state_restore(struct pt_regs *regs) { struct task_struct *tsk = current; - TRAP_HANDLER_DECL; grab_fpu(regs); - if (!user_mode(regs)) { + if (unlikely(!user_mode(regs))) { printk(KERN_ERR "BUG: FPU is used in kernel mode.\n"); + BUG(); return; } - if (used_math()) { + if (likely(used_math())) { /* Using the FPU again. */ restore_fpu(tsk); } else { @@ -502,5 +498,13 @@ BUILD_TRAP_HANDLER(fpu_state_restore) fpu_init(); set_used_math(); } - set_tsk_thread_flag(tsk, TIF_USEDFPU); + task_thread_info(tsk)->status |= TS_USEDFPU; + tsk->fpu_counter++; +} + +BUILD_TRAP_HANDLER(fpu_state_restore) +{ + TRAP_HANDLER_DECL; + + fpu_state_restore(regs); } diff --git a/arch/sh/kernel/cpu/sh4/perf_event.c b/arch/sh/kernel/cpu/sh4/perf_event.c new file mode 100644 index 00000000000..7f9ecc9c2d0 --- /dev/null +++ b/arch/sh/kernel/cpu/sh4/perf_event.c @@ -0,0 +1,253 @@ +/* + * Performance events support for SH7750-style performance counters + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/perf_event.h> +#include <asm/processor.h> + +#define PM_CR_BASE 0xff000084 /* 16-bit */ +#define PM_CTR_BASE 0xff100004 /* 32-bit */ + +#define PMCR(n) (PM_CR_BASE + ((n) * 0x04)) +#define PMCTRH(n) (PM_CTR_BASE + 0x00 + ((n) * 0x08)) +#define PMCTRL(n) (PM_CTR_BASE + 0x04 + ((n) * 0x08)) + +#define PMCR_PMM_MASK 0x0000003f + +#define PMCR_CLKF 0x00000100 +#define PMCR_PMCLR 0x00002000 +#define PMCR_PMST 0x00004000 +#define PMCR_PMEN 0x00008000 + +static struct sh_pmu sh7750_pmu; + +/* + * There are a number of events supported by each counter (33 in total). + * Since we have 2 counters, each counter will take the event code as it + * corresponds to the PMCR PMM setting. Each counter can be configured + * independently. + * + * Event Code Description + * ---------- ----------- + * + * 0x01 Operand read access + * 0x02 Operand write access + * 0x03 UTLB miss + * 0x04 Operand cache read miss + * 0x05 Operand cache write miss + * 0x06 Instruction fetch (w/ cache) + * 0x07 Instruction TLB miss + * 0x08 Instruction cache miss + * 0x09 All operand accesses + * 0x0a All instruction accesses + * 0x0b OC RAM operand access + * 0x0d On-chip I/O space access + * 0x0e Operand access (r/w) + * 0x0f Operand cache miss (r/w) + * 0x10 Branch instruction + * 0x11 Branch taken + * 0x12 BSR/BSRF/JSR + * 0x13 Instruction execution + * 0x14 Instruction execution in parallel + * 0x15 FPU Instruction execution + * 0x16 Interrupt + * 0x17 NMI + * 0x18 trapa instruction execution + * 0x19 UBCA match + * 0x1a UBCB match + * 0x21 Instruction cache fill + * 0x22 Operand cache fill + * 0x23 Elapsed time + * 0x24 Pipeline freeze by I-cache miss + * 0x25 Pipeline freeze by D-cache miss + * 0x27 Pipeline freeze by branch instruction + * 0x28 Pipeline freeze by CPU register + * 0x29 Pipeline freeze by FPU + */ + +static const int sh7750_general_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x0023, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x000a, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0006, /* I-cache */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x0008, /* I-cache */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0010, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static const int sh7750_cache_events + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0001, + [ C(RESULT_MISS) ] = 0x0004, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0002, + [ C(RESULT_MISS) ] = 0x0005, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0006, + [ C(RESULT_MISS) ] = 0x0008, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0003, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x0007, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static int sh7750_event_map(int event) +{ + return sh7750_general_events[event]; +} + +static u64 sh7750_pmu_read(int idx) +{ + return (u64)((u64)(__raw_readl(PMCTRH(idx)) & 0xffff) << 32) | + __raw_readl(PMCTRL(idx)); +} + +static void sh7750_pmu_disable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readw(PMCR(idx)); + tmp &= ~(PMCR_PMM_MASK | PMCR_PMEN); + __raw_writew(tmp, PMCR(idx)); +} + +static void sh7750_pmu_enable(struct hw_perf_event *hwc, int idx) +{ + __raw_writew(__raw_readw(PMCR(idx)) | PMCR_PMCLR, PMCR(idx)); + __raw_writew(hwc->config | PMCR_PMEN | PMCR_PMST, PMCR(idx)); +} + +static void sh7750_pmu_disable_all(void) +{ + int i; + + for (i = 0; i < sh7750_pmu.num_events; i++) + __raw_writew(__raw_readw(PMCR(i)) & ~PMCR_PMEN, PMCR(i)); +} + +static void sh7750_pmu_enable_all(void) +{ + int i; + + for (i = 0; i < sh7750_pmu.num_events; i++) + __raw_writew(__raw_readw(PMCR(i)) | PMCR_PMEN, PMCR(i)); +} + +static struct sh_pmu sh7750_pmu = { + .name = "SH7750", + .num_events = 2, + .event_map = sh7750_event_map, + .max_events = ARRAY_SIZE(sh7750_general_events), + .raw_event_mask = PMCR_PMM_MASK, + .cache_events = &sh7750_cache_events, + .read = sh7750_pmu_read, + .disable = sh7750_pmu_disable, + .enable = sh7750_pmu_enable, + .disable_all = sh7750_pmu_disable_all, + .enable_all = sh7750_pmu_enable_all, +}; + +static int __init sh7750_pmu_init(void) +{ + /* + * Make sure this CPU actually has perf counters. + */ + if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) { + pr_notice("HW perf events unsupported, software events only.\n"); + return -ENODEV; + } + + return register_sh_pmu(&sh7750_pmu); +} +arch_initcall(sh7750_pmu_init); diff --git a/arch/sh/kernel/cpu/sh4a/Makefile b/arch/sh/kernel/cpu/sh4a/Makefile index 490d5dc9e37..33bab477d2e 100644 --- a/arch/sh/kernel/cpu/sh4a/Makefile +++ b/arch/sh/kernel/cpu/sh4a/Makefile @@ -44,3 +44,4 @@ pinmux-$(CONFIG_CPU_SUBTYPE_SH7786) := pinmux-sh7786.o obj-y += $(clock-y) obj-$(CONFIG_SMP) += $(smp-y) obj-$(CONFIG_GENERIC_GPIO) += $(pinmux-y) +obj-$(CONFIG_PERF_EVENTS) += perf_event.o diff --git a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c index dfe9192be63..9db743802f0 100644 --- a/arch/sh/kernel/cpu/sh4a/clock-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/clock-sh7724.c @@ -152,7 +152,7 @@ struct clk div6_clks[] = { SH_CLK_DIV6("fsia_clk", &div3_clk, FCLKACR, 0), SH_CLK_DIV6("fsib_clk", &div3_clk, FCLKBCR, 0), SH_CLK_DIV6("irda_clk", &div3_clk, IRDACLKCR, 0), - SH_CLK_DIV6("spu_clk", &div3_clk, SPUCLKCR, 0), + SH_CLK_DIV6("spu_clk", &div3_clk, SPUCLKCR, CLK_ENABLE_ON_INIT), }; #define R_CLK (&r_clk) diff --git a/arch/sh/kernel/cpu/sh4a/perf_event.c b/arch/sh/kernel/cpu/sh4a/perf_event.c new file mode 100644 index 00000000000..eddc21973fa --- /dev/null +++ b/arch/sh/kernel/cpu/sh4a/perf_event.c @@ -0,0 +1,269 @@ +/* + * Performance events support for SH-4A performance counters + * + * Copyright (C) 2009 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/io.h> +#include <linux/irq.h> +#include <linux/perf_event.h> +#include <asm/processor.h> + +#define PPC_CCBR(idx) (0xff200800 + (sizeof(u32) * idx)) +#define PPC_PMCTR(idx) (0xfc100000 + (sizeof(u32) * idx)) + +#define CCBR_CIT_MASK (0x7ff << 6) +#define CCBR_DUC (1 << 3) +#define CCBR_CMDS (1 << 1) +#define CCBR_PPCE (1 << 0) + +#define PPC_PMCAT 0xfc100080 + +#define PMCAT_OVF3 (1 << 27) +#define PMCAT_CNN3 (1 << 26) +#define PMCAT_CLR3 (1 << 25) +#define PMCAT_OVF2 (1 << 19) +#define PMCAT_CLR2 (1 << 17) +#define PMCAT_OVF1 (1 << 11) +#define PMCAT_CNN1 (1 << 10) +#define PMCAT_CLR1 (1 << 9) +#define PMCAT_OVF0 (1 << 3) +#define PMCAT_CLR0 (1 << 1) + +static struct sh_pmu sh4a_pmu; + +/* + * Supported raw event codes: + * + * Event Code Description + * ---------- ----------- + * + * 0x0000 number of elapsed cycles + * 0x0200 number of elapsed cycles in privileged mode + * 0x0280 number of elapsed cycles while SR.BL is asserted + * 0x0202 instruction execution + * 0x0203 instruction execution in parallel + * 0x0204 number of unconditional branches + * 0x0208 number of exceptions + * 0x0209 number of interrupts + * 0x0220 UTLB miss caused by instruction fetch + * 0x0222 UTLB miss caused by operand access + * 0x02a0 number of ITLB misses + * 0x0028 number of accesses to instruction memories + * 0x0029 number of accesses to instruction cache + * 0x002a instruction cache miss + * 0x022e number of access to instruction X/Y memory + * 0x0030 number of reads to operand memories + * 0x0038 number of writes to operand memories + * 0x0031 number of operand cache read accesses + * 0x0039 number of operand cache write accesses + * 0x0032 operand cache read miss + * 0x003a operand cache write miss + * 0x0236 number of reads to operand X/Y memory + * 0x023e number of writes to operand X/Y memory + * 0x0237 number of reads to operand U memory + * 0x023f number of writes to operand U memory + * 0x0337 number of U memory read buffer misses + * 0x02b4 number of wait cycles due to operand read access + * 0x02bc number of wait cycles due to operand write access + * 0x0033 number of wait cycles due to operand cache read miss + * 0x003b number of wait cycles due to operand cache write miss + */ + +/* + * Special reserved bits used by hardware emulators, read values will + * vary, but writes must always be 0. + */ +#define PMCAT_EMU_CLR_MASK ((1 << 24) | (1 << 16) | (1 << 8) | (1 << 0)) + +static const int sh4a_general_events[] = { + [PERF_COUNT_HW_CPU_CYCLES] = 0x0000, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x0202, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0029, /* I-cache */ + [PERF_COUNT_HW_CACHE_MISSES] = 0x002a, /* I-cache */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0204, + [PERF_COUNT_HW_BRANCH_MISSES] = -1, + [PERF_COUNT_HW_BUS_CYCLES] = -1, +}; + +#define C(x) PERF_COUNT_HW_CACHE_##x + +static const int sh4a_cache_events + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0031, + [ C(RESULT_MISS) ] = 0x0032, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0039, + [ C(RESULT_MISS) ] = 0x003a, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(L1I) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0029, + [ C(RESULT_MISS) ] = 0x002a, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(LL) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0030, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0038, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0222, + [ C(RESULT_MISS) ] = 0x0220, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0x02a0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + + [ C(BPU) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static int sh4a_event_map(int event) +{ + return sh4a_general_events[event]; +} + +static u64 sh4a_pmu_read(int idx) +{ + return __raw_readl(PPC_PMCTR(idx)); +} + +static void sh4a_pmu_disable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readl(PPC_CCBR(idx)); + tmp &= ~(CCBR_CIT_MASK | CCBR_DUC); + __raw_writel(tmp, PPC_CCBR(idx)); +} + +static void sh4a_pmu_enable(struct hw_perf_event *hwc, int idx) +{ + unsigned int tmp; + + tmp = __raw_readl(PPC_PMCAT); + tmp &= ~PMCAT_EMU_CLR_MASK; + tmp |= idx ? PMCAT_CLR1 : PMCAT_CLR0; + __raw_writel(tmp, PPC_PMCAT); + + tmp = __raw_readl(PPC_CCBR(idx)); + tmp |= (hwc->config << 6) | CCBR_CMDS | CCBR_PPCE; + __raw_writel(tmp, PPC_CCBR(idx)); + + __raw_writel(__raw_readl(PPC_CCBR(idx)) | CCBR_DUC, PPC_CCBR(idx)); +} + +static void sh4a_pmu_disable_all(void) +{ + int i; + + for (i = 0; i < sh4a_pmu.num_events; i++) + __raw_writel(__raw_readl(PPC_CCBR(i)) & ~CCBR_DUC, PPC_CCBR(i)); +} + +static void sh4a_pmu_enable_all(void) +{ + int i; + + for (i = 0; i < sh4a_pmu.num_events; i++) + __raw_writel(__raw_readl(PPC_CCBR(i)) | CCBR_DUC, PPC_CCBR(i)); +} + +static struct sh_pmu sh4a_pmu = { + .name = "SH-4A", + .num_events = 2, + .event_map = sh4a_event_map, + .max_events = ARRAY_SIZE(sh4a_general_events), + .raw_event_mask = 0x3ff, + .cache_events = &sh4a_cache_events, + .read = sh4a_pmu_read, + .disable = sh4a_pmu_disable, + .enable = sh4a_pmu_enable, + .disable_all = sh4a_pmu_disable_all, + .enable_all = sh4a_pmu_enable_all, +}; + +static int __init sh4a_pmu_init(void) +{ + /* + * Make sure this CPU actually has perf counters. + */ + if (!(boot_cpu_data.flags & CPU_HAS_PERF_COUNTER)) { + pr_notice("HW perf events unsupported, software events only.\n"); + return -ENODEV; + } + + return register_sh_pmu(&sh4a_pmu); +} +arch_initcall(sh4a_pmu_init); diff --git a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c index f3851fd757e..845e89c936e 100644 --- a/arch/sh/kernel/cpu/sh4a/setup-sh7724.c +++ b/arch/sh/kernel/cpu/sh4a/setup-sh7724.c @@ -20,6 +20,8 @@ #include <linux/uio_driver.h> #include <linux/sh_timer.h> #include <linux/io.h> +#include <linux/notifier.h> +#include <asm/suspend.h> #include <asm/clock.h> #include <asm/mmzone.h> #include <cpu/sh7724.h> @@ -202,7 +204,7 @@ static struct resource veu0_resources[] = { [0] = { .name = "VEU3F0", .start = 0xfe920000, - .end = 0xfe9200cb - 1, + .end = 0xfe9200cb, .flags = IORESOURCE_MEM, }, [1] = { @@ -234,7 +236,7 @@ static struct resource veu1_resources[] = { [0] = { .name = "VEU3F1", .start = 0xfe924000, - .end = 0xfe9240cb - 1, + .end = 0xfe9240cb, .flags = IORESOURCE_MEM, }, [1] = { @@ -523,6 +525,70 @@ static struct platform_device jpu_device = { }, }; +/* SPU2DSP0 */ +static struct uio_info spu0_platform_data = { + .name = "SPU2DSP0", + .version = "0", + .irq = 86, +}; + +static struct resource spu0_resources[] = { + [0] = { + .name = "SPU2DSP0", + .start = 0xFE200000, + .end = 0xFE2FFFFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + /* place holder for contiguous memory */ + }, +}; + +static struct platform_device spu0_device = { + .name = "uio_pdrv_genirq", + .id = 4, + .dev = { + .platform_data = &spu0_platform_data, + }, + .resource = spu0_resources, + .num_resources = ARRAY_SIZE(spu0_resources), + .archdata = { + .hwblk_id = HWBLK_SPU, + }, +}; + +/* SPU2DSP1 */ +static struct uio_info spu1_platform_data = { + .name = "SPU2DSP1", + .version = "0", + .irq = 87, +}; + +static struct resource spu1_resources[] = { + [0] = { + .name = "SPU2DSP1", + .start = 0xFE300000, + .end = 0xFE3FFFFF, + .flags = IORESOURCE_MEM, + }, + [1] = { + /* place holder for contiguous memory */ + }, +}; + +static struct platform_device spu1_device = { + .name = "uio_pdrv_genirq", + .id = 5, + .dev = { + .platform_data = &spu1_platform_data, + }, + .resource = spu1_resources, + .num_resources = ARRAY_SIZE(spu1_resources), + .archdata = { + .hwblk_id = HWBLK_SPU, + }, +}; + static struct platform_device *sh7724_devices[] __initdata = { &cmt_device, &tmu0_device, @@ -539,6 +605,8 @@ static struct platform_device *sh7724_devices[] __initdata = { &veu0_device, &veu1_device, &jpu_device, + &spu0_device, + &spu1_device, }; static int __init sh7724_devices_setup(void) @@ -547,6 +615,8 @@ static int __init sh7724_devices_setup(void) platform_resource_setup_memory(&veu0_device, "veu0", 2 << 20); platform_resource_setup_memory(&veu1_device, "veu1", 2 << 20); platform_resource_setup_memory(&jpu_device, "jpu", 2 << 20); + platform_resource_setup_memory(&spu0_device, "spu0", 2 << 20); + platform_resource_setup_memory(&spu1_device, "spu1", 2 << 20); return platform_add_devices(sh7724_devices, ARRAY_SIZE(sh7724_devices)); @@ -827,3 +897,193 @@ void __init plat_irq_setup(void) { register_intc_controller(&intc_desc); } + +static struct { + /* BSC */ + unsigned long mmselr; + unsigned long cs0bcr; + unsigned long cs4bcr; + unsigned long cs5abcr; + unsigned long cs5bbcr; + unsigned long cs6abcr; + unsigned long cs6bbcr; + unsigned long cs4wcr; + unsigned long cs5awcr; + unsigned long cs5bwcr; + unsigned long cs6awcr; + unsigned long cs6bwcr; + /* INTC */ + unsigned short ipra; + unsigned short iprb; + unsigned short iprc; + unsigned short iprd; + unsigned short ipre; + unsigned short iprf; + unsigned short iprg; + unsigned short iprh; + unsigned short ipri; + unsigned short iprj; + unsigned short iprk; + unsigned short iprl; + unsigned char imr0; + unsigned char imr1; + unsigned char imr2; + unsigned char imr3; + unsigned char imr4; + unsigned char imr5; + unsigned char imr6; + unsigned char imr7; + unsigned char imr8; + unsigned char imr9; + unsigned char imr10; + unsigned char imr11; + unsigned char imr12; + /* RWDT */ + unsigned short rwtcnt; + unsigned short rwtcsr; + /* CPG */ + unsigned long irdaclk; |