diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2011-01-05 18:09:03 +0000 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2011-01-05 18:09:03 +0000 |
commit | 58daf18cdcab550262a5f4681e1f1e073e21965a (patch) | |
tree | 2096324b947761a567dd451f33664f17ee1de2cd /arch/arm/kernel | |
parent | aa312be1987d43216e72ffce42bccf6bf81f62ed (diff) | |
parent | 0af85dda39d9b673aca8c0ebae004ea70f3efc93 (diff) |
Merge branch 'clksrc' into devel
Conflicts:
arch/arm/mach-vexpress/v2m.c
arch/arm/plat-omap/counter_32k.c
arch/arm/plat-versatile/Makefile
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r-- | arch/arm/kernel/Makefile | 4 | ||||
-rw-r--r-- | arch/arm/kernel/entry-common.S | 202 | ||||
-rw-r--r-- | arch/arm/kernel/ftrace.c | 103 | ||||
-rw-r--r-- | arch/arm/kernel/irq.c | 4 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event.c | 2448 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v6.c | 672 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_v7.c | 906 | ||||
-rw-r--r-- | arch/arm/kernel/perf_event_xscale.c | 807 | ||||
-rw-r--r-- | arch/arm/kernel/sched_clock.c | 69 | ||||
-rw-r--r-- | arch/arm/kernel/smp.c | 5 | ||||
-rw-r--r-- | arch/arm/kernel/vmlinux.lds.S | 1 |
11 files changed, 2733 insertions, 2488 deletions
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fb..fd3ec49bfba 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -5,7 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg endif @@ -29,10 +29,12 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o +obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 80bf8cd88d7..1e7b04a40a3 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -147,98 +147,170 @@ ENDPROC(ret_from_fork) #endif #endif -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(__gnu_mcount_nc) - mov ip, lr - ldmia sp!, {lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) +.macro __mcount suffix + mcount_enter + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, .Lftrace_stub + cmp r0, r2 + bne 1f + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldr r1, =ftrace_graph_return + ldr r2, [r1] + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr r1, =ftrace_graph_entry + ldr r2, [r1] + ldr r0, =ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix +#endif -ENTRY(ftrace_caller) - stmdb sp!, {r0-r3, lr} - mov r0, lr + mcount_exit + +1: mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function + sub r0, r0, #MCOUNT_INSN_SIZE + adr lr, BSYM(2f) + mov pc, r2 +2: mcount_exit +.endm + +.macro __ftrace_caller suffix + mcount_enter + + mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function sub r0, r0, #MCOUNT_INSN_SIZE - ldr r1, [sp, #20] - .global ftrace_call -ftrace_call: + .globl ftrace_call\suffix +ftrace_call\suffix: bl ftrace_stub - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(ftrace_caller) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call\suffix +ftrace_graph_call\suffix: + mov r0, r0 +#endif + + mcount_exit +.endm + +.macro __ftrace_graph_caller + sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#ifdef CONFIG_DYNAMIC_FTRACE + @ called from __ftrace_caller, saved in mcount_enter + ldr r1, [sp, #16] @ instrumented routine (func) +#else + @ called from __mcount, untouched in lr + mov r1, lr @ instrumented routine (func) +#endif + sub r1, r1, #MCOUNT_INSN_SIZE + mov r2, fp @ frame pointer + bl prepare_ftrace_return + mcount_exit +.endm #ifdef CONFIG_OLD_MCOUNT +/* + * mcount + */ + +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [fp, #-4] +.endm + +.macro mcount_exit + ldr lr, [fp, #-4] + ldmia sp!, {r0-r3, pc} +.endm + ENTRY(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE stmdb sp!, {lr} ldr lr, [fp, #-4] ldmia sp!, {pc} +#else + __mcount _old +#endif ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) - stmdb sp!, {r0-r3, lr} - ldr r1, [fp, #-4] - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - - .globl ftrace_call_old -ftrace_call_old: - bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} + __ftrace_caller _old ENDPROC(ftrace_caller_old) #endif -#else +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller_old) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller_old) +#endif -ENTRY(__gnu_mcount_nc) +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit +#endif + +/* + * __gnu_mcount_nc + */ + +.macro mcount_enter stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne gnu_trace +.endm + +.macro mcount_get_lr reg + ldr \reg, [sp, #20] +.endm + +.macro mcount_exit ldmia sp!, {r0-r3, ip, lr} mov pc, ip +.endm -gnu_trace: - ldr r1, [sp, #20] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(1f) - mov pc, r2 -1: - ldmia sp!, {r0-r3, ip, lr} +ENTRY(__gnu_mcount_nc) +#ifdef CONFIG_DYNAMIC_FTRACE + mov ip, lr + ldmia sp!, {lr} mov pc, ip +#else + __mcount +#endif ENDPROC(__gnu_mcount_nc) -#ifdef CONFIG_OLD_MCOUNT -/* - * This is under an ifdef in order to force link-time errors for people trying - * to build with !FRAME_POINTER with a GCC which doesn't use the new-style - * mcount. - */ -ENTRY(mcount) - stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, ftrace_stub - cmp r0, r2 - bne trace - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller) + __ftrace_caller +ENDPROC(ftrace_caller) +#endif -trace: - ldr r1, [fp, #-4] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - mov lr, pc - mov pc, r2 - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -ENDPROC(mcount) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller) #endif -#endif /* CONFIG_DYNAMIC_FTRACE */ +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl return_to_handler +return_to_handler: + stmdb sp!, {r0-r3} + mov r0, fp @ frame pointer + bl ftrace_return_to_handler + mov lr, r0 @ r0 has real ret addr + ldmia sp!, {r0-r3} + mov pc, lr +#endif ENTRY(ftrace_stub) .Lftrace_stub: diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 971ac8c36ea..c0062ad1e84 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -24,6 +24,7 @@ #define NOP 0xe8bd4000 /* pop {lr} */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) @@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) } #endif -/* construct a branch (BL) instruction to addr */ #ifdef CONFIG_THUMB2_KERNEL -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { unsigned long s, j1, j2, i1, i2, imm10, imm11; unsigned long first, second; @@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) j2 = (!i2) ^ s; first = 0xf000 | (s << 10) | imm10; - second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; + second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; + if (link) + second |= 1 << 14; return (second << 16) | first; } #else -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { + unsigned long opcode = 0xea000000; long offset; + if (link) + opcode |= 1 << 24; + offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace @@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) offset = (offset >> 2) & 0x00ffffff; - return 0xeb000000 | offset; + return opcode | offset; } #endif +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) { @@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data) return 0; } +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } + + trace.func = self_addr; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + current->curr_ret_stack--; + *parent = old; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; +extern unsigned long ftrace_graph_call_old; +extern void ftrace_graph_caller_old(void); + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_call_old, + ftrace_graph_caller_old, + enable); +#endif + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692..6d616333340 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -35,6 +35,7 @@ #include <linux/list.h> #include <linux/kallsyms.h> #include <linux/proc_fs.h> +#include <linux/ftrace.h> #include <asm/system.h> #include <asm/mach/irq.h> @@ -105,7 +106,8 @@ unlock: * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492..421a4bb88fe 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -4,9 +4,7 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv7 support: Jean Pihet <jpihet@mvista.com> - * 2010 (c) MontaVista Software, LLC. + * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -69,29 +67,23 @@ struct cpu_hw_events { }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -/* PMU names. */ -static const char *arm_pmu_names[] = { - [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", - [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", - [ARM_PERF_PMU_ID_V6] = "v6", - [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", - [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", - [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", -}; - struct arm_pmu { enum arm_perf_pmu_ids id; + const char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); - int (*event_map)(int evt); - u64 (*raw_event)(u64); int (*get_event_idx)(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc); u32 (*read_counter)(int idx); void (*write_counter)(int idx, u32 val); void (*start)(void); void (*stop)(void); + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; + u32 raw_event_mask; int num_events; u64 max_period; }; @@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF -static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - static int armpmu_map_cache_event(u64 config) { @@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -166,6 +154,19 @@ armpmu_map_cache_event(u64 config) } static int +armpmu_map_event(u64 config) +{ + int mapping = (*armpmu->event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; +} + +static int +armpmu_map_raw_event(u64 config) +{ + return (int)(config & armpmu->raw_event_mask); +} + +static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx) @@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event) /* Decode the generic type into an ARM event identifier. */ if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu->event_map(event->attr.config); + mapping = armpmu_map_event(event->attr.config); } else if (PERF_TYPE_HW_CACHE == event->attr.type) { mapping = armpmu_map_cache_event(event->attr.config); } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu->raw_event(event->attr.config); + mapping = armpmu_map_raw_event(event->attr.config); } else { pr_debug("event type %x not supported\n", event->attr.type); return -EOPNOTSUPP; @@ -603,2366 +604,10 @@ static struct pmu pmu = { .read = armpmu_read, }; -/* - * ARMv6 Performance counter handling code. - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void -armv6pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); |