aboutsummaryrefslogtreecommitdiff
path: root/arch/arm/kernel
diff options
context:
space:
mode:
authorRussell King <rmk+kernel@arm.linux.org.uk>2011-01-05 18:09:03 +0000
committerRussell King <rmk+kernel@arm.linux.org.uk>2011-01-05 18:09:03 +0000
commit58daf18cdcab550262a5f4681e1f1e073e21965a (patch)
tree2096324b947761a567dd451f33664f17ee1de2cd /arch/arm/kernel
parentaa312be1987d43216e72ffce42bccf6bf81f62ed (diff)
parent0af85dda39d9b673aca8c0ebae004ea70f3efc93 (diff)
Merge branch 'clksrc' into devel
Conflicts: arch/arm/mach-vexpress/v2m.c arch/arm/plat-omap/counter_32k.c arch/arm/plat-versatile/Makefile
Diffstat (limited to 'arch/arm/kernel')
-rw-r--r--arch/arm/kernel/Makefile4
-rw-r--r--arch/arm/kernel/entry-common.S202
-rw-r--r--arch/arm/kernel/ftrace.c103
-rw-r--r--arch/arm/kernel/irq.c4
-rw-r--r--arch/arm/kernel/perf_event.c2448
-rw-r--r--arch/arm/kernel/perf_event_v6.c672
-rw-r--r--arch/arm/kernel/perf_event_v7.c906
-rw-r--r--arch/arm/kernel/perf_event_xscale.c807
-rw-r--r--arch/arm/kernel/sched_clock.c69
-rw-r--r--arch/arm/kernel/smp.c5
-rw-r--r--arch/arm/kernel/vmlinux.lds.S1
11 files changed, 2733 insertions, 2488 deletions
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5b9b268f4fb..fd3ec49bfba 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -5,7 +5,7 @@
CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET)
AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET)
-ifdef CONFIG_DYNAMIC_FTRACE
+ifdef CONFIG_FUNCTION_TRACER
CFLAGS_REMOVE_ftrace.o = -pg
endif
@@ -29,10 +29,12 @@ obj-$(CONFIG_MODULES) += armksyms.o module.o
obj-$(CONFIG_ARTHUR) += arthur.o
obj-$(CONFIG_ISA_DMA) += dma-isa.o
obj-$(CONFIG_PCI) += bios32.o isa.o
+obj-$(CONFIG_HAVE_SCHED_CLOCK) += sched_clock.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o
obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o
obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
+obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o
obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o
obj-$(CONFIG_ATAGS_PROC) += atags.o
diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 80bf8cd88d7..1e7b04a40a3 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -147,98 +147,170 @@ ENDPROC(ret_from_fork)
#endif
#endif
-#ifdef CONFIG_DYNAMIC_FTRACE
-ENTRY(__gnu_mcount_nc)
- mov ip, lr
- ldmia sp!, {lr}
- mov pc, ip
-ENDPROC(__gnu_mcount_nc)
+.macro __mcount suffix
+ mcount_enter
+ ldr r0, =ftrace_trace_function
+ ldr r2, [r0]
+ adr r0, .Lftrace_stub
+ cmp r0, r2
+ bne 1f
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ ldr r1, =ftrace_graph_return
+ ldr r2, [r1]
+ cmp r0, r2
+ bne ftrace_graph_caller\suffix
+
+ ldr r1, =ftrace_graph_entry
+ ldr r2, [r1]
+ ldr r0, =ftrace_graph_entry_stub
+ cmp r0, r2
+ bne ftrace_graph_caller\suffix
+#endif
-ENTRY(ftrace_caller)
- stmdb sp!, {r0-r3, lr}
- mov r0, lr
+ mcount_exit
+
+1: mcount_get_lr r1 @ lr of instrumented func
+ mov r0, lr @ instrumented function
+ sub r0, r0, #MCOUNT_INSN_SIZE
+ adr lr, BSYM(2f)
+ mov pc, r2
+2: mcount_exit
+.endm
+
+.macro __ftrace_caller suffix
+ mcount_enter
+
+ mcount_get_lr r1 @ lr of instrumented func
+ mov r0, lr @ instrumented function
sub r0, r0, #MCOUNT_INSN_SIZE
- ldr r1, [sp, #20]
- .global ftrace_call
-ftrace_call:
+ .globl ftrace_call\suffix
+ftrace_call\suffix:
bl ftrace_stub
- ldmia sp!, {r0-r3, ip, lr}
- mov pc, ip
-ENDPROC(ftrace_caller)
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .globl ftrace_graph_call\suffix
+ftrace_graph_call\suffix:
+ mov r0, r0
+#endif
+
+ mcount_exit
+.endm
+
+.macro __ftrace_graph_caller
+ sub r0, fp, #4 @ &lr of instrumented routine (&parent)
+#ifdef CONFIG_DYNAMIC_FTRACE
+ @ called from __ftrace_caller, saved in mcount_enter
+ ldr r1, [sp, #16] @ instrumented routine (func)
+#else
+ @ called from __mcount, untouched in lr
+ mov r1, lr @ instrumented routine (func)
+#endif
+ sub r1, r1, #MCOUNT_INSN_SIZE
+ mov r2, fp @ frame pointer
+ bl prepare_ftrace_return
+ mcount_exit
+.endm
#ifdef CONFIG_OLD_MCOUNT
+/*
+ * mcount
+ */
+
+.macro mcount_enter
+ stmdb sp!, {r0-r3, lr}
+.endm
+
+.macro mcount_get_lr reg
+ ldr \reg, [fp, #-4]
+.endm
+
+.macro mcount_exit
+ ldr lr, [fp, #-4]
+ ldmia sp!, {r0-r3, pc}
+.endm
+
ENTRY(mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
stmdb sp!, {lr}
ldr lr, [fp, #-4]
ldmia sp!, {pc}
+#else
+ __mcount _old
+#endif
ENDPROC(mcount)
+#ifdef CONFIG_DYNAMIC_FTRACE
ENTRY(ftrace_caller_old)
- stmdb sp!, {r0-r3, lr}
- ldr r1, [fp, #-4]
- mov r0, lr
- sub r0, r0, #MCOUNT_INSN_SIZE
-
- .globl ftrace_call_old
-ftrace_call_old:
- bl ftrace_stub
- ldr lr, [fp, #-4] @ restore lr
- ldmia sp!, {r0-r3, pc}
+ __ftrace_caller _old
ENDPROC(ftrace_caller_old)
#endif
-#else
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller_old)
+ __ftrace_graph_caller
+ENDPROC(ftrace_graph_caller_old)
+#endif
-ENTRY(__gnu_mcount_nc)
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+#endif
+
+/*
+ * __gnu_mcount_nc
+ */
+
+.macro mcount_enter
stmdb sp!, {r0-r3, lr}
- ldr r0, =ftrace_trace_function
- ldr r2, [r0]
- adr r0, .Lftrace_stub
- cmp r0, r2
- bne gnu_trace
+.endm
+
+.macro mcount_get_lr reg
+ ldr \reg, [sp, #20]
+.endm
+
+.macro mcount_exit
ldmia sp!, {r0-r3, ip, lr}
mov pc, ip
+.endm
-gnu_trace:
- ldr r1, [sp, #20] @ lr of instrumented routine
- mov r0, lr
- sub r0, r0, #MCOUNT_INSN_SIZE
- adr lr, BSYM(1f)
- mov pc, r2
-1:
- ldmia sp!, {r0-r3, ip, lr}
+ENTRY(__gnu_mcount_nc)
+#ifdef CONFIG_DYNAMIC_FTRACE
+ mov ip, lr
+ ldmia sp!, {lr}
mov pc, ip
+#else
+ __mcount
+#endif
ENDPROC(__gnu_mcount_nc)
-#ifdef CONFIG_OLD_MCOUNT
-/*
- * This is under an ifdef in order to force link-time errors for people trying
- * to build with !FRAME_POINTER with a GCC which doesn't use the new-style
- * mcount.
- */
-ENTRY(mcount)
- stmdb sp!, {r0-r3, lr}
- ldr r0, =ftrace_trace_function
- ldr r2, [r0]
- adr r0, ftrace_stub
- cmp r0, r2
- bne trace
- ldr lr, [fp, #-4] @ restore lr
- ldmia sp!, {r0-r3, pc}
+#ifdef CONFIG_DYNAMIC_FTRACE
+ENTRY(ftrace_caller)
+ __ftrace_caller
+ENDPROC(ftrace_caller)
+#endif
-trace:
- ldr r1, [fp, #-4] @ lr of instrumented routine
- mov r0, lr
- sub r0, r0, #MCOUNT_INSN_SIZE
- mov lr, pc
- mov pc, r2
- ldr lr, [fp, #-4] @ restore lr
- ldmia sp!, {r0-r3, pc}
-ENDPROC(mcount)
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ENTRY(ftrace_graph_caller)
+ __ftrace_graph_caller
+ENDPROC(ftrace_graph_caller)
#endif
-#endif /* CONFIG_DYNAMIC_FTRACE */
+.purgem mcount_enter
+.purgem mcount_get_lr
+.purgem mcount_exit
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ .globl return_to_handler
+return_to_handler:
+ stmdb sp!, {r0-r3}
+ mov r0, fp @ frame pointer
+ bl ftrace_return_to_handler
+ mov lr, r0 @ r0 has real ret addr
+ ldmia sp!, {r0-r3}
+ mov pc, lr
+#endif
ENTRY(ftrace_stub)
.Lftrace_stub:
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 971ac8c36ea..c0062ad1e84 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -24,6 +24,7 @@
#define NOP 0xe8bd4000 /* pop {lr} */
#endif
+#ifdef CONFIG_DYNAMIC_FTRACE
#ifdef CONFIG_OLD_MCOUNT
#define OLD_MCOUNT_ADDR ((unsigned long) mcount)
#define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
@@ -59,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
}
#endif
-/* construct a branch (BL) instruction to addr */
#ifdef CONFIG_THUMB2_KERNEL
-static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+ bool link)
{
unsigned long s, j1, j2, i1, i2, imm10, imm11;
unsigned long first, second;
@@ -83,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
j2 = (!i2) ^ s;
first = 0xf000 | (s << 10) | imm10;
- second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11;
+ second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11;
+ if (link)
+ second |= 1 << 14;
return (second << 16) | first;
}
#else
-static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+ bool link)
{
+ unsigned long opcode = 0xea000000;
long offset;
+ if (link)
+ opcode |= 1 << 24;
+
offset = (long)addr - (long)(pc + 8);
if (unlikely(offset < -33554432 || offset > 33554428)) {
/* Can't generate branches that far (from ARM ARM). Ftrace
@@ -103,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
offset = (offset >> 2) & 0x00ffffff;
- return 0xeb000000 | offset;
+ return opcode | offset;
}
#endif
+static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+{
+ return ftrace_gen_branch(pc, addr, true);
+}
+
static int ftrace_modify_code(unsigned long pc, unsigned long old,
unsigned long new)
{
@@ -193,3 +206,83 @@ int __init ftrace_dyn_arch_init(void *data)
return 0;
}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+ unsigned long frame_pointer)
+{
+ unsigned long return_hooker = (unsigned long) &return_to_handler;
+ struct ftrace_graph_ent trace;
+ unsigned long old;
+ int err;
+
+ if (unlikely(atomic_read(&current->tracing_graph_pause)))
+ return;
+
+ old = *parent;
+ *parent = return_hooker;
+
+ err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+ frame_pointer);
+ if (err == -EBUSY) {
+ *parent = old;
+ return;
+ }
+
+ trace.func = self_addr;
+
+ /* Only trace if the calling function expects to */
+ if (!ftrace_graph_entry(&trace)) {
+ current->curr_ret_stack--;
+ *parent = old;
+ }
+}
+
+#ifdef CONFIG_DYNAMIC_FTRACE
+extern unsigned long ftrace_graph_call;
+extern unsigned long ftrace_graph_call_old;
+extern void ftrace_graph_caller_old(void);
+
+static int __ftrace_modify_caller(unsigned long *callsite,
+ void (*func) (void), bool enable)
+{
+ unsigned long caller_fn = (unsigned long) func;
+ unsigned long pc = (unsigned long) callsite;
+ unsigned long branch = ftrace_gen_branch(pc, caller_fn, false);
+ unsigned long nop = 0xe1a00000; /* mov r0, r0 */
+ unsigned long old = enable ? nop : branch;
+ unsigned long new = enable ? branch : nop;
+
+ return ftrace_modify_code(pc, old, new);
+}
+
+static int ftrace_modify_graph_caller(bool enable)
+{
+ int ret;
+
+ ret = __ftrace_modify_caller(&ftrace_graph_call,
+ ftrace_graph_caller,
+ enable);
+
+#ifdef CONFIG_OLD_MCOUNT
+ if (!ret)
+ ret = __ftrace_modify_caller(&ftrace_graph_call_old,
+ ftrace_graph_caller_old,
+ enable);
+#endif
+
+ return ret;
+}
+
+int ftrace_enable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(true);
+}
+
+int ftrace_disable_ftrace_graph_caller(void)
+{
+ return ftrace_modify_graph_caller(false);
+}
+#endif /* CONFIG_DYNAMIC_FTRACE */
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c
index 36ad3be4692..6d616333340 100644
--- a/arch/arm/kernel/irq.c
+++ b/arch/arm/kernel/irq.c
@@ -35,6 +35,7 @@
#include <linux/list.h>
#include <linux/kallsyms.h>
#include <linux/proc_fs.h>
+#include <linux/ftrace.h>
#include <asm/system.h>
#include <asm/mach/irq.h>
@@ -105,7 +106,8 @@ unlock:
* come via this function. Instead, they should provide their
* own 'handler'
*/
-asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
+asmlinkage void __exception_irq_entry
+asm_do_IRQ(unsigned int irq, struct pt_regs *regs)
{
struct pt_regs *old_regs = set_irq_regs(regs);
diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c
index 07a50357492..421a4bb88fe 100644
--- a/arch/arm/kernel/perf_event.c
+++ b/arch/arm/kernel/perf_event.c
@@ -4,9 +4,7 @@
* ARM performance counter support.
*
* Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles
- *
- * ARMv7 support: Jean Pihet <jpihet@mvista.com>
- * 2010 (c) MontaVista Software, LLC.
+ * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com>
*
* This code is based on the sparc64 perf event code, which is in turn based
* on the x86 code. Callchain code is based on the ARM OProfile backtrace
@@ -69,29 +67,23 @@ struct cpu_hw_events {
};
DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events);
-/* PMU names. */
-static const char *arm_pmu_names[] = {
- [ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
- [ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
- [ARM_PERF_PMU_ID_V6] = "v6",
- [ARM_PERF_PMU_ID_V6MP] = "v6mpcore",
- [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8",
- [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9",
-};
-
struct arm_pmu {
enum arm_perf_pmu_ids id;
+ const char *name;
irqreturn_t (*handle_irq)(int irq_num, void *dev);
void (*enable)(struct hw_perf_event *evt, int idx);
void (*disable)(struct hw_perf_event *evt, int idx);
- int (*event_map)(int evt);
- u64 (*raw_event)(u64);
int (*get_event_idx)(struct cpu_hw_events *cpuc,
struct hw_perf_event *hwc);
u32 (*read_counter)(int idx);
void (*write_counter)(int idx, u32 val);
void (*start)(void);
void (*stop)(void);
+ const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX];
+ const unsigned (*event_map)[PERF_COUNT_HW_MAX];
+ u32 raw_event_mask;
int num_events;
u64 max_period;
};
@@ -136,10 +128,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters);
#define CACHE_OP_UNSUPPORTED 0xFFFF
-static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX];
-
static int
armpmu_map_cache_event(u64 config)
{
@@ -157,7 +145,7 @@ armpmu_map_cache_event(u64 config)
if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX)
return -EINVAL;
- ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result];
+ ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result];
if (ret == CACHE_OP_UNSUPPORTED)
return -ENOENT;
@@ -166,6 +154,19 @@ armpmu_map_cache_event(u64 config)
}
static int
+armpmu_map_event(u64 config)
+{
+ int mapping = (*armpmu->event_map)[config];
+ return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping;
+}
+
+static int
+armpmu_map_raw_event(u64 config)
+{
+ return (int)(config & armpmu->raw_event_mask);
+}
+
+static int
armpmu_event_set_period(struct perf_event *event,
struct hw_perf_event *hwc,
int idx)
@@ -458,11 +459,11 @@ __hw_perf_event_init(struct perf_event *event)
/* Decode the generic type into an ARM event identifier. */
if (PERF_TYPE_HARDWARE == event->attr.type) {
- mapping = armpmu->event_map(event->attr.config);
+ mapping = armpmu_map_event(event->attr.config);
} else if (PERF_TYPE_HW_CACHE == event->attr.type) {
mapping = armpmu_map_cache_event(event->attr.config);
} else if (PERF_TYPE_RAW == event->attr.type) {
- mapping = armpmu->raw_event(event->attr.config);
+ mapping = armpmu_map_raw_event(event->attr.config);
} else {
pr_debug("event type %x not supported\n", event->attr.type);
return -EOPNOTSUPP;
@@ -603,2366 +604,10 @@ static struct pmu pmu = {
.read = armpmu_read,
};
-/*
- * ARMv6 Performance counter handling code.
- *
- * ARMv6 has 2 configurable performance counters and a single cycle counter.
- * They all share a single reset bit but can be written to zero so we can use
- * that for a reset.
- *
- * The counters can't be individually enabled or disabled so when we remove
- * one event and replace it with another we could get spurious counts from the
- * wrong event. However, we can take advantage of the fact that the
- * performance counters can export events to the event bus, and the event bus
- * itself can be monitored. This requires that we *don't* export the events to
- * the event bus. The procedure for disabling a configurable counter is:
- * - change the counter to count the ETMEXTOUT[0] signal (0x20). This
- * effectively stops the counter from counting.
- * - disable the counter's interrupt generation (each counter has it's
- * own interrupt enable bit).
- * Once stopped, the counter value can be written as 0 to reset.
- *
- * To enable a counter:
- * - enable the counter's interrupt generation.
- * - set the new event type.
- *
- * Note: the dedicated cycle counter only counts cycles and can't be
- * enabled/disabled independently of the others. When we want to disable the
- * cycle counter, we have to just disable the interrupt reporting and start
- * ignoring that counter. When re-enabling, we have to reset the value and
- * enable the interrupt.
- */
-
-enum armv6_perf_types {
- ARMV6_PERFCTR_ICACHE_MISS = 0x0,
- ARMV6_PERFCTR_IBUF_STALL = 0x1,
- ARMV6_PERFCTR_DDEP_STALL = 0x2,
- ARMV6_PERFCTR_ITLB_MISS = 0x3,
- ARMV6_PERFCTR_DTLB_MISS = 0x4,
- ARMV6_PERFCTR_BR_EXEC = 0x5,
- ARMV6_PERFCTR_BR_MISPREDICT = 0x6,
- ARMV6_PERFCTR_INSTR_EXEC = 0x7,
- ARMV6_PERFCTR_DCACHE_HIT = 0x9,
- ARMV6_PERFCTR_DCACHE_ACCESS = 0xA,
- ARMV6_PERFCTR_DCACHE_MISS = 0xB,
- ARMV6_PERFCTR_DCACHE_WBACK = 0xC,
- ARMV6_PERFCTR_SW_PC_CHANGE = 0xD,
- ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF,
- ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10,
- ARMV6_PERFCTR_LSU_FULL_STALL = 0x11,
- ARMV6_PERFCTR_WBUF_DRAINED = 0x12,
- ARMV6_PERFCTR_CPU_CYCLES = 0xFF,
- ARMV6_PERFCTR_NOP = 0x20,
-};
-
-enum armv6_counters {
- ARMV6_CYCLE_COUNTER = 1,
- ARMV6_COUNTER0,
- ARMV6_COUNTER1,
-};
-
-/*
- * The hardware events that we support. We do support cache operations but
- * we have harvard caches and no way to combine instruction and data
- * accesses/misses in hardware.
- */
-static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
-};
-
-static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- /*
- * The performance counters don't differentiate between read
- * and write accesses/misses so this isn't strictly correct,
- * but it's the best we can do. Writes and reads get
- * combined.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- /*
- * The ARM performance counters can count micro DTLB misses,
- * micro ITLB misses and main TLB misses. There isn't an event
- * for TLB misses, so use the micro misses here and if users
- * want the main TLB misses they can use a raw counter.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
-};
-
-enum armv6mpcore_perf_types {
- ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0,
- ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1,
- ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2,
- ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3,
- ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4,
- ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5,
- ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6,
- ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7,
- ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8,
- ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA,
- ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB,
- ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC,
- ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD,
- ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE,
- ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF,
- ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10,
- ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11,
- ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12,
- ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13,
- ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF,
-};
-
-/*
- * The hardware events that we support. We do support cache operations but
- * we have harvard caches and no way to combine instruction and data
- * accesses/misses in hardware.
- */
-static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
- [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
- [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
- [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
- [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
- [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
- [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
-};
-
-static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
- [PERF_COUNT_HW_CACHE_OP_MAX]
- [PERF_COUNT_HW_CACHE_RESULT_MAX] = {
- [C(L1D)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
- [C(RESULT_MISS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
- [C(RESULT_MISS)] =
- ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(L1I)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(LL)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(DTLB)] = {
- /*
- * The ARM performance counters can count micro DTLB misses,
- * micro ITLB misses and main TLB misses. There isn't an event
- * for TLB misses, so use the micro misses here and if users
- * want the main TLB misses they can use a raw counter.
- */
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(ITLB)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
- [C(BPU)] = {
- [C(OP_READ)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_WRITE)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- [C(OP_PREFETCH)] = {
- [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
- [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
- },
- },
-};
-
-static inline unsigned long
-armv6_pmcr_read(void)
-{
- u32 val;
- asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val));
- return val;
-}
-
-static inline void
-armv6_pmcr_write(unsigned long val)
-{
- asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val));
-}
-
-#define ARMV6_PMCR_ENABLE (1 << 0)
-#define ARMV6_PMCR_CTR01_RESET (1 << 1)
-#define ARMV6_PMCR_CCOUNT_RESET (1 << 2)
-#define ARMV6_PMCR_CCOUNT_DIV (1 << 3)
-#define ARMV6_PMCR_COUNT0_IEN (1 << 4)
-#define ARMV6_PMCR_COUNT1_IEN (1 << 5)
-#define ARMV6_PMCR_CCOUNT_IEN (1 << 6)
-#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8)
-#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9)
-#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10)
-#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20
-#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT)
-#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12
-#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT)
-
-#define ARMV6_PMCR_OVERFLOWED_MASK \
- (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \
- ARMV6_PMCR_CCOUNT_OVERFLOW)
-
-static inline int
-armv6_pmcr_has_overflowed(unsigned long pmcr)
-{
- return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK);
-}
-
-static inline int
-armv6_pmcr_counter_has_overflowed(unsigned long pmcr,
- enum armv6_counters counter)
-{
- int ret = 0;
-
- if (ARMV6_CYCLE_COUNTER == counter)
- ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW;
- else if (ARMV6_COUNTER0 == counter)
- ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW;
- else if (ARMV6_COUNTER1 == counter)
- ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW;
- else
- WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-
- return ret;
-}
-
-static inline u32
-armv6pmu_read_counter(int counter)
-{
- unsigned long value = 0;
-
- if (ARMV6_CYCLE_COUNTER == counter)
- asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value));
- else if (ARMV6_COUNTER0 == counter)
- asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value));
- else if (ARMV6_COUNTER1 == counter)
- asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value));
- else
- WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-
- return value;
-}
-
-static inline void
-armv6pmu_write_counter(int counter,
- u32 value)
-{
- if (ARMV6_CYCLE_COUNTER == counter)
- asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value));
- else if (ARMV6_COUNTER0 == counter)
- asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value));
- else if (ARMV6_COUNTER1 == counter)
- asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value));
- else
- WARN_ONCE(1, "invalid counter number (%d)\n", counter);
-}
-
-void
-armv6pmu_enable_event(struct hw_perf_event *hwc,
- int idx)
-{
- unsigned long val, mask, evt, flags;
-
- if (ARMV6_CYCLE_COUNTER == idx) {
- mask = 0;
- evt = ARMV6_PMCR_CCOUNT_IEN;
- } else if (ARMV6_COUNTER0 == idx) {
- mask = ARMV6_PMCR_EVT_COUNT0_MASK;
- evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) |
- ARMV6_PMCR_COUNT0_IEN;
- } else if (ARMV6_COUNTER1 == idx) {
- mask = ARMV6_PMCR_EVT_COUNT1_MASK;
- evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) |
- ARMV6_PMCR_COUNT1_IEN;
- } else {
- WARN_ONCE(1, "invalid counter number (%d)\n", idx);
- return;
- }
-
- /*
- * Mask out the current event and set the counter to count the event
- * that we're interested in.
- */
- spin_lock_irqsave(&pmu_lock, flags);
- val = armv6_pmcr_read();
- val &= ~mask;
- val |= evt;
- armv6_pmcr_write(val);
- spin_unlock_irqrestore(&pmu_lock, flags);
-}
-
-static irqreturn_t
-armv6pmu_handle_irq(int irq_num,
- void *dev)
-{
- unsigned long pmcr = armv6_pmcr_read();
- struct perf_sample_data data;
- struct cpu_hw_events *cpuc;
- struct pt_regs *regs;
- int idx;
-
- if (!armv6_pmcr_has_overflowed(pmcr))
- return IRQ_NONE;
-
- regs = get_irq_regs();
-
- /*
- * The interrupts are cleared by writing the overflow flags back to
- * the control register. All of the other bits don't have any effect
- * if they are rewritten, so write the whole value back.
- */
- armv6_pmcr_write(pmcr);
-
- perf_sample_data_init(&data, 0);
-
- cpuc = &__get_cpu_var(cpu_hw_events);
- for (idx = 0; idx <= armpmu->num_events; ++idx) {
- struct perf_event *event = cpuc->events[idx];
- struct hw_perf_event *hwc;
-
- if (!test_bit(idx, cpuc->active_mask))
- continue;
-
- /*
- * We have a single interrupt for all counters. Check that
- * each counter has overflowed before we process it.
- */
- if (!armv6_pmcr_counter_has_overflowed(pmcr, idx))
- continue;
-
- hwc = &event->hw;
- armpmu_event_update(event, hwc, idx);
- data.period = event->hw.last_period;
- if (!armpmu_event_set_period(event, hwc, idx))
- continue;
-
- if (perf_event_overflow(event, 0, &data, regs))
- armpmu->disable(hwc, idx);
- }
-
- /*
- * Handle the pending perf events.
- *
- * Note: this call *must* be run with interrupts disabled. For
- * platforms that can have the PMU interrupts raised as an NMI, this
- * will not work.
- */
- irq_work_run();
-
- return IRQ_HANDLED;
-}
-
-static void
-armv6pmu_start(void)
-{
- unsigned long flags, val;
-
- spin_lock_irqsave(&pmu_lock, flags);
- val = armv6_pmcr_read();
- val |= ARMV6_PMCR_ENABLE;
- armv6_pmcr_write(val);