From 247055aa21ffef1c49dd64710d5e94c2aee19b58 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 13 Sep 2010 16:03:21 +0100 Subject: ARM: 6384/1: Remove the domain switching on ARMv6k/v7 CPUs This patch removes the domain switching functionality via the set_fs and __switch_to functions on cores that have a TLS register. Currently, the ioremap and vmalloc areas share the same level 1 page tables and therefore have the same domain (DOMAIN_KERNEL). When the kernel domain is modified from Client to Manager (via the __set_fs or in the __switch_to function), the XN (eXecute Never) bit is overridden and newer CPUs can speculatively prefetch the ioremap'ed memory. Linux performs the kernel domain switching to allow user-specific functions (copy_to/from_user, get/put_user etc.) to access kernel memory. In order for these functions to work with the kernel domain set to Client, the patch modifies the LDRT/STRT and related instructions to the LDR/STR ones. The user pages access rights are also modified for kernel read-only access rather than read/write so that the copy-on-write mechanism still works. CPU_USE_DOMAINS gets disabled only if the hardware has a TLS register (CPU_32v6K is defined) since writing the TLS value to the high vectors page isn't possible. The user addresses passed to the kernel are checked by the access_ok() function so that they do not point to the kernel space. Tested-by: Anton Vorontsov Cc: Tony Lindgren Signed-off-by: Catalin Marinas Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 4 ++-- arch/arm/kernel/fiq.c | 5 +++++ arch/arm/kernel/traps.c | 14 ++++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c09e3573c5d..35f3f20d673 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -735,7 +735,7 @@ ENTRY(__switch_to) THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack THUMB( str sp, [ip], #4 ) THUMB( str lr, [ip], #4 ) -#ifdef CONFIG_MMU +#ifdef CONFIG_CPU_USE_DOMAINS ldr r6, [r2, #TI_CPU_DOMAIN] #endif set_tls r3, r4, r5 @@ -744,7 +744,7 @@ ENTRY(__switch_to) ldr r8, =__stack_chk_guard ldr r7, [r7, #TSK_STACK_CANARY] #endif -#ifdef CONFIG_MMU +#ifdef CONFIG_CPU_USE_DOMAINS mcr p15, 0, r6, c3, c0, 0 @ Set domain register #endif mov r5, r0 diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index 6ff7919613d..d601ef297eb 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -45,6 +45,7 @@ #include #include #include +#include static unsigned long no_fiq_insn; @@ -77,7 +78,11 @@ int show_fiq_list(struct seq_file *p, void *v) void set_fiq_handler(void *start, unsigned int length) { +#if defined(CONFIG_CPU_USE_DOMAINS) memcpy((void *)0xffff001c, start, length); +#else + memcpy(vectors_page + 0x1c, start, length); +#endif flush_icache_range(0xffff001c, 0xffff001c + length); if (!vectors_high()) flush_icache_range(0x1c, 0x1c + length); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index cda78d59aa3..87abca01805 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -37,6 +37,8 @@ static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; +void *vectors_page; + #ifdef CONFIG_DEBUG_USER unsigned int user_debug; @@ -759,7 +761,11 @@ static void __init kuser_get_tls_init(unsigned long vectors) void __init early_trap_init(void) { +#if defined(CONFIG_CPU_USE_DOMAINS) unsigned long vectors = CONFIG_VECTORS_BASE; +#else + unsigned 
long vectors = (unsigned long)vectors_page;
+#endif
 	extern char __stubs_start[], __stubs_end[];
 	extern char __vectors_start[], __vectors_end[];
 	extern char __kuser_helper_start[], __kuser_helper_end[];
@@ -783,10 +789,10 @@ void __init early_trap_init(void)
 	 * Copy signal return handlers into the vector page, and
 	 * set sigreturn to be a pointer to these.
 	 */
-	memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes,
-	       sizeof(sigreturn_codes));
-	memcpy((void *)KERN_RESTART_CODE, syscall_restart_code,
-	       sizeof(syscall_restart_code));
+	memcpy((void *)(vectors + KERN_SIGRETURN_CODE - CONFIG_VECTORS_BASE),
+	       sigreturn_codes, sizeof(sigreturn_codes));
+	memcpy((void *)(vectors + KERN_RESTART_CODE - CONFIG_VECTORS_BASE),
+	       syscall_restart_code, sizeof(syscall_restart_code));
 	flush_icache_range(vectors, vectors + PAGE_SIZE);
 	modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
--
cgit v1.2.3-18-g5258


From 64d2dc384e41e2b7acead6804593ddaaf8aad8e1 Mon Sep 17 00:00:00 2001
From: Leif Lindholm
Date: Thu, 16 Sep 2010 18:00:47 +0100
Subject: ARM: 6396/1: Add SWP/SWPB emulation for ARMv7 processors

The SWP instruction was deprecated in the ARMv6 architecture, superseded
by the LDREX/STREX family of instructions for load-linked/store-conditional
operations. The ARMv7 multiprocessing extensions mandate that SWP/SWPB
instructions are treated as undefined from reset, with the ability to
enable them through the System Control Register SW bit.

This patch adds an alternative solution that emulates the SWP and SWPB
instructions using LDREX/STREX sequences, and logs statistics to
/proc/cpu/swp_emulation. To correctly deal with copy-on-write, it also
modifies cpu_v7_set_pte_ext to change the mappings to privileged RO when
user RO.

Signed-off-by: Leif Lindholm
Acked-by: Catalin Marinas
Acked-by: Kirill A. Shutemov
Signed-off-by: Russell King
---
 arch/arm/kernel/Makefile      |   1 +
 arch/arm/kernel/swp_emulate.c | 267 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 268 insertions(+)
 create mode 100644 arch/arm/kernel/swp_emulate.c
(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile
index 5b9b268f4fb..119a6bb59bd 100644
--- a/arch/arm/kernel/Makefile
+++ b/arch/arm/kernel/Makefile
@@ -42,6 +42,7 @@ obj-$(CONFIG_KGDB)	+= kgdb.o
 obj-$(CONFIG_ARM_UNWIND)	+= unwind.o
 obj-$(CONFIG_HAVE_TCM)	+= tcm.o
 obj-$(CONFIG_CRASH_DUMP)	+= crash_dump.o
+obj-$(CONFIG_SWP_EMULATE)	+= swp_emulate.o
 obj-$(CONFIG_HAVE_HW_BREAKPOINT)	+= hw_breakpoint.o
 obj-$(CONFIG_CRUNCH)	+= crunch.o crunch-bits.o

diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
new file mode 100644
index 00000000000..7a576092291
--- /dev/null
+++ b/arch/arm/kernel/swp_emulate.c
@@ -0,0 +1,267 @@
+/*
+ *  linux/arch/arm/kernel/swp_emulate.c
+ *
+ *  Copyright (C) 2009 ARM Limited
+ *  __user_* functions adapted from include/asm/uaccess.h
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Implements emulation of the SWP/SWPB instructions using load-exclusive and
+ * store-exclusive for processors that have them disabled (or future ones that
+ * might not implement them).
+ *
+ * Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>]
+ * Where: Rt  = destination
+ *	  Rt2 = source
+ *	  Rn  = address
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/proc_fs.h>
+#include <linux/sched.h>
+#include <linux/syscalls.h>
+#include <linux/perf_event.h>
+
+#include <asm/traps.h>
+#include <asm/uaccess.h>
+
+/*
+ * Error-checking SWP macros implemented using ldrex{b}/strex{b}
+ */
+#define __user_swpX_asm(data, addr, res, temp, B)	\
+	__asm__ __volatile__(				\
+	"	mov	%2, %1\n"			\
+	"0:	ldrex"B"	%1, [%3]\n"		\
+	"1:	strex"B"	%0, %2, [%3]\n"		\
+	"	cmp	%0, #0\n"			\
+	"	movne	%0, %4\n"			\
+	"2:\n"						\
+	"	.section	.fixup,\"ax\"\n"	\
+	"	.align	2\n"				\
+	"3:	mov	%0, %5\n"			\
+	"	b	2b\n"				\
+	"	.previous\n"				\
+	"	.section	__ex_table,\"a\"\n"	\
+	"	.align	3\n"				\
+	"	.long	0b, 3b\n"			\
+	"	.long	1b, 3b\n"			\
+	"	.previous"				\
+	: "=&r" (res), "+r" (data), "=&r" (temp)	\
+	: "r" (addr), "i" (-EAGAIN), "i" (-EFAULT)	\
+	: "cc", "memory")

+#define __user_swp_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "")
+#define __user_swpb_asm(data, addr, res, temp) \
+	__user_swpX_asm(data, addr, res, temp, "b")
+
+/*
+ * Macros/defines for extracting register numbers from instruction.
+ */
+#define EXTRACT_REG_NUM(instruction, offset) \
+	(((instruction) & (0xf << (offset))) >> (offset))
+#define RN_OFFSET	16
+#define RT_OFFSET	12
+#define RT2_OFFSET	0
+/*
+ * Bit 22 of the instruction encoding distinguishes between
+ * the SWP and SWPB variants (bit set means SWPB).
+ */
+#define TYPE_SWPB (1 << 22)
+
+static unsigned long swpcounter;
+static unsigned long swpbcounter;
+static unsigned long abtcounter;
+static pid_t previous_pid;
+
+#ifdef CONFIG_PROC_FS
+static int proc_read_status(char *page, char **start, off_t off, int count,
+			    int *eof, void *data)
+{
+	char *p = page;
+	int len;
+
+	p += sprintf(p, "Emulated SWP:\t\t%lu\n", swpcounter);
+	p += sprintf(p, "Emulated SWPB:\t\t%lu\n", swpbcounter);
+	p += sprintf(p, "Aborted SWP{B}:\t\t%lu\n", abtcounter);
+	if (previous_pid != 0)
+		p += sprintf(p, "Last process:\t\t%d\n", previous_pid);
+
+	len = (p - page) - off;
+	if (len < 0)
+		len = 0;
+
+	*eof = (len <= count) ? 1 : 0;
+	*start = page + off;
+
+	return len;
+}
+#endif
+
+/*
+ * Set up process info to signal segmentation fault - called on access error.
+ */
+static void set_segfault(struct pt_regs *regs, unsigned long addr)
+{
+	siginfo_t info;
+
+	if (find_vma(current->mm, addr) == NULL)
+		info.si_code = SEGV_MAPERR;
+	else
+		info.si_code = SEGV_ACCERR;
+
+	info.si_signo = SIGSEGV;
+	info.si_errno = 0;
+	info.si_addr = (void *) instruction_pointer(regs);
+
+	pr_debug("SWP{B} emulation: access caused memory abort!\n");
+	arm_notify_die("Illegal memory access", regs, &info, 0, 0);
+
+	abtcounter++;
+}
+
+static int emulate_swpX(unsigned int address, unsigned int *data,
+			unsigned int type)
+{
+	unsigned int res = 0;
+
+	if ((type != TYPE_SWPB) && (address & 0x3)) {
+		/* SWP to unaligned address not permitted */
+		pr_debug("SWP instruction on unaligned pointer!\n");
+		return -EFAULT;
+	}
+
+	while (1) {
+		unsigned long temp;
+
+		/*
+		 * Barrier required between accessing protected resource and
+		 * releasing a lock for it. Legacy code might not have done
+		 * this, and we cannot determine that this is not the case
+		 * being emulated, so insert always.
+		 */
+		smp_mb();
+
+		if (type == TYPE_SWPB)
+			__user_swpb_asm(*data, address, res, temp);
+		else
+			__user_swp_asm(*data, address, res, temp);
+
+		if (likely(res != -EAGAIN) || signal_pending(current))
+			break;
+
+		cond_resched();
+	}
+
+	if (res == 0) {
+		/*
+		 * Barrier also required between acquiring a lock for a
+		 * protected resource and accessing the resource. Inserted for
+		 * same reason as above.
+		 */
+		smp_mb();
+
+		if (type == TYPE_SWPB)
+			swpbcounter++;
+		else
+			swpcounter++;
+	}
+
+	return res;
+}
+
+/*
+ * swp_handler logs the id of the calling process, dissects the instruction,
+ * sanity checks the memory location, calls emulate_swpX for the actual
+ * operation and deals with fixup/error handling before returning
+ */
+static int swp_handler(struct pt_regs *regs, unsigned int instr)
+{
+	unsigned int address, destreg, data, type;
+	unsigned int res = 0;
+
+	perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, 0, regs, regs->ARM_pc);
+
+	if (current->pid != previous_pid) {
+		pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n",
+			 current->comm, (unsigned long)current->pid);
+		previous_pid = current->pid;
+	}
+
+	address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)];
+	data	= regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)];
+	destreg = EXTRACT_REG_NUM(instr, RT_OFFSET);
+
+	type = instr & TYPE_SWPB;
+
+	pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n",
+		 EXTRACT_REG_NUM(instr, RN_OFFSET), address,
+		 destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data);
+
+	/* Check access in reasonable access range for both SWP and SWPB */
+	if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) {
+		pr_debug("SWP{B} emulation: access to %p not allowed!\n",
+			 (void *)address);
+		res = -EFAULT;
+	} else {
+		res = emulate_swpX(address, &data, type);
+	}
+
+	if (res == 0) {
+		/*
+		 * On successful emulation, revert the adjustment to the PC
+		 * made in kernel/traps.c in order to resume execution at the
+		 * instruction following the SWP{B}.
+		 */
+		regs->ARM_pc += 4;
+		regs->uregs[destreg] = data;
+	} else if (res == -EFAULT) {
+		/*
+		 * Memory errors do not mean emulation failed.
+		 * Set up signal info to return SEGV, then return OK
+		 */
+		set_segfault(regs, address);
+	}
+
+	return 0;
+}
+
+/*
+ * Only emulate SWP/SWPB executed in ARM state/User mode.
+ * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE.
+ */
+static struct undef_hook swp_hook = {
+	.instr_mask = 0x0fb00ff0,
+	.instr_val  = 0x01000090,
+	.cpsr_mask  = MODE_MASK | PSR_T_BIT | PSR_J_BIT,
+	.cpsr_val   = USR_MODE,
+	.fn	    = swp_handler
+};
+
+/*
+ * Register handler and create status file in /proc/cpu
+ * Invoked as late_initcall, since not needed before init spawned.
+ */
+static int __init swp_emulation_init(void)
+{
+#ifdef CONFIG_PROC_FS
+	struct proc_dir_entry *res;
+
+	res = create_proc_entry("cpu/swp_emulation", S_IRUGO, NULL);
+
+	if (!res)
+		return -ENOMEM;
+
+	res->read_proc = proc_read_status;
+#endif /* CONFIG_PROC_FS */
+
+	printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n");
+	register_undef_hook(&swp_hook);
+
+	return 0;
+}
+
+late_initcall(swp_emulation_init);
--
cgit v1.2.3-18-g5258


From 61b5cb1c3bff8875d2fd289c7b6ac344f95261fa Mon Sep 17 00:00:00 2001
From: Rabin Vincent
Date: Thu, 7 Oct 2010 20:51:58 +0530
Subject: ARM: place C irq handlers in IRQ_ENTRY for ftrace

When FUNCTION_GRAPH_TRACER is enabled, place do_IRQ() and friends in the
IRQ_ENTRY section so that the irq-related features of the function graph
tracer work.
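
For reference, a minimal sketch of how such an annotation can be wired up
(an illustration rather than a quote from the tree; __irq_entry is the
generic annotation for the .irqentry.text section collected by
IRQENTRY_TEXT, and the fallback reuses the existing __exception marker):

#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#define __exception_irq_entry	__irq_entry	/* placed in IRQENTRY_TEXT */
#else
#define __exception_irq_entry	__exception	/* plain exception section */
#endif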
Signed-off-by: Rabin Vincent --- arch/arm/kernel/irq.c | 4 +++- arch/arm/kernel/smp.c | 5 +++-- arch/arm/kernel/vmlinux.lds.S | 1 + 3 files changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692..6d616333340 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -105,7 +106,8 @@ unlock: * come via this function. Instead, they should provide their * own 'handler' */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c195959025..bbca89872c1 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -457,7 +458,7 @@ static void ipi_timer(void) } #ifdef CONFIG_LOCAL_TIMERS -asmlinkage void __exception do_local_timer(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_local_timer(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); int cpu = smp_processor_id(); @@ -544,7 +545,7 @@ static void ipi_cpu_stop(unsigned int cpu) * * Bit 0 - Inter-processor function call */ -asmlinkage void __exception do_IPI(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_IPI(struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); struct ipi_data *ipi = &per_cpu(ipi_data, cpu); diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index cead8893b46..897c1a8f169 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -101,6 +101,7 @@ SECTIONS __exception_text_start = .; *(.exception.text) __exception_text_end = .; + IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT -- cgit v1.2.3-18-g5258 From d3b9dc9dd2b994f396741f7086ffe7a48bacb165 Mon Sep 17 00:00:00 2001 From: Rabin Vincent Date: Thu, 7 Oct 2010 17:39:47 +0530 Subject: ARM: ftrace: use gas macros to avoid code duplication Use assembler macros to avoid copy/pasting code between the implementations of the two variants of the mcount call. 
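
The idea, transcribed into C-preprocessor terms purely for illustration
(the patch itself uses gas .macro/.purgem, which let the helper macros be
redefined and the shared body instantiated once per mcount flavour; all
names below are invented):

#include <stdio.h>

/* Shared body written once; the variant-specific enter/exit sequences
 * are supplied by macros that get redefined between instantiations. */
#define DEFINE_TRACER(suffix)					\
static void tracer_##suffix(void)				\
{								\
	variant_enter();					\
	printf("common tracer body (%s)\n", #suffix);		\
	variant_exit();						\
}

#define variant_enter()	printf("new-style enter\n")
#define variant_exit()	printf("new-style exit\n")
DEFINE_TRACER(new)
#undef variant_enter
#undef variant_exit

#define variant_enter()	printf("old-style enter\n")
#define variant_exit()	printf("old-style exit\n")
DEFINE_TRACER(old)

int main(void)
{
	tracer_new();
	tracer_old();
	return 0;
}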
Signed-off-by: Rabin Vincent --- arch/arm/kernel/entry-common.S | 146 ++++++++++++++++++++++------------------- 1 file changed, 80 insertions(+), 66 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8bfa98757cd..fe1d5862b19 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -141,98 +141,112 @@ ENDPROC(ret_from_fork) #endif #endif -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(__gnu_mcount_nc) - mov ip, lr - ldmia sp!, {lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) +.macro __mcount suffix + mcount_enter + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, .Lftrace_stub + cmp r0, r2 + bne 1f + mcount_exit -ENTRY(ftrace_caller) - stmdb sp!, {r0-r3, lr} - mov r0, lr +1: mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function + sub r0, r0, #MCOUNT_INSN_SIZE + adr lr, BSYM(2f) + mov pc, r2 +2: mcount_exit +.endm + +.macro __ftrace_caller suffix + mcount_enter + + mcount_get_lr r1 @ lr of instrumented func + mov r0, lr @ instrumented function sub r0, r0, #MCOUNT_INSN_SIZE - ldr r1, [sp, #20] - .global ftrace_call -ftrace_call: + .globl ftrace_call\suffix +ftrace_call\suffix: bl ftrace_stub - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(ftrace_caller) + + mcount_exit +.endm #ifdef CONFIG_OLD_MCOUNT +/* + * mcount + */ + +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [fp, #-4] +.endm + +.macro mcount_exit + ldr lr, [fp, #-4] + ldmia sp!, {r0-r3, pc} +.endm + ENTRY(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE stmdb sp!, {lr} ldr lr, [fp, #-4] ldmia sp!, {pc} +#else + __mcount _old +#endif ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) - stmdb sp!, {r0-r3, lr} - ldr r1, [fp, #-4] - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - - .globl ftrace_call_old -ftrace_call_old: - bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} + __ftrace_caller _old ENDPROC(ftrace_caller_old) #endif -#else +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit +#endif -ENTRY(__gnu_mcount_nc) +/* + * __gnu_mcount_nc + */ + +.macro mcount_enter stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne gnu_trace +.endm + +.macro mcount_get_lr reg + ldr \reg, [sp, #20] +.endm + +.macro mcount_exit ldmia sp!, {r0-r3, ip, lr} mov pc, ip +.endm -gnu_trace: - ldr r1, [sp, #20] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(1f) - mov pc, r2 -1: - ldmia sp!, {r0-r3, ip, lr} +ENTRY(__gnu_mcount_nc) +#ifdef CONFIG_DYNAMIC_FTRACE + mov ip, lr + ldmia sp!, {lr} mov pc, ip +#else + __mcount +#endif ENDPROC(__gnu_mcount_nc) -#ifdef CONFIG_OLD_MCOUNT -/* - * This is under an ifdef in order to force link-time errors for people trying - * to build with !FRAME_POINTER with a GCC which doesn't use the new-style - * mcount. 
- */ -ENTRY(mcount) - stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, ftrace_stub - cmp r0, r2 - bne trace - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} - -trace: - ldr r1, [fp, #-4] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - mov lr, pc - mov pc, r2 - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller) + __ftrace_caller +ENDPROC(ftrace_caller) #endif -#endif /* CONFIG_DYNAMIC_FTRACE */ +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit ENTRY(ftrace_stub) .Lftrace_stub: -- cgit v1.2.3-18-g5258 From 376cfa8730c08c0394d0aa1d4a80fd8c9971f323 Mon Sep 17 00:00:00 2001 From: Tim Bird Date: Sat, 9 Oct 2010 22:24:38 +0530 Subject: ARM: ftrace: function graph tracer support Cc: Tim Bird [rabin@rab.in: rebase on top of latest code, keep code in ftrace.c instead of separate file, check for ftrace_graph_entry also] Signed-off-by: Rabin Vincent --- arch/arm/kernel/Makefile | 3 ++- arch/arm/kernel/entry-common.S | 46 ++++++++++++++++++++++++++++++++++++++++++ arch/arm/kernel/ftrace.c | 34 +++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fb..679851a9f58 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -5,7 +5,7 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg endif @@ -33,6 +33,7 @@ obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o obj-$(CONFIG_ATAGS_PROC) += atags.o diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index fe1d5862b19..9f176621166 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -148,6 +148,20 @@ ENDPROC(ret_from_fork) adr r0, .Lftrace_stub cmp r0, r2 bne 1f + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldr r1, =ftrace_graph_return + ldr r2, [r1] + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr r1, =ftrace_graph_entry + ldr r2, [r1] + ldr r0, =ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix +#endif + mcount_exit 1: mcount_get_lr r1 @ lr of instrumented func @@ -172,6 +186,15 @@ ftrace_call\suffix: mcount_exit .endm +.macro __ftrace_graph_caller + sub r0, fp, #4 @ &lr of instrumented routine (&parent) + mov r1, lr @ instrumented routine (func) + sub r1, r1, #MCOUNT_INSN_SIZE + mov r2, fp @ frame pointer + bl prepare_ftrace_return + mcount_exit +.endm + #ifdef CONFIG_OLD_MCOUNT /* * mcount @@ -206,6 +229,12 @@ ENTRY(ftrace_caller_old) ENDPROC(ftrace_caller_old) #endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller_old) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller_old) +#endif + .purgem mcount_enter .purgem mcount_get_lr .purgem mcount_exit @@ -244,10 +273,27 @@ ENTRY(ftrace_caller) ENDPROC(ftrace_caller) #endif +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller) +#endif + .purgem mcount_enter .purgem mcount_get_lr .purgem mcount_exit +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl return_to_handler 
+return_to_handler:
+	stmdb	sp!, {r0-r3}
+	mov	r0, fp			@ frame pointer
+	bl	ftrace_return_to_handler
+	mov	lr, r0			@ r0 has real ret addr
+	ldmia	sp!, {r0-r3}
+	mov	pc, lr
+#endif
+
 ENTRY(ftrace_stub)
 .Lftrace_stub:
 	mov	pc, lr
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 971ac8c36ea..7a702a50287 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -24,6 +24,7 @@
 #define	NOP		0xe8bd4000	/* pop {lr} */
 #endif
+#ifdef CONFIG_DYNAMIC_FTRACE
 #ifdef CONFIG_OLD_MCOUNT
 #define OLD_MCOUNT_ADDR	((unsigned long) mcount)
 #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old)
@@ -193,3 +194,36 @@ int __init ftrace_dyn_arch_init(void *data)
 	return 0;
 }
+#endif /* CONFIG_DYNAMIC_FTRACE */
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+			   unsigned long frame_pointer)
+{
+	unsigned long return_hooker = (unsigned long) &return_to_handler;
+	struct ftrace_graph_ent trace;
+	unsigned long old;
+	int err;
+
+	if (unlikely(atomic_read(&current->tracing_graph_pause)))
+		return;
+
+	old = *parent;
+	*parent = return_hooker;
+
+	err = ftrace_push_return_trace(old, self_addr, &trace.depth,
+				       frame_pointer);
+	if (err == -EBUSY) {
+		*parent = old;
+		return;
+	}
+
+	trace.func = self_addr;
+
+	/* Only trace if the calling function expects to */
+	if (!ftrace_graph_entry(&trace)) {
+		current->curr_ret_stack--;
+		*parent = old;
+	}
+}
+#endif /* CONFIG_FUNCTION_GRAPH_TRACER */
--
cgit v1.2.3-18-g5258


From dd686eb13959e49a1112fd608c124ab711050582 Mon Sep 17 00:00:00 2001
From: Rabin Vincent
Date: Sat, 6 Nov 2010 23:03:21 +0530
Subject: ARM: ftrace: graph tracer + dynamic ftrace

Support the graph tracer + dynamic ftrace combination on ARM.

Signed-off-by: Rabin Vincent
---
 arch/arm/kernel/entry-common.S | 12 ++++++++
 arch/arm/kernel/ftrace.c       | 69 +++++++++++++++++++++++++++++++++++++++---
 2 files changed, 76 insertions(+), 5 deletions(-)
(limited to 'arch/arm/kernel')

diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
index 9f176621166..aae802ee12f 100644
--- a/arch/arm/kernel/entry-common.S
+++ b/arch/arm/kernel/entry-common.S
@@ -183,12 +183,24 @@ ENDPROC(ret_from_fork)
 ftrace_call\suffix:
 	bl	ftrace_stub
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+	.globl ftrace_graph_call\suffix
+ftrace_graph_call\suffix:
+	mov	r0, r0
+#endif
+
 	mcount_exit
 .endm
 .macro __ftrace_graph_caller
 	sub	r0, fp, #4		@ &lr of instrumented routine (&parent)
+#ifdef CONFIG_DYNAMIC_FTRACE
+	@ called from __ftrace_caller, saved in mcount_enter
+	ldr	r1, [sp, #16]		@ instrumented routine (func)
+#else
+	@ called from __mcount, untouched in lr
 	mov	r1, lr			@ instrumented routine (func)
+#endif
 	sub	r1, r1, #MCOUNT_INSN_SIZE
 	mov	r2, fp			@ frame pointer
 	bl	prepare_ftrace_return
diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c
index 7a702a50287..c0062ad1e84 100644
--- a/arch/arm/kernel/ftrace.c
+++ b/arch/arm/kernel/ftrace.c
@@ -60,9 +60,9 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr)
 }
 #endif
-/* construct a branch (BL) instruction to addr */
 #ifdef CONFIG_THUMB2_KERNEL
-static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
+static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr,
+				       bool link)
 {
 	unsigned long s, j1, j2, i1, i2, imm10, imm11;
 	unsigned long first, second;
@@ -84,15 +84,22 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr)
 	j2 = (!i2) ^ s;
 	first = 0xf000 | (s << 10) | imm10;
-	second = 0xd000 |
(j1 << 13) | (j2 << 11) | imm11; + second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; + if (link) + second |= 1 << 14; return (second << 16) | first; } #else -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +static unsigned long ftrace_gen_branch(unsigned long pc, unsigned long addr, + bool link) { + unsigned long opcode = 0xea000000; long offset; + if (link) + opcode |= 1 << 24; + offset = (long)addr - (long)(pc + 8); if (unlikely(offset < -33554432 || offset > 33554428)) { /* Can't generate branches that far (from ARM ARM). Ftrace @@ -104,10 +111,15 @@ static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) offset = (offset >> 2) & 0x00ffffff; - return 0xeb000000 | offset; + return opcode | offset; } #endif +static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +{ + return ftrace_gen_branch(pc, addr, true); +} + static int ftrace_modify_code(unsigned long pc, unsigned long old, unsigned long new) { @@ -226,4 +238,51 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, *parent = old; } } + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; +extern unsigned long ftrace_graph_call_old; +extern void ftrace_graph_caller_old(void); + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = ftrace_gen_branch(pc, caller_fn, false); + unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_call_old, + ftrace_graph_caller_old, + enable); +#endif + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3-18-g5258 From 84fee97a026ca085f08381054513f9e24689a303 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:13:56 +0000 Subject: ARM: perf: consolidate common PMU behaviour The functions for mapping PMU events (perf, cache and raw) are common between all PMU types and differ only in the data on which they operate. This patch implements common definitions of these mapping functions and changes the arm_pmu struct to hold pointers to the data which they require. This is in anticipation of separating out the PMU-specific code into separate files. 
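
As an illustration of the result (not part of the patch; the "mypmu" name
and event numbers are invented), a backend now only has to provide its
data tables and a raw event mask -- the common armpmu_map_event() and
armpmu_map_raw_event() helpers do the lookups:

static const unsigned mypmu_perf_map[PERF_COUNT_HW_MAX] = {
	[PERF_COUNT_HW_CPU_CYCLES]	    = 0x11,
	[PERF_COUNT_HW_INSTRUCTIONS]	    = 0x08,
	[PERF_COUNT_HW_CACHE_REFERENCES]    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_CACHE_MISSES]	    = HW_OP_UNSUPPORTED,
	[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x0c,
	[PERF_COUNT_HW_BRANCH_MISSES]	    = 0x10,
	[PERF_COUNT_HW_BUS_CYCLES]	    = HW_OP_UNSUPPORTED,
};

static const struct arm_pmu mypmu = {
	/* .handle_irq, .enable, .disable, ... as for any other PMU */
	.event_map	= &mypmu_perf_map,
	.raw_event_mask	= 0xFF,
	.num_events	= 2,
	.max_period	= (1LLU << 32) - 1,
};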
Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 131 +++++++++++++------------------------------ 1 file changed, 38 insertions(+), 93 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492..c49e1701a2f 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -84,14 +84,17 @@ struct arm_pmu { irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct hw_perf_event *evt, int idx); void (*disable)(struct hw_perf_event *evt, int idx); - int (*event_map)(int evt); - u64 (*raw_event)(u64); int (*get_event_idx)(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc); u32 (*read_counter)(int idx); void (*write_counter)(int idx, u32 val); void (*start)(void); void (*stop)(void); + const unsigned (*cache_map)[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX]; + const unsigned (*event_map)[PERF_COUNT_HW_MAX]; + u32 raw_event_mask; int num_events; u64 max_period; }; @@ -136,10 +139,6 @@ EXPORT_SYMBOL_GPL(perf_num_counters); #define CACHE_OP_UNSUPPORTED 0xFFFF -static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - static int armpmu_map_cache_event(u64 config) { @@ -157,7 +156,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; + ret = (int)(*armpmu->cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -165,6 +164,19 @@ armpmu_map_cache_event(u64 config) return ret; } +static int +armpmu_map_event(u64 config) +{ + int mapping = (*armpmu->event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -EOPNOTSUPP : mapping; +} + +static int +armpmu_map_raw_event(u64 config) +{ + return (int)(config & armpmu->raw_event_mask); +} + static int armpmu_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, @@ -458,11 +470,11 @@ __hw_perf_event_init(struct perf_event *event) /* Decode the generic type into an ARM event identifier. 
*/ if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu->event_map(event->attr.config); + mapping = armpmu_map_event(event->attr.config); } else if (PERF_TYPE_HW_CACHE == event->attr.type) { mapping = armpmu_map_cache_event(event->attr.config); } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu->raw_event(event->attr.config); + mapping = armpmu_map_raw_event(event->attr.config); } else { pr_debug("event type %x not supported\n", event->attr.type); return -EOPNOTSUPP; @@ -1121,30 +1133,6 @@ armv6pmu_stop(void) spin_unlock_irqrestore(&pmu_lock, flags); } -static inline int -armv6pmu_event_map(int config) -{ - int mapping = armv6_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int -armv6mpcore_pmu_event_map(int config) -{ - int mapping = armv6mpcore_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -armv6pmu_raw_event(u64 config) -{ - return config & 0xff; -} - static int armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) @@ -1240,13 +1228,14 @@ static const struct arm_pmu armv6pmu = { .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6pmu_disable_event, - .event_map = armv6pmu_event_map, - .raw_event = armv6pmu_raw_event, .read_counter = armv6pmu_read_counter, .write_counter = armv6pmu_write_counter, .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, + .cache_map = &armv6_perf_cache_map, + .event_map = &armv6_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -1263,13 +1252,14 @@ static const struct arm_pmu armv6mpcore_pmu = { .handle_irq = armv6pmu_handle_irq, .enable = armv6pmu_enable_event, .disable = armv6mpcore_pmu_disable_event, - .event_map = armv6mpcore_pmu_event_map, - .raw_event = armv6pmu_raw_event, .read_counter = armv6pmu_read_counter, .write_counter = armv6pmu_write_counter, .get_event_idx = armv6pmu_get_event_idx, .start = armv6pmu_start, .stop = armv6pmu_stop, + .cache_map = &armv6mpcore_perf_cache_map, + .event_map = &armv6mpcore_perf_map, + .raw_event_mask = 0xFF, .num_events = 3, .max_period = (1LLU << 32) - 1, }; @@ -2093,27 +2083,6 @@ static void armv7pmu_stop(void) spin_unlock_irqrestore(&pmu_lock, flags); } -static inline int armv7_a8_pmu_event_map(int config) -{ - int mapping = armv7_a8_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int armv7_a9_pmu_event_map(int config) -{ - int mapping = armv7_a9_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 armv7pmu_raw_event(u64 config) -{ - return config & 0xff; -} - static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *event) { @@ -2144,12 +2113,12 @@ static struct arm_pmu armv7pmu = { .handle_irq = armv7pmu_handle_irq, .enable = armv7pmu_enable_event, .disable = armv7pmu_disable_event, - .raw_event = armv7pmu_raw_event, .read_counter = armv7pmu_read_counter, .write_counter = armv7pmu_write_counter, .get_event_idx = armv7pmu_get_event_idx, .start = armv7pmu_start, .stop = armv7pmu_stop, + .raw_event_mask = 0xFF, .max_period = (1LLU << 32) - 1, }; @@ -2318,21 +2287,6 @@ static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] #define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) #define XSCALE_PMU_CNT64 0x008 -static inline int -xscalepmu_event_map(int config) 
-{
-	int mapping = xscale_perf_map[config];
-	if (HW_OP_UNSUPPORTED == mapping)
-		mapping = -EOPNOTSUPP;
-	return mapping;
-}
-
-static u64
-xscalepmu_raw_event(u64 config)
-{
-	return config & 0xff;
-}
-
 #define XSCALE1_OVERFLOWED_MASK	0x700
 #define XSCALE1_CCOUNT_OVERFLOW	0x400
 #define XSCALE1_COUNT0_OVERFLOW	0x100
@@ -2598,13 +2552,14 @@ static const struct arm_pmu xscale1pmu = {
 	.handle_irq	= xscale1pmu_handle_irq,
 	.enable		= xscale1pmu_enable_event,
 	.disable	= xscale1pmu_disable_event,
-	.event_map	= xscalepmu_event_map,
-	.raw_event	= xscalepmu_raw_event,
 	.read_counter	= xscale1pmu_read_counter,
 	.write_counter	= xscale1pmu_write_counter,
 	.get_event_idx	= xscale1pmu_get_event_idx,
 	.start		= xscale1pmu_start,
 	.stop		= xscale1pmu_stop,
+	.cache_map	= &xscale_perf_cache_map,
+	.event_map	= &xscale_perf_map,
+	.raw_event_mask	= 0xFF,
 	.num_events	= 3,
 	.max_period	= (1LLU << 32) - 1,
 };
@@ -2953,13 +2908,14 @@ static const struct arm_pmu xscale2pmu = {
 	.handle_irq	= xscale2pmu_handle_irq,
 	.enable		= xscale2pmu_enable_event,
 	.disable	= xscale2pmu_disable_event,
-	.event_map	= xscalepmu_event_map,
-	.raw_event	= xscalepmu_raw_event,
 	.read_counter	= xscale2pmu_read_counter,
 	.write_counter	= xscale2pmu_write_counter,
 	.get_event_idx	= xscale2pmu_get_event_idx,
 	.start		= xscale2pmu_start,
 	.stop		= xscale2pmu_stop,
+	.cache_map	= &xscale_perf_cache_map,
+	.event_map	= &xscale_perf_map,
+	.raw_event_mask	= 0xFF,
 	.num_events	= 5,
 	.max_period	= (1LLU << 32) - 1,
 };
@@ -2978,20 +2934,14 @@ init_hw_perf_events(void)
 		case 0xB560:	/* ARM1156 */
 		case 0xB760:	/* ARM1176 */
 			armpmu = &armv6pmu;
-			memcpy(armpmu_perf_cache_map, armv6_perf_cache_map,
-			       sizeof(armv6_perf_cache_map));
 			break;
 		case 0xB020:	/* ARM11mpcore */
 			armpmu = &armv6mpcore_pmu;
-			memcpy(armpmu_perf_cache_map,
-			       armv6mpcore_perf_cache_map,
-			       sizeof(armv6mpcore_perf_cache_map));
 			break;
 		case 0xC080:	/* Cortex-A8 */
 			armv7pmu.id = ARM_PERF_PMU_ID_CA8;
-			memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map,
-			       sizeof(armv7_a8_perf_cache_map));
-			armv7pmu.event_map = armv7_a8_pmu_event_map;
+			armv7pmu.cache_map = &armv7_a8_perf_cache_map;
+			armv7pmu.event_map = &armv7_a8_perf_map;
 			armpmu = &armv7pmu;
 			/* Reset PMNC and read the nb of CNTx counters
@@ -3000,9 +2950,8 @@ init_hw_perf_events(void)
 			break;
 		case 0xC090:	/* Cortex-A9 */
 			armv7pmu.id = ARM_PERF_PMU_ID_CA9;
-			memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map,
-			       sizeof(armv7_a9_perf_cache_map));
-			armv7pmu.event_map = armv7_a9_pmu_event_map;
+			armv7pmu.cache_map = &armv7_a9_perf_cache_map;
+			armv7pmu.event_map = &armv7_a9_perf_map;
 			armpmu = &armv7pmu;
 			/* Reset PMNC and read the nb of CNTx counters
@@ -3016,13 +2965,9 @@
 		switch (part_number) {
 		case 1:
 			armpmu = &xscale1pmu;
-			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
-			       sizeof(xscale_perf_cache_map));
 			break;
 		case 2:
 			armpmu = &xscale2pmu;
-			memcpy(armpmu_perf_cache_map, xscale_perf_cache_map,
-			       sizeof(xscale_perf_cache_map));
 			break;
 		}
 	}
--
cgit v1.2.3-18-g5258


From 59a98a1e56edea4d7d9c5f4ce9d50e271a04993c Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Sat, 13 Nov 2010 17:18:36 +0000
Subject: ARM: perf: avoid exposing internal stop function for v6 PMU

Unlike other pmu functions, armv6pmu_stop is not declared static.
This patch adds the missing keyword.
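
To spell out the distinction with a toy pair of declarations (names
invented for the example):

static void pmu_local_helper(void);	/* internal linkage: visible only in
					   this translation unit */
void pmu_global_helper(void);		/* external linkage: lives in the
					   kernel-wide namespace and can
					   clash with other symbols */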
Acked-by: Jamie Iles Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index c49e1701a2f..35319b8e4d4 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -1121,7 +1121,7 @@ armv6pmu_start(void) spin_unlock_irqrestore(&pmu_lock, flags); } -void +static void armv6pmu_stop(void) { unsigned long flags, val; -- cgit v1.2.3-18-g5258 From 3cb314bae2191b432a7e898abf865db880f6d07d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 17:37:46 +0000 Subject: ARM: perf: add _init() functions to PMUs In preparation for separating the PMU-specific code, this patch adds self-contained init functions to each PMU, therefore removing any PMU-specific knowledge from the PMU-agnostic init_hw_perf_events function. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 65 ++++++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 20 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 35319b8e4d4..acc4e91dd30 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -1240,6 +1240,11 @@ static const struct arm_pmu armv6pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init armv6pmu_init(void) +{ + return &armv6pmu; +} + /* * ARMv6mpcore is almost identical to single core ARMv6 with the exception * that some of the events have different enumerations and that there is no @@ -1264,6 +1269,11 @@ static const struct arm_pmu armv6mpcore_pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init armv6mpcore_pmu_init(void) +{ + return &armv6mpcore_pmu; +} + /* * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. * @@ -2136,6 +2146,25 @@ static u32 __init armv7_reset_read_pmnc(void) return nb_cnt + 1; } +const struct arm_pmu *__init armv7_a8_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA8; + armv7pmu.cache_map = &armv7_a8_perf_cache_map; + armv7pmu.event_map = &armv7_a8_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + +const struct arm_pmu *__init armv7_a9_pmu_init(void) +{ + armv7pmu.id = ARM_PERF_PMU_ID_CA9; + armv7pmu.cache_map = &armv7_a9_perf_cache_map; + armv7pmu.event_map = &armv7_a9_perf_map; + armv7pmu.num_events = armv7_reset_read_pmnc(); + return &armv7pmu; +} + + /* * ARMv5 [xscale] Performance counter handling code. 
* @@ -2564,6 +2593,11 @@ static const struct arm_pmu xscale1pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init xscale1pmu_init(void) +{ + return &xscale1pmu; +} + #define XSCALE2_OVERFLOWED_MASK 0x01f #define XSCALE2_CCOUNT_OVERFLOW 0x001 #define XSCALE2_COUNT0_OVERFLOW 0x002 @@ -2920,6 +2954,11 @@ static const struct arm_pmu xscale2pmu = { .max_period = (1LLU << 32) - 1, }; +const struct arm_pmu *__init xscale2pmu_init(void) +{ + return &xscale2pmu; +} + static int __init init_hw_perf_events(void) { @@ -2933,30 +2972,16 @@ init_hw_perf_events(void) case 0xB360: /* ARM1136 */ case 0xB560: /* ARM1156 */ case 0xB760: /* ARM1176 */ - armpmu = &armv6pmu; + armpmu = armv6pmu_init(); break; case 0xB020: /* ARM11mpcore */ - armpmu = &armv6mpcore_pmu; + armpmu = armv6mpcore_pmu_init(); break; case 0xC080: /* Cortex-A8 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - armv7pmu.cache_map = &armv7_a8_perf_cache_map; - armv7pmu.event_map = &armv7_a8_perf_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a8_pmu_init(); break; case 0xC090: /* Cortex-A9 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - armv7pmu.cache_map = &armv7_a9_perf_cache_map; - armv7pmu.event_map = &armv7_a9_perf_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); + armpmu = armv7_a9_pmu_init(); break; } /* Intel CPUs [xscale]. */ @@ -2964,10 +2989,10 @@ init_hw_perf_events(void) part_number = (cpuid >> 13) & 0x7; switch (part_number) { case 1: - armpmu = &xscale1pmu; + armpmu = xscale1pmu_init(); break; case 2: - armpmu = &xscale2pmu; + armpmu = xscale2pmu_init(); break; } } -- cgit v1.2.3-18-g5258 From 629948310e4270e9b32c37b4a65a8cd5d6ebf38a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Sat, 13 Nov 2010 18:45:27 +0000 Subject: ARM: perf: encode PMU name in arm_pmu structure Currently, perf uses the PMU ID as an index into a string table to look up the name of a given PMU. This patch encodes the name of a PMU directly into the arm_pmu structure so that PMU-specific code can be factored out into separate files. Acked-by: Jamie Iles Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index acc4e91dd30..ac4e9a1ed80 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -69,18 +69,9 @@ struct cpu_hw_events { }; DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); -/* PMU names. 
*/
-static const char *arm_pmu_names[] = {
-	[ARM_PERF_PMU_ID_XSCALE1] = "xscale1",
-	[ARM_PERF_PMU_ID_XSCALE2] = "xscale2",
-	[ARM_PERF_PMU_ID_V6]	  = "v6",
-	[ARM_PERF_PMU_ID_V6MP]	  = "v6mpcore",
-	[ARM_PERF_PMU_ID_CA8]	  = "ARMv7 Cortex-A8",
-	[ARM_PERF_PMU_ID_CA9]	  = "ARMv7 Cortex-A9",
-};
-
 struct arm_pmu {
 	enum arm_perf_pmu_ids id;
+	const char	*name;
 	irqreturn_t	(*handle_irq)(int irq_num, void *dev);
@@ -1225,6 +1216,7 @@ armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc,
 static const struct arm_pmu armv6pmu = {
 	.id			= ARM_PERF_PMU_ID_V6,
+	.name			= "v6",
 	.handle_irq		= armv6pmu_handle_irq,
 	.enable			= armv6pmu_enable_event,
 	.disable		= armv6pmu_disable_event,
@@ -1254,6 +1246,7 @@ const struct arm_pmu *__init armv6pmu_init(void)
 */
 static const struct arm_pmu armv6mpcore_pmu = {
 	.id			= ARM_PERF_PMU_ID_V6MP,
+	.name			= "v6mpcore",
 	.handle_irq		= armv6pmu_handle_irq,
 	.enable			= armv6pmu_enable_event,
 	.disable		= armv6mpcore_pmu_disable_event,
@@ -2149,6 +2142,7 @@ static u32 __init armv7_reset_read_pmnc(void)
 const struct arm_pmu *__init armv7_a8_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA8;
+	armv7pmu.name		= "ARMv7 Cortex-A8";
 	armv7pmu.cache_map	= &armv7_a8_perf_cache_map;
 	armv7pmu.event_map	= &armv7_a8_perf_map;
 	armv7pmu.num_events	= armv7_reset_read_pmnc();
@@ -2158,6 +2152,7 @@ const struct arm_pmu *__init armv7_a9_pmu_init(void)
 {
 	armv7pmu.id		= ARM_PERF_PMU_ID_CA9;
+	armv7pmu.name		= "ARMv7 Cortex-A9";
 	armv7pmu.cache_map	= &armv7_a9_perf_cache_map;
 	armv7pmu.event_map	= &armv7_a9_perf_map;
 	armv7pmu.num_events	= armv7_reset_read_pmnc();
@@ -2578,6 +2573,7 @@ xscale1pmu_write_counter(int counter, u32 val)
 static const struct arm_pmu xscale1pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE1,
+	.name		= "xscale1",
 	.handle_irq	= xscale1pmu_handle_irq,
 	.enable		= xscale1pmu_enable_event,
 	.disable	= xscale1pmu_disable_event,
@@ -2939,6 +2935,7 @@ xscale2pmu_write_counter(int counter, u32 val)
 static const struct arm_pmu xscale2pmu = {
 	.id		= ARM_PERF_PMU_ID_XSCALE2,
+	.name		= "xscale2",
 	.handle_irq	= xscale2pmu_handle_irq,
 	.enable		= xscale2pmu_enable_event,
 	.disable	= xscale2pmu_disable_event,
@@ -2999,7 +2996,7 @@ init_hw_perf_events(void)
 	if (armpmu) {
 		pr_info("enabled with %s PMU driver, %d counters available\n",
-			arm_pmu_names[armpmu->id], armpmu->num_events);
+			armpmu->name, armpmu->num_events);
 	} else {
 		pr_info("no hardware support available\n");
 	}
--
cgit v1.2.3-18-g5258


From 43eab87828fee65f89f4088736b2b7a187390a2f Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Sat, 13 Nov 2010 19:04:32 +0000
Subject: ARM: perf: separate PMU backends into multiple files

The ARM perf_event.c file contains all PMU backends and, as new PMUs
are introduced, will continue to grow. This patch follows the example
of x86 and splits the PMU implementations into separate files which
are then #included back into the main file. Compile-time guards are
added to each PMU file to avoid compiling in code that is not relevant
for the version of the architecture which we are targeting.
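
A sketch of the resulting layout (simplified; the exact guard symbols are
indicative of the approach rather than quoted from the patch):

/* perf_event.c: the core pulls the backends back in */
#include "perf_event_xscale.c"
#include "perf_event_v6.c"
#include "perf_event_v7.c"

/* perf_event_v6.c: stub out the init hooks when ARMv6 support is not
 * configured, so the core can call them unconditionally */
#ifdef CONFIG_CPU_V6
/* ... full ARMv6/ARM11mpcore backend ... */
#else
const struct arm_pmu *__init armv6pmu_init(void)
{
	return NULL;
}

const struct arm_pmu *__init armv6mpcore_pmu_init(void)
{
	return NULL;
}
#endif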
Acked-by: Jean Pihet Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event.c | 2357 +---------------------------------- arch/arm/kernel/perf_event_v6.c | 672 ++++++++++ arch/arm/kernel/perf_event_v7.c | 906 ++++++++++++++ arch/arm/kernel/perf_event_xscale.c | 807 ++++++++++++ 4 files changed, 2390 insertions(+), 2352 deletions(-) create mode 100644 arch/arm/kernel/perf_event_v6.c create mode 100644 arch/arm/kernel/perf_event_v7.c create mode 100644 arch/arm/kernel/perf_event_xscale.c (limited to 'arch/arm/kernel') diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index ac4e9a1ed80..421a4bb88fe 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -4,9 +4,7 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv7 support: Jean Pihet - * 2010 (c) MontaVista Software, LLC. + * Copyright (C) 2010 ARM Ltd., Will Deacon * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -606,2355 +604,10 @@ static struct pmu pmu = { .read = armpmu_read, }; -/* - * ARMv6 Performance counter handling code. - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. 
We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. 
- */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. 
- */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. 
- */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, 
c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /