diff options
Diffstat (limited to 'arch/arm/kernel')
109 files changed, 22374 insertions, 10824 deletions
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 5b9b268f4fb..38ddd9f83d0 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -5,55 +5,87 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) -ifdef CONFIG_DYNAMIC_FTRACE +ifdef CONFIG_FUNCTION_TRACER CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_patch.o = -pg endif CFLAGS_REMOVE_return_address.o = -pg # Object file lists. -obj-y := elf.o entry-armv.o entry-common.o irq.o \ - process.o ptrace.o return_address.o setup.o signal.o \ - sys_arm.o stacktrace.o time.o traps.o +obj-y := elf.o entry-common.o irq.o opcodes.o \ + process.o ptrace.o return_address.o \ + setup.o signal.o sigreturn_codes.o \ + stacktrace.o sys_arm.o time.o traps.o -obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += compat.o +obj-$(CONFIG_ATAGS) += atags_parse.o +obj-$(CONFIG_ATAGS_PROC) += atags_proc.o +obj-$(CONFIG_DEPRECATED_PARAM_STRUCT) += atags_compat.o -obj-$(CONFIG_LEDS) += leds.o -obj-$(CONFIG_OC_ETM) += etm.o +ifeq ($(CONFIG_CPU_V7M),y) +obj-y += entry-v7m.o v7m.o +else +obj-y += entry-armv.o +endif +obj-$(CONFIG_OC_ETM) += etm.o +obj-$(CONFIG_CPU_IDLE) += cpuidle.o obj-$(CONFIG_ISA_DMA_API) += dma.o -obj-$(CONFIG_ARCH_ACORN) += ecard.o -obj-$(CONFIG_FIQ) += fiq.o +obj-$(CONFIG_FIQ) += fiq.o fiqasm.o obj-$(CONFIG_MODULES) += armksyms.o module.o obj-$(CONFIG_ARTHUR) += arthur.o obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o +obj-$(CONFIG_ARM_CPU_SUSPEND) += sleep.o suspend.o +obj-$(CONFIG_HIBERNATION) += hibernate.o obj-$(CONFIG_SMP) += smp.o +ifdef CONFIG_MMU +obj-$(CONFIG_SMP) += smp_tlb.o +endif obj-$(CONFIG_HAVE_ARM_SCU) += smp_scu.o obj-$(CONFIG_HAVE_ARM_TWD) += smp_twd.o -obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o +obj-$(CONFIG_ARM_ARCH_TIMER) += arch_timer.o +obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o insn.o +obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o insn.o +obj-$(CONFIG_JUMP_LABEL) += jump_label.o insn.o patch.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o -obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o -obj-$(CONFIG_ATAGS_PROC) += atags.o +obj-$(CONFIG_UPROBES) += probes.o probes-arm.o uprobes.o uprobes-arm.o +obj-$(CONFIG_KPROBES) += probes.o kprobes.o kprobes-common.o patch.o +ifdef CONFIG_THUMB2_KERNEL +obj-$(CONFIG_KPROBES) += kprobes-thumb.o probes-thumb.o +else +obj-$(CONFIG_KPROBES) += kprobes-arm.o probes-arm.o +endif +obj-$(CONFIG_ARM_KPROBES_TEST) += test-kprobes.o +test-kprobes-objs := kprobes-test.o +ifdef CONFIG_THUMB2_KERNEL +test-kprobes-objs += kprobes-test-thumb.o +else +test-kprobes-objs += kprobes-test-arm.o +endif obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o obj-$(CONFIG_ARM_THUMBEE) += thumbee.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_ARM_UNWIND) += unwind.o obj-$(CONFIG_HAVE_TCM) += tcm.o +obj-$(CONFIG_OF) += devtree.o obj-$(CONFIG_CRASH_DUMP) += crash_dump.o +obj-$(CONFIG_SWP_EMULATE) += swp_emulate.o +CFLAGS_swp_emulate.o := -Wa,-march=armv7-a obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o -obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o -AFLAGS_crunch-bits.o := -Wa,-mcpu=ep9312 - obj-$(CONFIG_CPU_XSCALE) += xscale-cp0.o obj-$(CONFIG_CPU_XSC3) += xscale-cp0.o obj-$(CONFIG_CPU_MOHAWK) += xscale-cp0.o +obj-$(CONFIG_CPU_PJ4) += pj4-cp0.o +obj-$(CONFIG_CPU_PJ4B) += pj4-cp0.o obj-$(CONFIG_IWMMXT) += iwmmxt.o -obj-$(CONFIG_CPU_HAS_PMU) += pmu.o -obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o +obj-$(CONFIG_PERF_EVENTS) += perf_regs.o +obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o perf_event_cpu.o AFLAGS_iwmmxt.o := -Wa,-mcpu=iwmmxt +obj-$(CONFIG_ARM_CPU_TOPOLOGY) += topology.o ifneq ($(CONFIG_ARCH_EBSA110),y) obj-y += io.o @@ -63,4 +95,10 @@ head-y := head$(MMUEXT).o obj-$(CONFIG_DEBUG_LL) += debug.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o -extra-y := $(head-y) init_task.o vmlinux.lds +obj-$(CONFIG_ARM_VIRT_EXT) += hyp-stub.o +ifeq ($(CONFIG_ARM_PSCI),y) +obj-y += psci.o +obj-$(CONFIG_SMP) += psci_smp.o +endif + +extra-y := $(head-y) vmlinux.lds diff --git a/arch/arm/kernel/arch_timer.c b/arch/arm/kernel/arch_timer.c new file mode 100644 index 00000000000..1791f12c180 --- /dev/null +++ b/arch/arm/kernel/arch_timer.c @@ -0,0 +1,44 @@ +/* + * linux/arch/arm/kernel/arch_timer.c + * + * Copyright (C) 2011 ARM Ltd. + * All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/init.h> +#include <linux/types.h> +#include <linux/errno.h> + +#include <asm/delay.h> + +#include <clocksource/arm_arch_timer.h> + +static unsigned long arch_timer_read_counter_long(void) +{ + return arch_timer_read_counter(); +} + +static struct delay_timer arch_delay_timer; + +static void __init arch_timer_delay_timer_register(void) +{ + /* Use the architected timer for the delay loop. */ + arch_delay_timer.read_current_timer = arch_timer_read_counter_long; + arch_delay_timer.freq = arch_timer_get_rate(); + register_current_timer_delay(&arch_delay_timer); +} + +int __init arch_timer_arch_init(void) +{ + u32 arch_timer_rate = arch_timer_get_rate(); + + if (arch_timer_rate == 0) + return -ENXIO; + + arch_timer_delay_timer_register(); + + return 0; +} diff --git a/arch/arm/kernel/armksyms.c b/arch/arm/kernel/armksyms.c index e5e1e538767..f7b450f97e6 100644 --- a/arch/arm/kernel/armksyms.c +++ b/arch/arm/kernel/armksyms.c @@ -7,7 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/string.h> #include <linux/cryptohash.h> @@ -18,7 +18,6 @@ #include <linux/io.h> #include <asm/checksum.h> -#include <asm/system.h> #include <asm/ftrace.h> /* @@ -36,6 +35,8 @@ extern void __ucmpdi2(void); extern void __udivsi3(void); extern void __umodsi3(void); extern void __do_div64(void); +extern void __bswapsi2(void); +extern void __bswapdi2(void); extern void __aeabi_idiv(void); extern void __aeabi_idivmod(void); @@ -49,12 +50,8 @@ extern void __aeabi_ulcmp(void); extern void fpundefinstr(void); - -EXPORT_SYMBOL(__backtrace); - /* platform dependent support */ -EXPORT_SYMBOL(__udelay); -EXPORT_SYMBOL(__const_udelay); +EXPORT_SYMBOL(arm_delay_ops); /* networking */ EXPORT_SYMBOL(csum_partial); @@ -91,10 +88,6 @@ EXPORT_SYMBOL(memmove); EXPORT_SYMBOL(memchr); EXPORT_SYMBOL(__memzero); - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - #ifdef CONFIG_MMU EXPORT_SYMBOL(copy_page); @@ -112,9 +105,6 @@ EXPORT_SYMBOL(__put_user_4); EXPORT_SYMBOL(__put_user_8); #endif - /* crypto hash */ -EXPORT_SYMBOL(sha_transform); - /* gcc lib functions */ EXPORT_SYMBOL(__ashldi3); EXPORT_SYMBOL(__ashrdi3); @@ -126,6 +116,8 @@ EXPORT_SYMBOL(__ucmpdi2); EXPORT_SYMBOL(__udivsi3); EXPORT_SYMBOL(__umodsi3); EXPORT_SYMBOL(__do_div64); +EXPORT_SYMBOL(__bswapsi2); +EXPORT_SYMBOL(__bswapdi2); #ifdef CONFIG_AEABI EXPORT_SYMBOL(__aeabi_idiv); @@ -140,24 +132,18 @@ EXPORT_SYMBOL(__aeabi_ulcmp); #endif /* bitops */ -EXPORT_SYMBOL(_set_bit_le); -EXPORT_SYMBOL(_test_and_set_bit_le); -EXPORT_SYMBOL(_clear_bit_le); -EXPORT_SYMBOL(_test_and_clear_bit_le); -EXPORT_SYMBOL(_change_bit_le); -EXPORT_SYMBOL(_test_and_change_bit_le); +EXPORT_SYMBOL(_set_bit); +EXPORT_SYMBOL(_test_and_set_bit); +EXPORT_SYMBOL(_clear_bit); +EXPORT_SYMBOL(_test_and_clear_bit); +EXPORT_SYMBOL(_change_bit); +EXPORT_SYMBOL(_test_and_change_bit); EXPORT_SYMBOL(_find_first_zero_bit_le); EXPORT_SYMBOL(_find_next_zero_bit_le); EXPORT_SYMBOL(_find_first_bit_le); EXPORT_SYMBOL(_find_next_bit_le); #ifdef __ARMEB__ -EXPORT_SYMBOL(_set_bit_be); -EXPORT_SYMBOL(_test_and_set_bit_be); -EXPORT_SYMBOL(_clear_bit_be); -EXPORT_SYMBOL(_test_and_clear_bit_be); -EXPORT_SYMBOL(_change_bit_be); -EXPORT_SYMBOL(_test_and_change_bit_be); EXPORT_SYMBOL(_find_first_zero_bit_be); EXPORT_SYMBOL(_find_next_zero_bit_be); EXPORT_SYMBOL(_find_first_bit_be); @@ -170,3 +156,8 @@ EXPORT_SYMBOL(mcount); #endif EXPORT_SYMBOL(__gnu_mcount_nc); #endif + +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT +EXPORT_SYMBOL(__pv_phys_pfn_offset); +EXPORT_SYMBOL(__pv_offset); +#endif diff --git a/arch/arm/kernel/asm-offsets.c b/arch/arm/kernel/asm-offsets.c index 82da6617213..85598b5d1ef 100644 --- a/arch/arm/kernel/asm-offsets.c +++ b/arch/arm/kernel/asm-offsets.c @@ -13,10 +13,18 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/dma-mapping.h> +#ifdef CONFIG_KVM_ARM_HOST +#include <linux/kvm_host.h> +#endif +#include <asm/cacheflush.h> +#include <asm/glue-df.h> +#include <asm/glue-pf.h> #include <asm/mach/arch.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/procinfo.h> +#include <asm/suspend.h> +#include <asm/hardware/cache-l2x0.h> #include <linux/kbuild.h> /* @@ -55,7 +63,12 @@ int main(void) DEFINE(TI_USED_CP, offsetof(struct thread_info, used_cp)); DEFINE(TI_TP_VALUE, offsetof(struct thread_info, tp_value)); DEFINE(TI_FPSTATE, offsetof(struct thread_info, fpstate)); +#ifdef CONFIG_VFP DEFINE(TI_VFPSTATE, offsetof(struct thread_info, vfpstate)); +#ifdef CONFIG_SMP + DEFINE(VFP_CPU, offsetof(union vfp_state, hard.cpu)); +#endif +#endif #ifdef CONFIG_ARM_THUMBEE DEFINE(TI_THUMBEE_STATE, offsetof(struct thread_info, thumbee_state)); #endif @@ -86,8 +99,19 @@ int main(void) DEFINE(S_OLD_R0, offsetof(struct pt_regs, ARM_ORIG_r0)); DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs)); BLANK(); +#ifdef CONFIG_CACHE_L2X0 + DEFINE(L2X0_R_PHY_BASE, offsetof(struct l2x0_regs, phy_base)); + DEFINE(L2X0_R_AUX_CTRL, offsetof(struct l2x0_regs, aux_ctrl)); + DEFINE(L2X0_R_TAG_LATENCY, offsetof(struct l2x0_regs, tag_latency)); + DEFINE(L2X0_R_DATA_LATENCY, offsetof(struct l2x0_regs, data_latency)); + DEFINE(L2X0_R_FILTER_START, offsetof(struct l2x0_regs, filter_start)); + DEFINE(L2X0_R_FILTER_END, offsetof(struct l2x0_regs, filter_end)); + DEFINE(L2X0_R_PREFETCH_CTRL, offsetof(struct l2x0_regs, prefetch_ctrl)); + DEFINE(L2X0_R_PWR_CTRL, offsetof(struct l2x0_regs, pwr_ctrl)); + BLANK(); +#endif #ifdef CONFIG_CPU_HAS_ASID - DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id)); + DEFINE(MM_CONTEXT_ID, offsetof(struct mm_struct, context.id.counter)); BLANK(); #endif DEFINE(VMA_VM_MM, offsetof(struct vm_area_struct, vm_mm)); @@ -114,9 +138,67 @@ int main(void) #ifdef MULTI_PABORT DEFINE(PROCESSOR_PABT_FUNC, offsetof(struct processor, _prefetch_abort)); #endif +#ifdef MULTI_CPU + DEFINE(CPU_SLEEP_SIZE, offsetof(struct processor, suspend_size)); + DEFINE(CPU_DO_SUSPEND, offsetof(struct processor, do_suspend)); + DEFINE(CPU_DO_RESUME, offsetof(struct processor, do_resume)); +#endif +#ifdef MULTI_CACHE + DEFINE(CACHE_FLUSH_KERN_ALL, offsetof(struct cpu_cache_fns, flush_kern_all)); +#endif +#ifdef CONFIG_ARM_CPU_SUSPEND + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif BLANK(); DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL); DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE); DEFINE(DMA_FROM_DEVICE, DMA_FROM_DEVICE); + BLANK(); + DEFINE(CACHE_WRITEBACK_ORDER, __CACHE_WRITEBACK_ORDER); + DEFINE(CACHE_WRITEBACK_GRANULE, __CACHE_WRITEBACK_GRANULE); + BLANK(); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_MIDR, offsetof(struct kvm_vcpu, arch.midr)); + DEFINE(VCPU_CP15, offsetof(struct kvm_vcpu, arch.cp15)); + DEFINE(VCPU_VFP_GUEST, offsetof(struct kvm_vcpu, arch.vfp_guest)); + DEFINE(VCPU_VFP_HOST, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_REGS, offsetof(struct kvm_vcpu, arch.regs)); + DEFINE(VCPU_USR_REGS, offsetof(struct kvm_vcpu, arch.regs.usr_regs)); + DEFINE(VCPU_SVC_REGS, offsetof(struct kvm_vcpu, arch.regs.svc_regs)); + DEFINE(VCPU_ABT_REGS, offsetof(struct kvm_vcpu, arch.regs.abt_regs)); + DEFINE(VCPU_UND_REGS, offsetof(struct kvm_vcpu, arch.regs.und_regs)); + DEFINE(VCPU_IRQ_REGS, offsetof(struct kvm_vcpu, arch.regs.irq_regs)); + DEFINE(VCPU_FIQ_REGS, offsetof(struct kvm_vcpu, arch.regs.fiq_regs)); + DEFINE(VCPU_PC, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_pc)); + DEFINE(VCPU_CPSR, offsetof(struct kvm_vcpu, arch.regs.usr_regs.ARM_cpsr)); + DEFINE(VCPU_HCR, offsetof(struct kvm_vcpu, arch.hcr)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_HSR, offsetof(struct kvm_vcpu, arch.fault.hsr)); + DEFINE(VCPU_HxFAR, offsetof(struct kvm_vcpu, arch.fault.hxfar)); + DEFINE(VCPU_HPFAR, offsetof(struct kvm_vcpu, arch.fault.hpfar)); + DEFINE(VCPU_HYP_PC, offsetof(struct kvm_vcpu, arch.fault.hyp_pc)); +#ifdef CONFIG_KVM_ARM_VGIC + DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); + DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); + DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); + DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); + DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); + DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); + DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); +#ifdef CONFIG_KVM_ARM_TIMER + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); +#endif + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif + DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); +#endif return 0; } diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h index e5f028d214a..ec4164da6e3 100644 --- a/arch/arm/kernel/atags.h +++ b/arch/arm/kernel/atags.h @@ -3,3 +3,18 @@ extern void save_atags(struct tag *tags); #else static inline void save_atags(struct tag *tags) { } #endif + +void convert_to_tag_list(struct tag *tags); + +#ifdef CONFIG_ATAGS +const struct machine_desc *setup_machine_tags(phys_addr_t __atags_pointer, + unsigned int machine_nr); +#else +static inline const struct machine_desc * +setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +{ + early_print("no ATAGS support: can't continue\n"); + while (true); + unreachable(); +} +#endif diff --git a/arch/arm/kernel/compat.c b/arch/arm/kernel/atags_compat.c index 925652318b8..5236ad38f41 100644 --- a/arch/arm/kernel/compat.c +++ b/arch/arm/kernel/atags_compat.c @@ -1,5 +1,5 @@ /* - * linux/arch/arm/kernel/compat.c + * linux/arch/arm/kernel/atags_compat.c * * Copyright (C) 2001 Russell King * @@ -26,7 +26,7 @@ #include <asm/mach/arch.h> -#include "compat.h" +#include "atags.h" /* * Usage: diff --git a/arch/arm/kernel/atags_parse.c b/arch/arm/kernel/atags_parse.c new file mode 100644 index 00000000000..7807ef58a2a --- /dev/null +++ b/arch/arm/kernel/atags_parse.c @@ -0,0 +1,239 @@ +/* + * Tag parsing. + * + * Copyright (C) 1995-2001 Russell King + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * This is the traditional way of passing data to the kernel at boot time. Rather + * than passing a fixed inflexible structure to the kernel, we pass a list + * of variable-sized tags to the kernel. The first tag must be a ATAG_CORE + * tag for the list to be recognised (to distinguish the tagged list from + * a param_struct). The list is terminated with a zero-length tag (this tag + * is not parsed in any way). + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/fs.h> +#include <linux/root_dev.h> +#include <linux/screen_info.h> +#include <linux/memblock.h> + +#include <asm/setup.h> +#include <asm/system_info.h> +#include <asm/page.h> +#include <asm/mach/arch.h> + +#include "atags.h" + +static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; + +#ifndef MEM_SIZE +#define MEM_SIZE (16*1024*1024) +#endif + +static struct { + struct tag_header hdr1; + struct tag_core core; + struct tag_header hdr2; + struct tag_mem32 mem; + struct tag_header hdr3; +} default_tags __initdata = { + { tag_size(tag_core), ATAG_CORE }, + { 1, PAGE_SIZE, 0xff }, + { tag_size(tag_mem32), ATAG_MEM }, + { MEM_SIZE }, + { 0, ATAG_NONE } +}; + +static int __init parse_tag_core(const struct tag *tag) +{ + if (tag->hdr.size > 2) { + if ((tag->u.core.flags & 1) == 0) + root_mountflags &= ~MS_RDONLY; + ROOT_DEV = old_decode_dev(tag->u.core.rootdev); + } + return 0; +} + +__tagtable(ATAG_CORE, parse_tag_core); + +static int __init parse_tag_mem32(const struct tag *tag) +{ + return arm_add_memory(tag->u.mem.start, tag->u.mem.size); +} + +__tagtable(ATAG_MEM, parse_tag_mem32); + +#if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) +static int __init parse_tag_videotext(const struct tag *tag) +{ + screen_info.orig_x = tag->u.videotext.x; + screen_info.orig_y = tag->u.videotext.y; + screen_info.orig_video_page = tag->u.videotext.video_page; + screen_info.orig_video_mode = tag->u.videotext.video_mode; + screen_info.orig_video_cols = tag->u.videotext.video_cols; + screen_info.orig_video_ega_bx = tag->u.videotext.video_ega_bx; + screen_info.orig_video_lines = tag->u.videotext.video_lines; + screen_info.orig_video_isVGA = tag->u.videotext.video_isvga; + screen_info.orig_video_points = tag->u.videotext.video_points; + return 0; +} + +__tagtable(ATAG_VIDEOTEXT, parse_tag_videotext); +#endif + +#ifdef CONFIG_BLK_DEV_RAM +static int __init parse_tag_ramdisk(const struct tag *tag) +{ + extern int rd_size, rd_image_start, rd_prompt, rd_doload; + + rd_image_start = tag->u.ramdisk.start; + rd_doload = (tag->u.ramdisk.flags & 1) == 0; + rd_prompt = (tag->u.ramdisk.flags & 2) == 0; + + if (tag->u.ramdisk.size) + rd_size = tag->u.ramdisk.size; + + return 0; +} + +__tagtable(ATAG_RAMDISK, parse_tag_ramdisk); +#endif + +static int __init parse_tag_serialnr(const struct tag *tag) +{ + system_serial_low = tag->u.serialnr.low; + system_serial_high = tag->u.serialnr.high; + return 0; +} + +__tagtable(ATAG_SERIAL, parse_tag_serialnr); + +static int __init parse_tag_revision(const struct tag *tag) +{ + system_rev = tag->u.revision.rev; + return 0; +} + +__tagtable(ATAG_REVISION, parse_tag_revision); + +static int __init parse_tag_cmdline(const struct tag *tag) +{ +#if defined(CONFIG_CMDLINE_EXTEND) + strlcat(default_command_line, " ", COMMAND_LINE_SIZE); + strlcat(default_command_line, tag->u.cmdline.cmdline, + COMMAND_LINE_SIZE); +#elif defined(CONFIG_CMDLINE_FORCE) + pr_warning("Ignoring tag cmdline (using the default kernel command line)\n"); +#else + strlcpy(default_command_line, tag->u.cmdline.cmdline, + COMMAND_LINE_SIZE); +#endif + return 0; +} + +__tagtable(ATAG_CMDLINE, parse_tag_cmdline); + +/* + * Scan the tag table for this tag, and call its parse function. + * The tag table is built by the linker from all the __tagtable + * declarations. + */ +static int __init parse_tag(const struct tag *tag) +{ + extern struct tagtable __tagtable_begin, __tagtable_end; + struct tagtable *t; + + for (t = &__tagtable_begin; t < &__tagtable_end; t++) + if (tag->hdr.tag == t->tag) { + t->parse(tag); + break; + } + + return t < &__tagtable_end; +} + +/* + * Parse all tags in the list, checking both the global and architecture + * specific tag tables. + */ +static void __init parse_tags(const struct tag *t) +{ + for (; t->hdr.size; t = tag_next(t)) + if (!parse_tag(t)) + printk(KERN_WARNING + "Ignoring unrecognised tag 0x%08x\n", + t->hdr.tag); +} + +static void __init squash_mem_tags(struct tag *tag) +{ + for (; tag->hdr.size; tag = tag_next(tag)) + if (tag->hdr.tag == ATAG_MEM) + tag->hdr.tag = ATAG_NONE; +} + +const struct machine_desc * __init +setup_machine_tags(phys_addr_t __atags_pointer, unsigned int machine_nr) +{ + struct tag *tags = (struct tag *)&default_tags; + const struct machine_desc *mdesc = NULL, *p; + char *from = default_command_line; + + default_tags.mem.start = PHYS_OFFSET; + + /* + * locate machine in the list of supported machines. + */ + for_each_machine_desc(p) + if (machine_nr == p->nr) { + printk("Machine: %s\n", p->name); + mdesc = p; + break; + } + + if (!mdesc) { + early_print("\nError: unrecognized/unsupported machine ID" + " (r1 = 0x%08x).\n\n", machine_nr); + dump_machine_table(); /* does not return */ + } + + if (__atags_pointer) + tags = phys_to_virt(__atags_pointer); + else if (mdesc->atag_offset) + tags = (void *)(PAGE_OFFSET + mdesc->atag_offset); + +#if defined(CONFIG_DEPRECATED_PARAM_STRUCT) + /* + * If we have the old style parameters, convert them to + * a tag list. + */ + if (tags->hdr.tag != ATAG_CORE) + convert_to_tag_list(tags); +#endif + if (tags->hdr.tag != ATAG_CORE) { + early_print("Warning: Neither atags nor dtb found\n"); + tags = (struct tag *)&default_tags; + } + + if (mdesc->fixup) + mdesc->fixup(tags, &from); + + if (tags->hdr.tag == ATAG_CORE) { + if (memblock_phys_mem_size()) + squash_mem_tags(tags); + save_atags(tags); + parse_tags(tags); + } + + /* parse_early_param needs a boot_command_line */ + strlcpy(boot_command_line, from, COMMAND_LINE_SIZE); + + return mdesc; +} diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags_proc.c index 42a1a1415fa..c7ff8073416 100644 --- a/arch/arm/kernel/atags.c +++ b/arch/arm/kernel/atags_proc.c @@ -9,24 +9,18 @@ struct buffer { char data[]; }; -static int -read_buffer(char* page, char** start, off_t off, int count, - int* eof, void* data) +static ssize_t atags_read(struct file *file, char __user *buf, + size_t count, loff_t *ppos) { - struct buffer *buffer = (struct buffer *)data; - - if (off >= buffer->size) { - *eof = 1; - return 0; - } - - count = min((int) (buffer->size - off), count); - - memcpy(page, &buffer->data[off], count); - - return count; + struct buffer *b = PDE_DATA(file_inode(file)); + return simple_read_from_buffer(buf, count, ppos, b->data, b->size); } +static const struct file_operations atags_fops = { + .read = atags_read, + .llseek = default_llseek, +}; + #define BOOT_PARAMS_SIZE 1536 static char __initdata atags_copy[BOOT_PARAMS_SIZE]; @@ -66,9 +60,7 @@ static int __init init_atags_procfs(void) b->size = size; memcpy(b->data, atags_copy, size); - tags_entry = create_proc_read_entry("atags", 0400, - NULL, read_buffer, b); - + tags_entry = proc_create_data("atags", 0400, NULL, &atags_fops, b); if (!tags_entry) goto nomem; diff --git a/arch/arm/kernel/bios32.c b/arch/arm/kernel/bios32.c index c6273a3bfc2..17a26c17f7f 100644 --- a/arch/arm/kernel/bios32.c +++ b/arch/arm/kernel/bios32.c @@ -5,7 +5,7 @@ * * Bits taken from various places. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/pci.h> #include <linux/slab.h> @@ -13,13 +13,13 @@ #include <linux/io.h> #include <asm/mach-types.h> +#include <asm/mach/map.h> #include <asm/mach/pci.h> static int debug_pci; -static int use_firmware; /* - * We can't use pci_find_device() here since we are + * We can't use pci_get_device() here since we are * called from interrupt context. */ static void pcibios_bus_report_status(struct pci_bus *bus, u_int status_mask, int warn) @@ -57,13 +57,10 @@ static void pcibios_bus_report_status(struct pci_bus *bus, u_int status_mask, in void pcibios_report_status(u_int status_mask, int warn) { - struct list_head *l; - - list_for_each(l, &pci_root_buses) { - struct pci_bus *bus = pci_bus_b(l); + struct pci_bus *bus; + list_for_each_entry(bus, &pci_root_buses, node) pcibios_bus_report_status(bus, status_mask, warn); - } } /* @@ -78,7 +75,7 @@ void pcibios_report_status(u_int status_mask, int warn) * Bug 3 is responsible for the sound DMA grinding to a halt. We now * live with bug 2. */ -static void __devinit pci_fixup_83c553(struct pci_dev *dev) +static void pci_fixup_83c553(struct pci_dev *dev) { /* * Set memory region to start at address 0, and enable IO @@ -130,7 +127,7 @@ static void __devinit pci_fixup_83c553(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_83C553, pci_fixup_83c553); -static void __devinit pci_fixup_unassign(struct pci_dev *dev) +static void pci_fixup_unassign(struct pci_dev *dev) { dev->resource[0].end -= dev->resource[0].start; dev->resource[0].start = 0; @@ -142,7 +139,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND2, PCI_DEVICE_ID_WINBOND2_89C940F, * if it is the host bridge by marking it as such. These resources are of * no consequence to the PCI layer (they are handled elsewhere). */ -static void __devinit pci_fixup_dec21285(struct pci_dev *dev) +static void pci_fixup_dec21285(struct pci_dev *dev) { int i; @@ -159,34 +156,9 @@ static void __devinit pci_fixup_dec21285(struct pci_dev *dev) DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21285, pci_fixup_dec21285); /* - * Same as above. The PrPMC800 carrier board for the PrPMC1100 - * card maps the host-bridge @ 00:01:00 for some reason and it - * ends up getting scanned. Note that we only want to do this - * fixup when we find the IXP4xx on a PrPMC system, which is why - * we check the machine type. We could be running on a board - * with an IXP4xx target device and we don't want to kill the - * resources in that case. - */ -static void __devinit pci_fixup_prpmc1100(struct pci_dev *dev) -{ - int i; - - if (machine_is_prpmc1100()) { - dev->class &= 0xff; - dev->class |= PCI_CLASS_BRIDGE_HOST << 8; - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - dev->resource[i].start = 0; - dev->resource[i].end = 0; - dev->resource[i].flags = 0; - } - } -} -DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IXP4XX, pci_fixup_prpmc1100); - -/* * PCI IDE controllers use non-standard I/O port decoding, respect it. */ -static void __devinit pci_fixup_ide_bases(struct pci_dev *dev) +static void pci_fixup_ide_bases(struct pci_dev *dev) { struct resource *r; int i; @@ -207,7 +179,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pci_fixup_ide_bases); /* * Put the DEC21142 to sleep */ -static void __devinit pci_fixup_dec21142(struct pci_dev *dev) +static void pci_fixup_dec21142(struct pci_dev *dev) { pci_write_config_dword(dev, 0x40, 0x80000000); } @@ -229,7 +201,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_DEC, PCI_DEVICE_ID_DEC_21142, pci_fixup_d * functional. However, The CY82C693U _does not work_ in bus * master mode without locking the PCI bus solid. */ -static void __devinit pci_fixup_cy82c693(struct pci_dev *dev) +static void pci_fixup_cy82c693(struct pci_dev *dev) { if ((dev->class >> 8) == PCI_CLASS_STORAGE_IDE) { u32 base0, base1; @@ -279,7 +251,7 @@ static void __devinit pci_fixup_cy82c693(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_CONTAQ, PCI_DEVICE_ID_CONTAQ_82C693, pci_fixup_cy82c693); -static void __init pci_fixup_it8152(struct pci_dev *dev) +static void pci_fixup_it8152(struct pci_dev *dev) { int i; /* fixup for ITE 8152 devices */ @@ -296,15 +268,6 @@ static void __init pci_fixup_it8152(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_ITE, PCI_DEVICE_ID_ITE_8152, pci_fixup_it8152); - - -void __devinit pcibios_update_irq(struct pci_dev *dev, int irq) -{ - if (debug_pci) - printk("PCI: Assigning IRQ %02d to %s\n", irq, pci_name(dev)); - pci_write_config_byte(dev, PCI_INTERRUPT_LINE, irq); -} - /* * If the bus contains any of these devices, then we must not turn on * parity checking of any kind. Currently this is CyberPro 20x0 only. @@ -320,54 +283,14 @@ static inline int pdev_bad_for_parity(struct pci_dev *dev) } /* - * Adjust the device resources from bus-centric to Linux-centric. - */ -static void __devinit -pdev_fixup_device_resources(struct pci_sys_data *root, struct pci_dev *dev) -{ - resource_size_t offset; - int i; - - for (i = 0; i < PCI_NUM_RESOURCES; i++) { - if (dev->resource[i].start == 0) - continue; - if (dev->resource[i].flags & IORESOURCE_MEM) - offset = root->mem_offset; - else - offset = root->io_offset; - - dev->resource[i].start += offset; - dev->resource[i].end += offset; - } -} - -static void __devinit -pbus_assign_bus_resources(struct pci_bus *bus, struct pci_sys_data *root) -{ - struct pci_dev *dev = bus->self; - int i; - - if (!dev) { - /* - * Assign root bus resources. - */ - for (i = 0; i < 3; i++) - bus->resource[i] = root->resource[i]; - } -} - -/* * pcibios_fixup_bus - Called after each bus is probed, * but before its children are examined. */ void pcibios_fixup_bus(struct pci_bus *bus) { - struct pci_sys_data *root = bus->sysdata; struct pci_dev *dev; u16 features = PCI_COMMAND_SERR | PCI_COMMAND_PARITY | PCI_COMMAND_FAST_BACK; - pbus_assign_bus_resources(bus, root); - /* * Walk the devices on this bus, working out what we can * and can't support. @@ -375,8 +298,6 @@ void pcibios_fixup_bus(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { u16 status; - pdev_fixup_device_resources(root, dev); - pci_read_config_word(dev, PCI_STATUS, &status); /* @@ -437,59 +358,46 @@ void pcibios_fixup_bus(struct pci_bus *bus) printk(KERN_INFO "PCI: bus%d: Fast back to back transfers %sabled\n", bus->number, (features & PCI_COMMAND_FAST_BACK) ? "en" : "dis"); } +EXPORT_SYMBOL(pcibios_fixup_bus); -/* - * Convert from Linux-centric to bus-centric addresses for bridge devices. - */ -void -pcibios_resource_to_bus(struct pci_dev *dev, struct pci_bus_region *region, - struct resource *res) +void pcibios_add_bus(struct pci_bus *bus) { - struct pci_sys_data *root = dev->sysdata; - unsigned long offset = 0; - - if (res->flags & IORESOURCE_IO) - offset = root->io_offset; - if (res->flags & IORESOURCE_MEM) - offset = root->mem_offset; - - region->start = res->start - offset; - region->end = res->end - offset; + struct pci_sys_data *sys = bus->sysdata; + if (sys->add_bus) + sys->add_bus(bus); } -void __devinit -pcibios_bus_to_resource(struct pci_dev *dev, struct resource *res, - struct pci_bus_region *region) +void pcibios_remove_bus(struct pci_bus *bus) { - struct pci_sys_data *root = dev->sysdata; - unsigned long offset = 0; - - if (res->flags & IORESOURCE_IO) - offset = root->io_offset; - if (res->flags & IORESOURCE_MEM) - offset = root->mem_offset; - - res->start = region->start + offset; - res->end = region->end + offset; + struct pci_sys_data *sys = bus->sysdata; + if (sys->remove_bus) + sys->remove_bus(bus); } -#ifdef CONFIG_HOTPLUG -EXPORT_SYMBOL(pcibios_fixup_bus); -EXPORT_SYMBOL(pcibios_resource_to_bus); -EXPORT_SYMBOL(pcibios_bus_to_resource); -#endif - /* - * Swizzle the device pin each time we cross a bridge. - * This might update pin and returns the slot number. + * Swizzle the device pin each time we cross a bridge. If a platform does + * not provide a swizzle function, we perform the standard PCI swizzling. + * + * The default swizzling walks up the bus tree one level at a time, applying + * the standard swizzle function at each step, stopping when it finds the PCI + * root bus. This will return the slot number of the bridge device on the + * root bus and the interrupt pin on that device which should correspond + * with the downstream device interrupt. + * + * Platforms may override this, in which case the slot and pin returned + * depend entirely on the platform code. However, please note that the + * PCI standard swizzle is implemented on plug-in cards and Cardbus based + * PCI extenders, so it can not be ignored. */ -static u8 __devinit pcibios_swizzle(struct pci_dev *dev, u8 *pin) +static u8 pcibios_swizzle(struct pci_dev *dev, u8 *pin) { struct pci_sys_data *sys = dev->sysdata; - int slot = 0, oldpin = *pin; + int slot, oldpin = *pin; if (sys->swizzle) slot = sys->swizzle(dev, pin); + else + slot = pci_common_swizzle(dev, pin); if (debug_pci) printk("PCI: %s swizzling pin %d => pin %d slot %d\n", @@ -501,7 +409,7 @@ static u8 __devinit pcibios_swizzle(struct pci_dev *dev, u8 *pin) /* * Map a slot/pin to an IRQ. */ -static int pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin) +static int pcibios_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) { struct pci_sys_data *sys = dev->sysdata; int irq = -1; @@ -516,7 +424,40 @@ static int pcibios_map_irq(struct pci_dev *dev, u8 slot, u8 pin) return irq; } -static void __init pcibios_init_hw(struct hw_pci *hw) +static int pcibios_init_resources(int busnr, struct pci_sys_data *sys) +{ + int ret; + struct pci_host_bridge_window *window; + + if (list_empty(&sys->resources)) { + pci_add_resource_offset(&sys->resources, + &iomem_resource, sys->mem_offset); + } + + list_for_each_entry(window, &sys->resources, list) { + if (resource_type(window->res) == IORESOURCE_IO) + return 0; + } + + sys->io_res.start = (busnr * SZ_64K) ? : pcibios_min_io; + sys->io_res.end = (busnr + 1) * SZ_64K - 1; + sys->io_res.flags = IORESOURCE_IO; + sys->io_res.name = sys->io_res_name; + sprintf(sys->io_res_name, "PCI%d I/O", busnr); + + ret = request_resource(&ioport_resource, &sys->io_res); + if (ret) { + pr_err("PCI: unable to allocate I/O port region (%d)\n", ret); + return ret; + } + pci_add_resource_offset(&sys->resources, &sys->io_res, + sys->io_offset); + + return 0; +} + +static void pcibios_init_hw(struct device *parent, struct hw_pci *hw, + struct list_head *head) { struct pci_sys_data *sys = NULL; int ret; @@ -530,24 +471,38 @@ static void __init pcibios_init_hw(struct hw_pci *hw) #ifdef CONFIG_PCI_DOMAINS sys->domain = hw->domain; #endif - sys->hw = hw; sys->busnr = busnr; sys->swizzle = hw->swizzle; sys->map_irq = hw->map_irq; - sys->resource[0] = &ioport_resource; - sys->resource[1] = &iomem_resource; + sys->align_resource = hw->align_resource; + sys->add_bus = hw->add_bus; + sys->remove_bus = hw->remove_bus; + INIT_LIST_HEAD(&sys->resources); + + if (hw->private_data) + sys->private_data = hw->private_data[nr]; ret = hw->setup(nr, sys); if (ret > 0) { - sys->bus = hw->scan(nr, sys); + ret = pcibios_init_resources(nr, sys); + if (ret) { + kfree(sys); + break; + } + + if (hw->scan) + sys->bus = hw->scan(nr, sys); + else + sys->bus = pci_scan_root_bus(parent, sys->busnr, + hw->ops, sys, &sys->resources); if (!sys->bus) panic("PCI: unable to scan bus!"); - busnr = sys->bus->subordinate + 1; + busnr = sys->bus->busn_res.end + 1; - list_add(&sys->node, &hw->buses); + list_add(&sys->node, head); } else { kfree(sys); if (ret < 0) @@ -556,24 +511,24 @@ static void __init pcibios_init_hw(struct hw_pci *hw) } } -void __init pci_common_init(struct hw_pci *hw) +void pci_common_init_dev(struct device *parent, struct hw_pci *hw) { struct pci_sys_data *sys; + LIST_HEAD(head); - INIT_LIST_HEAD(&hw->buses); - + pci_add_flags(PCI_REASSIGN_ALL_RSRC); if (hw->preinit) hw->preinit(); - pcibios_init_hw(hw); + pcibios_init_hw(parent, hw, &head); if (hw->postinit) hw->postinit(); pci_fixup_irqs(pcibios_swizzle, pcibios_map_irq); - list_for_each_entry(sys, &hw->buses, node) { + list_for_each_entry(sys, &head, node) { struct pci_bus *bus = sys->bus; - if (!use_firmware) { + if (!pci_has_flag(PCI_PROBE_ONLY)) { /* * Size the bridge windows. */ @@ -590,15 +545,34 @@ void __init pci_common_init(struct hw_pci *hw) */ pci_bus_add_devices(bus); } + + list_for_each_entry(sys, &head, node) { + struct pci_bus *bus = sys->bus; + + /* Configure PCI Express settings */ + if (bus && !pci_has_flag(PCI_PROBE_ONLY)) { + struct pci_bus *child; + + list_for_each_entry(child, &bus->children, node) + pcie_bus_configure_settings(child); + } + } } +#ifndef CONFIG_PCI_HOST_ITE8152 +void pcibios_set_master(struct pci_dev *dev) +{ + /* No special bus mastering setup handling */ +} +#endif + char * __init pcibios_setup(char *str) { if (!strcmp(str, "debug")) { debug_pci = 1; return NULL; } else if (!strcmp(str, "firmware")) { - use_firmware = 1; + pci_add_flags(PCI_PROBE_ONLY); return NULL; } return str; @@ -622,6 +596,8 @@ char * __init pcibios_setup(char *str) resource_size_t pcibios_align_resource(void *data, const struct resource *res, resource_size_t size, resource_size_t align) { + struct pci_dev *dev = data; + struct pci_sys_data *sys = dev->sysdata; resource_size_t start = res->start; if (res->flags & IORESOURCE_IO && start & 0x300) @@ -629,6 +605,9 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, start = (start + align - 1) & ~(align - 1); + if (sys->align_resource) + return sys->align_resource(dev, res, start, size, align); + return start; } @@ -638,41 +617,10 @@ resource_size_t pcibios_align_resource(void *data, const struct resource *res, */ int pcibios_enable_device(struct pci_dev *dev, int mask) { - u16 cmd, old_cmd; - int idx; - struct resource *r; - - pci_read_config_word(dev, PCI_COMMAND, &cmd); - old_cmd = cmd; - for (idx = 0; idx < 6; idx++) { - /* Only set up the requested stuff */ - if (!(mask & (1 << idx))) - continue; + if (pci_has_flag(PCI_PROBE_ONLY)) + return 0; - r = dev->resource + idx; - if (!r->start && r->end) { - printk(KERN_ERR "PCI: Device %s not available because" - " of resource collisions\n", pci_name(dev)); - return -EINVAL; - } - if (r->flags & IORESOURCE_IO) - cmd |= PCI_COMMAND_IO; - if (r->flags & IORESOURCE_MEM) - cmd |= PCI_COMMAND_MEMORY; - } - - /* - * Bridges (eg, cardbus bridges) need to be fully enabled - */ - if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) - cmd |= PCI_COMMAND_IO | PCI_COMMAND_MEMORY; - - if (cmd != old_cmd) { - printk("PCI: enabling device %s (%04x -> %04x)\n", - pci_name(dev), old_cmd, cmd); - pci_write_config_word(dev, PCI_COMMAND, cmd); - } - return 0; + return pci_enable_resources(dev, mask); } int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, @@ -699,3 +647,15 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma, return 0; } + +void __init pci_map_io_early(unsigned long pfn) +{ + struct map_desc pci_io_desc = { + .virtual = PCI_IO_VIRT_BASE, + .type = MT_DEVICE, + .length = SZ_64K, + }; + + pci_io_desc.pfn = pfn; + iotable_init(&pci_io_desc, 1); +} diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 5c26eccef99..8f51bdcdacb 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -11,7 +11,7 @@ */ /* 0 */ CALL(sys_restart_syscall) CALL(sys_exit) - CALL(sys_fork_wrapper) + CALL(sys_fork) CALL(sys_read) CALL(sys_write) /* 5 */ CALL(sys_open) @@ -20,7 +20,7 @@ CALL(sys_creat) CALL(sys_link) /* 10 */ CALL(sys_unlink) - CALL(sys_execve_wrapper) + CALL(sys_execve) CALL(sys_chdir) CALL(OBSOLETE(sys_time)) /* used by libc4 */ CALL(sys_mknod) @@ -129,7 +129,7 @@ CALL(OBSOLETE(ABI(sys_ipc, sys_oabi_ipc))) CALL(sys_fsync) CALL(sys_sigreturn_wrapper) -/* 120 */ CALL(sys_clone_wrapper) +/* 120 */ CALL(sys_clone) CALL(sys_setdomainname) CALL(sys_newuname) CALL(sys_ni_syscall) /* modify_ldt */ @@ -178,7 +178,7 @@ CALL(sys_ni_syscall) /* vm86 */ CALL(sys_ni_syscall) /* was sys_query_module */ CALL(sys_poll) - CALL(sys_nfsservctl) + CALL(sys_ni_syscall) /* was nfsservctl */ /* 170 */ CALL(sys_setresgid16) CALL(sys_getresgid16) CALL(sys_prctl) @@ -195,11 +195,11 @@ CALL(sys_getcwd) CALL(sys_capget) /* 185 */ CALL(sys_capset) - CALL(sys_sigaltstack_wrapper) + CALL(sys_sigaltstack) CALL(sys_sendfile) CALL(sys_ni_syscall) /* getpmsg */ CALL(sys_ni_syscall) /* putpmsg */ -/* 190 */ CALL(sys_vfork_wrapper) +/* 190 */ CALL(sys_vfork) CALL(sys_getrlimit) CALL(sys_mmap2) CALL(ABI(sys_truncate64, sys_oabi_truncate64)) @@ -379,6 +379,19 @@ CALL(sys_fanotify_init) CALL(sys_fanotify_mark) CALL(sys_prlimit64) +/* 370 */ CALL(sys_name_to_handle_at) + CALL(sys_open_by_handle_at) + CALL(sys_clock_adjtime) + CALL(sys_syncfs) + CALL(sys_sendmmsg) +/* 375 */ CALL(sys_setns) + CALL(sys_process_vm_readv) + CALL(sys_process_vm_writev) + CALL(sys_kcmp) + CALL(sys_finit_module) +/* 380 */ CALL(sys_sched_setattr) + CALL(sys_sched_getattr) + CALL(sys_renameat2) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kernel/compat.h b/arch/arm/kernel/compat.h deleted file mode 100644 index 39264ab1b9c..00000000000 --- a/arch/arm/kernel/compat.h +++ /dev/null @@ -1,11 +0,0 @@ -/* - * linux/arch/arm/kernel/compat.h - * - * Copyright (C) 2001 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. -*/ - -extern void convert_to_tag_list(struct tag *tags); diff --git a/arch/arm/kernel/cpuidle.c b/arch/arm/kernel/cpuidle.c new file mode 100644 index 00000000000..89545f6c840 --- /dev/null +++ b/arch/arm/kernel/cpuidle.c @@ -0,0 +1,21 @@ +/* + * Copyright 2012 Linaro Ltd. + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#include <linux/cpuidle.h> +#include <asm/proc-fns.h> + +int arm_cpuidle_simple_enter(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + cpu_do_idle(); + + return index; +} diff --git a/arch/arm/kernel/crash_dump.c b/arch/arm/kernel/crash_dump.c index cd3b853a8a6..5d1286d5115 100644 --- a/arch/arm/kernel/crash_dump.c +++ b/arch/arm/kernel/crash_dump.c @@ -18,9 +18,6 @@ #include <linux/uaccess.h> #include <linux/io.h> -/* stores the physical address of elf header of crash image */ -unsigned long long elfcorehdr_addr = ELFCORE_ADDR_MAX; - /** * copy_oldmem_page() - copy one page from old kernel memory * @pfn: page frame number to be copied @@ -42,7 +39,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf, if (!csize) return 0; - vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE); + vaddr = ioremap(__pfn_to_phys(pfn), PAGE_SIZE); if (!vaddr) return -ENOMEM; diff --git a/arch/arm/kernel/crunch-bits.S b/arch/arm/kernel/crunch-bits.S deleted file mode 100644 index 0ec9bb48fab..00000000000 --- a/arch/arm/kernel/crunch-bits.S +++ /dev/null @@ -1,305 +0,0 @@ -/* - * arch/arm/kernel/crunch-bits.S - * Cirrus MaverickCrunch context switching and handling - * - * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> - * - * Shamelessly stolen from the iWMMXt code by Nicolas Pitre, which is - * Copyright (c) 2003-2004, MontaVista Software, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/linkage.h> -#include <asm/ptrace.h> -#include <asm/thread_info.h> -#include <asm/asm-offsets.h> -#include <mach/ep93xx-regs.h> - -/* - * We can't use hex constants here due to a bug in gas. - */ -#define CRUNCH_MVDX0 0 -#define CRUNCH_MVDX1 8 -#define CRUNCH_MVDX2 16 -#define CRUNCH_MVDX3 24 -#define CRUNCH_MVDX4 32 -#define CRUNCH_MVDX5 40 -#define CRUNCH_MVDX6 48 -#define CRUNCH_MVDX7 56 -#define CRUNCH_MVDX8 64 -#define CRUNCH_MVDX9 72 -#define CRUNCH_MVDX10 80 -#define CRUNCH_MVDX11 88 -#define CRUNCH_MVDX12 96 -#define CRUNCH_MVDX13 104 -#define CRUNCH_MVDX14 112 -#define CRUNCH_MVDX15 120 -#define CRUNCH_MVAX0L 128 -#define CRUNCH_MVAX0M 132 -#define CRUNCH_MVAX0H 136 -#define CRUNCH_MVAX1L 140 -#define CRUNCH_MVAX1M 144 -#define CRUNCH_MVAX1H 148 -#define CRUNCH_MVAX2L 152 -#define CRUNCH_MVAX2M 156 -#define CRUNCH_MVAX2H 160 -#define CRUNCH_MVAX3L 164 -#define CRUNCH_MVAX3M 168 -#define CRUNCH_MVAX3H 172 -#define CRUNCH_DSPSC 176 - -#define CRUNCH_SIZE 184 - - .text - -/* - * Lazy switching of crunch coprocessor context - * - * r10 = struct thread_info pointer - * r9 = ret_from_exception - * lr = undefined instr exit - * - * called from prefetch exception handler with interrupts disabled - */ -ENTRY(crunch_task_enable) - ldr r8, =(EP93XX_APB_VIRT_BASE + 0x00130000) @ syscon addr - - ldr r1, [r8, #0x80] - tst r1, #0x00800000 @ access to crunch enabled? - movne pc, lr @ if so no business here - mov r3, #0xaa @ unlock syscon swlock - str r3, [r8, #0xc0] - orr r1, r1, #0x00800000 @ enable access to crunch - str r1, [r8, #0x80] - - ldr r3, =crunch_owner - add r0, r10, #TI_CRUNCH_STATE @ get task crunch save area - ldr r2, [sp, #60] @ current task pc value - ldr r1, [r3] @ get current crunch owner - str r0, [r3] @ this task now owns crunch - sub r2, r2, #4 @ adjust pc back - str r2, [sp, #60] - - ldr r2, [r8, #0x80] - mov r2, r2 @ flush out enable (@@@) - - teq r1, #0 @ test for last ownership - mov lr, r9 @ normal exit from exception - beq crunch_load @ no owner, skip save - -crunch_save: - cfstr64 mvdx0, [r1, #CRUNCH_MVDX0] @ save 64b registers - cfstr64 mvdx1, [r1, #CRUNCH_MVDX1] - cfstr64 mvdx2, [r1, #CRUNCH_MVDX2] - cfstr64 mvdx3, [r1, #CRUNCH_MVDX3] - cfstr64 mvdx4, [r1, #CRUNCH_MVDX4] - cfstr64 mvdx5, [r1, #CRUNCH_MVDX5] - cfstr64 mvdx6, [r1, #CRUNCH_MVDX6] - cfstr64 mvdx7, [r1, #CRUNCH_MVDX7] - cfstr64 mvdx8, [r1, #CRUNCH_MVDX8] - cfstr64 mvdx9, [r1, #CRUNCH_MVDX9] - cfstr64 mvdx10, [r1, #CRUNCH_MVDX10] - cfstr64 mvdx11, [r1, #CRUNCH_MVDX11] - cfstr64 mvdx12, [r1, #CRUNCH_MVDX12] - cfstr64 mvdx13, [r1, #CRUNCH_MVDX13] - cfstr64 mvdx14, [r1, #CRUNCH_MVDX14] - cfstr64 mvdx15, [r1, #CRUNCH_MVDX15] - -#ifdef __ARMEB__ -#error fix me for ARMEB -#endif - - cfmv32al mvfx0, mvax0 @ save 72b accumulators - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0L] - cfmv32am mvfx0, mvax0 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0M] - cfmv32ah mvfx0, mvax0 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX0H] - cfmv32al mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1L] - cfmv32am mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1M] - cfmv32ah mvfx0, mvax1 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX1H] - cfmv32al mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2L] - cfmv32am mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2M] - cfmv32ah mvfx0, mvax2 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX2H] - cfmv32al mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3L] - cfmv32am mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3M] - cfmv32ah mvfx0, mvax3 - cfstr32 mvfx0, [r1, #CRUNCH_MVAX3H] - - cfmv32sc mvdx0, dspsc @ save status word - cfstr64 mvdx0, [r1, #CRUNCH_DSPSC] - - teq r0, #0 @ anything to load? - cfldr64eq mvdx0, [r1, #CRUNCH_MVDX0] @ mvdx0 was clobbered - moveq pc, lr - -crunch_load: - cfldr64 mvdx0, [r0, #CRUNCH_DSPSC] @ load status word - cfmvsc32 dspsc, mvdx0 - - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0L] @ load 72b accumulators - cfmval32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0M] - cfmvam32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX0H] - cfmvah32 mvax0, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1L] - cfmval32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1M] - cfmvam32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX1H] - cfmvah32 mvax1, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2L] - cfmval32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2M] - cfmvam32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX2H] - cfmvah32 mvax2, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3L] - cfmval32 mvax3, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3M] - cfmvam32 mvax3, mvfx0 - cfldr32 mvfx0, [r0, #CRUNCH_MVAX3H] - cfmvah32 mvax3, mvfx0 - - cfldr64 mvdx0, [r0, #CRUNCH_MVDX0] @ load 64b registers - cfldr64 mvdx1, [r0, #CRUNCH_MVDX1] - cfldr64 mvdx2, [r0, #CRUNCH_MVDX2] - cfldr64 mvdx3, [r0, #CRUNCH_MVDX3] - cfldr64 mvdx4, [r0, #CRUNCH_MVDX4] - cfldr64 mvdx5, [r0, #CRUNCH_MVDX5] - cfldr64 mvdx6, [r0, #CRUNCH_MVDX6] - cfldr64 mvdx7, [r0, #CRUNCH_MVDX7] - cfldr64 mvdx8, [r0, #CRUNCH_MVDX8] - cfldr64 mvdx9, [r0, #CRUNCH_MVDX9] - cfldr64 mvdx10, [r0, #CRUNCH_MVDX10] - cfldr64 mvdx11, [r0, #CRUNCH_MVDX11] - cfldr64 mvdx12, [r0, #CRUNCH_MVDX12] - cfldr64 mvdx13, [r0, #CRUNCH_MVDX13] - cfldr64 mvdx14, [r0, #CRUNCH_MVDX14] - cfldr64 mvdx15, [r0, #CRUNCH_MVDX15] - - mov pc, lr - -/* - * Back up crunch regs to save area and disable access to them - * (mainly for gdb or sleep mode usage) - * - * r0 = struct thread_info pointer of target task or NULL for any - */ -ENTRY(crunch_task_disable) - stmfd sp!, {r4, r5, lr} - - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r4, =(EP93XX_APB_VIRT_BASE + 0x00130000) @ syscon addr - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r1, [r3] @ get current crunch owner - teq r1, #0 @ any current owner? - beq 1f @ no: quit - teq r0, #0 @ any owner? - teqne r1, r2 @ or specified one? - bne 1f @ no: quit - - ldr r5, [r4, #0x80] @ enable access to crunch - mov r2, #0xaa - str r2, [r4, #0xc0] - orr r5, r5, #0x00800000 - str r5, [r4, #0x80] - - mov r0, #0 @ nothing to load - str r0, [r3] @ no more current owner - ldr r2, [r4, #0x80] @ flush out enable (@@@) - mov r2, r2 - bl crunch_save - - mov r2, #0xaa @ disable access to crunch - str r2, [r4, #0xc0] - bic r5, r5, #0x00800000 - str r5, [r4, #0x80] - ldr r5, [r4, #0x80] @ flush out enable (@@@) - mov r5, r5 - -1: msr cpsr_c, ip @ restore interrupt mode - ldmfd sp!, {r4, r5, pc} - -/* - * Copy crunch state to given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to store crunch state - * - * this is called mainly in the creation of signal stack frames - */ -ENTRY(crunch_task_copy) - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r3, [r3] @ get current crunch owner - teq r2, r3 @ does this task own it... - beq 1f - - @ current crunch values are in the task save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r1 - mov r1, r2 - mov r2, #CRUNCH_SIZE - b memcpy - -1: @ this task owns crunch regs -- grab a copy from there - mov r0, #0 @ nothing to load - mov r3, lr @ preserve return address - bl crunch_save - msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 - -/* - * Restore crunch state from given memory address - * - * r0 = struct thread_info pointer of target task - * r1 = memory address where to get crunch state from - * - * this is used to restore crunch state when unwinding a signal stack frame - */ -ENTRY(crunch_task_restore) - mrs ip, cpsr - orr r2, ip, #PSR_I_BIT @ disable interrupts - msr cpsr_c, r2 - - ldr r3, =crunch_owner - add r2, r0, #TI_CRUNCH_STATE @ get task crunch save area - ldr r3, [r3] @ get current crunch owner - teq r2, r3 @ does this task own it... - beq 1f - - @ this task doesn't own crunch regs -- use its save area - msr cpsr_c, ip @ restore interrupt mode - mov r0, r2 - mov r2, #CRUNCH_SIZE - b memcpy - -1: @ this task owns crunch regs -- load them directly - mov r0, r1 - mov r1, #0 @ nothing to save - mov r3, lr @ preserve return address - bl crunch_load - msr cpsr_c, ip @ restore interrupt mode - mov pc, r3 diff --git a/arch/arm/kernel/crunch.c b/arch/arm/kernel/crunch.c deleted file mode 100644 index 25ef223ba7f..00000000000 --- a/arch/arm/kernel/crunch.c +++ /dev/null @@ -1,88 +0,0 @@ -/* - * arch/arm/kernel/crunch.c - * Cirrus MaverickCrunch context switching and handling - * - * Copyright (C) 2006 Lennert Buytenhek <buytenh@wantstofly.org> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/kernel.h> -#include <linux/signal.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/io.h> -#include <mach/ep93xx-regs.h> -#include <asm/thread_notify.h> - -struct crunch_state *crunch_owner; - -void crunch_task_release(struct thread_info *thread) -{ - local_irq_disable(); - if (crunch_owner == &thread->crunchstate) - crunch_owner = NULL; - local_irq_enable(); -} - -static int crunch_enabled(u32 devcfg) -{ - return !!(devcfg & EP93XX_SYSCON_DEVCFG_CPENA); -} - -static int crunch_do(struct notifier_block *self, unsigned long cmd, void *t) -{ - struct thread_info *thread = (struct thread_info *)t; - struct crunch_state *crunch_state; - u32 devcfg; - - crunch_state = &thread->crunchstate; - - switch (cmd) { - case THREAD_NOTIFY_FLUSH: - memset(crunch_state, 0, sizeof(*crunch_state)); - - /* - * FALLTHROUGH: Ensure we don't try to overwrite our newly - * initialised state information on the first fault. - */ - - case THREAD_NOTIFY_EXIT: - crunch_task_release(thread); - break; - - case THREAD_NOTIFY_SWITCH: - devcfg = __raw_readl(EP93XX_SYSCON_DEVCFG); - if (crunch_enabled(devcfg) || crunch_owner == crunch_state) { - /* - * We don't use ep93xx_syscon_swlocked_write() here - * because we are on the context switch path and - * preemption is already disabled. - */ - devcfg ^= EP93XX_SYSCON_DEVCFG_CPENA; - __raw_writel(0xaa, EP93XX_SYSCON_SWLOCK); - __raw_writel(devcfg, EP93XX_SYSCON_DEVCFG); - } - break; - } - - return NOTIFY_DONE; -} - -static struct notifier_block crunch_notifier_block = { - .notifier_call = crunch_do, -}; - -static int __init crunch_init(void) -{ - thread_register_notifier(&crunch_notifier_block); - elf_hwcap |= HWCAP_CRUNCH; - - return 0; -} - -late_initcall(crunch_init); diff --git a/arch/arm/kernel/debug.S b/arch/arm/kernel/debug.S index a0f07521ca8..14f7c3b1463 100644 --- a/arch/arm/kernel/debug.S +++ b/arch/arm/kernel/debug.S @@ -10,6 +10,7 @@ * 32-bit debugging code */ #include <linux/linkage.h> +#include <asm/assembler.h> .text @@ -19,111 +20,13 @@ * references to these in a production kernel! */ -#if defined(CONFIG_DEBUG_ICEDCC) - @@ debug using ARM EmbeddedICE DCC channel - - .macro addruart, rp, rv - .endm - -#if defined(CONFIG_CPU_V6) - - .macro senduart, rd, rx - mcr p14, 0, \rd, c0, c5, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c0, c1, 0 - tst \rx, #0x20000000 - beq 1001b - .endm - - .macro waituart, rd, rx - mov \rd, #0x2000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c0, c1, 0 - tst \rx, #0x20000000 - bne 1001b -1002: - .endm - -#elif defined(CONFIG_CPU_V7) - - .macro senduart, rd, rx - mcr p14, 0, \rd, c0, c5, 0 - .endm - - .macro busyuart, rd, rx -busy: mrc p14, 0, pc, c0, c1, 0 - bcs busy - .endm - - .macro waituart, rd, rx -wait: mrc p14, 0, pc, c0, c1, 0 - bcs wait - - .endm - -#elif defined(CONFIG_CPU_XSCALE) - - .macro senduart, rd, rx - mcr p14, 0, \rd, c8, c0, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c14, c0, 0 - tst \rx, #0x10000000 - beq 1001b - .endm - - .macro waituart, rd, rx - mov \rd, #0x10000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c14, c0, 0 - tst \rx, #0x10000000 - bne 1001b -1002: - .endm - -#else - - .macro senduart, rd, rx - mcr p14, 0, \rd, c1, c0, 0 - .endm - - .macro busyuart, rd, rx -1001: - mrc p14, 0, \rx, c0, c0, 0 - tst \rx, #2 - beq 1001b - - .endm - - .macro waituart, rd, rx - mov \rd, #0x2000000 -1001: - subs \rd, \rd, #1 - bmi 1002f - mrc p14, 0, \rx, c0, c0, 0 - tst \rx, #2 - bne 1001b -1002: - .endm - -#endif /* CONFIG_CPU_V6 */ - -#else -#include <mach/debug-macro.S> -#endif /* CONFIG_DEBUG_ICEDCC */ +#if !defined(CONFIG_DEBUG_SEMIHOSTING) +#include CONFIG_DEBUG_LL_INCLUDE +#endif #ifdef CONFIG_MMU .macro addruart_current, rx, tmp1, tmp2 - addruart \tmp1, \tmp2 + addruart \tmp1, \tmp2, \rx mrc p15, 0, \rx, c1, c0 tst \rx, #1 moveq \rx, \tmp1 @@ -168,8 +71,12 @@ printhex: adr r2, hexbuf b printascii ENDPROC(printhex2) +hexbuf: .space 16 + .ltorg +#ifndef CONFIG_DEBUG_SEMIHOSTING + ENTRY(printascii) addruart_current r3, r1, r2 b 2f @@ -193,4 +100,39 @@ ENTRY(printch) b 1b ENDPROC(printch) -hexbuf: .space 16 +#ifdef CONFIG_MMU +ENTRY(debug_ll_addr) + addruart r2, r3, ip + str r2, [r0] + str r3, [r1] + mov pc, lr +ENDPROC(debug_ll_addr) +#endif + +#else + +ENTRY(printascii) + mov r1, r0 + mov r0, #0x04 @ SYS_WRITE0 + ARM( svc #0x123456 ) + THUMB( svc #0xab ) + mov pc, lr +ENDPROC(printascii) + +ENTRY(printch) + adr r1, hexbuf + strb r0, [r1] + mov r0, #0x03 @ SYS_WRITEC + ARM( svc #0x123456 ) + THUMB( svc #0xab ) + mov pc, lr +ENDPROC(printch) + +ENTRY(debug_ll_addr) + mov r2, #0 + str r2, [r0] + str r2, [r1] + mov pc, lr +ENDPROC(debug_ll_addr) + +#endif diff --git a/arch/arm/kernel/devtree.c b/arch/arm/kernel/devtree.c new file mode 100644 index 00000000000..11c54de9f8c --- /dev/null +++ b/arch/arm/kernel/devtree.c @@ -0,0 +1,250 @@ +/* + * linux/arch/arm/kernel/devtree.c + * + * Copyright (C) 2009 Canonical Ltd. <jeremy.kerr@canonical.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/init.h> +#include <linux/export.h> +#include <linux/errno.h> +#include <linux/types.h> +#include <linux/bootmem.h> +#include <linux/memblock.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/of_irq.h> +#include <linux/of_platform.h> +#include <linux/smp.h> + +#include <asm/cputype.h> +#include <asm/setup.h> +#include <asm/page.h> +#include <asm/smp_plat.h> +#include <asm/mach/arch.h> +#include <asm/mach-types.h> + + +#ifdef CONFIG_SMP +extern struct of_cpu_method __cpu_method_of_table[]; + +static const struct of_cpu_method __cpu_method_of_table_sentinel + __used __section(__cpu_method_of_table_end); + + +static int __init set_smp_ops_by_method(struct device_node *node) +{ + const char *method; + struct of_cpu_method *m = __cpu_method_of_table; + + if (of_property_read_string(node, "enable-method", &method)) + return 0; + + for (; m->method; m++) + if (!strcmp(m->method, method)) { + smp_set_ops(m->ops); + return 1; + } + + return 0; +} +#else +static inline int set_smp_ops_by_method(struct device_node *node) +{ + return 1; +} +#endif + + +/* + * arm_dt_init_cpu_maps - Function retrieves cpu nodes from the device tree + * and builds the cpu logical map array containing MPIDR values related to + * logical cpus + * + * Updates the cpu possible mask with the number of parsed cpu nodes + */ +void __init arm_dt_init_cpu_maps(void) +{ + /* + * Temp logical map is initialized with UINT_MAX values that are + * considered invalid logical map entries since the logical map must + * contain a list of MPIDR[23:0] values where MPIDR[31:24] must + * read as 0. + */ + struct device_node *cpu, *cpus; + int found_method = 0; + u32 i, j, cpuidx = 1; + u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0; + + u32 tmp_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID }; + bool bootcpu_valid = false; + cpus = of_find_node_by_path("/cpus"); + + if (!cpus) + return; + + for_each_child_of_node(cpus, cpu) { + u32 hwid; + + if (of_node_cmp(cpu->type, "cpu")) + continue; + + pr_debug(" * %s...\n", cpu->full_name); + /* + * A device tree containing CPU nodes with missing "reg" + * properties is considered invalid to build the + * cpu_logical_map. + */ + if (of_property_read_u32(cpu, "reg", &hwid)) { + pr_debug(" * %s missing reg property\n", + cpu->full_name); + return; + } + + /* + * 8 MSBs must be set to 0 in the DT since the reg property + * defines the MPIDR[23:0]. + */ + if (hwid & ~MPIDR_HWID_BITMASK) + return; + + /* + * Duplicate MPIDRs are a recipe for disaster. + * Scan all initialized entries and check for + * duplicates. If any is found just bail out. + * temp values were initialized to UINT_MAX + * to avoid matching valid MPIDR[23:0] values. + */ + for (j = 0; j < cpuidx; j++) + if (WARN(tmp_map[j] == hwid, "Duplicate /cpu reg " + "properties in the DT\n")) + return; + + /* + * Build a stashed array of MPIDR values. Numbering scheme + * requires that if detected the boot CPU must be assigned + * logical id 0. Other CPUs get sequential indexes starting + * from 1. If a CPU node with a reg property matching the + * boot CPU MPIDR is detected, this is recorded so that the + * logical map built from DT is validated and can be used + * to override the map created in smp_setup_processor_id(). + */ + if (hwid == mpidr) { + i = 0; + bootcpu_valid = true; + } else { + i = cpuidx++; + } + + if (WARN(cpuidx > nr_cpu_ids, "DT /cpu %u nodes greater than " + "max cores %u, capping them\n", + cpuidx, nr_cpu_ids)) { + cpuidx = nr_cpu_ids; + break; + } + + tmp_map[i] = hwid; + + if (!found_method) + found_method = set_smp_ops_by_method(cpu); + } + + /* + * Fallback to an enable-method in the cpus node if nothing found in + * a cpu node. + */ + if (!found_method) + set_smp_ops_by_method(cpus); + + if (!bootcpu_valid) { + pr_warn("DT missing boot CPU MPIDR[23:0], fall back to default cpu_logical_map\n"); + return; + } + + /* + * Since the boot CPU node contains proper data, and all nodes have + * a reg property, the DT CPU list can be considered valid and the + * logical map created in smp_setup_processor_id() can be overridden + */ + for (i = 0; i < cpuidx; i++) { + set_cpu_possible(i, true); + cpu_logical_map(i) = tmp_map[i]; + pr_debug("cpu logical map 0x%x\n", cpu_logical_map(i)); + } +} + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpu_logical_map(cpu); +} + +static const void * __init arch_get_next_mach(const char *const **match) +{ + static const struct machine_desc *mdesc = __arch_info_begin; + const struct machine_desc *m = mdesc; + + if (m >= __arch_info_end) + return NULL; + + mdesc++; + *match = m->dt_compat; + return m; +} + +/** + * setup_machine_fdt - Machine setup when an dtb was passed to the kernel + * @dt_phys: physical address of dt blob + * + * If a dtb was passed to the kernel in r2, then use it to choose the + * correct machine_desc and to setup the system. + */ +const struct machine_desc * __init setup_machine_fdt(unsigned int dt_phys) +{ + const struct machine_desc *mdesc, *mdesc_best = NULL; + +#ifdef CONFIG_ARCH_MULTIPLATFORM + DT_MACHINE_START(GENERIC_DT, "Generic DT based system") + MACHINE_END + + mdesc_best = &__mach_desc_GENERIC_DT; +#endif + + if (!dt_phys || !early_init_dt_verify(phys_to_virt(dt_phys))) + return NULL; + + mdesc = of_flat_dt_match_machine(mdesc_best, arch_get_next_mach); + + if (!mdesc) { + const char *prop; + int size; + unsigned long dt_root; + + early_print("\nError: unrecognized/unsupported " + "device tree compatible list:\n[ "); + + dt_root = of_get_flat_dt_root(); + prop = of_get_flat_dt_prop(dt_root, "compatible", &size); + while (size > 0) { + early_print("'%s' ", prop); + size -= strlen(prop) + 1; + prop += strlen(prop) + 1; + } + early_print("]\n\n"); + + dump_machine_table(); /* does not return */ + } + + /* We really don't want to do this, but sometimes firmware provides buggy data */ + if (mdesc->dt_fixup) + mdesc->dt_fixup(); + + early_init_dt_scan_nodes(); + + /* Change machine number to match the mdesc we're using */ + __machine_arch_type = mdesc->nr; + + return mdesc; +} diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c index 2c4a185f92c..7b829d9663b 100644 --- a/arch/arm/kernel/dma.c +++ b/arch/arm/kernel/dma.c @@ -23,7 +23,7 @@ #include <asm/mach/dma.h> -DEFINE_SPINLOCK(dma_spin_lock); +DEFINE_RAW_SPINLOCK(dma_spin_lock); EXPORT_SYMBOL(dma_spin_lock); static dma_t *dma_chan[MAX_DMA_CHANNELS]; diff --git a/arch/arm/kernel/early_printk.c b/arch/arm/kernel/early_printk.c index 85aa2b29269..43076536965 100644 --- a/arch/arm/kernel/early_printk.c +++ b/arch/arm/kernel/early_printk.c @@ -29,28 +29,17 @@ static void early_console_write(struct console *con, const char *s, unsigned n) early_write(s, n); } -static struct console early_console = { +static struct console early_console_dev = { .name = "earlycon", .write = early_console_write, .flags = CON_PRINTBUFFER | CON_BOOT, .index = -1, }; -asmlinkage void early_printk(const char *fmt, ...) -{ - char buf[512]; - int n; - va_list ap; - - va_start(ap, fmt); - n = vscnprintf(buf, sizeof(buf), fmt, ap); - early_write(buf, n); - va_end(ap); -} - static int __init setup_early_printk(char *buf) { - register_console(&early_console); + early_console = &early_console_dev; + register_console(&early_console_dev); return 0; } diff --git a/arch/arm/kernel/ecard.c b/arch/arm/kernel/ecard.c deleted file mode 100644 index eed2f795e1b..00000000000 --- a/arch/arm/kernel/ecard.c +++ /dev/null @@ -1,1241 +0,0 @@ -/* - * linux/arch/arm/kernel/ecard.c - * - * Copyright 1995-2001 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Find all installed expansion cards, and handle interrupts from them. - * - * Created from information from Acorns RiscOS3 PRMs - * - * 08-Dec-1996 RMK Added code for the 9'th expansion card - the ether - * podule slot. - * 06-May-1997 RMK Added blacklist for cards whose loader doesn't work. - * 12-Sep-1997 RMK Created new handling of interrupt enables/disables - * - cards can now register their own routine to control - * interrupts (recommended). - * 29-Sep-1997 RMK Expansion card interrupt hardware not being re-enabled - * on reset from Linux. (Caused cards not to respond - * under RiscOS without hard reset). - * 15-Feb-1998 RMK Added DMA support - * 12-Sep-1998 RMK Added EASI support - * 10-Jan-1999 RMK Run loaders in a simulated RISC OS environment. - * 17-Apr-1999 RMK Support for EASI Type C cycles. - */ -#define ECARD_C - -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/interrupt.h> -#include <linux/completion.h> -#include <linux/reboot.h> -#include <linux/mm.h> -#include <linux/slab.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/device.h> -#include <linux/init.h> -#include <linux/mutex.h> -#include <linux/kthread.h> -#include <linux/io.h> - -#include <asm/dma.h> -#include <asm/ecard.h> -#include <mach/hardware.h> -#include <asm/irq.h> -#include <asm/mmu_context.h> -#include <asm/mach/irq.h> -#include <asm/tlbflush.h> - -#include "ecard.h" - -#ifndef CONFIG_ARCH_RPC -#define HAVE_EXPMASK -#endif - -struct ecard_request { - void (*fn)(struct ecard_request *); - ecard_t *ec; - unsigned int address; - unsigned int length; - unsigned int use_loader; - void *buffer; - struct completion *complete; -}; - -struct expcard_blacklist { - unsigned short manufacturer; - unsigned short product; - const char *type; -}; - -static ecard_t *cards; -static ecard_t *slot_to_expcard[MAX_ECARDS]; -static unsigned int ectcr; -#ifdef HAS_EXPMASK -static unsigned int have_expmask; -#endif - -/* List of descriptions of cards which don't have an extended - * identification, or chunk directories containing a description. - */ -static struct expcard_blacklist __initdata blacklist[] = { - { MANU_ACORN, PROD_ACORN_ETHER1, "Acorn Ether1" } -}; - -asmlinkage extern int -ecard_loader_reset(unsigned long base, loader_t loader); -asmlinkage extern int -ecard_loader_read(int off, unsigned long base, loader_t loader); - -static inline unsigned short ecard_getu16(unsigned char *v) -{ - return v[0] | v[1] << 8; -} - -static inline signed long ecard_gets24(unsigned char *v) -{ - return v[0] | v[1] << 8 | v[2] << 16 | ((v[2] & 0x80) ? 0xff000000 : 0); -} - -static inline ecard_t *slot_to_ecard(unsigned int slot) -{ - return slot < MAX_ECARDS ? slot_to_expcard[slot] : NULL; -} - -/* ===================== Expansion card daemon ======================== */ -/* - * Since the loader programs on the expansion cards need to be run - * in a specific environment, create a separate task with this - * environment up, and pass requests to this task as and when we - * need to. - * - * This should allow 99% of loaders to be called from Linux. - * - * From a security standpoint, we trust the card vendors. This - * may be a misplaced trust. - */ -static void ecard_task_reset(struct ecard_request *req) -{ - struct expansion_card *ec = req->ec; - struct resource *res; - - res = ec->slot_no == 8 - ? &ec->resource[ECARD_RES_MEMC] - : ec->easi - ? &ec->resource[ECARD_RES_EASI] - : &ec->resource[ECARD_RES_IOCSYNC]; - - ecard_loader_reset(res->start, ec->loader); -} - -static void ecard_task_readbytes(struct ecard_request *req) -{ - struct expansion_card *ec = req->ec; - unsigned char *buf = req->buffer; - unsigned int len = req->length; - unsigned int off = req->address; - - if (ec->slot_no == 8) { - void __iomem *base = (void __iomem *) - ec->resource[ECARD_RES_MEMC].start; - - /* - * The card maintains an index which increments the address - * into a 4096-byte page on each access. We need to keep - * track of the counter. - */ - static unsigned int index; - unsigned int page; - - page = (off >> 12) * 4; - if (page > 256 * 4) - return; - - off &= 4095; - - /* - * If we are reading offset 0, or our current index is - * greater than the offset, reset the hardware index counter. - */ - if (off == 0 || index > off) { - writeb(0, base); - index = 0; - } - - /* - * Increment the hardware index counter until we get to the - * required offset. The read bytes are discarded. - */ - while (index < off) { - readb(base + page); - index += 1; - } - - while (len--) { - *buf++ = readb(base + page); - index += 1; - } - } else { - unsigned long base = (ec->easi - ? &ec->resource[ECARD_RES_EASI] - : &ec->resource[ECARD_RES_IOCSYNC])->start; - void __iomem *pbase = (void __iomem *)base; - - if (!req->use_loader || !ec->loader) { - off *= 4; - while (len--) { - *buf++ = readb(pbase + off); - off += 4; - } - } else { - while(len--) { - /* - * The following is required by some - * expansion card loader programs. - */ - *(unsigned long *)0x108 = 0; - *buf++ = ecard_loader_read(off++, base, - ec->loader); - } - } - } - -} - -static DECLARE_WAIT_QUEUE_HEAD(ecard_wait); -static struct ecard_request *ecard_req; -static DEFINE_MUTEX(ecard_mutex); - -/* - * Set up the expansion card daemon's page tables. - */ -static void ecard_init_pgtables(struct mm_struct *mm) -{ - struct vm_area_struct vma; - - /* We want to set up the page tables for the following mapping: - * Virtual Physical - * 0x03000000 0x03000000 - * 0x03010000 unmapped - * 0x03210000 0x03210000 - * 0x03400000 unmapped - * 0x08000000 0x08000000 - * 0x10000000 unmapped - * - * FIXME: we don't follow this 100% yet. - */ - pgd_t *src_pgd, *dst_pgd; - - src_pgd = pgd_offset(mm, (unsigned long)IO_BASE); - dst_pgd = pgd_offset(mm, IO_START); - - memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (IO_SIZE / PGDIR_SIZE)); - - src_pgd = pgd_offset(mm, EASI_BASE); - dst_pgd = pgd_offset(mm, EASI_START); - - memcpy(dst_pgd, src_pgd, sizeof(pgd_t) * (EASI_SIZE / PGDIR_SIZE)); - - vma.vm_mm = mm; - - flush_tlb_range(&vma, IO_START, IO_START + IO_SIZE); - flush_tlb_range(&vma, EASI_START, EASI_START + EASI_SIZE); -} - -static int ecard_init_mm(void) -{ - struct mm_struct * mm = mm_alloc(); - struct mm_struct *active_mm = current->active_mm; - - if (!mm) - return -ENOMEM; - - current->mm = mm; - current->active_mm = mm; - activate_mm(active_mm, mm); - mmdrop(active_mm); - ecard_init_pgtables(mm); - return 0; -} - -static int -ecard_task(void * unused) -{ - /* - * Allocate a mm. We're not a lazy-TLB kernel task since we need - * to set page table entries where the user space would be. Note - * that this also creates the page tables. Failure is not an - * option here. - */ - if (ecard_init_mm()) - panic("kecardd: unable to alloc mm\n"); - - while (1) { - struct ecard_request *req; - - wait_event_interruptible(ecard_wait, ecard_req != NULL); - - req = xchg(&ecard_req, NULL); - if (req != NULL) { - req->fn(req); - complete(req->complete); - } - } -} - -/* - * Wake the expansion card daemon to action our request. - * - * FIXME: The test here is not sufficient to detect if the - * kcardd is running. - */ -static void ecard_call(struct ecard_request *req) -{ - DECLARE_COMPLETION_ONSTACK(completion); - - req->complete = &completion; - - mutex_lock(&ecard_mutex); - ecard_req = req; - wake_up(&ecard_wait); - - /* - * Now wait for kecardd to run. - */ - wait_for_completion(&completion); - mutex_unlock(&ecard_mutex); -} - -/* ======================= Mid-level card control ===================== */ - -static void -ecard_readbytes(void *addr, ecard_t *ec, int off, int len, int useld) -{ - struct ecard_request req; - - req.fn = ecard_task_readbytes; - req.ec = ec; - req.address = off; - req.length = len; - req.use_loader = useld; - req.buffer = addr; - - ecard_call(&req); -} - -int ecard_readchunk(struct in_chunk_dir *cd, ecard_t *ec, int id, int num) -{ - struct ex_chunk_dir excd; - int index = 16; - int useld = 0; - - if (!ec->cid.cd) - return 0; - - while(1) { - ecard_readbytes(&excd, ec, index, 8, useld); - index += 8; - if (c_id(&excd) == 0) { - if (!useld && ec->loader) { - useld = 1; - index = 0; - continue; - } - return 0; - } - if (c_id(&excd) == 0xf0) { /* link */ - index = c_start(&excd); - continue; - } - if (c_id(&excd) == 0x80) { /* loader */ - if (!ec->loader) { - ec->loader = kmalloc(c_len(&excd), - GFP_KERNEL); - if (ec->loader) - ecard_readbytes(ec->loader, ec, - (int)c_start(&excd), - c_len(&excd), useld); - else - return 0; - } - continue; - } - if (c_id(&excd) == id && num-- == 0) - break; - } - - if (c_id(&excd) & 0x80) { - switch (c_id(&excd) & 0x70) { - case 0x70: - ecard_readbytes((unsigned char *)excd.d.string, ec, - (int)c_start(&excd), c_len(&excd), - useld); - break; - case 0x00: - break; - } - } - cd->start_offset = c_start(&excd); - memcpy(cd->d.string, excd.d.string, 256); - return 1; -} - -/* ======================= Interrupt control ============================ */ - -static void ecard_def_irq_enable(ecard_t *ec, int irqnr) -{ -#ifdef HAS_EXPMASK - if (irqnr < 4 && have_expmask) { - have_expmask |= 1 << irqnr; - __raw_writeb(have_expmask, EXPMASK_ENABLE); - } -#endif -} - -static void ecard_def_irq_disable(ecard_t *ec, int irqnr) -{ -#ifdef HAS_EXPMASK - if (irqnr < 4 && have_expmask) { - have_expmask &= ~(1 << irqnr); - __raw_writeb(have_expmask, EXPMASK_ENABLE); - } -#endif -} - -static int ecard_def_irq_pending(ecard_t *ec) -{ - return !ec->irqmask || readb(ec->irqaddr) & ec->irqmask; -} - -static void ecard_def_fiq_enable(ecard_t *ec, int fiqnr) -{ - panic("ecard_def_fiq_enable called - impossible"); -} - -static void ecard_def_fiq_disable(ecard_t *ec, int fiqnr) -{ - panic("ecard_def_fiq_disable called - impossible"); -} - -static int ecard_def_fiq_pending(ecard_t *ec) -{ - return !ec->fiqmask || readb(ec->fiqaddr) & ec->fiqmask; -} - -static expansioncard_ops_t ecard_default_ops = { - ecard_def_irq_enable, - ecard_def_irq_disable, - ecard_def_irq_pending, - ecard_def_fiq_enable, - ecard_def_fiq_disable, - ecard_def_fiq_pending -}; - -/* - * Enable and disable interrupts from expansion cards. - * (interrupts are disabled for these functions). - * - * They are not meant to be called directly, but via enable/disable_irq. - */ -static void ecard_irq_unmask(unsigned int irqnr) -{ - ecard_t *ec = slot_to_ecard(irqnr - 32); - - if (ec) { - if (!ec->ops) - ec->ops = &ecard_default_ops; - - if (ec->claimed && ec->ops->irqenable) - ec->ops->irqenable(ec, irqnr); - else - printk(KERN_ERR "ecard: rejecting request to " - "enable IRQs for %d\n", irqnr); - } -} - -static void ecard_irq_mask(unsigned int irqnr) -{ - ecard_t *ec = slot_to_ecard(irqnr - 32); - - if (ec) { - if (!ec->ops) - ec->ops = &ecard_default_ops; - - if (ec->ops && ec->ops->irqdisable) - ec->ops->irqdisable(ec, irqnr); - } -} - -static struct irq_chip ecard_chip = { - .name = "ECARD", - .ack = ecard_irq_mask, - .mask = ecard_irq_mask, - .unmask = ecard_irq_unmask, -}; - -void ecard_enablefiq(unsigned int fiqnr) -{ - ecard_t *ec = slot_to_ecard(fiqnr); - - if (ec) { - if (!ec->ops) - ec->ops = &ecard_default_ops; - - if (ec->claimed && ec->ops->fiqenable) - ec->ops->fiqenable(ec, fiqnr); - else - printk(KERN_ERR "ecard: rejecting request to " - "enable FIQs for %d\n", fiqnr); - } -} - -void ecard_disablefiq(unsigned int fiqnr) -{ - ecard_t *ec = slot_to_ecard(fiqnr); - - if (ec) { - if (!ec->ops) - ec->ops = &ecard_default_ops; - - if (ec->ops->fiqdisable) - ec->ops->fiqdisable(ec, fiqnr); - } -} - -static void ecard_dump_irq_state(void) -{ - ecard_t *ec; - - printk("Expansion card IRQ state:\n"); - - for (ec = cards; ec; ec = ec->next) { - if (ec->slot_no == 8) - continue; - - printk(" %d: %sclaimed, ", - ec->slot_no, ec->claimed ? "" : "not "); - - if (ec->ops && ec->ops->irqpending && - ec->ops != &ecard_default_ops) - printk("irq %spending\n", - ec->ops->irqpending(ec) ? "" : "not "); - else - printk("irqaddr %p, mask = %02X, status = %02X\n", - ec->irqaddr, ec->irqmask, readb(ec->irqaddr)); - } -} - -static void ecard_check_lockup(struct irq_desc *desc) -{ - static unsigned long last; - static int lockup; - - /* - * If the timer interrupt has not run since the last million - * unrecognised expansion card interrupts, then there is - * something seriously wrong. Disable the expansion card - * interrupts so at least we can continue. - * - * Maybe we ought to start a timer to re-enable them some time - * later? - */ - if (last == jiffies) { - lockup += 1; - if (lockup > 1000000) { - printk(KERN_ERR "\nInterrupt lockup detected - " - "disabling all expansion card interrupts\n"); - - desc->chip->mask(IRQ_EXPANSIONCARD); - ecard_dump_irq_state(); - } - } else - lockup = 0; - - /* - * If we did not recognise the source of this interrupt, - * warn the user, but don't flood the user with these messages. - */ - if (!last || time_after(jiffies, last + 5*HZ)) { - last = jiffies; - printk(KERN_WARNING "Unrecognised interrupt from backplane\n"); - ecard_dump_irq_state(); - } -} - -static void -ecard_irq_handler(unsigned int irq, struct irq_desc *desc) -{ - ecard_t *ec; - int called = 0; - - desc->chip->mask(irq); - for (ec = cards; ec; ec = ec->next) { - int pending; - - if (!ec->claimed || ec->irq == NO_IRQ || ec->slot_no == 8) - continue; - - if (ec->ops && ec->ops->irqpending) - pending = ec->ops->irqpending(ec); - else - pending = ecard_default_ops.irqpending(ec); - - if (pending) { - generic_handle_irq(ec->irq); - called ++; - } - } - desc->chip->unmask(irq); - - if (called == 0) - ecard_check_lockup(desc); -} - -#ifdef HAS_EXPMASK -static unsigned char priority_masks[] = -{ - 0xf0, 0xf1, 0xf3, 0xf7, 0xff, 0xff, 0xff, 0xff -}; - -static unsigned char first_set[] = -{ - 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00, - 0x03, 0x00, 0x01, 0x00, 0x02, 0x00, 0x01, 0x00 -}; - -static void -ecard_irqexp_handler(unsigned int irq, struct irq_desc *desc) -{ - const unsigned int statusmask = 15; - unsigned int status; - - status = __raw_readb(EXPMASK_STATUS) & statusmask; - if (status) { - unsigned int slot = first_set[status]; - ecard_t *ec = slot_to_ecard(slot); - - if (ec->claimed) { - /* - * this ugly code is so that we can operate a - * prioritorising system: - * - * Card 0 highest priority - * Card 1 - * Card 2 - * Card 3 lowest priority - * - * Serial cards should go in 0/1, ethernet/scsi in 2/3 - * otherwise you will lose serial data at high speeds! - */ - generic_handle_irq(ec->irq); - } else { - printk(KERN_WARNING "card%d: interrupt from unclaimed " - "card???\n", slot); - have_expmask &= ~(1 << slot); - __raw_writeb(have_expmask, EXPMASK_ENABLE); - } - } else - printk(KERN_WARNING "Wild interrupt from backplane (masks)\n"); -} - -static int __init ecard_probeirqhw(void) -{ - ecard_t *ec; - int found; - - __raw_writeb(0x00, EXPMASK_ENABLE); - __raw_writeb(0xff, EXPMASK_STATUS); - found = (__raw_readb(EXPMASK_STATUS) & 15) == 0; - __raw_writeb(0xff, EXPMASK_ENABLE); - - if (found) { - printk(KERN_DEBUG "Expansion card interrupt " - "management hardware found\n"); - - /* for each card present, set a bit to '1' */ - have_expmask = 0x80000000; - - for (ec = cards; ec; ec = ec->next) - have_expmask |= 1 << ec->slot_no; - - __raw_writeb(have_expmask, EXPMASK_ENABLE); - } - - return found; -} -#else -#define ecard_irqexp_handler NULL -#define ecard_probeirqhw() (0) -#endif - -#ifndef IO_EC_MEMC8_BASE -#define IO_EC_MEMC8_BASE 0 -#endif - -static unsigned int __ecard_address(ecard_t *ec, card_type_t type, card_speed_t speed) -{ - unsigned long address = 0; - int slot = ec->slot_no; - - if (ec->slot_no == 8) - return IO_EC_MEMC8_BASE; - - ectcr &= ~(1 << slot); - - switch (type) { - case ECARD_MEMC: - if (slot < 4) - address = IO_EC_MEMC_BASE + (slot << 12); - break; - - case ECARD_IOC: - if (slot < 4) - address = IO_EC_IOC_BASE + (slot << 12); -#ifdef IO_EC_IOC4_BASE - else - address = IO_EC_IOC4_BASE + ((slot - 4) << 12); -#endif - if (address) - address += speed << 17; - break; - -#ifdef IO_EC_EASI_BASE - case ECARD_EASI: - address = IO_EC_EASI_BASE + (slot << 22); - if (speed == ECARD_FAST) - ectcr |= 1 << slot; - break; -#endif - default: - break; - } - -#ifdef IOMD_ECTCR - iomd_writeb(ectcr, IOMD_ECTCR); -#endif - return address; -} - -static int ecard_prints(struct seq_file *m, ecard_t *ec) -{ - seq_printf(m, " %d: %s ", ec->slot_no, ec->easi ? "EASI" : " "); - - if (ec->cid.id == 0) { - struct in_chunk_dir incd; - - seq_printf(m, "[%04X:%04X] ", - ec->cid.manufacturer, ec->cid.product); - - if (!ec->card_desc && ec->cid.cd && - ecard_readchunk(&incd, ec, 0xf5, 0)) { - ec->card_desc = kmalloc(strlen(incd.d.string)+1, GFP_KERNEL); - - if (ec->card_desc) - strcpy((char *)ec->card_desc, incd.d.string); - } - - seq_printf(m, "%s\n", ec->card_desc ? ec->card_desc : "*unknown*"); - } else - seq_printf(m, "Simple card %d\n", ec->cid.id); - - return 0; -} - -static int ecard_devices_proc_show(struct seq_file *m, void *v) -{ - ecard_t *ec = cards; - - while (ec) { - ecard_prints(m, ec); - ec = ec->next; - } - return 0; -} - -static int ecard_devices_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ecard_devices_proc_show, NULL); -} - -static const struct file_operations bus_ecard_proc_fops = { - .owner = THIS_MODULE, - .open = ecard_devices_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static struct proc_dir_entry *proc_bus_ecard_dir = NULL; - -static void ecard_proc_init(void) -{ - proc_bus_ecard_dir = proc_mkdir("bus/ecard", NULL); - proc_create("devices", 0, proc_bus_ecard_dir, &bus_ecard_proc_fops); -} - -#define ec_set_resource(ec,nr,st,sz) \ - do { \ - (ec)->resource[nr].name = dev_name(&ec->dev); \ - (ec)->resource[nr].start = st; \ - (ec)->resource[nr].end = (st) + (sz) - 1; \ - (ec)->resource[nr].flags = IORESOURCE_MEM; \ - } while (0) - -static void __init ecard_free_card(struct expansion_card *ec) -{ - int i; - - for (i = 0; i < ECARD_NUM_RESOURCES; i++) - if (ec->resource[i].flags) - release_resource(&ec->resource[i]); - - kfree(ec); -} - -static struct expansion_card *__init ecard_alloc_card(int type, int slot) -{ - struct expansion_card *ec; - unsigned long base; - int i; - - ec = kzalloc(sizeof(ecard_t), GFP_KERNEL); - if (!ec) { - ec = ERR_PTR(-ENOMEM); - goto nomem; - } - - ec->slot_no = slot; - ec->easi = type == ECARD_EASI; - ec->irq = NO_IRQ; - ec->fiq = NO_IRQ; - ec->dma = NO_DMA; - ec->ops = &ecard_default_ops; - - dev_set_name(&ec->dev, "ecard%d", slot); - ec->dev.parent = NULL; - ec->dev.bus = &ecard_bus_type; - ec->dev.dma_mask = &ec->dma_mask; - ec->dma_mask = (u64)0xffffffff; - ec->dev.coherent_dma_mask = ec->dma_mask; - - if (slot < 4) { - ec_set_resource(ec, ECARD_RES_MEMC, - PODSLOT_MEMC_BASE + (slot << 14), - PODSLOT_MEMC_SIZE); - base = PODSLOT_IOC0_BASE + (slot << 14); - } else - base = PODSLOT_IOC4_BASE + ((slot - 4) << 14); - -#ifdef CONFIG_ARCH_RPC - if (slot < 8) { - ec_set_resource(ec, ECARD_RES_EASI, - PODSLOT_EASI_BASE + (slot << 24), - PODSLOT_EASI_SIZE); - } - - if (slot == 8) { - ec_set_resource(ec, ECARD_RES_MEMC, NETSLOT_BASE, NETSLOT_SIZE); - } else -#endif - - for (i = 0; i <= ECARD_RES_IOCSYNC - ECARD_RES_IOCSLOW; i++) - ec_set_resource(ec, i + ECARD_RES_IOCSLOW, - base + (i << 19), PODSLOT_IOC_SIZE); - - for (i = 0; i < ECARD_NUM_RESOURCES; i++) { - if (ec->resource[i].flags && - request_resource(&iomem_resource, &ec->resource[i])) { - dev_err(&ec->dev, "resource(s) not available\n"); - ec->resource[i].end -= ec->resource[i].start; - ec->resource[i].start = 0; - ec->resource[i].flags = 0; - } - } - - nomem: - return ec; -} - -static ssize_t ecard_show_irq(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct expansion_card *ec = ECARD_DEV(dev); - return sprintf(buf, "%u\n", ec->irq); -} - -static ssize_t ecard_show_dma(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct expansion_card *ec = ECARD_DEV(dev); - return sprintf(buf, "%u\n", ec->dma); -} - -static ssize_t ecard_show_resources(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct expansion_card *ec = ECARD_DEV(dev); - char *str = buf; - int i; - - for (i = 0; i < ECARD_NUM_RESOURCES; i++) - str += sprintf(str, "%08x %08x %08lx\n", - ec->resource[i].start, - ec->resource[i].end, - ec->resource[i].flags); - - return str - buf; -} - -static ssize_t ecard_show_vendor(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct expansion_card *ec = ECARD_DEV(dev); - return sprintf(buf, "%u\n", ec->cid.manufacturer); -} - -static ssize_t ecard_show_device(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct expansion_card *ec = ECARD_DEV(dev); - return sprintf(buf, "%u\n", ec->cid.product); -} - -static ssize_t ecard_show_type(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct expansion_card *ec = ECARD_DEV(dev); - return sprintf(buf, "%s\n", ec->easi ? "EASI" : "IOC"); -} - -static struct device_attribute ecard_dev_attrs[] = { - __ATTR(device, S_IRUGO, ecard_show_device, NULL), - __ATTR(dma, S_IRUGO, ecard_show_dma, NULL), - __ATTR(irq, S_IRUGO, ecard_show_irq, NULL), - __ATTR(resource, S_IRUGO, ecard_show_resources, NULL), - __ATTR(type, S_IRUGO, ecard_show_type, NULL), - __ATTR(vendor, S_IRUGO, ecard_show_vendor, NULL), - __ATTR_NULL, -}; - - -int ecard_request_resources(struct expansion_card *ec) -{ - int i, err = 0; - - for (i = 0; i < ECARD_NUM_RESOURCES; i++) { - if (ecard_resource_end(ec, i) && - !request_mem_region(ecard_resource_start(ec, i), - ecard_resource_len(ec, i), - ec->dev.driver->name)) { - err = -EBUSY; - break; - } - } - - if (err) { - while (i--) - if (ecard_resource_end(ec, i)) - release_mem_region(ecard_resource_start(ec, i), - ecard_resource_len(ec, i)); - } - return err; -} -EXPORT_SYMBOL(ecard_request_resources); - -void ecard_release_resources(struct expansion_card *ec) -{ - int i; - - for (i = 0; i < ECARD_NUM_RESOURCES; i++) - if (ecard_resource_end(ec, i)) - release_mem_region(ecard_resource_start(ec, i), - ecard_resource_len(ec, i)); -} -EXPORT_SYMBOL(ecard_release_resources); - -void ecard_setirq(struct expansion_card *ec, const struct expansion_card_ops *ops, void *irq_data) -{ - ec->irq_data = irq_data; - barrier(); - ec->ops = ops; -} -EXPORT_SYMBOL(ecard_setirq); - -void __iomem *ecardm_iomap(struct expansion_card *ec, unsigned int res, - unsigned long offset, unsigned long maxsize) -{ - unsigned long start = ecard_resource_start(ec, res); - unsigned long end = ecard_resource_end(ec, res); - - if (offset > (end - start)) - return NULL; - - start += offset; - if (maxsize && end - start > maxsize) - end = start + maxsize; - - return devm_ioremap(&ec->dev, start, end - start); -} -EXPORT_SYMBOL(ecardm_iomap); - -/* - * Probe for an expansion card. - * - * If bit 1 of the first byte of the card is set, then the - * card does not exist. - */ -static int __init -ecard_probe(int slot, card_type_t type) -{ - ecard_t **ecp; - ecard_t *ec; - struct ex_ecid cid; - int i, rc; - - ec = ecard_alloc_card(type, slot); - if (IS_ERR(ec)) { - rc = PTR_ERR(ec); - goto nomem; - } - - rc = -ENODEV; - if ((ec->podaddr = __ecard_address(ec, type, ECARD_SYNC)) == 0) - goto nodev; - - cid.r_zero = 1; - ecard_readbytes(&cid, ec, 0, 16, 0); - if (cid.r_zero) - goto nodev; - - ec->cid.id = cid.r_id; - ec->cid.cd = cid.r_cd; - ec->cid.is = cid.r_is; - ec->cid.w = cid.r_w; - ec->cid.manufacturer = ecard_getu16(cid.r_manu); - ec->cid.product = ecard_getu16(cid.r_prod); - ec->cid.country = cid.r_country; - ec->cid.irqmask = cid.r_irqmask; - ec->cid.irqoff = ecard_gets24(cid.r_irqoff); - ec->cid.fiqmask = cid.r_fiqmask; - ec->cid.fiqoff = ecard_gets24(cid.r_fiqoff); - ec->fiqaddr = - ec->irqaddr = (void __iomem *)ioaddr(ec->podaddr); - - if (ec->cid.is) { - ec->irqmask = ec->cid.irqmask; - ec->irqaddr += ec->cid.irqoff; - ec->fiqmask = ec->cid.fiqmask; - ec->fiqaddr += ec->cid.fiqoff; - } else { - ec->irqmask = 1; - ec->fiqmask = 4; - } - - for (i = 0; i < ARRAY_SIZE(blacklist); i++) - if (blacklist[i].manufacturer == ec->cid.manufacturer && - blacklist[i].product == ec->cid.product) { - ec->card_desc = blacklist[i].type; - break; - } - - /* - * hook the interrupt handlers - */ - if (slot < 8) { - ec->irq = 32 + slot; - set_irq_chip(ec->irq, &ecard_chip); - set_irq_handler(ec->irq, handle_level_irq); - set_irq_flags(ec->irq, IRQF_VALID); - } - -#ifdef IO_EC_MEMC8_BASE - if (slot == 8) - ec->irq = 11; -#endif -#ifdef CONFIG_ARCH_RPC - /* On RiscPC, only first two slots have DMA capability */ - if (slot < 2) - ec->dma = 2 + slot; -#endif - - for (ecp = &cards; *ecp; ecp = &(*ecp)->next); - - *ecp = ec; - slot_to_expcard[slot] = ec; - - device_register(&ec->dev); - - return 0; - - nodev: - ecard_free_card(ec); - nomem: - return rc; -} - -/* - * Initialise the expansion card system. - * Locate all hardware - interrupt management and - * actual cards. - */ -static int __init ecard_init(void) -{ - struct task_struct *task; - int slot, irqhw; - - task = kthread_run(ecard_task, NULL, "kecardd"); - if (IS_ERR(task)) { - printk(KERN_ERR "Ecard: unable to create kernel thread: %ld\n", - PTR_ERR(task)); - return PTR_ERR(task); - } - - printk("Probing expansion cards\n"); - - for (slot = 0; slot < 8; slot ++) { - if (ecard_probe(slot, ECARD_EASI) == -ENODEV) - ecard_probe(slot, ECARD_IOC); - } - -#ifdef IO_EC_MEMC8_BASE - ecard_probe(8, ECARD_IOC); -#endif - - irqhw = ecard_probeirqhw(); - - set_irq_chained_handler(IRQ_EXPANSIONCARD, - irqhw ? ecard_irqexp_handler : ecard_irq_handler); - - ecard_proc_init(); - - return 0; -} - -subsys_initcall(ecard_init); - -/* - * ECARD "bus" - */ -static const struct ecard_id * -ecard_match_device(const struct ecard_id *ids, struct expansion_card *ec) -{ - int i; - - for (i = 0; ids[i].manufacturer != 65535; i++) - if (ec->cid.manufacturer == ids[i].manufacturer && - ec->cid.product == ids[i].product) - return ids + i; - - return NULL; -} - -static int ecard_drv_probe(struct device *dev) -{ - struct expansion_card *ec = ECARD_DEV(dev); - struct ecard_driver *drv = ECARD_DRV(dev->driver); - const struct ecard_id *id; - int ret; - - id = ecard_match_device(drv->id_table, ec); - - ec->claimed = 1; - ret = drv->probe(ec, id); - if (ret) - ec->claimed = 0; - return ret; -} - -static int ecard_drv_remove(struct device *dev) -{ - struct expansion_card *ec = ECARD_DEV(dev); - struct ecard_driver *drv = ECARD_DRV(dev->driver); - - drv->remove(ec); - ec->claimed = 0; - - /* - * Restore the default operations. We ensure that the - * ops are set before we change the data. - */ - ec->ops = &ecard_default_ops; - barrier(); - ec->irq_data = NULL; - - return 0; -} - -/* - * Before rebooting, we must make sure that the expansion card is in a - * sensible state, so it can be re-detected. This means that the first - * page of the ROM must be visible. We call the expansion cards reset - * handler, if any. - */ -static void ecard_drv_shutdown(struct device *dev) -{ - struct expansion_card *ec = ECARD_DEV(dev); - struct ecard_driver *drv = ECARD_DRV(dev->driver); - struct ecard_request req; - - if (dev->driver) { - if (drv->shutdown) - drv->shutdown(ec); - ec->claimed = 0; - } - - /* - * If this card has a loader, call the reset handler. - */ - if (ec->loader) { - req.fn = ecard_task_reset; - req.ec = ec; - ecard_call(&req); - } -} - -int ecard_register_driver(struct ecard_driver *drv) -{ - drv->drv.bus = &ecard_bus_type; - - return driver_register(&drv->drv); -} - -void ecard_remove_driver(struct ecard_driver *drv) -{ - driver_unregister(&drv->drv); -} - -static int ecard_match(struct device *_dev, struct device_driver *_drv) -{ - struct expansion_card *ec = ECARD_DEV(_dev); - struct ecard_driver *drv = ECARD_DRV(_drv); - int ret; - - if (drv->id_table) { - ret = ecard_match_device(drv->id_table, ec) != NULL; - } else { - ret = ec->cid.id == drv->id; - } - - return ret; -} - -struct bus_type ecard_bus_type = { - .name = "ecard", - .dev_attrs = ecard_dev_attrs, - .match = ecard_match, - .probe = ecard_drv_probe, - .remove = ecard_drv_remove, - .shutdown = ecard_drv_shutdown, -}; - -static int ecard_bus_init(void) -{ - return bus_register(&ecard_bus_type); -} - -postcore_initcall(ecard_bus_init); - -EXPORT_SYMBOL(ecard_readchunk); -EXPORT_SYMBOL(ecard_register_driver); -EXPORT_SYMBOL(ecard_remove_driver); -EXPORT_SYMBOL(ecard_bus_type); diff --git a/arch/arm/kernel/ecard.h b/arch/arm/kernel/ecard.h deleted file mode 100644 index 4642d436be2..00000000000 --- a/arch/arm/kernel/ecard.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * ecard.h - * - * Copyright 2007 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -/* Definitions internal to ecard.c - for it's use only!! - * - * External expansion card header as read from the card - */ -struct ex_ecid { - unsigned char r_irq:1; - unsigned char r_zero:1; - unsigned char r_fiq:1; - unsigned char r_id:4; - unsigned char r_a:1; - - unsigned char r_cd:1; - unsigned char r_is:1; - unsigned char r_w:2; - unsigned char r_r1:4; - - unsigned char r_r2:8; - - unsigned char r_prod[2]; - - unsigned char r_manu[2]; - - unsigned char r_country; - - unsigned char r_fiqmask; - unsigned char r_fiqoff[3]; - - unsigned char r_irqmask; - unsigned char r_irqoff[3]; -}; - -/* - * Chunk directory entry as read from the card - */ -struct ex_chunk_dir { - unsigned char r_id; - unsigned char r_len[3]; - unsigned long r_start; - union { - char string[256]; - char data[1]; - } d; -#define c_id(x) ((x)->r_id) -#define c_len(x) ((x)->r_len[0]|((x)->r_len[1]<<8)|((x)->r_len[2]<<16)) -#define c_start(x) ((x)->r_start) -}; - -typedef enum ecard_type { /* Cards address space */ - ECARD_IOC, - ECARD_MEMC, - ECARD_EASI -} card_type_t; - -typedef enum { /* Speed for ECARD_IOC space */ - ECARD_SLOW = 0, - ECARD_MEDIUM = 1, - ECARD_FAST = 2, - ECARD_SYNC = 3 -} card_speed_t; diff --git a/arch/arm/kernel/elf.c b/arch/arm/kernel/elf.c index d4a0da1e48f..d0d1e83150c 100644 --- a/arch/arm/kernel/elf.c +++ b/arch/arm/kernel/elf.c @@ -1,8 +1,9 @@ -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/personality.h> #include <linux/binfmts.h> #include <linux/elf.h> +#include <asm/system_info.h> int elf_check_arch(const struct elf32_hdr *x) { @@ -40,15 +41,22 @@ EXPORT_SYMBOL(elf_check_arch); void elf_set_personality(const struct elf32_hdr *x) { unsigned int eflags = x->e_flags; - unsigned int personality = PER_LINUX_32BIT; + unsigned int personality = current->personality & ~PER_MASK; + + /* + * We only support Linux ELF executables, so always set the + * personality to LINUX. + */ + personality |= PER_LINUX; /* * APCS-26 is only valid for OABI executables */ - if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN) { - if (eflags & EF_ARM_APCS_26) - personality = PER_LINUX; - } + if ((eflags & EF_ARM_EABI_MASK) == EF_ARM_EABI_UNKNOWN && + (eflags & EF_ARM_APCS_26)) + personality &= ~ADDR_LIMIT_32BIT; + else + personality |= ADDR_LIMIT_32BIT; set_personality(personality); diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index c09e3573c5d..52a949a8077 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -15,52 +15,68 @@ * that causes it to save wrong values... Be aware! */ +#include <asm/assembler.h> #include <asm/memory.h> -#include <asm/glue.h> +#include <asm/glue-df.h> +#include <asm/glue-pf.h> #include <asm/vfpmacros.h> +#ifndef CONFIG_MULTI_IRQ_HANDLER #include <mach/entry-macro.S> +#endif #include <asm/thread_notify.h> #include <asm/unwind.h> #include <asm/unistd.h> #include <asm/tls.h> +#include <asm/system_info.h> #include "entry-header.S" +#include <asm/entry-macro-multi.S> /* - * Interrupt handling. Preserves r7, r8, r9 + * Interrupt handling. */ .macro irq_handler - get_irqnr_preamble r5, lr -1: get_irqnr_and_base r0, r6, r5, lr - movne r1, sp - @ - @ routine called with r0 = irq number, r1 = struct pt_regs * - @ - adrne lr, BSYM(1b) - bne asm_do_IRQ - -#ifdef CONFIG_SMP - /* - * XXX - * - * this macro assumes that irqstat (r6) and base (r5) are - * preserved from get_irqnr_and_base above - */ - ALT_SMP(test_for_ipi r0, r6, r5, lr) - ALT_UP_B(9997f) - movne r0, sp - adrne lr, BSYM(1b) - bne do_IPI - -#ifdef CONFIG_LOCAL_TIMERS - test_for_ltirq r0, r6, r5, lr - movne r0, sp - adrne lr, BSYM(1b) - bne do_local_timer +#ifdef CONFIG_MULTI_IRQ_HANDLER + ldr r1, =handle_arch_irq + mov r0, sp + adr lr, BSYM(9997f) + ldr pc, [r1] +#else + arch_irq_handler_default #endif 9997: + .endm + + .macro pabt_helper + @ PABORT handler takes pt_regs in r2, fault address in r4 and psr in r5 +#ifdef MULTI_PABORT + ldr ip, .LCprocfns + mov lr, pc + ldr pc, [ip, #PROCESSOR_PABT_FUNC] +#else + bl CPU_PABORT_HANDLER #endif + .endm + .macro dabt_helper + + @ + @ Call the processor-specific abort handler: + @ + @ r2 - pt_regs + @ r4 - aborted context pc + @ r5 - aborted context psr + @ + @ The abort handler must return the aborted address in r0, and + @ the fault status register in r1. r9 must be preserved. + @ +#ifdef MULTI_DABORT + ldr ip, .LCprocfns + mov lr, pc + ldr pc, [ip, #PROCESSOR_DABT_FUNC] +#else + bl CPU_DABORT_HANDLER +#endif .endm #ifdef CONFIG_KPROBES @@ -145,105 +161,58 @@ ENDPROC(__und_invalid) SPFIX( subeq sp, sp, #4 ) stmia sp, {r1 - r12} - ldmia r0, {r1 - r3} - add r5, sp, #S_SP - 4 @ here for interlock avoidance - mov r4, #-1 @ "" "" "" "" - add r0, sp, #(S_FRAME_SIZE + \stack_hole - 4) - SPFIX( addeq r0, r0, #4 ) - str r1, [sp, #-4]! @ save the "real" r0 copied + ldmia r0, {r3 - r5} + add r7, sp, #S_SP - 4 @ here for interlock avoidance + mov r6, #-1 @ "" "" "" "" + add r2, sp, #(S_FRAME_SIZE + \stack_hole - 4) + SPFIX( addeq r2, r2, #4 ) + str r3, [sp, #-4]! @ save the "real" r0 copied @ from the exception stack - mov r1, lr + mov r3, lr @ @ We are now ready to fill in the remaining blanks on the stack: @ - @ r0 - sp_svc - @ r1 - lr_svc - @ r2 - lr_<exception>, already fixed up for correct return/restart - @ r3 - spsr_<exception> - @ r4 - orig_r0 (see pt_regs definition in ptrace.h) + @ r2 - sp_svc + @ r3 - lr_svc + @ r4 - lr_<exception>, already fixed up for correct return/restart + @ r5 - spsr_<exception> + @ r6 - orig_r0 (see pt_regs definition in ptrace.h) @ - stmia r5, {r0 - r4} + stmia r7, {r2 - r6} + +#ifdef CONFIG_TRACE_IRQFLAGS + bl trace_hardirqs_off +#endif .endm .align 5 __dabt_svc: svc_entry - - @ - @ get ready to re-enable interrupts if appropriate - @ - mrs r9, cpsr - tst r3, #PSR_I_BIT - biceq r9, r9, #PSR_I_BIT - - @ - @ Call the processor-specific abort handler: - @ - @ r2 - aborted context pc - @ r3 - aborted context cpsr - @ - @ The abort handler must return the aborted address in r0, and - @ the fault status register in r1. r9 must be preserved. - @ -#ifdef MULTI_DABORT - ldr r4, .LCprocfns - mov lr, pc - ldr pc, [r4, #PROCESSOR_DABT_FUNC] -#else - bl CPU_DABORT_HANDLER -#endif - - @ - @ set desired IRQ state, then call main handler - @ - msr cpsr_c, r9 mov r2, sp - bl do_DataAbort - - @ - @ IRQs off again before pulling preserved data off the stack - @ - disable_irq_notrace - - @ - @ restore SPSR and restart the instruction - @ - ldr r2, [sp, #S_PSR] - svc_exit r2 @ return from exception + dabt_helper + THUMB( ldr r5, [sp, #S_PSR] ) @ potentially updated CPSR + svc_exit r5 @ return from exception UNWIND(.fnend ) ENDPROC(__dabt_svc) .align 5 __irq_svc: svc_entry + irq_handler -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif #ifdef CONFIG_PREEMPT get_thread_info tsk ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - add r7, r8, #1 @ increment it - str r7, [tsk, #TI_PREEMPT] -#endif - - irq_handler -#ifdef CONFIG_PREEMPT - str r8, [tsk, #TI_PREEMPT] @ restore preempt count ldr r0, [tsk, #TI_FLAGS] @ get flags teq r8, #0 @ if preempt count != 0 movne r0, #0 @ force flags to 0 tst r0, #_TIF_NEED_RESCHED blne svc_preempt #endif - ldr r4, [sp, #S_PSR] @ irqs are already disabled -#ifdef CONFIG_TRACE_IRQFLAGS - tst r4, #PSR_I_BIT - bleq trace_hardirqs_on -#endif - svc_exit r4 @ return from exception + + svc_exit r5, irq = 1 @ return from exception UNWIND(.fnend ) ENDPROC(__irq_svc) @@ -259,6 +228,19 @@ svc_preempt: b 1b #endif +__und_fault: + @ Correct the PC such that it is pointing at the instruction + @ which caused the fault. If the faulting instruction was ARM + @ the PC will be pointing at the next instruction, and have to + @ subtract 4. Otherwise, it is Thumb, and the PC will be + @ pointing at the second half of the Thumb instruction. We + @ have to subtract 2. + ldr r2, [r0, #S_PC] + sub r2, r2, r1 + str r2, [r0, #S_PC] + b do_undefinstr +ENDPROC(__und_fault) + .align 5 __und_svc: #ifdef CONFIG_KPROBES @@ -269,7 +251,6 @@ __und_svc: #else svc_entry #endif - @ @ call emulation code, which returns using r9 if it has emulated @ the instruction, or the more conventional lr if we are to treat @@ -277,67 +258,39 @@ __und_svc: @ @ r0 - instruction @ -#ifndef CONFIG_THUMB2_KERNEL - ldr r0, [r2, #-4] +#ifndef CONFIG_THUMB2_KERNEL + ldr r0, [r4, #-4] #else - ldrh r0, [r2, #-2] @ Thumb instruction at LR - 2 - and r9, r0, #0xf800 - cmp r9, #0xe800 @ 32-bit instruction if xx >= 0 - ldrhhs r9, [r2] @ bottom 16 bits - orrhs r0, r9, r0, lsl #16 + mov r1, #2 + ldrh r0, [r4, #-2] @ Thumb instruction at LR - 2 + cmp r0, #0xe800 @ 32-bit instruction if xx >= 0 + blo __und_svc_fault + ldrh r9, [r4] @ bottom 16 bits + add r4, r4, #2 + str r4, [sp, #S_PC] + orr r0, r9, r0, lsl #16 #endif - adr r9, BSYM(1f) + adr r9, BSYM(__und_svc_finish) + mov r2, r4 bl call_fpe + mov r1, #4 @ PC correction to apply +__und_svc_fault: mov r0, sp @ struct pt_regs *regs - bl do_undefinstr + bl __und_fault - @ - @ IRQs off again before pulling preserved data off the stack - @ -1: disable_irq_notrace - - @ - @ restore SPSR and restart the instruction - @ - ldr r2, [sp, #S_PSR] @ Get SVC cpsr - svc_exit r2 @ return from exception +__und_svc_finish: + ldr r5, [sp, #S_PSR] @ Get SVC cpsr + svc_exit r5 @ return from exception UNWIND(.fnend ) ENDPROC(__und_svc) .align 5 __pabt_svc: svc_entry - - @ - @ re-enable interrupts if appropriate - @ - mrs r9, cpsr - tst r3, #PSR_I_BIT - biceq r9, r9, #PSR_I_BIT - - mov r0, r2 @ pass address of aborted instruction. -#ifdef MULTI_PABORT - ldr r4, .LCprocfns - mov lr, pc - ldr pc, [r4, #PROCESSOR_PABT_FUNC] -#else - bl CPU_PABORT_HANDLER -#endif - msr cpsr_c, r9 @ Maybe enable interrupts mov r2, sp @ regs - bl do_PrefetchAbort @ call abort handler - - @ - @ IRQs off again before pulling preserved data off the stack - @ - disable_irq_notrace - - @ - @ restore SPSR and restart the instruction - @ - ldr r2, [sp, #S_PSR] - svc_exit r2 @ return from exception + pabt_helper + svc_exit r5 @ return from exception UNWIND(.fnend ) ENDPROC(__pabt_svc) @@ -368,39 +321,45 @@ ENDPROC(__pabt_svc) ARM( stmib sp, {r1 - r12} ) THUMB( stmia sp, {r0 - r12} ) - ldmia r0, {r1 - r3} + ldmia r0, {r3 - r5} add r0, sp, #S_PC @ here for interlock avoidance - mov r4, #-1 @ "" "" "" "" + mov r6, #-1 @ "" "" "" "" - str r1, [sp] @ save the "real" r0 copied + str r3, [sp] @ save the "real" r0 copied @ from the exception stack @ @ We are now ready to fill in the remaining blanks on the stack: @ - @ r2 - lr_<exception>, already fixed up for correct return/restart - @ r3 - spsr_<exception> - @ r4 - orig_r0 (see pt_regs definition in ptrace.h) + @ r4 - lr_<exception>, already fixed up for correct return/restart + @ r5 - spsr_<exception> + @ r6 - orig_r0 (see pt_regs definition in ptrace.h) @ @ Also, separately save sp_usr and lr_usr @ - stmia r0, {r2 - r4} + stmia r0, {r4 - r6} ARM( stmdb r0, {sp, lr}^ ) THUMB( store_user_sp_lr r0, r1, S_SP - S_PC ) @ @ Enable the alignment trap while in kernel mode @ - alignment_trap r0 + alignment_trap r0, .LCcralign @ @ Clear FP to mark the first stack frame @ zero_fp + +#ifdef CONFIG_IRQSOFF_TRACER + bl trace_hardirqs_off +#endif + ct_user_exit save = 0 .endm .macro kuser_cmpxchg_check -#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) +#if !defined(CONFIG_CPU_32v6K) && defined(CONFIG_KUSER_HELPERS) && \ + !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) #ifndef CONFIG_MMU #warning "NPTL on non MMU needs fixing" #else @@ -408,8 +367,8 @@ ENDPROC(__pabt_svc) @ if it was interrupted in a critical region. Here we @ perform a quick test inline since it should be false @ 99.9999% of the time. The rest is done out of line. - cmp r2, #TASK_SIZE - blhs kuser_cmpxchg_fixup + cmp r4, #TASK_SIZE + blhs kuser_cmpxchg64_fixup #endif #endif .endm @@ -418,31 +377,9 @@ ENDPROC(__pabt_svc) __dabt_usr: usr_entry kuser_cmpxchg_check - - @ - @ Call the processor-specific abort handler: - @ - @ r2 - aborted context pc - @ r3 - aborted context cpsr - @ - @ The abort handler must return the aborted address in r0, and - @ the fault status register in r1. - @ -#ifdef MULTI_DABORT - ldr r4, .LCprocfns - mov lr, pc - ldr pc, [r4, #PROCESSOR_DABT_FUNC] -#else - bl CPU_DABORT_HANDLER -#endif - - @ - @ IRQs on, then call the main handler - @ - enable_irq mov r2, sp - adr lr, BSYM(ret_from_exception) - b do_DataAbort + dabt_helper + b ret_from_exception UNWIND(.fnend ) ENDPROC(__dabt_usr) @@ -450,26 +387,10 @@ ENDPROC(__dabt_usr) __irq_usr: usr_entry kuser_cmpxchg_check - - get_thread_info tsk -#ifdef CONFIG_PREEMPT - ldr r8, [tsk, #TI_PREEMPT] @ get preempt count - add r7, r8, #1 @ increment it - str r7, [tsk, #TI_PREEMPT] -#endif - irq_handler -#ifdef CONFIG_PREEMPT - ldr r0, [tsk, #TI_PREEMPT] - str r8, [tsk, #TI_PREEMPT] - teq r0, r7 - ARM( strne r0, [r0, -r0] ) - THUMB( movne r0, #0 ) - THUMB( strne r0, [r0] ) -#endif - + get_thread_info tsk mov why, #0 - b ret_to_user + b ret_to_user_from_irq UNWIND(.fnend ) ENDPROC(__irq_usr) @@ -479,55 +400,101 @@ ENDPROC(__irq_usr) __und_usr: usr_entry + mov r2, r4 + mov r3, r5 + + @ r2 = regs->ARM_pc, which is either 2 or 4 bytes ahead of the + @ faulting instruction depending on Thumb mode. + @ r3 = regs->ARM_cpsr @ - @ fall through to the emulation code, which returns using r9 if - @ it has emulated the instruction, or the more conventional lr - @ if we are to treat this as a real undefined instruction - @ - @ r0 - instruction + @ The emulation code returns using r9 if it has emulated the + @ instruction, or the more conventional lr if we are to treat + @ this as a real undefined instruction @ adr r9, BSYM(ret_from_exception) - adr lr, BSYM(__und_usr_unknown) + + @ IRQs must be enabled before attempting to read the instruction from + @ user space since that could cause a page/translation fault if the + @ page table was modified by another CPU. + enable_irq + tst r3, #PSR_T_BIT @ Thumb mode? - itet eq @ explicit IT needed for the 1f label - subeq r4, r2, #4 @ ARM instr at LR - 4 - subne r4, r2, #2 @ Thumb instr at LR - 2 -1: ldreqt r0, [r4] -#ifdef CONFIG_CPU_ENDIAN_BE8 - reveq r0, r0 @ little endian instruction -#endif - beq call_fpe + bne __und_usr_thumb + sub r4, r2, #4 @ ARM instr at LR - 4 +1: ldrt r0, [r4] + ARM_BE8(rev r0, r0) @ little endian instruction + + @ r0 = 32-bit ARM instruction which caused the exception + @ r2 = PC value for the following instruction (:= regs->ARM_pc) + @ r4 = PC value for the faulting instruction + @ lr = 32-bit undefined instruction function + adr lr, BSYM(__und_usr_fault_32) + b call_fpe + +__und_usr_thumb: @ Thumb instruction -#if __LINUX_ARM_ARCH__ >= 7 -2: - ARM( ldrht r5, [r4], #2 ) - THUMB( ldrht r5, [r4] ) - THUMB( add r4, r4, #2 ) - and r0, r5, #0xf800 @ mask bits 111x x... .... .... - cmp r0, #0xe800 @ 32bit instruction if xx != 0 - blo __und_usr_unknown -3: ldrht r0, [r4] + sub r4, r2, #2 @ First half of thumb instr at LR - 2 +#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 +/* + * Thumb-2 instruction handling. Note that because pre-v6 and >= v6 platforms + * can never be supported in a single kernel, this code is not applicable at + * all when __LINUX_ARM_ARCH__ < 6. This allows simplifying assumptions to be + * made about .arch directives. + */ +#if __LINUX_ARM_ARCH__ < 7 +/* If the target CPU may not be Thumb-2-capable, a run-time check is needed: */ +#define NEED_CPU_ARCHITECTURE + ldr r5, .LCcpu_architecture + ldr r5, [r5] + cmp r5, #CPU_ARCH_ARMv7 + blo __und_usr_fault_16 @ 16bit undefined instruction +/* + * The following code won't get run unless the running CPU really is v7, so + * coding round the lack of ldrht on older arches is pointless. Temporarily + * override the assembler target arch with the minimum required instead: + */ + .arch armv6t2 +#endif +2: ldrht r5, [r4] +ARM_BE8(rev16 r5, r5) @ little endian instruction + cmp r5, #0xe800 @ 32bit instruction if xx != 0 + blo __und_usr_fault_16 @ 16bit undefined instruction +3: ldrht r0, [r2] +ARM_BE8(rev16 r0, r0) @ little endian instruction add r2, r2, #2 @ r2 is PC + 2, make it PC + 4 + str r2, [sp, #S_PC] @ it's a 2x16bit instr, update orr r0, r0, r5, lsl #16 + adr lr, BSYM(__und_usr_fault_32) + @ r0 = the two 16-bit Thumb instructions which caused the exception + @ r2 = PC value for the following Thumb instruction (:= regs->ARM_pc) + @ r4 = PC value for the first 16-bit Thumb instruction + @ lr = 32bit undefined instruction function + +#if __LINUX_ARM_ARCH__ < 7 +/* If the target arch was overridden, change it back: */ +#ifdef CONFIG_CPU_32v6K + .arch armv6k #else - b __und_usr_unknown + .arch armv6 #endif - UNWIND(.fnend ) +#endif /* __LINUX_ARM_ARCH__ < 7 */ +#else /* !(CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7) */ + b __und_usr_fault_16 +#endif + UNWIND(.fnend) ENDPROC(__und_usr) - @ - @ fallthrough to call_fpe - @ - /* - * The out of line fixup for the ldrt above. + * The out of line fixup for the ldrt instructions above. */ .pushsection .fixup, "ax" -4: mov pc, r9 + .align 2 +4: str r4, [sp, #S_PC] @ retry current instruction + mov pc, r9 .popsection .pushsection __ex_table,"a" .long 1b, 4b -#if __LINUX_ARM_ARCH__ >= 7 +#if CONFIG_ARM_THUMB && __LINUX_ARM_ARCH__ >= 6 && CONFIG_CPU_V7 .long 2b, 4b .long 3b, 4b #endif @@ -551,31 +518,32 @@ ENDPROC(__und_usr) * NEON handler code. * * Emulators may wish to make use of the following registers: - * r0 = instruction opcode. - * r2 = PC+4 + * r0 = instruction opcode (32-bit ARM or two 16-bit Thumb) + * r2 = PC value to resume execution after successful emulation * r9 = normal "successful" return address - * r10 = this threads thread_info structure. + * r10 = this threads thread_info structure * lr = unrecognised instruction return address + * IRQs enabled, FIQs enabled. */ @ @ Fall-through from Thumb-2 __und_usr @ #ifdef CONFIG_NEON + get_thread_info r10 @ get current thread adr r6, .LCneon_thumb_opcodes b 2f #endif call_fpe: + get_thread_info r10 @ get current thread #ifdef CONFIG_NEON adr r6, .LCneon_arm_opcodes -2: - ldr r7, [r6], #4 @ mask value - cmp r7, #0 @ end mask? - beq 1f - and r8, r0, r7 +2: ldr r5, [r6], #4 @ mask value ldr r7, [r6], #4 @ opcode bits matching in mask + cmp r5, #0 @ end mask? + beq 1f + and r8, r0, r5 cmp r8, r7 @ NEON instruction? bne 2b - get_thread_info r10 mov r7, #1 strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used @@ -584,12 +552,7 @@ call_fpe: #endif tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2 -#if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710) - and r8, r0, #0x0f000000 @ mask out op-code bits - teqne r8, #0x0f000000 @ SWI (ARM6/7 bug)? -#endif moveq pc, lr - get_thread_info r10 @ get current thread and r8, r0, #0x00000f00 @ mask out CP number THUMB( lsr r8, r8, #8 ) mov r7, #1 @@ -636,6 +599,12 @@ call_fpe: movw_pc lr @ CP#14 (Debug) movw_pc lr @ CP#15 (Control) +#ifdef NEED_CPU_ARCHITECTURE + .align 2 +.LCcpu_architecture: + .word __cpu_architecture +#endif + #ifdef CONFIG_NEON .align 6 @@ -661,7 +630,6 @@ call_fpe: #endif do_fpe: - enable_irq ldr r4, .LCfp add r10, r10, #TI_FPSTATE @ r10 = workspace ldr pc, [r4] @ Call FP module USR entry point @@ -684,28 +652,22 @@ ENTRY(no_fp) mov pc, lr ENDPROC(no_fp) -__und_usr_unknown: - enable_irq - mov r0, sp +__und_usr_fault_32: + mov r1, #4 + b 1f +__und_usr_fault_16: + mov r1, #2 +1: mov r0, sp adr lr, BSYM(ret_from_exception) - b do_undefinstr -ENDPROC(__und_usr_unknown) + b __und_fault +ENDPROC(__und_usr_fault_32) +ENDPROC(__und_usr_fault_16) .align 5 __pabt_usr: usr_entry - - mov r0, r2 @ pass address of aborted instruction. -#ifdef MULTI_PABORT - ldr r4, .LCprocfns - mov lr, pc - ldr pc, [r4, #PROCESSOR_PABT_FUNC] -#else - bl CPU_PABORT_HANDLER -#endif - enable_irq @ Enable interrupts mov r2, sp @ regs - bl do_PrefetchAbort @ call abort handler + pabt_helper UNWIND(.fnend ) /* fall through */ /* @@ -730,21 +692,22 @@ ENTRY(__switch_to) UNWIND(.fnstart ) UNWIND(.cantunwind ) add ip, r1, #TI_CPU_SAVE - ldr r3, [r2, #TI_TP_VALUE] ARM( stmia ip!, {r4 - sl, fp, sp, lr} ) @ Store most regs on stack THUMB( stmia ip!, {r4 - sl, fp} ) @ Store most regs on stack THUMB( str sp, [ip], #4 ) THUMB( str lr, [ip], #4 ) -#ifdef CONFIG_MMU + ldr r4, [r2, #TI_TP_VALUE] + ldr r5, [r2, #TI_TP_VALUE + 4] +#ifdef CONFIG_CPU_USE_DOMAINS ldr r6, [r2, #TI_CPU_DOMAIN] #endif - set_tls r3, r4, r5 + switch_tls r1, r4, r5, r3, r7 #if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP) ldr r7, [r2, #TI_TASK] ldr r8, =__stack_chk_guard ldr r7, [r7, #TSK_STACK_CANARY] #endif -#ifdef CONFIG_MMU +#ifdef CONFIG_CPU_USE_DOMAINS mcr p15, 0, r6, c3, c0, 0 @ Set domain register #endif mov r5, r0 @@ -769,31 +732,12 @@ ENDPROC(__switch_to) /* * User helpers. * - * These are segment of kernel provided user code reachable from user space - * at a fixed address in kernel memory. This is used to provide user space - * with some operations which require kernel help because of unimplemented - * native feature and/or instructions in many ARM CPUs. The idea is for - * this code to be executed directly in user mode for best efficiency but - * which is too intimate with the kernel counter part to be left to user - * libraries. In fact this code might even differ from one CPU to another - * depending on the available instruction set and restrictions like on - * SMP systems. In other words, the kernel reserves the right to change - * this code as needed without warning. Only the entry points and their - * results are guaranteed to be stable. - * * Each segment is 32-byte aligned and will be moved to the top of the high * vector page. New segments (if ever needed) must be added in front of * existing ones. This mechanism should be used only for things that are * really small and justified, and not be abused freely. * - * User space is expected to implement those things inline when optimizing - * for a processor that has the necessary native support, but only if such - * resulting binaries are already to be incompatible with earlier ARM - * processors due to the use of unsupported instructions other than what - * is provided here. In other words don't make binaries unable to run on - * earlier processors just for the sake of not using these kernel helpers - * if your compiled code is not going to use the new instructions for other - * purpose. + * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ THUMB( .arm ) @@ -805,101 +749,115 @@ ENDPROC(__switch_to) #endif .endm + .macro kuser_pad, sym, size + .if (. - \sym) & 3 + .rept 4 - (. - \sym) & 3 + .byte 0 + .endr + .endif + .rept (\size - (. - \sym)) / 4 + .word 0xe7fddef1 + .endr + .endm + +#ifdef CONFIG_KUSER_HELPERS .align 5 .globl __kuser_helper_start __kuser_helper_start: /* - * Reference prototype: - * - * void __kernel_memory_barrier(void) - * - * Input: - * - * lr = return address - * - * Output: - * - * none - * - * Clobbered: - * - * none - * - * Definition and user space usage example: - * - * typedef void (__kernel_dmb_t)(void); - * #define __kernel_dmb (*(__kernel_dmb_t *)0xffff0fa0) - * - * Apply any needed memory barrier to preserve consistency with data modified - * manually and __kuser_cmpxchg usage. - * - * This could be used as follows: - * - * #define __kernel_dmb() \ - * asm volatile ( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #95" \ - * : : : "r0", "lr","cc" ) + * Due to the length of some sequences, __kuser_cmpxchg64 spans 2 regular + * kuser "slots", therefore 0xffff0f80 is not used as a valid entry point. */ -__kuser_memory_barrier: @ 0xffff0fa0 - smp_dmb +__kuser_cmpxchg64: @ 0xffff0f60 + +#if defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) + + /* + * Poor you. No fast solution possible... + * The kernel itself must perform the operation. + * A special ghost syscall is used for that (see traps.c). + */ + stmfd sp!, {r7, lr} + ldr r7, 1f @ it's 20 bits + swi __ARM_NR_cmpxchg64 + ldmfd sp!, {r7, pc} +1: .word __ARM_NR_cmpxchg64 + +#elif defined(CONFIG_CPU_32v6K) + + stmfd sp!, {r4, r5, r6, r7} + ldrd r4, r5, [r0] @ load old val + ldrd r6, r7, [r1] @ load new val + smp_dmb arm +1: ldrexd r0, r1, [r2] @ load current val + eors r3, r0, r4 @ compare with oldval (1) + eoreqs r3, r1, r5 @ compare with oldval (2) + strexdeq r3, r6, r7, [r2] @ store newval if eq + teqeq r3, #1 @ success? + beq 1b @ if no then retry + smp_dmb arm + rsbs r0, r3, #0 @ set returned val and C flag + ldmfd sp!, {r4, r5, r6, r7} usr_ret lr - .align 5 +#elif !defined(CONFIG_SMP) -/* - * Reference prototype: - * - * int __kernel_cmpxchg(int oldval, int newval, int *ptr) - * - * Input: - * - * r0 = oldval - * r1 = newval - * r2 = ptr - * lr = return address - * - * Output: - * - * r0 = returned value (zero or non-zero) - * C flag = set if r0 == 0, clear if r0 != 0 - * - * Clobbered: - * - * r3, ip, flags - * - * Definition and user space usage example: - * - * typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr); - * #define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0) - * - * Atomically store newval in *ptr if *ptr is equal to oldval for user space. - * Return zero if *ptr was changed or non-zero if no exchange happened. - * The C flag is also set if *ptr was changed to allow for assembly - * optimization in the calling code. - * - * Notes: - * - * - This routine already includes memory barriers as needed. - * - * For example, a user space atomic_add implementation could look like this: - * - * #define atomic_add(ptr, val) \ - * ({ register unsigned int *__ptr asm("r2") = (ptr); \ - * register unsigned int __result asm("r1"); \ - * asm volatile ( \ - * "1: @ atomic_add\n\t" \ - * "ldr r0, [r2]\n\t" \ - * "mov r3, #0xffff0fff\n\t" \ - * "add lr, pc, #4\n\t" \ - * "add r1, r0, %2\n\t" \ - * "add pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \ - * "bcc 1b" \ - * : "=&r" (__result) \ - * : "r" (__ptr), "rIL" (val) \ - * : "r0","r3","ip","lr","cc","memory" ); \ - * __result; }) - */ +#ifdef CONFIG_MMU + + /* + * The only thing that can break atomicity in this cmpxchg64 + * implementation is either an IRQ or a data abort exception + * causing another process/thread to be scheduled in the middle of + * the critical sequence. The same strategy as for cmpxchg is used. + */ + stmfd sp!, {r4, r5, r6, lr} + ldmia r0, {r4, r5} @ load old val + ldmia r1, {r6, lr} @ load new val +1: ldmia r2, {r0, r1} @ load current val + eors r3, r0, r4 @ compare with oldval (1) + eoreqs r3, r1, r5 @ compare with oldval (2) +2: stmeqia r2, {r6, lr} @ store newval if eq + rsbs r0, r3, #0 @ set return val and C flag + ldmfd sp!, {r4, r5, r6, pc} + + .text +kuser_cmpxchg64_fixup: + @ Called from kuser_cmpxchg_fixup. + @ r4 = address of interrupted insn (must be preserved). + @ sp = saved regs. r7 and r8 are clobbered. + @ 1b = first critical insn, 2b = last critical insn. + @ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b. + mov r7, #0xffff0fff + sub r7, r7, #(0xffff0fff - (0xffff0f60 + (1b - __kuser_cmpxchg64))) + subs r8, r4, r7 + rsbcss r8, r8, #(2b - 1b) + strcs r7, [sp, #S_PC] +#if __LINUX_ARM_ARCH__ < 6 + bcc kuser_cmpxchg32_fixup +#endif + mov pc, lr + .previous + +#else +#warning "NPTL on non MMU needs fixing" + mov r0, #-1 + adds r0, r0, #0 + usr_ret lr +#endif + +#else +#error "incoherent kernel configuration" +#endif + + kuser_pad __kuser_cmpxchg64, 64 + +__kuser_memory_barrier: @ 0xffff0fa0 + smp_dmb arm + usr_ret lr + + kuser_pad __kuser_memory_barrier, 32 __kuser_cmpxchg: @ 0xffff0fc0 @@ -911,7 +869,7 @@ __kuser_cmpxchg: @ 0xffff0fc0 * A special ghost syscall is used for that (see traps.c). */ stmfd sp!, {r7, lr} - ldr r7, =1f @ it's 20 bits + ldr r7, 1f @ it's 20 bits swi __ARM_NR_cmpxchg ldmfd sp!, {r7, pc} 1: .word __ARM_NR_cmpxchg @@ -936,15 +894,15 @@ __kuser_cmpxchg: @ 0xffff0fc0 usr_ret lr .text -kuser_cmpxchg_fixup: +kuser_cmpxchg32_fixup: @ Called from kuser_cmpxchg_check macro. - @ r2 = address of interrupted insn (must be preserved). + @ r4 = address of interrupted insn (must be preserved). @ sp = saved regs. r7 and r8 are clobbered. @ 1b = first critical insn, 2b = last critical insn. - @ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b. + @ If r4 >= 1b and r4 <= 2b then saved pc_usr is set to 1b. mov r7, #0xffff0fff sub r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg))) - subs r8, r2, r7 + subs r8, r4, r7 rsbcss r8, r8, #(2b - 1b) strcs r7, [sp, #S_PC] mov pc, lr @@ -959,7 +917,7 @@ kuser_cmpxchg_fixup: #else - smp_dmb + smp_dmb arm 1: ldrex r3, [r2] subs r3, r3, r0 strexeq r3, r1, [r2] @@ -972,76 +930,33 @@ kuser_cmpxchg_fixup: #endif - .align 5 - -/* - * Reference prototype: - * - * int __kernel_get_tls(void) - * - * Input: - * - * lr = return address - * - * Output: - * - * r0 = TLS value - * - * Clobbered: - * - * none - * - * Definition and user space usage example: - * - * typedef int (__kernel_get_tls_t)(void); - * #define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0) - * - * Get the TLS value as previously set via the __ARM_NR_set_tls syscall. - * - * This could be used as follows: - * - * #define __kernel_get_tls() \ - * ({ register unsigned int __val asm("r0"); \ - * asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \ - * : "=r" (__val) : : "lr","cc" ); \ - * __val; }) - */ + kuser_pad __kuser_cmpxchg, 32 __kuser_get_tls: @ 0xffff0fe0 ldr r0, [pc, #(16 - 8)] @ read TLS, set in kuser_get_tls_init usr_ret lr mrc p15, 0, r0, c13, c0, 3 @ 0xffff0fe8 hardware TLS code - .rep 4 + kuser_pad __kuser_get_tls, 16 + .rep 3 .word 0 @ 0xffff0ff0 software TLS value, then .endr @ pad up to __kuser_helper_version -/* - * Reference declaration: - * - * extern unsigned int __kernel_helper_version; - * - * Definition and user space usage example: - * - * #define __kernel_helper_version (*(unsigned int *)0xffff0ffc) - * - * User space may read this to determine the curent number of helpers - * available. - */ - __kuser_helper_version: @ 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: +#endif + THUMB( .thumb ) /* * Vector stubs. * - * This code is copied to 0xffff0200 so we can use branches in the - * vectors, rather than ldr's. Note that this code must not - * exceed 0x300 bytes. + * This code is copied to 0xffff1000 so we can use branches in the + * vectors, rather than ldr's. Note that this code must not exceed + * a page size. * * Common stub entry macro: * Enter in IRQ mode, spsr = SVC/USR CPSR, lr = SVC/USR PC @@ -1088,8 +1003,17 @@ ENDPROC(vector_\name) 1: .endm - .globl __stubs_start + .section .stubs, "ax", %progbits __stubs_start: + @ This must be the first word + .word vector_swi + +vector_rst: + ARM( swi SYS_ERROR0 ) + THUMB( svc #0 ) + THUMB( nop ) + b vector_und + /* * Interrupt dispatcher */ @@ -1184,6 +1108,16 @@ __stubs_start: .align 5 /*============================================================================= + * Address exception handler + *----------------------------------------------------------------------------- + * These aren't too critical. + * (they're not supposed to happen, and won't happen in 32-bit data mode). + */ + +vector_addrexcptn: + b vector_addrexcptn + +/*============================================================================= * Undefined FIQs *----------------------------------------------------------------------------- * Enter in FIQ mode, spsr = ANY CPSR, lr = ANY PC @@ -1194,54 +1128,30 @@ __stubs_start: * get out of that mode without clobbering one register. */ vector_fiq: - disable_fiq subs pc, lr, #4 -/*============================================================================= - * Address exception handler - *----------------------------------------------------------------------------- - * These aren't too critical. - * (they're not supposed to happen, and won't happen in 32-bit data mode). - */ - -vector_addrexcptn: - b vector_addrexcptn - -/* - * We group all the following data together to optimise - * for CPUs with separate I & D caches. - */ - .align 5 - -.LCvswi: - .word vector_swi - - .globl __stubs_end -__stubs_end: - - .equ stubs_offset, __vectors_start + 0x200 - __stubs_start + .globl vector_fiq_offset + .equ vector_fiq_offset, vector_fiq - .globl __vectors_start + .section .vectors, "ax", %progbits __vectors_start: - ARM( swi SYS_ERROR0 ) - THUMB( svc #0 ) - THUMB( nop ) - W(b) vector_und + stubs_offset - W(ldr) pc, .LCvswi + stubs_offset - W(b) vector_pabt + stubs_offset - W(b) vector_dabt + stubs_offset - W(b) vector_addrexcptn + stubs_offset - W(b) vector_irq + stubs_offset - W(b) vector_fiq + stubs_offset - - .globl __vectors_end -__vectors_end: + W(b) vector_rst + W(b) vector_und + W(ldr) pc, __vectors_start + 0x1000 + W(b) vector_pabt + W(b) vector_dabt + W(b) vector_addrexcptn + W(b) vector_irq + W(b) vector_fiq .data .globl cr_alignment - .globl cr_no_alignment cr_alignment: .space 4 -cr_no_alignment: + +#ifdef CONFIG_MULTI_IRQ_HANDLER + .globl handle_arch_irq +handle_arch_irq: .space 4 +#endif diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 8bfa98757cd..7139d4a7dea 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -10,9 +10,15 @@ #include <asm/unistd.h> #include <asm/ftrace.h> -#include <mach/entry-macro.S> #include <asm/unwind.h> +#ifdef CONFIG_NEED_RET_TO_USER +#include <mach/entry-macro.S> +#else + .macro arch_ret_to_user, tmp1, tmp2 + .endm +#endif + #include "entry-header.S" @@ -29,9 +35,11 @@ ret_fast_syscall: ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne fast_work_pending + asm_trace_hardirqs_on /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr + ct_user_enter restore_user_regs fast = 1, offset = S_OFF UNWIND(.fnend ) @@ -42,33 +50,34 @@ ret_fast_syscall: fast_work_pending: str r0, [sp, #S_R0+S_OFF]! @ returned r0 work_pending: - tst r1, #_TIF_NEED_RESCHED - bne work_resched - tst r1, #_TIF_SIGPENDING|_TIF_NOTIFY_RESUME - beq no_work_pending mov r0, sp @ 'regs' mov r2, why @ 'syscall' - tst r1, #_TIF_SIGPENDING @ delivering a signal? - movne why, #0 @ prevent further restarts - bl do_notify_resume - b ret_slow_syscall @ Check work again + bl do_work_pending + cmp r0, #0 + beq no_work_pending + movlt scno, #(__NR_restart_syscall - __NR_SYSCALL_BASE) + ldmia sp, {r0 - r6} @ have to reload r0 - r6 + b local_restart @ ... and off we go -work_resched: - bl schedule /* * "slow" syscall return path. "why" tells us if this was a real syscall. */ ENTRY(ret_to_user) ret_slow_syscall: disable_irq @ disable interrupts +ENTRY(ret_to_user_from_irq) ldr r1, [tsk, #TI_FLAGS] tst r1, #_TIF_WORK_MASK bne work_pending no_work_pending: + asm_trace_hardirqs_on + /* perform architecture specific actions before user return */ arch_ret_to_user r1, lr + ct_user_enter save = 0 restore_user_regs fast = 0, offset = 0 +ENDPROC(ret_to_user_from_irq) ENDPROC(ret_to_user) /* @@ -76,20 +85,26 @@ ENDPROC(ret_to_user) */ ENTRY(ret_from_fork) bl schedule_tail - get_thread_info tsk - ldr r1, [tsk, #TI_FLAGS] @ check for syscall tracing - mov why, #1 - tst r1, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? - beq ret_slow_syscall - mov r1, sp - mov r0, #1 @ trace exit [IP = 1] - bl syscall_trace + cmp r5, #0 + movne r0, r4 + adrne lr, BSYM(1f) + movne pc, r5 +1: get_thread_info tsk b ret_slow_syscall ENDPROC(ret_from_fork) .equ NR_syscalls,0 #define CALL(x) .equ NR_syscalls,NR_syscalls+1 #include "calls.S" + +/* + * Ensure that the system call table is equal to __NR_syscalls, + * which is the value the rest of the system sees + */ +.ifne NR_syscalls - __NR_syscalls +.error "__NR_syscalls is not equal to the size of the syscall table" +.endif + #undef CALL #define CALL(x) .long x @@ -141,98 +156,185 @@ ENDPROC(ret_from_fork) #endif #endif -#ifdef CONFIG_DYNAMIC_FTRACE -ENTRY(__gnu_mcount_nc) - mov ip, lr - ldmia sp!, {lr} - mov pc, ip -ENDPROC(__gnu_mcount_nc) +.macro mcount_adjust_addr rd, rn + bic \rd, \rn, #1 @ clear the Thumb bit if present + sub \rd, \rd, #MCOUNT_INSN_SIZE +.endm -ENTRY(ftrace_caller) - stmdb sp!, {r0-r3, lr} - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - ldr r1, [sp, #20] +.macro __mcount suffix + mcount_enter + ldr r0, =ftrace_trace_function + ldr r2, [r0] + adr r0, .Lftrace_stub + cmp r0, r2 + bne 1f + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + ldr r1, =ftrace_graph_return + ldr r2, [r1] + cmp r0, r2 + bne ftrace_graph_caller\suffix + + ldr r1, =ftrace_graph_entry + ldr r2, [r1] + ldr r0, =ftrace_graph_entry_stub + cmp r0, r2 + bne ftrace_graph_caller\suffix +#endif - .global ftrace_call -ftrace_call: + mcount_exit + +1: mcount_get_lr r1 @ lr of instrumented func + mcount_adjust_addr r0, lr @ instrumented function + adr lr, BSYM(2f) + mov pc, r2 +2: mcount_exit +.endm + +.macro __ftrace_caller suffix + mcount_enter + + mcount_get_lr r1 @ lr of instrumented func + mcount_adjust_addr r0, lr @ instrumented function + + .globl ftrace_call\suffix +ftrace_call\suffix: bl ftrace_stub - ldmia sp!, {r0-r3, ip, lr} - mov pc, ip -ENDPROC(ftrace_caller) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl ftrace_graph_call\suffix +ftrace_graph_call\suffix: + mov r0, r0 +#endif + + mcount_exit +.endm + +.macro __ftrace_graph_caller + sub r0, fp, #4 @ &lr of instrumented routine (&parent) +#ifdef CONFIG_DYNAMIC_FTRACE + @ called from __ftrace_caller, saved in mcount_enter + ldr r1, [sp, #16] @ instrumented routine (func) + mcount_adjust_addr r1, r1 +#else + @ called from __mcount, untouched in lr + mcount_adjust_addr r1, lr @ instrumented routine (func) +#endif + mov r2, fp @ frame pointer + bl prepare_ftrace_return + mcount_exit +.endm #ifdef CONFIG_OLD_MCOUNT +/* + * mcount + */ + +.macro mcount_enter + stmdb sp!, {r0-r3, lr} +.endm + +.macro mcount_get_lr reg + ldr \reg, [fp, #-4] +.endm + +.macro mcount_exit + ldr lr, [fp, #-4] + ldmia sp!, {r0-r3, pc} +.endm + ENTRY(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE stmdb sp!, {lr} ldr lr, [fp, #-4] ldmia sp!, {pc} +#else + __mcount _old +#endif ENDPROC(mcount) +#ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller_old) - stmdb sp!, {r0-r3, lr} - ldr r1, [fp, #-4] - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - - .globl ftrace_call_old -ftrace_call_old: - bl ftrace_stub - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} + __ftrace_caller _old ENDPROC(ftrace_caller_old) #endif -#else +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller_old) + __ftrace_graph_caller +ENDPROC(ftrace_graph_caller_old) +#endif -ENTRY(__gnu_mcount_nc) +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit +#endif + +/* + * __gnu_mcount_nc + */ + +.macro mcount_enter +/* + * This pad compensates for the push {lr} at the call site. Note that we are + * unable to unwind through a function which does not otherwise save its lr. + */ + UNWIND(.pad #4) stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, .Lftrace_stub - cmp r0, r2 - bne gnu_trace + UNWIND(.save {r0-r3, lr}) +.endm + +.macro mcount_get_lr reg + ldr \reg, [sp, #20] +.endm + +.macro mcount_exit ldmia sp!, {r0-r3, ip, lr} mov pc, ip +.endm -gnu_trace: - ldr r1, [sp, #20] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - adr lr, BSYM(1f) - mov pc, r2 -1: - ldmia sp!, {r0-r3, ip, lr} +ENTRY(__gnu_mcount_nc) +UNWIND(.fnstart) +#ifdef CONFIG_DYNAMIC_FTRACE + mov ip, lr + ldmia sp!, {lr} mov pc, ip +#else + __mcount +#endif +UNWIND(.fnend) ENDPROC(__gnu_mcount_nc) -#ifdef CONFIG_OLD_MCOUNT -/* - * This is under an ifdef in order to force link-time errors for people trying - * to build with !FRAME_POINTER with a GCC which doesn't use the new-style - * mcount. - */ -ENTRY(mcount) - stmdb sp!, {r0-r3, lr} - ldr r0, =ftrace_trace_function - ldr r2, [r0] - adr r0, ftrace_stub - cmp r0, r2 - bne trace - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} +#ifdef CONFIG_DYNAMIC_FTRACE +ENTRY(ftrace_caller) +UNWIND(.fnstart) + __ftrace_caller +UNWIND(.fnend) +ENDPROC(ftrace_caller) +#endif -trace: - ldr r1, [fp, #-4] @ lr of instrumented routine - mov r0, lr - sub r0, r0, #MCOUNT_INSN_SIZE - mov lr, pc - mov pc, r2 - ldr lr, [fp, #-4] @ restore lr - ldmia sp!, {r0-r3, pc} -ENDPROC(mcount) +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) +UNWIND(.fnstart) + __ftrace_graph_caller +UNWIND(.fnend) +ENDPROC(ftrace_graph_caller) #endif -#endif /* CONFIG_DYNAMIC_FTRACE */ +.purgem mcount_enter +.purgem mcount_get_lr +.purgem mcount_exit + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .globl return_to_handler +return_to_handler: + stmdb sp!, {r0-r3} + mov r0, fp @ frame pointer + bl ftrace_return_to_handler + mov lr, r0 @ r0 has real ret addr + ldmia sp!, {r0-r3} + mov pc, lr +#endif ENTRY(ftrace_stub) .Lftrace_stub: @@ -246,22 +348,11 @@ ENDPROC(ftrace_stub) *----------------------------------------------------------------------------- */ - /* If we're optimising for StrongARM the resulting code won't - run on an ARM7 and we can save a couple of instructions. - --pb */ -#ifdef CONFIG_CPU_ARM710 -#define A710(code...) code -.Larm710bug: - ldmia sp, {r0 - lr}^ @ Get calling r0 - lr - mov r0, r0 - add sp, sp, #S_FRAME_SIZE - subs pc, lr, #4 -#else -#define A710(code...) -#endif - .align 5 ENTRY(vector_swi) +#ifdef CONFIG_CPU_V7M + v7m_exception_entry +#else sub sp, sp, #S_FRAME_SIZE stmia sp, {r0 - r12} @ Calling r0 - r12 ARM( add r8, sp, #S_PC ) @@ -272,7 +363,12 @@ ENTRY(vector_swi) str lr, [sp, #S_PC] @ Save calling PC str r8, [sp, #S_PSR] @ Save CPSR str r0, [sp, #S_OLD_R0] @ Save OLD_R0 +#endif zero_fp + alignment_trap ip, __cr_alignment + enable_irq + ct_user_exit + get_thread_info tsk /* * Get the system call number. @@ -287,52 +383,28 @@ ENTRY(vector_swi) #ifdef CONFIG_ARM_THUMB tst r8, #PSR_T_BIT movne r10, #0 @ no thumb OABI emulation - ldreq r10, [lr, #-4] @ get SWI instruction + USER( ldreq r10, [lr, #-4] ) @ get SWI instruction #else - ldr r10, [lr, #-4] @ get SWI instruction - A710( and ip, r10, #0x0f000000 @ check for SWI ) - A710( teq ip, #0x0f000000 ) - A710( bne .Larm710bug ) -#endif -#ifdef CONFIG_CPU_ENDIAN_BE8 - rev r10, r10 @ little endian instruction + USER( ldr r10, [lr, #-4] ) @ get SWI instruction #endif + ARM_BE8(rev r10, r10) @ little endian instruction #elif defined(CONFIG_AEABI) /* * Pure EABI user space always put syscall number into scno (r7). */ - A710( ldr ip, [lr, #-4] @ get SWI instruction ) - A710( and ip, ip, #0x0f000000 @ check for SWI ) - A710( teq ip, #0x0f000000 ) - A710( bne .Larm710bug ) - #elif defined(CONFIG_ARM_THUMB) - /* Legacy ABI only, possibly thumb mode. */ tst r8, #PSR_T_BIT @ this is SPSR from save_user_regs addne scno, r7, #__NR_SYSCALL_BASE @ put OS number in - ldreq scno, [lr, #-4] + USER( ldreq scno, [lr, #-4] ) #else - /* Legacy ABI only. */ - ldr scno, [lr, #-4] @ get SWI instruction - A710( and ip, scno, #0x0f000000 @ check for SWI ) - A710( teq ip, #0x0f000000 ) - A710( bne .Larm710bug ) - + USER( ldr scno, [lr, #-4] ) @ get SWI instruction #endif -#ifdef CONFIG_ALIGNMENT_TRAP - ldr ip, __cr_alignment - ldr ip, [ip] - mcr p15, 0, ip, c1, c0 @ update control register -#endif - enable_irq - - get_thread_info tsk adr tbl, sys_call_table @ load syscall table pointer #if defined(CONFIG_OABI_COMPAT) @@ -350,20 +422,11 @@ ENTRY(vector_swi) eor scno, scno, #__NR_SYSCALL_BASE @ check OS number #endif +local_restart: ldr r10, [tsk, #TI_FLAGS] @ check for syscall tracing stmdb sp!, {r4, r5} @ push fifth and sixth args -#ifdef CONFIG_SECCOMP - tst r10, #_TIF_SECCOMP - beq 1f - mov r0, scno - bl __secure_computing - add r0, sp, #S_R0 + S_OFF @ pointer to regs - ldmia r0, {r0 - r3} @ have to reload r0 - r3 -1: -#endif - - tst r10, #_TIF_SYSCALL_TRACE @ are we tracing syscalls? + tst r10, #_TIF_SYSCALL_WORK @ are we tracing syscalls? bne __sys_trace cmp scno, #NR_syscalls @ check upper syscall limit @@ -371,11 +434,26 @@ ENTRY(vector_swi) ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine add r1, sp, #S_OFF -2: mov why, #0 @ no longer a real syscall - cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) +2: cmp scno, #(__ARM_NR_BASE - __NR_SYSCALL_BASE) eor r0, scno, #__NR_SYSCALL_BASE @ put OS number back - bcs arm_syscall + bcs arm_syscall + mov why, #0 @ no longer a real syscall b sys_ni_syscall @ not private func + +#if defined(CONFIG_OABI_COMPAT) || !defined(CONFIG_AEABI) + /* + * We failed to handle a fault trying to access the page + * containing the swi instruction, but we're not really in a + * position to return -EFAULT. Instead, return back to the + * instruction and re-enter the user fault handling path trying + * to page it in. This will likely result in sending SEGV to the + * current task. + */ +9001: + sub lr, lr, #4 + str lr, [sp, #S_PC] + b ret_fast_syscall +#endif ENDPROC(vector_swi) /* @@ -383,25 +461,26 @@ ENDPROC(vector_swi) * context switches, and waiting for our parent to respond. */ __sys_trace: - mov r2, scno - add r1, sp, #S_OFF - mov r0, #0 @ trace entry [IP = 0] - bl syscall_trace + mov r1, scno + add r0, sp, #S_OFF + bl syscall_trace_enter adr lr, BSYM(__sys_trace_return) @ return address mov scno, r0 @ syscall number (possibly new) add r1, sp, #S_R0 + S_OFF @ pointer to regs cmp scno, #NR_syscalls @ check upper syscall limit - ldmccia r1, {r0 - r3} @ have to reload r0 - r3 + ldmccia r1, {r0 - r6} @ have to reload r0 - r6 + stmccia sp, {r4, r5} @ and update the stack args ldrcc pc, [tbl, scno, lsl #2] @ call sys_* routine - b 2b + cmp scno, #-1 @ skip the syscall? + bne 2b + add sp, sp, #S_OFF @ restore stack + b ret_slow_syscall __sys_trace_return: str r0, [sp, #S_R0 + S_OFF]! @ save returned r0 - mov r2, scno - mov r1, sp - mov r0, #1 @ trace exit [IP = 1] - bl syscall_trace + mov r0, sp + bl syscall_trace_exit b ret_slow_syscall .align 5 @@ -447,27 +526,6 @@ sys_syscall: b sys_ni_syscall ENDPROC(sys_syscall) -sys_fork_wrapper: - add r0, sp, #S_OFF - b sys_fork -ENDPROC(sys_fork_wrapper) - -sys_vfork_wrapper: - add r0, sp, #S_OFF - b sys_vfork -ENDPROC(sys_vfork_wrapper) - -sys_execve_wrapper: - add r3, sp, #S_OFF - b sys_execve -ENDPROC(sys_execve_wrapper) - -sys_clone_wrapper: - add ip, sp, #S_OFF - str ip, [sp, #4] - b sys_clone -ENDPROC(sys_clone_wrapper) - sys_sigreturn_wrapper: add r0, sp, #S_OFF mov why, #0 @ prevent syscall restart handling @@ -480,11 +538,6 @@ sys_rt_sigreturn_wrapper: b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) -sys_sigaltstack_wrapper: - ldr r2, [sp, #S_OFF + S_SP] - b do_sigaltstack -ENDPROC(sys_sigaltstack_wrapper) - sys_statfs64_wrapper: teq r1, #88 moveq r1, #84 diff --git a/arch/arm/kernel/entry-header.S b/arch/arm/kernel/entry-header.S index d93f976fb38..5d702f8900b 100644 --- a/arch/arm/kernel/entry-header.S +++ b/arch/arm/kernel/entry-header.S @@ -5,6 +5,7 @@ #include <asm/asm-offsets.h> #include <asm/errno.h> #include <asm/thread_info.h> +#include <asm/v7m.h> @ Bad Abort numbers @ ----------------- @@ -36,14 +37,128 @@ #endif .endm - .macro alignment_trap, rtemp + .macro alignment_trap, rtemp, label #ifdef CONFIG_ALIGNMENT_TRAP - ldr \rtemp, .LCcralign + ldr \rtemp, \label ldr \rtemp, [\rtemp] mcr p15, 0, \rtemp, c1, c0 #endif .endm +#ifdef CONFIG_CPU_V7M +/* + * ARMv7-M exception entry/exit macros. + * + * xPSR, ReturnAddress(), LR (R14), R12, R3, R2, R1, and R0 are + * automatically saved on the current stack (32 words) before + * switching to the exception stack (SP_main). + * + * If exception is taken while in user mode, SP_main is + * empty. Otherwise, SP_main is aligned to 64 bit automatically + * (CCR.STKALIGN set). + * + * Linux assumes that the interrupts are disabled when entering an + * exception handler and it may BUG if this is not the case. Interrupts + * are disabled during entry and reenabled in the exit macro. + * + * v7m_exception_slow_exit is used when returning from SVC or PendSV. + * When returning to kernel mode, we don't return from exception. + */ + .macro v7m_exception_entry + @ determine the location of the registers saved by the core during + @ exception entry. Depending on the mode the cpu was in when the + @ exception happend that is either on the main or the process stack. + @ Bit 2 of EXC_RETURN stored in the lr register specifies which stack + @ was used. + tst lr, #EXC_RET_STACK_MASK + mrsne r12, psp + moveq r12, sp + + @ we cannot rely on r0-r3 and r12 matching the value saved in the + @ exception frame because of tail-chaining. So these have to be + @ reloaded. + ldmia r12!, {r0-r3} + + @ Linux expects to have irqs off. Do it here before taking stack space + cpsid i + + sub sp, #S_FRAME_SIZE-S_IP + stmdb sp!, {r0-r11} + + @ load saved r12, lr, return address and xPSR. + @ r0-r7 are used for signals and never touched from now on. Clobbering + @ r8-r12 is OK. + mov r9, r12 + ldmia r9!, {r8, r10-r12} + + @ calculate the original stack pointer value. + @ r9 currently points to the memory location just above the auto saved + @ xPSR. + @ The cpu might automatically 8-byte align the stack. Bit 9 + @ of the saved xPSR specifies if stack aligning took place. In this case + @ another 32-bit value is included in the stack. + + tst r12, V7M_xPSR_FRAMEPTRALIGN + addne r9, r9, #4 + + @ store saved r12 using str to have a register to hold the base for stm + str r8, [sp, #S_IP] + add r8, sp, #S_SP + @ store r13-r15, xPSR + stmia r8!, {r9-r12} + @ store old_r0 + str r0, [r8] + .endm + + /* + * PENDSV and SVCALL are configured to have the same exception + * priorities. As a kernel thread runs at SVCALL execution priority it + * can never be preempted and so we will never have to return to a + * kernel thread here. + */ + .macro v7m_exception_slow_exit ret_r0 + cpsid i + ldr lr, =EXC_RET_THREADMODE_PROCESSSTACK + + @ read original r12, sp, lr, pc and xPSR + add r12, sp, #S_IP + ldmia r12, {r1-r5} + + @ an exception frame is always 8-byte aligned. To tell the hardware if + @ the sp to be restored is aligned or not set bit 9 of the saved xPSR + @ accordingly. + tst r2, #4 + subne r2, r2, #4 + orrne r5, V7M_xPSR_FRAMEPTRALIGN + biceq r5, V7M_xPSR_FRAMEPTRALIGN + + @ ensure bit 0 is cleared in the PC, otherwise behaviour is + @ unpredictable + bic r4, #1 + + @ write basic exception frame + stmdb r2!, {r1, r3-r5} + ldmia sp, {r1, r3-r5} + .if \ret_r0 + stmdb r2!, {r0, r3-r5} + .else + stmdb r2!, {r1, r3-r5} + .endif + + @ restore process sp + msr psp, r2 + + @ restore original r4-r11 + ldmia sp!, {r0-r11} + + @ restore main sp + add sp, sp, #S_FRAME_SIZE-S_IP + + cpsie i + bx lr + .endm +#endif /* CONFIG_CPU_V7M */ + @ @ Store/load the USER SP and LR registers by switching to the SYS @ mode. Useful in Thumb-2 mode where "stm/ldm rd, {sp, lr}^" is not @@ -74,15 +189,32 @@ .endm #ifndef CONFIG_THUMB2_KERNEL - .macro svc_exit, rpsr + .macro svc_exit, rpsr, irq = 0 + .if \irq != 0 + @ IRQs already off +#ifdef CONFIG_TRACE_IRQFLAGS + @ The parent context IRQs must have been enabled to get here in + @ the first place, so there's no point checking the PSR I bit. + bl trace_hardirqs_on +#endif + .else + @ IRQs off again before pulling preserved data off the stack + disable_irq_notrace +#ifdef CONFIG_TRACE_IRQFLAGS + tst \rpsr, #PSR_I_BIT + bleq trace_hardirqs_on + tst \rpsr, #PSR_I_BIT + blne trace_hardirqs_off +#endif + .endif msr spsr_cxsf, \rpsr -#if defined(CONFIG_CPU_32v6K) - clrex @ clear the exclusive monitor - ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr -#elif defined (CONFIG_CPU_V6) +#if defined(CONFIG_CPU_V6) ldr r0, [sp] strex r1, r2, [sp] @ clear the exclusive monitor ldmib sp, {r1 - pc}^ @ load r1 - pc, cpsr +#elif defined(CONFIG_CPU_32v6K) + clrex @ clear the exclusive monitor + ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr #else ldmia sp, {r0 - pc}^ @ load r0 - pc, cpsr #endif @@ -92,10 +224,10 @@ ldr r1, [sp, #\offset + S_PSR] @ get calling cpsr ldr lr, [sp, #\offset + S_PC]! @ get pc msr spsr_cxsf, r1 @ save in spsr_svc -#if defined(CONFIG_CPU_32v6K) - clrex @ clear the exclusive monitor -#elif defined (CONFIG_CPU_V6) +#if defined(CONFIG_CPU_V6) strex r1, r2, [sp] @ clear the exclusive monitor +#elif defined(CONFIG_CPU_32v6K) + clrex @ clear the exclusive monitor #endif .if \fast ldmdb sp, {r1 - lr}^ @ get calling r1 - lr @@ -108,11 +240,6 @@ movs pc, lr @ return & move spsr_svc into cpsr .endm - .macro get_thread_info, rd - mov \rd, sp, lsr #13 - mov \rd, \rd, lsl #13 - .endm - @ @ 32-bit wide "mov pc, reg" @ @@ -120,19 +247,46 @@ mov pc, \reg .endm #else /* CONFIG_THUMB2_KERNEL */ - .macro svc_exit, rpsr + .macro svc_exit, rpsr, irq = 0 + .if \irq != 0 + @ IRQs already off +#ifdef CONFIG_TRACE_IRQFLAGS + @ The parent context IRQs must have been enabled to get here in + @ the first place, so there's no point checking the PSR I bit. + bl trace_hardirqs_on +#endif + .else + @ IRQs off again before pulling preserved data off the stack + disable_irq_notrace +#ifdef CONFIG_TRACE_IRQFLAGS + tst \rpsr, #PSR_I_BIT + bleq trace_hardirqs_on + tst \rpsr, #PSR_I_BIT + blne trace_hardirqs_off +#endif + .endif + ldr lr, [sp, #S_SP] @ top of the stack + ldrd r0, r1, [sp, #S_LR] @ calling lr and pc clrex @ clear the exclusive monitor - ldr r0, [sp, #S_SP] @ top of the stack - ldr r1, [sp, #S_PC] @ return address - tst r0, #4 @ orig stack 8-byte aligned? - stmdb r0, {r1, \rpsr} @ rfe context + stmdb lr!, {r0, r1, \rpsr} @ calling lr and rfe context ldmia sp, {r0 - r12} - ldr lr, [sp, #S_LR] - addeq sp, sp, #S_FRAME_SIZE - 8 @ aligned - addne sp, sp, #S_FRAME_SIZE - 4 @ not aligned + mov sp, lr + ldr lr, [sp], #4 rfeia sp! .endm +#ifdef CONFIG_CPU_V7M + /* + * Note we don't need to do clrex here as clearing the local monitor is + * part of each exception entry and exit sequence. + */ + .macro restore_user_regs, fast = 0, offset = 0 + .if \offset + add sp, #\offset + .endif + v7m_exception_slow_exit ret_r0 = \fast + .endm +#else /* ifdef CONFIG_CPU_V7M */ .macro restore_user_regs, fast = 0, offset = 0 clrex @ clear the exclusive monitor mov r2, sp @@ -149,12 +303,7 @@ add sp, sp, #S_FRAME_SIZE - S_SP movs pc, lr @ return & move spsr_svc into cpsr .endm - - .macro get_thread_info, rd - mov \rd, sp - lsr \rd, \rd, #13 - mov \rd, \rd, lsl #13 - .endm +#endif /* ifdef CONFIG_CPU_V7M / else */ @ @ 32-bit wide "mov pc, reg" @@ -166,6 +315,34 @@ #endif /* !CONFIG_THUMB2_KERNEL */ /* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, save = 1 +#ifdef CONFIG_CONTEXT_TRACKING + .if \save + stmdb sp!, {r0-r3, ip, lr} + bl context_tracking_user_exit + ldmia sp!, {r0-r3, ip, lr} + .else + bl context_tracking_user_exit + .endif +#endif + .endm + + .macro ct_user_enter, save = 1 +#ifdef CONFIG_CONTEXT_TRACKING + .if \save + stmdb sp!, {r0-r3, ip, lr} + bl context_tracking_user_enter + ldmia sp!, {r0-r3, ip, lr} + .else + bl context_tracking_user_enter + .endif +#endif + .endm + +/* * These are the registers used in the syscall handler, and allow us to * have in theory up to 7 arguments to a function - r0 to r6. * diff --git a/arch/arm/kernel/entry-v7m.S b/arch/arm/kernel/entry-v7m.S new file mode 100644 index 00000000000..2260f185582 --- /dev/null +++ b/arch/arm/kernel/entry-v7m.S @@ -0,0 +1,141 @@ +/* + * linux/arch/arm/kernel/entry-v7m.S + * + * Copyright (C) 2008 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Low-level vector interface routines for the ARMv7-M architecture + */ +#include <asm/memory.h> +#include <asm/glue.h> +#include <asm/thread_notify.h> +#include <asm/v7m.h> + +#include "entry-header.S" + +#ifdef CONFIG_TRACE_IRQFLAGS +#error "CONFIG_TRACE_IRQFLAGS not supported on the current ARMv7M implementation" +#endif + +__invalid_entry: + v7m_exception_entry + adr r0, strerr + mrs r1, ipsr + mov r2, lr + bl printk + mov r0, sp + bl show_regs +1: b 1b +ENDPROC(__invalid_entry) + +strerr: .asciz "\nUnhandled exception: IPSR = %08lx LR = %08lx\n" + + .align 2 +__irq_entry: + v7m_exception_entry + + @ + @ Invoke the IRQ handler + @ + mrs r0, ipsr + ldr r1, =V7M_xPSR_EXCEPTIONNO + and r0, r1 + sub r0, #16 + mov r1, sp + stmdb sp!, {lr} + @ routine called with r0 = irq number, r1 = struct pt_regs * + bl nvic_handle_irq + + pop {lr} + @ + @ Check for any pending work if returning to user + @ + ldr r1, =BASEADDR_V7M_SCB + ldr r0, [r1, V7M_SCB_ICSR] + tst r0, V7M_SCB_ICSR_RETTOBASE + beq 2f + + get_thread_info tsk + ldr r2, [tsk, #TI_FLAGS] + tst r2, #_TIF_WORK_MASK + beq 2f @ no work pending + mov r0, #V7M_SCB_ICSR_PENDSVSET + str r0, [r1, V7M_SCB_ICSR] @ raise PendSV + +2: + @ registers r0-r3 and r12 are automatically restored on exception + @ return. r4-r7 were not clobbered in v7m_exception_entry so for + @ correctness they don't need to be restored. So only r8-r11 must be + @ restored here. The easiest way to do so is to restore r0-r7, too. + ldmia sp!, {r0-r11} + add sp, #S_FRAME_SIZE-S_IP + cpsie i + bx lr +ENDPROC(__irq_entry) + +__pendsv_entry: + v7m_exception_entry + + ldr r1, =BASEADDR_V7M_SCB + mov r0, #V7M_SCB_ICSR_PENDSVCLR + str r0, [r1, V7M_SCB_ICSR] @ clear PendSV + + @ execute the pending work, including reschedule + get_thread_info tsk + mov why, #0 + b ret_to_user +ENDPROC(__pendsv_entry) + +/* + * Register switch for ARMv7-M processors. + * r0 = previous task_struct, r1 = previous thread_info, r2 = next thread_info + * previous and next are guaranteed not to be the same. + */ +ENTRY(__switch_to) + .fnstart + .cantunwind + add ip, r1, #TI_CPU_SAVE + stmia ip!, {r4 - r11} @ Store most regs on stack + str sp, [ip], #4 + str lr, [ip], #4 + mov r5, r0 + add r4, r2, #TI_CPU_SAVE + ldr r0, =thread_notify_head + mov r1, #THREAD_NOTIFY_SWITCH + bl atomic_notifier_call_chain + mov ip, r4 + mov r0, r5 + ldmia ip!, {r4 - r11} @ Load all regs saved previously + ldr sp, [ip] + ldr pc, [ip, #4]! + .fnend +ENDPROC(__switch_to) + + .data + .align 8 +/* + * Vector table (64 words => 256 bytes natural alignment) + */ +ENTRY(vector_table) + .long 0 @ 0 - Reset stack pointer + .long __invalid_entry @ 1 - Reset + .long __invalid_entry @ 2 - NMI + .long __invalid_entry @ 3 - HardFault + .long __invalid_entry @ 4 - MemManage + .long __invalid_entry @ 5 - BusFault + .long __invalid_entry @ 6 - UsageFault + .long __invalid_entry @ 7 - Reserved + .long __invalid_entry @ 8 - Reserved + .long __invalid_entry @ 9 - Reserved + .long __invalid_entry @ 10 - Reserved + .long vector_swi @ 11 - SVCall + .long __invalid_entry @ 12 - Debug Monitor + .long __invalid_entry @ 13 - Reserved + .long __pendsv_entry @ 14 - PendSV + .long __invalid_entry @ 15 - SysTick + .rept 64 - 16 + .long __irq_entry @ 16..64 - External Interrupts + .endr diff --git a/arch/arm/kernel/etm.c b/arch/arm/kernel/etm.c index 11db62806a1..131a6ab5f35 100644 --- a/arch/arm/kernel/etm.c +++ b/arch/arm/kernel/etm.c @@ -24,6 +24,7 @@ #include <linux/miscdevice.h> #include <linux/vmalloc.h> #include <linux/mutex.h> +#include <linux/module.h> #include <asm/hardware/coresight.h> #include <asm/sections.h> @@ -253,7 +254,7 @@ static void sysrq_etm_dump(int key) static struct sysrq_key_op sysrq_etm_op = { .handler = sysrq_etm_dump, - .help_msg = "ETM buffer dump", + .help_msg = "etm-buffer-dump(v)", .action_msg = "etm", }; @@ -338,7 +339,7 @@ static struct miscdevice etb_miscdev = { .fops = &etb_fops, }; -static int __init etb_probe(struct amba_device *dev, struct amba_id *id) +static int etb_probe(struct amba_device *dev, const struct amba_id *id) { struct tracectx *t = &tracer; int ret = 0; @@ -384,7 +385,6 @@ out: return ret; out_unmap: - amba_set_drvdata(dev, NULL); iounmap(t->etb_regs); out_release: @@ -397,8 +397,6 @@ static int etb_remove(struct amba_device *dev) { struct tracectx *t = amba_get_drvdata(dev); - amba_set_drvdata(dev, NULL); - iounmap(t->etb_regs); t->etb_regs = NULL; @@ -530,7 +528,7 @@ static ssize_t trace_mode_store(struct kobject *kobj, static struct kobj_attribute trace_mode_attr = __ATTR(trace_mode, 0644, trace_mode_show, trace_mode_store); -static int __init etm_probe(struct amba_device *dev, struct amba_id *id) +static int etm_probe(struct amba_device *dev, const struct amba_id *id) { struct tracectx *t = &tracer; int ret = 0; @@ -587,7 +585,6 @@ out: return ret; out_unmap: - amba_set_drvdata(dev, NULL); iounmap(t->etm_regs); out_release: @@ -600,8 +597,6 @@ static int etm_remove(struct amba_device *dev) { struct tracectx *t = amba_get_drvdata(dev); - amba_set_drvdata(dev, NULL); - iounmap(t->etm_regs); t->etm_regs = NULL; diff --git a/arch/arm/kernel/fiq.c b/arch/arm/kernel/fiq.c index 6ff7919613d..918875d96d5 100644 --- a/arch/arm/kernel/fiq.c +++ b/arch/arm/kernel/fiq.c @@ -42,9 +42,15 @@ #include <linux/seq_file.h> #include <asm/cacheflush.h> +#include <asm/cp15.h> #include <asm/fiq.h> #include <asm/irq.h> -#include <asm/system.h> +#include <asm/traps.h> + +#define FIQ_OFFSET ({ \ + extern void *vector_fiq_offset; \ + (unsigned)&vector_fiq_offset; \ + }) static unsigned long no_fiq_insn; @@ -67,61 +73,25 @@ static struct fiq_handler default_owner = { static struct fiq_handler *current_fiq = &default_owner; -int show_fiq_list(struct seq_file *p, void *v) +int show_fiq_list(struct seq_file *p, int prec) { if (current_fiq != &default_owner) - seq_printf(p, "FIQ: %s\n", current_fiq->name); + seq_printf(p, "%*s: %s\n", prec, "FIQ", + current_fiq->name); return 0; } void set_fiq_handler(void *start, unsigned int length) { - memcpy((void *)0xffff001c, start, length); - flush_icache_range(0xffff001c, 0xffff001c + length); - if (!vectors_high()) - flush_icache_range(0x1c, 0x1c + length); -} - -/* - * Taking an interrupt in FIQ mode is death, so both these functions - * disable irqs for the duration. Note - these functions are almost - * entirely coded in assembly. - */ -void __naked set_fiq_regs(struct pt_regs *regs) -{ - register unsigned long tmp; - asm volatile ( - "mov ip, sp\n\ - stmfd sp!, {fp, ip, lr, pc}\n\ - sub fp, ip, #4\n\ - mrs %0, cpsr\n\ - msr cpsr_c, %2 @ select FIQ mode\n\ - mov r0, r0\n\ - ldmia %1, {r8 - r14}\n\ - msr cpsr_c, %0 @ return to SVC mode\n\ - mov r0, r0\n\ - ldmfd sp, {fp, sp, pc}" - : "=&r" (tmp) - : "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); -} - -void __naked get_fiq_regs(struct pt_regs *regs) -{ - register unsigned long tmp; - asm volatile ( - "mov ip, sp\n\ - stmfd sp!, {fp, ip, lr, pc}\n\ - sub fp, ip, #4\n\ - mrs %0, cpsr\n\ - msr cpsr_c, %2 @ select FIQ mode\n\ - mov r0, r0\n\ - stmia %1, {r8 - r14}\n\ - msr cpsr_c, %0 @ return to SVC mode\n\ - mov r0, r0\n\ - ldmfd sp, {fp, sp, pc}" - : "=&r" (tmp) - : "r" (®s->ARM_r8), "I" (PSR_I_BIT | PSR_F_BIT | FIQ_MODE)); + void *base = vectors_page; + unsigned offset = FIQ_OFFSET; + + memcpy(base + offset, start, length); + if (!cache_is_vipt_nonaliasing()) + flush_icache_range((unsigned long)base + offset, offset + + length); + flush_icache_range(0xffff0000 + offset, 0xffff0000 + offset + length); } int claim_fiq(struct fiq_handler *f) @@ -157,25 +127,29 @@ void release_fiq(struct fiq_handler *f) while (current_fiq->fiq_op(current_fiq->dev_id, 0)); } +static int fiq_start; + void enable_fiq(int fiq) { - enable_irq(fiq + FIQ_START); + enable_irq(fiq + fiq_start); } void disable_fiq(int fiq) { - disable_irq(fiq + FIQ_START); + disable_irq(fiq + fiq_start); } EXPORT_SYMBOL(set_fiq_handler); -EXPORT_SYMBOL(set_fiq_regs); -EXPORT_SYMBOL(get_fiq_regs); +EXPORT_SYMBOL(__set_fiq_regs); /* defined in fiqasm.S */ +EXPORT_SYMBOL(__get_fiq_regs); /* defined in fiqasm.S */ EXPORT_SYMBOL(claim_fiq); EXPORT_SYMBOL(release_fiq); EXPORT_SYMBOL(enable_fiq); EXPORT_SYMBOL(disable_fiq); -void __init init_FIQ(void) +void __init init_FIQ(int start) { - no_fiq_insn = *(unsigned long *)0xffff001c; + unsigned offset = FIQ_OFFSET; + no_fiq_insn = *(unsigned long *)(0xffff0000 + offset); + fiq_start = start; } diff --git a/arch/arm/kernel/fiqasm.S b/arch/arm/kernel/fiqasm.S new file mode 100644 index 00000000000..207f9d65201 --- /dev/null +++ b/arch/arm/kernel/fiqasm.S @@ -0,0 +1,49 @@ +/* + * linux/arch/arm/kernel/fiqasm.S + * + * Derived from code originally in linux/arch/arm/kernel/fiq.c: + * + * Copyright (C) 1998 Russell King + * Copyright (C) 1998, 1999 Phil Blundell + * Copyright (C) 2011, Linaro Limited + * + * FIQ support written by Philip Blundell <philb@gnu.org>, 1998. + * + * FIQ support re-written by Russell King to be more generic + * + * v7/Thumb-2 compatibility modifications by Linaro Limited, 2011. + */ + +#include <linux/linkage.h> +#include <asm/assembler.h> + +/* + * Taking an interrupt in FIQ mode is death, so both these functions + * disable irqs for the duration. + */ + +ENTRY(__set_fiq_regs) + mov r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE + mrs r1, cpsr + msr cpsr_c, r2 @ select FIQ mode + mov r0, r0 @ avoid hazard prior to ARMv4 + ldmia r0!, {r8 - r12} + ldr sp, [r0], #4 + ldr lr, [r0] + msr cpsr_c, r1 @ return to SVC mode + mov r0, r0 @ avoid hazard prior to ARMv4 + mov pc, lr +ENDPROC(__set_fiq_regs) + +ENTRY(__get_fiq_regs) + mov r2, #PSR_I_BIT | PSR_F_BIT | FIQ_MODE + mrs r1, cpsr + msr cpsr_c, r2 @ select FIQ mode + mov r0, r0 @ avoid hazard prior to ARMv4 + stmia r0!, {r8 - r12} + str sp, [r0], #4 + str lr, [r0] + msr cpsr_c, r1 @ return to SVC mode + mov r0, r0 @ avoid hazard prior to ARMv4 + mov pc, lr +ENDPROC(__get_fiq_regs) diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 971ac8c36ea..af9a8a927a4 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -14,16 +14,21 @@ #include <linux/ftrace.h> #include <linux/uaccess.h> +#include <linux/module.h> #include <asm/cacheflush.h> +#include <asm/opcodes.h> #include <asm/ftrace.h> +#include "insn.h" + #ifdef CONFIG_THUMB2_KERNEL -#define NOP 0xeb04f85d /* pop.w {lr} */ +#define NOP 0xf85deb04 /* pop.w {lr} */ #else #define NOP 0xe8bd4000 /* pop {lr} */ #endif +#ifdef CONFIG_DYNAMIC_FTRACE #ifdef CONFIG_OLD_MCOUNT #define OLD_MCOUNT_ADDR ((unsigned long) mcount) #define OLD_FTRACE_ADDR ((unsigned long) ftrace_caller_old) @@ -59,64 +64,43 @@ static unsigned long adjust_address(struct dyn_ftrace *rec, unsigned long addr) } #endif -/* construct a branch (BL) instruction to addr */ -#ifdef CONFIG_THUMB2_KERNEL -static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) +int ftrace_arch_code_modify_prepare(void) { - unsigned long s, j1, j2, i1, i2, imm10, imm11; - unsigned long first, second; - long offset; - - offset = (long)addr - (long)(pc + 4); - if (offset < -16777216 || offset > 16777214) { - WARN_ON_ONCE(1); - return 0; - } - - s = (offset >> 24) & 0x1; - i1 = (offset >> 23) & 0x1; - i2 = (offset >> 22) & 0x1; - imm10 = (offset >> 12) & 0x3ff; - imm11 = (offset >> 1) & 0x7ff; - - j1 = (!i1) ^ s; - j2 = (!i2) ^ s; - - first = 0xf000 | (s << 10) | imm10; - second = 0xd000 | (j1 << 13) | (j2 << 11) | imm11; + set_all_modules_text_rw(); + return 0; +} - return (second << 16) | first; +int ftrace_arch_code_modify_post_process(void) +{ + set_all_modules_text_ro(); + return 0; } -#else + static unsigned long ftrace_call_replace(unsigned long pc, unsigned long addr) { - long offset; - - offset = (long)addr - (long)(pc + 8); - if (unlikely(offset < -33554432 || offset > 33554428)) { - /* Can't generate branches that far (from ARM ARM). Ftrace - * doesn't generate branches outside of kernel text. - */ - WARN_ON_ONCE(1); - return 0; - } - - offset = (offset >> 2) & 0x00ffffff; - - return 0xeb000000 | offset; + return arm_gen_branch_link(pc, addr); } -#endif static int ftrace_modify_code(unsigned long pc, unsigned long old, - unsigned long new) + unsigned long new, bool validate) { unsigned long replaced; - if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) - return -EFAULT; + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + old = __opcode_to_mem_thumb32(old); + new = __opcode_to_mem_thumb32(new); + } else { + old = __opcode_to_mem_arm(old); + new = __opcode_to_mem_arm(new); + } - if (replaced != old) - return -EINVAL; + if (validate) { + if (probe_kernel_read(&replaced, (void *)pc, MCOUNT_INSN_SIZE)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } if (probe_kernel_write((void *)pc, &new, MCOUNT_INSN_SIZE)) return -EPERM; @@ -128,23 +112,21 @@ static int ftrace_modify_code(unsigned long pc, unsigned long old, int ftrace_update_ftrace_func(ftrace_func_t func) { - unsigned long pc, old; + unsigned long pc; unsigned long new; int ret; pc = (unsigned long)&ftrace_call; - memcpy(&old, &ftrace_call, MCOUNT_INSN_SIZE); new = ftrace_call_replace(pc, (unsigned long)func); - ret = ftrace_modify_code(pc, old, new); + ret = ftrace_modify_code(pc, 0, new, false); #ifdef CONFIG_OLD_MCOUNT if (!ret) { pc = (unsigned long)&ftrace_call_old; - memcpy(&old, &ftrace_call_old, MCOUNT_INSN_SIZE); new = ftrace_call_replace(pc, (unsigned long)func); - ret = ftrace_modify_code(pc, old, new); + ret = ftrace_modify_code(pc, 0, new, false); } #endif @@ -159,7 +141,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) old = ftrace_nop_replace(rec); new = ftrace_call_replace(ip, adjust_address(rec, addr)); - return ftrace_modify_code(rec->ip, old, new); + return ftrace_modify_code(rec->ip, old, new, true); } int ftrace_make_nop(struct module *mod, @@ -172,7 +154,7 @@ int ftrace_make_nop(struct module *mod, old = ftrace_call_replace(ip, adjust_address(rec, addr)); new = ftrace_nop_replace(rec); - ret = ftrace_modify_code(ip, old, new); + ret = ftrace_modify_code(ip, old, new, true); #ifdef CONFIG_OLD_MCOUNT if (ret == -EINVAL && addr == MCOUNT_ADDR) { @@ -180,16 +162,95 @@ int ftrace_make_nop(struct module *mod, old = ftrace_call_replace(ip, adjust_address(rec, addr)); new = ftrace_nop_replace(rec); - ret = ftrace_modify_code(ip, old, new); + ret = ftrace_modify_code(ip, old, new, true); } #endif return ret; } -int __init ftrace_dyn_arch_init(void *data) +int __init ftrace_dyn_arch_init(void) { - *(unsigned long *)data = 0; - return 0; } +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long) &return_to_handler; + struct ftrace_graph_ent trace; + unsigned long old; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + old = *parent; + *parent = return_hooker; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + *parent = old; + return; + } + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +extern unsigned long ftrace_graph_call; +extern unsigned long ftrace_graph_call_old; +extern void ftrace_graph_caller_old(void); + +static int __ftrace_modify_caller(unsigned long *callsite, + void (*func) (void), bool enable) +{ + unsigned long caller_fn = (unsigned long) func; + unsigned long pc = (unsigned long) callsite; + unsigned long branch = arm_gen_branch(pc, caller_fn); + unsigned long nop = 0xe1a00000; /* mov r0, r0 */ + unsigned long old = enable ? nop : branch; + unsigned long new = enable ? branch : nop; + + return ftrace_modify_code(pc, old, new, true); +} + +static int ftrace_modify_graph_caller(bool enable) +{ + int ret; + + ret = __ftrace_modify_caller(&ftrace_graph_call, + ftrace_graph_caller, + enable); + +#ifdef CONFIG_OLD_MCOUNT + if (!ret) + ret = __ftrace_modify_caller(&ftrace_graph_call_old, + ftrace_graph_caller_old, + enable); +#endif + + return ret; +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index bbecaac1e01..572a38335c9 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -15,6 +15,12 @@ #define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2) #define ATAG_CORE_SIZE_EMPTY ((2*4) >> 2) +#ifdef CONFIG_CPU_BIG_ENDIAN +#define OF_DT_MAGIC 0xd00dfeed +#else +#define OF_DT_MAGIC 0xedfe0dd0 /* 0xd00dfeed in big-endian */ +#endif + /* * Exception handling. Something went wrong and we can't proceed. We * ought to tell the user, but since we don't have any guarantee that @@ -25,100 +31,29 @@ * machine ID for example). */ __HEAD -__error_a: -#ifdef CONFIG_DEBUG_LL - mov r4, r1 @ preserve machine ID - adr r0, str_a1 - bl printascii - mov r0, r4 - bl printhex8 - adr r0, str_a2 - bl printascii - adr r3, __lookup_machine_type_data - ldmia r3, {r4, r5, r6} @ get machine desc list - sub r4, r3, r4 @ get offset between virt&phys - add r5, r5, r4 @ convert virt addresses to - add r6, r6, r4 @ physical address space -1: ldr r0, [r5, #MACHINFO_TYPE] @ get machine type - bl printhex8 - mov r0, #'\t' - bl printch - ldr r0, [r5, #MACHINFO_NAME] @ get machine name - add r0, r0, r4 - bl printascii - mov r0, #'\n' - bl printch - add r5, r5, #SIZEOF_MACHINE_DESC @ next machine_desc - cmp r5, r6 - blo 1b - adr r0, str_a3 - bl printascii - b __error -ENDPROC(__error_a) - -str_a1: .asciz "\nError: unrecognized/unsupported machine ID (r1 = 0x" -str_a2: .asciz ").\n\nAvailable machine support:\n\nID (hex)\tNAME\n" -str_a3: .asciz "\nPlease check your kernel config and/or bootloader.\n" - .align -#endif - -/* - * Lookup machine architecture in the linker-build list of architectures. - * Note that we can't use the absolute addresses for the __arch_info - * lists since we aren't running with the MMU on (and therefore, we are - * not in the correct address space). We have to calculate the offset. - * - * r1 = machine architecture number - * Returns: - * r3, r4, r6 corrupted - * r5 = mach_info pointer in physical address space - */ -__lookup_machine_type: - adr r3, __lookup_machine_type_data - ldmia r3, {r4, r5, r6} - sub r3, r3, r4 @ get offset between virt&phys - add r5, r5, r3 @ convert virt addresses to - add r6, r6, r3 @ physical address space -1: ldr r3, [r5, #MACHINFO_TYPE] @ get machine type - teq r3, r1 @ matches loader number? - beq 2f @ found - add r5, r5, #SIZEOF_MACHINE_DESC @ next machine_desc - cmp r5, r6 - blo 1b - mov r5, #0 @ unknown machine -2: mov pc, lr -ENDPROC(__lookup_machine_type) - -/* - * Look in arch/arm/kernel/arch.[ch] for information about the - * __arch_info structures. - */ - .align 2 - .type __lookup_machine_type_data, %object -__lookup_machine_type_data: - .long . - .long __arch_info_begin - .long __arch_info_end - .size __lookup_machine_type_data, . - __lookup_machine_type_data /* Determine validity of the r2 atags pointer. The heuristic requires * that the pointer be aligned, in the first 16k of physical RAM and - * that the ATAG_CORE marker is first and present. Future revisions + * that the ATAG_CORE marker is first and present. If CONFIG_OF_FLATTREE + * is selected, then it will also accept a dtb pointer. Future revisions * of this function may be more lenient with the physical address and * may also be able to move the ATAGS block if necessary. * - * r8 = machinfo - * * Returns: - * r2 either valid atags pointer, or zero + * r2 either valid atags pointer, valid dtb pointer, or zero * r5, r6 corrupted */ __vet_atags: tst r2, #0x3 @ aligned? bne 1f - ldr r5, [r2, #0] @ is first tag ATAG_CORE? - cmp r5, #ATAG_CORE_SIZE + ldr r5, [r2, #0] +#ifdef CONFIG_OF_FLATTREE + ldr r6, =OF_DT_MAGIC @ is it a DTB? + cmp r5, r6 + beq 2f +#endif + cmp r5, #ATAG_CORE_SIZE @ is first tag ATAG_CORE? cmpne r5, #ATAG_CORE_SIZE_EMPTY bne 1f ldr r5, [r2, #4] @@ -126,7 +61,7 @@ __vet_atags: cmp r5, r6 bne 1f - mov pc, lr @ atag pointer is ok +2: mov pc, lr @ atag/dtb pointer is ok 1: mov r2, #0 mov pc, lr @@ -138,7 +73,7 @@ ENDPROC(__vet_atags) * * r0 = cp#15 control register * r1 = machine ID - * r2 = atags pointer + * r2 = atags/dtb pointer * r9 = processor ID */ __INIT @@ -163,8 +98,8 @@ __mmap_switched: str r9, [r4] @ Save processor ID str r1, [r5] @ Save machine type str r2, [r6] @ Save atags pointer - bic r4, r0, #CR_A @ Clear 'A' bit - stmia r7, {r0, r4} @ Save control register values + cmp r7, #0 + strne r0, [r7] @ Save control register values b start_kernel ENDPROC(__mmap_switched) @@ -178,22 +113,15 @@ __mmap_switched_data: .long processor_id @ r4 .long __machine_arch_type @ r5 .long __atags_pointer @ r6 +#ifdef CONFIG_CPU_CP15 .long cr_alignment @ r7 +#else + .long 0 @ r7 +#endif .long init_thread_union + THREAD_START_SP @ sp .size __mmap_switched_data, . - __mmap_switched_data /* - * This provides a C-API version of __lookup_machine_type - */ -ENTRY(lookup_machine_type) - stmfd sp!, {r4 - r6, lr} - mov r1, r0 - bl __lookup_machine_type - mov r0, r5 - ldmfd sp!, {r4 - r6, pc} -ENDPROC(lookup_machine_type) - -/* * This provides a C-API version of __lookup_processor_type */ ENTRY(lookup_processor_type) @@ -204,6 +132,9 @@ ENTRY(lookup_processor_type) ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type) + __FINIT + .text + /* * Read processor ID register (CP#15, CR0), and look up in the linker-built * supported processor list. Note that we can't use the absolute addresses @@ -217,7 +148,6 @@ ENDPROC(lookup_processor_type) * r5 = proc_info pointer in physical address space * r9 = cpuid (preserved) */ - __CPUINIT __lookup_processor_type: adr r3, __lookup_processor_type_data ldmia r3, {r4 - r6} @@ -246,6 +176,18 @@ __lookup_processor_type_data: .long __proc_info_end .size __lookup_processor_type_data, . - __lookup_processor_type_data +__error_lpae: +#ifdef CONFIG_DEBUG_LL + adr r0, str_lpae + bl printascii + b __error +str_lpae: .asciz "\nError: Kernel with LPAE support, but CPU does not support LPAE.\n" +#else + b __error +#endif + .align +ENDPROC(__error_lpae) + __error_p: #ifdef CONFIG_DEBUG_LL adr r0, str_p1 diff --git a/arch/arm/kernel/head-nommu.S b/arch/arm/kernel/head-nommu.S index 814ce1a7327..716249cc2ee 100644 --- a/arch/arm/kernel/head-nommu.S +++ b/arch/arm/kernel/head-nommu.S @@ -17,8 +17,12 @@ #include <asm/assembler.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> +#include <asm/memory.h> +#include <asm/cp15.h> #include <asm/thread_info.h> -#include <asm/system.h> +#include <asm/v7m.h> +#include <asm/mpu.h> +#include <asm/page.h> /* * Kernel startup entry point. @@ -32,28 +36,104 @@ * numbers for r1. * */ + __HEAD + +#ifdef CONFIG_CPU_THUMBONLY + .thumb +ENTRY(stext) +#else + .arm ENTRY(stext) + + THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( bx r9 ) @ If this is a Thumb-2 kernel, + THUMB( .thumb ) @ switch to Thumb now. + THUMB(1: ) +#endif + setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode @ and irqs disabled -#ifndef CONFIG_CPU_CP15 - ldr r9, =CONFIG_PROCESSOR_ID -#else +#if defined(CONFIG_CPU_CP15) mrc p15, 0, r9, c0, c0 @ get processor id +#elif defined(CONFIG_CPU_V7M) + ldr r9, =BASEADDR_V7M_SCB + ldr r9, [r9, V7M_SCB_CPUID] +#else + ldr r9, =CONFIG_PROCESSOR_ID #endif bl __lookup_processor_type @ r5=procinfo r9=cpuid movs r10, r5 @ invalid processor (r5=0)? beq __error_p @ yes, error 'p' - bl __lookup_machine_type @ r5=machinfo - movs r8, r5 @ invalid machine (r5=0)? - beq __error_a @ yes, error 'a' - adr lr, BSYM(__after_proc_init) @ return (PIC) address +#ifdef CONFIG_ARM_MPU + /* Calculate the size of a region covering just the kernel */ + ldr r5, =PLAT_PHYS_OFFSET @ Region start: PHYS_OFFSET + ldr r6, =(_end) @ Cover whole kernel + sub r6, r6, r5 @ Minimum size of region to map + clz r6, r6 @ Region size must be 2^N... + rsb r6, r6, #31 @ ...so round up region size + lsl r6, r6, #MPU_RSR_SZ @ Put size in right field + orr r6, r6, #(1 << MPU_RSR_EN) @ Set region enabled bit + bl __setup_mpu +#endif + ldr r13, =__mmap_switched @ address to jump to after + @ initialising sctlr + adr lr, BSYM(1f) @ return (PIC) address ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) THUMB( mov pc, r12 ) + 1: b __after_proc_init ENDPROC(stext) +#ifdef CONFIG_SMP + .text +ENTRY(secondary_startup) + /* + * Common entry point for secondary CPUs. + * + * Ensure that we're in SVC mode, and IRQs are disabled. Lookup + * the processor type - there is no need to check the machine type + * as it has already been validated by the primary processor. + */ + setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 +#ifndef CONFIG_CPU_CP15 + ldr r9, =CONFIG_PROCESSOR_ID +#else + mrc p15, 0, r9, c0, c0 @ get processor id +#endif + bl __lookup_processor_type @ r5=procinfo r9=cpuid + movs r10, r5 @ invalid processor? + beq __error_p @ yes, error 'p' + + adr r4, __secondary_data + ldmia r4, {r7, r12} + +#ifdef CONFIG_ARM_MPU + /* Use MPU region info supplied by __cpu_up */ + ldr r6, [r7] @ get secondary_data.mpu_szr + bl __setup_mpu @ Initialize the MPU +#endif + + adr lr, BSYM(__after_proc_init) @ return address + mov r13, r12 @ __secondary_switched address + ARM( add pc, r10, #PROCINFO_INITFUNC ) + THUMB( add r12, r10, #PROCINFO_INITFUNC ) + THUMB( mov pc, r12 ) +ENDPROC(secondary_startup) + +ENTRY(__secondary_switched) + ldr sp, [r7, #8] @ set up the stack pointer + mov fp, #0 + b secondary_start_kernel +ENDPROC(__secondary_switched) + + .type __secondary_data, %object +__secondary_data: + .long secondary_data + .long __secondary_switched +#endif /* CONFIG_SMP */ + /* * Set the Control Register and Read the process ID. */ @@ -63,7 +143,7 @@ __after_proc_init: * CP15 system control register value returned in r0 from * the CPU init function. */ -#ifdef CONFIG_ALIGNMENT_TRAP +#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6 orr r0, r0, #CR_A #else bic r0, r0, #CR_A @@ -84,10 +164,97 @@ __after_proc_init: #endif mcr p15, 0, r0, c1, c0, 0 @ write control reg #endif /* CONFIG_CPU_CP15 */ - - b __mmap_switched @ clear the BSS and jump - @ to start_kernel + mov pc, r13 ENDPROC(__after_proc_init) .ltorg +#ifdef CONFIG_ARM_MPU + + +/* Set which MPU region should be programmed */ +.macro set_region_nr tmp, rgnr + mov \tmp, \rgnr @ Use static region numbers + mcr p15, 0, \tmp, c6, c2, 0 @ Write RGNR +.endm + +/* Setup a single MPU region, either D or I side (D-side for unified) */ +.macro setup_region bar, acr, sr, side = MPU_DATA_SIDE + mcr p15, 0, \bar, c6, c1, (0 + \side) @ I/DRBAR + mcr p15, 0, \acr, c6, c1, (4 + \side) @ I/DRACR + mcr p15, 0, \sr, c6, c1, (2 + \side) @ I/DRSR +.endm + +/* + * Setup the MPU and initial MPU Regions. We create the following regions: + * Region 0: Use this for probing the MPU details, so leave disabled. + * Region 1: Background region - covers the whole of RAM as strongly ordered + * Region 2: Normal, Shared, cacheable for RAM. From PHYS_OFFSET, size from r6 + * Region 3: Normal, shared, inaccessible from PL0 to protect the vectors page + * + * r6: Value to be written to DRSR (and IRSR if required) for MPU_RAM_REGION +*/ + +ENTRY(__setup_mpu) + + /* Probe for v7 PMSA compliance */ + mrc p15, 0, r0, c0, c1, 4 @ Read ID_MMFR0 + and r0, r0, #(MMFR0_PMSA) @ PMSA field + teq r0, #(MMFR0_PMSAv7) @ PMSA v7 + bne __error_p @ Fail: ARM_MPU on NOT v7 PMSA + + /* Determine whether the D/I-side memory map is unified. We set the + * flags here and continue to use them for the rest of this function */ + mrc p15, 0, r0, c0, c0, 4 @ MPUIR + ands r5, r0, #MPUIR_DREGION_SZMASK @ 0 size d region => No MPU + beq __error_p @ Fail: ARM_MPU and no MPU + tst r0, #MPUIR_nU @ MPUIR_nU = 0 for unified + + /* Setup second region first to free up r6 */ + set_region_nr r0, #MPU_RAM_REGION + isb + /* Full access from PL0, PL1, shared for CONFIG_SMP, cacheable */ + ldr r0, =PLAT_PHYS_OFFSET @ RAM starts at PHYS_OFFSET + ldr r5,=(MPU_AP_PL1RW_PL0RW | MPU_RGN_NORMAL) + + setup_region r0, r5, r6, MPU_DATA_SIDE @ PHYS_OFFSET, shared, enabled + beq 1f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ PHYS_OFFSET, shared, enabled +1: isb + + /* First/background region */ + set_region_nr r0, #MPU_BG_REGION + isb + /* Execute Never, strongly ordered, inaccessible to PL0, rw PL1 */ + mov r0, #0 @ BG region starts at 0x0 + ldr r5,=(MPU_ACR_XN | MPU_RGN_STRONGLY_ORDERED | MPU_AP_PL1RW_PL0NA) + mov r6, #MPU_RSR_ALL_MEM @ 4GB region, enabled + + setup_region r0, r5, r6, MPU_DATA_SIDE @ 0x0, BG region, enabled + beq 2f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ 0x0, BG region, enabled +2: isb + + /* Vectors region */ + set_region_nr r0, #MPU_VECTORS_REGION + isb + /* Shared, inaccessible to PL0, rw PL1 */ + mov r0, #CONFIG_VECTORS_BASE @ Cover from VECTORS_BASE + ldr r5,=(MPU_AP_PL1RW_PL0NA | MPU_RGN_NORMAL) + /* Writing N to bits 5:1 (RSR_SZ) --> region size 2^N+1 */ + mov r6, #(((PAGE_SHIFT - 1) << MPU_RSR_SZ) | 1 << MPU_RSR_EN) + + setup_region r0, r5, r6, MPU_DATA_SIDE @ VECTORS_BASE, PL0 NA, enabled + beq 3f @ Memory-map not unified + setup_region r0, r5, r6, MPU_INSTR_SIDE @ VECTORS_BASE, PL0 NA, enabled +3: isb + + /* Enable the MPU */ + mrc p15, 0, r0, c1, c0, 0 @ Read SCTLR + bic r0, r0, #CR_BR @ Disable the 'default mem-map' + orr r0, r0, #CR_M @ Set SCTRL.M (MPU on) + mcr p15, 0, r0, c1, c0, 0 @ Enable MPU + isb + mov pc,lr +ENDPROC(__setup_mpu) +#endif #include "head-common.S" diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index dd6b369ac69..2c35f0ff2fd 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -15,25 +15,18 @@ #include <linux/init.h> #include <asm/assembler.h> +#include <asm/cp15.h> #include <asm/domain.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> #include <asm/memory.h> #include <asm/thread_info.h> -#include <asm/system.h> +#include <asm/pgtable.h> -#ifdef CONFIG_DEBUG_LL -#include <mach/debug-macro.S> -#endif - -#if (PHYS_OFFSET & 0x001fffff) -#error "PHYS_OFFSET must be at an even 2MiB boundary!" +#if defined(CONFIG_DEBUG_LL) && !defined(CONFIG_DEBUG_SEMIHOSTING) +#include CONFIG_DEBUG_LL_INCLUDE #endif -#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#define KERNEL_RAM_PADDR (PHYS_OFFSET + TEXT_OFFSET) - - /* * swapper_pg_dir is the virtual address of the initial page table. * We place the page tables 16K below KERNEL_RAM_VADDR. Therefore, we must @@ -41,32 +34,35 @@ * the least significant 16 bits to be 0x8000, but we could probably * relax this restriction to KERNEL_RAM_VADDR >= PAGE_OFFSET + 0x4000. */ +#define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) #if (KERNEL_RAM_VADDR & 0xffff) != 0x8000 #error KERNEL_RAM_VADDR must start at 0xXXXX8000 #endif +#ifdef CONFIG_ARM_LPAE + /* LPAE requires an additional page for the PGD */ +#define PG_DIR_SIZE 0x5000 +#define PMD_ORDER 3 +#else +#define PG_DIR_SIZE 0x4000 +#define PMD_ORDER 2 +#endif + .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - 0x4000 + .equ swapper_pg_dir, KERNEL_RAM_VADDR - PG_DIR_SIZE - .macro pgtbl, rd - ldr \rd, =(KERNEL_RAM_PADDR - 0x4000) + .macro pgtbl, rd, phys + add \rd, \phys, #TEXT_OFFSET + sub \rd, \rd, #PG_DIR_SIZE .endm -#ifdef CONFIG_XIP_KERNEL -#define KERNEL_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) -#define KERNEL_END _edata_loc -#else -#define KERNEL_START KERNEL_RAM_VADDR -#define KERNEL_END _end -#endif - /* * Kernel startup entry point. * --------------------------- * * This is normally called from the decompressor code. The requirements * are: MMU = off, D-cache = off, I-cache = dont care, r0 = 0, - * r1 = machine nr, r2 = atags pointer. + * r1 = machine nr, r2 = atags or dtb pointer. * * This code is mostly position independent, so if you link the kernel at * 0xc0008000, you call this at __pa(0xc0008000). @@ -78,62 +74,101 @@ * crap here - that's what the boot loader (or in extreme, well justified * circumstances, zImage) is for. */ + .arm + __HEAD ENTRY(stext) - setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 @ ensure svc mode - @ and irqs disabled + ARM_BE8(setend be ) @ ensure we are in BE8 mode + + THUMB( adr r9, BSYM(1f) ) @ Kernel is always entered in ARM. + THUMB( bx r9 ) @ If this is a Thumb-2 kernel, + THUMB( .thumb ) @ switch to Thumb now. + THUMB(1: ) + +#ifdef CONFIG_ARM_VIRT_EXT + bl __hyp_stub_install +#endif + @ ensure svc mode and all interrupts masked + safe_svcmode_maskall r9 + mrc p15, 0, r9, c0, c0 @ get processor id bl __lookup_processor_type @ r5=procinfo r9=cpuid movs r10, r5 @ invalid processor (r5=0)? + THUMB( it eq ) @ force fixup-able long branch encoding beq __error_p @ yes, error 'p' - bl __lookup_machine_type @ r5=machinfo - movs r8, r5 @ invalid machine (r5=0)? - beq __error_a @ yes, error 'a' + +#ifdef CONFIG_ARM_LPAE + mrc p15, 0, r3, c0, c1, 4 @ read ID_MMFR0 + and r3, r3, #0xf @ extract VMSA support + cmp r3, #5 @ long-descriptor translation table format? + THUMB( it lo ) @ force fixup-able long branch encoding + blo __error_lpae @ only classic page table format +#endif + +#ifndef CONFIG_XIP_KERNEL + adr r3, 2f + ldmia r3, {r4, r8} + sub r4, r3, r4 @ (PHYS_OFFSET - PAGE_OFFSET) + add r8, r8, r4 @ PHYS_OFFSET +#else + ldr r8, =PLAT_PHYS_OFFSET @ always constant in this case +#endif + + /* + * r1 = machine no, r2 = atags or dtb, + * r8 = phys_offset, r9 = cpuid, r10 = procinfo + */ bl __vet_atags #ifdef CONFIG_SMP_ON_UP bl __fixup_smp #endif +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT + bl __fixup_pv_table +#endif bl __create_page_tables /* * The following calls CPU specific code in a position independent * manner. See arch/arm/mm/proc-*.S for details. r10 = base of - * xxx_proc_info structure selected by __lookup_machine_type + * xxx_proc_info structure selected by __lookup_processor_type * above. On return, the CPU will be ready for the MMU to be * turned on, and r0 will hold the CPU control register value. */ ldr r13, =__mmap_switched @ address to jump to after @ mmu has been enabled adr lr, BSYM(1f) @ return (PIC) address + mov r8, r4 @ set TTBR1 to swapper_pg_dir ARM( add pc, r10, #PROCINFO_INITFUNC ) THUMB( add r12, r10, #PROCINFO_INITFUNC ) THUMB( mov pc, r12 ) 1: b __enable_mmu ENDPROC(stext) .ltorg +#ifndef CONFIG_XIP_KERNEL +2: .long . + .long PAGE_OFFSET +#endif /* * Setup the initial page tables. We only setup the barest * amount which are required to get the kernel running, which * generally means mapping in the kernel code. * - * r8 = machinfo - * r9 = cpuid - * r10 = procinfo + * r8 = phys_offset, r9 = cpuid, r10 = procinfo * * Returns: * r0, r3, r5-r7 corrupted - * r4 = physical page table address + * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) */ __create_page_tables: - pgtbl r4 @ page table address + pgtbl r4, r8 @ page table address /* - * Clear the 16K level 1 swapper page table + * Clear the swapper page table */ mov r0, r4 mov r3, #0 - add r6, r0, #0x4000 + add r6, r0, #PG_DIR_SIZE 1: str r3, [r0], #4 str r3, [r0], #4 str r3, [r0], #4 @@ -141,108 +176,145 @@ __create_page_tables: teq r0, r6 bne 1b +#ifdef CONFIG_ARM_LPAE + /* + * Build the PGD table (first level) to point to the PMD table. A PGD + * entry is 64-bit wide. + */ + mov r0, r4 + add r3, r4, #0x1000 @ first PMD table address + orr r3, r3, #3 @ PGD block type + mov r6, #4 @ PTRS_PER_PGD + mov r7, #1 << (55 - 32) @ L_PGD_SWAPPER +1: +#ifdef CONFIG_CPU_ENDIAN_BE8 + str r7, [r0], #4 @ set top PGD entry bits + str r3, [r0], #4 @ set bottom PGD entry bits +#else + str r3, [r0], #4 @ set bottom PGD entry bits + str r7, [r0], #4 @ set top PGD entry bits +#endif + add r3, r3, #0x1000 @ next PMD table + subs r6, r6, #1 + bne 1b + + add r4, r4, #0x1000 @ point to the PMD tables +#ifdef CONFIG_CPU_ENDIAN_BE8 + add r4, r4, #4 @ we only write the bottom word +#endif +#endif + ldr r7, [r10, #PROCINFO_MM_MMUFLAGS] @ mm_mmuflags /* * Create identity mapping to cater for __enable_mmu. * This identity mapping will be removed by paging_init(). */ - adr r0, __enable_mmu_loc + adr r0, __turn_mmu_on_loc ldmia r0, {r3, r5, r6} sub r0, r0, r3 @ virt->phys offset - add r5, r5, r0 @ phys __enable_mmu - add r6, r6, r0 @ phys __enable_mmu_end - mov r5, r5, lsr #20 - mov r6, r6, lsr #20 - -1: orr r3, r7, r5, lsl #20 @ flags + kernel base - str r3, [r4, r5, lsl #2] @ identity mapping - teq r5, r6 - addne r5, r5, #1 @ next section - bne 1b + add r5, r5, r0 @ phys __turn_mmu_on + add r6, r6, r0 @ phys __turn_mmu_on_end + mov r5, r5, lsr #SECTION_SHIFT + mov r6, r6, lsr #SECTION_SHIFT + +1: orr r3, r7, r5, lsl #SECTION_SHIFT @ flags + kernel base + str r3, [r4, r5, lsl #PMD_ORDER] @ identity mapping + cmp r5, r6 + addlo r5, r5, #1 @ next section + blo 1b /* - * Now setup the pagetables for our kernel direct - * mapped region. + * Map our RAM from the start to the end of the kernel .bss section. */ - mov r3, pc - mov r3, r3, lsr #20 - orr r3, r7, r3, lsl #20 - add r0, r4, #(KERNEL_START & 0xff000000) >> 18 - str r3, [r0, #(KERNEL_START & 0x00f00000) >> 18]! - ldr r6, =(KERNEL_END - 1) - add r0, r0, #4 - add r6, r4, r6, lsr #18 -1: cmp r0, r6 - add r3, r3, #1 << 20 - strls r3, [r0], #4 + add r0, r4, #PAGE_OFFSET >> (SECTION_SHIFT - PMD_ORDER) + ldr r6, =(_end - 1) + orr r3, r8, r7 + add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) +1: str r3, [r0], #1 << PMD_ORDER + add r3, r3, #1 << SECTION_SHIFT + cmp r0, r6 bls 1b #ifdef CONFIG_XIP_KERNEL /* - * Map some ram to cover our .data and .bss areas. + * Map the kernel image separately as it is not located in RAM. */ - orr r3, r7, #(KERNEL_RAM_PADDR & 0xff000000) - .if (KERNEL_RAM_PADDR & 0x00f00000) - orr r3, r3, #(KERNEL_RAM_PADDR & 0x00f00000) - .endif - add r0, r4, #(KERNEL_RAM_VADDR & 0xff000000) >> 18 - str r3, [r0, #(KERNEL_RAM_VADDR & 0x00f00000) >> 18]! - ldr r6, =(_end - 1) - add r0, r0, #4 - add r6, r4, r6, lsr #18 +#define XIP_START XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) + mov r3, pc + mov r3, r3, lsr #SECTION_SHIFT + orr r3, r7, r3, lsl #SECTION_SHIFT + add r0, r4, #(XIP_START & 0xff000000) >> (SECTION_SHIFT - PMD_ORDER) + str r3, [r0, #((XIP_START & 0x00f00000) >> SECTION_SHIFT) << PMD_ORDER]! + ldr r6, =(_edata_loc - 1) + add r0, r0, #1 << PMD_ORDER + add r6, r4, r6, lsr #(SECTION_SHIFT - PMD_ORDER) 1: cmp r0, r6 - add r3, r3, #1 << 20 - strls r3, [r0], #4 + add r3, r3, #1 << SECTION_SHIFT + strls r3, [r0], #1 << PMD_ORDER bls 1b #endif /* - * Then map first 1MB of ram in case it contains our boot params. + * Then map boot params address in r2 if specified. + * We map 2 sections in case the ATAGs/DTB crosses a section boundary. */ - add r0, r4, #PAGE_OFFSET >> 18 - orr r6, r7, #(PHYS_OFFSET & 0xff000000) - .if (PHYS_OFFSET & 0x00f00000) - orr r6, r6, #(PHYS_OFFSET & 0x00f00000) - .endif - str r6, [r0] + mov r0, r2, lsr #SECTION_SHIFT + movs r0, r0, lsl #SECTION_SHIFT + subne r3, r0, r8 + addne r3, r3, #PAGE_OFFSET + addne r3, r4, r3, lsr #(SECTION_SHIFT - PMD_ORDER) + orrne r6, r7, r0 + strne r6, [r3], #1 << PMD_ORDER + addne r6, r6, #1 << SECTION_SHIFT + strne r6, [r3] + +#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) + sub r4, r4, #4 @ Fixup page table pointer + @ for 64-bit descriptors +#endif #ifdef CONFIG_DEBUG_LL -#ifndef CONFIG_DEBUG_ICEDCC +#if !defined(CONFIG_DEBUG_ICEDCC) && !defined(CONFIG_DEBUG_SEMIHOSTING) /* * Map in IO space for serial debugging. * This allows debug messages to be output * via a serial console before paging_init. */ - addruart r7, r3 + addruart r7, r3, r0 - mov r3, r3, lsr #20 - mov r3, r3, lsl #2 + mov r3, r3, lsr #SECTION_SHIFT + mov r3, r3, lsl #PMD_ORDER add r0, r4, r3 - rsb r3, r3, #0x4000 @ PTRS_PER_PGD*sizeof(long) - cmp r3, #0x0800 @ limit to 512MB - movhi r3, #0x0800 - add r6, r0, r3 - mov r3, r7, lsr #20 + mov r3, r7, lsr #SECTION_SHIFT ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags - orr r3, r7, r3, lsl #20 -1: str r3, [r0], #4 - add r3, r3, #1 << 20 - teq r0, r6 - bne 1b + orr r3, r7, r3, lsl #SECTION_SHIFT +#ifdef CONFIG_ARM_LPAE + mov r7, #1 << (54 - 32) @ XN +#ifdef CONFIG_CPU_ENDIAN_BE8 + str r7, [r0], #4 + str r3, [r0], #4 +#else + str r3, [r0], #4 + str r7, [r0], #4 +#endif +#else + orr r3, r3, #PMD_SECT_XN + str r3, [r0], #4 +#endif -#else /* CONFIG_DEBUG_ICEDCC */ - /* we don't need any serial debugging mappings for ICEDCC */ +#else /* CONFIG_DEBUG_ICEDCC || CONFIG_DEBUG_SEMIHOSTING */ + /* we don't need any serial debugging mappings */ ldr r7, [r10, #PROCINFO_IO_MMUFLAGS] @ io_mmuflags -#endif /* !CONFIG_DEBUG_ICEDCC */ +#endif #if defined(CONFIG_ARCH_NETWINDER) || defined(CONFIG_ARCH_CATS) /* * If we're using the NetWinder or CATS, we also need to map * in the 16550-type serial port for the debug messages */ - add r0, r4, #0xff000000 >> 18 + add r0, r4, #0xff000000 >> (SECTION_SHIFT - PMD_ORDER) orr r3, r7, #0x7c000000 str r3, [r0] #endif @@ -252,23 +324,28 @@ __create_page_tables: * Similar reasons here - for debug. This is * only for Acorn RiscPC architectures. */ - add r0, r4, #0x02000000 >> 18 + add r0, r4, #0x02000000 >> (SECTION_SHIFT - PMD_ORDER) orr r3, r7, #0x02000000 str r3, [r0] - add r0, r4, #0xd8000000 >> 18 + add r0, r4, #0xd8000000 >> (SECTION_SHIFT - PMD_ORDER) str r3, [r0] #endif #endif +#ifdef CONFIG_ARM_LPAE + sub r4, r4, #0x1000 @ point to the PGD table + mov r4, r4, lsr #ARCH_PGD_SHIFT +#endif mov pc, lr ENDPROC(__create_page_tables) .ltorg -__enable_mmu_loc: + .align +__turn_mmu_on_loc: .long . - .long __enable_mmu - .long __enable_mmu_end + .long __turn_mmu_on + .long __turn_mmu_on_end #if defined(CONFIG_SMP) - __CPUINIT + .text ENTRY(secondary_startup) /* * Common entry point for secondary CPUs. @@ -277,11 +354,19 @@ ENTRY(secondary_startup) * the processor type - there is no need to check the machine type * as it has already been validated by the primary processor. */ - setmode PSR_F_BIT | PSR_I_BIT | SVC_MODE, r9 + + ARM_BE8(setend be) @ ensure we are in BE8 mode + +#ifdef CONFIG_ARM_VIRT_EXT + bl __hyp_stub_install_secondary +#endif + safe_svcmode_maskall r9 + mrc p15, 0, r9, c0, c0 @ get processor id bl __lookup_processor_type movs r10, r5 @ invalid processor? moveq r0, #'p' @ yes, error 'p' + THUMB( it eq ) @ force fixup-able long branch encoding beq __error_p /* @@ -289,8 +374,10 @@ ENTRY(secondary_startup) */ adr r4, __secondary_data ldmia r4, {r5, r7, r12} @ address to jump to after - sub r4, r4, r5 @ mmu has been enabled - ldr r4, [r7, r4] @ get secondary_data.pgdir + sub lr, r4, r5 @ mmu has been enabled + ldr r4, [r7, lr] @ get secondary_data.pgdir + add r7, r7, #4 + ldr r8, [r7, lr] @ get secondary_data.swapper_pg_dir adr lr, BSYM(__enable_mmu) @ return address mov r13, r12 @ __secondary_switched address ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor @@ -308,6 +395,8 @@ ENTRY(__secondary_switched) b secondary_start_kernel ENDPROC(__secondary_switched) + .align + .type __secondary_data, %object __secondary_data: .long . @@ -324,13 +413,13 @@ __secondary_data: * * r0 = cp#15 control register * r1 = machine ID - * r2 = atags pointer - * r4 = page table pointer + * r2 = atags or dtb pointer + * r4 = page table (see ARCH_PGD_SHIFT in asm/memory.h) * r9 = processor ID * r13 = *virtual* address to jump to upon completion */ __enable_mmu: -#ifdef CONFIG_ALIGNMENT_TRAP +#if defined(CONFIG_ALIGNMENT_TRAP) && __LINUX_ARM_ARCH__ < 6 orr r0, r0, #CR_A #else bic r0, r0, #CR_A @@ -344,12 +433,14 @@ __enable_mmu: #ifdef CONFIG_CPU_ICACHE_DISABLE bic r0, r0, #CR_I #endif +#ifndef CONFIG_ARM_LPAE mov r5, #(domain_val(DOMAIN_USER, DOMAIN_MANAGER) | \ domain_val(DOMAIN_KERNEL, DOMAIN_MANAGER) | \ domain_val(DOMAIN_TABLE, DOMAIN_MANAGER) | \ domain_val(DOMAIN_IO, DOMAIN_CLIENT)) mcr p15, 0, r5, c3, c0, 0 @ load domain access register mcr p15, 0, r4, c2, c0, 0 @ load page table pointer +#endif b __turn_mmu_on ENDPROC(__enable_mmu) @@ -361,58 +452,78 @@ ENDPROC(__enable_mmu) * * r0 = cp#15 control register * r1 = machine ID - * r2 = atags pointer + * r2 = atags or dtb pointer * r9 = processor ID * r13 = *virtual* address to jump to upon completion * * other registers depend on the function called upon completion */ .align 5 -__turn_mmu_on: + .pushsection .idmap.text, "ax" +ENTRY(__turn_mmu_on) mov r0, r0 + instr_sync mcr p15, 0, r0, c1, c0, 0 @ write control reg mrc p15, 0, r3, c0, c0, 0 @ read id reg + instr_sync mov r3, r3 mov r3, r13 mov pc, r3 -__enable_mmu_end: +__turn_mmu_on_end: ENDPROC(__turn_mmu_on) + .popsection #ifdef CONFIG_SMP_ON_UP + __HEAD __fixup_smp: - mov r7, #0x00070000 - orr r6, r7, #0xff000000 @ mask 0xff070000 - orr r7, r7, #0x41000000 @ val 0x41070000 - and r0, r9, r6 - teq r0, r7 @ ARM CPU and ARMv6/v7? + and r3, r9, #0x000f0000 @ architecture version + teq r3, #0x000f0000 @ CPU ID supported? bne __fixup_smp_on_up @ no, assume UP - orr r6, r6, #0x0000ff00 - orr r6, r6, #0x000000f0 @ mask 0xff07fff0 - orr r7, r7, #0x0000b000 - orr r7, r7, #0x00000020 @ val 0x4107b020 - and r0, r9, r6 - teq r0, r7 @ ARM 11MPCore? + bic r3, r9, #0x00ff0000 + bic r3, r3, #0x0000000f @ mask 0xff00fff0 + mov r4, #0x41000000 + orr r4, r4, #0x0000b000 + orr r4, r4, #0x00000020 @ val 0x4100b020 + teq r3, r4 @ ARM 11MPCore? moveq pc, lr @ yes, assume SMP mrc p15, 0, r0, c0, c0, 5 @ read MPIDR - tst r0, #1 << 31 - movne pc, lr @ bit 31 => SMP + and r0, r0, #0xc0000000 @ multiprocessing extensions and + teq r0, #0x80000000 @ not part of a uniprocessor system? + bne __fixup_smp_on_up @ no, assume UP + + @ Core indicates it is SMP. Check for Aegis SOC where a single + @ Cortex-A9 CPU is present but SMP operations fault. + mov r4, #0x41000000 + orr r4, r4, #0x0000c000 + orr r4, r4, #0x00000090 + teq r3, r4 @ Check for ARM Cortex-A9 + movne pc, lr @ Not ARM Cortex-A9, + + @ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the + @ below address check will need to be #ifdef'd or equivalent + @ for the Aegis platform. + mrc p15, 4, r0, c15, c0 @ get SCU base address + teq r0, #0x0 @ '0' on actual UP A9 hardware + beq __fixup_smp_on_up @ So its an A9 UP + ldr r0, [r0, #4] @ read SCU Config +ARM_BE8(rev r0, r0) @ byteswap if big endian + and r0, r0, #0x3 @ number of CPUs + teq r0, #0x0 @ is 1? + movne pc, lr __fixup_smp_on_up: adr r0, 1f - ldmia r0, {r3, r6, r7} + ldmia r0, {r3 - r5} sub r3, r0, r3 - add r6, r6, r3 - add r7, r7, r3 -2: cmp r6, r7 - ldmia r6!, {r0, r4} - strlo r4, [r0, r3] - blo 2b - mov pc, lr + add r4, r4, r3 + add r5, r5, r3 + b __do_fixup_smp_on_up ENDPROC(__fixup_smp) + .align 1: .word . .word __smpalt_begin .word __smpalt_end @@ -423,7 +534,172 @@ smp_on_up: ALT_SMP(.long 1) ALT_UP(.long 0) .popsection +#endif + + .text +__do_fixup_smp_on_up: + cmp r4, r5 + movhs pc, lr + ldmia r4!, {r0, r6} + ARM( str r6, [r0, r3] ) + THUMB( add r0, r0, r3 ) +#ifdef __ARMEB__ + THUMB( mov r6, r6, ror #16 ) @ Convert word order for big-endian. +#endif + THUMB( strh r6, [r0], #2 ) @ For Thumb-2, store as two halfwords + THUMB( mov r6, r6, lsr #16 ) @ to be robust against misaligned r3. + THUMB( strh r6, [r0] ) + b __do_fixup_smp_on_up +ENDPROC(__do_fixup_smp_on_up) + +ENTRY(fixup_smp) + stmfd sp!, {r4 - r6, lr} + mov r4, r0 + add r5, r0, r1 + mov r3, #0 + bl __do_fixup_smp_on_up + ldmfd sp!, {r4 - r6, pc} +ENDPROC(fixup_smp) + +#ifdef __ARMEB__ +#define LOW_OFFSET 0x4 +#define HIGH_OFFSET 0x0 +#else +#define LOW_OFFSET 0x0 +#define HIGH_OFFSET 0x4 +#endif + +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT +/* __fixup_pv_table - patch the stub instructions with the delta between + * PHYS_OFFSET and PAGE_OFFSET, which is assumed to be 16MiB aligned and + * can be expressed by an immediate shifter operand. The stub instruction + * has a form of '(add|sub) rd, rn, #imm'. + */ + __HEAD +__fixup_pv_table: + adr r0, 1f + ldmia r0, {r3-r7} + mvn ip, #0 + subs r3, r0, r3 @ PHYS_OFFSET - PAGE_OFFSET + add r4, r4, r3 @ adjust table start address + add r5, r5, r3 @ adjust table end address + add r6, r6, r3 @ adjust __pv_phys_pfn_offset address + add r7, r7, r3 @ adjust __pv_offset address + mov r0, r8, lsr #12 @ convert to PFN + str r0, [r6] @ save computed PHYS_OFFSET to __pv_phys_pfn_offset + strcc ip, [r7, #HIGH_OFFSET] @ save to __pv_offset high bits + mov r6, r3, lsr #24 @ constant for add/sub instructions + teq r3, r6, lsl #24 @ must be 16MiB aligned +THUMB( it ne @ cross section branch ) + bne __error + str r3, [r7, #LOW_OFFSET] @ save to __pv_offset low bits + b __fixup_a_pv_table +ENDPROC(__fixup_pv_table) + + .align +1: .long . + .long __pv_table_begin + .long __pv_table_end +2: .long __pv_phys_pfn_offset + .long __pv_offset + + .text +__fixup_a_pv_table: + adr r0, 3f + ldr r6, [r0] + add r6, r6, r3 + ldr r0, [r6, #HIGH_OFFSET] @ pv_offset high word + ldr r6, [r6, #LOW_OFFSET] @ pv_offset low word + mov r6, r6, lsr #24 + cmn r0, #1 +#ifdef CONFIG_THUMB2_KERNEL + moveq r0, #0x200000 @ set bit 21, mov to mvn instruction + lsls r6, #24 + beq 2f + clz r7, r6 + lsr r6, #24 + lsl r6, r7 + bic r6, #0x0080 + lsrs r7, #1 + orrcs r6, #0x0080 + orr r6, r6, r7, lsl #12 + orr r6, #0x4000 + b 2f +1: add r7, r3 + ldrh ip, [r7, #2] +ARM_BE8(rev16 ip, ip) + tst ip, #0x4000 + and ip, #0x8f00 + orrne ip, r6 @ mask in offset bits 31-24 + orreq ip, r0 @ mask in offset bits 7-0 +ARM_BE8(rev16 ip, ip) + strh ip, [r7, #2] + bne 2f + ldrh ip, [r7] +ARM_BE8(rev16 ip, ip) + bic ip, #0x20 + orr ip, ip, r0, lsr #16 +ARM_BE8(rev16 ip, ip) + strh ip, [r7] +2: cmp r4, r5 + ldrcc r7, [r4], #4 @ use branch for delay slot + bcc 1b + bx lr +#else +#ifdef CONFIG_CPU_ENDIAN_BE8 + moveq r0, #0x00004000 @ set bit 22, mov to mvn instruction +#else + moveq r0, #0x400000 @ set bit 22, mov to mvn instruction +#endif + b 2f +1: ldr ip, [r7, r3] +#ifdef CONFIG_CPU_ENDIAN_BE8 + @ in BE8, we load data in BE, but instructions still in LE + bic ip, ip, #0xff000000 + tst ip, #0x000f0000 @ check the rotation field + orrne ip, ip, r6, lsl #24 @ mask in offset bits 31-24 + biceq ip, ip, #0x00004000 @ clear bit 22 + orreq ip, ip, r0 @ mask in offset bits 7-0 +#else + bic ip, ip, #0x000000ff + tst ip, #0xf00 @ check the rotation field + orrne ip, ip, r6 @ mask in offset bits 31-24 + biceq ip, ip, #0x400000 @ clear bit 22 + orreq ip, ip, r0 @ mask in offset bits 7-0 +#endif + str ip, [r7, r3] +2: cmp r4, r5 + ldrcc r7, [r4], #4 @ use branch for delay slot + bcc 1b + mov pc, lr +#endif +ENDPROC(__fixup_a_pv_table) + + .align +3: .long __pv_offset + +ENTRY(fixup_pv_table) + stmfd sp!, {r4 - r7, lr} + mov r3, #0 @ no offset + mov r4, r0 @ r0 = table start + add r5, r0, r1 @ r1 = table size + bl __fixup_a_pv_table + ldmfd sp!, {r4 - r7, pc} +ENDPROC(fixup_pv_table) + + .data + .globl __pv_phys_pfn_offset + .type __pv_phys_pfn_offset, %object +__pv_phys_pfn_offset: + .word 0 + .size __pv_phys_pfn_offset, . -__pv_phys_pfn_offset + + .globl __pv_offset + .type __pv_offset, %object +__pv_offset: + .quad 0 + .size __pv_offset, . -__pv_offset #endif #include "head-common.S" diff --git a/arch/arm/kernel/hibernate.c b/arch/arm/kernel/hibernate.c new file mode 100644 index 00000000000..bb8b7964864 --- /dev/null +++ b/arch/arm/kernel/hibernate.c @@ -0,0 +1,107 @@ +/* + * Hibernation support specific for ARM + * + * Derived from work on ARM hibernation support by: + * + * Ubuntu project, hibernation support for mach-dove + * Copyright (C) 2010 Nokia Corporation (Hiroshi Doyu) + * Copyright (C) 2010 Texas Instruments, Inc. (Teerth Reddy et al.) + * https://lkml.org/lkml/2010/6/18/4 + * https://lists.linux-foundation.org/pipermail/linux-pm/2010-June/027422.html + * https://patchwork.kernel.org/patch/96442/ + * + * Copyright (C) 2006 Rafael J. Wysocki <rjw@sisk.pl> + * + * License terms: GNU General Public License (GPL) version 2 + */ + +#include <linux/mm.h> +#include <linux/suspend.h> +#include <asm/system_misc.h> +#include <asm/idmap.h> +#include <asm/suspend.h> +#include <asm/memory.h> + +extern const void __nosave_begin, __nosave_end; + +int pfn_is_nosave(unsigned long pfn) +{ + unsigned long nosave_begin_pfn = virt_to_pfn(&__nosave_begin); + unsigned long nosave_end_pfn = virt_to_pfn(&__nosave_end - 1); + + return (pfn >= nosave_begin_pfn) && (pfn <= nosave_end_pfn); +} + +void notrace save_processor_state(void) +{ + WARN_ON(num_online_cpus() != 1); + local_fiq_disable(); +} + +void notrace restore_processor_state(void) +{ + local_fiq_enable(); +} + +/* + * Snapshot kernel memory and reset the system. + * + * swsusp_save() is executed in the suspend finisher so that the CPU + * context pointer and memory are part of the saved image, which is + * required by the resume kernel image to restart execution from + * swsusp_arch_suspend(). + * + * soft_restart is not technically needed, but is used to get success + * returned from cpu_suspend. + * + * When soft reboot completes, the hibernation snapshot is written out. + */ +static int notrace arch_save_image(unsigned long unused) +{ + int ret; + + ret = swsusp_save(); + if (ret == 0) + soft_restart(virt_to_phys(cpu_resume)); + return ret; +} + +/* + * Save the current CPU state before suspend / poweroff. + */ +int notrace swsusp_arch_suspend(void) +{ + return cpu_suspend(0, arch_save_image); +} + +/* + * Restore page contents for physical pages that were in use during loading + * hibernation image. Switch to idmap_pgd so the physical page tables + * are overwritten with the same contents. + */ +static void notrace arch_restore_image(void *unused) +{ + struct pbe *pbe; + + cpu_switch_mm(idmap_pgd, &init_mm); + for (pbe = restore_pblist; pbe; pbe = pbe->next) + copy_page(pbe->orig_address, pbe->address); + + soft_restart(virt_to_phys(cpu_resume)); +} + +static u64 resume_stack[PAGE_SIZE/2/sizeof(u64)] __nosavedata; + +/* + * Resume from the hibernation image. + * Due to the kernel heap / data restore, stack contents change underneath + * and that would make function calls impossible; switch to a temporary + * stack within the nosave region to avoid that problem. + */ +int swsusp_arch_resume(void) +{ + extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); + call_with_stack(arch_restore_image, 0, + resume_stack + ARRAY_SIZE(resume_stack)); + return 0; +} diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 21e3a4ab3b8..4d963fb66e3 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -24,17 +24,19 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt #include <linux/errno.h> +#include <linux/hardirq.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> #include <linux/smp.h> +#include <linux/cpu_pm.h> #include <asm/cacheflush.h> #include <asm/cputype.h> #include <asm/current.h> #include <asm/hw_breakpoint.h> #include <asm/kdebug.h> -#include <asm/system.h> #include <asm/traps.h> +#include <asm/hardware/coresight.h> /* Breakpoint currently in use for each BRP. */ static DEFINE_PER_CPU(struct perf_event *, bp_on_reg[ARM_MAX_BRP]); @@ -49,98 +51,20 @@ static int core_num_wrps; /* Debug architecture version. */ static u8 debug_arch; +/* Does debug architecture support OS Save and Restore? */ +static bool has_ossr; + /* Maximum supported watchpoint length. */ static u8 max_watchpoint_len; -/* Determine number of BRP registers available. */ -static int get_num_brps(void) -{ - u32 didr; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 24) & 0xf) + 1; -} - -/* Determine number of WRP registers available. */ -static int get_num_wrps(void) -{ - /* - * FIXME: When a watchpoint fires, the only way to work out which - * watchpoint it was is by disassembling the faulting instruction - * and working out the address of the memory access. - * - * Furthermore, we can only do this if the watchpoint was precise - * since imprecise watchpoints prevent us from calculating register - * based addresses. - * - * For the time being, we only report 1 watchpoint register so we - * always know which watchpoint fired. In the future we can either - * add a disassembler and address generation emulator, or we can - * insert a check to see if the DFAR is set on watchpoint exception - * entry [the ARM ARM states that the DFAR is UNKNOWN, but - * experience shows that it is set on some implementations]. - */ - -#if 0 - u32 didr, wrps; - ARM_DBG_READ(c0, 0, didr); - return ((didr >> 28) & 0xf) + 1; -#endif - - return 1; -} - -int hw_breakpoint_slots(int type) -{ - /* - * We can be called early, so don't rely on - * our static variables being initialised. - */ - switch (type) { - case TYPE_INST: - return get_num_brps(); - case TYPE_DATA: - return get_num_wrps(); - default: - pr_warning("unknown slot type: %d\n", type); - return 0; - } -} - -/* Determine debug architecture. */ -static u8 get_debug_arch(void) -{ - u32 didr; - - /* Do we implement the extended CPUID interface? */ - if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { - pr_warning("CPUID feature registers not supported. " - "Assuming v6 debug is present.\n"); - return ARM_DEBUG_ARCH_V6; - } - - ARM_DBG_READ(c0, 0, didr); - return (didr >> 16) & 0xf; -} - -/* Does this core support mismatch breakpoints? */ -static int core_has_mismatch_bps(void) -{ - return debug_arch >= ARM_DEBUG_ARCH_V7_ECP14 && core_num_brps > 1; -} - -u8 arch_get_debug_arch(void) -{ - return debug_arch; -} - -#define READ_WB_REG_CASE(OP2, M, VAL) \ - case ((OP2 << 4) + M): \ - ARM_DBG_READ(c ## M, OP2, VAL); \ +#define READ_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_READ(c0, c ## M, OP2, VAL); \ break -#define WRITE_WB_REG_CASE(OP2, M, VAL) \ - case ((OP2 << 4) + M): \ - ARM_DBG_WRITE(c ## M, OP2, VAL);\ +#define WRITE_WB_REG_CASE(OP2, M, VAL) \ + case ((OP2 << 4) + M): \ + ARM_DBG_WRITE(c0, c ## M, OP2, VAL); \ break #define GEN_READ_WB_REG_CASES(OP2, VAL) \ @@ -210,49 +134,167 @@ static void write_wb_reg(int n, u32 val) isb(); } +/* Determine debug architecture. */ +static u8 get_debug_arch(void) +{ + u32 didr; + + /* Do we implement the extended CPUID interface? */ + if (((read_cpuid_id() >> 16) & 0xf) != 0xf) { + pr_warn_once("CPUID feature registers not supported. " + "Assuming v6 debug is present.\n"); + return ARM_DEBUG_ARCH_V6; + } + + ARM_DBG_READ(c0, c0, 0, didr); + return (didr >> 16) & 0xf; +} + +u8 arch_get_debug_arch(void) +{ + return debug_arch; +} + +static int debug_arch_supported(void) +{ + u8 arch = get_debug_arch(); + + /* We don't support the memory-mapped interface. */ + return (arch >= ARM_DEBUG_ARCH_V6 && arch <= ARM_DEBUG_ARCH_V7_ECP14) || + arch >= ARM_DEBUG_ARCH_V7_1; +} + +/* Can we determine the watchpoint access type from the fsr? */ +static int debug_exception_updates_fsr(void) +{ + return get_debug_arch() >= ARM_DEBUG_ARCH_V8; +} + +/* Determine number of WRP registers available. */ +static int get_num_wrp_resources(void) +{ + u32 didr; + ARM_DBG_READ(c0, c0, 0, didr); + return ((didr >> 28) & 0xf) + 1; +} + +/* Determine number of BRP registers available. */ +static int get_num_brp_resources(void) +{ + u32 didr; + ARM_DBG_READ(c0, c0, 0, didr); + return ((didr >> 24) & 0xf) + 1; +} + +/* Does this core support mismatch breakpoints? */ +static int core_has_mismatch_brps(void) +{ + return (get_debug_arch() >= ARM_DEBUG_ARCH_V7_ECP14 && + get_num_brp_resources() > 1); +} + +/* Determine number of usable WRPs available. */ +static int get_num_wrps(void) +{ + /* + * On debug architectures prior to 7.1, when a watchpoint fires, the + * only way to work out which watchpoint it was is by disassembling + * the faulting instruction and working out the address of the memory + * access. + * + * Furthermore, we can only do this if the watchpoint was precise + * since imprecise watchpoints prevent us from calculating register + * based addresses. + * + * Providing we have more than 1 breakpoint register, we only report + * a single watchpoint register for the time being. This way, we always + * know which watchpoint fired. In the future we can either add a + * disassembler and address generation emulator, or we can insert a + * check to see if the DFAR is set on watchpoint exception entry + * [the ARM ARM states that the DFAR is UNKNOWN, but experience shows + * that it is set on some implementations]. + */ + if (get_debug_arch() < ARM_DEBUG_ARCH_V7_1) + return 1; + + return get_num_wrp_resources(); +} + +/* Determine number of usable BRPs available. */ +static int get_num_brps(void) +{ + int brps = get_num_brp_resources(); + return core_has_mismatch_brps() ? brps - 1 : brps; +} + /* * In order to access the breakpoint/watchpoint control registers, * we must be running in debug monitor mode. Unfortunately, we can * be put into halting debug mode at any time by an external debugger * but there is nothing we can do to prevent that. */ -static int enable_monitor_mode(void) +static int monitor_mode_enabled(void) { u32 dscr; - int ret = 0; + ARM_DBG_READ(c0, c1, 0, dscr); + return !!(dscr & ARM_DSCR_MDBGEN); +} - ARM_DBG_READ(c1, 0, dscr); +static int enable_monitor_mode(void) +{ + u32 dscr; + ARM_DBG_READ(c0, c1, 0, dscr); - /* Ensure that halting mode is disabled. */ - if (WARN_ONCE(dscr & ARM_DSCR_HDBGEN, "halting debug mode enabled." - "Unable to access hardware resources.")) { - ret = -EPERM; + /* If monitor mode is already enabled, just return. */ + if (dscr & ARM_DSCR_MDBGEN) goto out; - } /* Write to the corresponding DSCR. */ - switch (debug_arch) { + switch (get_debug_arch()) { case ARM_DEBUG_ARCH_V6: case ARM_DEBUG_ARCH_V6_1: - ARM_DBG_WRITE(c1, 0, (dscr | ARM_DSCR_MDBGEN)); + ARM_DBG_WRITE(c0, c1, 0, (dscr | ARM_DSCR_MDBGEN)); break; case ARM_DEBUG_ARCH_V7_ECP14: - ARM_DBG_WRITE(c2, 2, (dscr | ARM_DSCR_MDBGEN)); + case ARM_DEBUG_ARCH_V7_1: + case ARM_DEBUG_ARCH_V8: + ARM_DBG_WRITE(c0, c2, 2, (dscr | ARM_DSCR_MDBGEN)); + isb(); break; default: - ret = -ENODEV; - goto out; + return -ENODEV; } /* Check that the write made it through. */ - ARM_DBG_READ(c1, 0, dscr); - if (WARN_ONCE(!(dscr & ARM_DSCR_MDBGEN), - "failed to enable monitor mode.")) { - ret = -EPERM; + ARM_DBG_READ(c0, c1, 0, dscr); + if (!(dscr & ARM_DSCR_MDBGEN)) { + pr_warn_once("Failed to enable monitor mode on CPU %d.\n", + smp_processor_id()); + return -EPERM; } out: - return ret; + return 0; +} + +int hw_breakpoint_slots(int type) +{ + if (!debug_arch_supported()) + return 0; + + /* + * We can be called early, so don't rely on + * our static variables being initialised. + */ + switch (type) { + case TYPE_INST: + return get_num_brps(); + case TYPE_DATA: + return get_num_wrps(); + default: + pr_warning("unknown slot type: %d\n", type); + return 0; + } } /* @@ -268,9 +310,6 @@ static u8 get_max_wp_len(void) if (debug_arch < ARM_DEBUG_ARCH_V7_ECP14) goto out; - if (enable_monitor_mode()) - goto out; - memset(&ctrl, 0, sizeof(ctrl)); ctrl.len = ARM_BREAKPOINT_LEN_8; ctrl_reg = encode_ctrl_reg(ctrl); @@ -290,54 +329,29 @@ u8 arch_get_max_wp_len(void) } /* - * Handler for reactivating a suspended watchpoint when the single - * step `mismatch' breakpoint is triggered. - */ -static void wp_single_step_handler(struct perf_event *bp, int unused, - struct perf_sample_data *data, - struct pt_regs *regs) -{ - perf_event_enable(counter_arch_bp(bp)->suspended_wp); - unregister_hw_breakpoint(bp); -} - -static int bp_is_single_step(struct perf_event *bp) -{ - return bp->overflow_handler == wp_single_step_handler; -} - -/* * Install a perf counter breakpoint. */ int arch_install_hw_breakpoint(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); struct perf_event **slot, **slots; - int i, max_slots, ctrl_base, val_base, ret = 0; + int i, max_slots, ctrl_base, val_base; + u32 addr, ctrl; - /* Ensure that we are in monitor mode and halting mode is disabled. */ - ret = enable_monitor_mode(); - if (ret) - goto out; + addr = info->address; + ctrl = encode_ctrl_reg(info->ctrl) | 0x1; if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ ctrl_base = ARM_BASE_BCR; val_base = ARM_BASE_BVR; - slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps - 1; - - if (bp_is_single_step(bp)) { - info->ctrl.mismatch = 1; - i = max_slots; - slots[i] = bp; - goto setup; - } + slots = this_cpu_ptr(bp_on_reg); + max_slots = core_num_brps; } else { /* Watchpoint */ ctrl_base = ARM_BASE_WCR; val_base = ARM_BASE_WVR; - slots = __get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; } @@ -350,20 +364,28 @@ int arch_install_hw_breakpoint(struct perf_event *bp) } } - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) { - ret = -EBUSY; - goto out; + if (i == max_slots) { + pr_warning("Can't find any breakpoint slot\n"); + return -EBUSY; + } + + /* Override the breakpoint data with the step data. */ + if (info->step_ctrl.enabled) { + addr = info->trigger & ~0x3; + ctrl = encode_ctrl_reg(info->step_ctrl); + if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE) { + i = 0; + ctrl_base = ARM_BASE_BCR + core_num_brps; + val_base = ARM_BASE_BVR + core_num_brps; + } } -setup: /* Setup the address register. */ - write_wb_reg(val_base + i, info->address); + write_wb_reg(val_base + i, addr); /* Setup the control register. */ - write_wb_reg(ctrl_base + i, encode_ctrl_reg(info->ctrl) | 0x1); - -out: - return ret; + write_wb_reg(ctrl_base + i, ctrl); + return 0; } void arch_uninstall_hw_breakpoint(struct perf_event *bp) @@ -375,18 +397,12 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ base = ARM_BASE_BCR; - slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps - 1; - - if (bp_is_single_step(bp)) { - i = max_slots; - slots[i] = NULL; - goto reset; - } + slots = this_cpu_ptr(bp_on_reg); + max_slots = core_num_brps; } else { /* Watchpoint */ base = ARM_BASE_WCR; - slots = __get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; } @@ -400,10 +416,18 @@ void arch_uninstall_hw_breakpoint(struct perf_event *bp) } } - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) + if (i == max_slots) { + pr_warning("Can't find any breakpoint slot\n"); return; + } + + /* Ensure that we disable the mismatch breakpoint. */ + if (info->ctrl.type != ARM_BREAKPOINT_EXECUTE && + info->step_ctrl.enabled) { + i = 0; + base = ARM_BASE_BCR + core_num_brps; + } -reset: /* Reset the control register. */ write_wb_reg(base + i, 0); } @@ -537,12 +561,23 @@ static int arch_build_bp_info(struct perf_event *bp) return -EINVAL; } + /* + * Breakpoints must be of length 2 (thumb) or 4 (ARM) bytes. + * Watchpoints can be of length 1, 2, 4 or 8 bytes if supported + * by the hardware and must be aligned to the appropriate number of + * bytes. + */ + if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE && + info->ctrl.len != ARM_BREAKPOINT_LEN_2 && + info->ctrl.len != ARM_BREAKPOINT_LEN_4) + return -EINVAL; + /* Address */ info->address = bp->attr.bp_addr; /* Privilege */ info->ctrl.privilege = ARM_BREAKPOINT_USER; - if (arch_check_bp_in_kernelspace(bp) && !bp_is_single_step(bp)) + if (arch_check_bp_in_kernelspace(bp)) info->ctrl.privilege |= ARM_BREAKPOINT_PRIV; /* Enabled? */ @@ -561,7 +596,11 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); int ret = 0; - u32 bytelen, max_len, offset, alignment_mask = 0x3; + u32 offset, alignment_mask = 0x3; + + /* Ensure that we are in monitor debug mode. */ + if (!monitor_mode_enabled()) + return -ENODEV; /* Build the arch_hw_breakpoint. */ ret = arch_build_bp_info(bp); @@ -571,111 +610,187 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp) /* Check address alignment. */ if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) alignment_mask = 0x7; - if (info->address & alignment_mask) { + offset = info->address & alignment_mask; + switch (offset) { + case 0: + /* Aligned */ + break; + case 1: + case 2: + /* Allow halfword watchpoints and breakpoints. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_2) + break; + case 3: + /* Allow single byte watchpoint. */ + if (info->ctrl.len == ARM_BREAKPOINT_LEN_1) + break; + default: + ret = -EINVAL; + goto out; + } + + info->address &= ~alignment_mask; + info->ctrl.len <<= offset; + + if (!bp->overflow_handler) { /* - * Try to fix the alignment. This may result in a length - * that is too large, so we must check for that. + * Mismatch breakpoints are required for single-stepping + * breakpoints. */ - bytelen = get_hbp_len(info->ctrl.len); - max_len = info->ctrl.type == ARM_BREAKPOINT_EXECUTE ? 4 : - max_watchpoint_len; - - if (max_len >= 8) - offset = info->address & 0x7; - else - offset = info->address & 0x3; - - if (bytelen > (1 << ((max_len - (offset + 1)) >> 1))) { - ret = -EFBIG; - goto out; - } + if (!core_has_mismatch_brps()) + return -EINVAL; - info->ctrl.len <<= offset; - info->address &= ~offset; + /* We don't allow mismatch breakpoints in kernel space. */ + if (arch_check_bp_in_kernelspace(bp)) + return -EPERM; - pr_debug("breakpoint alignment fixup: length = 0x%x, " - "address = 0x%x\n", info->ctrl.len, info->address); - } + /* + * Per-cpu breakpoints are not supported by our stepping + * mechanism. + */ + if (!bp->hw.bp_target) + return -EINVAL; - /* - * Currently we rely on an overflow handler to take - * care of single-stepping the breakpoint when it fires. - * In the case of userspace breakpoints on a core with V7 debug, - * we can use the mismatch feature as a poor-man's hardware single-step. - */ - if (WARN_ONCE(!bp->overflow_handler && - (arch_check_bp_in_kernelspace(bp) || !core_has_mismatch_bps()), - "overflow handler required but none found")) { - ret = -EINVAL; - goto out; + /* + * We only support specific access types if the fsr + * reports them. + */ + if (!debug_exception_updates_fsr() && + (info->ctrl.type == ARM_BREAKPOINT_LOAD || + info->ctrl.type == ARM_BREAKPOINT_STORE)) + return -EINVAL; } + out: return ret; } -static void update_mismatch_flag(int idx, int flag) +/* + * Enable/disable single-stepping over the breakpoint bp at address addr. + */ +static void enable_single_step(struct perf_event *bp, u32 addr) { - struct perf_event *bp = __get_cpu_var(bp_on_reg[idx]); - struct arch_hw_breakpoint *info; - - if (bp == NULL) - return; + struct arch_hw_breakpoint *info = counter_arch_bp(bp); - info = counter_arch_bp(bp); + arch_uninstall_hw_breakpoint(bp); + info->step_ctrl.mismatch = 1; + info->step_ctrl.len = ARM_BREAKPOINT_LEN_4; + info->step_ctrl.type = ARM_BREAKPOINT_EXECUTE; + info->step_ctrl.privilege = info->ctrl.privilege; + info->step_ctrl.enabled = 1; + info->trigger = addr; + arch_install_hw_breakpoint(bp); +} - /* Update the mismatch field to enter/exit `single-step' mode */ - if (!bp->overflow_handler && info->ctrl.mismatch != flag) { - info->ctrl.mismatch = flag; - write_wb_reg(ARM_BASE_BCR + idx, encode_ctrl_reg(info->ctrl) | 0x1); - } +static void disable_single_step(struct perf_event *bp) +{ + arch_uninstall_hw_breakpoint(bp); + counter_arch_bp(bp)->step_ctrl.enabled = 0; + arch_install_hw_breakpoint(bp); } -static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) +static void watchpoint_handler(unsigned long addr, unsigned int fsr, + struct pt_regs *regs) { - int i; - struct perf_event *bp, **slots = __get_cpu_var(wp_on_reg); + int i, access; + u32 val, ctrl_reg, alignment_mask; + struct perf_event *wp, **slots; struct arch_hw_breakpoint *info; - struct perf_event_attr attr; - - /* Without a disassembler, we can only handle 1 watchpoint. */ - BUG_ON(core_num_wrps > 1); + struct arch_hw_breakpoint_ctrl ctrl; - hw_breakpoint_init(&attr); - attr.bp_addr = regs->ARM_pc & ~0x3; - attr.bp_len = HW_BREAKPOINT_LEN_4; - attr.bp_type = HW_BREAKPOINT_X; + slots = this_cpu_ptr(wp_on_reg); for (i = 0; i < core_num_wrps; ++i) { rcu_read_lock(); - if (slots[i] == NULL) { - rcu_read_unlock(); - continue; - } + wp = slots[i]; + if (wp == NULL) + goto unlock; + + info = counter_arch_bp(wp); /* - * The DFAR is an unknown value. Since we only allow a - * single watchpoint, we can set the trigger to the lowest - * possible faulting address. + * The DFAR is an unknown value on debug architectures prior + * to 7.1. Since we only allow a single watchpoint on these + * older CPUs, we can set the trigger to the lowest possible + * faulting address. */ - info = counter_arch_bp(slots[i]); - info->trigger = slots[i]->attr.bp_addr; + if (debug_arch < ARM_DEBUG_ARCH_V7_1) { + BUG_ON(i > 0); + info->trigger = wp->attr.bp_addr; + } else { + if (info->ctrl.len == ARM_BREAKPOINT_LEN_8) + alignment_mask = 0x7; + else + alignment_mask = 0x3; + + /* Check if the watchpoint value matches. */ + val = read_wb_reg(ARM_BASE_WVR + i); + if (val != (addr & ~alignment_mask)) + goto unlock; + + /* Possible match, check the byte address select. */ + ctrl_reg = read_wb_reg(ARM_BASE_WCR + i); + decode_ctrl_reg(ctrl_reg, &ctrl); + if (!((1 << (addr & alignment_mask)) & ctrl.len)) + goto unlock; + + /* Check that the access type matches. */ + if (debug_exception_updates_fsr()) { + access = (fsr & ARM_FSR_ACCESS_MASK) ? + HW_BREAKPOINT_W : HW_BREAKPOINT_R; + if (!(access & hw_breakpoint_type(wp))) + goto unlock; + } + + /* We have a winner. */ + info->trigger = addr; + } + pr_debug("watchpoint fired: address = 0x%x\n", info->trigger); - perf_bp_event(slots[i], regs); + perf_bp_event(wp, regs); /* * If no overflow handler is present, insert a temporary * mismatch breakpoint so we can single-step over the * watchpoint trigger. */ - if (!slots[i]->overflow_handler) { - bp = register_user_hw_breakpoint(&attr, - wp_single_step_handler, - current); - counter_arch_bp(bp)->suspended_wp = slots[i]; - perf_event_disable(slots[i]); - } + if (!wp->overflow_handler) + enable_single_step(wp, instruction_pointer(regs)); +unlock: + rcu_read_unlock(); + } +} + +static void watchpoint_single_step_handler(unsigned long pc) +{ + int i; + struct perf_event *wp, **slots; + struct arch_hw_breakpoint *info; + + slots = this_cpu_ptr(wp_on_reg); + + for (i = 0; i < core_num_wrps; ++i) { + rcu_read_lock(); + + wp = slots[i]; + + if (wp == NULL) + goto unlock; + + info = counter_arch_bp(wp); + if (!info->step_ctrl.enabled) + goto unlock; + + /* + * Restore the original watchpoint if we've completed the + * single-step. + */ + if (info->trigger != pc) + disable_single_step(wp); + +unlock: rcu_read_unlock(); } } @@ -683,64 +798,73 @@ static void watchpoint_handler(unsigned long unknown, struct pt_regs *regs) static void breakpoint_handler(unsigned long unknown, struct pt_regs *regs) { int i; - int mismatch; u32 ctrl_reg, val, addr; - struct perf_event *bp, **slots = __get_cpu_var(bp_on_reg); + struct perf_event *bp, **slots; struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; + slots = this_cpu_ptr(bp_on_reg); + /* The exception entry code places the amended lr in the PC. */ addr = regs->ARM_pc; + /* Check the currently installed breakpoints first. */ for (i = 0; i < core_num_brps; ++i) { rcu_read_lock(); bp = slots[i]; - if (bp == NULL) { - rcu_read_unlock(); - continue; - } + if (bp == NULL) + goto unlock; - mismatch = 0; + info = counter_arch_bp(bp); /* Check if the breakpoint value matches. */ val = read_wb_reg(ARM_BASE_BVR + i); if (val != (addr & ~0x3)) - goto unlock; + goto mismatch; /* Possible match, check the byte address select to confirm. */ ctrl_reg = read_wb_reg(ARM_BASE_BCR + i); decode_ctrl_reg(ctrl_reg, &ctrl); if ((1 << (addr & 0x3)) & ctrl.len) { - mismatch = 1; - info = counter_arch_bp(bp); info->trigger = addr; - } - -unlock: - if ((mismatch && !info->ctrl.mismatch) || bp_is_single_step(bp)) { pr_debug("breakpoint fired: address = 0x%x\n", addr); perf_bp_event(bp, regs); + if (!bp->overflow_handler) + enable_single_step(bp, addr); + goto unlock; } - update_mismatch_flag(i, mismatch); +mismatch: + /* If we're stepping a breakpoint, it can now be restored. */ + if (info->step_ctrl.enabled) + disable_single_step(bp); +unlock: rcu_read_unlock(); } + + /* Handle any pending watchpoint single-step breakpoints. */ + watchpoint_single_step_handler(addr); } /* * Called from either the Data Abort Handler [watchpoint] or the - * Prefetch Abort Handler [breakpoint]. + * Prefetch Abort Handler [breakpoint] with interrupts disabled. */ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, struct pt_regs *regs) { - int ret = 1; /* Unhandled fault. */ + int ret = 0; u32 dscr; + preempt_disable(); + + if (interrupts_enabled(regs)) + local_irq_enable(); + /* We only handle watchpoints and hardware breakpoints. */ - ARM_DBG_READ(c1, 0, dscr); + ARM_DBG_READ(c0, c1, 0, dscr); /* Perform perf callbacks. */ switch (ARM_DSCR_MOE(dscr)) { @@ -750,28 +874,130 @@ static int hw_breakpoint_pending(unsigned long addr, unsigned int fsr, case ARM_ENTRY_ASYNC_WATCHPOINT: WARN(1, "Asynchronous watchpoint exception taken. Debugging results may be unreliable\n"); case ARM_ENTRY_SYNC_WATCHPOINT: - watchpoint_handler(addr, regs); + watchpoint_handler(addr, fsr, regs); break; default: - goto out; + ret = 1; /* Unhandled fault. */ } - ret = 0; -out: + preempt_enable(); + return ret; } /* * One-time initialisation. */ -static void __init reset_ctrl_regs(void *unused) +static cpumask_t debug_err_mask; + +static int debug_reg_trap(struct pt_regs *regs, unsigned int instr) { - int i; + int cpu = smp_processor_id(); - if (enable_monitor_mode()) + pr_warning("Debug register access (0x%x) caused undefined instruction on CPU %d\n", + instr, cpu); + + /* Set the error flag for this CPU and skip the faulting instruction. */ + cpumask_set_cpu(cpu, &debug_err_mask); + instruction_pointer(regs) += 4; + return 0; +} + +static struct undef_hook debug_reg_hook = { + .instr_mask = 0x0fe80f10, + .instr_val = 0x0e000e10, + .fn = debug_reg_trap, +}; + +/* Does this core support OS Save and Restore? */ +static bool core_has_os_save_restore(void) +{ + u32 oslsr; + + switch (get_debug_arch()) { + case ARM_DEBUG_ARCH_V7_1: + return true; + case ARM_DEBUG_ARCH_V7_ECP14: + ARM_DBG_READ(c1, c1, 4, oslsr); + if (oslsr & ARM_OSLSR_OSLM0) + return true; + default: + return false; + } +} + +static void reset_ctrl_regs(void *unused) +{ + int i, raw_num_brps, err = 0, cpu = smp_processor_id(); + u32 val; + + /* + * v7 debug contains save and restore registers so that debug state + * can be maintained across low-power modes without leaving the debug + * logic powered up. It is IMPLEMENTATION DEFINED whether we can access + * the debug registers out of reset, so we must unlock the OS Lock + * Access Register to avoid taking undefined instruction exceptions + * later on. + */ + switch (debug_arch) { + case ARM_DEBUG_ARCH_V6: + case ARM_DEBUG_ARCH_V6_1: + /* ARMv6 cores clear the registers out of reset. */ + goto out_mdbgen; + case ARM_DEBUG_ARCH_V7_ECP14: + /* + * Ensure sticky power-down is clear (i.e. debug logic is + * powered up). + */ + ARM_DBG_READ(c1, c5, 4, val); + if ((val & 0x1) == 0) + err = -EPERM; + + if (!has_ossr) + goto clear_vcr; + break; + case ARM_DEBUG_ARCH_V7_1: + /* + * Ensure the OS double lock is clear. + */ + ARM_DBG_READ(c1, c3, 4, val); + if ((val & 0x1) == 1) + err = -EPERM; + break; + } + + if (err) { + pr_warn_once("CPU %d debug is powered down!\n", cpu); + cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); return; + } - for (i = 0; i < core_num_brps; ++i) { + /* + * Unconditionally clear the OS lock by writing a value + * other than CS_LAR_KEY to the access register. + */ + ARM_DBG_WRITE(c1, c0, 4, ~CS_LAR_KEY); + isb(); + + /* + * Clear any configured vector-catch events before + * enabling monitor mode. + */ +clear_vcr: + ARM_DBG_WRITE(c0, c7, 0, 0); + isb(); + + if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { + pr_warn_once("CPU %d failed to disable vector catch\n", cpu); + return; + } + + /* + * The control/value register pairs are UNKNOWN out of reset so + * clear them to avoid spurious debug events. + */ + raw_num_brps = get_num_brp_resources(); + for (i = 0; i < raw_num_brps; ++i) { write_wb_reg(ARM_BASE_BCR + i, 0UL); write_wb_reg(ARM_BASE_BVR + i, 0UL); } @@ -780,57 +1006,117 @@ static void __init reset_ctrl_regs(void *unused) write_wb_reg(ARM_BASE_WCR + i, 0UL); write_wb_reg(ARM_BASE_WVR + i, 0UL); } + + if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { + pr_warn_once("CPU %d failed to clear debug register pairs\n", cpu); + return; + } + + /* + * Have a crack at enabling monitor mode. We don't actually need + * it yet, but reporting an error early is useful if it fails. + */ +out_mdbgen: + if (enable_monitor_mode()) + cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); } -static int __init arch_hw_breakpoint_init(void) +static int dbg_reset_notify(struct notifier_block *self, + unsigned long action, void *cpu) { - int ret = 0; - u32 dscr; + if ((action & ~CPU_TASKS_FROZEN) == CPU_ONLINE) + smp_call_function_single((int)cpu, reset_ctrl_regs, NULL, 1); + + return NOTIFY_OK; +} + +static struct notifier_block dbg_reset_nb = { + .notifier_call = dbg_reset_notify, +}; + +#ifdef CONFIG_CPU_PM +static int dbg_cpu_pm_notify(struct notifier_block *self, unsigned long action, + void *v) +{ + if (action == CPU_PM_EXIT) + reset_ctrl_regs(NULL); + + return NOTIFY_OK; +} + +static struct notifier_block dbg_cpu_pm_nb = { + .notifier_call = dbg_cpu_pm_notify, +}; + +static void __init pm_init(void) +{ + cpu_pm_register_notifier(&dbg_cpu_pm_nb); +} +#else +static inline void pm_init(void) +{ +} +#endif +static int __init arch_hw_breakpoint_init(void) +{ debug_arch = get_debug_arch(); - if (debug_arch > ARM_DEBUG_ARCH_V7_ECP14) { + if (!debug_arch_supported()) { pr_info("debug architecture 0x%x unsupported.\n", debug_arch); - ret = -ENODEV; - goto out; + return 0; } + has_ossr = core_has_os_save_restore(); + /* Determine how many BRPs/WRPs are available. */ core_num_brps = get_num_brps(); core_num_wrps = get_num_wrps(); - pr_info("found %d breakpoint and %d watchpoint registers.\n", - core_num_brps, core_num_wrps); - - if (core_has_mismatch_bps()) - pr_info("1 breakpoint reserved for watchpoint single-step.\n"); + cpu_notifier_register_begin(); - ARM_DBG_READ(c1, 0, dscr); - if (dscr & ARM_DSCR_HDBGEN) { - pr_warning("halting debug mode enabled. Assuming maximum " - "watchpoint size of 4 bytes."); - } else { - /* Work out the maximum supported watchpoint length. */ - max_watchpoint_len = get_max_wp_len(); - pr_info("maximum watchpoint size is %u bytes.\n", - max_watchpoint_len); + /* + * We need to tread carefully here because DBGSWENABLE may be + * driven low on this core and there isn't an architected way to + * determine that. + */ + register_undef_hook(&debug_reg_hook); - /* - * Reset the breakpoint resources. We assume that a halting - * debugger will leave the world in a nice state for us. - */ - smp_call_function(reset_ctrl_regs, NULL, 1); - reset_ctrl_regs(NULL); + /* + * Reset the breakpoint resources. We assume that a halting + * debugger will leave the world in a nice state for us. + */ + on_each_cpu(reset_ctrl_regs, NULL, 1); + unregister_undef_hook(&debug_reg_hook); + if (!cpumask_empty(&debug_err_mask)) { + core_num_brps = 0; + core_num_wrps = 0; + cpu_notifier_register_done(); + return 0; } + pr_info("found %d " "%s" "breakpoint and %d watchpoint registers.\n", + core_num_brps, core_has_mismatch_brps() ? "(+1 reserved) " : + "", core_num_wrps); + + /* Work out the maximum supported watchpoint length. */ + max_watchpoint_len = get_max_wp_len(); + pr_info("maximum watchpoint size is %u bytes.\n", + max_watchpoint_len); + /* Register debug fault handler. */ - hook_fault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, - "watchpoint debug exception"); - hook_ifault_code(2, hw_breakpoint_pending, SIGTRAP, TRAP_HWBKPT, - "breakpoint debug exception"); + hook_fault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP, + TRAP_HWBKPT, "watchpoint debug exception"); + hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP, + TRAP_HWBKPT, "breakpoint debug exception"); -out: - return ret; + /* Register hotplug and PM notifiers. */ + __register_cpu_notifier(&dbg_reset_nb); + + cpu_notifier_register_done(); + + pm_init(); + return 0; } arch_initcall(arch_hw_breakpoint_init); diff --git a/arch/arm/kernel/hyp-stub.S b/arch/arm/kernel/hyp-stub.S new file mode 100644 index 00000000000..797b1a6a490 --- /dev/null +++ b/arch/arm/kernel/hyp-stub.S @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2012 Linaro Limited. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/init.h> +#include <linux/linkage.h> +#include <asm/assembler.h> +#include <asm/virt.h> + +#ifndef ZIMAGE +/* + * For the kernel proper, we need to find out the CPU boot mode long after + * boot, so we need to store it in a writable variable. + * + * This is not in .bss, because we set it sufficiently early that the boot-time + * zeroing of .bss would clobber it. + */ +.data +ENTRY(__boot_cpu_mode) + .long 0 +.text + + /* + * Save the primary CPU boot mode. Requires 3 scratch registers. + */ + .macro store_primary_cpu_mode reg1, reg2, reg3 + mrs \reg1, cpsr + and \reg1, \reg1, #MODE_MASK + adr \reg2, .L__boot_cpu_mode_offset + ldr \reg3, [\reg2] + str \reg1, [\reg2, \reg3] + .endm + + /* + * Compare the current mode with the one saved on the primary CPU. + * If they don't match, record that fact. The Z bit indicates + * if there's a match or not. + * Requires 3 additionnal scratch registers. + */ + .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + adr \reg2, .L__boot_cpu_mode_offset + ldr \reg3, [\reg2] + ldr \reg1, [\reg2, \reg3] + cmp \mode, \reg1 @ matches primary CPU boot mode? + orrne \reg1, \reg1, #BOOT_CPU_MODE_MISMATCH + strne \reg1, [\reg2, \reg3] @ record what happened and give up + .endm + +#else /* ZIMAGE */ + + .macro store_primary_cpu_mode reg1:req, reg2:req, reg3:req + .endm + +/* + * The zImage loader only runs on one CPU, so we don't bother with mult-CPU + * consistency checking: + */ + .macro compare_cpu_mode_with_primary mode, reg1, reg2, reg3 + cmp \mode, \mode + .endm + +#endif /* ZIMAGE */ + +/* + * Hypervisor stub installation functions. + * + * These must be called with the MMU and D-cache off. + * They are not ABI compliant and are only intended to be called from the kernel + * entry points in head.S. + */ +@ Call this from the primary CPU +ENTRY(__hyp_stub_install) + store_primary_cpu_mode r4, r5, r6 +ENDPROC(__hyp_stub_install) + + @ fall through... + +@ Secondary CPUs should call here +ENTRY(__hyp_stub_install_secondary) + mrs r4, cpsr + and r4, r4, #MODE_MASK + + /* + * If the secondary has booted with a different mode, give up + * immediately. + */ + compare_cpu_mode_with_primary r4, r5, r6, r7 + movne pc, lr + + /* + * Once we have given up on one CPU, we do not try to install the + * stub hypervisor on the remaining ones: because the saved boot mode + * is modified, it can't compare equal to the CPSR mode field any + * more. + * + * Otherwise... + */ + + cmp r4, #HYP_MODE + movne pc, lr @ give up if the CPU is not in HYP mode + +/* + * Configure HSCTLR to set correct exception endianness/instruction set + * state etc. + * Turn off all traps + * Eventually, CPU-specific code might be needed -- assume not for now + * + * This code relies on the "eret" instruction to synchronize the + * various coprocessor accesses. This is done when we switch to SVC + * (see safe_svcmode_maskall). + */ + @ Now install the hypervisor stub: + adr r7, __hyp_stub_vectors + mcr p15, 4, r7, c12, c0, 0 @ set hypervisor vector base (HVBAR) + + @ Disable all traps, so we don't get any nasty surprise + mov r7, #0 + mcr p15, 4, r7, c1, c1, 0 @ HCR + mcr p15, 4, r7, c1, c1, 2 @ HCPTR + mcr p15, 4, r7, c1, c1, 3 @ HSTR + +THUMB( orr r7, #(1 << 30) ) @ HSCTLR.TE +#ifdef CONFIG_CPU_BIG_ENDIAN + orr r7, #(1 << 9) @ HSCTLR.EE +#endif + mcr p15, 4, r7, c1, c0, 0 @ HSCTLR + + mrc p15, 4, r7, c1, c1, 1 @ HDCR + and r7, #0x1f @ Preserve HPMN + mcr p15, 4, r7, c1, c1, 1 @ HDCR + +#if !defined(ZIMAGE) && defined(CONFIG_ARM_ARCH_TIMER) + @ make CNTP_* and CNTPCT accessible from PL1 + mrc p15, 0, r7, c0, c1, 1 @ ID_PFR1 + lsr r7, #16 + and r7, #0xf + cmp r7, #1 + bne 1f + mrc p15, 4, r7, c14, c1, 0 @ CNTHCTL + orr r7, r7, #3 @ PL1PCEN | PL1PCTEN + mcr p15, 4, r7, c14, c1, 0 @ CNTHCTL + mov r7, #0 + mcrr p15, 4, r7, r7, c14 @ CNTVOFF + + @ Disable virtual timer in case it was counting + mrc p15, 0, r7, c14, c3, 1 @ CNTV_CTL + bic r7, #1 @ Clear ENABLE + mcr p15, 0, r7, c14, c3, 1 @ CNTV_CTL +1: +#endif + + bx lr @ The boot CPU mode is left in r4. +ENDPROC(__hyp_stub_install_secondary) + +__hyp_stub_do_trap: + cmp r0, #-1 + mrceq p15, 4, r0, c12, c0, 0 @ get HVBAR + mcrne p15, 4, r0, c12, c0, 0 @ set HVBAR + __ERET +ENDPROC(__hyp_stub_do_trap) + +/* + * __hyp_set_vectors: Call this after boot to set the initial hypervisor + * vectors as part of hypervisor installation. On an SMP system, this should + * be called on each CPU. + * + * r0 must be the physical address of the new vector table (which must lie in + * the bottom 4GB of physical address space. + * + * r0 must be 32-byte aligned. + * + * Before calling this, you must check that the stub hypervisor is installed + * everywhere, by waiting for any secondary CPUs to be brought up and then + * checking that BOOT_CPU_MODE_HAVE_HYP(__boot_cpu_mode) is true. + * + * If not, there is a pre-existing hypervisor, some CPUs failed to boot, or + * something else went wrong... in such cases, trying to install a new + * hypervisor is unlikely to work as desired. + * + * When you call into your shiny new hypervisor, sp_hyp will contain junk, + * so you will need to set that to something sensible at the new hypervisor's + * initialisation entry point. + */ +ENTRY(__hyp_get_vectors) + mov r0, #-1 +ENDPROC(__hyp_get_vectors) + @ fall through +ENTRY(__hyp_set_vectors) + __HVC(0) + mov pc, lr +ENDPROC(__hyp_set_vectors) + +#ifndef ZIMAGE +.align 2 +.L__boot_cpu_mode_offset: + .long __boot_cpu_mode - . +#endif + +.align 5 +__hyp_stub_vectors: +__hyp_stub_reset: W(b) . +__hyp_stub_und: W(b) . +__hyp_stub_svc: W(b) . +__hyp_stub_pabort: W(b) . +__hyp_stub_dabort: W(b) . +__hyp_stub_trap: W(b) __hyp_stub_do_trap +__hyp_stub_irq: W(b) . +__hyp_stub_fiq: W(b) . +ENDPROC(__hyp_stub_vectors) + diff --git a/arch/arm/kernel/init_task.c b/arch/arm/kernel/init_task.c deleted file mode 100644 index e7cbb50dc35..00000000000 --- a/arch/arm/kernel/init_task.c +++ /dev/null @@ -1,37 +0,0 @@ -/* - * linux/arch/arm/kernel/init_task.c - */ -#include <linux/mm.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/sched.h> -#include <linux/init.h> -#include <linux/init_task.h> -#include <linux/mqueue.h> -#include <linux/uaccess.h> - -#include <asm/pgtable.h> - -static struct signal_struct init_signals = INIT_SIGNALS(init_signals); -static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); -/* - * Initial thread structure. - * - * We need to make sure that this is 8192-byte aligned due to the - * way process stacks are handled. This is done by making sure - * the linker maps this in the .text segment right after head.S, - * and making head.S ensure the proper alignment. - * - * The things we do for performance.. - */ -union thread_union init_thread_union __init_task_data = - { INIT_THREAD_INFO(init_task) }; - -/* - * Initial task structure. - * - * All other task structs will be allocated on slabs in fork.c - */ -struct task_struct init_task = INIT_TASK(init_task); - -EXPORT_SYMBOL(init_task); diff --git a/arch/arm/kernel/insn.c b/arch/arm/kernel/insn.c new file mode 100644 index 00000000000..b760340b701 --- /dev/null +++ b/arch/arm/kernel/insn.c @@ -0,0 +1,62 @@ +#include <linux/bug.h> +#include <linux/kernel.h> +#include <asm/opcodes.h> + +static unsigned long +__arm_gen_branch_thumb2(unsigned long pc, unsigned long addr, bool link) +{ + unsigned long s, j1, j2, i1, i2, imm10, imm11; + unsigned long first, second; + long offset; + + offset = (long)addr - (long)(pc + 4); + if (offset < -16777216 || offset > 16777214) { + WARN_ON_ONCE(1); + return 0; + } + + s = (offset >> 24) & 0x1; + i1 = (offset >> 23) & 0x1; + i2 = (offset >> 22) & 0x1; + imm10 = (offset >> 12) & 0x3ff; + imm11 = (offset >> 1) & 0x7ff; + + j1 = (!i1) ^ s; + j2 = (!i2) ^ s; + + first = 0xf000 | (s << 10) | imm10; + second = 0x9000 | (j1 << 13) | (j2 << 11) | imm11; + if (link) + second |= 1 << 14; + + return __opcode_thumb32_compose(first, second); +} + +static unsigned long +__arm_gen_branch_arm(unsigned long pc, unsigned long addr, bool link) +{ + unsigned long opcode = 0xea000000; + long offset; + + if (link) + opcode |= 1 << 24; + + offset = (long)addr - (long)(pc + 8); + if (unlikely(offset < -33554432 || offset > 33554428)) { + WARN_ON_ONCE(1); + return 0; + } + + offset = (offset >> 2) & 0x00ffffff; + + return opcode | offset; +} + +unsigned long +__arm_gen_branch(unsigned long pc, unsigned long addr, bool link) +{ + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) + return __arm_gen_branch_thumb2(pc, addr, link); + else + return __arm_gen_branch_arm(pc, addr, link); +} diff --git a/arch/arm/kernel/insn.h b/arch/arm/kernel/insn.h new file mode 100644 index 00000000000..e96065da4da --- /dev/null +++ b/arch/arm/kernel/insn.h @@ -0,0 +1,29 @@ +#ifndef __ASM_ARM_INSN_H +#define __ASM_ARM_INSN_H + +static inline unsigned long +arm_gen_nop(void) +{ +#ifdef CONFIG_THUMB2_KERNEL + return 0xf3af8000; /* nop.w */ +#else + return 0xe1a00000; /* mov r0, r0 */ +#endif +} + +unsigned long +__arm_gen_branch(unsigned long pc, unsigned long addr, bool link); + +static inline unsigned long +arm_gen_branch(unsigned long pc, unsigned long addr) +{ + return __arm_gen_branch(pc, addr, false); +} + +static inline unsigned long +arm_gen_branch_link(unsigned long pc, unsigned long addr) +{ + return __arm_gen_branch(pc, addr, true); +} + +#endif diff --git a/arch/arm/kernel/io.c b/arch/arm/kernel/io.c index f4470307edb..9203cf88333 100644 --- a/arch/arm/kernel/io.c +++ b/arch/arm/kernel/io.c @@ -1,6 +1,41 @@ -#include <linux/module.h> +#include <linux/export.h> #include <linux/types.h> #include <linux/io.h> +#include <linux/spinlock.h> + +static DEFINE_RAW_SPINLOCK(__io_lock); + +/* + * Generic atomic MMIO modify. + * + * Allows thread-safe access to registers shared by unrelated subsystems. + * The access is protected by a single MMIO-wide lock. + */ +void atomic_io_modify_relaxed(void __iomem *reg, u32 mask, u32 set) +{ + unsigned long flags; + u32 value; + + raw_spin_lock_irqsave(&__io_lock, flags); + value = readl_relaxed(reg) & ~mask; + value |= (set & mask); + writel_relaxed(value, reg); + raw_spin_unlock_irqrestore(&__io_lock, flags); +} +EXPORT_SYMBOL(atomic_io_modify_relaxed); + +void atomic_io_modify(void __iomem *reg, u32 mask, u32 set) +{ + unsigned long flags; + u32 value; + + raw_spin_lock_irqsave(&__io_lock, flags); + value = readl_relaxed(reg) & ~mask; + value |= (set & mask); + writel(value, reg); + raw_spin_unlock_irqrestore(&__io_lock, flags); +} +EXPORT_SYMBOL(atomic_io_modify); /* * Copy data from IO memory space to "real" memory space. diff --git a/arch/arm/kernel/irq.c b/arch/arm/kernel/irq.c index 36ad3be4692..2c425760451 100644 --- a/arch/arm/kernel/irq.c +++ b/arch/arm/kernel/irq.c @@ -22,11 +22,11 @@ * Naturally it's not a 1:1 relation, but there are similarities. */ #include <linux/kernel_stat.h> -#include <linux/module.h> #include <linux/signal.h> #include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/irqchip.h> #include <linux/random.h> #include <linux/smp.h> #include <linux/init.h> @@ -35,77 +35,35 @@ #include <linux/list.h> #include <linux/kallsyms.h> #include <linux/proc_fs.h> +#include <linux/export.h> -#include <asm/system.h> +#include <asm/hardware/cache-l2x0.h> +#include <asm/exception.h> +#include <asm/mach/arch.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> -/* - * No architecture-specific irq_finish function defined in arm/arch/irqs.h. - */ -#ifndef irq_finish -#define irq_finish(irq) do { } while (0) -#endif - -unsigned int arch_nr_irqs; -void (*init_arch_irq)(void) __initdata = NULL; unsigned long irq_err_count; -int show_interrupts(struct seq_file *p, void *v) +int arch_show_interrupts(struct seq_file *p, int prec) { - int i = *(loff_t *) v, cpu; - struct irq_desc *desc; - struct irqaction * action; - unsigned long flags; - - if (i == 0) { - char cpuname[12]; - - seq_printf(p, " "); - for_each_present_cpu(cpu) { - sprintf(cpuname, "CPU%d", cpu); - seq_printf(p, " %10s", cpuname); - } - seq_putc(p, '\n'); - } - - if (i < nr_irqs) { - desc = irq_to_desc(i); - raw_spin_lock_irqsave(&desc->lock, flags); - action = desc->action; - if (!action) - goto unlock; - - seq_printf(p, "%3d: ", i); - for_each_present_cpu(cpu) - seq_printf(p, "%10u ", kstat_irqs_cpu(i, cpu)); - seq_printf(p, " %10s", desc->chip->name ? : "-"); - seq_printf(p, " %s", action->name); - for (action = action->next; action; action = action->next) - seq_printf(p, ", %s", action->name); - - seq_putc(p, '\n'); -unlock: - raw_spin_unlock_irqrestore(&desc->lock, flags); - } else if (i == nr_irqs) { #ifdef CONFIG_FIQ - show_fiq_list(p, v); + show_fiq_list(p, prec); #endif #ifdef CONFIG_SMP - show_ipi_list(p); - show_local_irqs(p); + show_ipi_list(p, prec); #endif - seq_printf(p, "Err: %10lu\n", irq_err_count); - } + seq_printf(p, "%*s: %10lu\n", prec, "Err", irq_err_count); return 0; } /* - * do_IRQ handles all hardware IRQ's. Decoded IRQs should not - * come via this function. Instead, they should provide their - * own 'handler' + * handle_IRQ handles all hardware IRQ's. Decoded IRQs should + * not come via this function. Instead, they should provide their + * own 'handler'. Used by platform code implementing C-based 1st + * level decoding. */ -asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +void handle_IRQ(unsigned int irq, struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -123,85 +81,134 @@ asmlinkage void __exception asm_do_IRQ(unsigned int irq, struct pt_regs *regs) generic_handle_irq(irq); } - /* AT91 specific workaround */ - irq_finish(irq); - irq_exit(); set_irq_regs(old_regs); } +/* + * asm_do_IRQ is the interface to be used from assembly code. + */ +asmlinkage void __exception_irq_entry +asm_do_IRQ(unsigned int irq, struct pt_regs *regs) +{ + handle_IRQ(irq, regs); +} + void set_irq_flags(unsigned int irq, unsigned int iflags) { - struct irq_desc *desc; - unsigned long flags; + unsigned long clr = 0, set = IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; if (irq >= nr_irqs) { printk(KERN_ERR "Trying to set irq flags for IRQ%d\n", irq); return; } - desc = irq_to_desc(irq); - raw_spin_lock_irqsave(&desc->lock, flags); - desc->status |= IRQ_NOREQUEST | IRQ_NOPROBE | IRQ_NOAUTOEN; if (iflags & IRQF_VALID) - desc->status &= ~IRQ_NOREQUEST; + clr |= IRQ_NOREQUEST; if (iflags & IRQF_PROBE) - desc->status &= ~IRQ_NOPROBE; + clr |= IRQ_NOPROBE; if (!(iflags & IRQF_NOAUTOEN)) - desc->status &= ~IRQ_NOAUTOEN; - raw_spin_unlock_irqrestore(&desc->lock, flags); + clr |= IRQ_NOAUTOEN; + /* Order is clear bits in "clr" then set bits in "set" */ + irq_modify_status(irq, clr, set & ~clr); } +EXPORT_SYMBOL_GPL(set_irq_flags); void __init init_IRQ(void) { - init_arch_irq(); + int ret; + + if (IS_ENABLED(CONFIG_OF) && !machine_desc->init_irq) + irqchip_init(); + else + machine_desc->init_irq(); + + if (IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_CACHE_L2X0) && + (machine_desc->l2c_aux_mask || machine_desc->l2c_aux_val)) { + outer_cache.write_sec = machine_desc->l2c_write_sec; + ret = l2x0_of_init(machine_desc->l2c_aux_val, + machine_desc->l2c_aux_mask); + if (ret) + pr_err("L2C: failed to init: %d\n", ret); + } +} + +#ifdef CONFIG_MULTI_IRQ_HANDLER +void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq) + return; + + handle_arch_irq = handle_irq; } +#endif #ifdef CONFIG_SPARSE_IRQ int __init arch_probe_nr_irqs(void) { - nr_irqs = arch_nr_irqs ? arch_nr_irqs : NR_IRQS; + nr_irqs = machine_desc->nr_irqs ? machine_desc->nr_irqs : NR_IRQS; return nr_irqs; } #endif #ifdef CONFIG_HOTPLUG_CPU -static void route_irq(struct irq_desc *desc, unsigned int irq, unsigned int cpu) +static bool migrate_one_irq(struct irq_desc *desc) { - pr_debug("IRQ%u: moving from cpu%u to cpu%u\n", irq, desc->node, cpu); + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; - raw_spin_lock_irq(&desc->lock); - desc->chip->set_affinity(irq, cpumask_of(cpu)); - raw_spin_unlock_irq(&desc->lock); + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { + affinity = cpu_online_mask; + ret = true; + } + + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) + pr_debug("IRQ%u: unable to set affinity\n", d->irq); + else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + cpumask_copy(d->affinity, affinity); + + return ret; } /* - * The CPU has been marked offline. Migrate IRQs off this CPU. If - * the affinity settings do not allow other CPUs, force them onto any + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. */ void migrate_irqs(void) { - unsigned int i, cpu = smp_processor_id(); + unsigned int i; struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); for_each_irq_desc(i, desc) { - if (desc->node == cpu) { - unsigned int newcpu = cpumask_any_and(desc->affinity, - cpu_online_mask); - if (newcpu >= nr_cpu_ids) { - if (printk_ratelimit()) - printk(KERN_INFO "IRQ%u no longer affine to CPU%u\n", - i, cpu); - - cpumask_setall(desc->affinity); - newcpu = cpumask_any_and(desc->affinity, - cpu_online_mask); - } - - route_irq(desc, i, newcpu); - } + bool affinity_broken; + + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken && printk_ratelimit()) + pr_warning("IRQ%u no longer affine to CPU%u\n", i, + smp_processor_id()); } + + local_irq_restore(flags); } #endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm/kernel/isa.c b/arch/arm/kernel/isa.c index 34648591073..9d1cf715689 100644 --- a/arch/arm/kernel/isa.c +++ b/arch/arm/kernel/isa.c @@ -20,7 +20,7 @@ static unsigned int isa_membase, isa_portbase, isa_portshift; -static ctl_table ctl_isa_vars[4] = { +static struct ctl_table ctl_isa_vars[4] = { { .procname = "membase", .data = &isa_membase, @@ -44,7 +44,7 @@ static ctl_table ctl_isa_vars[4] = { static struct ctl_table_header *isa_sysctl_header; -static ctl_table ctl_isa[2] = { +static struct ctl_table ctl_isa[2] = { { .procname = "isa", .mode = 0555, @@ -52,7 +52,7 @@ static ctl_table ctl_isa[2] = { }, {} }; -static ctl_table ctl_bus[2] = { +static struct ctl_table ctl_bus[2] = { { .procname = "bus", .mode = 0555, diff --git a/arch/arm/kernel/iwmmxt.S b/arch/arm/kernel/iwmmxt.S index b63b528f22a..2b32978ae90 100644 --- a/arch/arm/kernel/iwmmxt.S +++ b/arch/arm/kernel/iwmmxt.S @@ -18,6 +18,19 @@ #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> +#include <asm/assembler.h> + +#if defined(CONFIG_CPU_PJ4) || defined(CONFIG_CPU_PJ4B) +#define PJ4(code...) code +#define XSC(code...) +#elif defined(CONFIG_CPU_MOHAWK) || \ + defined(CONFIG_CPU_XSC3) || \ + defined(CONFIG_CPU_XSCALE) +#define PJ4(code...) +#define XSC(code...) code +#else +#error "Unsupported iWMMXt architecture" +#endif #define MMX_WR0 (0x00) #define MMX_WR1 (0x08) @@ -53,16 +66,23 @@ * r9 = ret_from_exception * lr = undefined instr exit * - * called from prefetch exception handler with interrupts disabled + * called from prefetch exception handler with interrupts enabled */ ENTRY(iwmmxt_task_enable) - - mrc p15, 0, r2, c15, c1, 0 - tst r2, #0x3 @ CP0 and CP1 accessible? - movne pc, lr @ if so no business here - orr r2, r2, #0x3 @ enable access to CP0 and CP1 - mcr p15, 0, r2, c15, c1, 0 + inc_preempt_count r10, r3 + + XSC(mrc p15, 0, r2, c15, c1, 0) + PJ4(mrc p15, 0, r2, c1, c0, 2) + @ CP0 and CP1 accessible? + XSC(tst r2, #0x3) + PJ4(tst r2, #0xf) + bne 4f @ if so no business here + @ enable access to CP0 and CP1 + XSC(orr r2, r2, #0x3) + XSC(mcr p15, 0, r2, c15, c1, 0) + PJ4(orr r2, r2, #0xf) + PJ4(mcr p15, 0, r2, c1, c0, 2) ldr r3, =concan_owner add r0, r10, #TI_IWMMXT_STATE @ get task Concan save area @@ -74,13 +94,19 @@ ENTRY(iwmmxt_task_enable) mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait + bl concan_save - teq r1, #0 @ test for last ownership - mov lr, r9 @ normal exit from exception - beq concan_load @ no owner, skip save +#ifdef CONFIG_PREEMPT_COUNT + get_thread_info r10 +#endif +4: dec_preempt_count r10, r3 + mov pc, r9 @ normal exit from exception concan_save: + teq r1, #0 @ test for last ownership + beq concan_load @ no owner, skip save + tmrc r2, wCon @ CUP? wCx @@ -118,7 +144,7 @@ concan_dump: wstrd wR15, [r1, #MMX_WR15] 2: teq r0, #0 @ anything to load? - moveq pc, lr + moveq pc, lr @ if not, return concan_load: @@ -152,6 +178,7 @@ concan_load: teq r1, #0 mov r2, #0 moveq pc, lr + tmcr wCon, r2 mov pc, lr @@ -179,17 +206,26 @@ ENTRY(iwmmxt_task_disable) teqne r1, r2 @ or specified one? bne 1f @ no: quit - mrc p15, 0, r4, c15, c1, 0 - orr r4, r4, #0x3 @ enable access to CP0 and CP1 - mcr p15, 0, r4, c15, c1, 0 + @ enable access to CP0 and CP1 + XSC(mrc p15, 0, r4, c15, c1, 0) + XSC(orr r4, r4, #0x3) + XSC(mcr p15, 0, r4, c15, c1, 0) + PJ4(mrc p15, 0, r4, c1, c0, 2) + PJ4(orr r4, r4, #0xf) + PJ4(mcr p15, 0, r4, c1, c0, 2) + mov r0, #0 @ nothing to load str r0, [r3] @ no more current owner mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait bl concan_save - bic r4, r4, #0x3 @ disable access to CP0 and CP1 - mcr p15, 0, r4, c15, c1, 0 + @ disable access to CP0 and CP1 + XSC(bic r4, r4, #0x3) + XSC(mcr p15, 0, r4, c15, c1, 0) + PJ4(bic r4, r4, #0xf) + PJ4(mcr p15, 0, r4, c1, c0, 2) + mrc p15, 0, r2, c2, c0, 0 mov r2, r2 @ cpwait @@ -277,8 +313,11 @@ ENTRY(iwmmxt_task_restore) */ ENTRY(iwmmxt_task_switch) - mrc p15, 0, r1, c15, c1, 0 - tst r1, #0x3 @ CP0 and CP1 accessible? + XSC(mrc p15, 0, r1, c15, c1, 0) + PJ4(mrc p15, 0, r1, c1, c0, 2) + @ CP0 and CP1 accessible? + XSC(tst r1, #0x3) + PJ4(tst r1, #0xf) bne 1f @ yes: block them for next task ldr r2, =concan_owner @@ -287,8 +326,11 @@ ENTRY(iwmmxt_task_switch) teq r2, r3 @ next task owns it? movne pc, lr @ no: leave Concan disabled -1: eor r1, r1, #3 @ flip Concan access - mcr p15, 0, r1, c15, c1, 0 +1: @ flip Concan access + XSC(eor r1, r1, #0x3) + XSC(mcr p15, 0, r1, c15, c1, 0) + PJ4(eor r1, r1, #0xf) + PJ4(mcr p15, 0, r1, c1, c0, 2) mrc p15, 0, r1, c2, c0, 0 sub pc, lr, r1, lsr #32 @ cpwait and return diff --git a/arch/arm/kernel/jump_label.c b/arch/arm/kernel/jump_label.c new file mode 100644 index 00000000000..4ce4f789446 --- /dev/null +++ b/arch/arm/kernel/jump_label.c @@ -0,0 +1,39 @@ +#include <linux/kernel.h> +#include <linux/jump_label.h> + +#include "insn.h" +#include "patch.h" + +#ifdef HAVE_JUMP_LABEL + +static void __arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + bool is_static) +{ + void *addr = (void *)entry->code; + unsigned int insn; + + if (type == JUMP_LABEL_ENABLE) + insn = arm_gen_branch(entry->code, entry->target); + else + insn = arm_gen_nop(); + + if (is_static) + __patch_text(addr, insn); + else + patch_text(addr, insn); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, false); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, true); +} + +#endif diff --git a/arch/arm/kernel/kgdb.c b/arch/arm/kernel/kgdb.c index 778c2f7024f..a74b53c1b7d 100644 --- a/arch/arm/kernel/kgdb.c +++ b/arch/arm/kernel/kgdb.c @@ -160,12 +160,16 @@ static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int instr) static struct undef_hook kgdb_brkpt_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_BREAKINST, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, .fn = kgdb_brk_fn }; static struct undef_hook kgdb_compiled_brkpt_hook = { .instr_mask = 0xffffffff, .instr_val = KGDB_COMPILED_BREAK, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, .fn = kgdb_compiled_brk_fn }; diff --git a/arch/arm/kernel/kprobes-arm.c b/arch/arm/kernel/kprobes-arm.c new file mode 100644 index 00000000000..ac300c60d65 --- /dev/null +++ b/arch/arm/kernel/kprobes-arm.c @@ -0,0 +1,343 @@ +/* + * arch/arm/kernel/kprobes-decode.c + * + * Copyright (C) 2006, 2007 Motorola Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* + * We do not have hardware single-stepping on ARM, This + * effort is further complicated by the ARM not having a + * "next PC" register. Instructions that change the PC + * can't be safely single-stepped in a MP environment, so + * we have a lot of work to do: + * + * In the prepare phase: + * *) If it is an instruction that does anything + * with the CPU mode, we reject it for a kprobe. + * (This is out of laziness rather than need. The + * instructions could be simulated.) + * + * *) Otherwise, decode the instruction rewriting its + * registers to take fixed, ordered registers and + * setting a handler for it to run the instruction. + * + * In the execution phase by an instruction's handler: + * + * *) If the PC is written to by the instruction, the + * instruction must be fully simulated in software. + * + * *) Otherwise, a modified form of the instruction is + * directly executed. Its handler calls the + * instruction in insn[0]. In insn[1] is a + * "mov pc, lr" to return. + * + * Before calling, load up the reordered registers + * from the original instruction's registers. If one + * of the original input registers is the PC, compute + * and adjust the appropriate input register. + * + * After call completes, copy the output registers to + * the original instruction's original registers. + * + * We don't use a real breakpoint instruction since that + * would have us in the kernel go from SVC mode to SVC + * mode losing the link register. Instead we use an + * undefined instruction. To simplify processing, the + * undefined instruction used for kprobes must be reserved + * exclusively for kprobes use. + * + * TODO: ifdef out some instruction decoding based on architecture. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/ptrace.h> + +#include "kprobes.h" +#include "probes-arm.h" + +#if __LINUX_ARM_ARCH__ >= 6 +#define BLX(reg) "blx "reg" \n\t" +#else +#define BLX(reg) "mov lr, pc \n\t" \ + "mov pc, "reg" \n\t" +#endif + +static void __kprobes +emulate_ldrdstrd(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc + 4; + int rt = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + + register unsigned long rtv asm("r0") = regs->uregs[rt]; + register unsigned long rt2v asm("r1") = regs->uregs[rt+1]; + register unsigned long rnv asm("r2") = (rn == 15) ? pc + : regs->uregs[rn]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + + __asm__ __volatile__ ( + BLX("%[fn]") + : "=r" (rtv), "=r" (rt2v), "=r" (rnv) + : "0" (rtv), "1" (rt2v), "2" (rnv), "r" (rmv), + [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rt] = rtv; + regs->uregs[rt+1] = rt2v; + if (is_writeback(insn)) + regs->uregs[rn] = rnv; +} + +static void __kprobes +emulate_ldr(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc + 4; + int rt = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + + register unsigned long rtv asm("r0"); + register unsigned long rnv asm("r2") = (rn == 15) ? pc + : regs->uregs[rn]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + + __asm__ __volatile__ ( + BLX("%[fn]") + : "=r" (rtv), "=r" (rnv) + : "1" (rnv), "r" (rmv), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + if (rt == 15) + load_write_pc(rtv, regs); + else + regs->uregs[rt] = rtv; + + if (is_writeback(insn)) + regs->uregs[rn] = rnv; +} + +static void __kprobes +emulate_str(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long rtpc = regs->ARM_pc - 4 + str_pc_offset; + unsigned long rnpc = regs->ARM_pc + 4; + int rt = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + + register unsigned long rtv asm("r0") = (rt == 15) ? rtpc + : regs->uregs[rt]; + register unsigned long rnv asm("r2") = (rn == 15) ? rnpc + : regs->uregs[rn]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + + __asm__ __volatile__ ( + BLX("%[fn]") + : "=r" (rnv) + : "r" (rtv), "0" (rnv), "r" (rmv), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + if (is_writeback(insn)) + regs->uregs[rn] = rnv; +} + +static void __kprobes +emulate_rd12rn16rm0rs8_rwflags(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc + 4; + int rd = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + int rs = (insn >> 8) & 0xf; + + register unsigned long rdv asm("r0") = regs->uregs[rd]; + register unsigned long rnv asm("r2") = (rn == 15) ? pc + : regs->uregs[rn]; + register unsigned long rmv asm("r3") = (rm == 15) ? pc + : regs->uregs[rm]; + register unsigned long rsv asm("r1") = regs->uregs[rs]; + unsigned long cpsr = regs->ARM_cpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + BLX("%[fn]") + "mrs %[cpsr], cpsr \n\t" + : "=r" (rdv), [cpsr] "=r" (cpsr) + : "0" (rdv), "r" (rnv), "r" (rmv), "r" (rsv), + "1" (cpsr), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + if (rd == 15) + alu_write_pc(rdv, regs); + else + regs->uregs[rd] = rdv; + regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK); +} + +static void __kprobes +emulate_rd12rn16rm0_rwflags_nopc(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rd = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + + register unsigned long rdv asm("r0") = regs->uregs[rd]; + register unsigned long rnv asm("r2") = regs->uregs[rn]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + unsigned long cpsr = regs->ARM_cpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + BLX("%[fn]") + "mrs %[cpsr], cpsr \n\t" + : "=r" (rdv), [cpsr] "=r" (cpsr) + : "0" (rdv), "r" (rnv), "r" (rmv), + "1" (cpsr), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rd] = rdv; + regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK); +} + +static void __kprobes +emulate_rd16rn12rm0rs8_rwflags_nopc(probes_opcode_t insn, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + int rd = (insn >> 16) & 0xf; + int rn = (insn >> 12) & 0xf; + int rm = insn & 0xf; + int rs = (insn >> 8) & 0xf; + + register unsigned long rdv asm("r2") = regs->uregs[rd]; + register unsigned long rnv asm("r0") = regs->uregs[rn]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + register unsigned long rsv asm("r1") = regs->uregs[rs]; + unsigned long cpsr = regs->ARM_cpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + BLX("%[fn]") + "mrs %[cpsr], cpsr \n\t" + : "=r" (rdv), [cpsr] "=r" (cpsr) + : "0" (rdv), "r" (rnv), "r" (rmv), "r" (rsv), + "1" (cpsr), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rd] = rdv; + regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK); +} + +static void __kprobes +emulate_rd12rm0_noflags_nopc(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rd = (insn >> 12) & 0xf; + int rm = insn & 0xf; + + register unsigned long rdv asm("r0") = regs->uregs[rd]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + + __asm__ __volatile__ ( + BLX("%[fn]") + : "=r" (rdv) + : "0" (rdv), "r" (rmv), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rd] = rdv; +} + +static void __kprobes +emulate_rdlo12rdhi16rn0rm8_rwflags_nopc(probes_opcode_t insn, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + int rdlo = (insn >> 12) & 0xf; + int rdhi = (insn >> 16) & 0xf; + int rn = insn & 0xf; + int rm = (insn >> 8) & 0xf; + + register unsigned long rdlov asm("r0") = regs->uregs[rdlo]; + register unsigned long rdhiv asm("r2") = regs->uregs[rdhi]; + register unsigned long rnv asm("r3") = regs->uregs[rn]; + register unsigned long rmv asm("r1") = regs->uregs[rm]; + unsigned long cpsr = regs->ARM_cpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + BLX("%[fn]") + "mrs %[cpsr], cpsr \n\t" + : "=r" (rdlov), "=r" (rdhiv), [cpsr] "=r" (cpsr) + : "0" (rdlov), "1" (rdhiv), "r" (rnv), "r" (rmv), + "2" (cpsr), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rdlo] = rdlov; + regs->uregs[rdhi] = rdhiv; + regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK); +} + +const union decode_action kprobes_arm_actions[NUM_PROBES_ARM_ACTIONS] = { + [PROBES_EMULATE_NONE] = {.handler = probes_emulate_none}, + [PROBES_SIMULATE_NOP] = {.handler = probes_simulate_nop}, + [PROBES_PRELOAD_IMM] = {.handler = probes_simulate_nop}, + [PROBES_PRELOAD_REG] = {.handler = probes_simulate_nop}, + [PROBES_BRANCH_IMM] = {.handler = simulate_blx1}, + [PROBES_MRS] = {.handler = simulate_mrs}, + [PROBES_BRANCH_REG] = {.handler = simulate_blx2bx}, + [PROBES_CLZ] = {.handler = emulate_rd12rm0_noflags_nopc}, + [PROBES_SATURATING_ARITHMETIC] = { + .handler = emulate_rd12rn16rm0_rwflags_nopc}, + [PROBES_MUL1] = {.handler = emulate_rdlo12rdhi16rn0rm8_rwflags_nopc}, + [PROBES_MUL2] = {.handler = emulate_rd16rn12rm0rs8_rwflags_nopc}, + [PROBES_SWP] = {.handler = emulate_rd12rn16rm0_rwflags_nopc}, + [PROBES_LDRSTRD] = {.handler = emulate_ldrdstrd}, + [PROBES_LOAD_EXTRA] = {.handler = emulate_ldr}, + [PROBES_LOAD] = {.handler = emulate_ldr}, + [PROBES_STORE_EXTRA] = {.handler = emulate_str}, + [PROBES_STORE] = {.handler = emulate_str}, + [PROBES_MOV_IP_SP] = {.handler = simulate_mov_ipsp}, + [PROBES_DATA_PROCESSING_REG] = { + .handler = emulate_rd12rn16rm0rs8_rwflags}, + [PROBES_DATA_PROCESSING_IMM] = { + .handler = emulate_rd12rn16rm0rs8_rwflags}, + [PROBES_MOV_HALFWORD] = {.handler = emulate_rd12rm0_noflags_nopc}, + [PROBES_SEV] = {.handler = probes_emulate_none}, + [PROBES_WFE] = {.handler = probes_simulate_nop}, + [PROBES_SATURATE] = {.handler = emulate_rd12rn16rm0_rwflags_nopc}, + [PROBES_REV] = {.handler = emulate_rd12rm0_noflags_nopc}, + [PROBES_MMI] = {.handler = emulate_rd12rn16rm0_rwflags_nopc}, + [PROBES_PACK] = {.handler = emulate_rd12rn16rm0_rwflags_nopc}, + [PROBES_EXTEND] = {.handler = emulate_rd12rm0_noflags_nopc}, + [PROBES_EXTEND_ADD] = {.handler = emulate_rd12rn16rm0_rwflags_nopc}, + [PROBES_MUL_ADD_LONG] = { + .handler = emulate_rdlo12rdhi16rn0rm8_rwflags_nopc}, + [PROBES_MUL_ADD] = {.handler = emulate_rd16rn12rm0rs8_rwflags_nopc}, + [PROBES_BITFIELD] = {.handler = emulate_rd12rm0_noflags_nopc}, + [PROBES_BRANCH] = {.handler = simulate_bbl}, + [PROBES_LDMSTM] = {.decoder = kprobe_decode_ldmstm} +}; diff --git a/arch/arm/kernel/kprobes-common.c b/arch/arm/kernel/kprobes-common.c new file mode 100644 index 00000000000..0bf5d64eba1 --- /dev/null +++ b/arch/arm/kernel/kprobes-common.c @@ -0,0 +1,171 @@ +/* + * arch/arm/kernel/kprobes-common.c + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * Some contents moved here from arch/arm/include/asm/kprobes-arm.c which is + * Copyright (C) 2006, 2007 Motorola Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <asm/opcodes.h> + +#include "kprobes.h" + + +static void __kprobes simulate_ldm1stm1(probes_opcode_t insn, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + int rn = (insn >> 16) & 0xf; + int lbit = insn & (1 << 20); + int wbit = insn & (1 << 21); + int ubit = insn & (1 << 23); + int pbit = insn & (1 << 24); + long *addr = (long *)regs->uregs[rn]; + int reg_bit_vector; + int reg_count; + + reg_count = 0; + reg_bit_vector = insn & 0xffff; + while (reg_bit_vector) { + reg_bit_vector &= (reg_bit_vector - 1); + ++reg_count; + } + + if (!ubit) + addr -= reg_count; + addr += (!pbit == !ubit); + + reg_bit_vector = insn & 0xffff; + while (reg_bit_vector) { + int reg = __ffs(reg_bit_vector); + reg_bit_vector &= (reg_bit_vector - 1); + if (lbit) + regs->uregs[reg] = *addr++; + else + *addr++ = regs->uregs[reg]; + } + + if (wbit) { + if (!ubit) + addr -= reg_count; + addr -= (!pbit == !ubit); + regs->uregs[rn] = (long)addr; + } +} + +static void __kprobes simulate_stm1_pc(probes_opcode_t insn, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + unsigned long addr = regs->ARM_pc - 4; + + regs->ARM_pc = (long)addr + str_pc_offset; + simulate_ldm1stm1(insn, asi, regs); + regs->ARM_pc = (long)addr + 4; +} + +static void __kprobes simulate_ldm1_pc(probes_opcode_t insn, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + simulate_ldm1stm1(insn, asi, regs); + load_write_pc(regs->ARM_pc, regs); +} + +static void __kprobes +emulate_generic_r0_12_noflags(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + register void *rregs asm("r1") = regs; + register void *rfn asm("lr") = asi->insn_fn; + + __asm__ __volatile__ ( + "stmdb sp!, {%[regs], r11} \n\t" + "ldmia %[regs], {r0-r12} \n\t" +#if __LINUX_ARM_ARCH__ >= 6 + "blx %[fn] \n\t" +#else + "str %[fn], [sp, #-4]! \n\t" + "adr lr, 1f \n\t" + "ldr pc, [sp], #4 \n\t" + "1: \n\t" +#endif + "ldr lr, [sp], #4 \n\t" /* lr = regs */ + "stmia lr, {r0-r12} \n\t" + "ldr r11, [sp], #4 \n\t" + : [regs] "=r" (rregs), [fn] "=r" (rfn) + : "0" (rregs), "1" (rfn) + : "r0", "r2", "r3", "r4", "r5", "r6", "r7", + "r8", "r9", "r10", "r12", "memory", "cc" + ); +} + +static void __kprobes +emulate_generic_r2_14_noflags(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + emulate_generic_r0_12_noflags(insn, asi, + (struct pt_regs *)(regs->uregs+2)); +} + +static void __kprobes +emulate_ldm_r3_15(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + emulate_generic_r0_12_noflags(insn, asi, + (struct pt_regs *)(regs->uregs+3)); + load_write_pc(regs->ARM_pc, regs); +} + +enum probes_insn __kprobes +kprobe_decode_ldmstm(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *h) +{ + probes_insn_handler_t *handler = 0; + unsigned reglist = insn & 0xffff; + int is_ldm = insn & 0x100000; + int rn = (insn >> 16) & 0xf; + + if (rn <= 12 && (reglist & 0xe000) == 0) { + /* Instruction only uses registers in the range R0..R12 */ + handler = emulate_generic_r0_12_noflags; + + } else if (rn >= 2 && (reglist & 0x8003) == 0) { + /* Instruction only uses registers in the range R2..R14 */ + rn -= 2; + reglist >>= 2; + handler = emulate_generic_r2_14_noflags; + + } else if (rn >= 3 && (reglist & 0x0007) == 0) { + /* Instruction only uses registers in the range R3..R15 */ + if (is_ldm && (reglist & 0x8000)) { + rn -= 3; + reglist >>= 3; + handler = emulate_ldm_r3_15; + } + } + + if (handler) { + /* We can emulate the instruction in (possibly) modified form */ + asi->insn[0] = __opcode_to_mem_arm((insn & 0xfff00000) | + (rn << 16) | reglist); + asi->insn_handler = handler; + return INSN_GOOD; + } + + /* Fallback to slower simulation... */ + if (reglist & 0x8000) + handler = is_ldm ? simulate_ldm1_pc : simulate_stm1_pc; + else + handler = simulate_ldm1stm1; + asi->insn_handler = handler; + return INSN_GOOD_NO_SLOT; +} + diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c deleted file mode 100644 index 2c1f0050c9c..00000000000 --- a/arch/arm/kernel/kprobes-decode.c +++ /dev/null @@ -1,1531 +0,0 @@ -/* - * arch/arm/kernel/kprobes-decode.c - * - * Copyright (C) 2006, 2007 Motorola Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ - -/* - * We do not have hardware single-stepping on ARM, This - * effort is further complicated by the ARM not having a - * "next PC" register. Instructions that change the PC - * can't be safely single-stepped in a MP environment, so - * we have a lot of work to do: - * - * In the prepare phase: - * *) If it is an instruction that does anything - * with the CPU mode, we reject it for a kprobe. - * (This is out of laziness rather than need. The - * instructions could be simulated.) - * - * *) Otherwise, decode the instruction rewriting its - * registers to take fixed, ordered registers and - * setting a handler for it to run the instruction. - * - * In the execution phase by an instruction's handler: - * - * *) If the PC is written to by the instruction, the - * instruction must be fully simulated in software. - * If it is a conditional instruction, the handler - * will use insn[0] to copy its condition code to - * set r0 to 1 and insn[1] to "mov pc, lr" to return. - * - * *) Otherwise, a modified form of the instruction is - * directly executed. Its handler calls the - * instruction in insn[0]. In insn[1] is a - * "mov pc, lr" to return. - * - * Before calling, load up the reordered registers - * from the original instruction's registers. If one - * of the original input registers is the PC, compute - * and adjust the appropriate input register. - * - * After call completes, copy the output registers to - * the original instruction's original registers. - * - * We don't use a real breakpoint instruction since that - * would have us in the kernel go from SVC mode to SVC - * mode losing the link register. Instead we use an - * undefined instruction. To simplify processing, the - * undefined instruction used for kprobes must be reserved - * exclusively for kprobes use. - * - * TODO: ifdef out some instruction decoding based on architecture. - */ - -#include <linux/kernel.h> -#include <linux/kprobes.h> - -#define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit))))) - -#define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25) - -#define PSR_fs (PSR_f|PSR_s) - -#define KPROBE_RETURN_INSTRUCTION 0xe1a0f00e /* mov pc, lr */ -#define SET_R0_TRUE_INSTRUCTION 0xe3a00001 /* mov r0, #1 */ - -#define truecc_insn(insn) (((insn) & 0xf0000000) | \ - (SET_R0_TRUE_INSTRUCTION & 0x0fffffff)) - -typedef long (insn_0arg_fn_t)(void); -typedef long (insn_1arg_fn_t)(long); -typedef long (insn_2arg_fn_t)(long, long); -typedef long (insn_3arg_fn_t)(long, long, long); -typedef long (insn_4arg_fn_t)(long, long, long, long); -typedef long long (insn_llret_0arg_fn_t)(void); -typedef long long (insn_llret_3arg_fn_t)(long, long, long); -typedef long long (insn_llret_4arg_fn_t)(long, long, long, long); - -union reg_pair { - long long dr; -#ifdef __LITTLE_ENDIAN - struct { long r0, r1; }; -#else - struct { long r1, r0; }; -#endif -}; - -/* - * For STR and STM instructions, an ARM core may choose to use either - * a +8 or a +12 displacement from the current instruction's address. - * Whichever value is chosen for a given core, it must be the same for - * both instructions and may not change. This function measures it. - */ - -static int str_pc_offset; - -static void __init find_str_pc_offset(void) -{ - int addr, scratch, ret; - - __asm__ ( - "sub %[ret], pc, #4 \n\t" - "str pc, %[addr] \n\t" - "ldr %[scr], %[addr] \n\t" - "sub %[ret], %[scr], %[ret] \n\t" - : [ret] "=r" (ret), [scr] "=r" (scratch), [addr] "+m" (addr)); - - str_pc_offset = ret; -} - -/* - * The insnslot_?arg_r[w]flags() functions below are to keep the - * msr -> *fn -> mrs instruction sequences indivisible so that - * the state of the CPSR flags aren't inadvertently modified - * just before or just after the call. - */ - -static inline long __kprobes -insnslot_0arg_rflags(long cpsr, insn_0arg_fn_t *fn) -{ - register long ret asm("r0"); - - __asm__ __volatile__ ( - "msr cpsr_fs, %[cpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - : "=r" (ret) - : [cpsr] "r" (cpsr), [fn] "r" (fn) - : "lr", "cc" - ); - return ret; -} - -static inline long long __kprobes -insnslot_llret_0arg_rflags(long cpsr, insn_llret_0arg_fn_t *fn) -{ - register long ret0 asm("r0"); - register long ret1 asm("r1"); - union reg_pair fnr; - - __asm__ __volatile__ ( - "msr cpsr_fs, %[cpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - : "=r" (ret0), "=r" (ret1) - : [cpsr] "r" (cpsr), [fn] "r" (fn) - : "lr", "cc" - ); - fnr.r0 = ret0; - fnr.r1 = ret1; - return fnr.dr; -} - -static inline long __kprobes -insnslot_1arg_rflags(long r0, long cpsr, insn_1arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long ret asm("r0"); - - __asm__ __volatile__ ( - "msr cpsr_fs, %[cpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - : "=r" (ret) - : "0" (rr0), [cpsr] "r" (cpsr), [fn] "r" (fn) - : "lr", "cc" - ); - return ret; -} - -static inline long __kprobes -insnslot_2arg_rflags(long r0, long r1, long cpsr, insn_2arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long rr1 asm("r1") = r1; - register long ret asm("r0"); - - __asm__ __volatile__ ( - "msr cpsr_fs, %[cpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - : "=r" (ret) - : "0" (rr0), "r" (rr1), - [cpsr] "r" (cpsr), [fn] "r" (fn) - : "lr", "cc" - ); - return ret; -} - -static inline long __kprobes -insnslot_3arg_rflags(long r0, long r1, long r2, long cpsr, insn_3arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long rr1 asm("r1") = r1; - register long rr2 asm("r2") = r2; - register long ret asm("r0"); - - __asm__ __volatile__ ( - "msr cpsr_fs, %[cpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - : "=r" (ret) - : "0" (rr0), "r" (rr1), "r" (rr2), - [cpsr] "r" (cpsr), [fn] "r" (fn) - : "lr", "cc" - ); - return ret; -} - -static inline long long __kprobes -insnslot_llret_3arg_rflags(long r0, long r1, long r2, long cpsr, - insn_llret_3arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long rr1 asm("r1") = r1; - register long rr2 asm("r2") = r2; - register long ret0 asm("r0"); - register long ret1 asm("r1"); - union reg_pair fnr; - - __asm__ __volatile__ ( - "msr cpsr_fs, %[cpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - : "=r" (ret0), "=r" (ret1) - : "0" (rr0), "r" (rr1), "r" (rr2), - [cpsr] "r" (cpsr), [fn] "r" (fn) - : "lr", "cc" - ); - fnr.r0 = ret0; - fnr.r1 = ret1; - return fnr.dr; -} - -static inline long __kprobes -insnslot_4arg_rflags(long r0, long r1, long r2, long r3, long cpsr, - insn_4arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long rr1 asm("r1") = r1; - register long rr2 asm("r2") = r2; - register long rr3 asm("r3") = r3; - register long ret asm("r0"); - - __asm__ __volatile__ ( - "msr cpsr_fs, %[cpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - : "=r" (ret) - : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), - [cpsr] "r" (cpsr), [fn] "r" (fn) - : "lr", "cc" - ); - return ret; -} - -static inline long __kprobes -insnslot_1arg_rwflags(long r0, long *cpsr, insn_1arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long ret asm("r0"); - long oldcpsr = *cpsr; - long newcpsr; - - __asm__ __volatile__ ( - "msr cpsr_fs, %[oldcpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - "mrs %[newcpsr], cpsr \n\t" - : "=r" (ret), [newcpsr] "=r" (newcpsr) - : "0" (rr0), [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) - : "lr", "cc" - ); - *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); - return ret; -} - -static inline long __kprobes -insnslot_2arg_rwflags(long r0, long r1, long *cpsr, insn_2arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long rr1 asm("r1") = r1; - register long ret asm("r0"); - long oldcpsr = *cpsr; - long newcpsr; - - __asm__ __volatile__ ( - "msr cpsr_fs, %[oldcpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - "mrs %[newcpsr], cpsr \n\t" - : "=r" (ret), [newcpsr] "=r" (newcpsr) - : "0" (rr0), "r" (rr1), [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) - : "lr", "cc" - ); - *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); - return ret; -} - -static inline long __kprobes -insnslot_3arg_rwflags(long r0, long r1, long r2, long *cpsr, - insn_3arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long rr1 asm("r1") = r1; - register long rr2 asm("r2") = r2; - register long ret asm("r0"); - long oldcpsr = *cpsr; - long newcpsr; - - __asm__ __volatile__ ( - "msr cpsr_fs, %[oldcpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - "mrs %[newcpsr], cpsr \n\t" - : "=r" (ret), [newcpsr] "=r" (newcpsr) - : "0" (rr0), "r" (rr1), "r" (rr2), - [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) - : "lr", "cc" - ); - *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); - return ret; -} - -static inline long __kprobes -insnslot_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr, - insn_4arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long rr1 asm("r1") = r1; - register long rr2 asm("r2") = r2; - register long rr3 asm("r3") = r3; - register long ret asm("r0"); - long oldcpsr = *cpsr; - long newcpsr; - - __asm__ __volatile__ ( - "msr cpsr_fs, %[oldcpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - "mrs %[newcpsr], cpsr \n\t" - : "=r" (ret), [newcpsr] "=r" (newcpsr) - : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), - [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) - : "lr", "cc" - ); - *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); - return ret; -} - -static inline long long __kprobes -insnslot_llret_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr, - insn_llret_4arg_fn_t *fn) -{ - register long rr0 asm("r0") = r0; - register long rr1 asm("r1") = r1; - register long rr2 asm("r2") = r2; - register long rr3 asm("r3") = r3; - register long ret0 asm("r0"); - register long ret1 asm("r1"); - long oldcpsr = *cpsr; - long newcpsr; - union reg_pair fnr; - - __asm__ __volatile__ ( - "msr cpsr_fs, %[oldcpsr] \n\t" - "mov lr, pc \n\t" - "mov pc, %[fn] \n\t" - "mrs %[newcpsr], cpsr \n\t" - : "=r" (ret0), "=r" (ret1), [newcpsr] "=r" (newcpsr) - : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), - [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) - : "lr", "cc" - ); - *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); - fnr.r0 = ret0; - fnr.r1 = ret1; - return fnr.dr; -} - -/* - * To avoid the complications of mimicing single-stepping on a - * processor without a Next-PC or a single-step mode, and to - * avoid having to deal with the side-effects of boosting, we - * simulate or emulate (almost) all ARM instructions. - * - * "Simulation" is where the instruction's behavior is duplicated in - * C code. "Emulation" is where the original instruction is rewritten - * and executed, often by altering its registers. - * - * By having all behavior of the kprobe'd instruction completed before - * returning from the kprobe_handler(), all locks (scheduler and - * interrupt) can safely be released. There is no need for secondary - * breakpoints, no race with MP or preemptable kernels, nor having to - * clean up resources counts at a later time impacting overall system - * performance. By rewriting the instruction, only the minimum registers - * need to be loaded and saved back optimizing performance. - * - * Calling the insnslot_*_rwflags version of a function doesn't hurt - * anything even when the CPSR flags aren't updated by the - * instruction. It's just a little slower in return for saving - * a little space by not having a duplicate function that doesn't - * update the flags. (The same optimization can be said for - * instructions that do or don't perform register writeback) - * Also, instructions can either read the flags, only write the - * flags, or read and write the flags. To save combinations - * rather than for sheer performance, flag functions just assume - * read and write of flags. - */ - -static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - long iaddr = (long)p->addr; - int disp = branch_displacement(insn); - - if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) - return; - - if (insn & (1 << 24)) - regs->ARM_lr = iaddr + 4; - - regs->ARM_pc = iaddr + 8 + disp; -} - -static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs) -{ - kprobe_opcode_t insn = p->opcode; - long iaddr = (long)p->addr; - int disp = branch_displacement(insn); - - regs->ARM_lr = iaddr + 4; - regs->ARM_pc = iaddr + 8 + disp + ((insn >> 23) & 0x2); - regs->ARM_cpsr |= PSR_T_BIT; -} - -static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rm = insn & 0xf; - long rmv = regs->uregs[rm]; - - if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) - return; - - if (insn & (1 << 5)) - regs->ARM_lr = (long)p->addr + 4; - - regs->ARM_pc = rmv & ~0x1; - regs->ARM_cpsr &= ~PSR_T_BIT; - if (rmv & 0x1) - regs->ARM_cpsr |= PSR_T_BIT; -} - -static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rn = (insn >> 16) & 0xf; - int lbit = insn & (1 << 20); - int wbit = insn & (1 << 21); - int ubit = insn & (1 << 23); - int pbit = insn & (1 << 24); - long *addr = (long *)regs->uregs[rn]; - int reg_bit_vector; - int reg_count; - - if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) - return; - - reg_count = 0; - reg_bit_vector = insn & 0xffff; - while (reg_bit_vector) { - reg_bit_vector &= (reg_bit_vector - 1); - ++reg_count; - } - - if (!ubit) - addr -= reg_count; - addr += (!pbit == !ubit); - - reg_bit_vector = insn & 0xffff; - while (reg_bit_vector) { - int reg = __ffs(reg_bit_vector); - reg_bit_vector &= (reg_bit_vector - 1); - if (lbit) - regs->uregs[reg] = *addr++; - else - *addr++ = regs->uregs[reg]; - } - - if (wbit) { - if (!ubit) - addr -= reg_count; - addr -= (!pbit == !ubit); - regs->uregs[rn] = (long)addr; - } -} - -static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - - if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) - return; - - regs->ARM_pc = (long)p->addr + str_pc_offset; - simulate_ldm1stm1(p, regs); - regs->ARM_pc = (long)p->addr + 4; -} - -static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs *regs) -{ - regs->uregs[12] = regs->uregs[13]; -} - -static void __kprobes emulate_ldcstc(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rn = (insn >> 16) & 0xf; - long rnv = regs->uregs[rn]; - - /* Save Rn in case of writeback. */ - regs->uregs[rn] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs) -{ - insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - int rm = insn & 0xf; /* rm may be invalid, don't care. */ - - /* Not following the C calling convention here, so need asm(). */ - __asm__ __volatile__ ( - "ldr r0, %[rn] \n\t" - "ldr r1, %[rm] \n\t" - "msr cpsr_fs, %[cpsr]\n\t" - "mov lr, pc \n\t" - "mov pc, %[i_fn] \n\t" - "str r0, %[rn] \n\t" /* in case of writeback */ - "str r2, %[rd0] \n\t" - "str r3, %[rd1] \n\t" - : [rn] "+m" (regs->uregs[rn]), - [rd0] "=m" (regs->uregs[rd]), - [rd1] "=m" (regs->uregs[rd+1]) - : [rm] "m" (regs->uregs[rm]), - [cpsr] "r" (regs->ARM_cpsr), - [i_fn] "r" (i_fn) - : "r0", "r1", "r2", "r3", "lr", "cc" - ); -} - -static void __kprobes emulate_strd(struct kprobe *p, struct pt_regs *regs) -{ - insn_4arg_fn_t *i_fn = (insn_4arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - int rm = insn & 0xf; - long rnv = regs->uregs[rn]; - long rmv = regs->uregs[rm]; /* rm/rmv may be invalid, don't care. */ - - regs->uregs[rn] = insnslot_4arg_rflags(rnv, rmv, regs->uregs[rd], - regs->uregs[rd+1], - regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_ldr(struct kprobe *p, struct pt_regs *regs) -{ - insn_llret_3arg_fn_t *i_fn = (insn_llret_3arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - long ppc = (long)p->addr + 8; - union reg_pair fnr; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - int rm = insn & 0xf; - long rdv; - long rnv = (rn == 15) ? ppc : regs->uregs[rn]; - long rmv = (rm == 15) ? ppc : regs->uregs[rm]; - long cpsr = regs->ARM_cpsr; - - fnr.dr = insnslot_llret_3arg_rflags(rnv, 0, rmv, cpsr, i_fn); - regs->uregs[rn] = fnr.r0; /* Save Rn in case of writeback. */ - rdv = fnr.r1; - - if (rd == 15) { -#if __LINUX_ARM_ARCH__ >= 5 - cpsr &= ~PSR_T_BIT; - if (rdv & 0x1) - cpsr |= PSR_T_BIT; - regs->ARM_cpsr = cpsr; - rdv &= ~0x1; -#else - rdv &= ~0x2; -#endif - } - regs->uregs[rd] = rdv; -} - -static void __kprobes emulate_str(struct kprobe *p, struct pt_regs *regs) -{ - insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - long iaddr = (long)p->addr; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - int rm = insn & 0xf; - long rdv = (rd == 15) ? iaddr + str_pc_offset : regs->uregs[rd]; - long rnv = (rn == 15) ? iaddr + 8 : regs->uregs[rn]; - long rmv = regs->uregs[rm]; /* rm/rmv may be invalid, don't care. */ - - /* Save Rn in case of writeback. */ - regs->uregs[rn] = - insnslot_3arg_rflags(rnv, rdv, rmv, regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_mrrc(struct kprobe *p, struct pt_regs *regs) -{ - insn_llret_0arg_fn_t *i_fn = (insn_llret_0arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - union reg_pair fnr; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - - fnr.dr = insnslot_llret_0arg_rflags(regs->ARM_cpsr, i_fn); - regs->uregs[rn] = fnr.r0; - regs->uregs[rd] = fnr.r1; -} - -static void __kprobes emulate_mcrr(struct kprobe *p, struct pt_regs *regs) -{ - insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - long rnv = regs->uregs[rn]; - long rdv = regs->uregs[rd]; - - insnslot_2arg_rflags(rnv, rdv, regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_sat(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rm = insn & 0xf; - long rmv = regs->uregs[rm]; - - /* Writes Q flag */ - regs->uregs[rd] = insnslot_1arg_rwflags(rmv, ®s->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_sel(struct kprobe *p, struct pt_regs *regs) -{ - insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - int rm = insn & 0xf; - long rnv = regs->uregs[rn]; - long rmv = regs->uregs[rm]; - - /* Reads GE bits */ - regs->uregs[rd] = insnslot_2arg_rflags(rnv, rmv, regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_none(struct kprobe *p, struct pt_regs *regs) -{ - insn_0arg_fn_t *i_fn = (insn_0arg_fn_t *)&p->ainsn.insn[0]; - - insnslot_0arg_rflags(regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_rd12(struct kprobe *p, struct pt_regs *regs) -{ - insn_0arg_fn_t *i_fn = (insn_0arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - - regs->uregs[rd] = insnslot_0arg_rflags(regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_ird12(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int ird = (insn >> 12) & 0xf; - - insnslot_1arg_rflags(regs->uregs[ird], regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_rn16(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rn = (insn >> 16) & 0xf; - long rnv = regs->uregs[rn]; - - insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn); -} - -static void __kprobes emulate_rd12rm0(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rm = insn & 0xf; - long rmv = regs->uregs[rm]; - - regs->uregs[rd] = insnslot_1arg_rflags(rmv, regs->ARM_cpsr, i_fn); -} - -static void __kprobes -emulate_rd12rn16rm0_rwflags(struct kprobe *p, struct pt_regs *regs) -{ - insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - int rm = insn & 0xf; - long rnv = regs->uregs[rn]; - long rmv = regs->uregs[rm]; - - regs->uregs[rd] = - insnslot_2arg_rwflags(rnv, rmv, ®s->ARM_cpsr, i_fn); -} - -static void __kprobes -emulate_rd16rn12rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs) -{ - insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 16) & 0xf; - int rn = (insn >> 12) & 0xf; - int rs = (insn >> 8) & 0xf; - int rm = insn & 0xf; - long rnv = regs->uregs[rn]; - long rsv = regs->uregs[rs]; - long rmv = regs->uregs[rm]; - - regs->uregs[rd] = - insnslot_3arg_rwflags(rnv, rsv, rmv, ®s->ARM_cpsr, i_fn); -} - -static void __kprobes -emulate_rd16rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs) -{ - insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 16) & 0xf; - int rs = (insn >> 8) & 0xf; - int rm = insn & 0xf; - long rsv = regs->uregs[rs]; - long rmv = regs->uregs[rm]; - - regs->uregs[rd] = - insnslot_2arg_rwflags(rsv, rmv, ®s->ARM_cpsr, i_fn); -} - -static void __kprobes -emulate_rdhi16rdlo12rs8rm0_rwflags(struct kprobe *p, struct pt_regs *regs) -{ - insn_llret_4arg_fn_t *i_fn = (insn_llret_4arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - union reg_pair fnr; - int rdhi = (insn >> 16) & 0xf; - int rdlo = (insn >> 12) & 0xf; - int rs = (insn >> 8) & 0xf; - int rm = insn & 0xf; - long rsv = regs->uregs[rs]; - long rmv = regs->uregs[rm]; - - fnr.dr = insnslot_llret_4arg_rwflags(regs->uregs[rdhi], - regs->uregs[rdlo], rsv, rmv, - ®s->ARM_cpsr, i_fn); - regs->uregs[rdhi] = fnr.r0; - regs->uregs[rdlo] = fnr.r1; -} - -static void __kprobes -emulate_alu_imm_rflags(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn]; - - regs->uregs[rd] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn); -} - -static void __kprobes -emulate_alu_imm_rwflags(struct kprobe *p, struct pt_regs *regs) -{ - insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; - long rnv = (rn == 15) ? (long)p->addr + 8 : regs->uregs[rn]; - - regs->uregs[rd] = insnslot_1arg_rwflags(rnv, ®s->ARM_cpsr, i_fn); -} - -static void __kprobes -emulate_alu_rflags(struct kprobe *p, struct pt_regs *regs) -{ - insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - long ppc = (long)p->addr + 8; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; /* rn/rnv/rs/rsv may be */ - int rs = (insn >> 8) & 0xf; /* invalid, don't care. */ - int rm = insn & 0xf; - long rnv = (rn == 15) ? ppc : regs->uregs[rn]; - long rmv = (rm == 15) ? ppc : regs->uregs[rm]; - long rsv = regs->uregs[rs]; - - regs->uregs[rd] = - insnslot_3arg_rflags(rnv, rmv, rsv, regs->ARM_cpsr, i_fn); -} - -static void __kprobes -emulate_alu_rwflags(struct kprobe *p, struct pt_regs *regs) -{ - insn_3arg_fn_t *i_fn = (insn_3arg_fn_t *)&p->ainsn.insn[0]; - kprobe_opcode_t insn = p->opcode; - long ppc = (long)p->addr + 8; - int rd = (insn >> 12) & 0xf; - int rn = (insn >> 16) & 0xf; /* rn/rnv/rs/rsv may be */ - int rs = (insn >> 8) & 0xf; /* invalid, don't care. */ - int rm = insn & 0xf; - long rnv = (rn == 15) ? ppc : regs->uregs[rn]; - long rmv = (rm == 15) ? ppc : regs->uregs[rm]; - long rsv = regs->uregs[rs]; - - regs->uregs[rd] = - insnslot_3arg_rwflags(rnv, rmv, rsv, ®s->ARM_cpsr, i_fn); -} - -static enum kprobe_insn __kprobes -prep_emulate_ldr_str(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - int ibit = (insn & (1 << 26)) ? 25 : 22; - - insn &= 0xfff00fff; - insn |= 0x00001000; /* Rn = r0, Rd = r1 */ - if (insn & (1 << ibit)) { - insn &= ~0xf; - insn |= 2; /* Rm = r2 */ - } - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? emulate_ldr : emulate_str; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -prep_emulate_rd12rm0(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - insn &= 0xffff0ff0; /* Rd = r0, Rm = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rd12rm0; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -prep_emulate_rd12(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - insn &= 0xffff0fff; /* Rd = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rd12; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -prep_emulate_rd12rn16rm0_wflags(kprobe_opcode_t insn, - struct arch_specific_insn *asi) -{ - insn &= 0xfff00ff0; /* Rd = r0, Rn = r0 */ - insn |= 0x00000001; /* Rm = r1 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rd12rn16rm0_rwflags; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -prep_emulate_rd16rs8rm0_wflags(kprobe_opcode_t insn, - struct arch_specific_insn *asi) -{ - insn &= 0xfff0f0f0; /* Rd = r0, Rs = r0 */ - insn |= 0x00000001; /* Rm = r1 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rd16rs8rm0_rwflags; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -prep_emulate_rd16rn12rs8rm0_wflags(kprobe_opcode_t insn, - struct arch_specific_insn *asi) -{ - insn &= 0xfff000f0; /* Rd = r0, Rn = r0 */ - insn |= 0x00000102; /* Rs = r1, Rm = r2 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rd16rn12rs8rm0_rwflags; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -prep_emulate_rdhi16rdlo12rs8rm0_wflags(kprobe_opcode_t insn, - struct arch_specific_insn *asi) -{ - insn &= 0xfff000f0; /* RdHi = r0, RdLo = r1 */ - insn |= 0x00001203; /* Rs = r2, Rm = r3 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rdhi16rdlo12rs8rm0_rwflags; - return INSN_GOOD; -} - -/* - * For the instruction masking and comparisons in all the "space_*" - * functions below, Do _not_ rearrange the order of tests unless - * you're very, very sure of what you are doing. For the sake of - * efficiency, the masks for some tests sometimes assume other test - * have been done prior to them so the number of patterns to test - * for an instruction set can be as broad as possible to reduce the - * number of tests needed. - */ - -static enum kprobe_insn __kprobes -space_1111(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* CPS mmod == 1 : 1111 0001 0000 xx10 xxxx xxxx xx0x xxxx */ - /* RFE : 1111 100x x0x1 xxxx xxxx 1010 xxxx xxxx */ - /* SRS : 1111 100x x1x0 1101 xxxx 0101 xxxx xxxx */ - if ((insn & 0xfff30020) == 0xf1020000 || - (insn & 0xfe500f00) == 0xf8100a00 || - (insn & 0xfe5f0f00) == 0xf84d0500) - return INSN_REJECTED; - - /* PLD : 1111 01x1 x101 xxxx xxxx xxxx xxxx xxxx : */ - if ((insn & 0xfd700000) == 0xf4500000) { - insn &= 0xfff0ffff; /* Rn = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_rn16; - return INSN_GOOD; - } - - /* BLX(1) : 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx : */ - if ((insn & 0xfe000000) == 0xfa000000) { - asi->insn_handler = simulate_blx1; - return INSN_GOOD_NO_SLOT; - } - - /* SETEND : 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */ - /* CDP2 : 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ - if ((insn & 0xffff00f0) == 0xf1010000 || - (insn & 0xff000010) == 0xfe000000) { - asi->insn[0] = insn; - asi->insn_handler = emulate_none; - return INSN_GOOD; - } - - /* MCRR2 : 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */ - /* MRRC2 : 1111 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd != Rn) */ - if ((insn & 0xffe00000) == 0xfc400000) { - insn &= 0xfff00fff; /* Rn = r0 */ - insn |= 0x00001000; /* Rd = r1 */ - asi->insn[0] = insn; - asi->insn_handler = - (insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr; - return INSN_GOOD; - } - - /* LDC2 : 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ - /* STC2 : 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ - if ((insn & 0xfe000000) == 0xfc000000) { - insn &= 0xfff0ffff; /* Rn = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_ldcstc; - return INSN_GOOD; - } - - /* MCR2 : 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ - /* MRC2 : 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ - insn &= 0xffff0fff; /* Rd = r0 */ - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -space_cccc_000x(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* cccc 0001 0xx0 xxxx xxxx xxxx xxxx xxx0 xxxx */ - if ((insn & 0x0f900010) == 0x01000000) { - - /* BXJ : cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */ - /* MSR : cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */ - if ((insn & 0x0ff000f0) == 0x01200020 || - (insn & 0x0fb000f0) == 0x01200000) - return INSN_REJECTED; - - /* MRS : cccc 0001 0x00 xxxx xxxx xxxx 0000 xxxx */ - if ((insn & 0x0fb00010) == 0x01000000) - return prep_emulate_rd12(insn, asi); - - /* SMLALxy : cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */ - if ((insn & 0x0ff00090) == 0x01400080) - return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi); - - /* SMULWy : cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */ - /* SMULxy : cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */ - if ((insn & 0x0ff000b0) == 0x012000a0 || - (insn & 0x0ff00090) == 0x01600080) - return prep_emulate_rd16rs8rm0_wflags(insn, asi); - - /* SMLAxy : cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx : Q */ - /* SMLAWy : cccc 0001 0010 xxxx xxxx xxxx 0x00 xxxx : Q */ - return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); - - } - - /* cccc 0001 0xx0 xxxx xxxx xxxx xxxx 0xx1 xxxx */ - else if ((insn & 0x0f900090) == 0x01000010) { - - /* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */ - if ((insn & 0xfff000f0) == 0xe1200070) - return INSN_REJECTED; - - /* BLX(2) : cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */ - /* BX : cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */ - if ((insn & 0x0ff000d0) == 0x01200010) { - asi->insn[0] = truecc_insn(insn); - asi->insn_handler = simulate_blx2bx; - return INSN_GOOD; - } - - /* CLZ : cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */ - if ((insn & 0x0ff000f0) == 0x01600010) - return prep_emulate_rd12rm0(insn, asi); - - /* QADD : cccc 0001 0000 xxxx xxxx xxxx 0101 xxxx :Q */ - /* QSUB : cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx :Q */ - /* QDADD : cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx :Q */ - /* QDSUB : cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx :Q */ - return prep_emulate_rd12rn16rm0_wflags(insn, asi); - } - - /* cccc 0000 xxxx xxxx xxxx xxxx xxxx 1001 xxxx */ - else if ((insn & 0x0f000090) == 0x00000090) { - - /* MUL : cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx : */ - /* MULS : cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx :cc */ - /* MLA : cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx : */ - /* MLAS : cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx :cc */ - /* UMAAL : cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx : */ - /* UMULL : cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx : */ - /* UMULLS : cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx :cc */ - /* UMLAL : cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx : */ - /* UMLALS : cccc 0000 1011 xxxx xxxx xxxx 1001 xxxx :cc */ - /* SMULL : cccc 0000 1100 xxxx xxxx xxxx 1001 xxxx : */ - /* SMULLS : cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx :cc */ - /* SMLAL : cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx : */ - /* SMLALS : cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx :cc */ - if ((insn & 0x0fe000f0) == 0x00000090) { - return prep_emulate_rd16rs8rm0_wflags(insn, asi); - } else if ((insn & 0x0fe000f0) == 0x00200090) { - return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); - } else { - return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi); - } - } - - /* cccc 000x xxxx xxxx xxxx xxxx xxxx 1xx1 xxxx */ - else if ((insn & 0x0e000090) == 0x00000090) { - - /* SWP : cccc 0001 0000 xxxx xxxx xxxx 1001 xxxx */ - /* SWPB : cccc 0001 0100 xxxx xxxx xxxx 1001 xxxx */ - /* LDRD : cccc 000x xxx0 xxxx xxxx xxxx 1101 xxxx */ - /* STRD : cccc 000x xxx0 xxxx xxxx xxxx 1111 xxxx */ - /* STREX : cccc 0001 1000 xxxx xxxx xxxx 1001 xxxx */ - /* LDREX : cccc 0001 1001 xxxx xxxx xxxx 1001 xxxx */ - /* LDRH : cccc 000x xxx1 xxxx xxxx xxxx 1011 xxxx */ - /* STRH : cccc 000x xxx0 xxxx xxxx xxxx 1011 xxxx */ - /* LDRSB : cccc 000x xxx1 xxxx xxxx xxxx 1101 xxxx */ - /* LDRSH : cccc 000x xxx1 xxxx xxxx xxxx 1111 xxxx */ - if ((insn & 0x0fb000f0) == 0x01000090) { - /* SWP/SWPB */ - return prep_emulate_rd12rn16rm0_wflags(insn, asi); - } else if ((insn & 0x0e1000d0) == 0x00000d0) { - /* STRD/LDRD */ - insn &= 0xfff00fff; - insn |= 0x00002000; /* Rn = r0, Rd = r2 */ - if (insn & (1 << 22)) { - /* I bit */ - insn &= ~0xf; - insn |= 1; /* Rm = r1 */ - } - asi->insn[0] = insn; - asi->insn_handler = - (insn & (1 << 5)) ? emulate_strd : emulate_ldrd; - return INSN_GOOD; - } - - return prep_emulate_ldr_str(insn, asi); - } - - /* cccc 000x xxxx xxxx xxxx xxxx xxxx xxxx xxxx */ - - /* - * ALU op with S bit and Rd == 15 : - * cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx - */ - if ((insn & 0x0e10f000) == 0x0010f000) - return INSN_REJECTED; - - /* - * "mov ip, sp" is the most common kprobe'd instruction by far. - * Check and optimize for it explicitly. - */ - if (insn == 0xe1a0c00d) { - asi->insn_handler = simulate_mov_ipsp; - return INSN_GOOD_NO_SLOT; - } - - /* - * Data processing: Immediate-shift / Register-shift - * ALU op : cccc 000x xxxx xxxx xxxx xxxx xxxx xxxx - * CPY : cccc 0001 1010 xxxx xxxx 0000 0000 xxxx - * MOV : cccc 0001 101x xxxx xxxx xxxx xxxx xxxx - * *S (bit 20) updates condition codes - * ADC/SBC/RSC reads the C flag - */ - insn &= 0xfff00ff0; /* Rn = r0, Rd = r0 */ - insn |= 0x00000001; /* Rm = r1 */ - if (insn & 0x010) { - insn &= 0xfffff0ff; /* register shift */ - insn |= 0x00000200; /* Rs = r2 */ - } - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ - emulate_alu_rwflags : emulate_alu_rflags; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -space_cccc_001x(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* - * MSR : cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx - * Undef : cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx - * ALU op with S bit and Rd == 15 : - * cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx - */ - if ((insn & 0x0fb00000) == 0x03200000 || /* MSR */ - (insn & 0x0ff00000) == 0x03400000 || /* Undef */ - (insn & 0x0e10f000) == 0x0210f000) /* ALU s-bit, R15 */ - return INSN_REJECTED; - - /* - * Data processing: 32-bit Immediate - * ALU op : cccc 001x xxxx xxxx xxxx xxxx xxxx xxxx - * MOV : cccc 0011 101x xxxx xxxx xxxx xxxx xxxx - * *S (bit 20) updates condition codes - * ADC/SBC/RSC reads the C flag - */ - insn &= 0xffff0fff; /* Rd = r0 */ - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? /* S-bit */ - emulate_alu_imm_rwflags : emulate_alu_imm_rflags; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -space_cccc_0110__1(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* SEL : cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx GE: !!! */ - if ((insn & 0x0ff000f0) == 0x068000b0) { - insn &= 0xfff00ff0; /* Rd = r0, Rn = r0 */ - insn |= 0x00000001; /* Rm = r1 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_sel; - return INSN_GOOD; - } - - /* SSAT : cccc 0110 101x xxxx xxxx xxxx xx01 xxxx :Q */ - /* USAT : cccc 0110 111x xxxx xxxx xxxx xx01 xxxx :Q */ - /* SSAT16 : cccc 0110 1010 xxxx xxxx xxxx 0011 xxxx :Q */ - /* USAT16 : cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx :Q */ - if ((insn & 0x0fa00030) == 0x06a00010 || - (insn & 0x0fb000f0) == 0x06a00030) { - insn &= 0xffff0ff0; /* Rd = r0, Rm = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_sat; - return INSN_GOOD; - } - - /* REV : cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */ - /* REV16 : cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */ - /* REVSH : cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */ - if ((insn & 0x0ff00070) == 0x06b00030 || - (insn & 0x0ff000f0) == 0x06f000b0) - return prep_emulate_rd12rm0(insn, asi); - - /* SADD16 : cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx :GE */ - /* SADDSUBX : cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx :GE */ - /* SSUBADDX : cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx :GE */ - /* SSUB16 : cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx :GE */ - /* SADD8 : cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx :GE */ - /* SSUB8 : cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx :GE */ - /* QADD16 : cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx : */ - /* QADDSUBX : cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx : */ - /* QSUBADDX : cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx : */ - /* QSUB16 : cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx : */ - /* QADD8 : cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx : */ - /* QSUB8 : cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx : */ - /* SHADD16 : cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx : */ - /* SHADDSUBX : cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx : */ - /* SHSUBADDX : cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx : */ - /* SHSUB16 : cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx : */ - /* SHADD8 : cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx : */ - /* SHSUB8 : cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx : */ - /* UADD16 : cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx :GE */ - /* UADDSUBX : cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx :GE */ - /* USUBADDX : cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx :GE */ - /* USUB16 : cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx :GE */ - /* UADD8 : cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx :GE */ - /* USUB8 : cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx :GE */ - /* UQADD16 : cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx : */ - /* UQADDSUBX : cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx : */ - /* UQSUBADDX : cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx : */ - /* UQSUB16 : cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx : */ - /* UQADD8 : cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx : */ - /* UQSUB8 : cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx : */ - /* UHADD16 : cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx : */ - /* UHADDSUBX : cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx : */ - /* UHSUBADDX : cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx : */ - /* UHSUB16 : cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx : */ - /* UHADD8 : cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx : */ - /* UHSUB8 : cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx : */ - /* PKHBT : cccc 0110 1000 xxxx xxxx xxxx x001 xxxx : */ - /* PKHTB : cccc 0110 1000 xxxx xxxx xxxx x101 xxxx : */ - /* SXTAB16 : cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx : */ - /* SXTB : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx : */ - /* SXTAB : cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx : */ - /* SXTAH : cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx : */ - /* UXTAB16 : cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx : */ - /* UXTAB : cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx : */ - /* UXTAH : cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx : */ - return prep_emulate_rd12rn16rm0_wflags(insn, asi); -} - -static enum kprobe_insn __kprobes -space_cccc_0111__1(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* Undef : cccc 0111 1111 xxxx xxxx xxxx 1111 xxxx */ - if ((insn & 0x0ff000f0) == 0x03f000f0) - return INSN_REJECTED; - - /* USADA8 : cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */ - /* USAD8 : cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */ - if ((insn & 0x0ff000f0) == 0x07800010) - return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); - - /* SMLALD : cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */ - /* SMLSLD : cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */ - if ((insn & 0x0ff00090) == 0x07400010) - return prep_emulate_rdhi16rdlo12rs8rm0_wflags(insn, asi); - - /* SMLAD : cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx :Q */ - /* SMLSD : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx :Q */ - /* SMMLA : cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx : */ - /* SMMLS : cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx : */ - if ((insn & 0x0ff00090) == 0x07000010 || - (insn & 0x0ff000d0) == 0x07500010 || - (insn & 0x0ff000d0) == 0x075000d0) - return prep_emulate_rd16rn12rs8rm0_wflags(insn, asi); - - /* SMUSD : cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx : */ - /* SMUAD : cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx :Q */ - /* SMMUL : cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx : */ - return prep_emulate_rd16rs8rm0_wflags(insn, asi); -} - -static enum kprobe_insn __kprobes -space_cccc_01xx(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* LDR : cccc 01xx x0x1 xxxx xxxx xxxx xxxx xxxx */ - /* LDRB : cccc 01xx x1x1 xxxx xxxx xxxx xxxx xxxx */ - /* LDRBT : cccc 01x0 x111 xxxx xxxx xxxx xxxx xxxx */ - /* LDRT : cccc 01x0 x011 xxxx xxxx xxxx xxxx xxxx */ - /* STR : cccc 01xx x0x0 xxxx xxxx xxxx xxxx xxxx */ - /* STRB : cccc 01xx x1x0 xxxx xxxx xxxx xxxx xxxx */ - /* STRBT : cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */ - /* STRT : cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */ - return prep_emulate_ldr_str(insn, asi); -} - -static enum kprobe_insn __kprobes -space_cccc_100x(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* LDM(2) : cccc 100x x101 xxxx 0xxx xxxx xxxx xxxx */ - /* LDM(3) : cccc 100x x1x1 xxxx 1xxx xxxx xxxx xxxx */ - if ((insn & 0x0e708000) == 0x85000000 || - (insn & 0x0e508000) == 0x85010000) - return INSN_REJECTED; - - /* LDM(1) : cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */ - /* STM(1) : cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */ - asi->insn[0] = truecc_insn(insn); - asi->insn_handler = ((insn & 0x108000) == 0x008000) ? /* STM & R15 */ - simulate_stm1_pc : simulate_ldm1stm1; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -space_cccc_101x(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* B : cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */ - /* BL : cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */ - asi->insn[0] = truecc_insn(insn); - asi->insn_handler = simulate_bbl; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -space_cccc_1100_010x(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* MCRR : cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */ - /* MRRC : cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx : (Rd!=Rn) */ - insn &= 0xfff00fff; - insn |= 0x00001000; /* Rn = r0, Rd = r1 */ - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? emulate_mrrc : emulate_mcrr; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -space_cccc_110x(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* LDC : cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ - /* STC : cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ - insn &= 0xfff0ffff; /* Rn = r0 */ - asi->insn[0] = insn; - asi->insn_handler = emulate_ldcstc; - return INSN_GOOD; -} - -static enum kprobe_insn __kprobes -space_cccc_111x(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - /* BKPT : 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */ - /* SWI : cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */ - if ((insn & 0xfff000f0) == 0xe1200070 || - (insn & 0x0f000000) == 0x0f000000) - return INSN_REJECTED; - - /* CDP : cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ - if ((insn & 0x0f000010) == 0x0e000000) { - asi->insn[0] = insn; - asi->insn_handler = emulate_none; - return INSN_GOOD; - } - - /* MCR : cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ - /* MRC : cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ - insn &= 0xffff0fff; /* Rd = r0 */ - asi->insn[0] = insn; - asi->insn_handler = (insn & (1 << 20)) ? emulate_rd12 : emulate_ird12; - return INSN_GOOD; -} - -/* Return: - * INSN_REJECTED If instruction is one not allowed to kprobe, - * INSN_GOOD If instruction is supported and uses instruction slot, - * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. - * - * For instructions we don't want to kprobe (INSN_REJECTED return result): - * These are generally ones that modify the processor state making - * them "hard" to simulate such as switches processor modes or - * make accesses in alternate modes. Any of these could be simulated - * if the work was put into it, but low return considering they - * should also be very rare. - */ -enum kprobe_insn __kprobes -arm_kprobe_decode_insn(kprobe_opcode_t insn, struct arch_specific_insn *asi) -{ - asi->insn[1] = KPROBE_RETURN_INSTRUCTION; - - if ((insn & 0xf0000000) == 0xf0000000) { - - return space_1111(insn, asi); - - } else if ((insn & 0x0e000000) == 0x00000000) { - - return space_cccc_000x(insn, asi); - - } else if ((insn & 0x0e000000) == 0x02000000) { - - return space_cccc_001x(insn, asi); - - } else if ((insn & 0x0f000010) == 0x06000010) { - - return space_cccc_0110__1(insn, asi); - - } else if ((insn & 0x0f000010) == 0x07000010) { - - return space_cccc_0111__1(insn, asi); - - } else if ((insn & 0x0c000000) == 0x04000000) { - - return space_cccc_01xx(insn, asi); - - } else if ((insn & 0x0e000000) == 0x08000000) { - - return space_cccc_100x(insn, asi); - - } else if ((insn & 0x0e000000) == 0x0a000000) { - - return space_cccc_101x(insn, asi); - - } else if ((insn & 0x0fe00000) == 0x0c400000) { - - return space_cccc_1100_010x(insn, asi); - - } else if ((insn & 0x0e000000) == 0x0c400000) { - - return space_cccc_110x(insn, asi); - - } - - return space_cccc_111x(insn, asi); -} - -void __init arm_kprobe_decode_init(void) -{ - find_str_pc_offset(); -} - - -/* - * All ARM instructions listed below. - * - * Instructions and their general purpose registers are given. - * If a particular register may not use R15, it is prefixed with a "!". - * If marked with a "*" means the value returned by reading R15 - * is implementation defined. - * - * ADC/ADD/AND/BIC/CMN/CMP/EOR/MOV/MVN/ORR/RSB/RSC/SBC/SUB/TEQ - * TST: Rd, Rn, Rm, !Rs - * BX: Rm - * BLX(2): !Rm - * BX: Rm (R15 legal, but discouraged) - * BXJ: !Rm, - * CLZ: !Rd, !Rm - * CPY: Rd, Rm - * LDC/2,STC/2 immediate offset & unindex: Rn - * LDC/2,STC/2 immediate pre/post-indexed: !Rn - * LDM(1/3): !Rn, register_list - * LDM(2): !Rn, !register_list - * LDR,STR,PLD immediate offset: Rd, Rn - * LDR,STR,PLD register offset: Rd, Rn, !Rm - * LDR,STR,PLD scaled register offset: Rd, !Rn, !Rm - * LDR,STR immediate pre/post-indexed: Rd, !Rn - * LDR,STR register pre/post-indexed: Rd, !Rn, !Rm - * LDR,STR scaled register pre/post-indexed: Rd, !Rn, !Rm - * LDRB,STRB immediate offset: !Rd, Rn - * LDRB,STRB register offset: !Rd, Rn, !Rm - * LDRB,STRB scaled register offset: !Rd, !Rn, !Rm - * LDRB,STRB immediate pre/post-indexed: !Rd, !Rn - * LDRB,STRB register pre/post-indexed: !Rd, !Rn, !Rm - * LDRB,STRB scaled register pre/post-indexed: !Rd, !Rn, !Rm - * LDRT,LDRBT,STRBT immediate pre/post-indexed: !Rd, !Rn - * LDRT,LDRBT,STRBT register pre/post-indexed: !Rd, !Rn, !Rm - * LDRT,LDRBT,STRBT scaled register pre/post-indexed: !Rd, !Rn, !Rm - * LDRH/SH/SB/D,STRH/SH/SB/D immediate offset: !Rd, Rn - * LDRH/SH/SB/D,STRH/SH/SB/D register offset: !Rd, Rn, !Rm - * LDRH/SH/SB/D,STRH/SH/SB/D immediate pre/post-indexed: !Rd, !Rn - * LDRH/SH/SB/D,STRH/SH/SB/D register pre/post-indexed: !Rd, !Rn, !Rm - * LDREX: !Rd, !Rn - * MCR/2: !Rd - * MCRR/2,MRRC/2: !Rd, !Rn - * MLA: !Rd, !Rn, !Rm, !Rs - * MOV: Rd - * MRC/2: !Rd (if Rd==15, only changes cond codes, not the register) - * MRS,MSR: !Rd - * MUL: !Rd, !Rm, !Rs - * PKH{BT,TB}: !Rd, !Rn, !Rm - * QDADD,[U]QADD/16/8/SUBX: !Rd, !Rm, !Rn - * QDSUB,[U]QSUB/16/8/ADDX: !Rd, !Rm, !Rn - * REV/16/SH: !Rd, !Rm - * RFE: !Rn - * {S,U}[H]ADD{16,8,SUBX},{S,U}[H]SUB{16,8,ADDX}: !Rd, !Rn, !Rm - * SEL: !Rd, !Rn, !Rm - * SMLA<x><y>,SMLA{D,W<y>},SMLSD,SMML{A,S}: !Rd, !Rn, !Rm, !Rs - * SMLAL<x><y>,SMLA{D,LD},SMLSLD,SMMULL,SMULW<y>: !RdHi, !RdLo, !Rm, !Rs - * SMMUL,SMUAD,SMUL<x><y>,SMUSD: !Rd, !Rm, !Rs - * SSAT/16: !Rd, !Rm - * STM(1/2): !Rn, register_list* (R15 in reg list not recommended) - * STRT immediate pre/post-indexed: Rd*, !Rn - * STRT register pre/post-indexed: Rd*, !Rn, !Rm - * STRT scaled register pre/post-indexed: Rd*, !Rn, !Rm - * STREX: !Rd, !Rn, !Rm - * SWP/B: !Rd, !Rn, !Rm - * {S,U}XTA{B,B16,H}: !Rd, !Rn, !Rm - * {S,U}XT{B,B16,H}: !Rd, !Rm - * UM{AA,LA,UL}L: !RdHi, !RdLo, !Rm, !Rs - * USA{D8,A8,T,T16}: !Rd, !Rm, !Rs - * - * May transfer control by writing R15 (possible mode changes or alternate - * mode accesses marked by "*"): - * ALU op (* with s-bit), B, BL, BKPT, BLX(1/2), BX, BXJ, CPS*, CPY, - * LDM(1), LDM(2/3)*, LDR, MOV, RFE*, SWI* - * - * Instructions that do not take general registers, nor transfer control: - * CDP/2, SETEND, SRS* - */ diff --git a/arch/arm/kernel/kprobes-test-arm.c b/arch/arm/kernel/kprobes-test-arm.c new file mode 100644 index 00000000000..cb1424240ff --- /dev/null +++ b/arch/arm/kernel/kprobes-test-arm.c @@ -0,0 +1,1346 @@ +/* + * arch/arm/kernel/kprobes-test-arm.c + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/system_info.h> +#include <asm/opcodes.h> + +#include "kprobes-test.h" + + +#define TEST_ISA "32" + +#define TEST_ARM_TO_THUMB_INTERWORK_R(code1, reg, val, code2) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_REG(reg, val) \ + TEST_ARG_REG(14, 99f) \ + TEST_ARG_END("") \ + "50: nop \n\t" \ + "1: "code1 #reg code2" \n\t" \ + " bx lr \n\t" \ + ".thumb \n\t" \ + "3: adr lr, 2f \n\t" \ + " bx lr \n\t" \ + ".arm \n\t" \ + "2: nop \n\t" \ + TESTCASE_END + +#define TEST_ARM_TO_THUMB_INTERWORK_P(code1, reg, val, code2) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_PTR(reg, val) \ + TEST_ARG_REG(14, 99f) \ + TEST_ARG_MEM(15, 3f+1) \ + TEST_ARG_END("") \ + "50: nop \n\t" \ + "1: "code1 #reg code2" \n\t" \ + " bx lr \n\t" \ + ".thumb \n\t" \ + "3: adr lr, 2f \n\t" \ + " bx lr \n\t" \ + ".arm \n\t" \ + "2: nop \n\t" \ + TESTCASE_END + + +void kprobe_arm_test_cases(void) +{ + kprobe_test_flags = 0; + + TEST_GROUP("Data-processing (register), (register-shifted register), (immediate)") + +#define _DATA_PROCESSING_DNM(op,s,val) \ + TEST_RR( op "eq" s " r0, r",1, VAL1,", r",2, val, "") \ + TEST_RR( op "ne" s " r1, r",1, VAL1,", r",2, val, ", lsl #3") \ + TEST_RR( op "cs" s " r2, r",3, VAL1,", r",2, val, ", lsr #4") \ + TEST_RR( op "cc" s " r3, r",3, VAL1,", r",2, val, ", asr #5") \ + TEST_RR( op "mi" s " r4, r",5, VAL1,", r",2, N(val),", asr #6") \ + TEST_RR( op "pl" s " r5, r",5, VAL1,", r",2, val, ", ror #7") \ + TEST_RR( op "vs" s " r6, r",7, VAL1,", r",2, val, ", rrx") \ + TEST_R( op "vc" s " r6, r",7, VAL1,", pc, lsl #3") \ + TEST_R( op "vc" s " r6, r",7, VAL1,", sp, lsr #4") \ + TEST_R( op "vc" s " r6, pc, r",7, VAL1,", asr #5") \ + TEST_R( op "vc" s " r6, sp, r",7, VAL1,", ror #6") \ + TEST_RRR( op "hi" s " r8, r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")\ + TEST_RRR( op "ls" s " r9, r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")\ + TEST_RRR( op "ge" s " r10, r",11,VAL1,", r",14,val, ", asr r",7, 5,"")\ + TEST_RRR( op "lt" s " r11, r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")\ + TEST_RR( op "gt" s " r12, r13" ", r",14,val, ", ror r",14,7,"")\ + TEST_RR( op "le" s " r14, r",0, val, ", r13" ", lsl r",14,8,"")\ + TEST_R( op "eq" s " r0, r",11,VAL1,", #0xf5") \ + TEST_R( op "ne" s " r11, r",0, VAL1,", #0xf5000000") \ + TEST_R( op s " r7, r",8, VAL2,", #0x000af000") \ + TEST( op s " r4, pc" ", #0x00005a00") + +#define DATA_PROCESSING_DNM(op,val) \ + _DATA_PROCESSING_DNM(op,"",val) \ + _DATA_PROCESSING_DNM(op,"s",val) + +#define DATA_PROCESSING_NM(op,val) \ + TEST_RR( op "ne r",1, VAL1,", r",2, val, "") \ + TEST_RR( op "eq r",1, VAL1,", r",2, val, ", lsl #3") \ + TEST_RR( op "cc r",3, VAL1,", r",2, val, ", lsr #4") \ + TEST_RR( op "cs r",3, VAL1,", r",2, val, ", asr #5") \ + TEST_RR( op "pl r",5, VAL1,", r",2, N(val),", asr #6") \ + TEST_RR( op "mi r",5, VAL1,", r",2, val, ", ror #7") \ + TEST_RR( op "vc r",7, VAL1,", r",2, val, ", rrx") \ + TEST_R ( op "vs r",7, VAL1,", pc, lsl #3") \ + TEST_R ( op "vs r",7, VAL1,", sp, lsr #4") \ + TEST_R( op "vs pc, r",7, VAL1,", asr #5") \ + TEST_R( op "vs sp, r",7, VAL1,", ror #6") \ + TEST_RRR( op "ls r",9, VAL1,", r",14,val, ", lsl r",0, 3,"") \ + TEST_RRR( op "hi r",9, VAL1,", r",14,val, ", lsr r",7, 4,"") \ + TEST_RRR( op "lt r",11,VAL1,", r",14,val, ", asr r",7, 5,"") \ + TEST_RRR( op "ge r",11,VAL1,", r",14,N(val),", asr r",7, 6,"") \ + TEST_RR( op "le r13" ", r",14,val, ", ror r",14,7,"") \ + TEST_RR( op "gt r",0, val, ", r13" ", lsl r",14,8,"") \ + TEST_R( op "eq r",11,VAL1,", #0xf5") \ + TEST_R( op "ne r",0, VAL1,", #0xf5000000") \ + TEST_R( op " r",8, VAL2,", #0x000af000") + +#define _DATA_PROCESSING_DM(op,s,val) \ + TEST_R( op "eq" s " r0, r",1, val, "") \ + TEST_R( op "ne" s " r1, r",1, val, ", lsl #3") \ + TEST_R( op "cs" s " r2, r",3, val, ", lsr #4") \ + TEST_R( op "cc" s " r3, r",3, val, ", asr #5") \ + TEST_R( op "mi" s " r4, r",5, N(val),", asr #6") \ + TEST_R( op "pl" s " r5, r",5, val, ", ror #7") \ + TEST_R( op "vs" s " r6, r",10,val, ", rrx") \ + TEST( op "vs" s " r7, pc, lsl #3") \ + TEST( op "vs" s " r7, sp, lsr #4") \ + TEST_RR( op "vc" s " r8, r",7, val, ", lsl r",0, 3,"") \ + TEST_RR( op "hi" s " r9, r",9, val, ", lsr r",7, 4,"") \ + TEST_RR( op "ls" s " r10, r",9, val, ", asr r",7, 5,"") \ + TEST_RR( op "ge" s " r11, r",11,N(val),", asr r",7, 6,"") \ + TEST_RR( op "lt" s " r12, r",11,val, ", ror r",14,7,"") \ + TEST_R( op "gt" s " r14, r13" ", lsl r",14,8,"") \ + TEST( op "eq" s " r0, #0xf5") \ + TEST( op "ne" s " r11, #0xf5000000") \ + TEST( op s " r7, #0x000af000") \ + TEST( op s " r4, #0x00005a00") + +#define DATA_PROCESSING_DM(op,val) \ + _DATA_PROCESSING_DM(op,"",val) \ + _DATA_PROCESSING_DM(op,"s",val) + + DATA_PROCESSING_DNM("and",0xf00f00ff) + DATA_PROCESSING_DNM("eor",0xf00f00ff) + DATA_PROCESSING_DNM("sub",VAL2) + DATA_PROCESSING_DNM("rsb",VAL2) + DATA_PROCESSING_DNM("add",VAL2) + DATA_PROCESSING_DNM("adc",VAL2) + DATA_PROCESSING_DNM("sbc",VAL2) + DATA_PROCESSING_DNM("rsc",VAL2) + DATA_PROCESSING_NM("tst",0xf00f00ff) + DATA_PROCESSING_NM("teq",0xf00f00ff) + DATA_PROCESSING_NM("cmp",VAL2) + DATA_PROCESSING_NM("cmn",VAL2) + DATA_PROCESSING_DNM("orr",0xf00f00ff) + DATA_PROCESSING_DM("mov",VAL2) + DATA_PROCESSING_DNM("bic",0xf00f00ff) + DATA_PROCESSING_DM("mvn",VAL2) + + TEST("mov ip, sp") /* This has special case emulation code */ + + TEST_SUPPORTED("mov pc, #0x1000"); + TEST_SUPPORTED("mov sp, #0x1000"); + TEST_SUPPORTED("cmp pc, #0x1000"); + TEST_SUPPORTED("cmp sp, #0x1000"); + + /* Data-processing with PC and a shift count in a register */ + TEST_UNSUPPORTED(__inst_arm(0xe15c0f1e) " @ cmp r12, r14, asl pc") + TEST_UNSUPPORTED(__inst_arm(0xe1a0cf1e) " @ mov r12, r14, asl pc") + TEST_UNSUPPORTED(__inst_arm(0xe08caf1e) " @ add r10, r12, r14, asl pc") + TEST_UNSUPPORTED(__inst_arm(0xe151021f) " @ cmp r1, pc, lsl r2") + TEST_UNSUPPORTED(__inst_arm(0xe17f0211) " @ cmn pc, r1, lsl r2") + TEST_UNSUPPORTED(__inst_arm(0xe1a0121f) " @ mov r1, pc, lsl r2") + TEST_UNSUPPORTED(__inst_arm(0xe1a0f211) " @ mov pc, r1, lsl r2") + TEST_UNSUPPORTED(__inst_arm(0xe042131f) " @ sub r1, r2, pc, lsl r3") + TEST_UNSUPPORTED(__inst_arm(0xe1cf1312) " @ bic r1, pc, r2, lsl r3") + TEST_UNSUPPORTED(__inst_arm(0xe081f312) " @ add pc, r1, r2, lsl r3") + + /* Data-processing with PC as a target and status registers updated */ + TEST_UNSUPPORTED("movs pc, r1") + TEST_UNSUPPORTED("movs pc, r1, lsl r2") + TEST_UNSUPPORTED("movs pc, #0x10000") + TEST_UNSUPPORTED("adds pc, lr, r1") + TEST_UNSUPPORTED("adds pc, lr, r1, lsl r2") + TEST_UNSUPPORTED("adds pc, lr, #4") + + /* Data-processing with SP as target */ + TEST("add sp, sp, #16") + TEST("sub sp, sp, #8") + TEST("bic sp, sp, #0x20") + TEST("orr sp, sp, #0x20") + TEST_PR( "add sp, r",10,0,", r",11,4,"") + TEST_PRR("add sp, r",10,0,", r",11,4,", asl r",12,1,"") + TEST_P( "mov sp, r",10,0,"") + TEST_PR( "mov sp, r",10,0,", asl r",12,0,"") + + /* Data-processing with PC as target */ + TEST_BF( "add pc, pc, #2f-1b-8") + TEST_BF_R ("add pc, pc, r",14,2f-1f-8,"") + TEST_BF_R ("add pc, r",14,2f-1f-8,", pc") + TEST_BF_R ("mov pc, r",0,2f,"") + TEST_BF_R ("add pc, pc, r",14,(2f-1f-8)*2,", asr #1") + TEST_BB( "sub pc, pc, #1b-2b+8") +#if __LINUX_ARM_ARCH__ == 6 && !defined(CONFIG_CPU_V7) + TEST_BB( "sub pc, pc, #1b-2b+8-2") /* UNPREDICTABLE before and after ARMv6 */ +#endif + TEST_BB_R( "sub pc, pc, r",14, 1f-2f+8,"") + TEST_BB_R( "rsb pc, r",14,1f-2f+8,", pc") + TEST_R( "add pc, pc, r",10,-2,", asl #1") +#ifdef CONFIG_THUMB2_KERNEL + TEST_ARM_TO_THUMB_INTERWORK_R("add pc, pc, r",0,3f-1f-8+1,"") + TEST_ARM_TO_THUMB_INTERWORK_R("sub pc, r",0,3f+8+1,", #8") +#endif + TEST_GROUP("Miscellaneous instructions") + + TEST("mrs r0, cpsr") + TEST("mrspl r7, cpsr") + TEST("mrs r14, cpsr") + TEST_UNSUPPORTED(__inst_arm(0xe10ff000) " @ mrs r15, cpsr") + TEST_UNSUPPORTED("mrs r0, spsr") + TEST_UNSUPPORTED("mrs lr, spsr") + + TEST_UNSUPPORTED("msr cpsr, r0") + TEST_UNSUPPORTED("msr cpsr_f, lr") + TEST_UNSUPPORTED("msr spsr, r0") + + TEST_BF_R("bx r",0,2f,"") + TEST_BB_R("bx r",7,2f,"") + TEST_BF_R("bxeq r",14,2f,"") + +#if __LINUX_ARM_ARCH__ >= 5 + TEST_R("clz r0, r",0, 0x0,"") + TEST_R("clzeq r7, r",14,0x1,"") + TEST_R("clz lr, r",7, 0xffffffff,"") + TEST( "clz r4, sp") + TEST_UNSUPPORTED(__inst_arm(0x016fff10) " @ clz pc, r0") + TEST_UNSUPPORTED(__inst_arm(0x016f0f1f) " @ clz r0, pc") + +#if __LINUX_ARM_ARCH__ >= 6 + TEST_UNSUPPORTED("bxj r0") +#endif + + TEST_BF_R("blx r",0,2f,"") + TEST_BB_R("blx r",7,2f,"") + TEST_BF_R("blxeq r",14,2f,"") + TEST_UNSUPPORTED(__inst_arm(0x0120003f) " @ blx pc") + + TEST_RR( "qadd r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "qaddvs lr, r",9, VAL2,", r",8, VAL1,"") + TEST_R( "qadd lr, r",9, VAL2,", r13") + TEST_RR( "qsub r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "qsubvs lr, r",9, VAL2,", r",8, VAL1,"") + TEST_R( "qsub lr, r",9, VAL2,", r13") + TEST_RR( "qdadd r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "qdaddvs lr, r",9, VAL2,", r",8, VAL1,"") + TEST_R( "qdadd lr, r",9, VAL2,", r13") + TEST_RR( "qdsub r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "qdsubvs lr, r",9, VAL2,", r",8, VAL1,"") + TEST_R( "qdsub lr, r",9, VAL2,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe101f050) " @ qadd pc, r0, r1") + TEST_UNSUPPORTED(__inst_arm(0xe121f050) " @ qsub pc, r0, r1") + TEST_UNSUPPORTED(__inst_arm(0xe141f050) " @ qdadd pc, r0, r1") + TEST_UNSUPPORTED(__inst_arm(0xe161f050) " @ qdsub pc, r0, r1") + TEST_UNSUPPORTED(__inst_arm(0xe16f2050) " @ qdsub r2, r0, pc") + TEST_UNSUPPORTED(__inst_arm(0xe161205f) " @ qdsub r2, pc, r1") + + TEST_UNSUPPORTED("bkpt 0xffff") + TEST_UNSUPPORTED("bkpt 0x0000") + + TEST_UNSUPPORTED(__inst_arm(0xe1600070) " @ smc #0") + + TEST_GROUP("Halfword multiply and multiply-accumulate") + + TEST_RRR( "smlabb r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlabbge r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "smlabb lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe10f3281) " @ smlabb pc, r1, r2, r3") + TEST_RRR( "smlatb r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlatbge r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "smlatb lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe10f32a1) " @ smlatb pc, r1, r2, r3") + TEST_RRR( "smlabt r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlabtge r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "smlabt lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe10f32c1) " @ smlabt pc, r1, r2, r3") + TEST_RRR( "smlatt r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlattge r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "smlatt lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe10f32e1) " @ smlatt pc, r1, r2, r3") + + TEST_RRR( "smlawb r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlawbge r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "smlawb lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe12f3281) " @ smlawb pc, r1, r2, r3") + TEST_RRR( "smlawt r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlawtge r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "smlawt lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe12f32c1) " @ smlawt pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe12032cf) " @ smlawt r0, pc, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe1203fc1) " @ smlawt r0, r1, pc, r3") + TEST_UNSUPPORTED(__inst_arm(0xe120f2c1) " @ smlawt r0, r1, r2, pc") + + TEST_RR( "smulwb r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulwbge r7, r",8, VAL3,", r",9, VAL1,"") + TEST_R( "smulwb lr, r",1, VAL2,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe12f02a1) " @ smulwb pc, r1, r2") + TEST_RR( "smulwt r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulwtge r7, r",8, VAL3,", r",9, VAL1,"") + TEST_R( "smulwt lr, r",1, VAL2,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe12f02e1) " @ smulwt pc, r1, r2") + + TEST_RRRR( "smlalbb r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlalbble r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRR( "smlalbb r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe14f1382) " @ smlalbb pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe141f382) " @ smlalbb r1, pc, r2, r3") + TEST_RRRR( "smlaltb r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlaltble r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRR( "smlaltb r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe14f13a2) " @ smlaltb pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe141f3a2) " @ smlaltb r1, pc, r2, r3") + TEST_RRRR( "smlalbt r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlalbtle r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRR( "smlalbt r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe14f13c2) " @ smlalbt pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe141f3c2) " @ smlalbt r1, pc, r2, r3") + TEST_RRRR( "smlaltt r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlalttle r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRR( "smlaltt r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe14f13e2) " @ smlalbb pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe140f3e2) " @ smlalbb r0, pc, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe14013ef) " @ smlalbb r0, r1, pc, r3") + TEST_UNSUPPORTED(__inst_arm(0xe1401fe2) " @ smlalbb r0, r1, r2, pc") + + TEST_RR( "smulbb r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulbbge r7, r",8, VAL3,", r",9, VAL1,"") + TEST_R( "smulbb lr, r",1, VAL2,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe16f0281) " @ smulbb pc, r1, r2") + TEST_RR( "smultb r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smultbge r7, r",8, VAL3,", r",9, VAL1,"") + TEST_R( "smultb lr, r",1, VAL2,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe16f02a1) " @ smultb pc, r1, r2") + TEST_RR( "smulbt r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulbtge r7, r",8, VAL3,", r",9, VAL1,"") + TEST_R( "smulbt lr, r",1, VAL2,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe16f02c1) " @ smultb pc, r1, r2") + TEST_RR( "smultt r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulttge r7, r",8, VAL3,", r",9, VAL1,"") + TEST_R( "smultt lr, r",1, VAL2,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe16f02e1) " @ smultt pc, r1, r2") + TEST_UNSUPPORTED(__inst_arm(0xe16002ef) " @ smultt r0, pc, r2") + TEST_UNSUPPORTED(__inst_arm(0xe1600fe1) " @ smultt r0, r1, pc") +#endif + + TEST_GROUP("Multiply and multiply-accumulate") + + TEST_RR( "mul r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "mulls r7, r",8, VAL2,", r",9, VAL2,"") + TEST_R( "mul lr, r",4, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe00f0291) " @ mul pc, r1, r2") + TEST_UNSUPPORTED(__inst_arm(0xe000029f) " @ mul r0, pc, r2") + TEST_UNSUPPORTED(__inst_arm(0xe0000f91) " @ mul r0, r1, pc") + TEST_RR( "muls r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "mullss r7, r",8, VAL2,", r",9, VAL2,"") + TEST_R( "muls lr, r",4, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe01f0291) " @ muls pc, r1, r2") + + TEST_RRR( "mla r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "mlahi r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "mla lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe02f3291) " @ mla pc, r1, r2, r3") + TEST_RRR( "mlas r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "mlahis r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "mlas lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe03f3291) " @ mlas pc, r1, r2, r3") + +#if __LINUX_ARM_ARCH__ >= 6 + TEST_RR( "umaal r0, r1, r",2, VAL1,", r",3, VAL2,"") + TEST_RR( "umaalls r7, r8, r",9, VAL2,", r",10, VAL1,"") + TEST_R( "umaal lr, r12, r",11,VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe041f392) " @ umaal pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe04f0392) " @ umaal r0, pc, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0500090) " @ undef") + TEST_UNSUPPORTED(__inst_arm(0xe05fff9f) " @ undef") +#endif + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_RRR( "mls r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "mlshi r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "mls lr, r",1, VAL2,", r",2, VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe06f3291) " @ mls pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe060329f) " @ mls r0, pc, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0603f91) " @ mls r0, r1, pc, r3") + TEST_UNSUPPORTED(__inst_arm(0xe060f291) " @ mls r0, r1, r2, pc") +#endif + + TEST_UNSUPPORTED(__inst_arm(0xe0700090) " @ undef") + TEST_UNSUPPORTED(__inst_arm(0xe07fff9f) " @ undef") + + TEST_RR( "umull r0, r1, r",2, VAL1,", r",3, VAL2,"") + TEST_RR( "umullls r7, r8, r",9, VAL2,", r",10, VAL1,"") + TEST_R( "umull lr, r12, r",11,VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe081f392) " @ umull pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe08f1392) " @ umull r1, pc, r2, r3") + TEST_RR( "umulls r0, r1, r",2, VAL1,", r",3, VAL2,"") + TEST_RR( "umulllss r7, r8, r",9, VAL2,", r",10, VAL1,"") + TEST_R( "umulls lr, r12, r",11,VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe091f392) " @ umulls pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe09f1392) " @ umulls r1, pc, r2, r3") + + TEST_RRRR( "umlal r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "umlalle r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRR( "umlal r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe0af1392) " @ umlal pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0a1f392) " @ umlal r1, pc, r2, r3") + TEST_RRRR( "umlals r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "umlalles r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRR( "umlals r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe0bf1392) " @ umlals pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0b1f392) " @ umlals r1, pc, r2, r3") + + TEST_RR( "smull r0, r1, r",2, VAL1,", r",3, VAL2,"") + TEST_RR( "smullls r7, r8, r",9, VAL2,", r",10, VAL1,"") + TEST_R( "smull lr, r12, r",11,VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe0c1f392) " @ smull pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0cf1392) " @ smull r1, pc, r2, r3") + TEST_RR( "smulls r0, r1, r",2, VAL1,", r",3, VAL2,"") + TEST_RR( "smulllss r7, r8, r",9, VAL2,", r",10, VAL1,"") + TEST_R( "smulls lr, r12, r",11,VAL3,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe0d1f392) " @ smulls pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0df1392) " @ smulls r1, pc, r2, r3") + + TEST_RRRR( "smlal r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlalle r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRR( "smlal r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe0ef1392) " @ smlal pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0e1f392) " @ smlal r1, pc, r2, r3") + TEST_RRRR( "smlals r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlalles r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRR( "smlals r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13") + TEST_UNSUPPORTED(__inst_arm(0xe0ff1392) " @ smlals pc, r1, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0f0f392) " @ smlals r0, pc, r2, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0f0139f) " @ smlals r0, r1, pc, r3") + TEST_UNSUPPORTED(__inst_arm(0xe0f01f92) " @ smlals r0, r1, r2, pc") + + TEST_GROUP("Synchronization primitives") + +#if __LINUX_ARM_ARCH__ < 6 + TEST_RP("swp lr, r",7,VAL2,", [r",8,0,"]") + TEST_R( "swpvs r0, r",1,VAL1,", [sp]") + TEST_RP("swp sp, r",14,VAL2,", [r",12,13*4,"]") +#else + TEST_UNSUPPORTED(__inst_arm(0xe108e097) " @ swp lr, r7, [r8]") + TEST_UNSUPPORTED(__inst_arm(0x610d0091) " @ swpvs r0, r1, [sp]") + TEST_UNSUPPORTED(__inst_arm(0xe10cd09e) " @ swp sp, r14 [r12]") +#endif + TEST_UNSUPPORTED(__inst_arm(0xe102f091) " @ swp pc, r1, [r2]") + TEST_UNSUPPORTED(__inst_arm(0xe102009f) " @ swp r0, pc, [r2]") + TEST_UNSUPPORTED(__inst_arm(0xe10f0091) " @ swp r0, r1, [pc]") +#if __LINUX_ARM_ARCH__ < 6 + TEST_RP("swpb lr, r",7,VAL2,", [r",8,0,"]") + TEST_R( "swpvsb r0, r",1,VAL1,", [sp]") +#else + TEST_UNSUPPORTED(__inst_arm(0xe148e097) " @ swpb lr, r7, [r8]") + TEST_UNSUPPORTED(__inst_arm(0x614d0091) " @ swpvsb r0, r1, [sp]") +#endif + TEST_UNSUPPORTED(__inst_arm(0xe142f091) " @ swpb pc, r1, [r2]") + + TEST_UNSUPPORTED(__inst_arm(0xe1100090)) /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe1200090)) /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe1300090)) /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe1500090)) /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe1600090)) /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe1700090)) /* Unallocated space */ +#if __LINUX_ARM_ARCH__ >= 6 + TEST_UNSUPPORTED("ldrex r2, [sp]") +#endif +#if (__LINUX_ARM_ARCH__ >= 7) || defined(CONFIG_CPU_32v6K) + TEST_UNSUPPORTED("strexd r0, r2, r3, [sp]") + TEST_UNSUPPORTED("ldrexd r2, r3, [sp]") + TEST_UNSUPPORTED("strexb r0, r2, [sp]") + TEST_UNSUPPORTED("ldrexb r2, [sp]") + TEST_UNSUPPORTED("strexh r0, r2, [sp]") + TEST_UNSUPPORTED("ldrexh r2, [sp]") +#endif + TEST_GROUP("Extra load/store instructions") + + TEST_RPR( "strh r",0, VAL1,", [r",1, 48,", -r",2, 24,"]") + TEST_RPR( "streqh r",14,VAL2,", [r",13,0, ", r",12, 48,"]") + TEST_RPR( "strh r",1, VAL1,", [r",2, 24,", r",3, 48,"]!") + TEST_RPR( "strneh r",12,VAL2,", [r",11,48,", -r",10,24,"]!") + TEST_RPR( "strh r",2, VAL1,", [r",3, 24,"], r",4, 48,"") + TEST_RPR( "strh r",10,VAL2,", [r",9, 48,"], -r",11,24,"") + TEST_UNSUPPORTED(__inst_arm(0xe1afc0ba) " @ strh r12, [pc, r10]!") + TEST_UNSUPPORTED(__inst_arm(0xe089f0bb) " @ strh pc, [r9], r11") + TEST_UNSUPPORTED(__inst_arm(0xe089a0bf) " @ strh r10, [r9], pc") + + TEST_PR( "ldrh r0, [r",0, 48,", -r",2, 24,"]") + TEST_PR( "ldrcsh r14, [r",13,0, ", r",12, 48,"]") + TEST_PR( "ldrh r1, [r",2, 24,", r",3, 48,"]!") + TEST_PR( "ldrcch r12, [r",11,48,", -r",10,24,"]!") + TEST_PR( "ldrh r2, [r",3, 24,"], r",4, 48,"") + TEST_PR( "ldrh r10, [r",9, 48,"], -r",11,24,"") + TEST_UNSUPPORTED(__inst_arm(0xe1bfc0ba) " @ ldrh r12, [pc, r10]!") + TEST_UNSUPPORTED(__inst_arm(0xe099f0bb) " @ ldrh pc, [r9], r11") + TEST_UNSUPPORTED(__inst_arm(0xe099a0bf) " @ ldrh r10, [r9], pc") + + TEST_RP( "strh r",0, VAL1,", [r",1, 24,", #-2]") + TEST_RP( "strmih r",14,VAL2,", [r",13,0, ", #2]") + TEST_RP( "strh r",1, VAL1,", [r",2, 24,", #4]!") + TEST_RP( "strplh r",12,VAL2,", [r",11,24,", #-4]!") + TEST_RP( "strh r",2, VAL1,", [r",3, 24,"], #48") + TEST_RP( "strh r",10,VAL2,", [r",9, 64,"], #-48") + TEST_UNSUPPORTED(__inst_arm(0xe1efc3b0) " @ strh r12, [pc, #48]!") + TEST_UNSUPPORTED(__inst_arm(0xe0c9f3b0) " @ strh pc, [r9], #48") + + TEST_P( "ldrh r0, [r",0, 24,", #-2]") + TEST_P( "ldrvsh r14, [r",13,0, ", #2]") + TEST_P( "ldrh r1, [r",2, 24,", #4]!") + TEST_P( "ldrvch r12, [r",11,24,", #-4]!") + TEST_P( "ldrh r2, [r",3, 24,"], #48") + TEST_P( "ldrh r10, [r",9, 64,"], #-48") + TEST( "ldrh r0, [pc, #0]") + TEST_UNSUPPORTED(__inst_arm(0xe1ffc3b0) " @ ldrh r12, [pc, #48]!") + TEST_UNSUPPORTED(__inst_arm(0xe0d9f3b0) " @ ldrh pc, [r9], #48") + + TEST_PR( "ldrsb r0, [r",0, 48,", -r",2, 24,"]") + TEST_PR( "ldrhisb r14, [r",13,0,", r",12, 48,"]") + TEST_PR( "ldrsb r1, [r",2, 24,", r",3, 48,"]!") + TEST_PR( "ldrlssb r12, [r",11,48,", -r",10,24,"]!") + TEST_PR( "ldrsb r2, [r",3, 24,"], r",4, 48,"") + TEST_PR( "ldrsb r10, [r",9, 48,"], -r",11,24,"") + TEST_UNSUPPORTED(__inst_arm(0xe1bfc0da) " @ ldrsb r12, [pc, r10]!") + TEST_UNSUPPORTED(__inst_arm(0xe099f0db) " @ ldrsb pc, [r9], r11") + + TEST_P( "ldrsb r0, [r",0, 24,", #-1]") + TEST_P( "ldrgesb r14, [r",13,0, ", #1]") + TEST_P( "ldrsb r1, [r",2, 24,", #4]!") + TEST_P( "ldrltsb r12, [r",11,24,", #-4]!") + TEST_P( "ldrsb r2, [r",3, 24,"], #48") + TEST_P( "ldrsb r10, [r",9, 64,"], #-48") + TEST( "ldrsb r0, [pc, #0]") + TEST_UNSUPPORTED(__inst_arm(0xe1ffc3d0) " @ ldrsb r12, [pc, #48]!") + TEST_UNSUPPORTED(__inst_arm(0xe0d9f3d0) " @ ldrsb pc, [r9], #48") + + TEST_PR( "ldrsh r0, [r",0, 48,", -r",2, 24,"]") + TEST_PR( "ldrgtsh r14, [r",13,0, ", r",12, 48,"]") + TEST_PR( "ldrsh r1, [r",2, 24,", r",3, 48,"]!") + TEST_PR( "ldrlesh r12, [r",11,48,", -r",10,24,"]!") + TEST_PR( "ldrsh r2, [r",3, 24,"], r",4, 48,"") + TEST_PR( "ldrsh r10, [r",9, 48,"], -r",11,24,"") + TEST_UNSUPPORTED(__inst_arm(0xe1bfc0fa) " @ ldrsh r12, [pc, r10]!") + TEST_UNSUPPORTED(__inst_arm(0xe099f0fb) " @ ldrsh pc, [r9], r11") + + TEST_P( "ldrsh r0, [r",0, 24,", #-1]") + TEST_P( "ldreqsh r14, [r",13,0 ,", #1]") + TEST_P( "ldrsh r1, [r",2, 24,", #4]!") + TEST_P( "ldrnesh r12, [r",11,24,", #-4]!") + TEST_P( "ldrsh r2, [r",3, 24,"], #48") + TEST_P( "ldrsh r10, [r",9, 64,"], #-48") + TEST( "ldrsh r0, [pc, #0]") + TEST_UNSUPPORTED(__inst_arm(0xe1ffc3f0) " @ ldrsh r12, [pc, #48]!") + TEST_UNSUPPORTED(__inst_arm(0xe0d9f3f0) " @ ldrsh pc, [r9], #48") + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_UNSUPPORTED("strht r1, [r2], r3") + TEST_UNSUPPORTED("ldrht r1, [r2], r3") + TEST_UNSUPPORTED("strht r1, [r2], #48") + TEST_UNSUPPORTED("ldrht r1, [r2], #48") + TEST_UNSUPPORTED("ldrsbt r1, [r2], r3") + TEST_UNSUPPORTED("ldrsbt r1, [r2], #48") + TEST_UNSUPPORTED("ldrsht r1, [r2], r3") + TEST_UNSUPPORTED("ldrsht r1, [r2], #48") +#endif + +#if __LINUX_ARM_ARCH__ >= 5 + TEST_RPR( "strd r",0, VAL1,", [r",1, 48,", -r",2,24,"]") + TEST_RPR( "strccd r",8, VAL2,", [r",13,0, ", r",12,48,"]") + TEST_RPR( "strd r",4, VAL1,", [r",2, 24,", r",3, 48,"]!") + TEST_RPR( "strcsd r",12,VAL2,", [r",11,48,", -r",10,24,"]!") + TEST_RPR( "strd r",2, VAL1,", [r",5, 24,"], r",4,48,"") + TEST_RPR( "strd r",10,VAL2,", [r",9, 48,"], -r",7,24,"") + TEST_UNSUPPORTED(__inst_arm(0xe1afc0fa) " @ strd r12, [pc, r10]!") + + TEST_PR( "ldrd r0, [r",0, 48,", -r",2,24,"]") + TEST_PR( "ldrmid r8, [r",13,0, ", r",12,48,"]") + TEST_PR( "ldrd r4, [r",2, 24,", r",3, 48,"]!") + TEST_PR( "ldrpld r6, [r",11,48,", -r",10,24,"]!") + TEST_PR( "ldrd r2, [r",5, 24,"], r",4,48,"") + TEST_PR( "ldrd r10, [r",9,48,"], -r",7,24,"") + TEST_UNSUPPORTED(__inst_arm(0xe1afc0da) " @ ldrd r12, [pc, r10]!") + TEST_UNSUPPORTED(__inst_arm(0xe089f0db) " @ ldrd pc, [r9], r11") + TEST_UNSUPPORTED(__inst_arm(0xe089e0db) " @ ldrd lr, [r9], r11") + TEST_UNSUPPORTED(__inst_arm(0xe089c0df) " @ ldrd r12, [r9], pc") + + TEST_RP( "strd r",0, VAL1,", [r",1, 24,", #-8]") + TEST_RP( "strvsd r",8, VAL2,", [r",13,0, ", #8]") + TEST_RP( "strd r",4, VAL1,", [r",2, 24,", #16]!") + TEST_RP( "strvcd r",12,VAL2,", [r",11,24,", #-16]!") + TEST_RP( "strd r",2, VAL1,", [r",4, 24,"], #48") + TEST_RP( "strd r",10,VAL2,", [r",9, 64,"], #-48") + TEST_UNSUPPORTED(__inst_arm(0xe1efc3f0) " @ strd r12, [pc, #48]!") + + TEST_P( "ldrd r0, [r",0, 24,", #-8]") + TEST_P( "ldrhid r8, [r",13,0, ", #8]") + TEST_P( "ldrd r4, [r",2, 24,", #16]!") + TEST_P( "ldrlsd r6, [r",11,24,", #-16]!") + TEST_P( "ldrd r2, [r",5, 24,"], #48") + TEST_P( "ldrd r10, [r",9,6,"], #-48") + TEST_UNSUPPORTED(__inst_arm(0xe1efc3d0) " @ ldrd r12, [pc, #48]!") + TEST_UNSUPPORTED(__inst_arm(0xe0c9f3d0) " @ ldrd pc, [r9], #48") + TEST_UNSUPPORTED(__inst_arm(0xe0c9e3d0) " @ ldrd lr, [r9], #48") +#endif + + TEST_GROUP("Miscellaneous") + +#if __LINUX_ARM_ARCH__ >= 7 + TEST("movw r0, #0") + TEST("movw r0, #0xffff") + TEST("movw lr, #0xffff") + TEST_UNSUPPORTED(__inst_arm(0xe300f000) " @ movw pc, #0") + TEST_R("movt r",0, VAL1,", #0") + TEST_R("movt r",0, VAL2,", #0xffff") + TEST_R("movt r",14,VAL1,", #0xffff") + TEST_UNSUPPORTED(__inst_arm(0xe340f000) " @ movt pc, #0") +#endif + + TEST_UNSUPPORTED("msr cpsr, 0x13") + TEST_UNSUPPORTED("msr cpsr_f, 0xf0000000") + TEST_UNSUPPORTED("msr spsr, 0x13") + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_SUPPORTED("yield") + TEST("sev") + TEST("nop") + TEST("wfi") + TEST_SUPPORTED("wfe") + TEST_UNSUPPORTED("dbg #0") +#endif + + TEST_GROUP("Load/store word and unsigned byte") + +#define LOAD_STORE(byte) \ + TEST_RP( "str"byte" r",0, VAL1,", [r",1, 24,", #-2]") \ + TEST_RP( "str"byte" r",14,VAL2,", [r",13,0, ", #2]") \ + TEST_RP( "str"byte" r",1, VAL1,", [r",2, 24,", #4]!") \ + TEST_RP( "str"byte" r",12,VAL2,", [r",11,24,", #-4]!") \ + TEST_RP( "str"byte" r",2, VAL1,", [r",3, 24,"], #48") \ + TEST_RP( "str"byte" r",10,VAL2,", [r",9, 64,"], #-48") \ + TEST_RPR("str"byte" r",0, VAL1,", [r",1, 48,", -r",2, 24,"]") \ + TEST_RPR("str"byte" r",14,VAL2,", [r",13,0, ", r",12, 48,"]") \ + TEST_RPR("str"byte" r",1, VAL1,", [r",2, 24,", r",3, 48,"]!") \ + TEST_RPR("str"byte" r",12,VAL2,", [r",11,48,", -r",10,24,"]!") \ + TEST_RPR("str"byte" r",2, VAL1,", [r",3, 24,"], r",4, 48,"") \ + TEST_RPR("str"byte" r",10,VAL2,", [r",9, 48,"], -r",11,24,"") \ + TEST_RPR("str"byte" r",0, VAL1,", [r",1, 24,", r",2, 32,", asl #1]")\ + TEST_RPR("str"byte" r",14,VAL2,", [r",13,0, ", r",12, 32,", lsr #2]")\ + TEST_RPR("str"byte" r",1, VAL1,", [r",2, 24,", r",3, 32,", asr #3]!")\ + TEST_RPR("str"byte" r",12,VAL2,", [r",11,24,", r",10, 4,", ror #31]!")\ + TEST_P( "ldr"byte" r0, [r",0, 24,", #-2]") \ + TEST_P( "ldr"byte" r14, [r",13,0, ", #2]") \ + TEST_P( "ldr"byte" r1, [r",2, 24,", #4]!") \ + TEST_P( "ldr"byte" r12, [r",11,24,", #-4]!") \ + TEST_P( "ldr"byte" r2, [r",3, 24,"], #48") \ + TEST_P( "ldr"byte" r10, [r",9, 64,"], #-48") \ + TEST_PR( "ldr"byte" r0, [r",0, 48,", -r",2, 24,"]") \ + TEST_PR( "ldr"byte" r14, [r",13,0, ", r",12, 48,"]") \ + TEST_PR( "ldr"byte" r1, [r",2, 24,", r",3, 48,"]!") \ + TEST_PR( "ldr"byte" r12, [r",11,48,", -r",10,24,"]!") \ + TEST_PR( "ldr"byte" r2, [r",3, 24,"], r",4, 48,"") \ + TEST_PR( "ldr"byte" r10, [r",9, 48,"], -r",11,24,"") \ + TEST_PR( "ldr"byte" r0, [r",0, 24,", r",2, 32,", asl #1]") \ + TEST_PR( "ldr"byte" r14, [r",13,0, ", r",12, 32,", lsr #2]") \ + TEST_PR( "ldr"byte" r1, [r",2, 24,", r",3, 32,", asr #3]!") \ + TEST_PR( "ldr"byte" r12, [r",11,24,", r",10, 4,", ror #31]!") \ + TEST( "ldr"byte" r0, [pc, #0]") \ + TEST_R( "ldr"byte" r12, [pc, r",14,0,"]") + + LOAD_STORE("") + TEST_P( "str pc, [r",0,0,", #15*4]") + TEST_R( "str pc, [sp, r",2,15*4,"]") + TEST_BF( "ldr pc, [sp, #15*4]") + TEST_BF_R("ldr pc, [sp, r",2,15*4,"]") + + TEST_P( "str sp, [r",0,0,", #13*4]") + TEST_R( "str sp, [sp, r",2,13*4,"]") + TEST_BF( "ldr sp, [sp, #13*4]") + TEST_BF_R("ldr sp, [sp, r",2,13*4,"]") + +#ifdef CONFIG_THUMB2_KERNEL + TEST_ARM_TO_THUMB_INTERWORK_P("ldr pc, [r",0,0,", #15*4]") +#endif + TEST_UNSUPPORTED(__inst_arm(0xe5af6008) " @ str r6, [pc, #8]!") + TEST_UNSUPPORTED(__inst_arm(0xe7af6008) " @ str r6, [pc, r8]!") + TEST_UNSUPPORTED(__inst_arm(0xe5bf6008) " @ ldr r6, [pc, #8]!") + TEST_UNSUPPORTED(__inst_arm(0xe7bf6008) " @ ldr r6, [pc, r8]!") + TEST_UNSUPPORTED(__inst_arm(0xe788600f) " @ str r6, [r8, pc]") + TEST_UNSUPPORTED(__inst_arm(0xe798600f) " @ ldr r6, [r8, pc]") + + LOAD_STORE("b") + TEST_UNSUPPORTED(__inst_arm(0xe5f7f008) " @ ldrb pc, [r7, #8]!") + TEST_UNSUPPORTED(__inst_arm(0xe7f7f008) " @ ldrb pc, [r7, r8]!") + TEST_UNSUPPORTED(__inst_arm(0xe5ef6008) " @ strb r6, [pc, #8]!") + TEST_UNSUPPORTED(__inst_arm(0xe7ef6008) " @ strb r6, [pc, r3]!") + TEST_UNSUPPORTED(__inst_arm(0xe5ff6008) " @ ldrb r6, [pc, #8]!") + TEST_UNSUPPORTED(__inst_arm(0xe7ff6008) " @ ldrb r6, [pc, r3]!") + + TEST_UNSUPPORTED("ldrt r0, [r1], #4") + TEST_UNSUPPORTED("ldrt r1, [r2], r3") + TEST_UNSUPPORTED("strt r2, [r3], #4") + TEST_UNSUPPORTED("strt r3, [r4], r5") + TEST_UNSUPPORTED("ldrbt r4, [r5], #4") + TEST_UNSUPPORTED("ldrbt r5, [r6], r7") + TEST_UNSUPPORTED("strbt r6, [r7], #4") + TEST_UNSUPPORTED("strbt r7, [r8], r9") + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_GROUP("Parallel addition and subtraction, signed") + + TEST_UNSUPPORTED(__inst_arm(0xe6000010) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe60fffff) "") /* Unallocated space */ + + TEST_RR( "sadd16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sadd16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe61cff1a) " @ sadd16 pc, r12, r10") + TEST_RR( "sasx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sasx r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe61cff3a) " @ sasx pc, r12, r10") + TEST_RR( "ssax r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "ssax r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe61cff5a) " @ ssax pc, r12, r10") + TEST_RR( "ssub16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "ssub16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe61cff7a) " @ ssub16 pc, r12, r10") + TEST_RR( "sadd8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sadd8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe61cff9a) " @ sadd8 pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe61000b0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe61fffbf) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe61000d0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe61fffdf) "") /* Unallocated space */ + TEST_RR( "ssub8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "ssub8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe61cfffa) " @ ssub8 pc, r12, r10") + + TEST_RR( "qadd16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "qadd16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe62cff1a) " @ qadd16 pc, r12, r10") + TEST_RR( "qasx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "qasx r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe62cff3a) " @ qasx pc, r12, r10") + TEST_RR( "qsax r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "qsax r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe62cff5a) " @ qsax pc, r12, r10") + TEST_RR( "qsub16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "qsub16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe62cff7a) " @ qsub16 pc, r12, r10") + TEST_RR( "qadd8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "qadd8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe62cff9a) " @ qadd8 pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe62000b0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe62fffbf) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe62000d0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe62fffdf) "") /* Unallocated space */ + TEST_RR( "qsub8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "qsub8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe62cfffa) " @ qsub8 pc, r12, r10") + + TEST_RR( "shadd16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "shadd16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe63cff1a) " @ shadd16 pc, r12, r10") + TEST_RR( "shasx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "shasx r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe63cff3a) " @ shasx pc, r12, r10") + TEST_RR( "shsax r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "shsax r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe63cff5a) " @ shsax pc, r12, r10") + TEST_RR( "shsub16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "shsub16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe63cff7a) " @ shsub16 pc, r12, r10") + TEST_RR( "shadd8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "shadd8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe63cff9a) " @ shadd8 pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe63000b0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe63fffbf) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe63000d0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe63fffdf) "") /* Unallocated space */ + TEST_RR( "shsub8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "shsub8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe63cfffa) " @ shsub8 pc, r12, r10") + + TEST_GROUP("Parallel addition and subtraction, unsigned") + + TEST_UNSUPPORTED(__inst_arm(0xe6400010) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe64fffff) "") /* Unallocated space */ + + TEST_RR( "uadd16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uadd16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe65cff1a) " @ uadd16 pc, r12, r10") + TEST_RR( "uasx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uasx r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe65cff3a) " @ uasx pc, r12, r10") + TEST_RR( "usax r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "usax r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe65cff5a) " @ usax pc, r12, r10") + TEST_RR( "usub16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "usub16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe65cff7a) " @ usub16 pc, r12, r10") + TEST_RR( "uadd8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uadd8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe65cff9a) " @ uadd8 pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe65000b0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe65fffbf) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe65000d0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe65fffdf) "") /* Unallocated space */ + TEST_RR( "usub8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "usub8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe65cfffa) " @ usub8 pc, r12, r10") + + TEST_RR( "uqadd16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uqadd16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe66cff1a) " @ uqadd16 pc, r12, r10") + TEST_RR( "uqasx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uqasx r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe66cff3a) " @ uqasx pc, r12, r10") + TEST_RR( "uqsax r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uqsax r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe66cff5a) " @ uqsax pc, r12, r10") + TEST_RR( "uqsub16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uqsub16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe66cff7a) " @ uqsub16 pc, r12, r10") + TEST_RR( "uqadd8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uqadd8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe66cff9a) " @ uqadd8 pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe66000b0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe66fffbf) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe66000d0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe66fffdf) "") /* Unallocated space */ + TEST_RR( "uqsub8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uqsub8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe66cfffa) " @ uqsub8 pc, r12, r10") + + TEST_RR( "uhadd16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uhadd16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe67cff1a) " @ uhadd16 pc, r12, r10") + TEST_RR( "uhasx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uhasx r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe67cff3a) " @ uhasx pc, r12, r10") + TEST_RR( "uhsax r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uhsax r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe67cff5a) " @ uhsax pc, r12, r10") + TEST_RR( "uhsub16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uhsub16 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe67cff7a) " @ uhsub16 pc, r12, r10") + TEST_RR( "uhadd8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uhadd8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe67cff9a) " @ uhadd8 pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe67000b0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe67fffbf) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe67000d0) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe67fffdf) "") /* Unallocated space */ + TEST_RR( "uhsub8 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uhsub8 r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe67cfffa) " @ uhsub8 pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe67feffa) " @ uhsub8 r14, pc, r10") + TEST_UNSUPPORTED(__inst_arm(0xe67cefff) " @ uhsub8 r14, r12, pc") +#endif /* __LINUX_ARM_ARCH__ >= 7 */ + +#if __LINUX_ARM_ARCH__ >= 6 + TEST_GROUP("Packing, unpacking, saturation, and reversal") + + TEST_RR( "pkhbt r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "pkhbt r14,r",12, HH1,", r",10,HH2,", lsl #2") + TEST_UNSUPPORTED(__inst_arm(0xe68cf11a) " @ pkhbt pc, r12, r10, lsl #2") + TEST_RR( "pkhtb r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "pkhtb r14,r",12, HH1,", r",10,HH2,", asr #2") + TEST_UNSUPPORTED(__inst_arm(0xe68cf15a) " @ pkhtb pc, r12, r10, asr #2") + TEST_UNSUPPORTED(__inst_arm(0xe68fe15a) " @ pkhtb r14, pc, r10, asr #2") + TEST_UNSUPPORTED(__inst_arm(0xe68ce15f) " @ pkhtb r14, r12, pc, asr #2") + TEST_UNSUPPORTED(__inst_arm(0xe6900010) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe69fffdf) "") /* Unallocated space */ + + TEST_R( "ssat r0, #24, r",0, VAL1,"") + TEST_R( "ssat r14, #24, r",12, VAL2,"") + TEST_R( "ssat r0, #24, r",0, VAL1,", lsl #8") + TEST_R( "ssat r14, #24, r",12, VAL2,", asr #8") + TEST_UNSUPPORTED(__inst_arm(0xe6b7f01c) " @ ssat pc, #24, r12") + + TEST_R( "usat r0, #24, r",0, VAL1,"") + TEST_R( "usat r14, #24, r",12, VAL2,"") + TEST_R( "usat r0, #24, r",0, VAL1,", lsl #8") + TEST_R( "usat r14, #24, r",12, VAL2,", asr #8") + TEST_UNSUPPORTED(__inst_arm(0xe6f7f01c) " @ usat pc, #24, r12") + + TEST_RR( "sxtab16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sxtab16 r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "sxtb16 r8, r",7, HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe68cf47a) " @ sxtab16 pc,r12, r10, ror #8") + + TEST_RR( "sel r0, r",0, VAL1,", r",1, VAL2,"") + TEST_RR( "sel r14, r",12,VAL1,", r",10, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe68cffba) " @ sel pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe68fefba) " @ sel r14, pc, r10") + TEST_UNSUPPORTED(__inst_arm(0xe68cefbf) " @ sel r14, r12, pc") + + TEST_R( "ssat16 r0, #12, r",0, HH1,"") + TEST_R( "ssat16 r14, #12, r",12, HH2,"") + TEST_UNSUPPORTED(__inst_arm(0xe6abff3c) " @ ssat16 pc, #12, r12") + + TEST_RR( "sxtab r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sxtab r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "sxtb r8, r",7, HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe6acf47a) " @ sxtab pc,r12, r10, ror #8") + + TEST_R( "rev r0, r",0, VAL1,"") + TEST_R( "rev r14, r",12, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe6bfff3c) " @ rev pc, r12") + + TEST_RR( "sxtah r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sxtah r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "sxth r8, r",7, HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe6bcf47a) " @ sxtah pc,r12, r10, ror #8") + + TEST_R( "rev16 r0, r",0, VAL1,"") + TEST_R( "rev16 r14, r",12, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe6bfffbc) " @ rev16 pc, r12") + + TEST_RR( "uxtab16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uxtab16 r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "uxtb16 r8, r",7, HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe6ccf47a) " @ uxtab16 pc,r12, r10, ror #8") + + TEST_R( "usat16 r0, #12, r",0, HH1,"") + TEST_R( "usat16 r14, #12, r",12, HH2,"") + TEST_UNSUPPORTED(__inst_arm(0xe6ecff3c) " @ usat16 pc, #12, r12") + TEST_UNSUPPORTED(__inst_arm(0xe6ecef3f) " @ usat16 r14, #12, pc") + + TEST_RR( "uxtab r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uxtab r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "uxtb r8, r",7, HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe6ecf47a) " @ uxtab pc,r12, r10, ror #8") + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_R( "rbit r0, r",0, VAL1,"") + TEST_R( "rbit r14, r",12, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe6ffff3c) " @ rbit pc, r12") +#endif + + TEST_RR( "uxtah r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uxtah r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "uxth r8, r",7, HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe6fff077) " @ uxth pc, r7") + TEST_UNSUPPORTED(__inst_arm(0xe6ff807f) " @ uxth r8, pc") + TEST_UNSUPPORTED(__inst_arm(0xe6fcf47a) " @ uxtah pc, r12, r10, ror #8") + TEST_UNSUPPORTED(__inst_arm(0xe6fce47f) " @ uxtah r14, r12, pc, ror #8") + + TEST_R( "revsh r0, r",0, VAL1,"") + TEST_R( "revsh r14, r",12, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe6ffff3c) " @ revsh pc, r12") + TEST_UNSUPPORTED(__inst_arm(0xe6ffef3f) " @ revsh r14, pc") + + TEST_UNSUPPORTED(__inst_arm(0xe6900070) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe69fff7f) "") /* Unallocated space */ + + TEST_UNSUPPORTED(__inst_arm(0xe6d00070) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_arm(0xe6dfff7f) "") /* Unallocated space */ +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + +#if __LINUX_ARM_ARCH__ >= 6 + TEST_GROUP("Signed multiplies") + + TEST_RRR( "smlad r0, r",0, HH1,", r",1, HH2,", r",2, VAL1,"") + TEST_RRR( "smlad r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe70f8a1c) " @ smlad pc, r12, r10, r8") + TEST_RRR( "smladx r0, r",0, HH1,", r",1, HH2,", r",2, VAL1,"") + TEST_RRR( "smladx r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe70f8a3c) " @ smladx pc, r12, r10, r8") + + TEST_RR( "smuad r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "smuad r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe70ffa1c) " @ smuad pc, r12, r10") + TEST_RR( "smuadx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "smuadx r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe70ffa3c) " @ smuadx pc, r12, r10") + + TEST_RRR( "smlsd r0, r",0, HH1,", r",1, HH2,", r",2, VAL1,"") + TEST_RRR( "smlsd r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe70f8a5c) " @ smlsd pc, r12, r10, r8") + TEST_RRR( "smlsdx r0, r",0, HH1,", r",1, HH2,", r",2, VAL1,"") + TEST_RRR( "smlsdx r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe70f8a7c) " @ smlsdx pc, r12, r10, r8") + + TEST_RR( "smusd r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "smusd r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe70ffa5c) " @ smusd pc, r12, r10") + TEST_RR( "smusdx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "smusdx r14, r",12,HH2,", r",10,HH1,"") + TEST_UNSUPPORTED(__inst_arm(0xe70ffa7c) " @ smusdx pc, r12, r10") + + TEST_RRRR( "smlald r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2) + TEST_RRRR( "smlald r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1) + TEST_UNSUPPORTED(__inst_arm(0xe74af819) " @ smlald pc, r10, r9, r8") + TEST_UNSUPPORTED(__inst_arm(0xe74fb819) " @ smlald r11, pc, r9, r8") + TEST_UNSUPPORTED(__inst_arm(0xe74ab81f) " @ smlald r11, r10, pc, r8") + TEST_UNSUPPORTED(__inst_arm(0xe74abf19) " @ smlald r11, r10, r9, pc") + + TEST_RRRR( "smlaldx r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2) + TEST_RRRR( "smlaldx r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1) + TEST_UNSUPPORTED(__inst_arm(0xe74af839) " @ smlaldx pc, r10, r9, r8") + TEST_UNSUPPORTED(__inst_arm(0xe74fb839) " @ smlaldx r11, pc, r9, r8") + + TEST_RRR( "smmla r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL1,"") + TEST_RRR( "smmla r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe75f8a1c) " @ smmla pc, r12, r10, r8") + TEST_RRR( "smmlar r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL1,"") + TEST_RRR( "smmlar r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe75f8a3c) " @ smmlar pc, r12, r10, r8") + + TEST_RR( "smmul r0, r",0, VAL1,", r",1, VAL2,"") + TEST_RR( "smmul r14, r",12,VAL2,", r",10,VAL1,"") + TEST_UNSUPPORTED(__inst_arm(0xe75ffa1c) " @ smmul pc, r12, r10") + TEST_RR( "smmulr r0, r",0, VAL1,", r",1, VAL2,"") + TEST_RR( "smmulr r14, r",12,VAL2,", r",10,VAL1,"") + TEST_UNSUPPORTED(__inst_arm(0xe75ffa3c) " @ smmulr pc, r12, r10") + + TEST_RRR( "smmls r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL1,"") + TEST_RRR( "smmls r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe75f8adc) " @ smmls pc, r12, r10, r8") + TEST_RRR( "smmlsr r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL1,"") + TEST_RRR( "smmlsr r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"") + TEST_UNSUPPORTED(__inst_arm(0xe75f8afc) " @ smmlsr pc, r12, r10, r8") + TEST_UNSUPPORTED(__inst_arm(0xe75e8aff) " @ smmlsr r14, pc, r10, r8") + TEST_UNSUPPORTED(__inst_arm(0xe75e8ffc) " @ smmlsr r14, r12, pc, r8") + TEST_UNSUPPORTED(__inst_arm(0xe75efafc) " @ smmlsr r14, r12, r10, pc") + + TEST_RR( "usad8 r0, r",0, VAL1,", r",1, VAL2,"") + TEST_RR( "usad8 r14, r",12,VAL2,", r",10,VAL1,"") + TEST_UNSUPPORTED(__inst_arm(0xe75ffa1c) " @ usad8 pc, r12, r10") + TEST_UNSUPPORTED(__inst_arm(0xe75efa1f) " @ usad8 r14, pc, r10") + TEST_UNSUPPORTED(__inst_arm(0xe75eff1c) " @ usad8 r14, r12, pc") + + TEST_RRR( "usada8 r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL3,"") + TEST_RRR( "usada8 r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"") + TEST_UNSUPPORTED(__inst_arm(0xe78f8a1c) " @ usada8 pc, r12, r10, r8") + TEST_UNSUPPORTED(__inst_arm(0xe78e8a1f) " @ usada8 r14, pc, r10, r8") + TEST_UNSUPPORTED(__inst_arm(0xe78e8f1c) " @ usada8 r14, r12, pc, r8") +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_GROUP("Bit Field") + + TEST_R( "sbfx r0, r",0 , VAL1,", #0, #31") + TEST_R( "sbfxeq r14, r",12, VAL2,", #8, #16") + TEST_R( "sbfx r4, r",10, VAL1,", #16, #15") + TEST_UNSUPPORTED(__inst_arm(0xe7aff45c) " @ sbfx pc, r12, #8, #16") + + TEST_R( "ubfx r0, r",0 , VAL1,", #0, #31") + TEST_R( "ubfxcs r14, r",12, VAL2,", #8, #16") + TEST_R( "ubfx r4, r",10, VAL1,", #16, #15") + TEST_UNSUPPORTED(__inst_arm(0xe7eff45c) " @ ubfx pc, r12, #8, #16") + TEST_UNSUPPORTED(__inst_arm(0xe7efc45f) " @ ubfx r12, pc, #8, #16") + + TEST_R( "bfc r",0, VAL1,", #4, #20") + TEST_R( "bfcvs r",14,VAL2,", #4, #20") + TEST_R( "bfc r",7, VAL1,", #0, #31") + TEST_R( "bfc r",8, VAL2,", #0, #31") + TEST_UNSUPPORTED(__inst_arm(0xe7def01f) " @ bfc pc, #0, #31"); + + TEST_RR( "bfi r",0, VAL1,", r",0 , VAL2,", #0, #31") + TEST_RR( "bfipl r",12,VAL1,", r",14 , VAL2,", #4, #20") + TEST_UNSUPPORTED(__inst_arm(0xe7d7f21e) " @ bfi pc, r14, #4, #20") + + TEST_UNSUPPORTED(__inst_arm(0x07f000f0) "") /* Permanently UNDEFINED */ + TEST_UNSUPPORTED(__inst_arm(0x07ffffff) "") /* Permanently UNDEFINED */ +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + + TEST_GROUP("Branch, branch with link, and block data transfer") + + TEST_P( "stmda r",0, 16*4,", {r0}") + TEST_P( "stmeqda r",4, 16*4,", {r0-r15}") + TEST_P( "stmneda r",8, 16*4,"!, {r8-r15}") + TEST_P( "stmda r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_P( "stmda r",13,0, "!, {pc}") + + TEST_P( "ldmda r",0, 16*4,", {r0}") + TEST_BF_P("ldmcsda r",4, 15*4,", {r0-r15}") + TEST_BF_P("ldmccda r",7, 15*4,"!, {r8-r15}") + TEST_P( "ldmda r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_BF_P("ldmda r",14,15*4,"!, {pc}") + + TEST_P( "stmia r",0, 16*4,", {r0}") + TEST_P( "stmmiia r",4, 16*4,", {r0-r15}") + TEST_P( "stmplia r",8, 16*4,"!, {r8-r15}") + TEST_P( "stmia r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_P( "stmia r",14,0, "!, {pc}") + + TEST_P( "ldmia r",0, 16*4,", {r0}") + TEST_BF_P("ldmvsia r",4, 0, ", {r0-r15}") + TEST_BF_P("ldmvcia r",7, 8*4, "!, {r8-r15}") + TEST_P( "ldmia r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_BF_P("ldmia r",14,15*4,"!, {pc}") + + TEST_P( "stmdb r",0, 16*4,", {r0}") + TEST_P( "stmhidb r",4, 16*4,", {r0-r15}") + TEST_P( "stmlsdb r",8, 16*4,"!, {r8-r15}") + TEST_P( "stmdb r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_P( "stmdb r",13,4, "!, {pc}") + + TEST_P( "ldmdb r",0, 16*4,", {r0}") + TEST_BF_P("ldmgedb r",4, 16*4,", {r0-r15}") + TEST_BF_P("ldmltdb r",7, 16*4,"!, {r8-r15}") + TEST_P( "ldmdb r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_BF_P("ldmdb r",14,16*4,"!, {pc}") + + TEST_P( "stmib r",0, 16*4,", {r0}") + TEST_P( "stmgtib r",4, 16*4,", {r0-r15}") + TEST_P( "stmleib r",8, 16*4,"!, {r8-r15}") + TEST_P( "stmib r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_P( "stmib r",13,-4, "!, {pc}") + + TEST_P( "ldmib r",0, 16*4,", {r0}") + TEST_BF_P("ldmeqib r",4, -4,", {r0-r15}") + TEST_BF_P("ldmneib r",7, 7*4,"!, {r8-r15}") + TEST_P( "ldmib r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_BF_P("ldmib r",14,14*4,"!, {pc}") + + TEST_P( "stmdb r",13,16*4,"!, {r3-r12,lr}") + TEST_P( "stmeqdb r",13,16*4,"!, {r3-r12}") + TEST_P( "stmnedb r",2, 16*4,", {r3-r12,lr}") + TEST_P( "stmdb r",13,16*4,"!, {r2-r12,lr}") + TEST_P( "stmdb r",0, 16*4,", {r0-r12}") + TEST_P( "stmdb r",0, 16*4,", {r0-r12,lr}") + + TEST_BF_P("ldmia r",13,5*4, "!, {r3-r12,pc}") + TEST_P( "ldmccia r",13,5*4, "!, {r3-r12}") + TEST_BF_P("ldmcsia r",2, 5*4, "!, {r3-r12,pc}") + TEST_BF_P("ldmia r",13,4*4, "!, {r2-r12,pc}") + TEST_P( "ldmia r",0, 16*4,", {r0-r12}") + TEST_P( "ldmia r",0, 16*4,", {r0-r12,lr}") + +#ifdef CONFIG_THUMB2_KERNEL + TEST_ARM_TO_THUMB_INTERWORK_P("ldmplia r",0,15*4,", {pc}") + TEST_ARM_TO_THUMB_INTERWORK_P("ldmmiia r",13,0,", {r0-r15}") +#endif + TEST_BF("b 2f") + TEST_BF("bl 2f") + TEST_BB("b 2b") + TEST_BB("bl 2b") + + TEST_BF("beq 2f") + TEST_BF("bleq 2f") + TEST_BB("bne 2b") + TEST_BB("blne 2b") + + TEST_BF("bgt 2f") + TEST_BF("blgt 2f") + TEST_BB("blt 2b") + TEST_BB("bllt 2b") + + TEST_GROUP("Supervisor Call, and coprocessor instructions") + + /* + * We can't really test these by executing them, so all + * we can do is check that probes are, or are not allowed. + * At the moment none are allowed... + */ +#define TEST_COPROCESSOR(code) TEST_UNSUPPORTED(code) + +#define COPROCESSOR_INSTRUCTIONS_ST_LD(two,cc) \ + TEST_COPROCESSOR("stc"two" 0, cr0, [r13, #4]") \ + TEST_COPROCESSOR("stc"two" 0, cr0, [r13, #-4]") \ + TEST_COPROCESSOR("stc"two" 0, cr0, [r13, #4]!") \ + TEST_COPROCESSOR("stc"two" 0, cr0, [r13, #-4]!") \ + TEST_COPROCESSOR("stc"two" 0, cr0, [r13], #4") \ + TEST_COPROCESSOR("stc"two" 0, cr0, [r13], #-4") \ + TEST_COPROCESSOR("stc"two" 0, cr0, [r13], {1}") \ + TEST_COPROCESSOR("stc"two"l 0, cr0, [r13, #4]") \ + TEST_COPROCESSOR("stc"two"l 0, cr0, [r13, #-4]") \ + TEST_COPROCESSOR("stc"two"l 0, cr0, [r13, #4]!") \ + TEST_COPROCESSOR("stc"two"l 0, cr0, [r13, #-4]!") \ + TEST_COPROCESSOR("stc"two"l 0, cr0, [r13], #4") \ + TEST_COPROCESSOR("stc"two"l 0, cr0, [r13], #-4") \ + TEST_COPROCESSOR("stc"two"l 0, cr0, [r13], {1}") \ + TEST_COPROCESSOR("ldc"two" 0, cr0, [r13, #4]") \ + TEST_COPROCESSOR("ldc"two" 0, cr0, [r13, #-4]") \ + TEST_COPROCESSOR("ldc"two" 0, cr0, [r13, #4]!") \ + TEST_COPROCESSOR("ldc"two" 0, cr0, [r13, #-4]!") \ + TEST_COPROCESSOR("ldc"two" 0, cr0, [r13], #4") \ + TEST_COPROCESSOR("ldc"two" 0, cr0, [r13], #-4") \ + TEST_COPROCESSOR("ldc"two" 0, cr0, [r13], {1}") \ + TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13, #4]") \ + TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13, #-4]") \ + TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13, #4]!") \ + TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13, #-4]!") \ + TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13], #4") \ + TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13], #-4") \ + TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13], {1}") \ + \ + TEST_COPROCESSOR( "stc"two" 0, cr0, [r15, #4]") \ + TEST_COPROCESSOR( "stc"two" 0, cr0, [r15, #-4]") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##daf0001) " @ stc"two" 0, cr0, [r15, #4]!") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##d2f0001) " @ stc"two" 0, cr0, [r15, #-4]!") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##caf0001) " @ stc"two" 0, cr0, [r15], #4") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##c2f0001) " @ stc"two" 0, cr0, [r15], #-4") \ + TEST_COPROCESSOR( "stc"two" 0, cr0, [r15], {1}") \ + TEST_COPROCESSOR( "stc"two"l 0, cr0, [r15, #4]") \ + TEST_COPROCESSOR( "stc"two"l 0, cr0, [r15, #-4]") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##def0001) " @ stc"two"l 0, cr0, [r15, #4]!") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##d6f0001) " @ stc"two"l 0, cr0, [r15, #-4]!") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##cef0001) " @ stc"two"l 0, cr0, [r15], #4") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##c6f0001) " @ stc"two"l 0, cr0, [r15], #-4") \ + TEST_COPROCESSOR( "stc"two"l 0, cr0, [r15], {1}") \ + TEST_COPROCESSOR( "ldc"two" 0, cr0, [r15, #4]") \ + TEST_COPROCESSOR( "ldc"two" 0, cr0, [r15, #-4]") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##dbf0001) " @ ldc"two" 0, cr0, [r15, #4]!") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##d3f0001) " @ ldc"two" 0, cr0, [r15, #-4]!") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##cbf0001) " @ ldc"two" 0, cr0, [r15], #4") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##c3f0001) " @ ldc"two" 0, cr0, [r15], #-4") \ + TEST_COPROCESSOR( "ldc"two" 0, cr0, [r15], {1}") \ + TEST_COPROCESSOR( "ldc"two"l 0, cr0, [r15, #4]") \ + TEST_COPROCESSOR( "ldc"two"l 0, cr0, [r15, #-4]") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##dff0001) " @ ldc"two"l 0, cr0, [r15, #4]!") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##d7f0001) " @ ldc"two"l 0, cr0, [r15, #-4]!") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##cff0001) " @ ldc"two"l 0, cr0, [r15], #4") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##c7f0001) " @ ldc"two"l 0, cr0, [r15], #-4") \ + TEST_COPROCESSOR( "ldc"two"l 0, cr0, [r15], {1}") + +#define COPROCESSOR_INSTRUCTIONS_MC_MR(two,cc) \ + \ + TEST_COPROCESSOR( "mcrr"two" 0, 15, r0, r14, cr0") \ + TEST_COPROCESSOR( "mcrr"two" 15, 0, r14, r0, cr15") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##c4f00f0) " @ mcrr"two" 0, 15, r0, r15, cr0") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##c40ff0f) " @ mcrr"two" 15, 0, r15, r0, cr15") \ + TEST_COPROCESSOR( "mrrc"two" 0, 15, r0, r14, cr0") \ + TEST_COPROCESSOR( "mrrc"two" 15, 0, r14, r0, cr15") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##c5f00f0) " @ mrrc"two" 0, 15, r0, r15, cr0") \ + TEST_UNSUPPORTED(__inst_arm(0x##cc##c50ff0f) " @ mrrc"two" 15, 0, r15, r0, cr15") \ + TEST_COPROCESSOR( "cdp"two" 15, 15, cr15, cr15, cr15, 7") \ + TEST_COPROCESSOR( "cdp"two" 0, 0, cr0, cr0, cr0, 0") \ + TEST_COPROCESSOR( "mcr"two" 15, 7, r15, cr15, cr15, 7") \ + TEST_COPROCESSOR( "mcr"two" 0, 0, r0, cr0, cr0, 0") \ + TEST_COPROCESSOR( "mrc"two" 15, 7, r15, cr15, cr15, 7") \ + TEST_COPROCESSOR( "mrc"two" 0, 0, r0, cr0, cr0, 0") + + COPROCESSOR_INSTRUCTIONS_ST_LD("",e) +#if __LINUX_ARM_ARCH__ >= 5 + COPROCESSOR_INSTRUCTIONS_MC_MR("",e) +#endif + TEST_UNSUPPORTED("svc 0") + TEST_UNSUPPORTED("svc 0xffffff") + + TEST_UNSUPPORTED("svc 0") + + TEST_GROUP("Unconditional instruction") + +#if __LINUX_ARM_ARCH__ >= 6 + TEST_UNSUPPORTED("srsda sp, 0x13") + TEST_UNSUPPORTED("srsdb sp, 0x13") + TEST_UNSUPPORTED("srsia sp, 0x13") + TEST_UNSUPPORTED("srsib sp, 0x13") + TEST_UNSUPPORTED("srsda sp!, 0x13") + TEST_UNSUPPORTED("srsdb sp!, 0x13") + TEST_UNSUPPORTED("srsia sp!, 0x13") + TEST_UNSUPPORTED("srsib sp!, 0x13") + + TEST_UNSUPPORTED("rfeda sp") + TEST_UNSUPPORTED("rfedb sp") + TEST_UNSUPPORTED("rfeia sp") + TEST_UNSUPPORTED("rfeib sp") + TEST_UNSUPPORTED("rfeda sp!") + TEST_UNSUPPORTED("rfedb sp!") + TEST_UNSUPPORTED("rfeia sp!") + TEST_UNSUPPORTED("rfeib sp!") + TEST_UNSUPPORTED(__inst_arm(0xf81d0a00) " @ rfeda pc") + TEST_UNSUPPORTED(__inst_arm(0xf91d0a00) " @ rfedb pc") + TEST_UNSUPPORTED(__inst_arm(0xf89d0a00) " @ rfeia pc") + TEST_UNSUPPORTED(__inst_arm(0xf99d0a00) " @ rfeib pc") + TEST_UNSUPPORTED(__inst_arm(0xf83d0a00) " @ rfeda pc!") + TEST_UNSUPPORTED(__inst_arm(0xf93d0a00) " @ rfedb pc!") + TEST_UNSUPPORTED(__inst_arm(0xf8bd0a00) " @ rfeia pc!") + TEST_UNSUPPORTED(__inst_arm(0xf9bd0a00) " @ rfeib pc!") +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + +#if __LINUX_ARM_ARCH__ >= 6 + TEST_X( "blx __dummy_thumb_subroutine_even", + ".thumb \n\t" + ".space 4 \n\t" + ".type __dummy_thumb_subroutine_even, %%function \n\t" + "__dummy_thumb_subroutine_even: \n\t" + "mov r0, pc \n\t" + "bx lr \n\t" + ".arm \n\t" + ) + TEST( "blx __dummy_thumb_subroutine_even") + + TEST_X( "blx __dummy_thumb_subroutine_odd", + ".thumb \n\t" + ".space 2 \n\t" + ".type __dummy_thumb_subroutine_odd, %%function \n\t" + "__dummy_thumb_subroutine_odd: \n\t" + "mov r0, pc \n\t" + "bx lr \n\t" + ".arm \n\t" + ) + TEST( "blx __dummy_thumb_subroutine_odd") +#endif /* __LINUX_ARM_ARCH__ >= 6 */ + +#if __LINUX_ARM_ARCH__ >= 5 + COPROCESSOR_INSTRUCTIONS_ST_LD("2",f) +#endif +#if __LINUX_ARM_ARCH__ >= 6 + COPROCESSOR_INSTRUCTIONS_MC_MR("2",f) +#endif + + TEST_GROUP("Miscellaneous instructions, memory hints, and Advanced SIMD instructions") + +#if __LINUX_ARM_ARCH__ >= 6 + TEST_UNSUPPORTED("cps 0x13") + TEST_UNSUPPORTED("cpsie i") + TEST_UNSUPPORTED("cpsid i") + TEST_UNSUPPORTED("cpsie i,0x13") + TEST_UNSUPPORTED("cpsid i,0x13") + TEST_UNSUPPORTED("setend le") + TEST_UNSUPPORTED("setend be") +#endif + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_P("pli [r",0,0b,", #16]") + TEST( "pli [pc, #0]") + TEST_RR("pli [r",12,0b,", r",0, 16,"]") + TEST_RR("pli [r",0, 0b,", -r",12,16,", lsl #4]") +#endif + +#if __LINUX_ARM_ARCH__ >= 5 + TEST_P("pld [r",0,32,", #-16]") + TEST( "pld [pc, #0]") + TEST_PR("pld [r",7, 24, ", r",0, 16,"]") + TEST_PR("pld [r",8, 24, ", -r",12,16,", lsl #4]") +#endif + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_SUPPORTED( __inst_arm(0xf590f000) " @ pldw [r0, #0]") + TEST_SUPPORTED( __inst_arm(0xf797f000) " @ pldw [r7, r0]") + TEST_SUPPORTED( __inst_arm(0xf798f18c) " @ pldw [r8, r12, lsl #3]"); +#endif + +#if __LINUX_ARM_ARCH__ >= 7 + TEST_UNSUPPORTED("clrex") + TEST_UNSUPPORTED("dsb") + TEST_UNSUPPORTED("dmb") + TEST_UNSUPPORTED("isb") +#endif + + verbose("\n"); +} + diff --git a/arch/arm/kernel/kprobes-test-thumb.c b/arch/arm/kernel/kprobes-test-thumb.c new file mode 100644 index 00000000000..844dd10d859 --- /dev/null +++ b/arch/arm/kernel/kprobes-test-thumb.c @@ -0,0 +1,1188 @@ +/* + * arch/arm/kernel/kprobes-test-thumb.c + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <asm/opcodes.h> + +#include "kprobes-test.h" + + +#define TEST_ISA "16" + +#define DONT_TEST_IN_ITBLOCK(tests) \ + kprobe_test_flags |= TEST_FLAG_NO_ITBLOCK; \ + tests \ + kprobe_test_flags &= ~TEST_FLAG_NO_ITBLOCK; + +#define CONDITION_INSTRUCTIONS(cc_pos, tests) \ + kprobe_test_cc_position = cc_pos; \ + DONT_TEST_IN_ITBLOCK(tests) \ + kprobe_test_cc_position = 0; + +#define TEST_ITBLOCK(code) \ + kprobe_test_flags |= TEST_FLAG_FULL_ITBLOCK; \ + TESTCASE_START(code) \ + TEST_ARG_END("") \ + "50: nop \n\t" \ + "1: "code" \n\t" \ + " mov r1, #0x11 \n\t" \ + " mov r2, #0x22 \n\t" \ + " mov r3, #0x33 \n\t" \ + "2: nop \n\t" \ + TESTCASE_END \ + kprobe_test_flags &= ~TEST_FLAG_FULL_ITBLOCK; + +#define TEST_THUMB_TO_ARM_INTERWORK_P(code1, reg, val, code2) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_PTR(reg, val) \ + TEST_ARG_REG(14, 99f+1) \ + TEST_ARG_MEM(15, 3f) \ + TEST_ARG_END("") \ + " nop \n\t" /* To align 1f */ \ + "50: nop \n\t" \ + "1: "code1 #reg code2" \n\t" \ + " bx lr \n\t" \ + ".arm \n\t" \ + "3: adr lr, 2f+1 \n\t" \ + " bx lr \n\t" \ + ".thumb \n\t" \ + "2: nop \n\t" \ + TESTCASE_END + + +void kprobe_thumb16_test_cases(void) +{ + kprobe_test_flags = TEST_FLAG_NARROW_INSTR; + + TEST_GROUP("Shift (immediate), add, subtract, move, and compare") + + TEST_R( "lsls r7, r",0,VAL1,", #5") + TEST_R( "lsls r0, r",7,VAL2,", #11") + TEST_R( "lsrs r7, r",0,VAL1,", #5") + TEST_R( "lsrs r0, r",7,VAL2,", #11") + TEST_R( "asrs r7, r",0,VAL1,", #5") + TEST_R( "asrs r0, r",7,VAL2,", #11") + TEST_RR( "adds r2, r",0,VAL1,", r",7,VAL2,"") + TEST_RR( "adds r5, r",7,VAL2,", r",0,VAL2,"") + TEST_RR( "subs r2, r",0,VAL1,", r",7,VAL2,"") + TEST_RR( "subs r5, r",7,VAL2,", r",0,VAL2,"") + TEST_R( "adds r7, r",0,VAL1,", #5") + TEST_R( "adds r0, r",7,VAL2,", #2") + TEST_R( "subs r7, r",0,VAL1,", #5") + TEST_R( "subs r0, r",7,VAL2,", #2") + TEST( "movs.n r0, #0x5f") + TEST( "movs.n r7, #0xa0") + TEST_R( "cmp.n r",0,0x5e, ", #0x5f") + TEST_R( "cmp.n r",5,0x15f,", #0x5f") + TEST_R( "cmp.n r",7,0xa0, ", #0xa0") + TEST_R( "adds.n r",0,VAL1,", #0x5f") + TEST_R( "adds.n r",7,VAL2,", #0xa0") + TEST_R( "subs.n r",0,VAL1,", #0x5f") + TEST_R( "subs.n r",7,VAL2,", #0xa0") + + TEST_GROUP("16-bit Thumb data-processing instructions") + +#define DATA_PROCESSING16(op,val) \ + TEST_RR( op" r",0,VAL1,", r",7,val,"") \ + TEST_RR( op" r",7,VAL2,", r",0,val,"") + + DATA_PROCESSING16("ands",0xf00f00ff) + DATA_PROCESSING16("eors",0xf00f00ff) + DATA_PROCESSING16("lsls",11) + DATA_PROCESSING16("lsrs",11) + DATA_PROCESSING16("asrs",11) + DATA_PROCESSING16("adcs",VAL2) + DATA_PROCESSING16("sbcs",VAL2) + DATA_PROCESSING16("rors",11) + DATA_PROCESSING16("tst",0xf00f00ff) + TEST_R("rsbs r",0,VAL1,", #0") + TEST_R("rsbs r",7,VAL2,", #0") + DATA_PROCESSING16("cmp",0xf00f00ff) + DATA_PROCESSING16("cmn",0xf00f00ff) + DATA_PROCESSING16("orrs",0xf00f00ff) + DATA_PROCESSING16("muls",VAL2) + DATA_PROCESSING16("bics",0xf00f00ff) + DATA_PROCESSING16("mvns",VAL2) + + TEST_GROUP("Special data instructions and branch and exchange") + + TEST_RR( "add r",0, VAL1,", r",7,VAL2,"") + TEST_RR( "add r",3, VAL2,", r",8,VAL3,"") + TEST_RR( "add r",8, VAL3,", r",0,VAL1,"") + TEST_R( "add sp" ", r",8,-8, "") + TEST_R( "add r",14,VAL1,", pc") + TEST_BF_R("add pc" ", r",0,2f-1f-8,"") + TEST_UNSUPPORTED(__inst_thumb16(0x44ff) " @ add pc, pc") + + TEST_RR( "cmp r",3,VAL1,", r",8,VAL2,"") + TEST_RR( "cmp r",8,VAL2,", r",0,VAL1,"") + TEST_R( "cmp sp" ", r",8,-8, "") + + TEST_R( "mov r0, r",7,VAL2,"") + TEST_R( "mov r3, r",8,VAL3,"") + TEST_R( "mov r8, r",0,VAL1,"") + TEST_P( "mov sp, r",8,-8, "") + TEST( "mov lr, pc") + TEST_BF_R("mov pc, r",0,2f, "") + + TEST_BF_R("bx r",0, 2f+1,"") + TEST_BF_R("bx r",14,2f+1,"") + TESTCASE_START("bx pc") + TEST_ARG_REG(14, 99f+1) + TEST_ARG_END("") + " nop \n\t" /* To align the bx pc*/ + "50: nop \n\t" + "1: bx pc \n\t" + " bx lr \n\t" + ".arm \n\t" + " adr lr, 2f+1 \n\t" + " bx lr \n\t" + ".thumb \n\t" + "2: nop \n\t" + TESTCASE_END + + TEST_BF_R("blx r",0, 2f+1,"") + TEST_BB_R("blx r",14,2f+1,"") + TEST_UNSUPPORTED(__inst_thumb16(0x47f8) " @ blx pc") + + TEST_GROUP("Load from Literal Pool") + + TEST_X( "ldr r0, 3f", + ".align \n\t" + "3: .word "__stringify(VAL1)) + TEST_X( "ldr r7, 3f", + ".space 128 \n\t" + ".align \n\t" + "3: .word "__stringify(VAL2)) + + TEST_GROUP("16-bit Thumb Load/store instructions") + + TEST_RPR("str r",0, VAL1,", [r",1, 24,", r",2, 48,"]") + TEST_RPR("str r",7, VAL2,", [r",6, 24,", r",5, 48,"]") + TEST_RPR("strh r",0, VAL1,", [r",1, 24,", r",2, 48,"]") + TEST_RPR("strh r",7, VAL2,", [r",6, 24,", r",5, 48,"]") + TEST_RPR("strb r",0, VAL1,", [r",1, 24,", r",2, 48,"]") + TEST_RPR("strb r",7, VAL2,", [r",6, 24,", r",5, 48,"]") + TEST_PR( "ldrsb r0, [r",1, 24,", r",2, 48,"]") + TEST_PR( "ldrsb r7, [r",6, 24,", r",5, 50,"]") + TEST_PR( "ldr r0, [r",1, 24,", r",2, 48,"]") + TEST_PR( "ldr r7, [r",6, 24,", r",5, 48,"]") + TEST_PR( "ldrh r0, [r",1, 24,", r",2, 48,"]") + TEST_PR( "ldrh r7, [r",6, 24,", r",5, 50,"]") + TEST_PR( "ldrb r0, [r",1, 24,", r",2, 48,"]") + TEST_PR( "ldrb r7, [r",6, 24,", r",5, 50,"]") + TEST_PR( "ldrsh r0, [r",1, 24,", r",2, 48,"]") + TEST_PR( "ldrsh r7, [r",6, 24,", r",5, 50,"]") + + TEST_RP("str r",0, VAL1,", [r",1, 24,", #120]") + TEST_RP("str r",7, VAL2,", [r",6, 24,", #120]") + TEST_P( "ldr r0, [r",1, 24,", #120]") + TEST_P( "ldr r7, [r",6, 24,", #120]") + TEST_RP("strb r",0, VAL1,", [r",1, 24,", #30]") + TEST_RP("strb r",7, VAL2,", [r",6, 24,", #30]") + TEST_P( "ldrb r0, [r",1, 24,", #30]") + TEST_P( "ldrb r7, [r",6, 24,", #30]") + TEST_RP("strh r",0, VAL1,", [r",1, 24,", #60]") + TEST_RP("strh r",7, VAL2,", [r",6, 24,", #60]") + TEST_P( "ldrh r0, [r",1, 24,", #60]") + TEST_P( "ldrh r7, [r",6, 24,", #60]") + + TEST_R( "str r",0, VAL1,", [sp, #0]") + TEST_R( "str r",7, VAL2,", [sp, #160]") + TEST( "ldr r0, [sp, #0]") + TEST( "ldr r7, [sp, #160]") + + TEST_RP("str r",0, VAL1,", [r",0, 24,"]") + TEST_P( "ldr r0, [r",0, 24,"]") + + TEST_GROUP("Generate PC-/SP-relative address") + + TEST("add r0, pc, #4") + TEST("add r7, pc, #1020") + TEST("add r0, sp, #4") + TEST("add r7, sp, #1020") + + TEST_GROUP("Miscellaneous 16-bit instructions") + + TEST_UNSUPPORTED( "cpsie i") + TEST_UNSUPPORTED( "cpsid i") + TEST_UNSUPPORTED( "setend le") + TEST_UNSUPPORTED( "setend be") + + TEST("add sp, #"__stringify(TEST_MEMORY_SIZE)) /* Assumes TEST_MEMORY_SIZE < 0x400 */ + TEST("sub sp, #0x7f*4") + +DONT_TEST_IN_ITBLOCK( + TEST_BF_R( "cbnz r",0,0, ", 2f") + TEST_BF_R( "cbz r",2,-1,", 2f") + TEST_BF_RX( "cbnz r",4,1, ", 2f", SPACE_0x20) + TEST_BF_RX( "cbz r",7,0, ", 2f", SPACE_0x40) +) + TEST_R("sxth r0, r",7, HH1,"") + TEST_R("sxth r7, r",0, HH2,"") + TEST_R("sxtb r0, r",7, HH1,"") + TEST_R("sxtb r7, r",0, HH2,"") + TEST_R("uxth r0, r",7, HH1,"") + TEST_R("uxth r7, r",0, HH2,"") + TEST_R("uxtb r0, r",7, HH1,"") + TEST_R("uxtb r7, r",0, HH2,"") + TEST_R("rev r0, r",7, VAL1,"") + TEST_R("rev r7, r",0, VAL2,"") + TEST_R("rev16 r0, r",7, VAL1,"") + TEST_R("rev16 r7, r",0, VAL2,"") + TEST_UNSUPPORTED(__inst_thumb16(0xba80) "") + TEST_UNSUPPORTED(__inst_thumb16(0xbabf) "") + TEST_R("revsh r0, r",7, VAL1,"") + TEST_R("revsh r7, r",0, VAL2,"") + +#define TEST_POPPC(code, offset) \ + TESTCASE_START(code) \ + TEST_ARG_PTR(13, offset) \ + TEST_ARG_END("") \ + TEST_BRANCH_F(code) \ + TESTCASE_END + + TEST("push {r0}") + TEST("push {r7}") + TEST("push {r14}") + TEST("push {r0-r7,r14}") + TEST("push {r0,r2,r4,r6,r14}") + TEST("push {r1,r3,r5,r7}") + TEST("pop {r0}") + TEST("pop {r7}") + TEST("pop {r0,r2,r4,r6}") + TEST_POPPC("pop {pc}",15*4) + TEST_POPPC("pop {r0-r7,pc}",7*4) + TEST_POPPC("pop {r1,r3,r5,r7,pc}",11*4) + TEST_THUMB_TO_ARM_INTERWORK_P("pop {pc} @ ",13,15*4,"") + TEST_THUMB_TO_ARM_INTERWORK_P("pop {r0-r7,pc} @ ",13,7*4,"") + + TEST_UNSUPPORTED("bkpt.n 0") + TEST_UNSUPPORTED("bkpt.n 255") + + TEST_SUPPORTED("yield") + TEST("sev") + TEST("nop") + TEST("wfi") + TEST_SUPPORTED("wfe") + TEST_UNSUPPORTED(__inst_thumb16(0xbf50) "") /* Unassigned hints */ + TEST_UNSUPPORTED(__inst_thumb16(0xbff0) "") /* Unassigned hints */ + +#define TEST_IT(code, code2) \ + TESTCASE_START(code) \ + TEST_ARG_END("") \ + "50: nop \n\t" \ + "1: "code" \n\t" \ + " "code2" \n\t" \ + "2: nop \n\t" \ + TESTCASE_END + +DONT_TEST_IN_ITBLOCK( + TEST_IT("it eq","moveq r0,#0") + TEST_IT("it vc","movvc r0,#0") + TEST_IT("it le","movle r0,#0") + TEST_IT("ite eq","moveq r0,#0\n\t movne r1,#1") + TEST_IT("itet vc","movvc r0,#0\n\t movvs r1,#1\n\t movvc r2,#2") + TEST_IT("itete le","movle r0,#0\n\t movgt r1,#1\n\t movle r2,#2\n\t movgt r3,#3") + TEST_IT("itttt le","movle r0,#0\n\t movle r1,#1\n\t movle r2,#2\n\t movle r3,#3") + TEST_IT("iteee le","movle r0,#0\n\t movgt r1,#1\n\t movgt r2,#2\n\t movgt r3,#3") +) + + TEST_GROUP("Load and store multiple") + + TEST_P("ldmia r",4, 16*4,"!, {r0,r7}") + TEST_P("ldmia r",7, 16*4,"!, {r0-r6}") + TEST_P("stmia r",4, 16*4,"!, {r0,r7}") + TEST_P("stmia r",0, 16*4,"!, {r0-r7}") + + TEST_GROUP("Conditional branch and Supervisor Call instructions") + +CONDITION_INSTRUCTIONS(8, + TEST_BF("beq 2f") + TEST_BB("bne 2b") + TEST_BF("bgt 2f") + TEST_BB("blt 2b") +) + TEST_UNSUPPORTED(__inst_thumb16(0xde00) "") + TEST_UNSUPPORTED(__inst_thumb16(0xdeff) "") + TEST_UNSUPPORTED("svc #0x00") + TEST_UNSUPPORTED("svc #0xff") + + TEST_GROUP("Unconditional branch") + + TEST_BF( "b 2f") + TEST_BB( "b 2b") + TEST_BF_X("b 2f", SPACE_0x400) + TEST_BB_X("b 2b", SPACE_0x400) + + TEST_GROUP("Testing instructions in IT blocks") + + TEST_ITBLOCK("subs.n r0, r0") + + verbose("\n"); +} + + +void kprobe_thumb32_test_cases(void) +{ + kprobe_test_flags = 0; + + TEST_GROUP("Load/store multiple") + + TEST_UNSUPPORTED("rfedb sp") + TEST_UNSUPPORTED("rfeia sp") + TEST_UNSUPPORTED("rfedb sp!") + TEST_UNSUPPORTED("rfeia sp!") + + TEST_P( "stmia r",0, 16*4,", {r0,r8}") + TEST_P( "stmia r",4, 16*4,", {r0-r12,r14}") + TEST_P( "stmia r",7, 16*4,"!, {r8-r12,r14}") + TEST_P( "stmia r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + + TEST_P( "ldmia r",0, 16*4,", {r0,r8}") + TEST_P( "ldmia r",4, 0, ", {r0-r12,r14}") + TEST_BF_P("ldmia r",5, 8*4, "!, {r6-r12,r15}") + TEST_P( "ldmia r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_BF_P("ldmia r",14,14*4,"!, {r4,pc}") + + TEST_P( "stmdb r",0, 16*4,", {r0,r8}") + TEST_P( "stmdb r",4, 16*4,", {r0-r12,r14}") + TEST_P( "stmdb r",5, 16*4,"!, {r8-r12,r14}") + TEST_P( "stmdb r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + + TEST_P( "ldmdb r",0, 16*4,", {r0,r8}") + TEST_P( "ldmdb r",4, 16*4,", {r0-r12,r14}") + TEST_BF_P("ldmdb r",5, 16*4,"!, {r6-r12,r15}") + TEST_P( "ldmdb r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}") + TEST_BF_P("ldmdb r",14,16*4,"!, {r4,pc}") + + TEST_P( "stmdb r",13,16*4,"!, {r3-r12,lr}") + TEST_P( "stmdb r",13,16*4,"!, {r3-r12}") + TEST_P( "stmdb r",2, 16*4,", {r3-r12,lr}") + TEST_P( "stmdb r",13,16*4,"!, {r2-r12,lr}") + TEST_P( "stmdb r",0, 16*4,", {r0-r12}") + TEST_P( "stmdb r",0, 16*4,", {r0-r12,lr}") + + TEST_BF_P("ldmia r",13,5*4, "!, {r3-r12,pc}") + TEST_P( "ldmia r",13,5*4, "!, {r3-r12}") + TEST_BF_P("ldmia r",2, 5*4, "!, {r3-r12,pc}") + TEST_BF_P("ldmia r",13,4*4, "!, {r2-r12,pc}") + TEST_P( "ldmia r",0, 16*4,", {r0-r12}") + TEST_P( "ldmia r",0, 16*4,", {r0-r12,lr}") + + TEST_THUMB_TO_ARM_INTERWORK_P("ldmia r",0,14*4,", {r12,pc}") + TEST_THUMB_TO_ARM_INTERWORK_P("ldmia r",13,2*4,", {r0-r12,pc}") + + TEST_UNSUPPORTED(__inst_thumb32(0xe88f0101) " @ stmia pc, {r0,r8}") + TEST_UNSUPPORTED(__inst_thumb32(0xe92f5f00) " @ stmdb pc!, {r8-r12,r14}") + TEST_UNSUPPORTED(__inst_thumb32(0xe8bdc000) " @ ldmia r13!, {r14,pc}") + TEST_UNSUPPORTED(__inst_thumb32(0xe93ec000) " @ ldmdb r14!, {r14,pc}") + TEST_UNSUPPORTED(__inst_thumb32(0xe8a73f00) " @ stmia r7!, {r8-r12,sp}") + TEST_UNSUPPORTED(__inst_thumb32(0xe8a79f00) " @ stmia r7!, {r8-r12,pc}") + TEST_UNSUPPORTED(__inst_thumb32(0xe93e2010) " @ ldmdb r14!, {r4,sp}") + + TEST_GROUP("Load/store double or exclusive, table branch") + + TEST_P( "ldrd r0, r1, [r",1, 24,", #-16]") + TEST( "ldrd r12, r14, [sp, #16]") + TEST_P( "ldrd r1, r0, [r",7, 24,", #-16]!") + TEST( "ldrd r14, r12, [sp, #16]!") + TEST_P( "ldrd r1, r0, [r",7, 24,"], #16") + TEST( "ldrd r7, r8, [sp], #-16") + + TEST_X( "ldrd r12, r14, 3f", + ".align 3 \n\t" + "3: .word "__stringify(VAL1)" \n\t" + " .word "__stringify(VAL2)) + + TEST_UNSUPPORTED(__inst_thumb32(0xe9ffec04) " @ ldrd r14, r12, [pc, #16]!") + TEST_UNSUPPORTED(__inst_thumb32(0xe8ffec04) " @ ldrd r14, r12, [pc], #16") + TEST_UNSUPPORTED(__inst_thumb32(0xe9d4d800) " @ ldrd sp, r8, [r4]") + TEST_UNSUPPORTED(__inst_thumb32(0xe9d4f800) " @ ldrd pc, r8, [r4]") + TEST_UNSUPPORTED(__inst_thumb32(0xe9d47d00) " @ ldrd r7, sp, [r4]") + TEST_UNSUPPORTED(__inst_thumb32(0xe9d47f00) " @ ldrd r7, pc, [r4]") + + TEST_RRP("strd r",0, VAL1,", r",1, VAL2,", [r",1, 24,", #-16]") + TEST_RR( "strd r",12,VAL2,", r",14,VAL1,", [sp, #16]") + TEST_RRP("strd r",1, VAL1,", r",0, VAL2,", [r",7, 24,", #-16]!") + TEST_RR( "strd r",14,VAL2,", r",12,VAL1,", [sp, #16]!") + TEST_RRP("strd r",1, VAL1,", r",0, VAL2,", [r",7, 24,"], #16") + TEST_RR( "strd r",7, VAL2,", r",8, VAL1,", [sp], #-16") + TEST_UNSUPPORTED(__inst_thumb32(0xe9efec04) " @ strd r14, r12, [pc, #16]!") + TEST_UNSUPPORTED(__inst_thumb32(0xe8efec04) " @ strd r14, r12, [pc], #16") + + TEST_RX("tbb [pc, r",0, (9f-(1f+4)),"]", + "9: \n\t" + ".byte (2f-1b-4)>>1 \n\t" + ".byte (3f-1b-4)>>1 \n\t" + "3: mvn r0, r0 \n\t" + "2: nop \n\t") + + TEST_RX("tbb [pc, r",4, (9f-(1f+4)+1),"]", + "9: \n\t" + ".byte (2f-1b-4)>>1 \n\t" + ".byte (3f-1b-4)>>1 \n\t" + "3: mvn r0, r0 \n\t" + "2: nop \n\t") + + TEST_RRX("tbb [r",1,9f,", r",2,0,"]", + "9: \n\t" + ".byte (2f-1b-4)>>1 \n\t" + ".byte (3f-1b-4)>>1 \n\t" + "3: mvn r0, r0 \n\t" + "2: nop \n\t") + + TEST_RX("tbh [pc, r",7, (9f-(1f+4))>>1,"]", + "9: \n\t" + ".short (2f-1b-4)>>1 \n\t" + ".short (3f-1b-4)>>1 \n\t" + "3: mvn r0, r0 \n\t" + "2: nop \n\t") + + TEST_RX("tbh [pc, r",12, ((9f-(1f+4))>>1)+1,"]", + "9: \n\t" + ".short (2f-1b-4)>>1 \n\t" + ".short (3f-1b-4)>>1 \n\t" + "3: mvn r0, r0 \n\t" + "2: nop \n\t") + + TEST_RRX("tbh [r",1,9f, ", r",14,1,"]", + "9: \n\t" + ".short (2f-1b-4)>>1 \n\t" + ".short (3f-1b-4)>>1 \n\t" + "3: mvn r0, r0 \n\t" + "2: nop \n\t") + + TEST_UNSUPPORTED(__inst_thumb32(0xe8d1f01f) " @ tbh [r1, pc]") + TEST_UNSUPPORTED(__inst_thumb32(0xe8d1f01d) " @ tbh [r1, sp]") + TEST_UNSUPPORTED(__inst_thumb32(0xe8ddf012) " @ tbh [sp, r2]") + + TEST_UNSUPPORTED("strexb r0, r1, [r2]") + TEST_UNSUPPORTED("strexh r0, r1, [r2]") + TEST_UNSUPPORTED("strexd r0, r1, [r2]") + TEST_UNSUPPORTED("ldrexb r0, [r1]") + TEST_UNSUPPORTED("ldrexh r0, [r1]") + TEST_UNSUPPORTED("ldrexd r0, [r1]") + + TEST_GROUP("Data-processing (shifted register) and (modified immediate)") + +#define _DATA_PROCESSING32_DNM(op,s,val) \ + TEST_RR(op s".w r0, r",1, VAL1,", r",2, val, "") \ + TEST_RR(op s" r1, r",1, VAL1,", r",2, val, ", lsl #3") \ + TEST_RR(op s" r2, r",3, VAL1,", r",2, val, ", lsr #4") \ + TEST_RR(op s" r3, r",3, VAL1,", r",2, val, ", asr #5") \ + TEST_RR(op s" r4, r",5, VAL1,", r",2, N(val),", asr #6") \ + TEST_RR(op s" r5, r",5, VAL1,", r",2, val, ", ror #7") \ + TEST_RR(op s" r8, r",9, VAL1,", r",10,val, ", rrx") \ + TEST_R( op s" r0, r",11,VAL1,", #0x00010001") \ + TEST_R( op s" r11, r",0, VAL1,", #0xf5000000") \ + TEST_R( op s" r7, r",8, VAL2,", #0x000af000") + +#define DATA_PROCESSING32_DNM(op,val) \ + _DATA_PROCESSING32_DNM(op,"",val) \ + _DATA_PROCESSING32_DNM(op,"s",val) + +#define DATA_PROCESSING32_NM(op,val) \ + TEST_RR(op".w r",1, VAL1,", r",2, val, "") \ + TEST_RR(op" r",1, VAL1,", r",2, val, ", lsl #3") \ + TEST_RR(op" r",3, VAL1,", r",2, val, ", lsr #4") \ + TEST_RR(op" r",3, VAL1,", r",2, val, ", asr #5") \ + TEST_RR(op" r",5, VAL1,", r",2, N(val),", asr #6") \ + TEST_RR(op" r",5, VAL1,", r",2, val, ", ror #7") \ + TEST_RR(op" r",9, VAL1,", r",10,val, ", rrx") \ + TEST_R( op" r",11,VAL1,", #0x00010001") \ + TEST_R( op" r",0, VAL1,", #0xf5000000") \ + TEST_R( op" r",8, VAL2,", #0x000af000") + +#define _DATA_PROCESSING32_DM(op,s,val) \ + TEST_R( op s".w r0, r",14, val, "") \ + TEST_R( op s" r1, r",12, val, ", lsl #3") \ + TEST_R( op s" r2, r",11, val, ", lsr #4") \ + TEST_R( op s" r3, r",10, val, ", asr #5") \ + TEST_R( op s" r4, r",9, N(val),", asr #6") \ + TEST_R( op s" r5, r",8, val, ", ror #7") \ + TEST_R( op s" r8, r",7,val, ", rrx") \ + TEST( op s" r0, #0x00010001") \ + TEST( op s" r11, #0xf5000000") \ + TEST( op s" r7, #0x000af000") \ + TEST( op s" r4, #0x00005a00") + +#define DATA_PROCESSING32_DM(op,val) \ + _DATA_PROCESSING32_DM(op,"",val) \ + _DATA_PROCESSING32_DM(op,"s",val) + + DATA_PROCESSING32_DNM("and",0xf00f00ff) + DATA_PROCESSING32_NM("tst",0xf00f00ff) + DATA_PROCESSING32_DNM("bic",0xf00f00ff) + DATA_PROCESSING32_DNM("orr",0xf00f00ff) + DATA_PROCESSING32_DM("mov",VAL2) + DATA_PROCESSING32_DNM("orn",0xf00f00ff) + DATA_PROCESSING32_DM("mvn",VAL2) + DATA_PROCESSING32_DNM("eor",0xf00f00ff) + DATA_PROCESSING32_NM("teq",0xf00f00ff) + DATA_PROCESSING32_DNM("add",VAL2) + DATA_PROCESSING32_NM("cmn",VAL2) + DATA_PROCESSING32_DNM("adc",VAL2) + DATA_PROCESSING32_DNM("sbc",VAL2) + DATA_PROCESSING32_DNM("sub",VAL2) + DATA_PROCESSING32_NM("cmp",VAL2) + DATA_PROCESSING32_DNM("rsb",VAL2) + + TEST_RR("pkhbt r0, r",0, HH1,", r",1, HH2,"") + TEST_RR("pkhbt r14,r",12, HH1,", r",10,HH2,", lsl #2") + TEST_RR("pkhtb r0, r",0, HH1,", r",1, HH2,"") + TEST_RR("pkhtb r14,r",12, HH1,", r",10,HH2,", asr #2") + + TEST_UNSUPPORTED(__inst_thumb32(0xea170f0d) " @ tst.w r7, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xea170f0f) " @ tst.w r7, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xea1d0f07) " @ tst.w sp, r7") + TEST_UNSUPPORTED(__inst_thumb32(0xea1f0f07) " @ tst.w pc, r7") + TEST_UNSUPPORTED(__inst_thumb32(0xf01d1f08) " @ tst sp, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf01f1f08) " @ tst pc, #0x00080008") + + TEST_UNSUPPORTED(__inst_thumb32(0xea970f0d) " @ teq.w r7, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xea970f0f) " @ teq.w r7, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xea9d0f07) " @ teq.w sp, r7") + TEST_UNSUPPORTED(__inst_thumb32(0xea9f0f07) " @ teq.w pc, r7") + TEST_UNSUPPORTED(__inst_thumb32(0xf09d1f08) " @ tst sp, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf09f1f08) " @ tst pc, #0x00080008") + + TEST_UNSUPPORTED(__inst_thumb32(0xeb170f0d) " @ cmn.w r7, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xeb170f0f) " @ cmn.w r7, pc") + TEST_P("cmn.w sp, r",7,0,"") + TEST_UNSUPPORTED(__inst_thumb32(0xeb1f0f07) " @ cmn.w pc, r7") + TEST( "cmn sp, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf11f1f08) " @ cmn pc, #0x00080008") + + TEST_UNSUPPORTED(__inst_thumb32(0xebb70f0d) " @ cmp.w r7, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xebb70f0f) " @ cmp.w r7, pc") + TEST_P("cmp.w sp, r",7,0,"") + TEST_UNSUPPORTED(__inst_thumb32(0xebbf0f07) " @ cmp.w pc, r7") + TEST( "cmp sp, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf1bf1f08) " @ cmp pc, #0x00080008") + + TEST_UNSUPPORTED(__inst_thumb32(0xea5f070d) " @ movs.w r7, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xea5f070f) " @ movs.w r7, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xea5f0d07) " @ movs.w sp, r7") + TEST_UNSUPPORTED(__inst_thumb32(0xea4f0f07) " @ mov.w pc, r7") + TEST_UNSUPPORTED(__inst_thumb32(0xf04f1d08) " @ mov sp, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf04f1f08) " @ mov pc, #0x00080008") + + TEST_R("add.w r0, sp, r",1, 4,"") + TEST_R("adds r0, sp, r",1, 4,", asl #3") + TEST_R("add r0, sp, r",1, 4,", asl #4") + TEST_R("add r0, sp, r",1, 16,", ror #1") + TEST_R("add.w sp, sp, r",1, 4,"") + TEST_R("add sp, sp, r",1, 4,", asl #3") + TEST_UNSUPPORTED(__inst_thumb32(0xeb0d1d01) " @ add sp, sp, r1, asl #4") + TEST_UNSUPPORTED(__inst_thumb32(0xeb0d0d71) " @ add sp, sp, r1, ror #1") + TEST( "add.w r0, sp, #24") + TEST( "add.w sp, sp, #24") + TEST_UNSUPPORTED(__inst_thumb32(0xeb0d0f01) " @ add pc, sp, r1") + TEST_UNSUPPORTED(__inst_thumb32(0xeb0d000f) " @ add r0, sp, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xeb0d000d) " @ add r0, sp, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xeb0d0d0f) " @ add sp, sp, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xeb0d0d0d) " @ add sp, sp, sp") + + TEST_R("sub.w r0, sp, r",1, 4,"") + TEST_R("subs r0, sp, r",1, 4,", asl #3") + TEST_R("sub r0, sp, r",1, 4,", asl #4") + TEST_R("sub r0, sp, r",1, 16,", ror #1") + TEST_R("sub.w sp, sp, r",1, 4,"") + TEST_R("sub sp, sp, r",1, 4,", asl #3") + TEST_UNSUPPORTED(__inst_thumb32(0xebad1d01) " @ sub sp, sp, r1, asl #4") + TEST_UNSUPPORTED(__inst_thumb32(0xebad0d71) " @ sub sp, sp, r1, ror #1") + TEST_UNSUPPORTED(__inst_thumb32(0xebad0f01) " @ sub pc, sp, r1") + TEST( "sub.w r0, sp, #24") + TEST( "sub.w sp, sp, #24") + + TEST_UNSUPPORTED(__inst_thumb32(0xea02010f) " @ and r1, r2, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xea0f0103) " @ and r1, pc, r3") + TEST_UNSUPPORTED(__inst_thumb32(0xea020f03) " @ and pc, r2, r3") + TEST_UNSUPPORTED(__inst_thumb32(0xea02010d) " @ and r1, r2, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xea0d0103) " @ and r1, sp, r3") + TEST_UNSUPPORTED(__inst_thumb32(0xea020d03) " @ and sp, r2, r3") + TEST_UNSUPPORTED(__inst_thumb32(0xf00d1108) " @ and r1, sp, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf00f1108) " @ and r1, pc, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf0021d08) " @ and sp, r8, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf0021f08) " @ and pc, r8, #0x00080008") + + TEST_UNSUPPORTED(__inst_thumb32(0xeb02010f) " @ add r1, r2, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xeb0f0103) " @ add r1, pc, r3") + TEST_UNSUPPORTED(__inst_thumb32(0xeb020f03) " @ add pc, r2, r3") + TEST_UNSUPPORTED(__inst_thumb32(0xeb02010d) " @ add r1, r2, sp") + TEST_SUPPORTED( __inst_thumb32(0xeb0d0103) " @ add r1, sp, r3") + TEST_UNSUPPORTED(__inst_thumb32(0xeb020d03) " @ add sp, r2, r3") + TEST_SUPPORTED( __inst_thumb32(0xf10d1108) " @ add r1, sp, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf10d1f08) " @ add pc, sp, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf10f1108) " @ add r1, pc, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf1021d08) " @ add sp, r8, #0x00080008") + TEST_UNSUPPORTED(__inst_thumb32(0xf1021f08) " @ add pc, r8, #0x00080008") + + TEST_UNSUPPORTED(__inst_thumb32(0xeaa00000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xeaf00000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xeb200000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xeb800000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xebe00000) "") + + TEST_UNSUPPORTED(__inst_thumb32(0xf0a00000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xf0c00000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xf0f00000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xf1200000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xf1800000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xf1e00000) "") + + TEST_GROUP("Coprocessor instructions") + + TEST_UNSUPPORTED(__inst_thumb32(0xec000000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xeff00000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xfc000000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xfff00000) "") + + TEST_GROUP("Data-processing (plain binary immediate)") + + TEST_R("addw r0, r",1, VAL1,", #0x123") + TEST( "addw r14, sp, #0xf5a") + TEST( "addw sp, sp, #0x20") + TEST( "addw r7, pc, #0x888") + TEST_UNSUPPORTED(__inst_thumb32(0xf20f1f20) " @ addw pc, pc, #0x120") + TEST_UNSUPPORTED(__inst_thumb32(0xf20d1f20) " @ addw pc, sp, #0x120") + TEST_UNSUPPORTED(__inst_thumb32(0xf20f1d20) " @ addw sp, pc, #0x120") + TEST_UNSUPPORTED(__inst_thumb32(0xf2001d20) " @ addw sp, r0, #0x120") + + TEST_R("subw r0, r",1, VAL1,", #0x123") + TEST( "subw r14, sp, #0xf5a") + TEST( "subw sp, sp, #0x20") + TEST( "subw r7, pc, #0x888") + TEST_UNSUPPORTED(__inst_thumb32(0xf2af1f20) " @ subw pc, pc, #0x120") + TEST_UNSUPPORTED(__inst_thumb32(0xf2ad1f20) " @ subw pc, sp, #0x120") + TEST_UNSUPPORTED(__inst_thumb32(0xf2af1d20) " @ subw sp, pc, #0x120") + TEST_UNSUPPORTED(__inst_thumb32(0xf2a01d20) " @ subw sp, r0, #0x120") + + TEST("movw r0, #0") + TEST("movw r0, #0xffff") + TEST("movw lr, #0xffff") + TEST_UNSUPPORTED(__inst_thumb32(0xf2400d00) " @ movw sp, #0") + TEST_UNSUPPORTED(__inst_thumb32(0xf2400f00) " @ movw pc, #0") + + TEST_R("movt r",0, VAL1,", #0") + TEST_R("movt r",0, VAL2,", #0xffff") + TEST_R("movt r",14,VAL1,", #0xffff") + TEST_UNSUPPORTED(__inst_thumb32(0xf2c00d00) " @ movt sp, #0") + TEST_UNSUPPORTED(__inst_thumb32(0xf2c00f00) " @ movt pc, #0") + + TEST_R( "ssat r0, #24, r",0, VAL1,"") + TEST_R( "ssat r14, #24, r",12, VAL2,"") + TEST_R( "ssat r0, #24, r",0, VAL1,", lsl #8") + TEST_R( "ssat r14, #24, r",12, VAL2,", asr #8") + TEST_UNSUPPORTED(__inst_thumb32(0xf30c0d17) " @ ssat sp, #24, r12") + TEST_UNSUPPORTED(__inst_thumb32(0xf30c0f17) " @ ssat pc, #24, r12") + TEST_UNSUPPORTED(__inst_thumb32(0xf30d0c17) " @ ssat r12, #24, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xf30f0c17) " @ ssat r12, #24, pc") + + TEST_R( "usat r0, #24, r",0, VAL1,"") + TEST_R( "usat r14, #24, r",12, VAL2,"") + TEST_R( "usat r0, #24, r",0, VAL1,", lsl #8") + TEST_R( "usat r14, #24, r",12, VAL2,", asr #8") + TEST_UNSUPPORTED(__inst_thumb32(0xf38c0d17) " @ usat sp, #24, r12") + TEST_UNSUPPORTED(__inst_thumb32(0xf38c0f17) " @ usat pc, #24, r12") + TEST_UNSUPPORTED(__inst_thumb32(0xf38d0c17) " @ usat r12, #24, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xf38f0c17) " @ usat r12, #24, pc") + + TEST_R( "ssat16 r0, #12, r",0, HH1,"") + TEST_R( "ssat16 r14, #12, r",12, HH2,"") + TEST_UNSUPPORTED(__inst_thumb32(0xf32c0d0b) " @ ssat16 sp, #12, r12") + TEST_UNSUPPORTED(__inst_thumb32(0xf32c0f0b) " @ ssat16 pc, #12, r12") + TEST_UNSUPPORTED(__inst_thumb32(0xf32d0c0b) " @ ssat16 r12, #12, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xf32f0c0b) " @ ssat16 r12, #12, pc") + + TEST_R( "usat16 r0, #12, r",0, HH1,"") + TEST_R( "usat16 r14, #12, r",12, HH2,"") + TEST_UNSUPPORTED(__inst_thumb32(0xf3ac0d0b) " @ usat16 sp, #12, r12") + TEST_UNSUPPORTED(__inst_thumb32(0xf3ac0f0b) " @ usat16 pc, #12, r12") + TEST_UNSUPPORTED(__inst_thumb32(0xf3ad0c0b) " @ usat16 r12, #12, sp") + TEST_UNSUPPORTED(__inst_thumb32(0xf3af0c0b) " @ usat16 r12, #12, pc") + + TEST_R( "sbfx r0, r",0 , VAL1,", #0, #31") + TEST_R( "sbfx r14, r",12, VAL2,", #8, #16") + TEST_R( "sbfx r4, r",10, VAL1,", #16, #15") + TEST_UNSUPPORTED(__inst_thumb32(0xf34c2d0f) " @ sbfx sp, r12, #8, #16") + TEST_UNSUPPORTED(__inst_thumb32(0xf34c2f0f) " @ sbfx pc, r12, #8, #16") + TEST_UNSUPPORTED(__inst_thumb32(0xf34d2c0f) " @ sbfx r12, sp, #8, #16") + TEST_UNSUPPORTED(__inst_thumb32(0xf34f2c0f) " @ sbfx r12, pc, #8, #16") + + TEST_R( "ubfx r0, r",0 , VAL1,", #0, #31") + TEST_R( "ubfx r14, r",12, VAL2,", #8, #16") + TEST_R( "ubfx r4, r",10, VAL1,", #16, #15") + TEST_UNSUPPORTED(__inst_thumb32(0xf3cc2d0f) " @ ubfx sp, r12, #8, #16") + TEST_UNSUPPORTED(__inst_thumb32(0xf3cc2f0f) " @ ubfx pc, r12, #8, #16") + TEST_UNSUPPORTED(__inst_thumb32(0xf3cd2c0f) " @ ubfx r12, sp, #8, #16") + TEST_UNSUPPORTED(__inst_thumb32(0xf3cf2c0f) " @ ubfx r12, pc, #8, #16") + + TEST_R( "bfc r",0, VAL1,", #4, #20") + TEST_R( "bfc r",14,VAL2,", #4, #20") + TEST_R( "bfc r",7, VAL1,", #0, #31") + TEST_R( "bfc r",8, VAL2,", #0, #31") + TEST_UNSUPPORTED(__inst_thumb32(0xf36f0d1e) " @ bfc sp, #0, #31") + TEST_UNSUPPORTED(__inst_thumb32(0xf36f0f1e) " @ bfc pc, #0, #31") + + TEST_RR( "bfi r",0, VAL1,", r",0 , VAL2,", #0, #31") + TEST_RR( "bfi r",12,VAL1,", r",14 , VAL2,", #4, #20") + TEST_UNSUPPORTED(__inst_thumb32(0xf36e1d17) " @ bfi sp, r14, #4, #20") + TEST_UNSUPPORTED(__inst_thumb32(0xf36e1f17) " @ bfi pc, r14, #4, #20") + TEST_UNSUPPORTED(__inst_thumb32(0xf36d1e17) " @ bfi r14, sp, #4, #20") + + TEST_GROUP("Branches and miscellaneous control") + +CONDITION_INSTRUCTIONS(22, + TEST_BF("beq.w 2f") + TEST_BB("bne.w 2b") + TEST_BF("bgt.w 2f") + TEST_BB("blt.w 2b") + TEST_BF_X("bpl.w 2f", SPACE_0x1000) +) + + TEST_UNSUPPORTED("msr cpsr, r0") + TEST_UNSUPPORTED("msr cpsr_f, r1") + TEST_UNSUPPORTED("msr spsr, r2") + + TEST_UNSUPPORTED("cpsie.w i") + TEST_UNSUPPORTED("cpsid.w i") + TEST_UNSUPPORTED("cps 0x13") + + TEST_SUPPORTED("yield.w") + TEST("sev.w") + TEST("nop.w") + TEST("wfi.w") + TEST_SUPPORTED("wfe.w") + TEST_UNSUPPORTED("dbg.w #0") + + TEST_UNSUPPORTED("clrex") + TEST_UNSUPPORTED("dsb") + TEST_UNSUPPORTED("dmb") + TEST_UNSUPPORTED("isb") + + TEST_UNSUPPORTED("bxj r0") + + TEST_UNSUPPORTED("subs pc, lr, #4") + + TEST("mrs r0, cpsr") + TEST("mrs r14, cpsr") + TEST_UNSUPPORTED(__inst_thumb32(0xf3ef8d00) " @ mrs sp, spsr") + TEST_UNSUPPORTED(__inst_thumb32(0xf3ef8f00) " @ mrs pc, spsr") + TEST_UNSUPPORTED("mrs r0, spsr") + TEST_UNSUPPORTED("mrs lr, spsr") + + TEST_UNSUPPORTED(__inst_thumb32(0xf7f08000) " @ smc #0") + + TEST_UNSUPPORTED(__inst_thumb32(0xf7f0a000) " @ undefeined") + + TEST_BF( "b.w 2f") + TEST_BB( "b.w 2b") + TEST_BF_X("b.w 2f", SPACE_0x1000) + + TEST_BF( "bl.w 2f") + TEST_BB( "bl.w 2b") + TEST_BB_X("bl.w 2b", SPACE_0x1000) + + TEST_X( "blx __dummy_arm_subroutine", + ".arm \n\t" + ".align \n\t" + ".type __dummy_arm_subroutine, %%function \n\t" + "__dummy_arm_subroutine: \n\t" + "mov r0, pc \n\t" + "bx lr \n\t" + ".thumb \n\t" + ) + TEST( "blx __dummy_arm_subroutine") + + TEST_GROUP("Store single data item") + +#define SINGLE_STORE(size) \ + TEST_RP( "str"size" r",0, VAL1,", [r",11,-1024,", #1024]") \ + TEST_RP( "str"size" r",14,VAL2,", [r",1, -1024,", #1080]") \ + TEST_RP( "str"size" r",0, VAL1,", [r",11,256, ", #-120]") \ + TEST_RP( "str"size" r",14,VAL2,", [r",1, 256, ", #-128]") \ + TEST_RP( "str"size" r",0, VAL1,", [r",11,24, "], #120") \ + TEST_RP( "str"size" r",14,VAL2,", [r",1, 24, "], #128") \ + TEST_RP( "str"size" r",0, VAL1,", [r",11,24, "], #-120") \ + TEST_RP( "str"size" r",14,VAL2,", [r",1, 24, "], #-128") \ + TEST_RP( "str"size" r",0, VAL1,", [r",11,24, ", #120]!") \ + TEST_RP( "str"size" r",14,VAL2,", [r",1, 24, ", #128]!") \ + TEST_RP( "str"size" r",0, VAL1,", [r",11,256, ", #-120]!") \ + TEST_RP( "str"size" r",14,VAL2,", [r",1, 256, ", #-128]!") \ + TEST_RPR("str"size".w r",0, VAL1,", [r",1, 0,", r",2, 4,"]") \ + TEST_RPR("str"size" r",14,VAL2,", [r",10,0,", r",11,4,", lsl #1]") \ + TEST_R( "str"size".w r",7, VAL1,", [sp, #24]") \ + TEST_RP( "str"size".w r",0, VAL2,", [r",0,0, "]") \ + TEST_UNSUPPORTED("str"size"t r0, [r1, #4]") + + SINGLE_STORE("b") + SINGLE_STORE("h") + SINGLE_STORE("") + + TEST("str sp, [sp]") + TEST_UNSUPPORTED(__inst_thumb32(0xf8cfe000) " @ str r14, [pc]") + TEST_UNSUPPORTED(__inst_thumb32(0xf8cef000) " @ str pc, [r14]") + + TEST_GROUP("Advanced SIMD element or structure load/store instructions") + + TEST_UNSUPPORTED(__inst_thumb32(0xf9000000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xf92fffff) "") + TEST_UNSUPPORTED(__inst_thumb32(0xf9800000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xf9efffff) "") + + TEST_GROUP("Load single data item and memory hints") + +#define SINGLE_LOAD(size) \ + TEST_P( "ldr"size" r0, [r",11,-1024, ", #1024]") \ + TEST_P( "ldr"size" r14, [r",1, -1024,", #1080]") \ + TEST_P( "ldr"size" r0, [r",11,256, ", #-120]") \ + TEST_P( "ldr"size" r14, [r",1, 256, ", #-128]") \ + TEST_P( "ldr"size" r0, [r",11,24, "], #120") \ + TEST_P( "ldr"size" r14, [r",1, 24, "], #128") \ + TEST_P( "ldr"size" r0, [r",11,24, "], #-120") \ + TEST_P( "ldr"size" r14, [r",1,24, "], #-128") \ + TEST_P( "ldr"size" r0, [r",11,24, ", #120]!") \ + TEST_P( "ldr"size" r14, [r",1, 24, ", #128]!") \ + TEST_P( "ldr"size" r0, [r",11,256, ", #-120]!") \ + TEST_P( "ldr"size" r14, [r",1, 256, ", #-128]!") \ + TEST_PR("ldr"size".w r0, [r",1, 0,", r",2, 4,"]") \ + TEST_PR("ldr"size" r14, [r",10,0,", r",11,4,", lsl #1]") \ + TEST_X( "ldr"size".w r0, 3f", \ + ".align 3 \n\t" \ + "3: .word "__stringify(VAL1)) \ + TEST_X( "ldr"size".w r14, 3f", \ + ".align 3 \n\t" \ + "3: .word "__stringify(VAL2)) \ + TEST( "ldr"size".w r7, 3b") \ + TEST( "ldr"size".w r7, [sp, #24]") \ + TEST_P( "ldr"size".w r0, [r",0,0, "]") \ + TEST_UNSUPPORTED("ldr"size"t r0, [r1, #4]") + + SINGLE_LOAD("b") + SINGLE_LOAD("sb") + SINGLE_LOAD("h") + SINGLE_LOAD("sh") + SINGLE_LOAD("") + + TEST_BF_P("ldr pc, [r",14, 15*4,"]") + TEST_P( "ldr sp, [r",14, 13*4,"]") + TEST_BF_R("ldr pc, [sp, r",14, 15*4,"]") + TEST_R( "ldr sp, [sp, r",14, 13*4,"]") + TEST_THUMB_TO_ARM_INTERWORK_P("ldr pc, [r",0,0,", #15*4]") + TEST_SUPPORTED("ldr sp, 99f") + TEST_SUPPORTED("ldr pc, 99f") + + TEST_UNSUPPORTED(__inst_thumb32(0xf854700d) " @ ldr r7, [r4, sp]") + TEST_UNSUPPORTED(__inst_thumb32(0xf854700f) " @ ldr r7, [r4, pc]") + TEST_UNSUPPORTED(__inst_thumb32(0xf814700d) " @ ldrb r7, [r4, sp]") + TEST_UNSUPPORTED(__inst_thumb32(0xf814700f) " @ ldrb r7, [r4, pc]") + TEST_UNSUPPORTED(__inst_thumb32(0xf89fd004) " @ ldrb sp, 99f") + TEST_UNSUPPORTED(__inst_thumb32(0xf814d008) " @ ldrb sp, [r4, r8]") + TEST_UNSUPPORTED(__inst_thumb32(0xf894d000) " @ ldrb sp, [r4]") + + TEST_UNSUPPORTED(__inst_thumb32(0xf8600000) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xf9ffffff) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xf9500000) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xf95fffff) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xf8000800) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xf97ffaff) "") /* Unallocated space */ + + TEST( "pli [pc, #4]") + TEST( "pli [pc, #-4]") + TEST( "pld [pc, #4]") + TEST( "pld [pc, #-4]") + + TEST_P( "pld [r",0,-1024,", #1024]") + TEST( __inst_thumb32(0xf8b0f400) " @ pldw [r0, #1024]") + TEST_P( "pli [r",4, 0b,", #1024]") + TEST_P( "pld [r",7, 120,", #-120]") + TEST( __inst_thumb32(0xf837fc78) " @ pldw [r7, #-120]") + TEST_P( "pli [r",11,120,", #-120]") + TEST( "pld [sp, #0]") + + TEST_PR("pld [r",7, 24, ", r",0, 16,"]") + TEST_PR("pld [r",8, 24, ", r",12,16,", lsl #3]") + TEST_SUPPORTED(__inst_thumb32(0xf837f000) " @ pldw [r7, r0]") + TEST_SUPPORTED(__inst_thumb32(0xf838f03c) " @ pldw [r8, r12, lsl #3]"); + TEST_RR("pli [r",12,0b,", r",0, 16,"]") + TEST_RR("pli [r",0, 0b,", r",12,16,", lsl #3]") + TEST_R( "pld [sp, r",1, 16,"]") + TEST_UNSUPPORTED(__inst_thumb32(0xf817f00d) " @pld [r7, sp]") + TEST_UNSUPPORTED(__inst_thumb32(0xf817f00f) " @pld [r7, pc]") + + TEST_GROUP("Data-processing (register)") + +#define SHIFTS32(op) \ + TEST_RR(op" r0, r",1, VAL1,", r",2, 3, "") \ + TEST_RR(op" r14, r",12,VAL2,", r",11,10,"") + + SHIFTS32("lsl") + SHIFTS32("lsls") + SHIFTS32("lsr") + SHIFTS32("lsrs") + SHIFTS32("asr") + SHIFTS32("asrs") + SHIFTS32("ror") + SHIFTS32("rors") + + TEST_UNSUPPORTED(__inst_thumb32(0xfa01ff02) " @ lsl pc, r1, r2") + TEST_UNSUPPORTED(__inst_thumb32(0xfa01fd02) " @ lsl sp, r1, r2") + TEST_UNSUPPORTED(__inst_thumb32(0xfa0ff002) " @ lsl r0, pc, r2") + TEST_UNSUPPORTED(__inst_thumb32(0xfa0df002) " @ lsl r0, sp, r2") + TEST_UNSUPPORTED(__inst_thumb32(0xfa01f00f) " @ lsl r0, r1, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xfa01f00d) " @ lsl r0, r1, sp") + + TEST_RR( "sxtah r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sxtah r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "sxth r8, r",7, HH1,"") + + TEST_UNSUPPORTED(__inst_thumb32(0xfa0fff87) " @ sxth pc, r7"); + TEST_UNSUPPORTED(__inst_thumb32(0xfa0ffd87) " @ sxth sp, r7"); + TEST_UNSUPPORTED(__inst_thumb32(0xfa0ff88f) " @ sxth r8, pc"); + TEST_UNSUPPORTED(__inst_thumb32(0xfa0ff88d) " @ sxth r8, sp"); + + TEST_RR( "uxtah r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uxtah r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "uxth r8, r",7, HH1,"") + + TEST_RR( "sxtab16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sxtab16 r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "sxtb16 r8, r",7, HH1,"") + + TEST_RR( "uxtab16 r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uxtab16 r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "uxtb16 r8, r",7, HH1,"") + + TEST_RR( "sxtab r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "sxtab r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "sxtb r8, r",7, HH1,"") + + TEST_RR( "uxtab r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "uxtab r14,r",12, HH2,", r",10,HH1,", ror #8") + TEST_R( "uxtb r8, r",7, HH1,"") + + TEST_UNSUPPORTED(__inst_thumb32(0xfa6000f0) "") + TEST_UNSUPPORTED(__inst_thumb32(0xfa7fffff) "") + +#define PARALLEL_ADD_SUB(op) \ + TEST_RR( op"add16 r0, r",0, HH1,", r",1, HH2,"") \ + TEST_RR( op"add16 r14, r",12,HH2,", r",10,HH1,"") \ + TEST_RR( op"asx r0, r",0, HH1,", r",1, HH2,"") \ + TEST_RR( op"asx r14, r",12,HH2,", r",10,HH1,"") \ + TEST_RR( op"sax r0, r",0, HH1,", r",1, HH2,"") \ + TEST_RR( op"sax r14, r",12,HH2,", r",10,HH1,"") \ + TEST_RR( op"sub16 r0, r",0, HH1,", r",1, HH2,"") \ + TEST_RR( op"sub16 r14, r",12,HH2,", r",10,HH1,"") \ + TEST_RR( op"add8 r0, r",0, HH1,", r",1, HH2,"") \ + TEST_RR( op"add8 r14, r",12,HH2,", r",10,HH1,"") \ + TEST_RR( op"sub8 r0, r",0, HH1,", r",1, HH2,"") \ + TEST_RR( op"sub8 r14, r",12,HH2,", r",10,HH1,"") + + TEST_GROUP("Parallel addition and subtraction, signed") + + PARALLEL_ADD_SUB("s") + PARALLEL_ADD_SUB("q") + PARALLEL_ADD_SUB("sh") + + TEST_GROUP("Parallel addition and subtraction, unsigned") + + PARALLEL_ADD_SUB("u") + PARALLEL_ADD_SUB("uq") + PARALLEL_ADD_SUB("uh") + + TEST_GROUP("Miscellaneous operations") + + TEST_RR("qadd r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR("qadd lr, r",9, VAL2,", r",8, VAL1,"") + TEST_RR("qsub r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR("qsub lr, r",9, VAL2,", r",8, VAL1,"") + TEST_RR("qdadd r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR("qdadd lr, r",9, VAL2,", r",8, VAL1,"") + TEST_RR("qdsub r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR("qdsub lr, r",9, VAL2,", r",8, VAL1,"") + + TEST_R("rev.w r0, r",0, VAL1,"") + TEST_R("rev r14, r",12, VAL2,"") + TEST_R("rev16.w r0, r",0, VAL1,"") + TEST_R("rev16 r14, r",12, VAL2,"") + TEST_R("rbit r0, r",0, VAL1,"") + TEST_R("rbit r14, r",12, VAL2,"") + TEST_R("revsh.w r0, r",0, VAL1,"") + TEST_R("revsh r14, r",12, VAL2,"") + + TEST_UNSUPPORTED(__inst_thumb32(0xfa9cff8c) " @ rev pc, r12"); + TEST_UNSUPPORTED(__inst_thumb32(0xfa9cfd8c) " @ rev sp, r12"); + TEST_UNSUPPORTED(__inst_thumb32(0xfa9ffe8f) " @ rev r14, pc"); + TEST_UNSUPPORTED(__inst_thumb32(0xfa9dfe8d) " @ rev r14, sp"); + + TEST_RR("sel r0, r",0, VAL1,", r",1, VAL2,"") + TEST_RR("sel r14, r",12,VAL1,", r",10, VAL2,"") + + TEST_R("clz r0, r",0, 0x0,"") + TEST_R("clz r7, r",14,0x1,"") + TEST_R("clz lr, r",7, 0xffffffff,"") + + TEST_UNSUPPORTED(__inst_thumb32(0xfa80f030) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xfaffff7f) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xfab0f000) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xfaffff7f) "") /* Unallocated space */ + + TEST_GROUP("Multiply, multiply accumulate, and absolute difference operations") + + TEST_RR( "mul r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "mul r7, r",8, VAL2,", r",9, VAL2,"") + TEST_UNSUPPORTED(__inst_thumb32(0xfb08ff09) " @ mul pc, r8, r9") + TEST_UNSUPPORTED(__inst_thumb32(0xfb08fd09) " @ mul sp, r8, r9") + TEST_UNSUPPORTED(__inst_thumb32(0xfb0ff709) " @ mul r7, pc, r9") + TEST_UNSUPPORTED(__inst_thumb32(0xfb0df709) " @ mul r7, sp, r9") + TEST_UNSUPPORTED(__inst_thumb32(0xfb08f70f) " @ mul r7, r8, pc") + TEST_UNSUPPORTED(__inst_thumb32(0xfb08f70d) " @ mul r7, r8, sp") + + TEST_RRR( "mla r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "mla r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_UNSUPPORTED(__inst_thumb32(0xfb08af09) " @ mla pc, r8, r9, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb08ad09) " @ mla sp, r8, r9, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb0fa709) " @ mla r7, pc, r9, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb0da709) " @ mla r7, sp, r9, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb08a70f) " @ mla r7, r8, pc, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb08a70d) " @ mla r7, r8, sp, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb08d709) " @ mla r7, r8, r9, sp"); + + TEST_RRR( "mls r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "mls r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + + TEST_RRR( "smlabb r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlabb r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RRR( "smlatb r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlatb r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RRR( "smlabt r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlabt r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RRR( "smlatt r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlatt r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "smulbb r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulbb r7, r",8, VAL3,", r",9, VAL1,"") + TEST_RR( "smultb r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smultb r7, r",8, VAL3,", r",9, VAL1,"") + TEST_RR( "smulbt r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulbt r7, r",8, VAL3,", r",9, VAL1,"") + TEST_RR( "smultt r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smultt r7, r",8, VAL3,", r",9, VAL1,"") + + TEST_RRR( "smlad r0, r",0, HH1,", r",1, HH2,", r",2, VAL1,"") + TEST_RRR( "smlad r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"") + TEST_RRR( "smladx r0, r",0, HH1,", r",1, HH2,", r",2, VAL1,"") + TEST_RRR( "smladx r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"") + TEST_RR( "smuad r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "smuad r14, r",12,HH2,", r",10,HH1,"") + TEST_RR( "smuadx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "smuadx r14, r",12,HH2,", r",10,HH1,"") + + TEST_RRR( "smlawb r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlawb r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RRR( "smlawt r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"") + TEST_RRR( "smlawt r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"") + TEST_RR( "smulwb r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulwb r7, r",8, VAL3,", r",9, VAL1,"") + TEST_RR( "smulwt r0, r",1, VAL1,", r",2, VAL2,"") + TEST_RR( "smulwt r7, r",8, VAL3,", r",9, VAL1,"") + + TEST_RRR( "smlsd r0, r",0, HH1,", r",1, HH2,", r",2, VAL1,"") + TEST_RRR( "smlsd r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"") + TEST_RRR( "smlsdx r0, r",0, HH1,", r",1, HH2,", r",2, VAL1,"") + TEST_RRR( "smlsdx r14, r",12,HH2,", r",10,HH1,", r",8, VAL2,"") + TEST_RR( "smusd r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "smusd r14, r",12,HH2,", r",10,HH1,"") + TEST_RR( "smusdx r0, r",0, HH1,", r",1, HH2,"") + TEST_RR( "smusdx r14, r",12,HH2,", r",10,HH1,"") + + TEST_RRR( "smmla r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL1,"") + TEST_RRR( "smmla r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"") + TEST_RRR( "smmlar r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL1,"") + TEST_RRR( "smmlar r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"") + TEST_RR( "smmul r0, r",0, VAL1,", r",1, VAL2,"") + TEST_RR( "smmul r14, r",12,VAL2,", r",10,VAL1,"") + TEST_RR( "smmulr r0, r",0, VAL1,", r",1, VAL2,"") + TEST_RR( "smmulr r14, r",12,VAL2,", r",10,VAL1,"") + + TEST_RRR( "smmls r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL1,"") + TEST_RRR( "smmls r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"") + TEST_RRR( "smmlsr r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL1,"") + TEST_RRR( "smmlsr r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL2,"") + + TEST_RRR( "usada8 r0, r",0, VAL1,", r",1, VAL2,", r",2, VAL3,"") + TEST_RRR( "usada8 r14, r",12,VAL2,", r",10,VAL1,", r",8, VAL3,"") + TEST_RR( "usad8 r0, r",0, VAL1,", r",1, VAL2,"") + TEST_RR( "usad8 r14, r",12,VAL2,", r",10,VAL1,"") + + TEST_UNSUPPORTED(__inst_thumb32(0xfb00f010) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xfb0fff1f) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xfb70f010) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xfb7fff1f) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xfb700010) "") /* Unallocated space */ + TEST_UNSUPPORTED(__inst_thumb32(0xfb7fff1f) "") /* Unallocated space */ + + TEST_GROUP("Long multiply, long multiply accumulate, and divide") + + TEST_RR( "smull r0, r1, r",2, VAL1,", r",3, VAL2,"") + TEST_RR( "smull r7, r8, r",9, VAL2,", r",10, VAL1,"") + TEST_UNSUPPORTED(__inst_thumb32(0xfb89f80a) " @ smull pc, r8, r9, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb89d80a) " @ smull sp, r8, r9, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb897f0a) " @ smull r7, pc, r9, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb897d0a) " @ smull r7, sp, r9, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb8f780a) " @ smull r7, r8, pc, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb8d780a) " @ smull r7, r8, sp, r10"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb89780f) " @ smull r7, r8, r9, pc"); + TEST_UNSUPPORTED(__inst_thumb32(0xfb89780d) " @ smull r7, r8, r9, sp"); + + TEST_RR( "umull r0, r1, r",2, VAL1,", r",3, VAL2,"") + TEST_RR( "umull r7, r8, r",9, VAL2,", r",10, VAL1,"") + + TEST_RRRR( "smlal r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlal r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + + TEST_RRRR( "smlalbb r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlalbb r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRRR( "smlalbt r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlalbt r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRRR( "smlaltb r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlaltb r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRRR( "smlaltt r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "smlaltt r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + + TEST_RRRR( "smlald r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2) + TEST_RRRR( "smlald r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1) + TEST_RRRR( "smlaldx r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2) + TEST_RRRR( "smlaldx r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1) + + TEST_RRRR( "smlsld r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2) + TEST_RRRR( "smlsld r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1) + TEST_RRRR( "smlsldx r",0, VAL1,", r",1, VAL2, ", r",0, HH1,", r",1, HH2) + TEST_RRRR( "smlsldx r",11,VAL2,", r",10,VAL1, ", r",9, HH2,", r",8, HH1) + + TEST_RRRR( "umlal r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "umlal r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + TEST_RRRR( "umaal r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4) + TEST_RRRR( "umaal r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3) + + TEST_GROUP("Coprocessor instructions") + + TEST_UNSUPPORTED(__inst_thumb32(0xfc000000) "") + TEST_UNSUPPORTED(__inst_thumb32(0xffffffff) "") + + TEST_GROUP("Testing instructions in IT blocks") + + TEST_ITBLOCK("sub.w r0, r0") + + verbose("\n"); +} + diff --git a/arch/arm/kernel/kprobes-test.c b/arch/arm/kernel/kprobes-test.c new file mode 100644 index 00000000000..08d731294bc --- /dev/null +++ b/arch/arm/kernel/kprobes-test.c @@ -0,0 +1,1711 @@ +/* + * arch/arm/kernel/kprobes-test.c + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +/* + * This file contains test code for ARM kprobes. + * + * The top level function run_all_tests() executes tests for all of the + * supported instruction sets: ARM, 16-bit Thumb, and 32-bit Thumb. These tests + * fall into two categories; run_api_tests() checks basic functionality of the + * kprobes API, and run_test_cases() is a comprehensive test for kprobes + * instruction decoding and simulation. + * + * run_test_cases() first checks the kprobes decoding table for self consistency + * (using table_test()) then executes a series of test cases for each of the CPU + * instruction forms. coverage_start() and coverage_end() are used to verify + * that these test cases cover all of the possible combinations of instructions + * described by the kprobes decoding tables. + * + * The individual test cases are in kprobes-test-arm.c and kprobes-test-thumb.c + * which use the macros defined in kprobes-test.h. The rest of this + * documentation will describe the operation of the framework used by these + * test cases. + */ + +/* + * TESTING METHODOLOGY + * ------------------- + * + * The methodology used to test an ARM instruction 'test_insn' is to use + * inline assembler like: + * + * test_before: nop + * test_case: test_insn + * test_after: nop + * + * When the test case is run a kprobe is placed of each nop. The + * post-handler of the test_before probe is used to modify the saved CPU + * register context to that which we require for the test case. The + * pre-handler of the of the test_after probe saves a copy of the CPU + * register context. In this way we can execute test_insn with a specific + * register context and see the results afterwards. + * + * To actually test the kprobes instruction emulation we perform the above + * step a second time but with an additional kprobe on the test_case + * instruction itself. If the emulation is accurate then the results seen + * by the test_after probe will be identical to the first run which didn't + * have a probe on test_case. + * + * Each test case is run several times with a variety of variations in the + * flags value of stored in CPSR, and for Thumb code, different ITState. + * + * For instructions which can modify PC, a second test_after probe is used + * like this: + * + * test_before: nop + * test_case: test_insn + * test_after: nop + * b test_done + * test_after2: nop + * test_done: + * + * The test case is constructed such that test_insn branches to + * test_after2, or, if testing a conditional instruction, it may just + * continue to test_after. The probes inserted at both locations let us + * determine which happened. A similar approach is used for testing + * backwards branches... + * + * b test_before + * b test_done @ helps to cope with off by 1 branches + * test_after2: nop + * b test_done + * test_before: nop + * test_case: test_insn + * test_after: nop + * test_done: + * + * The macros used to generate the assembler instructions describe above + * are TEST_INSTRUCTION, TEST_BRANCH_F (branch forwards) and TEST_BRANCH_B + * (branch backwards). In these, the local variables numbered 1, 50, 2 and + * 99 represent: test_before, test_case, test_after2 and test_done. + * + * FRAMEWORK + * --------- + * + * Each test case is wrapped between the pair of macros TESTCASE_START and + * TESTCASE_END. As well as performing the inline assembler boilerplate, + * these call out to the kprobes_test_case_start() and + * kprobes_test_case_end() functions which drive the execution of the test + * case. The specific arguments to use for each test case are stored as + * inline data constructed using the various TEST_ARG_* macros. Putting + * this all together, a simple test case may look like: + * + * TESTCASE_START("Testing mov r0, r7") + * TEST_ARG_REG(7, 0x12345678) // Set r7=0x12345678 + * TEST_ARG_END("") + * TEST_INSTRUCTION("mov r0, r7") + * TESTCASE_END + * + * Note, in practice the single convenience macro TEST_R would be used for this + * instead. + * + * The above would expand to assembler looking something like: + * + * @ TESTCASE_START + * bl __kprobes_test_case_start + * @ start of inline data... + * .ascii "mov r0, r7" @ text title for test case + * .byte 0 + * .align 2, 0 + * + * @ TEST_ARG_REG + * .byte ARG_TYPE_REG + * .byte 7 + * .short 0 + * .word 0x1234567 + * + * @ TEST_ARG_END + * .byte ARG_TYPE_END + * .byte TEST_ISA @ flags, including ISA being tested + * .short 50f-0f @ offset of 'test_before' + * .short 2f-0f @ offset of 'test_after2' (if relevent) + * .short 99f-0f @ offset of 'test_done' + * @ start of test case code... + * 0: + * .code TEST_ISA @ switch to ISA being tested + * + * @ TEST_INSTRUCTION + * 50: nop @ location for 'test_before' probe + * 1: mov r0, r7 @ the test case instruction 'test_insn' + * nop @ location for 'test_after' probe + * + * // TESTCASE_END + * 2: + * 99: bl __kprobes_test_case_end_##TEST_ISA + * .code NONMAL_ISA + * + * When the above is execute the following happens... + * + * __kprobes_test_case_start() is an assembler wrapper which sets up space + * for a stack buffer and calls the C function kprobes_test_case_start(). + * This C function will do some initial processing of the inline data and + * setup some global state. It then inserts the test_before and test_after + * kprobes and returns a value which causes the assembler wrapper to jump + * to the start of the test case code, (local label '0'). + * + * When the test case code executes, the test_before probe will be hit and + * test_before_post_handler will call setup_test_context(). This fills the + * stack buffer and CPU registers with a test pattern and then processes + * the test case arguments. In our example there is one TEST_ARG_REG which + * indicates that R7 should be loaded with the value 0x12345678. + * + * When the test_before probe ends, the test case continues and executes + * the "mov r0, r7" instruction. It then hits the test_after probe and the + * pre-handler for this (test_after_pre_handler) will save a copy of the + * CPU register context. This should now have R0 holding the same value as + * R7. + * + * Finally we get to the call to __kprobes_test_case_end_{32,16}. This is + * an assembler wrapper which switches back to the ISA used by the test + * code and calls the C function kprobes_test_case_end(). + * + * For each run through the test case, test_case_run_count is incremented + * by one. For even runs, kprobes_test_case_end() saves a copy of the + * register and stack buffer contents from the test case just run. It then + * inserts a kprobe on the test case instruction 'test_insn' and returns a + * value to cause the test case code to be re-run. + * + * For odd numbered runs, kprobes_test_case_end() compares the register and + * stack buffer contents to those that were saved on the previous even + * numbered run (the one without the kprobe on test_insn). These should be + * the same if the kprobe instruction simulation routine is correct. + * + * The pair of test case runs is repeated with different combinations of + * flag values in CPSR and, for Thumb, different ITState. This is + * controlled by test_context_cpsr(). + * + * BUILDING TEST CASES + * ------------------- + * + * + * As an aid to building test cases, the stack buffer is initialised with + * some special values: + * + * [SP+13*4] Contains SP+120. This can be used to test instructions + * which load a value into SP. + * + * [SP+15*4] When testing branching instructions using TEST_BRANCH_{F,B}, + * this holds the target address of the branch, 'test_after2'. + * This can be used to test instructions which load a PC value + * from memory. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/kprobes.h> +#include <linux/errno.h> +#include <linux/stddef.h> +#include <linux/bug.h> +#include <asm/opcodes.h> + +#include "kprobes.h" +#include "probes-arm.h" +#include "probes-thumb.h" +#include "kprobes-test.h" + + +#define BENCHMARKING 1 + + +/* + * Test basic API + */ + +static bool test_regs_ok; +static int test_func_instance; +static int pre_handler_called; +static int post_handler_called; +static int jprobe_func_called; +static int kretprobe_handler_called; +static int tests_failed; + +#define FUNC_ARG1 0x12345678 +#define FUNC_ARG2 0xabcdef + + +#ifndef CONFIG_THUMB2_KERNEL + +long arm_func(long r0, long r1); + +static void __used __naked __arm_kprobes_test_func(void) +{ + __asm__ __volatile__ ( + ".arm \n\t" + ".type arm_func, %%function \n\t" + "arm_func: \n\t" + "adds r0, r0, r1 \n\t" + "bx lr \n\t" + ".code "NORMAL_ISA /* Back to Thumb if necessary */ + : : : "r0", "r1", "cc" + ); +} + +#else /* CONFIG_THUMB2_KERNEL */ + +long thumb16_func(long r0, long r1); +long thumb32even_func(long r0, long r1); +long thumb32odd_func(long r0, long r1); + +static void __used __naked __thumb_kprobes_test_funcs(void) +{ + __asm__ __volatile__ ( + ".type thumb16_func, %%function \n\t" + "thumb16_func: \n\t" + "adds.n r0, r0, r1 \n\t" + "bx lr \n\t" + + ".align \n\t" + ".type thumb32even_func, %%function \n\t" + "thumb32even_func: \n\t" + "adds.w r0, r0, r1 \n\t" + "bx lr \n\t" + + ".align \n\t" + "nop.n \n\t" + ".type thumb32odd_func, %%function \n\t" + "thumb32odd_func: \n\t" + "adds.w r0, r0, r1 \n\t" + "bx lr \n\t" + + : : : "r0", "r1", "cc" + ); +} + +#endif /* CONFIG_THUMB2_KERNEL */ + + +static int call_test_func(long (*func)(long, long), bool check_test_regs) +{ + long ret; + + ++test_func_instance; + test_regs_ok = false; + + ret = (*func)(FUNC_ARG1, FUNC_ARG2); + if (ret != FUNC_ARG1 + FUNC_ARG2) { + pr_err("FAIL: call_test_func: func returned %lx\n", ret); + return false; + } + + if (check_test_regs && !test_regs_ok) { + pr_err("FAIL: test regs not OK\n"); + return false; + } + + return true; +} + +static int __kprobes pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + pre_handler_called = test_func_instance; + if (regs->ARM_r0 == FUNC_ARG1 && regs->ARM_r1 == FUNC_ARG2) + test_regs_ok = true; + return 0; +} + +static void __kprobes post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + post_handler_called = test_func_instance; + if (regs->ARM_r0 != FUNC_ARG1 + FUNC_ARG2 || regs->ARM_r1 != FUNC_ARG2) + test_regs_ok = false; +} + +static struct kprobe the_kprobe = { + .addr = 0, + .pre_handler = pre_handler, + .post_handler = post_handler +}; + +static int test_kprobe(long (*func)(long, long)) +{ + int ret; + + the_kprobe.addr = (kprobe_opcode_t *)func; + ret = register_kprobe(&the_kprobe); + if (ret < 0) { + pr_err("FAIL: register_kprobe failed with %d\n", ret); + return ret; + } + + ret = call_test_func(func, true); + + unregister_kprobe(&the_kprobe); + the_kprobe.flags = 0; /* Clear disable flag to allow reuse */ + + if (!ret) + return -EINVAL; + if (pre_handler_called != test_func_instance) { + pr_err("FAIL: kprobe pre_handler not called\n"); + return -EINVAL; + } + if (post_handler_called != test_func_instance) { + pr_err("FAIL: kprobe post_handler not called\n"); + return -EINVAL; + } + if (!call_test_func(func, false)) + return -EINVAL; + if (pre_handler_called == test_func_instance || + post_handler_called == test_func_instance) { + pr_err("FAIL: probe called after unregistering\n"); + return -EINVAL; + } + + return 0; +} + +static void __kprobes jprobe_func(long r0, long r1) +{ + jprobe_func_called = test_func_instance; + if (r0 == FUNC_ARG1 && r1 == FUNC_ARG2) + test_regs_ok = true; + jprobe_return(); +} + +static struct jprobe the_jprobe = { + .entry = jprobe_func, +}; + +static int test_jprobe(long (*func)(long, long)) +{ + int ret; + + the_jprobe.kp.addr = (kprobe_opcode_t *)func; + ret = register_jprobe(&the_jprobe); + if (ret < 0) { + pr_err("FAIL: register_jprobe failed with %d\n", ret); + return ret; + } + + ret = call_test_func(func, true); + + unregister_jprobe(&the_jprobe); + the_jprobe.kp.flags = 0; /* Clear disable flag to allow reuse */ + + if (!ret) + return -EINVAL; + if (jprobe_func_called != test_func_instance) { + pr_err("FAIL: jprobe handler function not called\n"); + return -EINVAL; + } + if (!call_test_func(func, false)) + return -EINVAL; + if (jprobe_func_called == test_func_instance) { + pr_err("FAIL: probe called after unregistering\n"); + return -EINVAL; + } + + return 0; +} + +static int __kprobes +kretprobe_handler(struct kretprobe_instance *ri, struct pt_regs *regs) +{ + kretprobe_handler_called = test_func_instance; + if (regs_return_value(regs) == FUNC_ARG1 + FUNC_ARG2) + test_regs_ok = true; + return 0; +} + +static struct kretprobe the_kretprobe = { + .handler = kretprobe_handler, +}; + +static int test_kretprobe(long (*func)(long, long)) +{ + int ret; + + the_kretprobe.kp.addr = (kprobe_opcode_t *)func; + ret = register_kretprobe(&the_kretprobe); + if (ret < 0) { + pr_err("FAIL: register_kretprobe failed with %d\n", ret); + return ret; + } + + ret = call_test_func(func, true); + + unregister_kretprobe(&the_kretprobe); + the_kretprobe.kp.flags = 0; /* Clear disable flag to allow reuse */ + + if (!ret) + return -EINVAL; + if (kretprobe_handler_called != test_func_instance) { + pr_err("FAIL: kretprobe handler not called\n"); + return -EINVAL; + } + if (!call_test_func(func, false)) + return -EINVAL; + if (jprobe_func_called == test_func_instance) { + pr_err("FAIL: kretprobe called after unregistering\n"); + return -EINVAL; + } + + return 0; +} + +static int run_api_tests(long (*func)(long, long)) +{ + int ret; + + pr_info(" kprobe\n"); + ret = test_kprobe(func); + if (ret < 0) + return ret; + + pr_info(" jprobe\n"); + ret = test_jprobe(func); +#if defined(CONFIG_THUMB2_KERNEL) && !defined(MODULE) + if (ret == -EINVAL) { + pr_err("FAIL: Known longtime bug with jprobe on Thumb kernels\n"); + tests_failed = ret; + ret = 0; + } +#endif + if (ret < 0) + return ret; + + pr_info(" kretprobe\n"); + ret = test_kretprobe(func); + if (ret < 0) + return ret; + + return 0; +} + + +/* + * Benchmarking + */ + +#if BENCHMARKING + +static void __naked benchmark_nop(void) +{ + __asm__ __volatile__ ( + "nop \n\t" + "bx lr" + ); +} + +#ifdef CONFIG_THUMB2_KERNEL +#define wide ".w" +#else +#define wide +#endif + +static void __naked benchmark_pushpop1(void) +{ + __asm__ __volatile__ ( + "stmdb"wide" sp!, {r3-r11,lr} \n\t" + "ldmia"wide" sp!, {r3-r11,pc}" + ); +} + +static void __naked benchmark_pushpop2(void) +{ + __asm__ __volatile__ ( + "stmdb"wide" sp!, {r0-r8,lr} \n\t" + "ldmia"wide" sp!, {r0-r8,pc}" + ); +} + +static void __naked benchmark_pushpop3(void) +{ + __asm__ __volatile__ ( + "stmdb"wide" sp!, {r4,lr} \n\t" + "ldmia"wide" sp!, {r4,pc}" + ); +} + +static void __naked benchmark_pushpop4(void) +{ + __asm__ __volatile__ ( + "stmdb"wide" sp!, {r0,lr} \n\t" + "ldmia"wide" sp!, {r0,pc}" + ); +} + + +#ifdef CONFIG_THUMB2_KERNEL + +static void __naked benchmark_pushpop_thumb(void) +{ + __asm__ __volatile__ ( + "push.n {r0-r7,lr} \n\t" + "pop.n {r0-r7,pc}" + ); +} + +#endif + +static int __kprobes +benchmark_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + return 0; +} + +static int benchmark(void(*fn)(void)) +{ + unsigned n, i, t, t0; + + for (n = 1000; ; n *= 2) { + t0 = sched_clock(); + for (i = n; i > 0; --i) + fn(); + t = sched_clock() - t0; + if (t >= 250000000) + break; /* Stop once we took more than 0.25 seconds */ + } + return t / n; /* Time for one iteration in nanoseconds */ +}; + +static int kprobe_benchmark(void(*fn)(void), unsigned offset) +{ + struct kprobe k = { + .addr = (kprobe_opcode_t *)((uintptr_t)fn + offset), + .pre_handler = benchmark_pre_handler, + }; + + int ret = register_kprobe(&k); + if (ret < 0) { + pr_err("FAIL: register_kprobe failed with %d\n", ret); + return ret; + } + + ret = benchmark(fn); + + unregister_kprobe(&k); + return ret; +}; + +struct benchmarks { + void (*fn)(void); + unsigned offset; + const char *title; +}; + +static int run_benchmarks(void) +{ + int ret; + struct benchmarks list[] = { + {&benchmark_nop, 0, "nop"}, + /* + * benchmark_pushpop{1,3} will have the optimised + * instruction emulation, whilst benchmark_pushpop{2,4} will + * be the equivalent unoptimised instructions. + */ + {&benchmark_pushpop1, 0, "stmdb sp!, {r3-r11,lr}"}, + {&benchmark_pushpop1, 4, "ldmia sp!, {r3-r11,pc}"}, + {&benchmark_pushpop2, 0, "stmdb sp!, {r0-r8,lr}"}, + {&benchmark_pushpop2, 4, "ldmia sp!, {r0-r8,pc}"}, + {&benchmark_pushpop3, 0, "stmdb sp!, {r4,lr}"}, + {&benchmark_pushpop3, 4, "ldmia sp!, {r4,pc}"}, + {&benchmark_pushpop4, 0, "stmdb sp!, {r0,lr}"}, + {&benchmark_pushpop4, 4, "ldmia sp!, {r0,pc}"}, +#ifdef CONFIG_THUMB2_KERNEL + {&benchmark_pushpop_thumb, 0, "push.n {r0-r7,lr}"}, + {&benchmark_pushpop_thumb, 2, "pop.n {r0-r7,pc}"}, +#endif + {0} + }; + + struct benchmarks *b; + for (b = list; b->fn; ++b) { + ret = kprobe_benchmark(b->fn, b->offset); + if (ret < 0) + return ret; + pr_info(" %dns for kprobe %s\n", ret, b->title); + } + + pr_info("\n"); + return 0; +} + +#endif /* BENCHMARKING */ + + +/* + * Decoding table self-consistency tests + */ + +static const int decode_struct_sizes[NUM_DECODE_TYPES] = { + [DECODE_TYPE_TABLE] = sizeof(struct decode_table), + [DECODE_TYPE_CUSTOM] = sizeof(struct decode_custom), + [DECODE_TYPE_SIMULATE] = sizeof(struct decode_simulate), + [DECODE_TYPE_EMULATE] = sizeof(struct decode_emulate), + [DECODE_TYPE_OR] = sizeof(struct decode_or), + [DECODE_TYPE_REJECT] = sizeof(struct decode_reject) +}; + +static int table_iter(const union decode_item *table, + int (*fn)(const struct decode_header *, void *), + void *args) +{ + const struct decode_header *h = (struct decode_header *)table; + int result; + + for (;;) { + enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK; + + if (type == DECODE_TYPE_END) + return 0; + + result = fn(h, args); + if (result) + return result; + + h = (struct decode_header *) + ((uintptr_t)h + decode_struct_sizes[type]); + + } +} + +static int table_test_fail(const struct decode_header *h, const char* message) +{ + + pr_err("FAIL: kprobes test failure \"%s\" (mask %08x, value %08x)\n", + message, h->mask.bits, h->value.bits); + return -EINVAL; +} + +struct table_test_args { + const union decode_item *root_table; + u32 parent_mask; + u32 parent_value; +}; + +static int table_test_fn(const struct decode_header *h, void *args) +{ + struct table_test_args *a = (struct table_test_args *)args; + enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK; + + if (h->value.bits & ~h->mask.bits) + return table_test_fail(h, "Match value has bits not in mask"); + + if ((h->mask.bits & a->parent_mask) != a->parent_mask) + return table_test_fail(h, "Mask has bits not in parent mask"); + + if ((h->value.bits ^ a->parent_value) & a->parent_mask) + return table_test_fail(h, "Value is inconsistent with parent"); + + if (type == DECODE_TYPE_TABLE) { + struct decode_table *d = (struct decode_table *)h; + struct table_test_args args2 = *a; + args2.parent_mask = h->mask.bits; + args2.parent_value = h->value.bits; + return table_iter(d->table.table, table_test_fn, &args2); + } + + return 0; +} + +static int table_test(const union decode_item *table) +{ + struct table_test_args args = { + .root_table = table, + .parent_mask = 0, + .parent_value = 0 + }; + return table_iter(args.root_table, table_test_fn, &args); +} + + +/* + * Decoding table test coverage analysis + * + * coverage_start() builds a coverage_table which contains a list of + * coverage_entry's to match each entry in the specified kprobes instruction + * decoding table. + * + * When test cases are run, coverage_add() is called to process each case. + * This looks up the corresponding entry in the coverage_table and sets it as + * being matched, as well as clearing the regs flag appropriate for the test. + * + * After all test cases have been run, coverage_end() is called to check that + * all entries in coverage_table have been matched and that all regs flags are + * cleared. I.e. that all possible combinations of instructions described by + * the kprobes decoding tables have had a test case executed for them. + */ + +bool coverage_fail; + +#define MAX_COVERAGE_ENTRIES 256 + +struct coverage_entry { + const struct decode_header *header; + unsigned regs; + unsigned nesting; + char matched; +}; + +struct coverage_table { + struct coverage_entry *base; + unsigned num_entries; + unsigned nesting; +}; + +struct coverage_table coverage; + +#define COVERAGE_ANY_REG (1<<0) +#define COVERAGE_SP (1<<1) +#define COVERAGE_PC (1<<2) +#define COVERAGE_PCWB (1<<3) + +static const char coverage_register_lookup[16] = { + [REG_TYPE_ANY] = COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC, + [REG_TYPE_SAMEAS16] = COVERAGE_ANY_REG, + [REG_TYPE_SP] = COVERAGE_SP, + [REG_TYPE_PC] = COVERAGE_PC, + [REG_TYPE_NOSP] = COVERAGE_ANY_REG | COVERAGE_SP, + [REG_TYPE_NOSPPC] = COVERAGE_ANY_REG | COVERAGE_SP | COVERAGE_PC, + [REG_TYPE_NOPC] = COVERAGE_ANY_REG | COVERAGE_PC, + [REG_TYPE_NOPCWB] = COVERAGE_ANY_REG | COVERAGE_PC | COVERAGE_PCWB, + [REG_TYPE_NOPCX] = COVERAGE_ANY_REG, + [REG_TYPE_NOSPPCX] = COVERAGE_ANY_REG | COVERAGE_SP, +}; + +unsigned coverage_start_registers(const struct decode_header *h) +{ + unsigned regs = 0; + int i; + for (i = 0; i < 20; i += 4) { + int r = (h->type_regs.bits >> (DECODE_TYPE_BITS + i)) & 0xf; + regs |= coverage_register_lookup[r] << i; + } + return regs; +} + +static int coverage_start_fn(const struct decode_header *h, void *args) +{ + struct coverage_table *coverage = (struct coverage_table *)args; + enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK; + struct coverage_entry *entry = coverage->base + coverage->num_entries; + + if (coverage->num_entries == MAX_COVERAGE_ENTRIES - 1) { + pr_err("FAIL: Out of space for test coverage data"); + return -ENOMEM; + } + + ++coverage->num_entries; + + entry->header = h; + entry->regs = coverage_start_registers(h); + entry->nesting = coverage->nesting; + entry->matched = false; + + if (type == DECODE_TYPE_TABLE) { + struct decode_table *d = (struct decode_table *)h; + int ret; + ++coverage->nesting; + ret = table_iter(d->table.table, coverage_start_fn, coverage); + --coverage->nesting; + return ret; + } + + return 0; +} + +static int coverage_start(const union decode_item *table) +{ + coverage.base = kmalloc(MAX_COVERAGE_ENTRIES * + sizeof(struct coverage_entry), GFP_KERNEL); + coverage.num_entries = 0; + coverage.nesting = 0; + return table_iter(table, coverage_start_fn, &coverage); +} + +static void +coverage_add_registers(struct coverage_entry *entry, kprobe_opcode_t insn) +{ + int regs = entry->header->type_regs.bits >> DECODE_TYPE_BITS; + int i; + for (i = 0; i < 20; i += 4) { + enum decode_reg_type reg_type = (regs >> i) & 0xf; + int reg = (insn >> i) & 0xf; + int flag; + + if (!reg_type) + continue; + + if (reg == 13) + flag = COVERAGE_SP; + else if (reg == 15) + flag = COVERAGE_PC; + else + flag = COVERAGE_ANY_REG; + entry->regs &= ~(flag << i); + + switch (reg_type) { + + case REG_TYPE_NONE: + case REG_TYPE_ANY: + case REG_TYPE_SAMEAS16: + break; + + case REG_TYPE_SP: + if (reg != 13) + return; + break; + + case REG_TYPE_PC: + if (reg != 15) + return; + break; + + case REG_TYPE_NOSP: + if (reg == 13) + return; + break; + + case REG_TYPE_NOSPPC: + case REG_TYPE_NOSPPCX: + if (reg == 13 || reg == 15) + return; + break; + + case REG_TYPE_NOPCWB: + if (!is_writeback(insn)) + break; + if (reg == 15) { + entry->regs &= ~(COVERAGE_PCWB << i); + return; + } + break; + + case REG_TYPE_NOPC: + case REG_TYPE_NOPCX: + if (reg == 15) + return; + break; + } + + } +} + +static void coverage_add(kprobe_opcode_t insn) +{ + struct coverage_entry *entry = coverage.base; + struct coverage_entry *end = coverage.base + coverage.num_entries; + bool matched = false; + unsigned nesting = 0; + + for (; entry < end; ++entry) { + const struct decode_header *h = entry->header; + enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK; + + if (entry->nesting > nesting) + continue; /* Skip sub-table we didn't match */ + + if (entry->nesting < nesting) + break; /* End of sub-table we were scanning */ + + if (!matched) { + if ((insn & h->mask.bits) != h->value.bits) + continue; + entry->matched = true; + } + + switch (type) { + + case DECODE_TYPE_TABLE: + ++nesting; + break; + + case DECODE_TYPE_CUSTOM: + case DECODE_TYPE_SIMULATE: + case DECODE_TYPE_EMULATE: + coverage_add_registers(entry, insn); + return; + + case DECODE_TYPE_OR: + matched = true; + break; + + case DECODE_TYPE_REJECT: + default: + return; + } + + } +} + +static void coverage_end(void) +{ + struct coverage_entry *entry = coverage.base; + struct coverage_entry *end = coverage.base + coverage.num_entries; + + for (; entry < end; ++entry) { + u32 mask = entry->header->mask.bits; + u32 value = entry->header->value.bits; + + if (entry->regs) { + pr_err("FAIL: Register test coverage missing for %08x %08x (%05x)\n", + mask, value, entry->regs); + coverage_fail = true; + } + if (!entry->matched) { + pr_err("FAIL: Test coverage entry missing for %08x %08x\n", + mask, value); + coverage_fail = true; + } + } + + kfree(coverage.base); +} + + +/* + * Framework for instruction set test cases + */ + +void __naked __kprobes_test_case_start(void) +{ + __asm__ __volatile__ ( + "stmdb sp!, {r4-r11} \n\t" + "sub sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" + "bic r0, lr, #1 @ r0 = inline title string \n\t" + "mov r1, sp \n\t" + "bl kprobes_test_case_start \n\t" + "bx r0 \n\t" + ); +} + +#ifndef CONFIG_THUMB2_KERNEL + +void __naked __kprobes_test_case_end_32(void) +{ + __asm__ __volatile__ ( + "mov r4, lr \n\t" + "bl kprobes_test_case_end \n\t" + "cmp r0, #0 \n\t" + "movne pc, r0 \n\t" + "mov r0, r4 \n\t" + "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" + "ldmia sp!, {r4-r11} \n\t" + "mov pc, r0 \n\t" + ); +} + +#else /* CONFIG_THUMB2_KERNEL */ + +void __naked __kprobes_test_case_end_16(void) +{ + __asm__ __volatile__ ( + "mov r4, lr \n\t" + "bl kprobes_test_case_end \n\t" + "cmp r0, #0 \n\t" + "bxne r0 \n\t" + "mov r0, r4 \n\t" + "add sp, sp, #"__stringify(TEST_MEMORY_SIZE)"\n\t" + "ldmia sp!, {r4-r11} \n\t" + "bx r0 \n\t" + ); +} + +void __naked __kprobes_test_case_end_32(void) +{ + __asm__ __volatile__ ( + ".arm \n\t" + "orr lr, lr, #1 @ will return to Thumb code \n\t" + "ldr pc, 1f \n\t" + "1: \n\t" + ".word __kprobes_test_case_end_16 \n\t" + ); +} + +#endif + + +int kprobe_test_flags; +int kprobe_test_cc_position; + +static int test_try_count; +static int test_pass_count; +static int test_fail_count; + +static struct pt_regs initial_regs; +static struct pt_regs expected_regs; +static struct pt_regs result_regs; + +static u32 expected_memory[TEST_MEMORY_SIZE/sizeof(u32)]; + +static const char *current_title; +static struct test_arg *current_args; +static u32 *current_stack; +static uintptr_t current_branch_target; + +static uintptr_t current_code_start; +static kprobe_opcode_t current_instruction; + + +#define TEST_CASE_PASSED -1 +#define TEST_CASE_FAILED -2 + +static int test_case_run_count; +static bool test_case_is_thumb; +static int test_instance; + +/* + * We ignore the state of the imprecise abort disable flag (CPSR.A) because this + * can change randomly as the kernel doesn't take care to preserve or initialise + * this across context switches. Also, with Security Extentions, the flag may + * not be under control of the kernel; for this reason we ignore the state of + * the FIQ disable flag CPSR.F as well. + */ +#define PSR_IGNORE_BITS (PSR_A_BIT | PSR_F_BIT) + +static unsigned long test_check_cc(int cc, unsigned long cpsr) +{ + int ret = arm_check_condition(cc << 28, cpsr); + + return (ret != ARM_OPCODE_CONDTEST_FAIL); +} + +static int is_last_scenario; +static int probe_should_run; /* 0 = no, 1 = yes, -1 = unknown */ +static int memory_needs_checking; + +static unsigned long test_context_cpsr(int scenario) +{ + unsigned long cpsr; + + probe_should_run = 1; + + /* Default case is that we cycle through 16 combinations of flags */ + cpsr = (scenario & 0xf) << 28; /* N,Z,C,V flags */ + cpsr |= (scenario & 0xf) << 16; /* GE flags */ + cpsr |= (scenario & 0x1) << 27; /* Toggle Q flag */ + + if (!test_case_is_thumb) { + /* Testing ARM code */ + int cc = current_instruction >> 28; + + probe_should_run = test_check_cc(cc, cpsr) != 0; + if (scenario == 15) + is_last_scenario = true; + + } else if (kprobe_test_flags & TEST_FLAG_NO_ITBLOCK) { + /* Testing Thumb code without setting ITSTATE */ + if (kprobe_test_cc_position) { + int cc = (current_instruction >> kprobe_test_cc_position) & 0xf; + probe_should_run = test_check_cc(cc, cpsr) != 0; + } + + if (scenario == 15) + is_last_scenario = true; + + } else if (kprobe_test_flags & TEST_FLAG_FULL_ITBLOCK) { + /* Testing Thumb code with all combinations of ITSTATE */ + unsigned x = (scenario >> 4); + unsigned cond_base = x % 7; /* ITSTATE<7:5> */ + unsigned mask = x / 7 + 2; /* ITSTATE<4:0>, bits reversed */ + + if (mask > 0x1f) { + /* Finish by testing state from instruction 'itt al' */ + cond_base = 7; + mask = 0x4; + if ((scenario & 0xf) == 0xf) + is_last_scenario = true; + } + + cpsr |= cond_base << 13; /* ITSTATE<7:5> */ + cpsr |= (mask & 0x1) << 12; /* ITSTATE<4> */ + cpsr |= (mask & 0x2) << 10; /* ITSTATE<3> */ + cpsr |= (mask & 0x4) << 8; /* ITSTATE<2> */ + cpsr |= (mask & 0x8) << 23; /* ITSTATE<1> */ + cpsr |= (mask & 0x10) << 21; /* ITSTATE<0> */ + + probe_should_run = test_check_cc((cpsr >> 12) & 0xf, cpsr) != 0; + + } else { + /* Testing Thumb code with several combinations of ITSTATE */ + switch (scenario) { + case 16: /* Clear NZCV flags and 'it eq' state (false as Z=0) */ + cpsr = 0x00000800; + probe_should_run = 0; + break; + case 17: /* Set NZCV flags and 'it vc' state (false as V=1) */ + cpsr = 0xf0007800; + probe_should_run = 0; + break; + case 18: /* Clear NZCV flags and 'it ls' state (true as C=0) */ + cpsr = 0x00009800; + break; + case 19: /* Set NZCV flags and 'it cs' state (true as C=1) */ + cpsr = 0xf0002800; + is_last_scenario = true; + break; + } + } + + return cpsr; +} + +static void setup_test_context(struct pt_regs *regs) +{ + int scenario = test_case_run_count>>1; + unsigned long val; + struct test_arg *args; + int i; + + is_last_scenario = false; + memory_needs_checking = false; + + /* Initialise test memory on stack */ + val = (scenario & 1) ? VALM : ~VALM; + for (i = 0; i < TEST_MEMORY_SIZE / sizeof(current_stack[0]); ++i) + current_stack[i] = val + (i << 8); + /* Put target of branch on stack for tests which load PC from memory */ + if (current_branch_target) + current_stack[15] = current_branch_target; + /* Put a value for SP on stack for tests which load SP from memory */ + current_stack[13] = (u32)current_stack + 120; + + /* Initialise register values to their default state */ + val = (scenario & 2) ? VALR : ~VALR; + for (i = 0; i < 13; ++i) + regs->uregs[i] = val ^ (i << 8); + regs->ARM_lr = val ^ (14 << 8); + regs->ARM_cpsr &= ~(APSR_MASK | PSR_IT_MASK); + regs->ARM_cpsr |= test_context_cpsr(scenario); + + /* Perform testcase specific register setup */ + args = current_args; + for (; args[0].type != ARG_TYPE_END; ++args) + switch (args[0].type) { + case ARG_TYPE_REG: { + struct test_arg_regptr *arg = + (struct test_arg_regptr *)args; + regs->uregs[arg->reg] = arg->val; + break; + } + case ARG_TYPE_PTR: { + struct test_arg_regptr *arg = + (struct test_arg_regptr *)args; + regs->uregs[arg->reg] = + (unsigned long)current_stack + arg->val; + memory_needs_checking = true; + break; + } + case ARG_TYPE_MEM: { + struct test_arg_mem *arg = (struct test_arg_mem *)args; + current_stack[arg->index] = arg->val; + break; + } + default: + break; + } +} + +struct test_probe { + struct kprobe kprobe; + bool registered; + int hit; +}; + +static void unregister_test_probe(struct test_probe *probe) +{ + if (probe->registered) { + unregister_kprobe(&probe->kprobe); + probe->kprobe.flags = 0; /* Clear disable flag to allow reuse */ + } + probe->registered = false; +} + +static int register_test_probe(struct test_probe *probe) +{ + int ret; + + if (probe->registered) + BUG(); + + ret = register_kprobe(&probe->kprobe); + if (ret >= 0) { + probe->registered = true; + probe->hit = -1; + } + return ret; +} + +static int __kprobes +test_before_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + container_of(p, struct test_probe, kprobe)->hit = test_instance; + return 0; +} + +static void __kprobes +test_before_post_handler(struct kprobe *p, struct pt_regs *regs, + unsigned long flags) +{ + setup_test_context(regs); + initial_regs = *regs; + initial_regs.ARM_cpsr &= ~PSR_IGNORE_BITS; +} + +static int __kprobes +test_case_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + container_of(p, struct test_probe, kprobe)->hit = test_instance; + return 0; +} + +static int __kprobes +test_after_pre_handler(struct kprobe *p, struct pt_regs *regs) +{ + if (container_of(p, struct test_probe, kprobe)->hit == test_instance) + return 0; /* Already run for this test instance */ + + result_regs = *regs; + result_regs.ARM_cpsr &= ~PSR_IGNORE_BITS; + + /* Undo any changes done to SP by the test case */ + regs->ARM_sp = (unsigned long)current_stack; + + container_of(p, struct test_probe, kprobe)->hit = test_instance; + return 0; +} + +static struct test_probe test_before_probe = { + .kprobe.pre_handler = test_before_pre_handler, + .kprobe.post_handler = test_before_post_handler, +}; + +static struct test_probe test_case_probe = { + .kprobe.pre_handler = test_case_pre_handler, +}; + +static struct test_probe test_after_probe = { + .kprobe.pre_handler = test_after_pre_handler, +}; + +static struct test_probe test_after2_probe = { + .kprobe.pre_handler = test_after_pre_handler, +}; + +static void test_case_cleanup(void) +{ + unregister_test_probe(&test_before_probe); + unregister_test_probe(&test_case_probe); + unregister_test_probe(&test_after_probe); + unregister_test_probe(&test_after2_probe); +} + +static void print_registers(struct pt_regs *regs) +{ + pr_err("r0 %08lx | r1 %08lx | r2 %08lx | r3 %08lx\n", + regs->ARM_r0, regs->ARM_r1, regs->ARM_r2, regs->ARM_r3); + pr_err("r4 %08lx | r5 %08lx | r6 %08lx | r7 %08lx\n", + regs->ARM_r4, regs->ARM_r5, regs->ARM_r6, regs->ARM_r7); + pr_err("r8 %08lx | r9 %08lx | r10 %08lx | r11 %08lx\n", + regs->ARM_r8, regs->ARM_r9, regs->ARM_r10, regs->ARM_fp); + pr_err("r12 %08lx | sp %08lx | lr %08lx | pc %08lx\n", + regs->ARM_ip, regs->ARM_sp, regs->ARM_lr, regs->ARM_pc); + pr_err("cpsr %08lx\n", regs->ARM_cpsr); +} + +static void print_memory(u32 *mem, size_t size) +{ + int i; + for (i = 0; i < size / sizeof(u32); i += 4) + pr_err("%08x %08x %08x %08x\n", mem[i], mem[i+1], + mem[i+2], mem[i+3]); +} + +static size_t expected_memory_size(u32 *sp) +{ + size_t size = sizeof(expected_memory); + int offset = (uintptr_t)sp - (uintptr_t)current_stack; + if (offset > 0) + size -= offset; + return size; +} + +static void test_case_failed(const char *message) +{ + test_case_cleanup(); + + pr_err("FAIL: %s\n", message); + pr_err("FAIL: Test %s\n", current_title); + pr_err("FAIL: Scenario %d\n", test_case_run_count >> 1); +} + +static unsigned long next_instruction(unsigned long pc) +{ +#ifdef CONFIG_THUMB2_KERNEL + if ((pc & 1) && + !is_wide_instruction(__mem_to_opcode_thumb16(*(u16 *)(pc - 1)))) + return pc + 2; + else +#endif + return pc + 4; +} + +static uintptr_t __used kprobes_test_case_start(const char *title, void *stack) +{ + struct test_arg *args; + struct test_arg_end *end_arg; + unsigned long test_code; + + args = (struct test_arg *)PTR_ALIGN(title + strlen(title) + 1, 4); + + current_title = title; + current_args = args; + current_stack = stack; + + ++test_try_count; + + while (args->type != ARG_TYPE_END) + ++args; + end_arg = (struct test_arg_end *)args; + + test_code = (unsigned long)(args + 1); /* Code starts after args */ + + test_case_is_thumb = end_arg->flags & ARG_FLAG_THUMB; + if (test_case_is_thumb) + test_code |= 1; + + current_code_start = test_code; + + current_branch_target = 0; + if (end_arg->branch_offset != end_arg->end_offset) + current_branch_target = test_code + end_arg->branch_offset; + + test_code += end_arg->code_offset; + test_before_probe.kprobe.addr = (kprobe_opcode_t *)test_code; + + test_code = next_instruction(test_code); + test_case_probe.kprobe.addr = (kprobe_opcode_t *)test_code; + + if (test_case_is_thumb) { + u16 *p = (u16 *)(test_code & ~1); + current_instruction = __mem_to_opcode_thumb16(p[0]); + if (is_wide_instruction(current_instruction)) { + u16 instr2 = __mem_to_opcode_thumb16(p[1]); + current_instruction = __opcode_thumb32_compose(current_instruction, instr2); + } + } else { + current_instruction = __mem_to_opcode_arm(*(u32 *)test_code); + } + + if (current_title[0] == '.') + verbose("%s\n", current_title); + else + verbose("%s\t@ %0*x\n", current_title, + test_case_is_thumb ? 4 : 8, + current_instruction); + + test_code = next_instruction(test_code); + test_after_probe.kprobe.addr = (kprobe_opcode_t *)test_code; + + if (kprobe_test_flags & TEST_FLAG_NARROW_INSTR) { + if (!test_case_is_thumb || + is_wide_instruction(current_instruction)) { + test_case_failed("expected 16-bit instruction"); + goto fail; + } + } else { + if (test_case_is_thumb && + !is_wide_instruction(current_instruction)) { + test_case_failed("expected 32-bit instruction"); + goto fail; + } + } + + coverage_add(current_instruction); + + if (end_arg->flags & ARG_FLAG_UNSUPPORTED) { + if (register_test_probe(&test_case_probe) < 0) + goto pass; + test_case_failed("registered probe for unsupported instruction"); + goto fail; + } + + if (end_arg->flags & ARG_FLAG_SUPPORTED) { + if (register_test_probe(&test_case_probe) >= 0) + goto pass; + test_case_failed("couldn't register probe for supported instruction"); + goto fail; + } + + if (register_test_probe(&test_before_probe) < 0) { + test_case_failed("register test_before_probe failed"); + goto fail; + } + if (register_test_probe(&test_after_probe) < 0) { + test_case_failed("register test_after_probe failed"); + goto fail; + } + if (current_branch_target) { + test_after2_probe.kprobe.addr = + (kprobe_opcode_t *)current_branch_target; + if (register_test_probe(&test_after2_probe) < 0) { + test_case_failed("register test_after2_probe failed"); + goto fail; + } + } + + /* Start first run of test case */ + test_case_run_count = 0; + ++test_instance; + return current_code_start; +pass: + test_case_run_count = TEST_CASE_PASSED; + return (uintptr_t)test_after_probe.kprobe.addr; +fail: + test_case_run_count = TEST_CASE_FAILED; + return (uintptr_t)test_after_probe.kprobe.addr; +} + +static bool check_test_results(void) +{ + size_t mem_size = 0; + u32 *mem = 0; + + if (memcmp(&expected_regs, &result_regs, sizeof(expected_regs))) { + test_case_failed("registers differ"); + goto fail; + } + + if (memory_needs_checking) { + mem = (u32 *)result_regs.ARM_sp; + mem_size = expected_memory_size(mem); + if (memcmp(expected_memory, mem, mem_size)) { + test_case_failed("test memory differs"); + goto fail; + } + } + + return true; + +fail: + pr_err("initial_regs:\n"); + print_registers(&initial_regs); + pr_err("expected_regs:\n"); + print_registers(&expected_regs); + pr_err("result_regs:\n"); + print_registers(&result_regs); + + if (mem) { + pr_err("current_stack=%p\n", current_stack); + pr_err("expected_memory:\n"); + print_memory(expected_memory, mem_size); + pr_err("result_memory:\n"); + print_memory(mem, mem_size); + } + + return false; +} + +static uintptr_t __used kprobes_test_case_end(void) +{ + if (test_case_run_count < 0) { + if (test_case_run_count == TEST_CASE_PASSED) + /* kprobes_test_case_start did all the needed testing */ + goto pass; + else + /* kprobes_test_case_start failed */ + goto fail; + } + + if (test_before_probe.hit != test_instance) { + test_case_failed("test_before_handler not run"); + goto fail; + } + + if (test_after_probe.hit != test_instance && + test_after2_probe.hit != test_instance) { + test_case_failed("test_after_handler not run"); + goto fail; + } + + /* + * Even numbered test runs ran without a probe on the test case so + * we can gather reference results. The subsequent odd numbered run + * will have the probe inserted. + */ + if ((test_case_run_count & 1) == 0) { + /* Save results from run without probe */ + u32 *mem = (u32 *)result_regs.ARM_sp; + expected_regs = result_regs; + memcpy(expected_memory, mem, expected_memory_size(mem)); + + /* Insert probe onto test case instruction */ + if (register_test_probe(&test_case_probe) < 0) { + test_case_failed("register test_case_probe failed"); + goto fail; + } + } else { + /* Check probe ran as expected */ + if (probe_should_run == 1) { + if (test_case_probe.hit != test_instance) { + test_case_failed("test_case_handler not run"); + goto fail; + } + } else if (probe_should_run == 0) { + if (test_case_probe.hit == test_instance) { + test_case_failed("test_case_handler ran"); + goto fail; + } + } + + /* Remove probe for any subsequent reference run */ + unregister_test_probe(&test_case_probe); + + if (!check_test_results()) + goto fail; + + if (is_last_scenario) + goto pass; + } + + /* Do next test run */ + ++test_case_run_count; + ++test_instance; + return current_code_start; +fail: + ++test_fail_count; + goto end; +pass: + ++test_pass_count; +end: + test_case_cleanup(); + return 0; +} + + +/* + * Top level test functions + */ + +static int run_test_cases(void (*tests)(void), const union decode_item *table) +{ + int ret; + + pr_info(" Check decoding tables\n"); + ret = table_test(table); + if (ret) + return ret; + + pr_info(" Run test cases\n"); + ret = coverage_start(table); + if (ret) + return ret; + + tests(); + + coverage_end(); + return 0; +} + + +static int __init run_all_tests(void) +{ + int ret = 0; + + pr_info("Beginning kprobe tests...\n"); + +#ifndef CONFIG_THUMB2_KERNEL + + pr_info("Probe ARM code\n"); + ret = run_api_tests(arm_func); + if (ret) + goto out; + + pr_info("ARM instruction simulation\n"); + ret = run_test_cases(kprobe_arm_test_cases, probes_decode_arm_table); + if (ret) + goto out; + +#else /* CONFIG_THUMB2_KERNEL */ + + pr_info("Probe 16-bit Thumb code\n"); + ret = run_api_tests(thumb16_func); + if (ret) + goto out; + + pr_info("Probe 32-bit Thumb code, even halfword\n"); + ret = run_api_tests(thumb32even_func); + if (ret) + goto out; + + pr_info("Probe 32-bit Thumb code, odd halfword\n"); + ret = run_api_tests(thumb32odd_func); + if (ret) + goto out; + + pr_info("16-bit Thumb instruction simulation\n"); + ret = run_test_cases(kprobe_thumb16_test_cases, + probes_decode_thumb16_table); + if (ret) + goto out; + + pr_info("32-bit Thumb instruction simulation\n"); + ret = run_test_cases(kprobe_thumb32_test_cases, + probes_decode_thumb32_table); + if (ret) + goto out; +#endif + + pr_info("Total instruction simulation tests=%d, pass=%d fail=%d\n", + test_try_count, test_pass_count, test_fail_count); + if (test_fail_count) { + ret = -EINVAL; + goto out; + } + +#if BENCHMARKING + pr_info("Benchmarks\n"); + ret = run_benchmarks(); + if (ret) + goto out; +#endif + +#if __LINUX_ARM_ARCH__ >= 7 + /* We are able to run all test cases so coverage should be complete */ + if (coverage_fail) { + pr_err("FAIL: Test coverage checks failed\n"); + ret = -EINVAL; + goto out; + } +#endif + +out: + if (ret == 0) + ret = tests_failed; + if (ret == 0) + pr_info("Finished kprobe tests OK\n"); + else + pr_err("kprobe tests failed\n"); + + return ret; +} + + +/* + * Module setup + */ + +#ifdef MODULE + +static void __exit kprobe_test_exit(void) +{ +} + +module_init(run_all_tests) +module_exit(kprobe_test_exit) +MODULE_LICENSE("GPL"); + +#else /* !MODULE */ + +late_initcall(run_all_tests); + +#endif diff --git a/arch/arm/kernel/kprobes-test.h b/arch/arm/kernel/kprobes-test.h new file mode 100644 index 00000000000..eecc90a0fd9 --- /dev/null +++ b/arch/arm/kernel/kprobes-test.h @@ -0,0 +1,432 @@ +/* + * arch/arm/kernel/kprobes-test.h + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define VERBOSE 0 /* Set to '1' for more logging of test cases */ + +#ifdef CONFIG_THUMB2_KERNEL +#define NORMAL_ISA "16" +#else +#define NORMAL_ISA "32" +#endif + + +/* Flags used in kprobe_test_flags */ +#define TEST_FLAG_NO_ITBLOCK (1<<0) +#define TEST_FLAG_FULL_ITBLOCK (1<<1) +#define TEST_FLAG_NARROW_INSTR (1<<2) + +extern int kprobe_test_flags; +extern int kprobe_test_cc_position; + + +#define TEST_MEMORY_SIZE 256 + + +/* + * Test case structures. + * + * The arguments given to test cases can be one of three types. + * + * ARG_TYPE_REG + * Load a register with the given value. + * + * ARG_TYPE_PTR + * Load a register with a pointer into the stack buffer (SP + given value). + * + * ARG_TYPE_MEM + * Store the given value into the stack buffer at [SP+index]. + * + */ + +#define ARG_TYPE_END 0 +#define ARG_TYPE_REG 1 +#define ARG_TYPE_PTR 2 +#define ARG_TYPE_MEM 3 + +#define ARG_FLAG_UNSUPPORTED 0x01 +#define ARG_FLAG_SUPPORTED 0x02 +#define ARG_FLAG_THUMB 0x10 /* Must be 16 so TEST_ISA can be used */ +#define ARG_FLAG_ARM 0x20 /* Must be 32 so TEST_ISA can be used */ + +struct test_arg { + u8 type; /* ARG_TYPE_x */ + u8 _padding[7]; +}; + +struct test_arg_regptr { + u8 type; /* ARG_TYPE_REG or ARG_TYPE_PTR */ + u8 reg; + u8 _padding[2]; + u32 val; +}; + +struct test_arg_mem { + u8 type; /* ARG_TYPE_MEM */ + u8 index; + u8 _padding[2]; + u32 val; +}; + +struct test_arg_end { + u8 type; /* ARG_TYPE_END */ + u8 flags; /* ARG_FLAG_x */ + u16 code_offset; + u16 branch_offset; + u16 end_offset; +}; + + +/* + * Building blocks for test cases. + * + * Each test case is wrapped between TESTCASE_START and TESTCASE_END. + * + * To specify arguments for a test case the TEST_ARG_{REG,PTR,MEM} macros are + * used followed by a terminating TEST_ARG_END. + * + * After this, the instruction to be tested is defined with TEST_INSTRUCTION. + * Or for branches, TEST_BRANCH_B and TEST_BRANCH_F (branch forwards/backwards). + * + * Some specific test cases may make use of other custom constructs. + */ + +#if VERBOSE +#define verbose(fmt, ...) pr_info(fmt, ##__VA_ARGS__) +#else +#define verbose(fmt, ...) +#endif + +#define TEST_GROUP(title) \ + verbose("\n"); \ + verbose(title"\n"); \ + verbose("---------------------------------------------------------\n"); + +#define TESTCASE_START(title) \ + __asm__ __volatile__ ( \ + "bl __kprobes_test_case_start \n\t" \ + /* don't use .asciz here as 'title' may be */ \ + /* multiple strings to be concatenated. */ \ + ".ascii "#title" \n\t" \ + ".byte 0 \n\t" \ + ".align 2, 0 \n\t" + +#define TEST_ARG_REG(reg, val) \ + ".byte "__stringify(ARG_TYPE_REG)" \n\t" \ + ".byte "#reg" \n\t" \ + ".short 0 \n\t" \ + ".word "#val" \n\t" + +#define TEST_ARG_PTR(reg, val) \ + ".byte "__stringify(ARG_TYPE_PTR)" \n\t" \ + ".byte "#reg" \n\t" \ + ".short 0 \n\t" \ + ".word "#val" \n\t" + +#define TEST_ARG_MEM(index, val) \ + ".byte "__stringify(ARG_TYPE_MEM)" \n\t" \ + ".byte "#index" \n\t" \ + ".short 0 \n\t" \ + ".word "#val" \n\t" + +#define TEST_ARG_END(flags) \ + ".byte "__stringify(ARG_TYPE_END)" \n\t" \ + ".byte "TEST_ISA flags" \n\t" \ + ".short 50f-0f \n\t" \ + ".short 2f-0f \n\t" \ + ".short 99f-0f \n\t" \ + ".code "TEST_ISA" \n\t" \ + "0: \n\t" + +#define TEST_INSTRUCTION(instruction) \ + "50: nop \n\t" \ + "1: "instruction" \n\t" \ + " nop \n\t" + +#define TEST_BRANCH_F(instruction) \ + TEST_INSTRUCTION(instruction) \ + " b 99f \n\t" \ + "2: nop \n\t" + +#define TEST_BRANCH_B(instruction) \ + " b 50f \n\t" \ + " b 99f \n\t" \ + "2: nop \n\t" \ + " b 99f \n\t" \ + TEST_INSTRUCTION(instruction) + +#define TEST_BRANCH_FX(instruction, codex) \ + TEST_INSTRUCTION(instruction) \ + " b 99f \n\t" \ + codex" \n\t" \ + " b 99f \n\t" \ + "2: nop \n\t" + +#define TEST_BRANCH_BX(instruction, codex) \ + " b 50f \n\t" \ + " b 99f \n\t" \ + "2: nop \n\t" \ + " b 99f \n\t" \ + codex" \n\t" \ + TEST_INSTRUCTION(instruction) + +#define TESTCASE_END \ + "2: \n\t" \ + "99: \n\t" \ + " bl __kprobes_test_case_end_"TEST_ISA" \n\t" \ + ".code "NORMAL_ISA" \n\t" \ + : : \ + : "r0", "r1", "r2", "r3", "ip", "lr", "memory", "cc" \ + ); + + +/* + * Macros to define test cases. + * + * Those of the form TEST_{R,P,M}* can be used to define test cases + * which take combinations of the three basic types of arguments. E.g. + * + * TEST_R One register argument + * TEST_RR Two register arguments + * TEST_RPR A register, a pointer, then a register argument + * + * For testing instructions which may branch, there are macros TEST_BF_* + * and TEST_BB_* for branching forwards and backwards. + * + * TEST_SUPPORTED and TEST_UNSUPPORTED don't cause the code to be executed, + * the just verify that a kprobe is or is not allowed on the given instruction. + */ + +#define TEST(code) \ + TESTCASE_START(code) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code) \ + TESTCASE_END + +#define TEST_UNSUPPORTED(code) \ + TESTCASE_START(code) \ + TEST_ARG_END("|"__stringify(ARG_FLAG_UNSUPPORTED)) \ + TEST_INSTRUCTION(code) \ + TESTCASE_END + +#define TEST_SUPPORTED(code) \ + TESTCASE_START(code) \ + TEST_ARG_END("|"__stringify(ARG_FLAG_SUPPORTED)) \ + TEST_INSTRUCTION(code) \ + TESTCASE_END + +#define TEST_R(code1, reg, val, code2) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_REG(reg, val) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg code2) \ + TESTCASE_END + +#define TEST_RR(code1, reg1, val1, code2, reg2, val2, code3) \ + TESTCASE_START(code1 #reg1 code2 #reg2 code3) \ + TEST_ARG_REG(reg1, val1) \ + TEST_ARG_REG(reg2, val2) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3) \ + TESTCASE_END + +#define TEST_RRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\ + TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4) \ + TEST_ARG_REG(reg1, val1) \ + TEST_ARG_REG(reg2, val2) \ + TEST_ARG_REG(reg3, val3) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4) \ + TESTCASE_END + +#define TEST_RRRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4, reg4, val4) \ + TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4) \ + TEST_ARG_REG(reg1, val1) \ + TEST_ARG_REG(reg2, val2) \ + TEST_ARG_REG(reg3, val3) \ + TEST_ARG_REG(reg4, val4) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4 #reg4) \ + TESTCASE_END + +#define TEST_P(code1, reg1, val1, code2) \ + TESTCASE_START(code1 #reg1 code2) \ + TEST_ARG_PTR(reg1, val1) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2) \ + TESTCASE_END + +#define TEST_PR(code1, reg1, val1, code2, reg2, val2, code3) \ + TESTCASE_START(code1 #reg1 code2 #reg2 code3) \ + TEST_ARG_PTR(reg1, val1) \ + TEST_ARG_REG(reg2, val2) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3) \ + TESTCASE_END + +#define TEST_RP(code1, reg1, val1, code2, reg2, val2, code3) \ + TESTCASE_START(code1 #reg1 code2 #reg2 code3) \ + TEST_ARG_REG(reg1, val1) \ + TEST_ARG_PTR(reg2, val2) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3) \ + TESTCASE_END + +#define TEST_PRR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\ + TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4) \ + TEST_ARG_PTR(reg1, val1) \ + TEST_ARG_REG(reg2, val2) \ + TEST_ARG_REG(reg3, val3) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4) \ + TESTCASE_END + +#define TEST_RPR(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\ + TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4) \ + TEST_ARG_REG(reg1, val1) \ + TEST_ARG_PTR(reg2, val2) \ + TEST_ARG_REG(reg3, val3) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4) \ + TESTCASE_END + +#define TEST_RRP(code1, reg1, val1, code2, reg2, val2, code3, reg3, val3, code4)\ + TESTCASE_START(code1 #reg1 code2 #reg2 code3 #reg3 code4) \ + TEST_ARG_REG(reg1, val1) \ + TEST_ARG_REG(reg2, val2) \ + TEST_ARG_PTR(reg3, val3) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 #reg1 code2 #reg2 code3 #reg3 code4) \ + TESTCASE_END + +#define TEST_BF_P(code1, reg1, val1, code2) \ + TESTCASE_START(code1 #reg1 code2) \ + TEST_ARG_PTR(reg1, val1) \ + TEST_ARG_END("") \ + TEST_BRANCH_F(code1 #reg1 code2) \ + TESTCASE_END + +#define TEST_BF(code) \ + TESTCASE_START(code) \ + TEST_ARG_END("") \ + TEST_BRANCH_F(code) \ + TESTCASE_END + +#define TEST_BB(code) \ + TESTCASE_START(code) \ + TEST_ARG_END("") \ + TEST_BRANCH_B(code) \ + TESTCASE_END + +#define TEST_BF_R(code1, reg, val, code2) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_REG(reg, val) \ + TEST_ARG_END("") \ + TEST_BRANCH_F(code1 #reg code2) \ + TESTCASE_END + +#define TEST_BB_R(code1, reg, val, code2) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_REG(reg, val) \ + TEST_ARG_END("") \ + TEST_BRANCH_B(code1 #reg code2) \ + TESTCASE_END + +#define TEST_BF_RR(code1, reg1, val1, code2, reg2, val2, code3) \ + TESTCASE_START(code1 #reg1 code2 #reg2 code3) \ + TEST_ARG_REG(reg1, val1) \ + TEST_ARG_REG(reg2, val2) \ + TEST_ARG_END("") \ + TEST_BRANCH_F(code1 #reg1 code2 #reg2 code3) \ + TESTCASE_END + +#define TEST_BF_X(code, codex) \ + TESTCASE_START(code) \ + TEST_ARG_END("") \ + TEST_BRANCH_FX(code, codex) \ + TESTCASE_END + +#define TEST_BB_X(code, codex) \ + TESTCASE_START(code) \ + TEST_ARG_END("") \ + TEST_BRANCH_BX(code, codex) \ + TESTCASE_END + +#define TEST_BF_RX(code1, reg, val, code2, codex) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_REG(reg, val) \ + TEST_ARG_END("") \ + TEST_BRANCH_FX(code1 #reg code2, codex) \ + TESTCASE_END + +#define TEST_X(code, codex) \ + TESTCASE_START(code) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code) \ + " b 99f \n\t" \ + " "codex" \n\t" \ + TESTCASE_END + +#define TEST_RX(code1, reg, val, code2, codex) \ + TESTCASE_START(code1 #reg code2) \ + TEST_ARG_REG(reg, val) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 __stringify(reg) code2) \ + " b 99f \n\t" \ + " "codex" \n\t" \ + TESTCASE_END + +#define TEST_RRX(code1, reg1, val1, code2, reg2, val2, code3, codex) \ + TESTCASE_START(code1 #reg1 code2 #reg2 code3) \ + TEST_ARG_REG(reg1, val1) \ + TEST_ARG_REG(reg2, val2) \ + TEST_ARG_END("") \ + TEST_INSTRUCTION(code1 __stringify(reg1) code2 __stringify(reg2) code3) \ + " b 99f \n\t" \ + " "codex" \n\t" \ + TESTCASE_END + + +/* + * Macros for defining space directives spread over multiple lines. + * These are required so the compiler guesses better the length of inline asm + * code and will spill the literal pool early enough to avoid generating PC + * relative loads with out of range offsets. + */ +#define TWICE(x) x x +#define SPACE_0x8 TWICE(".space 4\n\t") +#define SPACE_0x10 TWICE(SPACE_0x8) +#define SPACE_0x20 TWICE(SPACE_0x10) +#define SPACE_0x40 TWICE(SPACE_0x20) +#define SPACE_0x80 TWICE(SPACE_0x40) +#define SPACE_0x100 TWICE(SPACE_0x80) +#define SPACE_0x200 TWICE(SPACE_0x100) +#define SPACE_0x400 TWICE(SPACE_0x200) +#define SPACE_0x800 TWICE(SPACE_0x400) +#define SPACE_0x1000 TWICE(SPACE_0x800) + + +/* Various values used in test cases... */ +#define N(val) (val ^ 0xffffffff) +#define VAL1 0x12345678 +#define VAL2 N(VAL1) +#define VAL3 0xa5f801 +#define VAL4 N(VAL3) +#define VALM 0x456789ab +#define VALR 0xdeaddead +#define HH1 0x0123fecb +#define HH2 0xa9874567 + + +#ifdef CONFIG_THUMB2_KERNEL +void kprobe_thumb16_test_cases(void); +void kprobe_thumb32_test_cases(void); +#else +void kprobe_arm_test_cases(void); +#endif diff --git a/arch/arm/kernel/kprobes-thumb.c b/arch/arm/kernel/kprobes-thumb.c new file mode 100644 index 00000000000..9495d7f3516 --- /dev/null +++ b/arch/arm/kernel/kprobes-thumb.c @@ -0,0 +1,666 @@ +/* + * arch/arm/kernel/kprobes-thumb.c + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/ptrace.h> +#include <linux/kprobes.h> + +#include "kprobes.h" +#include "probes-thumb.h" + +/* These emulation encodings are functionally equivalent... */ +#define t32_emulate_rd8rn16rm0ra12_noflags \ + t32_emulate_rdlo12rdhi8rn16rm0_noflags + +/* t32 thumb actions */ + +static void __kprobes +t32_simulate_table_branch(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + + unsigned long rnv = (rn == 15) ? pc : regs->uregs[rn]; + unsigned long rmv = regs->uregs[rm]; + unsigned int halfwords; + + if (insn & 0x10) /* TBH */ + halfwords = ((u16 *)rnv)[rmv]; + else /* TBB */ + halfwords = ((u8 *)rnv)[rmv]; + + regs->ARM_pc = pc + 2 * halfwords; +} + +static void __kprobes +t32_simulate_mrs(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rd = (insn >> 8) & 0xf; + unsigned long mask = 0xf8ff03df; /* Mask out execution state */ + regs->uregs[rd] = regs->ARM_cpsr & mask; +} + +static void __kprobes +t32_simulate_cond_branch(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc; + + long offset = insn & 0x7ff; /* imm11 */ + offset += (insn & 0x003f0000) >> 5; /* imm6 */ + offset += (insn & 0x00002000) << 4; /* J1 */ + offset += (insn & 0x00000800) << 7; /* J2 */ + offset -= (insn & 0x04000000) >> 7; /* Apply sign bit */ + + regs->ARM_pc = pc + (offset * 2); +} + +static enum probes_insn __kprobes +t32_decode_cond_branch(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + int cc = (insn >> 22) & 0xf; + asi->insn_check_cc = probes_condition_checks[cc]; + asi->insn_handler = t32_simulate_cond_branch; + return INSN_GOOD_NO_SLOT; +} + +static void __kprobes +t32_simulate_branch(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc; + + long offset = insn & 0x7ff; /* imm11 */ + offset += (insn & 0x03ff0000) >> 5; /* imm10 */ + offset += (insn & 0x00002000) << 9; /* J1 */ + offset += (insn & 0x00000800) << 10; /* J2 */ + if (insn & 0x04000000) + offset -= 0x00800000; /* Apply sign bit */ + else + offset ^= 0x00600000; /* Invert J1 and J2 */ + + if (insn & (1 << 14)) { + /* BL or BLX */ + regs->ARM_lr = regs->ARM_pc | 1; + if (!(insn & (1 << 12))) { + /* BLX so switch to ARM mode */ + regs->ARM_cpsr &= ~PSR_T_BIT; + pc &= ~3; + } + } + + regs->ARM_pc = pc + (offset * 2); +} + +static void __kprobes +t32_simulate_ldr_literal(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long addr = regs->ARM_pc & ~3; + int rt = (insn >> 12) & 0xf; + unsigned long rtv; + + long offset = insn & 0xfff; + if (insn & 0x00800000) + addr += offset; + else + addr -= offset; + + if (insn & 0x00400000) { + /* LDR */ + rtv = *(unsigned long *)addr; + if (rt == 15) { + bx_write_pc(rtv, regs); + return; + } + } else if (insn & 0x00200000) { + /* LDRH */ + if (insn & 0x01000000) + rtv = *(s16 *)addr; + else + rtv = *(u16 *)addr; + } else { + /* LDRB */ + if (insn & 0x01000000) + rtv = *(s8 *)addr; + else + rtv = *(u8 *)addr; + } + + regs->uregs[rt] = rtv; +} + +static enum probes_insn __kprobes +t32_decode_ldmstm(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + enum probes_insn ret = kprobe_decode_ldmstm(insn, asi, d); + + /* Fixup modified instruction to have halfwords in correct order...*/ + insn = __mem_to_opcode_arm(asi->insn[0]); + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn >> 16); + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0xffff); + + return ret; +} + +static void __kprobes +t32_emulate_ldrdstrd(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc & ~3; + int rt1 = (insn >> 12) & 0xf; + int rt2 = (insn >> 8) & 0xf; + int rn = (insn >> 16) & 0xf; + + register unsigned long rt1v asm("r0") = regs->uregs[rt1]; + register unsigned long rt2v asm("r1") = regs->uregs[rt2]; + register unsigned long rnv asm("r2") = (rn == 15) ? pc + : regs->uregs[rn]; + + __asm__ __volatile__ ( + "blx %[fn]" + : "=r" (rt1v), "=r" (rt2v), "=r" (rnv) + : "0" (rt1v), "1" (rt2v), "2" (rnv), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + if (rn != 15) + regs->uregs[rn] = rnv; /* Writeback base register */ + regs->uregs[rt1] = rt1v; + regs->uregs[rt2] = rt2v; +} + +static void __kprobes +t32_emulate_ldrstr(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rt = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + + register unsigned long rtv asm("r0") = regs->uregs[rt]; + register unsigned long rnv asm("r2") = regs->uregs[rn]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + + __asm__ __volatile__ ( + "blx %[fn]" + : "=r" (rtv), "=r" (rnv) + : "0" (rtv), "1" (rnv), "r" (rmv), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rn] = rnv; /* Writeback base register */ + if (rt == 15) /* Can't be true for a STR as they aren't allowed */ + bx_write_pc(rtv, regs); + else + regs->uregs[rt] = rtv; +} + +static void __kprobes +t32_emulate_rd8rn16rm0_rwflags(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rd = (insn >> 8) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + + register unsigned long rdv asm("r1") = regs->uregs[rd]; + register unsigned long rnv asm("r2") = regs->uregs[rn]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + unsigned long cpsr = regs->ARM_cpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "blx %[fn] \n\t" + "mrs %[cpsr], cpsr \n\t" + : "=r" (rdv), [cpsr] "=r" (cpsr) + : "0" (rdv), "r" (rnv), "r" (rmv), + "1" (cpsr), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rd] = rdv; + regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK); +} + +static void __kprobes +t32_emulate_rd8pc16_noflags(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc; + int rd = (insn >> 8) & 0xf; + + register unsigned long rdv asm("r1") = regs->uregs[rd]; + register unsigned long rnv asm("r2") = pc & ~3; + + __asm__ __volatile__ ( + "blx %[fn]" + : "=r" (rdv) + : "0" (rdv), "r" (rnv), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rd] = rdv; +} + +static void __kprobes +t32_emulate_rd8rn16_noflags(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rd = (insn >> 8) & 0xf; + int rn = (insn >> 16) & 0xf; + + register unsigned long rdv asm("r1") = regs->uregs[rd]; + register unsigned long rnv asm("r2") = regs->uregs[rn]; + + __asm__ __volatile__ ( + "blx %[fn]" + : "=r" (rdv) + : "0" (rdv), "r" (rnv), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rd] = rdv; +} + +static void __kprobes +t32_emulate_rdlo12rdhi8rn16rm0_noflags(probes_opcode_t insn, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + int rdlo = (insn >> 12) & 0xf; + int rdhi = (insn >> 8) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; + + register unsigned long rdlov asm("r0") = regs->uregs[rdlo]; + register unsigned long rdhiv asm("r1") = regs->uregs[rdhi]; + register unsigned long rnv asm("r2") = regs->uregs[rn]; + register unsigned long rmv asm("r3") = regs->uregs[rm]; + + __asm__ __volatile__ ( + "blx %[fn]" + : "=r" (rdlov), "=r" (rdhiv) + : "0" (rdlov), "1" (rdhiv), "r" (rnv), "r" (rmv), + [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + regs->uregs[rdlo] = rdlov; + regs->uregs[rdhi] = rdhiv; +} +/* t16 thumb actions */ + +static void __kprobes +t16_simulate_bxblx(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc + 2; + int rm = (insn >> 3) & 0xf; + unsigned long rmv = (rm == 15) ? pc : regs->uregs[rm]; + + if (insn & (1 << 7)) /* BLX ? */ + regs->ARM_lr = regs->ARM_pc | 1; + + bx_write_pc(rmv, regs); +} + +static void __kprobes +t16_simulate_ldr_literal(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long *base = (unsigned long *)((regs->ARM_pc + 2) & ~3); + long index = insn & 0xff; + int rt = (insn >> 8) & 0x7; + regs->uregs[rt] = base[index]; +} + +static void __kprobes +t16_simulate_ldrstr_sp_relative(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long* base = (unsigned long *)regs->ARM_sp; + long index = insn & 0xff; + int rt = (insn >> 8) & 0x7; + if (insn & 0x800) /* LDR */ + regs->uregs[rt] = base[index]; + else /* STR */ + base[index] = regs->uregs[rt]; +} + +static void __kprobes +t16_simulate_reladr(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long base = (insn & 0x800) ? regs->ARM_sp + : ((regs->ARM_pc + 2) & ~3); + long offset = insn & 0xff; + int rt = (insn >> 8) & 0x7; + regs->uregs[rt] = base + offset * 4; +} + +static void __kprobes +t16_simulate_add_sp_imm(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + long imm = insn & 0x7f; + if (insn & 0x80) /* SUB */ + regs->ARM_sp -= imm * 4; + else /* ADD */ + regs->ARM_sp += imm * 4; +} + +static void __kprobes +t16_simulate_cbz(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rn = insn & 0x7; + probes_opcode_t nonzero = regs->uregs[rn] ? insn : ~insn; + if (nonzero & 0x800) { + long i = insn & 0x200; + long imm5 = insn & 0xf8; + unsigned long pc = regs->ARM_pc + 2; + regs->ARM_pc = pc + (i >> 3) + (imm5 >> 2); + } +} + +static void __kprobes +t16_simulate_it(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + /* + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + * The new IT state is in the lower byte of insn. + */ + unsigned long cpsr = regs->ARM_cpsr; + cpsr &= ~PSR_IT_MASK; + cpsr |= (insn & 0xfc) << 8; + cpsr |= (insn & 0x03) << 25; + regs->ARM_cpsr = cpsr; +} + +static void __kprobes +t16_singlestep_it(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + regs->ARM_pc += 2; + t16_simulate_it(insn, asi, regs); +} + +static enum probes_insn __kprobes +t16_decode_it(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + asi->insn_singlestep = t16_singlestep_it; + return INSN_GOOD_NO_SLOT; +} + +static void __kprobes +t16_simulate_cond_branch(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc + 2; + long offset = insn & 0x7f; + offset -= insn & 0x80; /* Apply sign bit */ + regs->ARM_pc = pc + (offset * 2); +} + +static enum probes_insn __kprobes +t16_decode_cond_branch(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + int cc = (insn >> 8) & 0xf; + asi->insn_check_cc = probes_condition_checks[cc]; + asi->insn_handler = t16_simulate_cond_branch; + return INSN_GOOD_NO_SLOT; +} + +static void __kprobes +t16_simulate_branch(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc + 2; + long offset = insn & 0x3ff; + offset -= insn & 0x400; /* Apply sign bit */ + regs->ARM_pc = pc + (offset * 2); +} + +static unsigned long __kprobes +t16_emulate_loregs(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long oldcpsr = regs->ARM_cpsr; + unsigned long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "ldmia %[regs], {r0-r7} \n\t" + "blx %[fn] \n\t" + "stmia %[regs], {r0-r7} \n\t" + "mrs %[newcpsr], cpsr \n\t" + : [newcpsr] "=r" (newcpsr) + : [oldcpsr] "r" (oldcpsr), [regs] "r" (regs), + [fn] "r" (asi->insn_fn) + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", + "lr", "memory", "cc" + ); + + return (oldcpsr & ~APSR_MASK) | (newcpsr & APSR_MASK); +} + +static void __kprobes +t16_emulate_loregs_rwflags(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + regs->ARM_cpsr = t16_emulate_loregs(insn, asi, regs); +} + +static void __kprobes +t16_emulate_loregs_noitrwflags(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long cpsr = t16_emulate_loregs(insn, asi, regs); + if (!in_it_block(cpsr)) + regs->ARM_cpsr = cpsr; +} + +static void __kprobes +t16_emulate_hiregs(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + unsigned long pc = regs->ARM_pc + 2; + int rdn = (insn & 0x7) | ((insn & 0x80) >> 4); + int rm = (insn >> 3) & 0xf; + + register unsigned long rdnv asm("r1"); + register unsigned long rmv asm("r0"); + unsigned long cpsr = regs->ARM_cpsr; + + rdnv = (rdn == 15) ? pc : regs->uregs[rdn]; + rmv = (rm == 15) ? pc : regs->uregs[rm]; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "blx %[fn] \n\t" + "mrs %[cpsr], cpsr \n\t" + : "=r" (rdnv), [cpsr] "=r" (cpsr) + : "0" (rdnv), "r" (rmv), "1" (cpsr), [fn] "r" (asi->insn_fn) + : "lr", "memory", "cc" + ); + + if (rdn == 15) + rdnv &= ~1; + + regs->uregs[rdn] = rdnv; + regs->ARM_cpsr = (regs->ARM_cpsr & ~APSR_MASK) | (cpsr & APSR_MASK); +} + +static enum probes_insn __kprobes +t16_decode_hiregs(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + insn &= ~0x00ff; + insn |= 0x001; /* Set Rdn = R1 and Rm = R0 */ + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(insn); + asi->insn_handler = t16_emulate_hiregs; + return INSN_GOOD; +} + +static void __kprobes +t16_emulate_push(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + __asm__ __volatile__ ( + "ldr r9, [%[regs], #13*4] \n\t" + "ldr r8, [%[regs], #14*4] \n\t" + "ldmia %[regs], {r0-r7} \n\t" + "blx %[fn] \n\t" + "str r9, [%[regs], #13*4] \n\t" + : + : [regs] "r" (regs), [fn] "r" (asi->insn_fn) + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", + "lr", "memory", "cc" + ); +} + +static enum probes_insn __kprobes +t16_decode_push(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + /* + * To simulate a PUSH we use a Thumb-2 "STMDB R9!, {registers}" + * and call it with R9=SP and LR in the register list represented + * by R8. + */ + /* 1st half STMDB R9!,{} */ + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe929); + /* 2nd half (register list) */ + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff); + asi->insn_handler = t16_emulate_push; + return INSN_GOOD; +} + +static void __kprobes +t16_emulate_pop_nopc(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + __asm__ __volatile__ ( + "ldr r9, [%[regs], #13*4] \n\t" + "ldmia %[regs], {r0-r7} \n\t" + "blx %[fn] \n\t" + "stmia %[regs], {r0-r7} \n\t" + "str r9, [%[regs], #13*4] \n\t" + : + : [regs] "r" (regs), [fn] "r" (asi->insn_fn) + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r9", + "lr", "memory", "cc" + ); +} + +static void __kprobes +t16_emulate_pop_pc(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + register unsigned long pc asm("r8"); + + __asm__ __volatile__ ( + "ldr r9, [%[regs], #13*4] \n\t" + "ldmia %[regs], {r0-r7} \n\t" + "blx %[fn] \n\t" + "stmia %[regs], {r0-r7} \n\t" + "str r9, [%[regs], #13*4] \n\t" + : "=r" (pc) + : [regs] "r" (regs), [fn] "r" (asi->insn_fn) + : "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r9", + "lr", "memory", "cc" + ); + + bx_write_pc(pc, regs); +} + +static enum probes_insn __kprobes +t16_decode_pop(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + /* + * To simulate a POP we use a Thumb-2 "LDMDB R9!, {registers}" + * and call it with R9=SP and PC in the register list represented + * by R8. + */ + /* 1st half LDMIA R9!,{} */ + ((u16 *)asi->insn)[0] = __opcode_to_mem_thumb16(0xe8b9); + /* 2nd half (register list) */ + ((u16 *)asi->insn)[1] = __opcode_to_mem_thumb16(insn & 0x1ff); + asi->insn_handler = insn & 0x100 ? t16_emulate_pop_pc + : t16_emulate_pop_nopc; + return INSN_GOOD; +} + +const union decode_action kprobes_t16_actions[NUM_PROBES_T16_ACTIONS] = { + [PROBES_T16_ADD_SP] = {.handler = t16_simulate_add_sp_imm}, + [PROBES_T16_CBZ] = {.handler = t16_simulate_cbz}, + [PROBES_T16_SIGN_EXTEND] = {.handler = t16_emulate_loregs_rwflags}, + [PROBES_T16_PUSH] = {.decoder = t16_decode_push}, + [PROBES_T16_POP] = {.decoder = t16_decode_pop}, + [PROBES_T16_SEV] = {.handler = probes_emulate_none}, + [PROBES_T16_WFE] = {.handler = probes_simulate_nop}, + [PROBES_T16_IT] = {.decoder = t16_decode_it}, + [PROBES_T16_CMP] = {.handler = t16_emulate_loregs_rwflags}, + [PROBES_T16_ADDSUB] = {.handler = t16_emulate_loregs_noitrwflags}, + [PROBES_T16_LOGICAL] = {.handler = t16_emulate_loregs_noitrwflags}, + [PROBES_T16_LDR_LIT] = {.handler = t16_simulate_ldr_literal}, + [PROBES_T16_BLX] = {.handler = t16_simulate_bxblx}, + [PROBES_T16_HIREGOPS] = {.decoder = t16_decode_hiregs}, + [PROBES_T16_LDRHSTRH] = {.handler = t16_emulate_loregs_rwflags}, + [PROBES_T16_LDRSTR] = {.handler = t16_simulate_ldrstr_sp_relative}, + [PROBES_T16_ADR] = {.handler = t16_simulate_reladr}, + [PROBES_T16_LDMSTM] = {.handler = t16_emulate_loregs_rwflags}, + [PROBES_T16_BRANCH_COND] = {.decoder = t16_decode_cond_branch}, + [PROBES_T16_BRANCH] = {.handler = t16_simulate_branch}, +}; + +const union decode_action kprobes_t32_actions[NUM_PROBES_T32_ACTIONS] = { + [PROBES_T32_LDMSTM] = {.decoder = t32_decode_ldmstm}, + [PROBES_T32_LDRDSTRD] = {.handler = t32_emulate_ldrdstrd}, + [PROBES_T32_TABLE_BRANCH] = {.handler = t32_simulate_table_branch}, + [PROBES_T32_TST] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_MOV] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_ADDSUB] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_LOGICAL] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_CMP] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_ADDWSUBW_PC] = {.handler = t32_emulate_rd8pc16_noflags,}, + [PROBES_T32_ADDWSUBW] = {.handler = t32_emulate_rd8rn16_noflags}, + [PROBES_T32_MOVW] = {.handler = t32_emulate_rd8rn16_noflags}, + [PROBES_T32_SAT] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_BITFIELD] = {.handler = t32_emulate_rd8rn16_noflags}, + [PROBES_T32_SEV] = {.handler = probes_emulate_none}, + [PROBES_T32_WFE] = {.handler = probes_simulate_nop}, + [PROBES_T32_MRS] = {.handler = t32_simulate_mrs}, + [PROBES_T32_BRANCH_COND] = {.decoder = t32_decode_cond_branch}, + [PROBES_T32_BRANCH] = {.handler = t32_simulate_branch}, + [PROBES_T32_PLDI] = {.handler = probes_simulate_nop}, + [PROBES_T32_LDR_LIT] = {.handler = t32_simulate_ldr_literal}, + [PROBES_T32_LDRSTR] = {.handler = t32_emulate_ldrstr}, + [PROBES_T32_SIGN_EXTEND] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_MEDIA] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_REVERSE] = {.handler = t32_emulate_rd8rn16_noflags}, + [PROBES_T32_MUL_ADD] = {.handler = t32_emulate_rd8rn16rm0_rwflags}, + [PROBES_T32_MUL_ADD2] = {.handler = t32_emulate_rd8rn16rm0ra12_noflags}, + [PROBES_T32_MUL_ADD_LONG] = { + .handler = t32_emulate_rdlo12rdhi8rn16rm0_noflags}, +}; diff --git a/arch/arm/kernel/kprobes.c b/arch/arm/kernel/kprobes.c index 2ba7deb3072..6d644202c8d 100644 --- a/arch/arm/kernel/kprobes.c +++ b/arch/arm/kernel/kprobes.c @@ -26,16 +26,24 @@ #include <linux/stop_machine.h> #include <linux/stringify.h> #include <asm/traps.h> +#include <asm/opcodes.h> #include <asm/cacheflush.h> +#include <linux/percpu.h> +#include <linux/bug.h> + +#include "kprobes.h" +#include "probes-arm.h" +#include "probes-thumb.h" +#include "patch.h" #define MIN_STACK_SIZE(addr) \ min((unsigned long)MAX_STACK_SIZE, \ (unsigned long)current_thread_info() + THREAD_START_SP - (addr)) -#define flush_insns(addr, cnt) \ +#define flush_insns(addr, size) \ flush_icache_range((unsigned long)(addr), \ (unsigned long)(addr) + \ - sizeof(kprobe_opcode_t) * (cnt)) + (size)) /* Used as a marker in ARM_pc to note when we're in a jprobe. */ #define JPROBE_MAGIC_ADDR 0xffffffff @@ -49,16 +57,40 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) kprobe_opcode_t insn; kprobe_opcode_t tmp_insn[MAX_INSN_SIZE]; unsigned long addr = (unsigned long)p->addr; + bool thumb; + kprobe_decode_insn_t *decode_insn; + const union decode_action *actions; int is; - if (addr & 0x3 || in_exception_text(addr)) + if (in_exception_text(addr)) + return -EINVAL; + +#ifdef CONFIG_THUMB2_KERNEL + thumb = true; + addr &= ~1; /* Bit 0 would normally be set to indicate Thumb code */ + insn = __mem_to_opcode_thumb16(((u16 *)addr)[0]); + if (is_wide_instruction(insn)) { + u16 inst2 = __mem_to_opcode_thumb16(((u16 *)addr)[1]); + insn = __opcode_thumb32_compose(insn, inst2); + decode_insn = thumb32_probes_decode_insn; + actions = kprobes_t32_actions; + } else { + decode_insn = thumb16_probes_decode_insn; + actions = kprobes_t16_actions; + } +#else /* !CONFIG_THUMB2_KERNEL */ + thumb = false; + if (addr & 0x3) return -EINVAL; + insn = __mem_to_opcode_arm(*p->addr); + decode_insn = arm_probes_decode_insn; + actions = kprobes_arm_actions; +#endif - insn = *p->addr; p->opcode = insn; p->ainsn.insn = tmp_insn; - switch (arm_kprobe_decode_insn(insn, &p->ainsn)) { + switch ((*decode_insn)(insn, &p->ainsn, true, actions)) { case INSN_REJECTED: /* not supported */ return -EINVAL; @@ -68,7 +100,10 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -ENOMEM; for (is = 0; is < MAX_INSN_SIZE; ++is) p->ainsn.insn[is] = tmp_insn[is]; - flush_insns(p->ainsn.insn, MAX_INSN_SIZE); + flush_insns(p->ainsn.insn, + sizeof(p->ainsn.insn[0]) * MAX_INSN_SIZE); + p->ainsn.insn_fn = (probes_insn_fn_t *) + ((uintptr_t)p->ainsn.insn | thumb); break; case INSN_GOOD_NO_SLOT: /* instruction doesn't need insn slot */ @@ -81,8 +116,30 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) void __kprobes arch_arm_kprobe(struct kprobe *p) { - *p->addr = KPROBE_BREAKPOINT_INSTRUCTION; - flush_insns(p->addr, 1); + unsigned int brkp; + void *addr; + + if (IS_ENABLED(CONFIG_THUMB2_KERNEL)) { + /* Remove any Thumb flag */ + addr = (void *)((uintptr_t)p->addr & ~1); + + if (is_wide_instruction(p->opcode)) + brkp = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION; + else + brkp = KPROBE_THUMB16_BREAKPOINT_INSTRUCTION; + } else { + kprobe_opcode_t insn = p->opcode; + + addr = p->addr; + brkp = KPROBE_ARM_BREAKPOINT_INSTRUCTION; + + if (insn >= 0xe0000000) + brkp |= 0xe0000000; /* Unconditional instruction */ + else + brkp |= insn & 0xf0000000; /* Copy condition from insn */ + } + + patch_text(addr, brkp); } /* @@ -90,19 +147,22 @@ void __kprobes arch_arm_kprobe(struct kprobe *p) * stop_machine. This synchronization is necessary on SMP to avoid removing * a probe between the moment the 'Undefined Instruction' exception is raised * and the moment the exception handler reads the faulting instruction from - * memory. + * memory. It is also needed to atomically set the two half-words of a 32-bit + * Thumb breakpoint. */ int __kprobes __arch_disarm_kprobe(void *p) { struct kprobe *kp = p; - *kp->addr = kp->opcode; - flush_insns(kp->addr, 1); + void *addr = (void *)((uintptr_t)kp->addr & ~1); + + __patch_text(addr, kp->opcode); + return 0; } void __kprobes arch_disarm_kprobe(struct kprobe *p) { - stop_machine(__arch_disarm_kprobe, p, &cpu_online_map); + stop_machine(__arch_disarm_kprobe, p, cpu_online_mask); } void __kprobes arch_remove_kprobe(struct kprobe *p) @@ -121,20 +181,33 @@ static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb) static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb) { - __get_cpu_var(current_kprobe) = kcb->prev_kprobe.kp; + __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp); kcb->kprobe_status = kcb->prev_kprobe.status; } static void __kprobes set_current_kprobe(struct kprobe *p) { - __get_cpu_var(current_kprobe) = p; + __this_cpu_write(current_kprobe, p); } -static void __kprobes singlestep(struct kprobe *p, struct pt_regs *regs, - struct kprobe_ctlblk *kcb) +static void __kprobes +singlestep_skip(struct kprobe *p, struct pt_regs *regs) { +#ifdef CONFIG_THUMB2_KERNEL + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); + if (is_wide_instruction(p->opcode)) + regs->ARM_pc += 4; + else + regs->ARM_pc += 2; +#else regs->ARM_pc += 4; - p->ainsn.insn_handler(p, regs); +#endif +} + +static inline void __kprobes +singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) +{ + p->ainsn.insn_singlestep(p->opcode, &p->ainsn, regs); } /* @@ -148,11 +221,23 @@ void __kprobes kprobe_handler(struct pt_regs *regs) { struct kprobe *p, *cur; struct kprobe_ctlblk *kcb; - kprobe_opcode_t *addr = (kprobe_opcode_t *)regs->ARM_pc; kcb = get_kprobe_ctlblk(); cur = kprobe_running(); - p = get_kprobe(addr); + +#ifdef CONFIG_THUMB2_KERNEL + /* + * First look for a probe which was registered using an address with + * bit 0 set, this is the usual situation for pointers to Thumb code. + * If not found, fallback to looking for one with bit 0 clear. + */ + p = get_kprobe((kprobe_opcode_t *)(regs->ARM_pc | 1)); + if (!p) + p = get_kprobe((kprobe_opcode_t *)regs->ARM_pc); + +#else /* ! CONFIG_THUMB2_KERNEL */ + p = get_kprobe((kprobe_opcode_t *)regs->ARM_pc); +#endif if (p) { if (cur) { @@ -172,7 +257,8 @@ void __kprobes kprobe_handler(struct pt_regs *regs) /* impossible cases */ BUG(); } - } else { + } else if (p->ainsn.insn_check_cc(regs->ARM_cpsr)) { + /* Probe hit and conditional execution check ok. */ set_current_kprobe(p); kcb->kprobe_status = KPROBE_HIT_ACTIVE; @@ -192,6 +278,13 @@ void __kprobes kprobe_handler(struct pt_regs *regs) } reset_current_kprobe(); } + } else { + /* + * Probe hit but conditional execution check failed, + * so just skip the instruction and continue as if + * nothing had happened. + */ + singlestep_skip(p, regs); } } else if (cur) { /* We probably hit a jprobe. Call its break handler. */ @@ -299,7 +392,11 @@ void __naked __kprobes kretprobe_trampoline(void) "bl trampoline_handler \n\t" "mov lr, r0 \n\t" "ldmia sp!, {r0 - r11} \n\t" +#ifdef CONFIG_THUMB2_KERNEL + "bx lr \n\t" +#else "mov pc, lr \n\t" +#endif : : : "memory"); } @@ -308,7 +405,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) { struct kretprobe_instance *ri = NULL; struct hlist_head *head, empty_rp; - struct hlist_node *node, *tmp; + struct hlist_node *tmp; unsigned long flags, orig_ret_address = 0; unsigned long trampoline_address = (unsigned long)&kretprobe_trampoline; @@ -328,16 +425,16 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) * real return address, and all the rest will point to * kretprobe_trampoline */ - hlist_for_each_entry_safe(ri, node, tmp, head, hlist) { + hlist_for_each_entry_safe(ri, tmp, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; if (ri->rp && ri->rp->handler) { - __get_cpu_var(current_kprobe) = &ri->rp->kp; + __this_cpu_write(current_kprobe, &ri->rp->kp); get_kprobe_ctlblk()->kprobe_status = KPROBE_HIT_ACTIVE; ri->rp->handler(ri, regs); - __get_cpu_var(current_kprobe) = NULL; + __this_cpu_write(current_kprobe, NULL); } orig_ret_address = (unsigned long)ri->ret_addr; @@ -355,7 +452,7 @@ static __used __kprobes void *trampoline_handler(struct pt_regs *regs) kretprobe_assert(ri, orig_ret_address, trampoline_address); kretprobe_hash_unlock(current, &flags); - hlist_for_each_entry_safe(ri, node, tmp, &empty_rp, hlist) { + hlist_for_each_entry_safe(ri, tmp, &empty_rp, hlist) { hlist_del(&ri->hlist); kfree(ri); } @@ -377,11 +474,22 @@ int __kprobes setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs) struct jprobe *jp = container_of(p, struct jprobe, kp); struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); long sp_addr = regs->ARM_sp; + long cpsr; kcb->jprobe_saved_regs = *regs; memcpy(kcb->jprobes_stack, (void *)sp_addr, MIN_STACK_SIZE(sp_addr)); regs->ARM_pc = (long)jp->entry; - regs->ARM_cpsr |= PSR_I_BIT; + + cpsr = regs->ARM_cpsr | PSR_I_BIT; +#ifdef CONFIG_THUMB2_KERNEL + /* Set correct Thumb state in cpsr */ + if (regs->ARM_pc & 1) + cpsr |= PSR_T_BIT; + else + cpsr &= ~PSR_T_BIT; +#endif + regs->ARM_cpsr = cpsr; + preempt_disable(); return 1; } @@ -403,7 +511,12 @@ void __kprobes jprobe_return(void) * This is to prevent any simulated instruction from writing * over the regs when they are accessing the stack. */ +#ifdef CONFIG_THUMB2_KERNEL + "sub r0, %0, %1 \n\t" + "mov sp, r0 \n\t" +#else "sub sp, %0, %1 \n\t" +#endif "ldr r0, ="__stringify(JPROBE_MAGIC_ADDR)"\n\t" "str %0, [sp, %2] \n\t" "str r0, [sp, %3] \n\t" @@ -414,15 +527,28 @@ void __kprobes jprobe_return(void) * Return to the context saved by setjmp_pre_handler * and restored by longjmp_break_handler. */ +#ifdef CONFIG_THUMB2_KERNEL + "ldr lr, [sp, %2] \n\t" /* lr = saved sp */ + "ldrd r0, r1, [sp, %5] \n\t" /* r0,r1 = saved lr,pc */ + "ldr r2, [sp, %4] \n\t" /* r2 = saved psr */ + "stmdb lr!, {r0, r1, r2} \n\t" /* push saved lr and */ + /* rfe context */ + "ldmia sp, {r0 - r12} \n\t" + "mov sp, lr \n\t" + "ldr lr, [sp], #4 \n\t" + "rfeia sp! \n\t" +#else "ldr r0, [sp, %4] \n\t" "msr cpsr_cxsf, r0 \n\t" "ldmia sp, {r0 - pc} \n\t" +#endif : : "r" (kcb->jprobe_saved_regs.ARM_sp), "I" (sizeof(struct pt_regs) * 2), "J" (offsetof(struct pt_regs, ARM_sp)), "J" (offsetof(struct pt_regs, ARM_pc)), - "J" (offsetof(struct pt_regs, ARM_cpsr)) + "J" (offsetof(struct pt_regs, ARM_cpsr)), + "J" (offsetof(struct pt_regs, ARM_lr)) : "memory", "cc"); } @@ -459,17 +585,44 @@ int __kprobes arch_trampoline_kprobe(struct kprobe *p) return 0; } -static struct undef_hook kprobes_break_hook = { +#ifdef CONFIG_THUMB2_KERNEL + +static struct undef_hook kprobes_thumb16_break_hook = { + .instr_mask = 0xffff, + .instr_val = KPROBE_THUMB16_BREAKPOINT_INSTRUCTION, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = kprobe_trap_handler, +}; + +static struct undef_hook kprobes_thumb32_break_hook = { .instr_mask = 0xffffffff, - .instr_val = KPROBE_BREAKPOINT_INSTRUCTION, + .instr_val = KPROBE_THUMB32_BREAKPOINT_INSTRUCTION, + .cpsr_mask = MODE_MASK, + .cpsr_val = SVC_MODE, + .fn = kprobe_trap_handler, +}; + +#else /* !CONFIG_THUMB2_KERNEL */ + +static struct undef_hook kprobes_arm_break_hook = { + .instr_mask = 0x0fffffff, + .instr_val = KPROBE_ARM_BREAKPOINT_INSTRUCTION, .cpsr_mask = MODE_MASK, .cpsr_val = SVC_MODE, .fn = kprobe_trap_handler, }; +#endif /* !CONFIG_THUMB2_KERNEL */ + int __init arch_init_kprobes() { - arm_kprobe_decode_init(); - register_undef_hook(&kprobes_break_hook); + arm_probes_decode_init(); +#ifdef CONFIG_THUMB2_KERNEL + register_undef_hook(&kprobes_thumb16_break_hook); + register_undef_hook(&kprobes_thumb32_break_hook); +#else + register_undef_hook(&kprobes_arm_break_hook); +#endif return 0; } diff --git a/arch/arm/kernel/kprobes.h b/arch/arm/kernel/kprobes.h new file mode 100644 index 00000000000..9a2712ecefc --- /dev/null +++ b/arch/arm/kernel/kprobes.h @@ -0,0 +1,52 @@ +/* + * arch/arm/kernel/kprobes.h + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * Some contents moved here from arch/arm/include/asm/kprobes.h which is + * Copyright (C) 2006, 2007 Motorola Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_KPROBES_H +#define _ARM_KERNEL_KPROBES_H + +#include "probes.h" + +/* + * These undefined instructions must be unique and + * reserved solely for kprobes' use. + */ +#define KPROBE_ARM_BREAKPOINT_INSTRUCTION 0x07f001f8 +#define KPROBE_THUMB16_BREAKPOINT_INSTRUCTION 0xde18 +#define KPROBE_THUMB32_BREAKPOINT_INSTRUCTION 0xf7f0a018 + +enum probes_insn __kprobes +kprobe_decode_ldmstm(kprobe_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *h); + +typedef enum probes_insn (kprobe_decode_insn_t)(probes_opcode_t, + struct arch_probes_insn *, + bool, + const union decode_action *); + +#ifdef CONFIG_THUMB2_KERNEL + +extern const union decode_action kprobes_t32_actions[]; +extern const union decode_action kprobes_t16_actions[]; + +#else /* !CONFIG_THUMB2_KERNEL */ + +extern const union decode_action kprobes_arm_actions[]; + +#endif + +#endif /* _ARM_KERNEL_KPROBES_H */ diff --git a/arch/arm/kernel/leds.c b/arch/arm/kernel/leds.c deleted file mode 100644 index 31a316c1777..00000000000 --- a/arch/arm/kernel/leds.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - * LED support code, ripped out of arch/arm/kernel/time.c - * - * Copyright (C) 1994-2001 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/module.h> -#include <linux/init.h> -#include <linux/sysdev.h> - -#include <asm/leds.h> - -static void dummy_leds_event(led_event_t evt) -{ -} - -void (*leds_event)(led_event_t) = dummy_leds_event; - -struct leds_evt_name { - const char name[8]; - int on; - int off; -}; - -static const struct leds_evt_name evt_names[] = { - { "amber", led_amber_on, led_amber_off }, - { "blue", led_blue_on, led_blue_off }, - { "green", led_green_on, led_green_off }, - { "red", led_red_on, led_red_off }, -}; - -static ssize_t leds_store(struct sys_device *dev, - struct sysdev_attribute *attr, - const char *buf, size_t size) -{ - int ret = -EINVAL, len = strcspn(buf, " "); - - if (len > 0 && buf[len] == '\0') - len--; - - if (strncmp(buf, "claim", len) == 0) { - leds_event(led_claim); - ret = size; - } else if (strncmp(buf, "release", len) == 0) { - leds_event(led_release); - ret = size; - } else { - int i; - - for (i = 0; i < ARRAY_SIZE(evt_names); i++) { - if (strlen(evt_names[i].name) != len || - strncmp(buf, evt_names[i].name, len) != 0) - continue; - if (strncmp(buf+len, " on", 3) == 0) { - leds_event(evt_names[i].on); - ret = size; - } else if (strncmp(buf+len, " off", 4) == 0) { - leds_event(evt_names[i].off); - ret = size; - } - break; - } - } - return ret; -} - -static SYSDEV_ATTR(event, 0200, NULL, leds_store); - -static int leds_suspend(struct sys_device *dev, pm_message_t state) -{ - leds_event(led_stop); - return 0; -} - -static int leds_resume(struct sys_device *dev) -{ - leds_event(led_start); - return 0; -} - -static int leds_shutdown(struct sys_device *dev) -{ - leds_event(led_halted); - return 0; -} - -static struct sysdev_class leds_sysclass = { - .name = "leds", - .shutdown = leds_shutdown, - .suspend = leds_suspend, - .resume = leds_resume, -}; - -static struct sys_device leds_device = { - .id = 0, - .cls = &leds_sysclass, -}; - -static int __init leds_init(void) -{ - int ret; - ret = sysdev_class_register(&leds_sysclass); - if (ret == 0) - ret = sysdev_register(&leds_device); - if (ret == 0) - ret = sysdev_create_file(&leds_device, &attr_event); - return ret; -} - -device_initcall(leds_init); - -EXPORT_SYMBOL(leds_event); diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 3a8fd5140d7..8cf0996aa1a 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -7,22 +7,28 @@ #include <linux/delay.h> #include <linux/reboot.h> #include <linux/io.h> +#include <linux/irq.h> +#include <linux/memblock.h> #include <asm/pgtable.h> +#include <linux/of_fdt.h> #include <asm/pgalloc.h> #include <asm/mmu_context.h> #include <asm/cacheflush.h> +#include <asm/fncpy.h> #include <asm/mach-types.h> +#include <asm/smp_plat.h> +#include <asm/system_misc.h> -extern const unsigned char relocate_new_kernel[]; +extern void relocate_new_kernel(void); extern const unsigned int relocate_new_kernel_size; -extern void setup_mm_for_reboot(char mode); - extern unsigned long kexec_start_address; extern unsigned long kexec_indirection_page; extern unsigned long kexec_mach_type; extern unsigned long kexec_boot_atags; +static atomic_t waiting_for_crash_ipi; + /* * Provide a dummy crash_notes definition while crash dump arrives to arm. * This prevents breakage of crash_notes attribute in kernel/ksysfs.c. @@ -30,6 +36,36 @@ extern unsigned long kexec_boot_atags; int machine_kexec_prepare(struct kimage *image) { + struct kexec_segment *current_segment; + __be32 header; + int i, err; + + /* + * Validate that if the current HW supports SMP, then the SW supports + * and implements CPU hotplug for the current HW. If not, we won't be + * able to kexec reliably, so fail the prepare operation. + */ + if (num_possible_cpus() > 1 && !platform_can_cpu_hotplug()) + return -EINVAL; + + /* + * No segment at default ATAGs address. try to locate + * a dtb using magic. + */ + for (i = 0; i < image->nr_segments; i++) { + current_segment = &image->segment[i]; + + if (!memblock_is_region_memory(current_segment->mem, + current_segment->memsz)) + return -EINVAL; + + err = get_user(header, (__be32*)current_segment->buf); + if (err) + return err; + + if (be32_to_cpu(header) == OF_DT_HEADER) + kexec_boot_atags = current_segment->mem; + } return 0; } @@ -37,20 +73,87 @@ void machine_kexec_cleanup(struct kimage *image) { } +void machine_crash_nonpanic_core(void *unused) +{ + struct pt_regs regs; + + crash_setup_regs(®s, NULL); + printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n", + smp_processor_id()); + crash_save_cpu(®s, smp_processor_id()); + flush_cache_all(); + + set_cpu_online(smp_processor_id(), false); + atomic_dec(&waiting_for_crash_ipi); + while (1) + cpu_relax(); +} + +static void machine_kexec_mask_interrupts(void) +{ + unsigned int i; + struct irq_desc *desc; + + for_each_irq_desc(i, desc) { + struct irq_chip *chip; + + chip = irq_desc_get_chip(desc); + if (!chip) + continue; + + if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data)) + chip->irq_eoi(&desc->irq_data); + + if (chip->irq_mask) + chip->irq_mask(&desc->irq_data); + + if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data)) + chip->irq_disable(&desc->irq_data); + } +} + void machine_crash_shutdown(struct pt_regs *regs) { + unsigned long msecs; + local_irq_disable(); + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + printk(KERN_WARNING "Non-crashing CPUs did not react to IPI\n"); + crash_save_cpu(regs, smp_processor_id()); + machine_kexec_mask_interrupts(); printk(KERN_INFO "Loading crashdump kernel...\n"); } +/* + * Function pointer to optional machine-specific reinitialization + */ +void (*kexec_reinit)(void); + void machine_kexec(struct kimage *image) { unsigned long page_list; unsigned long reboot_code_buffer_phys; + unsigned long reboot_entry = (unsigned long)relocate_new_kernel; + unsigned long reboot_entry_phys; void *reboot_code_buffer; + /* + * This can only happen if machine_shutdown() failed to disable some + * CPU, and that can only happen if the checks in + * machine_kexec_prepare() were not correct. If this fails, we can't + * reliably kexec anyway, so BUG_ON is appropriate. + */ + BUG_ON(num_online_cpus() > 1); page_list = image->head & PAGE_MASK; @@ -63,25 +166,28 @@ void machine_kexec(struct kimage *image) kexec_start_address = image->start; kexec_indirection_page = page_list; kexec_mach_type = machine_arch_type; - kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET; + if (!kexec_boot_atags) + kexec_boot_atags = image->start - KEXEC_ARM_ZIMAGE_OFFSET + KEXEC_ARM_ATAGS_OFFSET; - /* copy our kernel relocation code to the control code page */ - memcpy(reboot_code_buffer, - relocate_new_kernel, relocate_new_kernel_size); + /* copy our kernel relocation code to the control code page */ + reboot_entry = fncpy(reboot_code_buffer, + reboot_entry, + relocate_new_kernel_size); + reboot_entry_phys = (unsigned long)reboot_entry + + (reboot_code_buffer_phys - (unsigned long)reboot_code_buffer); - flush_icache_range((unsigned long) reboot_code_buffer, - (unsigned long) reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE); printk(KERN_INFO "Bye!\n"); - local_irq_disable(); - local_fiq_disable(); - setup_mm_for_reboot(0); /* mode is not used, so just pass 0*/ - flush_cache_all(); - outer_flush_all(); - outer_disable(); - cpu_proc_fin(); - outer_inv_all(); - flush_cache_all(); - cpu_reset(reboot_code_buffer_phys); + if (kexec_reinit) + kexec_reinit(); + + soft_restart(reboot_entry_phys); +} + +void arch_crash_save_vmcoreinfo(void) +{ +#ifdef CONFIG_ARM_LPAE + VMCOREINFO_CONFIG(ARM_LPAE); +#endif } diff --git a/arch/arm/kernel/module.c b/arch/arm/kernel/module.c index d9bd786ce23..45e47815727 100644 --- a/arch/arm/kernel/module.c +++ b/arch/arm/kernel/module.c @@ -22,7 +22,9 @@ #include <asm/pgtable.h> #include <asm/sections.h> +#include <asm/smp_plat.h> #include <asm/unwind.h> +#include <asm/opcodes.h> #ifdef CONFIG_XIP_KERNEL /* @@ -32,72 +34,17 @@ * recompiling the whole kernel when CONFIG_XIP_KERNEL is turned on/off. */ #undef MODULES_VADDR -#define MODULES_VADDR (((unsigned long)_etext + ~PGDIR_MASK) & PGDIR_MASK) +#define MODULES_VADDR (((unsigned long)_etext + ~PMD_MASK) & PMD_MASK) #endif #ifdef CONFIG_MMU void *module_alloc(unsigned long size) { - struct vm_struct *area; - - size = PAGE_ALIGN(size); - if (!size) - return NULL; - - area = __get_vm_area(size, VM_ALLOC, MODULES_VADDR, MODULES_END); - if (!area) - return NULL; - - return __vmalloc_area(area, GFP_KERNEL, PAGE_KERNEL_EXEC); -} -#else /* CONFIG_MMU */ -void *module_alloc(unsigned long size) -{ - return size == 0 ? NULL : vmalloc(size); -} -#endif /* !CONFIG_MMU */ - -void module_free(struct module *module, void *region) -{ - vfree(region); + return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, + GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, + __builtin_return_address(0)); } - -int module_frob_arch_sections(Elf_Ehdr *hdr, - Elf_Shdr *sechdrs, - char *secstrings, - struct module *mod) -{ -#ifdef CONFIG_ARM_UNWIND - Elf_Shdr *s, *sechdrs_end = sechdrs + hdr->e_shnum; - struct arm_unwind_mapping *maps = mod->arch.map; - - for (s = sechdrs; s < sechdrs_end; s++) { - char const *secname = secstrings + s->sh_name; - - if (strcmp(".ARM.exidx.init.text", secname) == 0) - maps[ARM_SEC_INIT].unw_sec = s; - else if (strcmp(".ARM.exidx.devinit.text", secname) == 0) - maps[ARM_SEC_DEVINIT].unw_sec = s; - else if (strcmp(".ARM.exidx", secname) == 0) - maps[ARM_SEC_CORE].unw_sec = s; - else if (strcmp(".ARM.exidx.exit.text", secname) == 0) - maps[ARM_SEC_EXIT].unw_sec = s; - else if (strcmp(".ARM.exidx.devexit.text", secname) == 0) - maps[ARM_SEC_DEVEXIT].unw_sec = s; - else if (strcmp(".init.text", secname) == 0) - maps[ARM_SEC_INIT].sec_text = s; - else if (strcmp(".devinit.text", secname) == 0) - maps[ARM_SEC_DEVINIT].sec_text = s; - else if (strcmp(".text", secname) == 0) - maps[ARM_SEC_CORE].sec_text = s; - else if (strcmp(".exit.text", secname) == 0) - maps[ARM_SEC_EXIT].sec_text = s; - else if (strcmp(".devexit.text", secname) == 0) - maps[ARM_SEC_DEVEXIT].sec_text = s; - } #endif - return 0; -} int apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, @@ -112,25 +59,27 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, for (i = 0; i < relsec->sh_size / sizeof(Elf32_Rel); i++, rel++) { unsigned long loc; Elf32_Sym *sym; + const char *symname; s32 offset; + u32 tmp; #ifdef CONFIG_THUMB2_KERNEL u32 upper, lower, sign, j1, j2; #endif offset = ELF32_R_SYM(rel->r_info); if (offset < 0 || offset > (symsec->sh_size / sizeof(Elf32_Sym))) { - printk(KERN_ERR "%s: bad relocation, section %d reloc %d\n", + pr_err("%s: section %u reloc %u: bad relocation sym offset\n", module->name, relindex, i); return -ENOEXEC; } sym = ((Elf32_Sym *)symsec->sh_addr) + offset; + symname = strtab + sym->st_name; if (rel->r_offset < 0 || rel->r_offset > dstsec->sh_size - sizeof(u32)) { - printk(KERN_ERR "%s: out of bounds relocation, " - "section %d reloc %d offset %d size %d\n", - module->name, relindex, i, rel->r_offset, - dstsec->sh_size); + pr_err("%s: section %u reloc %u sym '%s': out of bounds relocation, offset %d size %u\n", + module->name, relindex, i, symname, + rel->r_offset, dstsec->sh_size); return -ENOEXEC; } @@ -148,7 +97,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_PC24: case R_ARM_CALL: case R_ARM_JUMP24: - offset = (*(u32 *)loc & 0x00ffffff) << 2; + offset = __mem_to_opcode_arm(*(u32 *)loc); + offset = (offset & 0x00ffffff) << 2; if (offset & 0x02000000) offset -= 0x04000000; @@ -156,17 +106,18 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (offset & 3 || offset <= (s32)0xfe000000 || offset >= (s32)0x02000000) { - printk(KERN_ERR - "%s: relocation out of range, section " - "%d reloc %d sym '%s'\n", module->name, - relindex, i, strtab + sym->st_name); + pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", + module->name, relindex, i, symname, + ELF32_R_TYPE(rel->r_info), loc, + sym->st_value); return -ENOEXEC; } offset >>= 2; + offset &= 0x00ffffff; - *(u32 *)loc &= 0xff000000; - *(u32 *)loc |= offset & 0x00ffffff; + *(u32 *)loc &= __opcode_to_mem_arm(0xff000000); + *(u32 *)loc |= __opcode_to_mem_arm(offset); break; case R_ARM_V4BX: @@ -174,8 +125,8 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, * other bits to re-code instruction as * MOV PC,Rm. */ - *(u32 *)loc &= 0xf000000f; - *(u32 *)loc |= 0x01a0f000; + *(u32 *)loc &= __opcode_to_mem_arm(0xf000000f); + *(u32 *)loc |= __opcode_to_mem_arm(0x01a0f000); break; case R_ARM_PREL31: @@ -185,7 +136,7 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, case R_ARM_MOVW_ABS_NC: case R_ARM_MOVT_ABS: - offset = *(u32 *)loc; + offset = tmp = __mem_to_opcode_arm(*(u32 *)loc); offset = ((offset & 0xf0000) >> 4) | (offset & 0xfff); offset = (offset ^ 0x8000) - 0x8000; @@ -193,16 +144,18 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (ELF32_R_TYPE(rel->r_info) == R_ARM_MOVT_ABS) offset >>= 16; - *(u32 *)loc &= 0xfff0f000; - *(u32 *)loc |= ((offset & 0xf000) << 4) | - (offset & 0x0fff); + tmp &= 0xfff0f000; + tmp |= ((offset & 0xf000) << 4) | + (offset & 0x0fff); + + *(u32 *)loc = __opcode_to_mem_arm(tmp); break; #ifdef CONFIG_THUMB2_KERNEL case R_ARM_THM_CALL: case R_ARM_THM_JUMP24: - upper = *(u16 *)loc; - lower = *(u16 *)(loc + 2); + upper = __mem_to_opcode_thumb16(*(u16 *)loc); + lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); /* * 25 bit signed address range (Thumb-2 BL and B.W @@ -228,31 +181,43 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, offset -= 0x02000000; offset += sym->st_value - loc; - /* only Thumb addresses allowed (no interworking) */ - if (!(offset & 1) || + /* + * For function symbols, only Thumb addresses are + * allowed (no interworking). + * + * For non-function symbols, the destination + * has no specific ARM/Thumb disposition, so + * the branch is resolved under the assumption + * that interworking is not required. + */ + if ((ELF32_ST_TYPE(sym->st_info) == STT_FUNC && + !(offset & 1)) || offset <= (s32)0xff000000 || offset >= (s32)0x01000000) { - printk(KERN_ERR - "%s: relocation out of range, section " - "%d reloc %d sym '%s'\n", module->name, - relindex, i, strtab + sym->st_name); + pr_err("%s: section %u reloc %u sym '%s': relocation %u out of range (%#lx -> %#x)\n", + module->name, relindex, i, symname, + ELF32_R_TYPE(rel->r_info), loc, + sym->st_value); return -ENOEXEC; } sign = (offset >> 24) & 1; j1 = sign ^ (~(offset >> 23) & 1); j2 = sign ^ (~(offset >> 22) & 1); - *(u16 *)loc = (u16)((upper & 0xf800) | (sign << 10) | + upper = (u16)((upper & 0xf800) | (sign << 10) | ((offset >> 12) & 0x03ff)); - *(u16 *)(loc + 2) = (u16)((lower & 0xd000) | - (j1 << 13) | (j2 << 11) | - ((offset >> 1) & 0x07ff)); + lower = (u16)((lower & 0xd000) | + (j1 << 13) | (j2 << 11) | + ((offset >> 1) & 0x07ff)); + + *(u16 *)loc = __opcode_to_mem_thumb16(upper); + *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); break; case R_ARM_THM_MOVW_ABS_NC: case R_ARM_THM_MOVT_ABS: - upper = *(u16 *)loc; - lower = *(u16 *)(loc + 2); + upper = __mem_to_opcode_thumb16(*(u16 *)loc); + lower = __mem_to_opcode_thumb16(*(u16 *)(loc + 2)); /* * MOVT/MOVW instructions encoding in Thumb-2: @@ -273,12 +238,14 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, if (ELF32_R_TYPE(rel->r_info) == R_ARM_THM_MOVT_ABS) offset >>= 16; - *(u16 *)loc = (u16)((upper & 0xfbf0) | - ((offset & 0xf000) >> 12) | - ((offset & 0x0800) >> 1)); - *(u16 *)(loc + 2) = (u16)((lower & 0x8f00) | - ((offset & 0x0700) << 4) | - (offset & 0x00ff)); + upper = (u16)((upper & 0xfbf0) | + ((offset & 0xf000) >> 12) | + ((offset & 0x0800) >> 1)); + lower = (u16)((lower & 0x8f00) | + ((offset & 0x0700) << 4) | + (offset & 0x00ff)); + *(u16 *)loc = __opcode_to_mem_thumb16(upper); + *(u16 *)(loc + 2) = __opcode_to_mem_thumb16(lower); break; #endif @@ -291,50 +258,98 @@ apply_relocate(Elf32_Shdr *sechdrs, const char *strtab, unsigned int symindex, return 0; } -int -apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab, - unsigned int symindex, unsigned int relsec, struct module *module) +struct mod_unwind_map { + const Elf_Shdr *unw_sec; + const Elf_Shdr *txt_sec; +}; + +static const Elf_Shdr *find_mod_section(const Elf32_Ehdr *hdr, + const Elf_Shdr *sechdrs, const char *name) { - printk(KERN_ERR "module %s: ADD RELOCATION unsupported\n", - module->name); - return -ENOEXEC; + const Elf_Shdr *s, *se; + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + + for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; + + return NULL; } -#ifdef CONFIG_ARM_UNWIND -static void register_unwind_tables(struct module *mod) +extern void fixup_pv_table(const void *, unsigned long); +extern void fixup_smp(const void *, unsigned long); + +int module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, + struct module *mod) { + const Elf_Shdr *s = NULL; +#ifdef CONFIG_ARM_UNWIND + const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; + const Elf_Shdr *sechdrs_end = sechdrs + hdr->e_shnum; + struct mod_unwind_map maps[ARM_SEC_MAX]; int i; - for (i = 0; i < ARM_SEC_MAX; ++i) { - struct arm_unwind_mapping *map = &mod->arch.map[i]; - if (map->unw_sec && map->sec_text) - map->unwind = unwind_table_add(map->unw_sec->sh_addr, - map->unw_sec->sh_size, - map->sec_text->sh_addr, - map->sec_text->sh_size); + + memset(maps, 0, sizeof(maps)); + + for (s = sechdrs; s < sechdrs_end; s++) { + const char *secname = secstrs + s->sh_name; + + if (!(s->sh_flags & SHF_ALLOC)) + continue; + + if (strcmp(".ARM.exidx.init.text", secname) == 0) + maps[ARM_SEC_INIT].unw_sec = s; + else if (strcmp(".ARM.exidx", secname) == 0) + maps[ARM_SEC_CORE].unw_sec = s; + else if (strcmp(".ARM.exidx.exit.text", secname) == 0) + maps[ARM_SEC_EXIT].unw_sec = s; + else if (strcmp(".ARM.exidx.text.unlikely", secname) == 0) + maps[ARM_SEC_UNLIKELY].unw_sec = s; + else if (strcmp(".ARM.exidx.text.hot", secname) == 0) + maps[ARM_SEC_HOT].unw_sec = s; + else if (strcmp(".init.text", secname) == 0) + maps[ARM_SEC_INIT].txt_sec = s; + else if (strcmp(".text", secname) == 0) + maps[ARM_SEC_CORE].txt_sec = s; + else if (strcmp(".exit.text", secname) == 0) + maps[ARM_SEC_EXIT].txt_sec = s; + else if (strcmp(".text.unlikely", secname) == 0) + maps[ARM_SEC_UNLIKELY].txt_sec = s; + else if (strcmp(".text.hot", secname) == 0) + maps[ARM_SEC_HOT].txt_sec = s; } -} -static void unregister_unwind_tables(struct module *mod) -{ - int i = ARM_SEC_MAX; - while (--i >= 0) - unwind_table_del(mod->arch.map[i].unwind); -} + for (i = 0; i < ARM_SEC_MAX; i++) + if (maps[i].unw_sec && maps[i].txt_sec) + mod->arch.unwind[i] = + unwind_table_add(maps[i].unw_sec->sh_addr, + maps[i].unw_sec->sh_size, + maps[i].txt_sec->sh_addr, + maps[i].txt_sec->sh_size); +#endif +#ifdef CONFIG_ARM_PATCH_PHYS_VIRT + s = find_mod_section(hdr, sechdrs, ".pv_table"); + if (s) + fixup_pv_table((void *)s->sh_addr, s->sh_size); +#endif + s = find_mod_section(hdr, sechdrs, ".alt.smp.init"); + if (s && !is_smp()) +#ifdef CONFIG_SMP_ON_UP + fixup_smp((void *)s->sh_addr, s->sh_size); #else -static inline void register_unwind_tables(struct module *mod) { } -static inline void unregister_unwind_tables(struct module *mod) { } + return -EINVAL; #endif - -int -module_finalize(const Elf32_Ehdr *hdr, const Elf_Shdr *sechdrs, - struct module *module) -{ - register_unwind_tables(module); return 0; } void module_arch_cleanup(struct module *mod) { - unregister_unwind_tables(mod); +#ifdef CONFIG_ARM_UNWIND + int i; + + for (i = 0; i < ARM_SEC_MAX; i++) + if (mod->arch.unwind[i]) + unwind_table_del(mod->arch.unwind[i]); +#endif } diff --git a/arch/arm/kernel/opcodes.c b/arch/arm/kernel/opcodes.c new file mode 100644 index 00000000000..f8179c6a817 --- /dev/null +++ b/arch/arm/kernel/opcodes.c @@ -0,0 +1,72 @@ +/* + * linux/arch/arm/kernel/opcodes.c + * + * A32 condition code lookup feature moved from nwfpe/fpopcode.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <asm/opcodes.h> + +#define ARM_OPCODE_CONDITION_UNCOND 0xf + +/* + * condition code lookup table + * index into the table is test code: EQ, NE, ... LT, GT, AL, NV + * + * bit position in short is condition code: NZCV + */ +static const unsigned short cc_map[16] = { + 0xF0F0, /* EQ == Z set */ + 0x0F0F, /* NE */ + 0xCCCC, /* CS == C set */ + 0x3333, /* CC */ + 0xFF00, /* MI == N set */ + 0x00FF, /* PL */ + 0xAAAA, /* VS == V set */ + 0x5555, /* VC */ + 0x0C0C, /* HI == C set && Z clear */ + 0xF3F3, /* LS == C clear || Z set */ + 0xAA55, /* GE == (N==V) */ + 0x55AA, /* LT == (N!=V) */ + 0x0A05, /* GT == (!Z && (N==V)) */ + 0xF5FA, /* LE == (Z || (N!=V)) */ + 0xFFFF, /* AL always */ + 0 /* NV */ +}; + +/* + * Returns: + * ARM_OPCODE_CONDTEST_FAIL - if condition fails + * ARM_OPCODE_CONDTEST_PASS - if condition passes (including AL) + * ARM_OPCODE_CONDTEST_UNCOND - if NV condition, or separate unconditional + * opcode space from v5 onwards + * + * Code that tests whether a conditional instruction would pass its condition + * check should check that return value == ARM_OPCODE_CONDTEST_PASS. + * + * Code that tests if a condition means that the instruction would be executed + * (regardless of conditional or unconditional) should instead check that the + * return value != ARM_OPCODE_CONDTEST_FAIL. + */ +asmlinkage unsigned int arm_check_condition(u32 opcode, u32 psr) +{ + u32 cc_bits = opcode >> 28; + u32 psr_cond = psr >> 28; + unsigned int ret; + + if (cc_bits != ARM_OPCODE_CONDITION_UNCOND) { + if ((cc_map[cc_bits] >> (psr_cond)) & 1) + ret = ARM_OPCODE_CONDTEST_PASS; + else + ret = ARM_OPCODE_CONDTEST_FAIL; + } else { + ret = ARM_OPCODE_CONDTEST_UNCOND; + } + + return ret; +} +EXPORT_SYMBOL_GPL(arm_check_condition); diff --git a/arch/arm/kernel/patch.c b/arch/arm/kernel/patch.c new file mode 100644 index 00000000000..07314af4773 --- /dev/null +++ b/arch/arm/kernel/patch.c @@ -0,0 +1,75 @@ +#include <linux/kernel.h> +#include <linux/kprobes.h> +#include <linux/stop_machine.h> + +#include <asm/cacheflush.h> +#include <asm/smp_plat.h> +#include <asm/opcodes.h> + +#include "patch.h" + +struct patch { + void *addr; + unsigned int insn; +}; + +void __kprobes __patch_text(void *addr, unsigned int insn) +{ + bool thumb2 = IS_ENABLED(CONFIG_THUMB2_KERNEL); + int size; + + if (thumb2 && __opcode_is_thumb16(insn)) { + *(u16 *)addr = __opcode_to_mem_thumb16(insn); + size = sizeof(u16); + } else if (thumb2 && ((uintptr_t)addr & 2)) { + u16 first = __opcode_thumb32_first(insn); + u16 second = __opcode_thumb32_second(insn); + u16 *addrh = addr; + + addrh[0] = __opcode_to_mem_thumb16(first); + addrh[1] = __opcode_to_mem_thumb16(second); + + size = sizeof(u32); + } else { + if (thumb2) + insn = __opcode_to_mem_thumb32(insn); + else + insn = __opcode_to_mem_arm(insn); + + *(u32 *)addr = insn; + size = sizeof(u32); + } + + flush_icache_range((uintptr_t)(addr), + (uintptr_t)(addr) + size); +} + +static int __kprobes patch_text_stop_machine(void *data) +{ + struct patch *patch = data; + + __patch_text(patch->addr, patch->insn); + + return 0; +} + +void __kprobes patch_text(void *addr, unsigned int insn) +{ + struct patch patch = { + .addr = addr, + .insn = insn, + }; + + if (cache_ops_need_broadcast()) { + stop_machine(patch_text_stop_machine, &patch, cpu_online_mask); + } else { + bool straddles_word = IS_ENABLED(CONFIG_THUMB2_KERNEL) + && __opcode_is_thumb32(insn) + && ((uintptr_t)addr & 2); + + if (straddles_word) + stop_machine(patch_text_stop_machine, &patch, NULL); + else + __patch_text(addr, insn); + } +} diff --git a/arch/arm/kernel/patch.h b/arch/arm/kernel/patch.h new file mode 100644 index 00000000000..b4731f2dac3 --- /dev/null +++ b/arch/arm/kernel/patch.h @@ -0,0 +1,7 @@ +#ifndef _ARM_KERNEL_PATCH_H +#define _ARM_KERNEL_PATCH_H + +void patch_text(void *addr, unsigned int insn); +void __patch_text(void *addr, unsigned int insn); + +#endif diff --git a/arch/arm/kernel/perf_event.c b/arch/arm/kernel/perf_event.c index 07a50357492..4238bcba9d6 100644 --- a/arch/arm/kernel/perf_event.c +++ b/arch/arm/kernel/perf_event.c @@ -4,9 +4,7 @@ * ARM performance counter support. * * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles - * - * ARMv7 support: Jean Pihet <jpihet@mvista.com> - * 2010 (c) MontaVista Software, LLC. + * Copyright (C) 2010 ARM Ltd., Will Deacon <will.deacon@arm.com> * * This code is based on the sparc64 perf event code, which is in turn based * on the x86 code. Callchain code is based on the ARM OProfile backtrace @@ -14,134 +12,23 @@ */ #define pr_fmt(fmt) "hw perfevents: " fmt -#include <linux/interrupt.h> #include <linux/kernel.h> -#include <linux/module.h> -#include <linux/perf_event.h> #include <linux/platform_device.h> -#include <linux/spinlock.h> +#include <linux/pm_runtime.h> #include <linux/uaccess.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> -#include <asm/cputype.h> -#include <asm/irq.h> #include <asm/irq_regs.h> #include <asm/pmu.h> #include <asm/stacktrace.h> -static struct platform_device *pmu_device; - -/* - * Hardware lock to serialize accesses to PMU registers. Needed for the - * read/modify/write sequences. - */ -DEFINE_SPINLOCK(pmu_lock); - -/* - * ARMv6 supports a maximum of 3 events, starting from index 1. If we add - * another platform that supports more, we need to increase this to be the - * largest of all platforms. - * - * ARMv7 supports up to 32 events: - * cycle counter CCNT + 31 events counters CNT0..30. - * Cortex-A8 has 1+4 counters, Cortex-A9 has 1+6 counters. - */ -#define ARMPMU_MAX_HWEVENTS 33 - -/* The events for a given CPU. */ -struct cpu_hw_events { - /* - * The events that are active on the CPU for the given index. Index 0 - * is reserved. - */ - struct perf_event *events[ARMPMU_MAX_HWEVENTS]; - - /* - * A 1 bit for an index indicates that the counter is being used for - * an event. A 0 means that the counter can be used. - */ - unsigned long used_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; - - /* - * A 1 bit for an index indicates that the counter is actively being - * used. - */ - unsigned long active_mask[BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)]; -}; -DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events); - -/* PMU names. */ -static const char *arm_pmu_names[] = { - [ARM_PERF_PMU_ID_XSCALE1] = "xscale1", - [ARM_PERF_PMU_ID_XSCALE2] = "xscale2", - [ARM_PERF_PMU_ID_V6] = "v6", - [ARM_PERF_PMU_ID_V6MP] = "v6mpcore", - [ARM_PERF_PMU_ID_CA8] = "ARMv7 Cortex-A8", - [ARM_PERF_PMU_ID_CA9] = "ARMv7 Cortex-A9", -}; - -struct arm_pmu { - enum arm_perf_pmu_ids id; - irqreturn_t (*handle_irq)(int irq_num, void *dev); - void (*enable)(struct hw_perf_event *evt, int idx); - void (*disable)(struct hw_perf_event *evt, int idx); - int (*event_map)(int evt); - u64 (*raw_event)(u64); - int (*get_event_idx)(struct cpu_hw_events *cpuc, - struct hw_perf_event *hwc); - u32 (*read_counter)(int idx); - void (*write_counter)(int idx, u32 val); - void (*start)(void); - void (*stop)(void); - int num_events; - u64 max_period; -}; - -/* Set at runtime when we know what CPU type we are. */ -static const struct arm_pmu *armpmu; - -enum arm_perf_pmu_ids -armpmu_get_pmu_id(void) -{ - int id = -ENODEV; - - if (armpmu != NULL) - id = armpmu->id; - - return id; -} -EXPORT_SYMBOL_GPL(armpmu_get_pmu_id); - -int -armpmu_get_max_events(void) -{ - int max_events = 0; - - if (armpmu != NULL) - max_events = armpmu->num_events; - - return max_events; -} -EXPORT_SYMBOL_GPL(armpmu_get_max_events); - -int perf_num_counters(void) -{ - return armpmu_get_max_events(); -} -EXPORT_SYMBOL_GPL(perf_num_counters); - -#define HW_OP_UNSUPPORTED 0xFFFF - -#define C(_x) \ - PERF_COUNT_HW_CACHE_##_x - -#define CACHE_OP_UNSUPPORTED 0xFFFF - -static unsigned armpmu_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX]; - static int -armpmu_map_cache_event(u64 config) +armpmu_map_cache_event(const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u64 config) { unsigned int cache_type, cache_op, cache_result, ret; @@ -157,7 +44,7 @@ armpmu_map_cache_event(u64 config) if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; - ret = (int)armpmu_perf_cache_map[cache_type][cache_op][cache_result]; + ret = (int)(*cache_map)[cache_type][cache_op][cache_result]; if (ret == CACHE_OP_UNSUPPORTED) return -ENOENT; @@ -166,10 +53,50 @@ armpmu_map_cache_event(u64 config) } static int -armpmu_event_set_period(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) +armpmu_map_hw_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) +{ + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; + return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; +} + +static int +armpmu_map_raw_event(u32 raw_event_mask, u64 config) { + return (int)(config & raw_event_mask); +} + +int +armpmu_map_event(struct perf_event *event, + const unsigned (*event_map)[PERF_COUNT_HW_MAX], + const unsigned (*cache_map) + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX], + u32 raw_event_mask) +{ + u64 config = event->attr.config; + + switch (event->attr.type) { + case PERF_TYPE_HARDWARE: + return armpmu_map_hw_event(event_map, config); + case PERF_TYPE_HW_CACHE: + return armpmu_map_cache_event(cache_map, config); + case PERF_TYPE_RAW: + return armpmu_map_raw_event(raw_event_mask, config); + } + + return -ENOENT; +} + +int armpmu_event_set_period(struct perf_event *event) +{ + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; s64 left = local64_read(&hwc->period_left); s64 period = hwc->sample_period; int ret = 0; @@ -193,32 +120,28 @@ armpmu_event_set_period(struct perf_event *event, local64_set(&hwc->prev_count, (u64)-left); - armpmu->write_counter(idx, (u64)(-left) & 0xffffffff); + armpmu->write_counter(event, (u64)(-left) & 0xffffffff); perf_event_update_userpage(event); return ret; } -static u64 -armpmu_event_update(struct perf_event *event, - struct hw_perf_event *hwc, - int idx) +u64 armpmu_event_update(struct perf_event *event) { - int shift = 64 - 32; - s64 prev_raw_count, new_raw_count; - u64 delta; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + u64 delta, prev_raw_count, new_raw_count; again: prev_raw_count = local64_read(&hwc->prev_count); - new_raw_count = armpmu->read_counter(idx); + new_raw_count = armpmu->read_counter(event); if (local64_cmpxchg(&hwc->prev_count, prev_raw_count, new_raw_count) != prev_raw_count) goto again; - delta = (new_raw_count << shift) - (prev_raw_count << shift); - delta >>= shift; + delta = (new_raw_count - prev_raw_count) & armpmu->max_period; local64_add(delta, &event->count); local64_sub(delta, &hwc->period_left); @@ -229,43 +152,31 @@ again: static void armpmu_read(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - - /* Don't read disabled counters! */ - if (hwc->idx < 0) - return; - - armpmu_event_update(event, hwc, hwc->idx); + armpmu_event_update(event); } static void armpmu_stop(struct perf_event *event, int flags) { + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - if (!armpmu) - return; - /* * ARM pmu always has to update the counter, so ignore * PERF_EF_UPDATE, see comments in armpmu_start(). */ if (!(hwc->state & PERF_HES_STOPPED)) { - armpmu->disable(hwc, hwc->idx); - barrier(); /* why? */ - armpmu_event_update(event, hwc, hwc->idx); + armpmu->disable(event); + armpmu_event_update(event); hwc->state |= PERF_HES_STOPPED | PERF_HES_UPTODATE; } } -static void -armpmu_start(struct perf_event *event, int flags) +static void armpmu_start(struct perf_event *event, int flags) { + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - if (!armpmu) - return; - /* * ARM pmu always has to reprogram the period, so ignore * PERF_EF_RELOAD, see the comment below. @@ -281,23 +192,23 @@ armpmu_start(struct perf_event *event, int flags) * get an interrupt too soon or *way* too late if the overflow has * happened since disabling. */ - armpmu_event_set_period(event, hwc, hwc->idx); - armpmu->enable(hwc, hwc->idx); + armpmu_event_set_period(event); + armpmu->enable(event); } static void armpmu_del(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); struct hw_perf_event *hwc = &event->hw; int idx = hwc->idx; - WARN_ON(idx < 0); - - clear_bit(idx, cpuc->active_mask); armpmu_stop(event, PERF_EF_UPDATE); - cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + hw_events->events[idx] = NULL; + clear_bit(idx, hw_events->used_mask); + if (armpmu->clear_event_idx) + armpmu->clear_event_idx(hw_events, event); perf_event_update_userpage(event); } @@ -305,7 +216,8 @@ armpmu_del(struct perf_event *event, int flags) static int armpmu_add(struct perf_event *event, int flags) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); struct hw_perf_event *hwc = &event->hw; int idx; int err = 0; @@ -313,7 +225,7 @@ armpmu_add(struct perf_event *event, int flags) perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. */ - idx = armpmu->get_event_idx(cpuc, hwc); + idx = armpmu->get_event_idx(hw_events, event); if (idx < 0) { err = idx; goto out; @@ -324,9 +236,8 @@ armpmu_add(struct perf_event *event, int flags) * sure it is disabled. */ event->hw.idx = idx; - armpmu->disable(hwc, idx); - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); + armpmu->disable(event); + hw_events->events[idx] = event; hwc->state = PERF_HES_STOPPED | PERF_HES_UPTODATE; if (flags & PERF_EF_START) @@ -340,133 +251,131 @@ out: return err; } -static struct pmu pmu; - static int -validate_event(struct cpu_hw_events *cpuc, +validate_event(struct pmu_hw_events *hw_events, struct perf_event *event) { - struct hw_perf_event fake_event = event->hw; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + + if (is_software_event(event)) + return 1; + + if (event->state < PERF_EVENT_STATE_OFF) + return 1; - if (event->pmu != &pmu || event->state <= PERF_EVENT_STATE_OFF) + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; - return armpmu->get_event_idx(cpuc, &fake_event) >= 0; + return armpmu->get_event_idx(hw_events, event) >= 0; } static int validate_group(struct perf_event *event) { struct perf_event *sibling, *leader = event->group_leader; - struct cpu_hw_events fake_pmu; + struct pmu_hw_events fake_pmu; + DECLARE_BITMAP(fake_used_mask, ARMPMU_MAX_HWEVENTS); - memset(&fake_pmu, 0, sizeof(fake_pmu)); + /* + * Initialise the fake PMU. We only need to populate the + * used_mask for the purposes of validation. + */ + memset(fake_used_mask, 0, sizeof(fake_used_mask)); + fake_pmu.used_mask = fake_used_mask; if (!validate_event(&fake_pmu, leader)) - return -ENOSPC; + return -EINVAL; list_for_each_entry(sibling, &leader->sibling_list, group_entry) { if (!validate_event(&fake_pmu, sibling)) - return -ENOSPC; + return -EINVAL; } if (!validate_event(&fake_pmu, event)) - return -ENOSPC; + return -EINVAL; return 0; } -static int -armpmu_reserve_hardware(void) +static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) { - int i, err = -ENODEV, irq; + struct arm_pmu *armpmu; + struct platform_device *plat_device; + struct arm_pmu_platdata *plat; + int ret; + u64 start_clock, finish_clock; - pmu_device = reserve_pmu(ARM_PMU_DEVICE_CPU); - if (IS_ERR(pmu_device)) { - pr_warning("unable to reserve pmu\n"); - return PTR_ERR(pmu_device); - } + if (irq_is_percpu(irq)) + dev = *(void **)dev; + armpmu = dev; + plat_device = armpmu->plat_device; + plat = dev_get_platdata(&plat_device->dev); - init_pmu(ARM_PMU_DEVICE_CPU); + start_clock = sched_clock(); + if (plat && plat->handle_irq) + ret = plat->handle_irq(irq, dev, armpmu->handle_irq); + else + ret = armpmu->handle_irq(irq, dev); + finish_clock = sched_clock(); - if (pmu_device->num_resources < 1) { - pr_err("no irqs for PMUs defined\n"); - return -ENODEV; - } + perf_sample_event_took(finish_clock - start_clock); + return ret; +} - for (i = 0; i < pmu_device->num_resources; ++i) { - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; - - err = request_irq(irq, armpmu->handle_irq, - IRQF_DISABLED | IRQF_NOBALANCING, - "armpmu", NULL); - if (err) { - pr_warning("unable to request IRQ%d for ARM perf " - "counters\n", irq); - break; - } - } +static void +armpmu_release_hardware(struct arm_pmu *armpmu) +{ + armpmu->free_irq(armpmu); + pm_runtime_put_sync(&armpmu->plat_device->dev); +} +static int +armpmu_reserve_hardware(struct arm_pmu *armpmu) +{ + int err; + struct platform_device *pmu_device = armpmu->plat_device; + + if (!pmu_device) + return -ENODEV; + + pm_runtime_get_sync(&pmu_device->dev); + err = armpmu->request_irq(armpmu, armpmu_dispatch_irq); if (err) { - for (i = i - 1; i >= 0; --i) { - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, NULL); - } - release_pmu(pmu_device); - pmu_device = NULL; + armpmu_release_hardware(armpmu); + return err; } - return err; + return 0; } static void -armpmu_release_hardware(void) +hw_perf_event_destroy(struct perf_event *event) { - int i, irq; + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); + atomic_t *active_events = &armpmu->active_events; + struct mutex *pmu_reserve_mutex = &armpmu->reserve_mutex; - for (i = pmu_device->num_resources - 1; i >= 0; --i) { - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, NULL); + if (atomic_dec_and_mutex_lock(active_events, pmu_reserve_mutex)) { + armpmu_release_hardware(armpmu); + mutex_unlock(pmu_reserve_mutex); } - armpmu->stop(); - - release_pmu(pmu_device); - pmu_device = NULL; } -static atomic_t active_events = ATOMIC_INIT(0); -static DEFINE_MUTEX(pmu_reserve_mutex); - -static void -hw_perf_event_destroy(struct perf_event *event) +static int +event_requires_mode_exclusion(struct perf_event_attr *attr) { - if (atomic_dec_and_mutex_lock(&active_events, &pmu_reserve_mutex)) { - armpmu_release_hardware(); - mutex_unlock(&pmu_reserve_mutex); - } + return attr->exclude_idle || attr->exclude_user || + attr->exclude_kernel || attr->exclude_hv; } static int __hw_perf_event_init(struct perf_event *event) { + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; - int mapping, err; - - /* Decode the generic type into an ARM event identifier. */ - if (PERF_TYPE_HARDWARE == event->attr.type) { - mapping = armpmu->event_map(event->attr.config); - } else if (PERF_TYPE_HW_CACHE == event->attr.type) { - mapping = armpmu_map_cache_event(event->attr.config); - } else if (PERF_TYPE_RAW == event->attr.type) { - mapping = armpmu->raw_event(event->attr.config); - } else { - pr_debug("event type %x not supported\n", event->attr.type); - return -EOPNOTSUPP; - } + int mapping; + + mapping = armpmu->map_event(event); if (mapping < 0) { pr_debug("event %x:%llx not supported\n", event->attr.type, @@ -475,84 +384,75 @@ __hw_perf_event_init(struct perf_event *event) } /* - * Check whether we need to exclude the counter from certain modes. - * The ARM performance counters are on all of the time so if someone - * has asked us for some excludes then we have to fail. - */ - if (event->attr.exclude_kernel || event->attr.exclude_user || - event->attr.exclude_hv || event->attr.exclude_idle) { - pr_debug("ARM performance counters do not support " - "mode exclusion\n"); - return -EPERM; - } - - /* * We don't assign an index until we actually place the event onto * hardware. Use -1 to signify that we haven't decided where to put it * yet. For SMP systems, each core has it's own PMU so we can't do any * clever allocation or constraints checking at this point. */ - hwc->idx = -1; + hwc->idx = -1; + hwc->config_base = 0; + hwc->config = 0; + hwc->event_base = 0; + + /* + * Check whether we need to exclude the counter from certain modes. + */ + if ((!armpmu->set_event_filter || + armpmu->set_event_filter(hwc, &event->attr)) && + event_requires_mode_exclusion(&event->attr)) { + pr_debug("ARM performance counters do not support " + "mode exclusion\n"); + return -EOPNOTSUPP; + } /* - * Store the event encoding into the config_base field. config and - * event_base are unused as the only 2 things we need to know are - * the event mapping and the counter to use. The counter to use is - * also the indx and the config_base is the event type. + * Store the event encoding into the config_base field. */ - hwc->config_base = (unsigned long)mapping; - hwc->config = 0; - hwc->event_base = 0; + hwc->config_base |= (unsigned long)mapping; - if (!hwc->sample_period) { - hwc->sample_period = armpmu->max_period; + if (!is_sampling_event(event)) { + /* + * For non-sampling runs, limit the sample_period to half + * of the counter width. That way, the new counter value + * is far less likely to overtake the previous one unless + * you have some serious IRQ latency issues. + */ + hwc->sample_period = armpmu->max_period >> 1; hwc->last_period = hwc->sample_period; local64_set(&hwc->period_left, hwc->sample_period); } - err = 0; if (event->group_leader != event) { - err = validate_group(event); - if (err) + if (validate_group(event) != 0) return -EINVAL; } - return err; + return 0; } static int armpmu_event_init(struct perf_event *event) { + struct arm_pmu *armpmu = to_arm_pmu(event->pmu); int err = 0; + atomic_t *active_events = &armpmu->active_events; - switch (event->attr.type) { - case PERF_TYPE_RAW: - case PERF_TYPE_HARDWARE: - case PERF_TYPE_HW_CACHE: - break; + /* does not support taken branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; - default: + if (armpmu->map_event(event) == -ENOENT) return -ENOENT; - } - - if (!armpmu) - return -ENODEV; event->destroy = hw_perf_event_destroy; - if (!atomic_inc_not_zero(&active_events)) { - if (atomic_read(&active_events) > armpmu->num_events) { - atomic_dec(&active_events); - return -ENOSPC; - } - - mutex_lock(&pmu_reserve_mutex); - if (atomic_read(&active_events) == 0) { - err = armpmu_reserve_hardware(); - } + if (!atomic_inc_not_zero(active_events)) { + mutex_lock(&armpmu->reserve_mutex); + if (atomic_read(active_events) == 0) + err = armpmu_reserve_hardware(armpmu); if (!err) - atomic_inc(&active_events); - mutex_unlock(&pmu_reserve_mutex); + atomic_inc(active_events); + mutex_unlock(&armpmu->reserve_mutex); } if (err) @@ -567,2479 +467,72 @@ static int armpmu_event_init(struct perf_event *event) static void armpmu_enable(struct pmu *pmu) { - /* Enable all of the perf events on hardware. */ - int idx; - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct arm_pmu *armpmu = to_arm_pmu(pmu); + struct pmu_hw_events *hw_events = armpmu->get_hw_events(); + int enabled = bitmap_weight(hw_events->used_mask, armpmu->num_events); - if (!armpmu) - return; - - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - - if (!event) - continue; - - armpmu->enable(&event->hw, idx); - } - - armpmu->start(); + if (enabled) + armpmu->start(armpmu); } static void armpmu_disable(struct pmu *pmu) { - if (armpmu) - armpmu->stop(); -} - -static struct pmu pmu = { - .pmu_enable = armpmu_enable, - .pmu_disable = armpmu_disable, - .event_init = armpmu_event_init, - .add = armpmu_add, - .del = armpmu_del, - .start = armpmu_start, - .stop = armpmu_stop, - .read = armpmu_read, -}; - -/* - * ARMv6 Performance counter handling code. - * - * ARMv6 has 2 configurable performance counters and a single cycle counter. - * They all share a single reset bit but can be written to zero so we can use - * that for a reset. - * - * The counters can't be individually enabled or disabled so when we remove - * one event and replace it with another we could get spurious counts from the - * wrong event. However, we can take advantage of the fact that the - * performance counters can export events to the event bus, and the event bus - * itself can be monitored. This requires that we *don't* export the events to - * the event bus. The procedure for disabling a configurable counter is: - * - change the counter to count the ETMEXTOUT[0] signal (0x20). This - * effectively stops the counter from counting. - * - disable the counter's interrupt generation (each counter has it's - * own interrupt enable bit). - * Once stopped, the counter value can be written as 0 to reset. - * - * To enable a counter: - * - enable the counter's interrupt generation. - * - set the new event type. - * - * Note: the dedicated cycle counter only counts cycles and can't be - * enabled/disabled independently of the others. When we want to disable the - * cycle counter, we have to just disable the interrupt reporting and start - * ignoring that counter. When re-enabling, we have to reset the value and - * enable the interrupt. - */ - -enum armv6_perf_types { - ARMV6_PERFCTR_ICACHE_MISS = 0x0, - ARMV6_PERFCTR_IBUF_STALL = 0x1, - ARMV6_PERFCTR_DDEP_STALL = 0x2, - ARMV6_PERFCTR_ITLB_MISS = 0x3, - ARMV6_PERFCTR_DTLB_MISS = 0x4, - ARMV6_PERFCTR_BR_EXEC = 0x5, - ARMV6_PERFCTR_BR_MISPREDICT = 0x6, - ARMV6_PERFCTR_INSTR_EXEC = 0x7, - ARMV6_PERFCTR_DCACHE_HIT = 0x9, - ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, - ARMV6_PERFCTR_DCACHE_MISS = 0xB, - ARMV6_PERFCTR_DCACHE_WBACK = 0xC, - ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, - ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, - ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, - ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, - ARMV6_PERFCTR_WBUF_DRAINED = 0x12, - ARMV6_PERFCTR_CPU_CYCLES = 0xFF, - ARMV6_PERFCTR_NOP = 0x20, -}; - -enum armv6_counters { - ARMV6_CYCLE_COUNTER = 1, - ARMV6_COUNTER0, - ARMV6_COUNTER1, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -enum armv6mpcore_perf_types { - ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, - ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, - ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, - ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, - ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, - ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, - ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, - ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, - ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, - ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, - ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, - ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, - ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, - ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, - ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, - ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, -}; - -/* - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, - [C(RESULT_MISS)] = - ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * The ARM performance counters can count micro DTLB misses, - * micro ITLB misses and main TLB misses. There isn't an event - * for TLB misses, so use the micro misses here and if users - * want the main TLB misses they can use a raw counter. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -static inline unsigned long -armv6_pmcr_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); - return val; -} - -static inline void -armv6_pmcr_write(unsigned long val) -{ - asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); -} - -#define ARMV6_PMCR_ENABLE (1 << 0) -#define ARMV6_PMCR_CTR01_RESET (1 << 1) -#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) -#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) -#define ARMV6_PMCR_COUNT0_IEN (1 << 4) -#define ARMV6_PMCR_COUNT1_IEN (1 << 5) -#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) -#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) -#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) -#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) -#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 -#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) -#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 -#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) - -#define ARMV6_PMCR_OVERFLOWED_MASK \ - (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ - ARMV6_PMCR_CCOUNT_OVERFLOW) - -static inline int -armv6_pmcr_has_overflowed(unsigned long pmcr) -{ - return (pmcr & ARMV6_PMCR_OVERFLOWED_MASK); -} - -static inline int -armv6_pmcr_counter_has_overflowed(unsigned long pmcr, - enum armv6_counters counter) -{ - int ret = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; - else if (ARMV6_COUNTER0 == counter) - ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; - else if (ARMV6_COUNTER1 == counter) - ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return ret; -} - -static inline u32 -armv6pmu_read_counter(int counter) -{ - unsigned long value = 0; - - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - - return value; -} - -static inline void -armv6pmu_write_counter(int counter, - u32 value) -{ - if (ARMV6_CYCLE_COUNTER == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); - else if (ARMV6_COUNTER0 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); - else if (ARMV6_COUNTER1 == counter) - asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); - else - WARN_ONCE(1, "invalid counter number (%d)\n", counter); -} - -void -armv6pmu_enable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = 0; - evt = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_EVT_COUNT0_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | - ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_EVT_COUNT1_MASK; - evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | - ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the event - * that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) -{ - unsigned long pmcr = armv6_pmcr_read(); - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - if (!armv6_pmcr_has_overflowed(pmcr)) - return IRQ_NONE; - - regs = get_irq_regs(); - - /* - * The interrupts are cleared by writing the overflow flags back to - * the control register. All of the other bits don't have any effect - * if they are rewritten, so write the whole value back. - */ - armv6_pmcr_write(pmcr); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void -armv6pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val |= ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -void -armv6pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~ARMV6_PMCR_ENABLE; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline int -armv6pmu_event_map(int config) -{ - int mapping = armv6_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int -armv6mpcore_pmu_event_map(int config) -{ - int mapping = armv6mpcore_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -armv6pmu_raw_event(u64 config) -{ - return config & 0xff; -} - -static int -armv6pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - /* Always place a cycle counter into the cycle counter. */ - if (ARMV6_PERFCTR_CPU_CYCLES == event->config_base) { - if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV6_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * counter0 and counter1. - */ - if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) { - return ARMV6_COUNTER1; - } - - if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) { - return ARMV6_COUNTER0; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static void -armv6pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, evt, flags; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - evt = 0; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; - evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Mask out the current event and set the counter to count the number - * of ETM bus signal assertion cycles. The external reporting should - * be disabled and so this should never increment. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -armv6mpcore_pmu_disable_event(struct hw_perf_event *hwc, - int idx) -{ - unsigned long val, mask, flags, evt = 0; - - if (ARMV6_CYCLE_COUNTER == idx) { - mask = ARMV6_PMCR_CCOUNT_IEN; - } else if (ARMV6_COUNTER0 == idx) { - mask = ARMV6_PMCR_COUNT0_IEN; - } else if (ARMV6_COUNTER1 == idx) { - mask = ARMV6_PMCR_COUNT1_IEN; - } else { - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - /* - * Unlike UP ARMv6, we don't have a way of stopping the counters. We - * simply disable the interrupt reporting. - */ - spin_lock_irqsave(&pmu_lock, flags); - val = armv6_pmcr_read(); - val &= ~mask; - val |= evt; - armv6_pmcr_write(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static const struct arm_pmu armv6pmu = { - .id = ARM_PERF_PMU_ID_V6, - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6pmu_disable_event, - .event_map = armv6pmu_event_map, - .raw_event = armv6pmu_raw_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -/* - * ARMv6mpcore is almost identical to single core ARMv6 with the exception - * that some of the events have different enumerations and that there is no - * *hack* to stop the programmable counters. To stop the counters we simply - * disable the interrupt reporting and update the event. When unthrottling we - * reset the period and enable the interrupt reporting. - */ -static const struct arm_pmu armv6mpcore_pmu = { - .id = ARM_PERF_PMU_ID_V6MP, - .handle_irq = armv6pmu_handle_irq, - .enable = armv6pmu_enable_event, - .disable = armv6mpcore_pmu_disable_event, - .event_map = armv6mpcore_pmu_event_map, - .raw_event = armv6pmu_raw_event, - .read_counter = armv6pmu_read_counter, - .write_counter = armv6pmu_write_counter, - .get_event_idx = armv6pmu_get_event_idx, - .start = armv6pmu_start, - .stop = armv6pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -/* - * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. - * - * Copied from ARMv6 code, with the low level code inspired - * by the ARMv7 Oprofile code. - * - * Cortex-A8 has up to 4 configurable performance counters and - * a single cycle counter. - * Cortex-A9 has up to 31 configurable performance counters and - * a single cycle counter. - * - * All counters can be enabled/disabled and IRQ masked separately. The cycle - * counter and all 4 performance counters together can be reset separately. - */ - -/* Common ARMv7 event types */ -enum armv7_perf_types { - ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, - ARMV7_PERFCTR_IFETCH_MISS = 0x01, - ARMV7_PERFCTR_ITLB_MISS = 0x02, - ARMV7_PERFCTR_DCACHE_REFILL = 0x03, - ARMV7_PERFCTR_DCACHE_ACCESS = 0x04, - ARMV7_PERFCTR_DTLB_REFILL = 0x05, - ARMV7_PERFCTR_DREAD = 0x06, - ARMV7_PERFCTR_DWRITE = 0x07, - - ARMV7_PERFCTR_EXC_TAKEN = 0x09, - ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, - ARMV7_PERFCTR_CID_WRITE = 0x0B, - /* ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. - * It counts: - * - all branch instructions, - * - instructions that explicitly write the PC, - * - exception generating instructions. - */ - ARMV7_PERFCTR_PC_WRITE = 0x0C, - ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, - ARMV7_PERFCTR_UNALIGNED_ACCESS = 0x0F, - ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, - ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, - - ARMV7_PERFCTR_PC_BRANCH_MIS_USED = 0x12, - - ARMV7_PERFCTR_CPU_CYCLES = 0xFF -}; - -/* ARMv7 Cortex-A8 specific event types */ -enum armv7_a8_perf_types { - ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, - - ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, - - ARMV7_PERFCTR_WRITE_BUFFER_FULL = 0x40, - ARMV7_PERFCTR_L2_STORE_MERGED = 0x41, - ARMV7_PERFCTR_L2_STORE_BUFF = 0x42, - ARMV7_PERFCTR_L2_ACCESS = 0x43, - ARMV7_PERFCTR_L2_CACH_MISS = 0x44, - ARMV7_PERFCTR_AXI_READ_CYCLES = 0x45, - ARMV7_PERFCTR_AXI_WRITE_CYCLES = 0x46, - ARMV7_PERFCTR_MEMORY_REPLAY = 0x47, - ARMV7_PERFCTR_UNALIGNED_ACCESS_REPLAY = 0x48, - ARMV7_PERFCTR_L1_DATA_MISS = 0x49, - ARMV7_PERFCTR_L1_INST_MISS = 0x4A, - ARMV7_PERFCTR_L1_DATA_COLORING = 0x4B, - ARMV7_PERFCTR_L1_NEON_DATA = 0x4C, - ARMV7_PERFCTR_L1_NEON_CACH_DATA = 0x4D, - ARMV7_PERFCTR_L2_NEON = 0x4E, - ARMV7_PERFCTR_L2_NEON_HIT = 0x4F, - ARMV7_PERFCTR_L1_INST = 0x50, - ARMV7_PERFCTR_PC_RETURN_MIS_PRED = 0x51, - ARMV7_PERFCTR_PC_BRANCH_FAILED = 0x52, - ARMV7_PERFCTR_PC_BRANCH_TAKEN = 0x53, - ARMV7_PERFCTR_PC_BRANCH_EXECUTED = 0x54, - ARMV7_PERFCTR_OP_EXECUTED = 0x55, - ARMV7_PERFCTR_CYCLES_INST_STALL = 0x56, - ARMV7_PERFCTR_CYCLES_INST = 0x57, - ARMV7_PERFCTR_CYCLES_NEON_DATA_STALL = 0x58, - ARMV7_PERFCTR_CYCLES_NEON_INST_STALL = 0x59, - ARMV7_PERFCTR_NEON_CYCLES = 0x5A, - - ARMV7_PERFCTR_PMU0_EVENTS = 0x70, - ARMV7_PERFCTR_PMU1_EVENTS = 0x71, - ARMV7_PERFCTR_PMU_EVENTS = 0x72, -}; - -/* ARMv7 Cortex-A9 specific event types */ -enum armv7_a9_perf_types { - ARMV7_PERFCTR_JAVA_HW_BYTECODE_EXEC = 0x40, - ARMV7_PERFCTR_JAVA_SW_BYTECODE_EXEC = 0x41, - ARMV7_PERFCTR_JAZELLE_BRANCH_EXEC = 0x42, - - ARMV7_PERFCTR_COHERENT_LINE_MISS = 0x50, - ARMV7_PERFCTR_COHERENT_LINE_HIT = 0x51, - - ARMV7_PERFCTR_ICACHE_DEP_STALL_CYCLES = 0x60, - ARMV7_PERFCTR_DCACHE_DEP_STALL_CYCLES = 0x61, - ARMV7_PERFCTR_TLB_MISS_DEP_STALL_CYCLES = 0x62, - ARMV7_PERFCTR_STREX_EXECUTED_PASSED = 0x63, - ARMV7_PERFCTR_STREX_EXECUTED_FAILED = 0x64, - ARMV7_PERFCTR_DATA_EVICTION = 0x65, - ARMV7_PERFCTR_ISSUE_STAGE_NO_INST = 0x66, - ARMV7_PERFCTR_ISSUE_STAGE_EMPTY = 0x67, - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE = 0x68, - - ARMV7_PERFCTR_PREDICTABLE_FUNCT_RETURNS = 0x6E, - - ARMV7_PERFCTR_MAIN_UNIT_EXECUTED_INST = 0x70, - ARMV7_PERFCTR_SECOND_UNIT_EXECUTED_INST = 0x71, - ARMV7_PERFCTR_LD_ST_UNIT_EXECUTED_INST = 0x72, - ARMV7_PERFCTR_FP_EXECUTED_INST = 0x73, - ARMV7_PERFCTR_NEON_EXECUTED_INST = 0x74, - - ARMV7_PERFCTR_PLD_FULL_DEP_STALL_CYCLES = 0x80, - ARMV7_PERFCTR_DATA_WR_DEP_STALL_CYCLES = 0x81, - ARMV7_PERFCTR_ITLB_MISS_DEP_STALL_CYCLES = 0x82, - ARMV7_PERFCTR_DTLB_MISS_DEP_STALL_CYCLES = 0x83, - ARMV7_PERFCTR_MICRO_ITLB_MISS_DEP_STALL_CYCLES = 0x84, - ARMV7_PERFCTR_MICRO_DTLB_MISS_DEP_STALL_CYCLES = 0x85, - ARMV7_PERFCTR_DMB_DEP_STALL_CYCLES = 0x86, - - ARMV7_PERFCTR_INTGR_CLK_ENABLED_CYCLES = 0x8A, - ARMV7_PERFCTR_DATA_ENGINE_CLK_EN_CYCLES = 0x8B, - - ARMV7_PERFCTR_ISB_INST = 0x90, - ARMV7_PERFCTR_DSB_INST = 0x91, - ARMV7_PERFCTR_DMB_INST = 0x92, - ARMV7_PERFCTR_EXT_INTERRUPTS = 0x93, - - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_COMPLETED = 0xA0, - ARMV7_PERFCTR_PLE_CACHE_LINE_RQST_SKIPPED = 0xA1, - ARMV7_PERFCTR_PLE_FIFO_FLUSH = 0xA2, - ARMV7_PERFCTR_PLE_RQST_COMPLETED = 0xA3, - ARMV7_PERFCTR_PLE_FIFO_OVERFLOW = 0xA4, - ARMV7_PERFCTR_PLE_RQST_PROG = 0xA5 -}; - -/* - * Cortex-A8 HW events mapping - * - * The hardware events that we support. We do support cache operations but - * we have harvard caches and no way to combine instruction and data - * accesses/misses in hardware. - */ -static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_INST, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_INST_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Cortex-A9 HW events mapping - */ -static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, - [PERF_COUNT_HW_INSTRUCTIONS] = - ARMV7_PERFCTR_INST_OUT_OF_RENAME_STAGE, - [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_COHERENT_LINE_HIT, - [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_COHERENT_LINE_MISS, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, - [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, -}; - -static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - /* - * The performance counters don't differentiate between read - * and write accesses/misses so this isn't strictly correct, - * but it's the best we can do. Writes and reads get - * combined. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DCACHE_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_IFETCH_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - /* - * Only ITLB misses and DTLB refills are supported. - * If users want the DTLB refills misses a raw counter - * must be used. - */ - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_WRITE, - [C(RESULT_MISS)] - = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -/* - * Perf Events counters - */ -enum armv7_counters { - ARMV7_CYCLE_COUNTER = 1, /* Cycle counter */ - ARMV7_COUNTER0 = 2, /* First event counter */ -}; - -/* - * The cycle counter is ARMV7_CYCLE_COUNTER. - * The first event counter is ARMV7_COUNTER0. - * The last event counter is (ARMV7_COUNTER0 + armpmu->num_events - 1). - */ -#define ARMV7_COUNTER_LAST (ARMV7_COUNTER0 + armpmu->num_events - 1) - -/* - * ARMv7 low level PMNC access - */ - -/* - * Per-CPU PMNC: config reg - */ -#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ -#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ -#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ -#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ -#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ -#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ -#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ -#define ARMV7_PMNC_N_MASK 0x1f -#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ - -/* - * Available counters - */ -#define ARMV7_CNT0 0 /* First event counter */ -#define ARMV7_CCNT 31 /* Cycle counter */ - -/* Perf Event to low level counters mapping */ -#define ARMV7_EVENT_CNT_TO_CNTx (ARMV7_COUNTER0 - ARMV7_CNT0) - -/* - * CNTENS: counters enable reg - */ -#define ARMV7_CNTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENS_C (1 << ARMV7_CCNT) - -/* - * CNTENC: counters disable reg - */ -#define ARMV7_CNTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_CNTENC_C (1 << ARMV7_CCNT) - -/* - * INTENS: counters overflow interrupt enable reg - */ -#define ARMV7_INTENS_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENS_C (1 << ARMV7_CCNT) - -/* - * INTENC: counters overflow interrupt disable reg - */ -#define ARMV7_INTENC_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_INTENC_C (1 << ARMV7_CCNT) - -/* - * EVTSEL: Event selection reg - */ -#define ARMV7_EVTSEL_MASK 0xff /* Mask for writable bits */ - -/* - * SELECT: Counter selection reg - */ -#define ARMV7_SELECT_MASK 0x1f /* Mask for writable bits */ - -/* - * FLAG: counters overflow flag status reg - */ -#define ARMV7_FLAG_P(idx) (1 << (idx - ARMV7_EVENT_CNT_TO_CNTx)) -#define ARMV7_FLAG_C (1 << ARMV7_CCNT) -#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ -#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK - -static inline unsigned long armv7_pmnc_read(void) -{ - u32 val; - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); - return val; + struct arm_pmu *armpmu = to_arm_pmu(pmu); + armpmu->stop(armpmu); } -static inline void armv7_pmnc_write(unsigned long val) +#ifdef CONFIG_PM_RUNTIME +static int armpmu_runtime_resume(struct device *dev) { - val &= ARMV7_PMNC_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); -} + struct arm_pmu_platdata *plat = dev_get_platdata(dev); -static inline int armv7_pmnc_has_overflowed(unsigned long pmnc) -{ - return pmnc & ARMV7_OVERFLOWED_MASK; -} + if (plat && plat->runtime_resume) + return plat->runtime_resume(dev); -static inline int armv7_pmnc_counter_has_overflowed(unsigned long pmnc, - enum armv7_counters counter) -{ - int ret = 0; - - if (counter == ARMV7_CYCLE_COUNTER) - ret = pmnc & ARMV7_FLAG_C; - else if ((counter >= ARMV7_COUNTER0) && (counter <= ARMV7_COUNTER_LAST)) - ret = pmnc & ARMV7_FLAG_P(counter); - else - pr_err("CPU%u checking wrong counter %d overflow status\n", - smp_processor_id(), counter); - - return ret; -} - -static inline int armv7_pmnc_select_counter(unsigned int idx) -{ - u32 val; - - if ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST)) { - pr_err("CPU%u selecting wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - val = (idx - ARMV7_EVENT_CNT_TO_CNTx) & ARMV7_SELECT_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (val)); - - return idx; -} - -static inline u32 armv7pmu_read_counter(int idx) -{ - unsigned long value = 0; - - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mrc p15, 0, %0, c9, c13, 2" - : "=r" (value)); - } else - pr_err("CPU%u reading wrong counter %d\n", - smp_processor_id(), idx); - - return value; -} - -static inline void armv7pmu_write_counter(int idx, u32 value) -{ - if (idx == ARMV7_CYCLE_COUNTER) - asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); - else if ((idx >= ARMV7_COUNTER0) && (idx <= ARMV7_COUNTER_LAST)) { - if (armv7_pmnc_select_counter(idx) == idx) - asm volatile("mcr p15, 0, %0, c9, c13, 2" - : : "r" (value)); - } else - pr_err("CPU%u writing wrong counter %d\n", - smp_processor_id(), idx); -} - -static inline void armv7_pmnc_write_evtsel(unsigned int idx, u32 val) -{ - if (armv7_pmnc_select_counter(idx) == idx) { - val &= ARMV7_EVTSEL_MASK; - asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); - } -} - -static inline u32 armv7_pmnc_enable_counter(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENS_C; - else - val = ARMV7_CNTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_counter(unsigned int idx) -{ - u32 val; - - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_CNTENC_C; - else - val = ARMV7_CNTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_enable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u enabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENS_C; - else - val = ARMV7_INTENS_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_disable_intens(unsigned int idx) -{ - u32 val; - - if ((idx != ARMV7_CYCLE_COUNTER) && - ((idx < ARMV7_COUNTER0) || (idx > ARMV7_COUNTER_LAST))) { - pr_err("CPU%u disabling wrong PMNC counter" - " interrupt enable %d\n", smp_processor_id(), idx); - return -1; - } - - if (idx == ARMV7_CYCLE_COUNTER) - val = ARMV7_INTENC_C; - else - val = ARMV7_INTENC_P(idx); - - asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (val)); - - return idx; -} - -static inline u32 armv7_pmnc_getreset_flags(void) -{ - u32 val; - - /* Read */ - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - - /* Write to clear flags */ - val &= ARMV7_FLAG_MASK; - asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); - - return val; + return 0; } -#ifdef DEBUG -static void armv7_pmnc_dump_regs(void) +static int armpmu_runtime_suspend(struct device *dev) { - u32 val; - unsigned int cnt; - - printk(KERN_INFO "PMNC registers dump:\n"); - - asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); - printk(KERN_INFO "PMNC =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); - printk(KERN_INFO "CNTENS=0x%08x\n", val); + struct arm_pmu_platdata *plat = dev_get_platdata(dev); - asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); - printk(KERN_INFO "INTENS=0x%08x\n", val); + if (plat && plat->runtime_suspend) + return plat->runtime_suspend(dev); - asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); - printk(KERN_INFO "FLAGS =0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); - printk(KERN_INFO "SELECT=0x%08x\n", val); - - asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); - printk(KERN_INFO "CCNT =0x%08x\n", val); - - for (cnt = ARMV7_COUNTER0; cnt < ARMV7_COUNTER_LAST; cnt++) { - armv7_pmnc_select_counter(cnt); - asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); - printk(KERN_INFO "CNT[%d] count =0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); - printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", - cnt-ARMV7_EVENT_CNT_TO_CNTx, val); - } + return 0; } #endif -void armv7pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Enable counter and interrupt, and set the counter to count - * the event that we're interested in. - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Set event (if destined for PMNx counters) - * We don't need to set the event if it's a cycle count - */ - if (idx != ARMV7_CYCLE_COUNTER) - armv7_pmnc_write_evtsel(idx, hwc->config_base); - - /* - * Enable interrupt for this counter - */ - armv7_pmnc_enable_intens(idx); - - /* - * Enable counter - */ - armv7_pmnc_enable_counter(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags; - - /* - * Disable counter and interrupt - */ - spin_lock_irqsave(&pmu_lock, flags); - - /* - * Disable counter - */ - armv7_pmnc_disable_counter(idx); - - /* - * Disable interrupt for this counter - */ - armv7_pmnc_disable_intens(idx); - - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * Get and reset the IRQ flags - */ - pmnc = armv7_pmnc_getreset_flags(); - - /* - * Did an overflow occur? - */ - if (!armv7_pmnc_has_overflowed(pmnc)) - return IRQ_NONE; - - /* - * Handle the counter(s) overflow(s) - */ - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - /* - * We have a single interrupt for all counters. Check that - * each counter has overflowed before we process it. - */ - if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - /* - * Handle the pending perf events. - * - * Note: this call *must* be run with interrupts disabled. For - * platforms that can have the PMU interrupts raised as an NMI, this - * will not work. - */ - irq_work_run(); - - return IRQ_HANDLED; -} - -static void armv7pmu_start(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Enable all counters */ - armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void armv7pmu_stop(void) -{ - unsigned long flags; - - spin_lock_irqsave(&pmu_lock, flags); - /* Disable all counters */ - armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline int armv7_a8_pmu_event_map(int config) -{ - int mapping = armv7_a8_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static inline int armv7_a9_pmu_event_map(int config) -{ - int mapping = armv7_a9_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 armv7pmu_raw_event(u64 config) -{ - return config & 0xff; -} - -static int armv7pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx; - - /* Always place a cycle counter into the cycle counter. */ - if (event->config_base == ARMV7_PERFCTR_CPU_CYCLES) { - if (test_and_set_bit(ARMV7_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return ARMV7_CYCLE_COUNTER; - } else { - /* - * For anything other than a cycle counter, try and use - * the events counters - */ - for (idx = ARMV7_COUNTER0; idx <= armpmu->num_events; ++idx) { - if (!test_and_set_bit(idx, cpuc->used_mask)) - return idx; - } - - /* The counters are all in use. */ - return -EAGAIN; - } -} - -static struct arm_pmu armv7pmu = { - .handle_irq = armv7pmu_handle_irq, - .enable = armv7pmu_enable_event, - .disable = armv7pmu_disable_event, - .raw_event = armv7pmu_raw_event, - .read_counter = armv7pmu_read_counter, - .write_counter = armv7pmu_write_counter, - .get_event_idx = armv7pmu_get_event_idx, - .start = armv7pmu_start, - .stop = armv7pmu_stop, - .max_period = (1LLU << 32) - 1, +const struct dev_pm_ops armpmu_dev_pm_ops = { + SET_RUNTIME_PM_OPS(armpmu_runtime_suspend, armpmu_runtime_resume, NULL) }; -static u32 __init armv7_reset_read_pmnc(void) +static void armpmu_init(struct arm_pmu *armpmu) { - u32 nb_cnt; - - /* Initialize & Reset PMNC: C and P bits */ - armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); + atomic_set(&armpmu->active_events, 0); + mutex_init(&armpmu->reserve_mutex); - /* Read the nb of CNTx counters supported from PMNC */ - nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; - - /* Add the CPU cycles counter and return */ - return nb_cnt + 1; + armpmu->pmu = (struct pmu) { + .pmu_enable = armpmu_enable, + .pmu_disable = armpmu_disable, + .event_init = armpmu_event_init, + .add = armpmu_add, + .del = armpmu_del, + .start = armpmu_start, + .stop = armpmu_stop, + .read = armpmu_read, + }; } -/* - * ARMv5 [xscale] Performance counter handling code. - * - * Based on xscale OProfile code. - * - * There are two variants of the xscale PMU that we support: - * - xscale1pmu: 2 event counters and a cycle counter - * - xscale2pmu: 4 event counters and a cycle counter - * The two variants share event definitions, but have different - * PMU structures. - */ - -enum xscale_perf_types { - XSCALE_PERFCTR_ICACHE_MISS = 0x00, - XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, - XSCALE_PERFCTR_DATA_STALL = 0x02, - XSCALE_PERFCTR_ITLB_MISS = 0x03, - XSCALE_PERFCTR_DTLB_MISS = 0x04, - XSCALE_PERFCTR_BRANCH = 0x05, - XSCALE_PERFCTR_BRANCH_MISS = 0x06, - XSCALE_PERFCTR_INSTRUCTION = 0x07, - XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, - XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, - XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, - XSCALE_PERFCTR_DCACHE_MISS = 0x0B, - XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, - XSCALE_PERFCTR_PC_CHANGED = 0x0D, - XSCALE_PERFCTR_BCU_REQUEST = 0x10, - XSCALE_PERFCTR_BCU_FULL = 0x11, - XSCALE_PERFCTR_BCU_DRAIN = 0x12, - XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, - XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, - XSCALE_PERFCTR_RMW = 0x16, - /* XSCALE_PERFCTR_CCNT is not hardware defined */ - XSCALE_PERFCTR_CCNT = 0xFE, - XSCALE_PERFCTR_UNUSED = 0xFF, -}; - -enum xscale_counters { - XSCALE_CYCLE_COUNTER = 1, - XSCALE_COUNTER0, - XSCALE_COUNTER1, - XSCALE_COUNTER2, - XSCALE_COUNTER3, -}; - -static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { - [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, - [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, - [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, - [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, - [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, -}; - -static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = { - [C(L1D)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(L1I)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(LL)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(DTLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(ITLB)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, - [C(BPU)] = { - [C(OP_READ)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_WRITE)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - [C(OP_PREFETCH)] = { - [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, - [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, - }, - }, -}; - -#define XSCALE_PMU_ENABLE 0x001 -#define XSCALE_PMN_RESET 0x002 -#define XSCALE_CCNT_RESET 0x004 -#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) -#define XSCALE_PMU_CNT64 0x008 - -static inline int -xscalepmu_event_map(int config) -{ - int mapping = xscale_perf_map[config]; - if (HW_OP_UNSUPPORTED == mapping) - mapping = -EOPNOTSUPP; - return mapping; -} - -static u64 -xscalepmu_raw_event(u64 config) -{ - return config & 0xff; -} - -#define XSCALE1_OVERFLOWED_MASK 0x700 -#define XSCALE1_CCOUNT_OVERFLOW 0x400 -#define XSCALE1_COUNT0_OVERFLOW 0x100 -#define XSCALE1_COUNT1_OVERFLOW 0x200 -#define XSCALE1_CCOUNT_INT_EN 0x040 -#define XSCALE1_COUNT0_INT_EN 0x010 -#define XSCALE1_COUNT1_INT_EN 0x020 -#define XSCALE1_COUNT0_EVT_SHFT 12 -#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) -#define XSCALE1_COUNT1_EVT_SHFT 20 -#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) - -static inline u32 -xscale1pmu_read_pmnc(void) +int armpmu_register(struct arm_pmu *armpmu, int type) { - u32 val; - asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); - return val; -} - -static inline void -xscale1pmu_write_pmnc(u32 val) -{ - /* upper 4bits and 7, 11 are write-as-0 */ - val &= 0xffff77f; - asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); -} - -static inline int -xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = pmnc & XSCALE1_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = pmnc & XSCALE1_COUNT1_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; + armpmu_init(armpmu); + pm_runtime_enable(&armpmu->plat_device->dev); + pr_info("enabled with %s PMU driver, %d counters available\n", + armpmu->name, armpmu->num_events); + return perf_pmu_register(&armpmu->pmu, armpmu->name, type); } -static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* - * NOTE: there's an A stepping erratum that states if an overflow - * bit already exists and another occurs, the previous - * Overflow bit gets cleared. There's no workaround. - * Fixed in B stepping or later. - */ - pmnc = xscale1pmu_read_pmnc(); - - /* - * Write the value back to clear the overflow flags. Overflow - * flags remain in pmnc for use below. We also disable the PMU - * while we process the interrupt. - */ - xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) - return IRQ_NONE; - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale1pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = 0; - evt = XSCALE1_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | - XSCALE1_COUNT0_INT_EN; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_EVT_MASK; - evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | - XSCALE1_COUNT1_INT_EN; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long val, mask, evt, flags; - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - mask = XSCALE1_CCOUNT_INT_EN; - evt = 0; - break; - case XSCALE_COUNTER0: - mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; - evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~mask; - val |= evt; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale1pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - if (XSCALE_PERFCTR_CCNT == event->config_base) { - if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) - return -EAGAIN; - - return XSCALE_CYCLE_COUNTER; - } else { - if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) { - return XSCALE_COUNTER1; - } - - if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) { - return XSCALE_COUNTER0; - } - - return -EAGAIN; - } -} - -static void -xscale1pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val |= XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale1pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale1pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale1pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale1pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale1pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale1pmu = { - .id = ARM_PERF_PMU_ID_XSCALE1, - .handle_irq = xscale1pmu_handle_irq, - .enable = xscale1pmu_enable_event, - .disable = xscale1pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, - .read_counter = xscale1pmu_read_counter, - .write_counter = xscale1pmu_write_counter, - .get_event_idx = xscale1pmu_get_event_idx, - .start = xscale1pmu_start, - .stop = xscale1pmu_stop, - .num_events = 3, - .max_period = (1LLU << 32) - 1, -}; - -#define XSCALE2_OVERFLOWED_MASK 0x01f -#define XSCALE2_CCOUNT_OVERFLOW 0x001 -#define XSCALE2_COUNT0_OVERFLOW 0x002 -#define XSCALE2_COUNT1_OVERFLOW 0x004 -#define XSCALE2_COUNT2_OVERFLOW 0x008 -#define XSCALE2_COUNT3_OVERFLOW 0x010 -#define XSCALE2_CCOUNT_INT_EN 0x001 -#define XSCALE2_COUNT0_INT_EN 0x002 -#define XSCALE2_COUNT1_INT_EN 0x004 -#define XSCALE2_COUNT2_INT_EN 0x008 -#define XSCALE2_COUNT3_INT_EN 0x010 -#define XSCALE2_COUNT0_EVT_SHFT 0 -#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) -#define XSCALE2_COUNT1_EVT_SHFT 8 -#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) -#define XSCALE2_COUNT2_EVT_SHFT 16 -#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) -#define XSCALE2_COUNT3_EVT_SHFT 24 -#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) - -static inline u32 -xscale2pmu_read_pmnc(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); - /* bits 1-2 and 4-23 are read-unpredictable */ - return val & 0xff000009; -} - -static inline void -xscale2pmu_write_pmnc(u32 val) -{ - /* bits 4-23 are write-as-0, 24-31 are write ignored */ - val &= 0xf; - asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_overflow_flags(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_overflow_flags(u32 val) -{ - asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); -} - -static inline u32 -xscale2pmu_read_event_select(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); - return val; -} - -static inline void -xscale2pmu_write_event_select(u32 val) -{ - asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); -} - -static inline u32 -xscale2pmu_read_int_enable(void) -{ - u32 val; - asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); - return val; -} - -static void -xscale2pmu_write_int_enable(u32 val) -{ - asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); -} - -static inline int -xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, - enum xscale_counters counter) -{ - int ret = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; - break; - case XSCALE_COUNTER0: - ret = of_flags & XSCALE2_COUNT0_OVERFLOW; - break; - case XSCALE_COUNTER1: - ret = of_flags & XSCALE2_COUNT1_OVERFLOW; - break; - case XSCALE_COUNTER2: - ret = of_flags & XSCALE2_COUNT2_OVERFLOW; - break; - case XSCALE_COUNTER3: - ret = of_flags & XSCALE2_COUNT3_OVERFLOW; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", counter); - } - - return ret; -} - -static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) -{ - unsigned long pmnc, of_flags; - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct pt_regs *regs; - int idx; - - /* Disable the PMU. */ - pmnc = xscale2pmu_read_pmnc(); - xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); - - /* Check the overflow flag register. */ - of_flags = xscale2pmu_read_overflow_flags(); - if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) - return IRQ_NONE; - - /* Clear the overflow bits. */ - xscale2pmu_write_overflow_flags(of_flags); - - regs = get_irq_regs(); - - perf_sample_data_init(&data, 0); - - cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx <= armpmu->num_events; ++idx) { - struct perf_event *event = cpuc->events[idx]; - struct hw_perf_event *hwc; - - if (!test_bit(idx, cpuc->active_mask)) - continue; - - if (!xscale2_pmnc_counter_has_overflowed(pmnc, idx)) - continue; - - hwc = &event->hw; - armpmu_event_update(event, hwc, idx); - data.period = event->hw.last_period; - if (!armpmu_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 0, &data, regs)) - armpmu->disable(hwc, idx); - } - - irq_work_run(); - - /* - * Re-enable the PMU. - */ - pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(pmnc); - - return IRQ_HANDLED; -} - -static void -xscale2pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien |= XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien |= XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien |= XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien |= XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien |= XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - unsigned long flags, ien, evtsel; - - ien = xscale2pmu_read_int_enable(); - evtsel = xscale2pmu_read_event_select(); - - switch (idx) { - case XSCALE_CYCLE_COUNTER: - ien &= ~XSCALE2_CCOUNT_INT_EN; - break; - case XSCALE_COUNTER0: - ien &= ~XSCALE2_COUNT0_INT_EN; - evtsel &= ~XSCALE2_COUNT0_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; - break; - case XSCALE_COUNTER1: - ien &= ~XSCALE2_COUNT1_INT_EN; - evtsel &= ~XSCALE2_COUNT1_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; - break; - case XSCALE_COUNTER2: - ien &= ~XSCALE2_COUNT2_INT_EN; - evtsel &= ~XSCALE2_COUNT2_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; - break; - case XSCALE_COUNTER3: - ien &= ~XSCALE2_COUNT3_INT_EN; - evtsel &= ~XSCALE2_COUNT3_EVT_MASK; - evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; - break; - default: - WARN_ONCE(1, "invalid counter number (%d)\n", idx); - return; - } - - spin_lock_irqsave(&pmu_lock, flags); - xscale2pmu_write_event_select(evtsel); - xscale2pmu_write_int_enable(ien); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static int -xscale2pmu_get_event_idx(struct cpu_hw_events *cpuc, - struct hw_perf_event *event) -{ - int idx = xscale1pmu_get_event_idx(cpuc, event); - if (idx >= 0) - goto out; - - if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) - idx = XSCALE_COUNTER3; - else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) - idx = XSCALE_COUNTER2; -out: - return idx; -} - -static void -xscale2pmu_start(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; - val |= XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static void -xscale2pmu_stop(void) -{ - unsigned long flags, val; - - spin_lock_irqsave(&pmu_lock, flags); - val = xscale2pmu_read_pmnc(); - val &= ~XSCALE_PMU_ENABLE; - xscale2pmu_write_pmnc(val); - spin_unlock_irqrestore(&pmu_lock, flags); -} - -static inline u32 -xscale2pmu_read_counter(int counter) -{ - u32 val = 0; - - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); - break; - } - - return val; -} - -static inline void -xscale2pmu_write_counter(int counter, u32 val) -{ - switch (counter) { - case XSCALE_CYCLE_COUNTER: - asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); - break; - case XSCALE_COUNTER0: - asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER1: - asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER2: - asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); - break; - case XSCALE_COUNTER3: - asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); - break; - } -} - -static const struct arm_pmu xscale2pmu = { - .id = ARM_PERF_PMU_ID_XSCALE2, - .handle_irq = xscale2pmu_handle_irq, - .enable = xscale2pmu_enable_event, - .disable = xscale2pmu_disable_event, - .event_map = xscalepmu_event_map, - .raw_event = xscalepmu_raw_event, - .read_counter = xscale2pmu_read_counter, - .write_counter = xscale2pmu_write_counter, - .get_event_idx = xscale2pmu_get_event_idx, - .start = xscale2pmu_start, - .stop = xscale2pmu_stop, - .num_events = 5, - .max_period = (1LLU << 32) - 1, -}; - -static int __init -init_hw_perf_events(void) -{ - unsigned long cpuid = read_cpuid_id(); - unsigned long implementor = (cpuid & 0xFF000000) >> 24; - unsigned long part_number = (cpuid & 0xFFF0); - - /* ARM Ltd CPUs. */ - if (0x41 == implementor) { - switch (part_number) { - case 0xB360: /* ARM1136 */ - case 0xB560: /* ARM1156 */ - case 0xB760: /* ARM1176 */ - armpmu = &armv6pmu; - memcpy(armpmu_perf_cache_map, armv6_perf_cache_map, - sizeof(armv6_perf_cache_map)); - break; - case 0xB020: /* ARM11mpcore */ - armpmu = &armv6mpcore_pmu; - memcpy(armpmu_perf_cache_map, - armv6mpcore_perf_cache_map, - sizeof(armv6mpcore_perf_cache_map)); - break; - case 0xC080: /* Cortex-A8 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA8; - memcpy(armpmu_perf_cache_map, armv7_a8_perf_cache_map, - sizeof(armv7_a8_perf_cache_map)); - armv7pmu.event_map = armv7_a8_pmu_event_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); - break; - case 0xC090: /* Cortex-A9 */ - armv7pmu.id = ARM_PERF_PMU_ID_CA9; - memcpy(armpmu_perf_cache_map, armv7_a9_perf_cache_map, - sizeof(armv7_a9_perf_cache_map)); - armv7pmu.event_map = armv7_a9_pmu_event_map; - armpmu = &armv7pmu; - - /* Reset PMNC and read the nb of CNTx counters - supported */ - armv7pmu.num_events = armv7_reset_read_pmnc(); - break; - } - /* Intel CPUs [xscale]. */ - } else if (0x69 == implementor) { - part_number = (cpuid >> 13) & 0x7; - switch (part_number) { - case 1: - armpmu = &xscale1pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); - break; - case 2: - armpmu = &xscale2pmu; - memcpy(armpmu_perf_cache_map, xscale_perf_cache_map, - sizeof(xscale_perf_cache_map)); - break; - } - } - - if (armpmu) { - pr_info("enabled with %s PMU driver, %d counters available\n", - arm_pmu_names[armpmu->id], armpmu->num_events); - } else { - pr_info("no hardware support available\n"); - } - - perf_pmu_register(&pmu); - - return 0; -} -arch_initcall(init_hw_perf_events); - /* * Callchain handling code. */ @@ -3053,17 +546,17 @@ arch_initcall(init_hw_perf_events); * This code has been adapted from the ARM OProfile support. */ struct frame_tail { - struct frame_tail *fp; - unsigned long sp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long sp; + unsigned long lr; } __attribute__((packed)); /* * Get the return address for a single stackframe and return a pointer to the * next frame tail. */ -static struct frame_tail * -user_backtrace(struct frame_tail *tail, +static struct frame_tail __user * +user_backtrace(struct frame_tail __user *tail, struct perf_callchain_entry *entry) { struct frame_tail buftail; @@ -3080,7 +573,7 @@ user_backtrace(struct frame_tail *tail, * Frame pointers should strictly progress back up the stack * (towards higher addresses). */ - if (tail >= buftail.fp) + if (tail + 1 >= buftail.fp) return NULL; return buftail.fp - 1; @@ -3089,12 +582,18 @@ user_backtrace(struct frame_tail *tail, void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail *tail; + struct frame_tail __user *tail; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } - tail = (struct frame_tail *)regs->ARM_fp - 1; + perf_callchain_store(entry, regs->ARM_pc); + tail = (struct frame_tail __user *)regs->ARM_fp - 1; - while (tail && !((unsigned long)tail & 0x3)) + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) tail = user_backtrace(tail, entry); } @@ -3117,9 +616,41 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs) { struct stackframe fr; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + fr.fp = regs->ARM_fp; fr.sp = regs->ARM_sp; fr.lr = regs->ARM_lr; fr.pc = regs->ARM_pc; walk_stackframe(&fr, callchain_trace, entry); } + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} diff --git a/arch/arm/kernel/perf_event_cpu.c b/arch/arm/kernel/perf_event_cpu.c new file mode 100644 index 00000000000..af9e35e8836 --- /dev/null +++ b/arch/arm/kernel/perf_event_cpu.c @@ -0,0 +1,363 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ +#define pr_fmt(fmt) "CPU PMU: " fmt + +#include <linux/bitmap.h> +#include <linux/export.h> +#include <linux/kernel.h> +#include <linux/of.h> +#include <linux/platform_device.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/irq.h> +#include <linux/irqdesc.h> + +#include <asm/cputype.h> +#include <asm/irq_regs.h> +#include <asm/pmu.h> + +/* Set at runtime when we know what CPU type we are. */ +static struct arm_pmu *cpu_pmu; + +static DEFINE_PER_CPU(struct arm_pmu *, percpu_pmu); +static DEFINE_PER_CPU(struct perf_event * [ARMPMU_MAX_HWEVENTS], hw_events); +static DEFINE_PER_CPU(unsigned long [BITS_TO_LONGS(ARMPMU_MAX_HWEVENTS)], used_mask); +static DEFINE_PER_CPU(struct pmu_hw_events, cpu_hw_events); + +/* + * Despite the names, these two functions are CPU-specific and are used + * by the OProfile/perf code. + */ +const char *perf_pmu_name(void) +{ + if (!cpu_pmu) + return NULL; + + return cpu_pmu->name; +} +EXPORT_SYMBOL_GPL(perf_pmu_name); + +int perf_num_counters(void) +{ + int max_events = 0; + + if (cpu_pmu != NULL) + max_events = cpu_pmu->num_events; + + return max_events; +} +EXPORT_SYMBOL_GPL(perf_num_counters); + +/* Include the PMU-specific implementations. */ +#include "perf_event_xscale.c" +#include "perf_event_v6.c" +#include "perf_event_v7.c" + +static struct pmu_hw_events *cpu_pmu_get_cpu_events(void) +{ + return this_cpu_ptr(&cpu_hw_events); +} + +static void cpu_pmu_enable_percpu_irq(void *data) +{ + struct arm_pmu *cpu_pmu = data; + struct platform_device *pmu_device = cpu_pmu->plat_device; + int irq = platform_get_irq(pmu_device, 0); + + enable_percpu_irq(irq, IRQ_TYPE_NONE); + cpumask_set_cpu(smp_processor_id(), &cpu_pmu->active_irqs); +} + +static void cpu_pmu_disable_percpu_irq(void *data) +{ + struct arm_pmu *cpu_pmu = data; + struct platform_device *pmu_device = cpu_pmu->plat_device; + int irq = platform_get_irq(pmu_device, 0); + + cpumask_clear_cpu(smp_processor_id(), &cpu_pmu->active_irqs); + disable_percpu_irq(irq); +} + +static void cpu_pmu_free_irq(struct arm_pmu *cpu_pmu) +{ + int i, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + on_each_cpu(cpu_pmu_disable_percpu_irq, cpu_pmu, 1); + free_percpu_irq(irq, &percpu_pmu); + } else { + for (i = 0; i < irqs; ++i) { + if (!cpumask_test_and_clear_cpu(i, &cpu_pmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq >= 0) + free_irq(irq, cpu_pmu); + } + } +} + +static int cpu_pmu_request_irq(struct arm_pmu *cpu_pmu, irq_handler_t handler) +{ + int i, err, irq, irqs; + struct platform_device *pmu_device = cpu_pmu->plat_device; + + if (!pmu_device) + return -ENODEV; + + irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (irqs < 1) { + printk_once("perf/ARM: No irqs for PMU defined, sampling events not supported\n"); + return 0; + } + + irq = platform_get_irq(pmu_device, 0); + if (irq >= 0 && irq_is_percpu(irq)) { + err = request_percpu_irq(irq, handler, "arm-pmu", &percpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + on_each_cpu(cpu_pmu_enable_percpu_irq, cpu_pmu, 1); + } else { + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq < 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, handler, + IRQF_NOBALANCING | IRQF_NO_THREAD, "arm-pmu", + cpu_pmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + return err; + } + + cpumask_set_cpu(i, &cpu_pmu->active_irqs); + } + } + + return 0; +} + +static void cpu_pmu_init(struct arm_pmu *cpu_pmu) +{ + int cpu; + for_each_possible_cpu(cpu) { + struct pmu_hw_events *events = &per_cpu(cpu_hw_events, cpu); + events->events = per_cpu(hw_events, cpu); + events->used_mask = per_cpu(used_mask, cpu); + raw_spin_lock_init(&events->pmu_lock); + per_cpu(percpu_pmu, cpu) = cpu_pmu; + } + + cpu_pmu->get_hw_events = cpu_pmu_get_cpu_events; + cpu_pmu->request_irq = cpu_pmu_request_irq; + cpu_pmu->free_irq = cpu_pmu_free_irq; + + /* Ensure the PMU has sane values out of reset. */ + if (cpu_pmu->reset) + on_each_cpu(cpu_pmu->reset, cpu_pmu, 1); + + /* If no interrupts available, set the corresponding capability flag */ + if (!platform_get_irq(cpu_pmu->plat_device, 0)) + cpu_pmu->pmu.capabilities |= PERF_PMU_CAP_NO_INTERRUPT; +} + +/* + * PMU hardware loses all context when a CPU goes offline. + * When a CPU is hotplugged back in, since some hardware registers are + * UNKNOWN at reset, the PMU must be explicitly reset to avoid reading + * junk values out of them. + */ +static int cpu_pmu_notify(struct notifier_block *b, unsigned long action, + void *hcpu) +{ + if ((action & ~CPU_TASKS_FROZEN) != CPU_STARTING) + return NOTIFY_DONE; + + if (cpu_pmu && cpu_pmu->reset) + cpu_pmu->reset(cpu_pmu); + else + return NOTIFY_DONE; + + return NOTIFY_OK; +} + +static struct notifier_block cpu_pmu_hotplug_notifier = { + .notifier_call = cpu_pmu_notify, +}; + +/* + * PMU platform driver and devicetree bindings. + */ +static struct of_device_id cpu_pmu_of_device_ids[] = { + {.compatible = "arm,cortex-a17-pmu", .data = armv7_a17_pmu_init}, + {.compatible = "arm,cortex-a15-pmu", .data = armv7_a15_pmu_init}, + {.compatible = "arm,cortex-a12-pmu", .data = armv7_a12_pmu_init}, + {.compatible = "arm,cortex-a9-pmu", .data = armv7_a9_pmu_init}, + {.compatible = "arm,cortex-a8-pmu", .data = armv7_a8_pmu_init}, + {.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init}, + {.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init}, + {.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init}, + {.compatible = "arm,arm1176-pmu", .data = armv6pmu_init}, + {.compatible = "arm,arm1136-pmu", .data = armv6pmu_init}, + {.compatible = "qcom,krait-pmu", .data = krait_pmu_init}, + {}, +}; + +static struct platform_device_id cpu_pmu_plat_device_ids[] = { + {.name = "arm-pmu"}, + {}, +}; + +/* + * CPU PMU identification and probing. + */ +static int probe_current_pmu(struct arm_pmu *pmu) +{ + int cpu = get_cpu(); + unsigned long implementor = read_cpuid_implementor(); + unsigned long part_number = read_cpuid_part_number(); + int ret = -ENODEV; + + pr_info("probing PMU on CPU %d\n", cpu); + + /* ARM Ltd CPUs. */ + if (implementor == ARM_CPU_IMP_ARM) { + switch (part_number) { + case ARM_CPU_PART_ARM1136: + case ARM_CPU_PART_ARM1156: + case ARM_CPU_PART_ARM1176: + ret = armv6pmu_init(pmu); + break; + case ARM_CPU_PART_ARM11MPCORE: + ret = armv6mpcore_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A8: + ret = armv7_a8_pmu_init(pmu); + break; + case ARM_CPU_PART_CORTEX_A9: + ret = armv7_a9_pmu_init(pmu); + break; + } + /* Intel CPUs [xscale]. */ + } else if (implementor == ARM_CPU_IMP_INTEL) { + switch (xscale_cpu_arch_version()) { + case ARM_CPU_XSCALE_ARCH_V1: + ret = xscale1pmu_init(pmu); + break; + case ARM_CPU_XSCALE_ARCH_V2: + ret = xscale2pmu_init(pmu); + break; + } + } + + put_cpu(); + return ret; +} + +static int cpu_pmu_device_probe(struct platform_device *pdev) +{ + const struct of_device_id *of_id; + const int (*init_fn)(struct arm_pmu *); + struct device_node *node = pdev->dev.of_node; + struct arm_pmu *pmu; + int ret = -ENODEV; + + if (cpu_pmu) { + pr_info("attempt to register multiple PMU devices!"); + return -ENOSPC; + } + + pmu = kzalloc(sizeof(struct arm_pmu), GFP_KERNEL); + if (!pmu) { + pr_info("failed to allocate PMU device!"); + return -ENOMEM; + } + + cpu_pmu = pmu; + cpu_pmu->plat_device = pdev; + + if (node && (of_id = of_match_node(cpu_pmu_of_device_ids, pdev->dev.of_node))) { + init_fn = of_id->data; + ret = init_fn(pmu); + } else { + ret = probe_current_pmu(pmu); + } + + if (ret) { + pr_info("failed to probe PMU!"); + goto out_free; + } + + cpu_pmu_init(cpu_pmu); + ret = armpmu_register(cpu_pmu, PERF_TYPE_RAW); + + if (!ret) + return 0; + +out_free: + pr_info("failed to register PMU devices!"); + kfree(pmu); + return ret; +} + +static struct platform_driver cpu_pmu_driver = { + .driver = { + .name = "arm-pmu", + .pm = &armpmu_dev_pm_ops, + .of_match_table = cpu_pmu_of_device_ids, + }, + .probe = cpu_pmu_device_probe, + .id_table = cpu_pmu_plat_device_ids, +}; + +static int __init register_pmu_driver(void) +{ + int err; + + err = register_cpu_notifier(&cpu_pmu_hotplug_notifier); + if (err) + return err; + + err = platform_driver_register(&cpu_pmu_driver); + if (err) + unregister_cpu_notifier(&cpu_pmu_hotplug_notifier); + + return err; +} +device_initcall(register_pmu_driver); diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c new file mode 100644 index 00000000000..03664b0e8fa --- /dev/null +++ b/arch/arm/kernel/perf_event_v6.c @@ -0,0 +1,715 @@ +/* + * ARMv6 Performance counter handling code. + * + * Copyright (C) 2009 picoChip Designs, Ltd., Jamie Iles + * + * ARMv6 has 2 configurable performance counters and a single cycle counter. + * They all share a single reset bit but can be written to zero so we can use + * that for a reset. + * + * The counters can't be individually enabled or disabled so when we remove + * one event and replace it with another we could get spurious counts from the + * wrong event. However, we can take advantage of the fact that the + * performance counters can export events to the event bus, and the event bus + * itself can be monitored. This requires that we *don't* export the events to + * the event bus. The procedure for disabling a configurable counter is: + * - change the counter to count the ETMEXTOUT[0] signal (0x20). This + * effectively stops the counter from counting. + * - disable the counter's interrupt generation (each counter has it's + * own interrupt enable bit). + * Once stopped, the counter value can be written as 0 to reset. + * + * To enable a counter: + * - enable the counter's interrupt generation. + * - set the new event type. + * + * Note: the dedicated cycle counter only counts cycles and can't be + * enabled/disabled independently of the others. When we want to disable the + * cycle counter, we have to just disable the interrupt reporting and start + * ignoring that counter. When re-enabling, we have to reset the value and + * enable the interrupt. + */ + +#if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) +enum armv6_perf_types { + ARMV6_PERFCTR_ICACHE_MISS = 0x0, + ARMV6_PERFCTR_IBUF_STALL = 0x1, + ARMV6_PERFCTR_DDEP_STALL = 0x2, + ARMV6_PERFCTR_ITLB_MISS = 0x3, + ARMV6_PERFCTR_DTLB_MISS = 0x4, + ARMV6_PERFCTR_BR_EXEC = 0x5, + ARMV6_PERFCTR_BR_MISPREDICT = 0x6, + ARMV6_PERFCTR_INSTR_EXEC = 0x7, + ARMV6_PERFCTR_DCACHE_HIT = 0x9, + ARMV6_PERFCTR_DCACHE_ACCESS = 0xA, + ARMV6_PERFCTR_DCACHE_MISS = 0xB, + ARMV6_PERFCTR_DCACHE_WBACK = 0xC, + ARMV6_PERFCTR_SW_PC_CHANGE = 0xD, + ARMV6_PERFCTR_MAIN_TLB_MISS = 0xF, + ARMV6_PERFCTR_EXPL_D_ACCESS = 0x10, + ARMV6_PERFCTR_LSU_FULL_STALL = 0x11, + ARMV6_PERFCTR_WBUF_DRAINED = 0x12, + ARMV6_PERFCTR_CPU_CYCLES = 0xFF, + ARMV6_PERFCTR_NOP = 0x20, +}; + +enum armv6_counters { + ARMV6_CYCLE_COUNTER = 0, + ARMV6_COUNTER0, + ARMV6_COUNTER1, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL, +}; + +static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +enum armv6mpcore_perf_types { + ARMV6MPCORE_PERFCTR_ICACHE_MISS = 0x0, + ARMV6MPCORE_PERFCTR_IBUF_STALL = 0x1, + ARMV6MPCORE_PERFCTR_DDEP_STALL = 0x2, + ARMV6MPCORE_PERFCTR_ITLB_MISS = 0x3, + ARMV6MPCORE_PERFCTR_DTLB_MISS = 0x4, + ARMV6MPCORE_PERFCTR_BR_EXEC = 0x5, + ARMV6MPCORE_PERFCTR_BR_NOTPREDICT = 0x6, + ARMV6MPCORE_PERFCTR_BR_MISPREDICT = 0x7, + ARMV6MPCORE_PERFCTR_INSTR_EXEC = 0x8, + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS = 0xA, + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS = 0xB, + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS = 0xC, + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS = 0xD, + ARMV6MPCORE_PERFCTR_DCACHE_EVICTION = 0xE, + ARMV6MPCORE_PERFCTR_SW_PC_CHANGE = 0xF, + ARMV6MPCORE_PERFCTR_MAIN_TLB_MISS = 0x10, + ARMV6MPCORE_PERFCTR_EXPL_MEM_ACCESS = 0x11, + ARMV6MPCORE_PERFCTR_LSU_FULL_STALL = 0x12, + ARMV6MPCORE_PERFCTR_WBUF_DRAINED = 0x13, + ARMV6MPCORE_PERFCTR_CPU_CYCLES = 0xFF, +}; + +/* + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6MPCORE_PERFCTR_IBUF_STALL, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6MPCORE_PERFCTR_LSU_FULL_STALL, +}; + +static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_RDMISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS, + [C(RESULT_MISS)] = + ARMV6MPCORE_PERFCTR_DCACHE_WRMISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + /* + * The ARM performance counters can count micro DTLB misses, + * micro ITLB misses and main TLB misses. There isn't an event + * for TLB misses, so use the micro misses here and if users + * want the main TLB misses they can use a raw counter. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +static inline unsigned long +armv6_pmcr_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c15, c12, 0" : "=r"(val)); + return val; +} + +static inline void +armv6_pmcr_write(unsigned long val) +{ + asm volatile("mcr p15, 0, %0, c15, c12, 0" : : "r"(val)); +} + +#define ARMV6_PMCR_ENABLE (1 << 0) +#define ARMV6_PMCR_CTR01_RESET (1 << 1) +#define ARMV6_PMCR_CCOUNT_RESET (1 << 2) +#define ARMV6_PMCR_CCOUNT_DIV (1 << 3) +#define ARMV6_PMCR_COUNT0_IEN (1 << 4) +#define ARMV6_PMCR_COUNT1_IEN (1 << 5) +#define ARMV6_PMCR_CCOUNT_IEN (1 << 6) +#define ARMV6_PMCR_COUNT0_OVERFLOW (1 << 8) +#define ARMV6_PMCR_COUNT1_OVERFLOW (1 << 9) +#define ARMV6_PMCR_CCOUNT_OVERFLOW (1 << 10) +#define ARMV6_PMCR_EVT_COUNT0_SHIFT 20 +#define ARMV6_PMCR_EVT_COUNT0_MASK (0xFF << ARMV6_PMCR_EVT_COUNT0_SHIFT) +#define ARMV6_PMCR_EVT_COUNT1_SHIFT 12 +#define ARMV6_PMCR_EVT_COUNT1_MASK (0xFF << ARMV6_PMCR_EVT_COUNT1_SHIFT) + +#define ARMV6_PMCR_OVERFLOWED_MASK \ + (ARMV6_PMCR_COUNT0_OVERFLOW | ARMV6_PMCR_COUNT1_OVERFLOW | \ + ARMV6_PMCR_CCOUNT_OVERFLOW) + +static inline int +armv6_pmcr_has_overflowed(unsigned long pmcr) +{ + return pmcr & ARMV6_PMCR_OVERFLOWED_MASK; +} + +static inline int +armv6_pmcr_counter_has_overflowed(unsigned long pmcr, + enum armv6_counters counter) +{ + int ret = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + ret = pmcr & ARMV6_PMCR_CCOUNT_OVERFLOW; + else if (ARMV6_COUNTER0 == counter) + ret = pmcr & ARMV6_PMCR_COUNT0_OVERFLOW; + else if (ARMV6_COUNTER1 == counter) + ret = pmcr & ARMV6_PMCR_COUNT1_OVERFLOW; + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return ret; +} + +static inline u32 armv6pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + unsigned long value = 0; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 1" : "=r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 2" : "=r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mrc p15, 0, %0, c15, c12, 3" : "=r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + + return value; +} + +static inline void armv6pmu_write_counter(struct perf_event *event, u32 value) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 1" : : "r"(value)); + else if (ARMV6_COUNTER0 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 2" : : "r"(value)); + else if (ARMV6_COUNTER1 == counter) + asm volatile("mcr p15, 0, %0, c15, c12, 3" : : "r"(value)); + else + WARN_ONCE(1, "invalid counter number (%d)\n", counter); +} + +static void armv6pmu_enable_event(struct perf_event *event) +{ + unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = 0; + evt = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_EVT_COUNT0_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT0_SHIFT) | + ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_EVT_COUNT1_MASK; + evt = (hwc->config_base << ARMV6_PMCR_EVT_COUNT1_SHIFT) | + ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the event + * that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static irqreturn_t +armv6pmu_handle_irq(int irq_num, + void *dev) +{ + unsigned long pmcr = armv6_pmcr_read(); + struct perf_sample_data data; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pt_regs *regs; + int idx; + + if (!armv6_pmcr_has_overflowed(pmcr)) + return IRQ_NONE; + + regs = get_irq_regs(); + + /* + * The interrupts are cleared by writing the overflow flags back to + * the control register. All of the other bits don't have any effect + * if they are rewritten, so write the whole value back. + */ + armv6_pmcr_write(pmcr); + + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv6_pmcr_counter_has_overflowed(pmcr, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv6pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long flags, val; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = armv6_pmcr_read(); + val |= ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv6pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long flags, val; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~ARMV6_PMCR_ENABLE; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int +armv6pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + /* Always place a cycle counter into the cycle counter. */ + if (ARMV6_PERFCTR_CPU_CYCLES == hwc->config_base) { + if (test_and_set_bit(ARMV6_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV6_CYCLE_COUNTER; + } else { + /* + * For anything other than a cycle counter, try and use + * counter0 and counter1. + */ + if (!test_and_set_bit(ARMV6_COUNTER1, cpuc->used_mask)) + return ARMV6_COUNTER1; + + if (!test_and_set_bit(ARMV6_COUNTER0, cpuc->used_mask)) + return ARMV6_COUNTER0; + + /* The counters are all in use. */ + return -EAGAIN; + } +} + +static void armv6pmu_disable_event(struct perf_event *event) +{ + unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + evt = 0; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN | ARMV6_PMCR_EVT_COUNT0_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT0_SHIFT; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN | ARMV6_PMCR_EVT_COUNT1_MASK; + evt = ARMV6_PERFCTR_NOP << ARMV6_PMCR_EVT_COUNT1_SHIFT; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Mask out the current event and set the counter to count the number + * of ETM bus signal assertion cycles. The external reporting should + * be disabled and so this should never increment. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv6mpcore_pmu_disable_event(struct perf_event *event) +{ + unsigned long val, mask, flags, evt = 0; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (ARMV6_CYCLE_COUNTER == idx) { + mask = ARMV6_PMCR_CCOUNT_IEN; + } else if (ARMV6_COUNTER0 == idx) { + mask = ARMV6_PMCR_COUNT0_IEN; + } else if (ARMV6_COUNTER1 == idx) { + mask = ARMV6_PMCR_COUNT1_IEN; + } else { + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + /* + * Unlike UP ARMv6, we don't have a way of stopping the counters. We + * simply disable the interrupt reporting. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = armv6_pmcr_read(); + val &= ~mask; + val |= evt; + armv6_pmcr_write(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int armv6_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv6_perf_map, + &armv6_perf_cache_map, 0xFF); +} + +static int armv6pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "v6"; + cpu_pmu->handle_irq = armv6pmu_handle_irq; + cpu_pmu->enable = armv6pmu_enable_event; + cpu_pmu->disable = armv6pmu_disable_event; + cpu_pmu->read_counter = armv6pmu_read_counter; + cpu_pmu->write_counter = armv6pmu_write_counter; + cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->start = armv6pmu_start; + cpu_pmu->stop = armv6pmu_stop; + cpu_pmu->map_event = armv6_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; +} + +/* + * ARMv6mpcore is almost identical to single core ARMv6 with the exception + * that some of the events have different enumerations and that there is no + * *hack* to stop the programmable counters. To stop the counters we simply + * disable the interrupt reporting and update the event. When unthrottling we + * reset the period and enable the interrupt reporting. + */ + +static int armv6mpcore_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv6mpcore_perf_map, + &armv6mpcore_perf_cache_map, 0xFF); +} + +static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "v6mpcore"; + cpu_pmu->handle_irq = armv6pmu_handle_irq; + cpu_pmu->enable = armv6pmu_enable_event; + cpu_pmu->disable = armv6mpcore_pmu_disable_event; + cpu_pmu->read_counter = armv6pmu_read_counter; + cpu_pmu->write_counter = armv6pmu_write_counter; + cpu_pmu->get_event_idx = armv6pmu_get_event_idx; + cpu_pmu->start = armv6pmu_start; + cpu_pmu->stop = armv6pmu_stop; + cpu_pmu->map_event = armv6mpcore_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; +} +#else +static int armv6pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} +#endif /* CONFIG_CPU_V6 || CONFIG_CPU_V6K */ diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c new file mode 100644 index 00000000000..1d37568c547 --- /dev/null +++ b/arch/arm/kernel/perf_event_v7.c @@ -0,0 +1,2040 @@ +/* + * ARMv7 Cortex-A8 and Cortex-A9 Performance Events handling code. + * + * ARMv7 support: Jean Pihet <jpihet@mvista.com> + * 2010 (c) MontaVista Software, LLC. + * + * Copied from ARMv6 code, with the low level code inspired + * by the ARMv7 Oprofile code. + * + * Cortex-A8 has up to 4 configurable performance counters and + * a single cycle counter. + * Cortex-A9 has up to 31 configurable performance counters and + * a single cycle counter. + * + * All counters can be enabled/disabled and IRQ masked separately. The cycle + * counter and all 4 performance counters together can be reset separately. + */ + +#ifdef CONFIG_CPU_V7 + +#include <asm/cp15.h> +#include <asm/vfp.h> +#include "../vfp/vfpinstr.h" + +/* + * Common ARMv7 event types + * + * Note: An implementation may not be able to count all of these events + * but the encodings are considered to be `reserved' in the case that + * they are not available. + */ +enum armv7_perf_types { + ARMV7_PERFCTR_PMNC_SW_INCR = 0x00, + ARMV7_PERFCTR_L1_ICACHE_REFILL = 0x01, + ARMV7_PERFCTR_ITLB_REFILL = 0x02, + ARMV7_PERFCTR_L1_DCACHE_REFILL = 0x03, + ARMV7_PERFCTR_L1_DCACHE_ACCESS = 0x04, + ARMV7_PERFCTR_DTLB_REFILL = 0x05, + ARMV7_PERFCTR_MEM_READ = 0x06, + ARMV7_PERFCTR_MEM_WRITE = 0x07, + ARMV7_PERFCTR_INSTR_EXECUTED = 0x08, + ARMV7_PERFCTR_EXC_TAKEN = 0x09, + ARMV7_PERFCTR_EXC_EXECUTED = 0x0A, + ARMV7_PERFCTR_CID_WRITE = 0x0B, + + /* + * ARMV7_PERFCTR_PC_WRITE is equivalent to HW_BRANCH_INSTRUCTIONS. + * It counts: + * - all (taken) branch instructions, + * - instructions that explicitly write the PC, + * - exception generating instructions. + */ + ARMV7_PERFCTR_PC_WRITE = 0x0C, + ARMV7_PERFCTR_PC_IMM_BRANCH = 0x0D, + ARMV7_PERFCTR_PC_PROC_RETURN = 0x0E, + ARMV7_PERFCTR_MEM_UNALIGNED_ACCESS = 0x0F, + ARMV7_PERFCTR_PC_BRANCH_MIS_PRED = 0x10, + ARMV7_PERFCTR_CLOCK_CYCLES = 0x11, + ARMV7_PERFCTR_PC_BRANCH_PRED = 0x12, + + /* These events are defined by the PMUv2 supplement (ARM DDI 0457A). */ + ARMV7_PERFCTR_MEM_ACCESS = 0x13, + ARMV7_PERFCTR_L1_ICACHE_ACCESS = 0x14, + ARMV7_PERFCTR_L1_DCACHE_WB = 0x15, + ARMV7_PERFCTR_L2_CACHE_ACCESS = 0x16, + ARMV7_PERFCTR_L2_CACHE_REFILL = 0x17, + ARMV7_PERFCTR_L2_CACHE_WB = 0x18, + ARMV7_PERFCTR_BUS_ACCESS = 0x19, + ARMV7_PERFCTR_MEM_ERROR = 0x1A, + ARMV7_PERFCTR_INSTR_SPEC = 0x1B, + ARMV7_PERFCTR_TTBR_WRITE = 0x1C, + ARMV7_PERFCTR_BUS_CYCLES = 0x1D, + + ARMV7_PERFCTR_CPU_CYCLES = 0xFF +}; + +/* ARMv7 Cortex-A8 specific event types */ +enum armv7_a8_perf_types { + ARMV7_A8_PERFCTR_L2_CACHE_ACCESS = 0x43, + ARMV7_A8_PERFCTR_L2_CACHE_REFILL = 0x44, + ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS = 0x50, + ARMV7_A8_PERFCTR_STALL_ISIDE = 0x56, +}; + +/* ARMv7 Cortex-A9 specific event types */ +enum armv7_a9_perf_types { + ARMV7_A9_PERFCTR_INSTR_CORE_RENAME = 0x68, + ARMV7_A9_PERFCTR_STALL_ICACHE = 0x60, + ARMV7_A9_PERFCTR_STALL_DISPATCH = 0x66, +}; + +/* ARMv7 Cortex-A5 specific event types */ +enum armv7_a5_perf_types { + ARMV7_A5_PERFCTR_PREFETCH_LINEFILL = 0xc2, + ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP = 0xc3, +}; + +/* ARMv7 Cortex-A15 specific event types */ +enum armv7_a15_perf_types { + ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ = 0x40, + ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE = 0x41, + ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ = 0x42, + ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE = 0x43, + + ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ = 0x4C, + ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE = 0x4D, + + ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ = 0x50, + ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE = 0x51, + ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ = 0x52, + ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE = 0x53, + + ARMV7_A15_PERFCTR_PC_WRITE_SPEC = 0x76, +}; + +/* ARMv7 Cortex-A12 specific event types */ +enum armv7_a12_perf_types { + ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ = 0x40, + ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE = 0x41, + + ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ = 0x50, + ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE = 0x51, + + ARMV7_A12_PERFCTR_PC_WRITE_SPEC = 0x76, + + ARMV7_A12_PERFCTR_PF_TLB_REFILL = 0xe7, +}; + +/* ARMv7 Krait specific event types */ +enum krait_perf_types { + KRAIT_PMRESR0_GROUP0 = 0xcc, + KRAIT_PMRESR1_GROUP0 = 0xd0, + KRAIT_PMRESR2_GROUP0 = 0xd4, + KRAIT_VPMRESR0_GROUP0 = 0xd8, + + KRAIT_PERFCTR_L1_ICACHE_ACCESS = 0x10011, + KRAIT_PERFCTR_L1_ICACHE_MISS = 0x10010, + + KRAIT_PERFCTR_L1_ITLB_ACCESS = 0x12222, + KRAIT_PERFCTR_L1_DTLB_ACCESS = 0x12210, +}; + +/* + * Cortex-A8 HW events mapping + * + * The hardware events that we support. We do support cache operations but + * we have harvard caches and no way to combine instruction and data + * accesses/misses in hardware. + */ +static const unsigned armv7_a8_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A8_PERFCTR_STALL_ISIDE, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv7_a8_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L1_ICACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_A8_PERFCTR_L2_CACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_A8_PERFCTR_L2_CACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A9 HW events mapping + */ +static const unsigned armv7_a9_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_A9_PERFCTR_INSTR_CORE_RENAME, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV7_A9_PERFCTR_STALL_ICACHE, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV7_A9_PERFCTR_STALL_DISPATCH, +}; + +static const unsigned armv7_a9_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A5 HW events mapping + */ +static const unsigned armv7_a5_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv7_a5_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, + [C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + /* + * The prefetch counters don't differentiate between the I + * side and the D side. + */ + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL, + [C(RESULT_MISS)] = ARMV7_A5_PERFCTR_PREFETCH_LINEFILL_DROP, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A15 HW events mapping + */ +static const unsigned armv7_a15_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A15_PERFCTR_PC_WRITE_SPEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv7_a15_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_READ, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L1_DCACHE_REFILL_WRITE, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + /* + * Not all performance counters differentiate between read + * and write accesses/misses so we're not always strictly + * correct, but it's the best we can do. Writes and reads get + * combined in these cases. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_READ, + [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_READ, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_A15_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_L2_CACHE_REFILL_WRITE, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_READ, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_A15_PERFCTR_DTLB_REFILL_L1_WRITE, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A7 HW events mapping + */ +static const unsigned armv7_a7_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv7_a7_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L2_CACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Cortex-A12 HW events mapping + */ +static const unsigned armv7_a12_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [PERF_COUNT_HW_CACHE_MISSES] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_A12_PERFCTR_PC_WRITE_SPEC, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_BUS_CYCLES, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, +}; + +static const unsigned armv7_a12_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_READ, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L1_DCACHE_ACCESS_WRITE, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + /* + * Not all performance counters differentiate between read + * and write accesses/misses so we're not always strictly + * correct, but it's the best we can do. Writes and reads get + * combined in these cases. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_ICACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_ICACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_READ, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_A12_PERFCTR_L2_CACHE_ACCESS_WRITE, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L2_CACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_DTLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_A12_PERFCTR_PF_TLB_REFILL, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_ITLB_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Krait HW events mapping + */ +static const unsigned krait_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV7_PERFCTR_PC_WRITE, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned krait_perf_map_no_branch[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = ARMV7_PERFCTR_CPU_CYCLES, + [PERF_COUNT_HW_INSTRUCTIONS] = ARMV7_PERFCTR_INSTR_EXECUTED, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_MISSES] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + [PERF_COUNT_HW_BUS_CYCLES] = ARMV7_PERFCTR_CLOCK_CYCLES, +}; + +static const unsigned krait_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + /* + * The performance counters don't differentiate between read + * and write accesses/misses so this isn't strictly correct, + * but it's the best we can do. Writes and reads get + * combined. + */ + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_L1_DCACHE_ACCESS, + [C(RESULT_MISS)] = ARMV7_PERFCTR_L1_DCACHE_REFILL, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ICACHE_ACCESS, + [C(RESULT_MISS)] = KRAIT_PERFCTR_L1_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_DTLB_ACCESS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = KRAIT_PERFCTR_L1_ITLB_ACCESS, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = ARMV7_PERFCTR_PC_BRANCH_PRED, + [C(RESULT_MISS)] = ARMV7_PERFCTR_PC_BRANCH_MIS_PRED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +/* + * Perf Events' indices + */ +#define ARMV7_IDX_CYCLE_COUNTER 0 +#define ARMV7_IDX_COUNTER0 1 +#define ARMV7_IDX_COUNTER_LAST(cpu_pmu) \ + (ARMV7_IDX_CYCLE_COUNTER + cpu_pmu->num_events - 1) + +#define ARMV7_MAX_COUNTERS 32 +#define ARMV7_COUNTER_MASK (ARMV7_MAX_COUNTERS - 1) + +/* + * ARMv7 low level PMNC access + */ + +/* + * Perf Event to low level counters mapping + */ +#define ARMV7_IDX_TO_COUNTER(x) \ + (((x) - ARMV7_IDX_COUNTER0) & ARMV7_COUNTER_MASK) + +/* + * Per-CPU PMNC: config reg + */ +#define ARMV7_PMNC_E (1 << 0) /* Enable all counters */ +#define ARMV7_PMNC_P (1 << 1) /* Reset all counters */ +#define ARMV7_PMNC_C (1 << 2) /* Cycle counter reset */ +#define ARMV7_PMNC_D (1 << 3) /* CCNT counts every 64th cpu cycle */ +#define ARMV7_PMNC_X (1 << 4) /* Export to ETM */ +#define ARMV7_PMNC_DP (1 << 5) /* Disable CCNT if non-invasive debug*/ +#define ARMV7_PMNC_N_SHIFT 11 /* Number of counters supported */ +#define ARMV7_PMNC_N_MASK 0x1f +#define ARMV7_PMNC_MASK 0x3f /* Mask for writable bits */ + +/* + * FLAG: counters overflow flag status reg + */ +#define ARMV7_FLAG_MASK 0xffffffff /* Mask for writable bits */ +#define ARMV7_OVERFLOWED_MASK ARMV7_FLAG_MASK + +/* + * PMXEVTYPER: Event selection reg + */ +#define ARMV7_EVTYPE_MASK 0xc80000ff /* Mask for writable bits */ +#define ARMV7_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ + +/* + * Event filters for PMUv2 + */ +#define ARMV7_EXCLUDE_PL1 (1 << 31) +#define ARMV7_EXCLUDE_USER (1 << 30) +#define ARMV7_INCLUDE_HYP (1 << 27) + +static inline u32 armv7_pmnc_read(void) +{ + u32 val; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(val)); + return val; +} + +static inline void armv7_pmnc_write(u32 val) +{ + val &= ARMV7_PMNC_MASK; + isb(); + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(val)); +} + +static inline int armv7_pmnc_has_overflowed(u32 pmnc) +{ + return pmnc & ARMV7_OVERFLOWED_MASK; +} + +static inline int armv7_pmnc_counter_valid(struct arm_pmu *cpu_pmu, int idx) +{ + return idx >= ARMV7_IDX_CYCLE_COUNTER && + idx <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); +} + +static inline int armv7_pmnc_counter_has_overflowed(u32 pmnc, int idx) +{ + return pmnc & BIT(ARMV7_IDX_TO_COUNTER(idx)); +} + +static inline int armv7_pmnc_select_counter(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c12, 5" : : "r" (counter)); + isb(); + + return idx; +} + +static inline u32 armv7pmu_read_counter(struct perf_event *event) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + u32 value = 0; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) + pr_err("CPU%u reading wrong counter %d\n", + smp_processor_id(), idx); + else if (idx == ARMV7_IDX_CYCLE_COUNTER) + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (value)); + else if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (value)); + + return value; +} + +static inline void armv7pmu_write_counter(struct perf_event *event, u32 value) +{ + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) + pr_err("CPU%u writing wrong counter %d\n", + smp_processor_id(), idx); + else if (idx == ARMV7_IDX_CYCLE_COUNTER) + asm volatile("mcr p15, 0, %0, c9, c13, 0" : : "r" (value)); + else if (armv7_pmnc_select_counter(idx) == idx) + asm volatile("mcr p15, 0, %0, c9, c13, 2" : : "r" (value)); +} + +static inline void armv7_pmnc_write_evtsel(int idx, u32 val) +{ + if (armv7_pmnc_select_counter(idx) == idx) { + val &= ARMV7_EVTYPE_MASK; + asm volatile("mcr p15, 0, %0, c9, c13, 1" : : "r" (val)); + } +} + +static inline int armv7_pmnc_enable_counter(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r" (BIT(counter))); + return idx; +} + +static inline int armv7_pmnc_disable_counter(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c12, 2" : : "r" (BIT(counter))); + return idx; +} + +static inline int armv7_pmnc_enable_intens(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c14, 1" : : "r" (BIT(counter))); + return idx; +} + +static inline int armv7_pmnc_disable_intens(int idx) +{ + u32 counter = ARMV7_IDX_TO_COUNTER(idx); + asm volatile("mcr p15, 0, %0, c9, c14, 2" : : "r" (BIT(counter))); + isb(); + /* Clear the overflow flag in case an interrupt is pending. */ + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (BIT(counter))); + isb(); + + return idx; +} + +static inline u32 armv7_pmnc_getreset_flags(void) +{ + u32 val; + + /* Read */ + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + + /* Write to clear flags */ + val &= ARMV7_FLAG_MASK; + asm volatile("mcr p15, 0, %0, c9, c12, 3" : : "r" (val)); + + return val; +} + +#ifdef DEBUG +static void armv7_pmnc_dump_regs(struct arm_pmu *cpu_pmu) +{ + u32 val; + unsigned int cnt; + + printk(KERN_INFO "PMNC registers dump:\n"); + + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r" (val)); + printk(KERN_INFO "PMNC =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 1" : "=r" (val)); + printk(KERN_INFO "CNTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c14, 1" : "=r" (val)); + printk(KERN_INFO "INTENS=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 3" : "=r" (val)); + printk(KERN_INFO "FLAGS =0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c12, 5" : "=r" (val)); + printk(KERN_INFO "SELECT=0x%08x\n", val); + + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r" (val)); + printk(KERN_INFO "CCNT =0x%08x\n", val); + + for (cnt = ARMV7_IDX_COUNTER0; + cnt <= ARMV7_IDX_COUNTER_LAST(cpu_pmu); cnt++) { + armv7_pmnc_select_counter(cnt); + asm volatile("mrc p15, 0, %0, c9, c13, 2" : "=r" (val)); + printk(KERN_INFO "CNT[%d] count =0x%08x\n", + ARMV7_IDX_TO_COUNTER(cnt), val); + asm volatile("mrc p15, 0, %0, c9, c13, 1" : "=r" (val)); + printk(KERN_INFO "CNT[%d] evtsel=0x%08x\n", + ARMV7_IDX_TO_COUNTER(cnt), val); + } +} +#endif + +static void armv7pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u enabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We only need to set the event for the cycle counter if we + * have the ability to perform event filtering. + */ + if (cpu_pmu->set_event_filter || idx != ARMV7_IDX_CYCLE_COUNTER) + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* + * Enable interrupt for this counter + */ + armv7_pmnc_enable_intens(idx); + + /* + * Enable counter + */ + armv7_pmnc_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv7pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + if (!armv7_pmnc_counter_valid(cpu_pmu, idx)) { + pr_err("CPU%u disabling wrong PMNC counter IRQ enable %d\n", + smp_processor_id(), idx); + return; + } + + /* + * Disable counter and interrupt + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* + * Disable counter + */ + armv7_pmnc_disable_counter(idx); + + /* + * Disable interrupt for this counter + */ + armv7_pmnc_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +{ + u32 pmnc; + struct perf_sample_data data; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pt_regs *regs; + int idx; + + /* + * Get and reset the IRQ flags + */ + pmnc = armv7_pmnc_getreset_flags(); + + /* + * Did an overflow occur? + */ + if (!armv7_pmnc_has_overflowed(pmnc)) + return IRQ_NONE; + + /* + * Handle the counter(s) overflow(s) + */ + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + /* Ignore if we don't have an event. */ + if (!event) + continue; + + /* + * We have a single interrupt for all counters. Check that + * each counter has overflowed before we process it. + */ + if (!armv7_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + /* + * Handle the pending perf events. + * + * Note: this call *must* be run with interrupts disabled. For + * platforms that can have the PMU interrupts raised as an NMI, this + * will not work. + */ + irq_work_run(); + + return IRQ_HANDLED; +} + +static void armv7pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Enable all counters */ + armv7_pmnc_write(armv7_pmnc_read() | ARMV7_PMNC_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void armv7pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long flags; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + /* Disable all counters */ + armv7_pmnc_write(armv7_pmnc_read() & ~ARMV7_PMNC_E); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int armv7pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + unsigned long evtype = hwc->config_base & ARMV7_EVTYPE_EVENT; + + /* Always place a cycle counter into the cycle counter. */ + if (evtype == ARMV7_PERFCTR_CPU_CYCLES) { + if (test_and_set_bit(ARMV7_IDX_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return ARMV7_IDX_CYCLE_COUNTER; + } + + /* + * For anything other than a cycle counter, try and use + * the events counters + */ + for (idx = ARMV7_IDX_COUNTER0; idx < cpu_pmu->num_events; ++idx) { + if (!test_and_set_bit(idx, cpuc->used_mask)) + return idx; + } + + /* The counters are all in use. */ + return -EAGAIN; +} + +/* + * Add an event filter to a given event. This will only work for PMUv2 PMUs. + */ +static int armv7pmu_set_event_filter(struct hw_perf_event *event, + struct perf_event_attr *attr) +{ + unsigned long config_base = 0; + + if (attr->exclude_idle) + return -EPERM; + if (attr->exclude_user) + config_base |= ARMV7_EXCLUDE_USER; + if (attr->exclude_kernel) + config_base |= ARMV7_EXCLUDE_PL1; + if (!attr->exclude_hv) + config_base |= ARMV7_INCLUDE_HYP; + + /* + * Install the filter into config_base as this is used to + * construct the event type. + */ + event->config_base = config_base; + + return 0; +} + +static void armv7pmu_reset(void *info) +{ + struct arm_pmu *cpu_pmu = (struct arm_pmu *)info; + u32 idx, nb_cnt = cpu_pmu->num_events; + + /* The counter and interrupt enable registers are unknown at reset. */ + for (idx = ARMV7_IDX_CYCLE_COUNTER; idx < nb_cnt; ++idx) { + armv7_pmnc_disable_counter(idx); + armv7_pmnc_disable_intens(idx); + } + + /* Initialize & Reset PMNC: C and P bits */ + armv7_pmnc_write(ARMV7_PMNC_P | ARMV7_PMNC_C); +} + +static int armv7_a8_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a8_perf_map, + &armv7_a8_perf_cache_map, 0xFF); +} + +static int armv7_a9_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a9_perf_map, + &armv7_a9_perf_cache_map, 0xFF); +} + +static int armv7_a5_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a5_perf_map, + &armv7_a5_perf_cache_map, 0xFF); +} + +static int armv7_a15_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a15_perf_map, + &armv7_a15_perf_cache_map, 0xFF); +} + +static int armv7_a7_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a7_perf_map, + &armv7_a7_perf_cache_map, 0xFF); +} + +static int armv7_a12_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &armv7_a12_perf_map, + &armv7_a12_perf_cache_map, 0xFF); +} + +static int krait_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &krait_perf_map, + &krait_perf_cache_map, 0xFFFFF); +} + +static int krait_map_event_no_branch(struct perf_event *event) +{ + return armpmu_map_event(event, &krait_perf_map_no_branch, + &krait_perf_cache_map, 0xFFFFF); +} + +static void armv7pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->handle_irq = armv7pmu_handle_irq; + cpu_pmu->enable = armv7pmu_enable_event; + cpu_pmu->disable = armv7pmu_disable_event; + cpu_pmu->read_counter = armv7pmu_read_counter; + cpu_pmu->write_counter = armv7pmu_write_counter; + cpu_pmu->get_event_idx = armv7pmu_get_event_idx; + cpu_pmu->start = armv7pmu_start; + cpu_pmu->stop = armv7pmu_stop; + cpu_pmu->reset = armv7pmu_reset; + cpu_pmu->max_period = (1LLU << 32) - 1; +}; + +static u32 armv7_read_num_pmnc_events(void) +{ + u32 nb_cnt; + + /* Read the nb of CNTx counters supported from PMNC */ + nb_cnt = (armv7_pmnc_read() >> ARMV7_PMNC_N_SHIFT) & ARMV7_PMNC_N_MASK; + + /* Add the CPU cycles counter and return */ + return nb_cnt + 1; +} + +static int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A8"; + cpu_pmu->map_event = armv7_a8_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; +} + +static int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A9"; + cpu_pmu->map_event = armv7_a9_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; +} + +static int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A5"; + cpu_pmu->map_event = armv7_a5_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + return 0; +} + +static int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A15"; + cpu_pmu->map_event = armv7_a15_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + return 0; +} + +static int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A7"; + cpu_pmu->map_event = armv7_a7_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + return 0; +} + +static int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A12"; + cpu_pmu->map_event = armv7_a12_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + return 0; +} + +static int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7_a12_pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Cortex-A17"; + return 0; +} + +/* + * Krait Performance Monitor Region Event Selection Register (PMRESRn) + * + * 31 30 24 16 8 0 + * +--------------------------------+ + * PMRESR0 | EN | CC | CC | CC | CC | N = 1, R = 0 + * +--------------------------------+ + * PMRESR1 | EN | CC | CC | CC | CC | N = 1, R = 1 + * +--------------------------------+ + * PMRESR2 | EN | CC | CC | CC | CC | N = 1, R = 2 + * +--------------------------------+ + * VPMRESR0 | EN | CC | CC | CC | CC | N = 2, R = ? + * +--------------------------------+ + * EN | G=3 | G=2 | G=1 | G=0 + * + * Event Encoding: + * + * hwc->config_base = 0xNRCCG + * + * N = prefix, 1 for Krait CPU (PMRESRn), 2 for Venum VFP (VPMRESR) + * R = region register + * CC = class of events the group G is choosing from + * G = group or particular event + * + * Example: 0x12021 is a Krait CPU event in PMRESR2's group 1 with code 2 + * + * A region (R) corresponds to a piece of the CPU (execution unit, instruction + * unit, etc.) while the event code (CC) corresponds to a particular class of + * events (interrupts for example). An event code is broken down into + * groups (G) that can be mapped into the PMU (irq, fiqs, and irq+fiqs for + * example). + */ + +#define KRAIT_EVENT (1 << 16) +#define VENUM_EVENT (2 << 16) +#define KRAIT_EVENT_MASK (KRAIT_EVENT | VENUM_EVENT) +#define PMRESRn_EN BIT(31) + +static u32 krait_read_pmresrn(int n) +{ + u32 val; + + switch (n) { + case 0: + asm volatile("mrc p15, 1, %0, c9, c15, 0" : "=r" (val)); + break; + case 1: + asm volatile("mrc p15, 1, %0, c9, c15, 1" : "=r" (val)); + break; + case 2: + asm volatile("mrc p15, 1, %0, c9, c15, 2" : "=r" (val)); + break; + default: + BUG(); /* Should be validated in krait_pmu_get_event_idx() */ + } + + return val; +} + +static void krait_write_pmresrn(int n, u32 val) +{ + switch (n) { + case 0: + asm volatile("mcr p15, 1, %0, c9, c15, 0" : : "r" (val)); + break; + case 1: + asm volatile("mcr p15, 1, %0, c9, c15, 1" : : "r" (val)); + break; + case 2: + asm volatile("mcr p15, 1, %0, c9, c15, 2" : : "r" (val)); + break; + default: + BUG(); /* Should be validated in krait_pmu_get_event_idx() */ + } +} + +static u32 krait_read_vpmresr0(void) +{ + u32 val; + asm volatile("mrc p10, 7, %0, c11, c0, 0" : "=r" (val)); + return val; +} + +static void krait_write_vpmresr0(u32 val) +{ + asm volatile("mcr p10, 7, %0, c11, c0, 0" : : "r" (val)); +} + +static void krait_pre_vpmresr0(u32 *venum_orig_val, u32 *fp_orig_val) +{ + u32 venum_new_val; + u32 fp_new_val; + + BUG_ON(preemptible()); + /* CPACR Enable CP10 and CP11 access */ + *venum_orig_val = get_copro_access(); + venum_new_val = *venum_orig_val | CPACC_SVC(10) | CPACC_SVC(11); + set_copro_access(venum_new_val); + + /* Enable FPEXC */ + *fp_orig_val = fmrx(FPEXC); + fp_new_val = *fp_orig_val | FPEXC_EN; + fmxr(FPEXC, fp_new_val); +} + +static void krait_post_vpmresr0(u32 venum_orig_val, u32 fp_orig_val) +{ + BUG_ON(preemptible()); + /* Restore FPEXC */ + fmxr(FPEXC, fp_orig_val); + isb(); + /* Restore CPACR */ + set_copro_access(venum_orig_val); +} + +static u32 krait_get_pmresrn_event(unsigned int region) +{ + static const u32 pmresrn_table[] = { KRAIT_PMRESR0_GROUP0, + KRAIT_PMRESR1_GROUP0, + KRAIT_PMRESR2_GROUP0 }; + return pmresrn_table[region]; +} + +static void krait_evt_setup(int idx, u32 config_base) +{ + u32 val; + u32 mask; + u32 vval, fval; + unsigned int region; + unsigned int group; + unsigned int code; + unsigned int group_shift; + bool venum_event; + + venum_event = !!(config_base & VENUM_EVENT); + region = (config_base >> 12) & 0xf; + code = (config_base >> 4) & 0xff; + group = (config_base >> 0) & 0xf; + + group_shift = group * 8; + mask = 0xff << group_shift; + + /* Configure evtsel for the region and group */ + if (venum_event) + val = KRAIT_VPMRESR0_GROUP0; + else + val = krait_get_pmresrn_event(region); + val += group; + /* Mix in mode-exclusion bits */ + val |= config_base & (ARMV7_EXCLUDE_USER | ARMV7_EXCLUDE_PL1); + armv7_pmnc_write_evtsel(idx, val); + + asm volatile("mcr p15, 0, %0, c9, c15, 0" : : "r" (0)); + + if (venum_event) { + krait_pre_vpmresr0(&vval, &fval); + val = krait_read_vpmresr0(); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + krait_write_vpmresr0(val); + krait_post_vpmresr0(vval, fval); + } else { + val = krait_read_pmresrn(region); + val &= ~mask; + val |= code << group_shift; + val |= PMRESRn_EN; + krait_write_pmresrn(region, val); + } +} + +static u32 krait_clear_pmresrn_group(u32 val, int group) +{ + u32 mask; + int group_shift; + + group_shift = group * 8; + mask = 0xff << group_shift; + val &= ~mask; + + /* Don't clear enable bit if entire region isn't disabled */ + if (val & ~PMRESRn_EN) + return val |= PMRESRn_EN; + + return 0; +} + +static void krait_clearpmu(u32 config_base) +{ + u32 val; + u32 vval, fval; + unsigned int region; + unsigned int group; + bool venum_event; + + venum_event = !!(config_base & VENUM_EVENT); + region = (config_base >> 12) & 0xf; + group = (config_base >> 0) & 0xf; + + if (venum_event) { + krait_pre_vpmresr0(&vval, &fval); + val = krait_read_vpmresr0(); + val = krait_clear_pmresrn_group(val, group); + krait_write_vpmresr0(val); + krait_post_vpmresr0(vval, fval); + } else { + val = krait_read_pmresrn(region); + val = krait_clear_pmresrn_group(val, group); + krait_write_pmresrn(region, val); + } +} + +static void krait_pmu_disable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + /* Disable counter and interrupt */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Clear pmresr code (if destined for PMNx counters) + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + krait_clearpmu(hwc->config_base); + + /* Disable interrupt for this counter */ + armv7_pmnc_disable_intens(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void krait_pmu_enable_event(struct perf_event *event) +{ + unsigned long flags; + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + /* + * Enable counter and interrupt, and set the counter to count + * the event that we're interested in. + */ + raw_spin_lock_irqsave(&events->pmu_lock, flags); + + /* Disable counter */ + armv7_pmnc_disable_counter(idx); + + /* + * Set event (if destined for PMNx counters) + * We set the event for the cycle counter because we + * have the ability to perform event filtering. + */ + if (hwc->config_base & KRAIT_EVENT_MASK) + krait_evt_setup(idx, hwc->config_base); + else + armv7_pmnc_write_evtsel(idx, hwc->config_base); + + /* Enable interrupt for this counter */ + armv7_pmnc_enable_intens(idx); + + /* Enable counter */ + armv7_pmnc_enable_counter(idx); + + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void krait_pmu_reset(void *info) +{ + u32 vval, fval; + + armv7pmu_reset(info); + + /* Clear all pmresrs */ + krait_write_pmresrn(0, 0); + krait_write_pmresrn(1, 0); + krait_write_pmresrn(2, 0); + + krait_pre_vpmresr0(&vval, &fval); + krait_write_vpmresr0(0); + krait_post_vpmresr0(vval, fval); +} + +static int krait_event_to_bit(struct perf_event *event, unsigned int region, + unsigned int group) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + + if (hwc->config_base & VENUM_EVENT) + bit = KRAIT_VPMRESR0_GROUP0; + else + bit = krait_get_pmresrn_event(region); + bit -= krait_get_pmresrn_event(0); + bit += group; + /* + * Lower bits are reserved for use by the counters (see + * armv7pmu_get_event_idx() for more info) + */ + bit += ARMV7_IDX_COUNTER_LAST(cpu_pmu) + 1; + + return bit; +} + +/* + * We check for column exclusion constraints here. + * Two events cant use the same group within a pmresr register. + */ +static int krait_pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx; + int bit = -1; + unsigned int prefix; + unsigned int region; + unsigned int code; + unsigned int group; + bool krait_event; + struct hw_perf_event *hwc = &event->hw; + + region = (hwc->config_base >> 12) & 0xf; + code = (hwc->config_base >> 4) & 0xff; + group = (hwc->config_base >> 0) & 0xf; + krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); + + if (krait_event) { + /* Ignore invalid events */ + if (group > 3 || region > 2) + return -EINVAL; + prefix = hwc->config_base & KRAIT_EVENT_MASK; + if (prefix != KRAIT_EVENT && prefix != VENUM_EVENT) + return -EINVAL; + if (prefix == VENUM_EVENT && (code & 0xe0)) + return -EINVAL; + + bit = krait_event_to_bit(event, region, group); + if (test_and_set_bit(bit, cpuc->used_mask)) + return -EAGAIN; + } + + idx = armv7pmu_get_event_idx(cpuc, event); + if (idx < 0 && bit >= 0) + clear_bit(bit, cpuc->used_mask); + + return idx; +} + +static void krait_pmu_clear_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int bit; + struct hw_perf_event *hwc = &event->hw; + unsigned int region; + unsigned int group; + bool krait_event; + + region = (hwc->config_base >> 12) & 0xf; + group = (hwc->config_base >> 0) & 0xf; + krait_event = !!(hwc->config_base & KRAIT_EVENT_MASK); + + if (krait_event) { + bit = krait_event_to_bit(event, region, group); + clear_bit(bit, cpuc->used_mask); + } +} + +static int krait_pmu_init(struct arm_pmu *cpu_pmu) +{ + armv7pmu_init(cpu_pmu); + cpu_pmu->name = "ARMv7 Krait"; + /* Some early versions of Krait don't support PC write events */ + if (of_property_read_bool(cpu_pmu->plat_device->dev.of_node, + "qcom,no-pc-write")) + cpu_pmu->map_event = krait_map_event_no_branch; + else + cpu_pmu->map_event = krait_map_event; + cpu_pmu->num_events = armv7_read_num_pmnc_events(); + cpu_pmu->set_event_filter = armv7pmu_set_event_filter; + cpu_pmu->reset = krait_pmu_reset; + cpu_pmu->enable = krait_pmu_enable_event; + cpu_pmu->disable = krait_pmu_disable_event; + cpu_pmu->get_event_idx = krait_pmu_get_event_idx; + cpu_pmu->clear_event_idx = krait_pmu_clear_event_idx; + return 0; +} +#else +static inline int armv7_a8_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int armv7_a9_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int armv7_a5_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int armv7_a15_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int armv7_a7_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int armv7_a12_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int armv7_a17_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int krait_pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} +#endif /* CONFIG_CPU_V7 */ diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c new file mode 100644 index 00000000000..63990c42fac --- /dev/null +++ b/arch/arm/kernel/perf_event_xscale.c @@ -0,0 +1,840 @@ +/* + * ARMv5 [xscale] Performance counter handling code. + * + * Copyright (C) 2010, ARM Ltd., Will Deacon <will.deacon@arm.com> + * + * Based on the previous xscale OProfile code. + * + * There are two variants of the xscale PMU that we support: + * - xscale1pmu: 2 event counters and a cycle counter + * - xscale2pmu: 4 event counters and a cycle counter + * The two variants share event definitions, but have different + * PMU structures. + */ + +#ifdef CONFIG_CPU_XSCALE +enum xscale_perf_types { + XSCALE_PERFCTR_ICACHE_MISS = 0x00, + XSCALE_PERFCTR_ICACHE_NO_DELIVER = 0x01, + XSCALE_PERFCTR_DATA_STALL = 0x02, + XSCALE_PERFCTR_ITLB_MISS = 0x03, + XSCALE_PERFCTR_DTLB_MISS = 0x04, + XSCALE_PERFCTR_BRANCH = 0x05, + XSCALE_PERFCTR_BRANCH_MISS = 0x06, + XSCALE_PERFCTR_INSTRUCTION = 0x07, + XSCALE_PERFCTR_DCACHE_FULL_STALL = 0x08, + XSCALE_PERFCTR_DCACHE_FULL_STALL_CONTIG = 0x09, + XSCALE_PERFCTR_DCACHE_ACCESS = 0x0A, + XSCALE_PERFCTR_DCACHE_MISS = 0x0B, + XSCALE_PERFCTR_DCACHE_WRITE_BACK = 0x0C, + XSCALE_PERFCTR_PC_CHANGED = 0x0D, + XSCALE_PERFCTR_BCU_REQUEST = 0x10, + XSCALE_PERFCTR_BCU_FULL = 0x11, + XSCALE_PERFCTR_BCU_DRAIN = 0x12, + XSCALE_PERFCTR_BCU_ECC_NO_ELOG = 0x14, + XSCALE_PERFCTR_BCU_1_BIT_ERR = 0x15, + XSCALE_PERFCTR_RMW = 0x16, + /* XSCALE_PERFCTR_CCNT is not hardware defined */ + XSCALE_PERFCTR_CCNT = 0xFE, + XSCALE_PERFCTR_UNUSED = 0xFF, +}; + +enum xscale_counters { + XSCALE_CYCLE_COUNTER = 0, + XSCALE_COUNTER0, + XSCALE_COUNTER1, + XSCALE_COUNTER2, + XSCALE_COUNTER3, +}; + +static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = { + [PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT, + [PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION, + [PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH, + [PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS, + [PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED, + [PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER, + [PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED, +}; + +static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = { + [C(L1D)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(L1I)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(LL)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(DTLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(ITLB)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(BPU)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, + [C(NODE)] = { + [C(OP_READ)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_WRITE)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + [C(OP_PREFETCH)] = { + [C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED, + [C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED, + }, + }, +}; + +#define XSCALE_PMU_ENABLE 0x001 +#define XSCALE_PMN_RESET 0x002 +#define XSCALE_CCNT_RESET 0x004 +#define XSCALE_PMU_RESET (CCNT_RESET | PMN_RESET) +#define XSCALE_PMU_CNT64 0x008 + +#define XSCALE1_OVERFLOWED_MASK 0x700 +#define XSCALE1_CCOUNT_OVERFLOW 0x400 +#define XSCALE1_COUNT0_OVERFLOW 0x100 +#define XSCALE1_COUNT1_OVERFLOW 0x200 +#define XSCALE1_CCOUNT_INT_EN 0x040 +#define XSCALE1_COUNT0_INT_EN 0x010 +#define XSCALE1_COUNT1_INT_EN 0x020 +#define XSCALE1_COUNT0_EVT_SHFT 12 +#define XSCALE1_COUNT0_EVT_MASK (0xff << XSCALE1_COUNT0_EVT_SHFT) +#define XSCALE1_COUNT1_EVT_SHFT 20 +#define XSCALE1_COUNT1_EVT_MASK (0xff << XSCALE1_COUNT1_EVT_SHFT) + +static inline u32 +xscale1pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c0, 0" : "=r" (val)); + return val; +} + +static inline void +xscale1pmu_write_pmnc(u32 val) +{ + /* upper 4bits and 7, 11 are write-as-0 */ + val &= 0xffff77f; + asm volatile("mcr p14, 0, %0, c0, c0, 0" : : "r" (val)); +} + +static inline int +xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = pmnc & XSCALE1_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = pmnc & XSCALE1_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = pmnc & XSCALE1_COUNT1_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale1pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc; + struct perf_sample_data data; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pt_regs *regs; + int idx; + + /* + * NOTE: there's an A stepping erratum that states if an overflow + * bit already exists and another occurs, the previous + * Overflow bit gets cleared. There's no workaround. + * Fixed in B stepping or later. + */ + pmnc = xscale1pmu_read_pmnc(); + + /* + * Write the value back to clear the overflow flags. Overflow + * flags remain in pmnc for use below. We also disable the PMU + * while we process the interrupt. + */ + xscale1pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + if (!(pmnc & XSCALE1_OVERFLOWED_MASK)) + return IRQ_NONE; + + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!event) + continue; + + if (!xscale1_pmnc_counter_has_overflowed(pmnc, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale1pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void xscale1pmu_enable_event(struct perf_event *event) +{ + unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = 0; + evt = XSCALE1_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT0_EVT_SHFT) | + XSCALE1_COUNT0_INT_EN; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_EVT_MASK; + evt = (hwc->config_base << XSCALE1_COUNT1_EVT_SHFT) | + XSCALE1_COUNT1_INT_EN; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void xscale1pmu_disable_event(struct perf_event *event) +{ + unsigned long val, mask, evt, flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + mask = XSCALE1_CCOUNT_INT_EN; + evt = 0; + break; + case XSCALE_COUNTER0: + mask = XSCALE1_COUNT0_INT_EN | XSCALE1_COUNT0_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + mask = XSCALE1_COUNT1_INT_EN | XSCALE1_COUNT1_EVT_MASK; + evt = XSCALE_PERFCTR_UNUSED << XSCALE1_COUNT1_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~mask; + val |= evt; + xscale1pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int +xscale1pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + if (XSCALE_PERFCTR_CCNT == hwc->config_base) { + if (test_and_set_bit(XSCALE_CYCLE_COUNTER, cpuc->used_mask)) + return -EAGAIN; + + return XSCALE_CYCLE_COUNTER; + } else { + if (!test_and_set_bit(XSCALE_COUNTER1, cpuc->used_mask)) + return XSCALE_COUNTER1; + + if (!test_and_set_bit(XSCALE_COUNTER0, cpuc->used_mask)) + return XSCALE_COUNTER0; + + return -EAGAIN; + } +} + +static void xscale1pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long flags, val; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val |= XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void xscale1pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long flags, val; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = xscale1pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale1pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static inline u32 xscale1pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c2, c0, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c3, c0, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void xscale1pmu_write_counter(struct perf_event *event, u32 val) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c2, c0, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c3, c0, 0" : : "r" (val)); + break; + } +} + +static int xscale_map_event(struct perf_event *event) +{ + return armpmu_map_event(event, &xscale_perf_map, + &xscale_perf_cache_map, 0xFF); +} + +static int xscale1pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "xscale1"; + cpu_pmu->handle_irq = xscale1pmu_handle_irq; + cpu_pmu->enable = xscale1pmu_enable_event; + cpu_pmu->disable = xscale1pmu_disable_event; + cpu_pmu->read_counter = xscale1pmu_read_counter; + cpu_pmu->write_counter = xscale1pmu_write_counter; + cpu_pmu->get_event_idx = xscale1pmu_get_event_idx; + cpu_pmu->start = xscale1pmu_start; + cpu_pmu->stop = xscale1pmu_stop; + cpu_pmu->map_event = xscale_map_event; + cpu_pmu->num_events = 3; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; +} + +#define XSCALE2_OVERFLOWED_MASK 0x01f +#define XSCALE2_CCOUNT_OVERFLOW 0x001 +#define XSCALE2_COUNT0_OVERFLOW 0x002 +#define XSCALE2_COUNT1_OVERFLOW 0x004 +#define XSCALE2_COUNT2_OVERFLOW 0x008 +#define XSCALE2_COUNT3_OVERFLOW 0x010 +#define XSCALE2_CCOUNT_INT_EN 0x001 +#define XSCALE2_COUNT0_INT_EN 0x002 +#define XSCALE2_COUNT1_INT_EN 0x004 +#define XSCALE2_COUNT2_INT_EN 0x008 +#define XSCALE2_COUNT3_INT_EN 0x010 +#define XSCALE2_COUNT0_EVT_SHFT 0 +#define XSCALE2_COUNT0_EVT_MASK (0xff << XSCALE2_COUNT0_EVT_SHFT) +#define XSCALE2_COUNT1_EVT_SHFT 8 +#define XSCALE2_COUNT1_EVT_MASK (0xff << XSCALE2_COUNT1_EVT_SHFT) +#define XSCALE2_COUNT2_EVT_SHFT 16 +#define XSCALE2_COUNT2_EVT_MASK (0xff << XSCALE2_COUNT2_EVT_SHFT) +#define XSCALE2_COUNT3_EVT_SHFT 24 +#define XSCALE2_COUNT3_EVT_MASK (0xff << XSCALE2_COUNT3_EVT_SHFT) + +static inline u32 +xscale2pmu_read_pmnc(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (val)); + /* bits 1-2 and 4-23 are read-unpredictable */ + return val & 0xff000009; +} + +static inline void +xscale2pmu_write_pmnc(u32 val) +{ + /* bits 4-23 are write-as-0, 24-31 are write ignored */ + val &= 0xf; + asm volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_overflow_flags(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_overflow_flags(u32 val) +{ + asm volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (val)); +} + +static inline u32 +xscale2pmu_read_event_select(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (val)); + return val; +} + +static inline void +xscale2pmu_write_event_select(u32 val) +{ + asm volatile("mcr p14, 0, %0, c8, c1, 0" : : "r"(val)); +} + +static inline u32 +xscale2pmu_read_int_enable(void) +{ + u32 val; + asm volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (val)); + return val; +} + +static void +xscale2pmu_write_int_enable(u32 val) +{ + asm volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (val)); +} + +static inline int +xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, + enum xscale_counters counter) +{ + int ret = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + ret = of_flags & XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ret = of_flags & XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ret = of_flags & XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ret = of_flags & XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ret = of_flags & XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", counter); + } + + return ret; +} + +static irqreturn_t +xscale2pmu_handle_irq(int irq_num, void *dev) +{ + unsigned long pmnc, of_flags; + struct perf_sample_data data; + struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; + struct pmu_hw_events *cpuc = cpu_pmu->get_hw_events(); + struct pt_regs *regs; + int idx; + + /* Disable the PMU. */ + pmnc = xscale2pmu_read_pmnc(); + xscale2pmu_write_pmnc(pmnc & ~XSCALE_PMU_ENABLE); + + /* Check the overflow flag register. */ + of_flags = xscale2pmu_read_overflow_flags(); + if (!(of_flags & XSCALE2_OVERFLOWED_MASK)) + return IRQ_NONE; + + /* Clear the overflow bits. */ + xscale2pmu_write_overflow_flags(of_flags); + + regs = get_irq_regs(); + + for (idx = 0; idx < cpu_pmu->num_events; ++idx) { + struct perf_event *event = cpuc->events[idx]; + struct hw_perf_event *hwc; + + if (!event) + continue; + + if (!xscale2_pmnc_counter_has_overflowed(of_flags, idx)) + continue; + + hwc = &event->hw; + armpmu_event_update(event); + perf_sample_data_init(&data, 0, hwc->last_period); + if (!armpmu_event_set_period(event)) + continue; + + if (perf_event_overflow(event, &data, regs)) + cpu_pmu->disable(event); + } + + irq_work_run(); + + /* + * Re-enable the PMU. + */ + pmnc = xscale2pmu_read_pmnc() | XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(pmnc); + + return IRQ_HANDLED; +} + +static void xscale2pmu_enable_event(struct perf_event *event) +{ + unsigned long flags, ien, evtsel; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien |= XSCALE2_CCOUNT_INT_EN; + break; + case XSCALE_COUNTER0: + ien |= XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT0_EVT_SHFT; + break; + case XSCALE_COUNTER1: + ien |= XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT1_EVT_SHFT; + break; + case XSCALE_COUNTER2: + ien |= XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT2_EVT_SHFT; + break; + case XSCALE_COUNTER3: + ien |= XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= hwc->config_base << XSCALE2_COUNT3_EVT_SHFT; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void xscale2pmu_disable_event(struct perf_event *event) +{ + unsigned long flags, ien, evtsel, of_flags; + struct arm_pmu *cpu_pmu = to_arm_pmu(event->pmu); + struct hw_perf_event *hwc = &event->hw; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + int idx = hwc->idx; + + ien = xscale2pmu_read_int_enable(); + evtsel = xscale2pmu_read_event_select(); + + switch (idx) { + case XSCALE_CYCLE_COUNTER: + ien &= ~XSCALE2_CCOUNT_INT_EN; + of_flags = XSCALE2_CCOUNT_OVERFLOW; + break; + case XSCALE_COUNTER0: + ien &= ~XSCALE2_COUNT0_INT_EN; + evtsel &= ~XSCALE2_COUNT0_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT0_EVT_SHFT; + of_flags = XSCALE2_COUNT0_OVERFLOW; + break; + case XSCALE_COUNTER1: + ien &= ~XSCALE2_COUNT1_INT_EN; + evtsel &= ~XSCALE2_COUNT1_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT1_EVT_SHFT; + of_flags = XSCALE2_COUNT1_OVERFLOW; + break; + case XSCALE_COUNTER2: + ien &= ~XSCALE2_COUNT2_INT_EN; + evtsel &= ~XSCALE2_COUNT2_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT2_EVT_SHFT; + of_flags = XSCALE2_COUNT2_OVERFLOW; + break; + case XSCALE_COUNTER3: + ien &= ~XSCALE2_COUNT3_INT_EN; + evtsel &= ~XSCALE2_COUNT3_EVT_MASK; + evtsel |= XSCALE_PERFCTR_UNUSED << XSCALE2_COUNT3_EVT_SHFT; + of_flags = XSCALE2_COUNT3_OVERFLOW; + break; + default: + WARN_ONCE(1, "invalid counter number (%d)\n", idx); + return; + } + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + xscale2pmu_write_event_select(evtsel); + xscale2pmu_write_int_enable(ien); + xscale2pmu_write_overflow_flags(of_flags); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static int +xscale2pmu_get_event_idx(struct pmu_hw_events *cpuc, + struct perf_event *event) +{ + int idx = xscale1pmu_get_event_idx(cpuc, event); + if (idx >= 0) + goto out; + + if (!test_and_set_bit(XSCALE_COUNTER3, cpuc->used_mask)) + idx = XSCALE_COUNTER3; + else if (!test_and_set_bit(XSCALE_COUNTER2, cpuc->used_mask)) + idx = XSCALE_COUNTER2; +out: + return idx; +} + +static void xscale2pmu_start(struct arm_pmu *cpu_pmu) +{ + unsigned long flags, val; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = xscale2pmu_read_pmnc() & ~XSCALE_PMU_CNT64; + val |= XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static void xscale2pmu_stop(struct arm_pmu *cpu_pmu) +{ + unsigned long flags, val; + struct pmu_hw_events *events = cpu_pmu->get_hw_events(); + + raw_spin_lock_irqsave(&events->pmu_lock, flags); + val = xscale2pmu_read_pmnc(); + val &= ~XSCALE_PMU_ENABLE; + xscale2pmu_write_pmnc(val); + raw_spin_unlock_irqrestore(&events->pmu_lock, flags); +} + +static inline u32 xscale2pmu_read_counter(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + u32 val = 0; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (val)); + break; + } + + return val; +} + +static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val) +{ + struct hw_perf_event *hwc = &event->hw; + int counter = hwc->idx; + + switch (counter) { + case XSCALE_CYCLE_COUNTER: + asm volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (val)); + break; + case XSCALE_COUNTER0: + asm volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER1: + asm volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER2: + asm volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (val)); + break; + case XSCALE_COUNTER3: + asm volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (val)); + break; + } +} + +static int xscale2pmu_init(struct arm_pmu *cpu_pmu) +{ + cpu_pmu->name = "xscale2"; + cpu_pmu->handle_irq = xscale2pmu_handle_irq; + cpu_pmu->enable = xscale2pmu_enable_event; + cpu_pmu->disable = xscale2pmu_disable_event; + cpu_pmu->read_counter = xscale2pmu_read_counter; + cpu_pmu->write_counter = xscale2pmu_write_counter; + cpu_pmu->get_event_idx = xscale2pmu_get_event_idx; + cpu_pmu->start = xscale2pmu_start; + cpu_pmu->stop = xscale2pmu_stop; + cpu_pmu->map_event = xscale_map_event; + cpu_pmu->num_events = 5; + cpu_pmu->max_period = (1LLU << 32) - 1; + + return 0; +} +#else +static inline int xscale1pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} + +static inline int xscale2pmu_init(struct arm_pmu *cpu_pmu) +{ + return -ENODEV; +} +#endif /* CONFIG_CPU_XSCALE */ diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c new file mode 100644 index 00000000000..6e4379c67cb --- /dev/null +++ b/arch/arm/kernel/perf_regs.c @@ -0,0 +1,30 @@ + +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM_MAX)) + return 0; + + return regs->uregs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + return PERF_SAMPLE_REGS_ABI_32; +} diff --git a/arch/arm/kernel/pj4-cp0.c b/arch/arm/kernel/pj4-cp0.c new file mode 100644 index 00000000000..8153e36b249 --- /dev/null +++ b/arch/arm/kernel/pj4-cp0.c @@ -0,0 +1,133 @@ +/* + * linux/arch/arm/kernel/pj4-cp0.c + * + * PJ4 iWMMXt coprocessor context switching and handling + * + * Copyright (c) 2010 Marvell International Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/signal.h> +#include <linux/sched.h> +#include <linux/init.h> +#include <linux/io.h> +#include <asm/thread_notify.h> +#include <asm/cputype.h> + +static int iwmmxt_do(struct notifier_block *self, unsigned long cmd, void *t) +{ + struct thread_info *thread = t; + + switch (cmd) { + case THREAD_NOTIFY_FLUSH: + /* + * flush_thread() zeroes thread->fpstate, so no need + * to do anything here. + * + * FALLTHROUGH: Ensure we don't try to overwrite our newly + * initialised state information on the first fault. + */ + + case THREAD_NOTIFY_EXIT: + iwmmxt_task_release(thread); + break; + + case THREAD_NOTIFY_SWITCH: + iwmmxt_task_switch(thread); + break; + } + + return NOTIFY_DONE; +} + +static struct notifier_block __maybe_unused iwmmxt_notifier_block = { + .notifier_call = iwmmxt_do, +}; + + +static u32 __init pj4_cp_access_read(void) +{ + u32 value; + + __asm__ __volatile__ ( + "mrc p15, 0, %0, c1, c0, 2\n\t" + : "=r" (value)); + return value; +} + +static void __init pj4_cp_access_write(u32 value) +{ + u32 temp; + + __asm__ __volatile__ ( + "mcr p15, 0, %1, c1, c0, 2\n\t" + "mrc p15, 0, %0, c1, c0, 2\n\t" + "mov %0, %0\n\t" + "sub pc, pc, #4\n\t" + : "=r" (temp) : "r" (value)); +} + +static int __init pj4_get_iwmmxt_version(void) +{ + u32 cp_access, wcid; + + cp_access = pj4_cp_access_read(); + pj4_cp_access_write(cp_access | 0xf); + + /* check if coprocessor 0 and 1 are available */ + if ((pj4_cp_access_read() & 0xf) != 0xf) { + pj4_cp_access_write(cp_access); + return -ENODEV; + } + + /* read iWMMXt coprocessor id register p1, c0 */ + __asm__ __volatile__ ("mrc p1, 0, %0, c0, c0, 0\n" : "=r" (wcid)); + + pj4_cp_access_write(cp_access); + + /* iWMMXt v1 */ + if ((wcid & 0xffffff00) == 0x56051000) + return 1; + /* iWMMXt v2 */ + if ((wcid & 0xffffff00) == 0x56052000) + return 2; + + return -EINVAL; +} + +/* + * Disable CP0/CP1 on boot, and let call_fpe() and the iWMMXt lazy + * switch code handle iWMMXt context switching. + */ +static int __init pj4_cp0_init(void) +{ + u32 __maybe_unused cp_access; + int vers; + + if (!cpu_is_pj4()) + return 0; + + vers = pj4_get_iwmmxt_version(); + if (vers < 0) + return 0; + +#ifndef CONFIG_IWMMXT + pr_info("PJ4 iWMMXt coprocessor detected, but kernel support is missing.\n"); +#else + cp_access = pj4_cp_access_read() & ~0xf; + pj4_cp_access_write(cp_access); + + pr_info("PJ4 iWMMXt v%d coprocessor enabled.\n", vers); + elf_hwcap |= HWCAP_IWMMXT; + thread_register_notifier(&iwmmxt_notifier_block); +#endif + + return 0; +} + +late_initcall(pj4_cp0_init); diff --git a/arch/arm/kernel/pmu.c b/arch/arm/kernel/pmu.c deleted file mode 100644 index b8af96ea62e..00000000000 --- a/arch/arm/kernel/pmu.c +++ /dev/null @@ -1,142 +0,0 @@ -/* - * linux/arch/arm/kernel/pmu.c - * - * Copyright (C) 2009 picoChip Designs Ltd, Jamie Iles - * Copyright (C) 2010 ARM Ltd, Will Deacon - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#define pr_fmt(fmt) "PMU: " fmt - -#include <linux/cpumask.h> -#include <linux/err.h> -#include <linux/interrupt.h> -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/platform_device.h> - -#include <asm/pmu.h> - -static volatile long pmu_lock; - -static struct platform_device *pmu_devices[ARM_NUM_PMU_DEVICES]; - -static int __devinit pmu_device_probe(struct platform_device *pdev) -{ - - if (pdev->id < 0 || pdev->id >= ARM_NUM_PMU_DEVICES) { - pr_warning("received registration request for unknown " - "device %d\n", pdev->id); - return -EINVAL; - } - - if (pmu_devices[pdev->id]) - pr_warning("registering new PMU device type %d overwrites " - "previous registration!\n", pdev->id); - else - pr_info("registered new PMU device of type %d\n", - pdev->id); - - pmu_devices[pdev->id] = pdev; - return 0; -} - -static struct platform_driver pmu_driver = { - .driver = { - .name = "arm-pmu", - }, - .probe = pmu_device_probe, -}; - -static int __init register_pmu_driver(void) -{ - return platform_driver_register(&pmu_driver); -} -device_initcall(register_pmu_driver); - -struct platform_device * -reserve_pmu(enum arm_pmu_type device) -{ - struct platform_device *pdev; - - if (test_and_set_bit_lock(device, &pmu_lock)) { - pdev = ERR_PTR(-EBUSY); - } else if (pmu_devices[device] == NULL) { - clear_bit_unlock(device, &pmu_lock); - pdev = ERR_PTR(-ENODEV); - } else { - pdev = pmu_devices[device]; - } - - return pdev; -} -EXPORT_SYMBOL_GPL(reserve_pmu); - -int -release_pmu(struct platform_device *pdev) -{ - if (WARN_ON(pdev != pmu_devices[pdev->id])) - return -EINVAL; - clear_bit_unlock(pdev->id, &pmu_lock); - return 0; -} -EXPORT_SYMBOL_GPL(release_pmu); - -static int -set_irq_affinity(int irq, - unsigned int cpu) -{ -#ifdef CONFIG_SMP - int err = irq_set_affinity(irq, cpumask_of(cpu)); - if (err) - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, cpu); - return err; -#else - return 0; -#endif -} - -static int -init_cpu_pmu(void) -{ - int i, err = 0; - struct platform_device *pdev = pmu_devices[ARM_PMU_DEVICE_CPU]; - - if (!pdev) { - err = -ENODEV; - goto out; - } - - for (i = 0; i < pdev->num_resources; ++i) { - err = set_irq_affinity(platform_get_irq(pdev, i), i); - if (err) - break; - } - -out: - return err; -} - -int -init_pmu(enum arm_pmu_type device) -{ - int err = 0; - - switch (device) { - case ARM_PMU_DEVICE_CPU: - err = init_cpu_pmu(); - break; - default: - pr_warning("attempt to initialise unknown device %d\n", - device); - err = -EINVAL; - } - - return err; -} -EXPORT_SYMBOL_GPL(init_pmu); diff --git a/arch/arm/kernel/probes-arm.c b/arch/arm/kernel/probes-arm.c new file mode 100644 index 00000000000..8eaef81d834 --- /dev/null +++ b/arch/arm/kernel/probes-arm.c @@ -0,0 +1,734 @@ +/* + * arch/arm/kernel/probes-arm.c + * + * Some code moved here from arch/arm/kernel/kprobes-arm.c + * + * Copyright (C) 2006, 2007 Motorola Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/stddef.h> +#include <linux/ptrace.h> + +#include "probes.h" +#include "probes-arm.h" + +#define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit))))) + +#define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25) + +/* + * To avoid the complications of mimicing single-stepping on a + * processor without a Next-PC or a single-step mode, and to + * avoid having to deal with the side-effects of boosting, we + * simulate or emulate (almost) all ARM instructions. + * + * "Simulation" is where the instruction's behavior is duplicated in + * C code. "Emulation" is where the original instruction is rewritten + * and executed, often by altering its registers. + * + * By having all behavior of the kprobe'd instruction completed before + * returning from the kprobe_handler(), all locks (scheduler and + * interrupt) can safely be released. There is no need for secondary + * breakpoints, no race with MP or preemptable kernels, nor having to + * clean up resources counts at a later time impacting overall system + * performance. By rewriting the instruction, only the minimum registers + * need to be loaded and saved back optimizing performance. + * + * Calling the insnslot_*_rwflags version of a function doesn't hurt + * anything even when the CPSR flags aren't updated by the + * instruction. It's just a little slower in return for saving + * a little space by not having a duplicate function that doesn't + * update the flags. (The same optimization can be said for + * instructions that do or don't perform register writeback) + * Also, instructions can either read the flags, only write the + * flags, or read and write the flags. To save combinations + * rather than for sheer performance, flag functions just assume + * read and write of flags. + */ + +void __kprobes simulate_bbl(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + long iaddr = (long) regs->ARM_pc - 4; + int disp = branch_displacement(insn); + + if (insn & (1 << 24)) + regs->ARM_lr = iaddr + 4; + + regs->ARM_pc = iaddr + 8 + disp; +} + +void __kprobes simulate_blx1(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + long iaddr = (long) regs->ARM_pc - 4; + int disp = branch_displacement(insn); + + regs->ARM_lr = iaddr + 4; + regs->ARM_pc = iaddr + 8 + disp + ((insn >> 23) & 0x2); + regs->ARM_cpsr |= PSR_T_BIT; +} + +void __kprobes simulate_blx2bx(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rm = insn & 0xf; + long rmv = regs->uregs[rm]; + + if (insn & (1 << 5)) + regs->ARM_lr = (long) regs->ARM_pc; + + regs->ARM_pc = rmv & ~0x1; + regs->ARM_cpsr &= ~PSR_T_BIT; + if (rmv & 0x1) + regs->ARM_cpsr |= PSR_T_BIT; +} + +void __kprobes simulate_mrs(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + int rd = (insn >> 12) & 0xf; + unsigned long mask = 0xf8ff03df; /* Mask out execution state */ + regs->uregs[rd] = regs->ARM_cpsr & mask; +} + +void __kprobes simulate_mov_ipsp(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + regs->uregs[12] = regs->uregs[13]; +} + +/* + * For the instruction masking and comparisons in all the "space_*" + * functions below, Do _not_ rearrange the order of tests unless + * you're very, very sure of what you are doing. For the sake of + * efficiency, the masks for some tests sometimes assume other test + * have been done prior to them so the number of patterns to test + * for an instruction set can be as broad as possible to reduce the + * number of tests needed. + */ + +static const union decode_item arm_1111_table[] = { + /* Unconditional instructions */ + + /* memory hint 1111 0100 x001 xxxx xxxx xxxx xxxx xxxx */ + /* PLDI (immediate) 1111 0100 x101 xxxx xxxx xxxx xxxx xxxx */ + /* PLDW (immediate) 1111 0101 x001 xxxx xxxx xxxx xxxx xxxx */ + /* PLD (immediate) 1111 0101 x101 xxxx xxxx xxxx xxxx xxxx */ + DECODE_SIMULATE (0xfe300000, 0xf4100000, PROBES_PRELOAD_IMM), + + /* memory hint 1111 0110 x001 xxxx xxxx xxxx xxx0 xxxx */ + /* PLDI (register) 1111 0110 x101 xxxx xxxx xxxx xxx0 xxxx */ + /* PLDW (register) 1111 0111 x001 xxxx xxxx xxxx xxx0 xxxx */ + /* PLD (register) 1111 0111 x101 xxxx xxxx xxxx xxx0 xxxx */ + DECODE_SIMULATE (0xfe300010, 0xf6100000, PROBES_PRELOAD_REG), + + /* BLX (immediate) 1111 101x xxxx xxxx xxxx xxxx xxxx xxxx */ + DECODE_SIMULATE (0xfe000000, 0xfa000000, PROBES_BRANCH_IMM), + + /* CPS 1111 0001 0000 xxx0 xxxx xxxx xx0x xxxx */ + /* SETEND 1111 0001 0000 0001 xxxx xxxx 0000 xxxx */ + /* SRS 1111 100x x1x0 xxxx xxxx xxxx xxxx xxxx */ + /* RFE 1111 100x x0x1 xxxx xxxx xxxx xxxx xxxx */ + + /* Coprocessor instructions... */ + /* MCRR2 1111 1100 0100 xxxx xxxx xxxx xxxx xxxx */ + /* MRRC2 1111 1100 0101 xxxx xxxx xxxx xxxx xxxx */ + /* LDC2 1111 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ + /* STC2 1111 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ + /* CDP2 1111 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ + /* MCR2 1111 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ + /* MRC2 1111 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ + + /* Other unallocated instructions... */ + DECODE_END +}; + +static const union decode_item arm_cccc_0001_0xx0____0xxx_table[] = { + /* Miscellaneous instructions */ + + /* MRS cpsr cccc 0001 0000 xxxx xxxx xxxx 0000 xxxx */ + DECODE_SIMULATEX(0x0ff000f0, 0x01000000, PROBES_MRS, + REGS(0, NOPC, 0, 0, 0)), + + /* BX cccc 0001 0010 xxxx xxxx xxxx 0001 xxxx */ + DECODE_SIMULATE (0x0ff000f0, 0x01200010, PROBES_BRANCH_REG), + + /* BLX (register) cccc 0001 0010 xxxx xxxx xxxx 0011 xxxx */ + DECODE_SIMULATEX(0x0ff000f0, 0x01200030, PROBES_BRANCH_REG, + REGS(0, 0, 0, 0, NOPC)), + + /* CLZ cccc 0001 0110 xxxx xxxx xxxx 0001 xxxx */ + DECODE_EMULATEX (0x0ff000f0, 0x01600010, PROBES_CLZ, + REGS(0, NOPC, 0, 0, NOPC)), + + /* QADD cccc 0001 0000 xxxx xxxx xxxx 0101 xxxx */ + /* QSUB cccc 0001 0010 xxxx xxxx xxxx 0101 xxxx */ + /* QDADD cccc 0001 0100 xxxx xxxx xxxx 0101 xxxx */ + /* QDSUB cccc 0001 0110 xxxx xxxx xxxx 0101 xxxx */ + DECODE_EMULATEX (0x0f9000f0, 0x01000050, PROBES_SATURATING_ARITHMETIC, + REGS(NOPC, NOPC, 0, 0, NOPC)), + + /* BXJ cccc 0001 0010 xxxx xxxx xxxx 0010 xxxx */ + /* MSR cccc 0001 0x10 xxxx xxxx xxxx 0000 xxxx */ + /* MRS spsr cccc 0001 0100 xxxx xxxx xxxx 0000 xxxx */ + /* BKPT 1110 0001 0010 xxxx xxxx xxxx 0111 xxxx */ + /* SMC cccc 0001 0110 xxxx xxxx xxxx 0111 xxxx */ + /* And unallocated instructions... */ + DECODE_END +}; + +static const union decode_item arm_cccc_0001_0xx0____1xx0_table[] = { + /* Halfword multiply and multiply-accumulate */ + + /* SMLALxy cccc 0001 0100 xxxx xxxx xxxx 1xx0 xxxx */ + DECODE_EMULATEX (0x0ff00090, 0x01400080, PROBES_MUL1, + REGS(NOPC, NOPC, NOPC, 0, NOPC)), + + /* SMULWy cccc 0001 0010 xxxx xxxx xxxx 1x10 xxxx */ + DECODE_OR (0x0ff000b0, 0x012000a0), + /* SMULxy cccc 0001 0110 xxxx xxxx xxxx 1xx0 xxxx */ + DECODE_EMULATEX (0x0ff00090, 0x01600080, PROBES_MUL2, + REGS(NOPC, 0, NOPC, 0, NOPC)), + + /* SMLAxy cccc 0001 0000 xxxx xxxx xxxx 1xx0 xxxx */ + DECODE_OR (0x0ff00090, 0x01000080), + /* SMLAWy cccc 0001 0010 xxxx xxxx xxxx 1x00 xxxx */ + DECODE_EMULATEX (0x0ff000b0, 0x01200080, PROBES_MUL2, + REGS(NOPC, NOPC, NOPC, 0, NOPC)), + + DECODE_END +}; + +static const union decode_item arm_cccc_0000_____1001_table[] = { + /* Multiply and multiply-accumulate */ + + /* MUL cccc 0000 0000 xxxx xxxx xxxx 1001 xxxx */ + /* MULS cccc 0000 0001 xxxx xxxx xxxx 1001 xxxx */ + DECODE_EMULATEX (0x0fe000f0, 0x00000090, PROBES_MUL2, + REGS(NOPC, 0, NOPC, 0, NOPC)), + + /* MLA cccc 0000 0010 xxxx xxxx xxxx 1001 xxxx */ + /* MLAS cccc 0000 0011 xxxx xxxx xxxx 1001 xxxx */ + DECODE_OR (0x0fe000f0, 0x00200090), + /* MLS cccc 0000 0110 xxxx xxxx xxxx 1001 xxxx */ + DECODE_EMULATEX (0x0ff000f0, 0x00600090, PROBES_MUL2, + REGS(NOPC, NOPC, NOPC, 0, NOPC)), + + /* UMAAL cccc 0000 0100 xxxx xxxx xxxx 1001 xxxx */ + DECODE_OR (0x0ff000f0, 0x00400090), + /* UMULL cccc 0000 1000 xxxx xxxx xxxx 1001 xxxx */ + /* UMULLS cccc 0000 1001 xxxx xxxx xxxx 1001 xxxx */ + /* UMLAL cccc 0000 1010 xxxx xxxx xxxx 1001 xxxx */ + /* UMLALS cccc 0000 1011 xxxx xxxx xxxx 1001 xxxx */ + /* SMULL cccc 0000 1100 xxxx xxxx xxxx 1001 xxxx */ + /* SMULLS cccc 0000 1101 xxxx xxxx xxxx 1001 xxxx */ + /* SMLAL cccc 0000 1110 xxxx xxxx xxxx 1001 xxxx */ + /* SMLALS cccc 0000 1111 xxxx xxxx xxxx 1001 xxxx */ + DECODE_EMULATEX (0x0f8000f0, 0x00800090, PROBES_MUL1, + REGS(NOPC, NOPC, NOPC, 0, NOPC)), + + DECODE_END +}; + +static const union decode_item arm_cccc_0001_____1001_table[] = { + /* Synchronization primitives */ + +#if __LINUX_ARM_ARCH__ < 6 + /* Deprecated on ARMv6 and may be UNDEFINED on v7 */ + /* SMP/SWPB cccc 0001 0x00 xxxx xxxx xxxx 1001 xxxx */ + DECODE_EMULATEX (0x0fb000f0, 0x01000090, PROBES_SWP, + REGS(NOPC, NOPC, 0, 0, NOPC)), +#endif + /* LDREX/STREX{,D,B,H} cccc 0001 1xxx xxxx xxxx xxxx 1001 xxxx */ + /* And unallocated instructions... */ + DECODE_END +}; + +static const union decode_item arm_cccc_000x_____1xx1_table[] = { + /* Extra load/store instructions */ + + /* STRHT cccc 0000 xx10 xxxx xxxx xxxx 1011 xxxx */ + /* ??? cccc 0000 xx10 xxxx xxxx xxxx 11x1 xxxx */ + /* LDRHT cccc 0000 xx11 xxxx xxxx xxxx 1011 xxxx */ + /* LDRSBT cccc 0000 xx11 xxxx xxxx xxxx 1101 xxxx */ + /* LDRSHT cccc 0000 xx11 xxxx xxxx xxxx 1111 xxxx */ + DECODE_REJECT (0x0f200090, 0x00200090), + + /* LDRD/STRD lr,pc,{... cccc 000x x0x0 xxxx 111x xxxx 1101 xxxx */ + DECODE_REJECT (0x0e10e0d0, 0x0000e0d0), + + /* LDRD (register) cccc 000x x0x0 xxxx xxxx xxxx 1101 xxxx */ + /* STRD (register) cccc 000x x0x0 xxxx xxxx xxxx 1111 xxxx */ + DECODE_EMULATEX (0x0e5000d0, 0x000000d0, PROBES_LDRSTRD, + REGS(NOPCWB, NOPCX, 0, 0, NOPC)), + + /* LDRD (immediate) cccc 000x x1x0 xxxx xxxx xxxx 1101 xxxx */ + /* STRD (immediate) cccc 000x x1x0 xxxx xxxx xxxx 1111 xxxx */ + DECODE_EMULATEX (0x0e5000d0, 0x004000d0, PROBES_LDRSTRD, + REGS(NOPCWB, NOPCX, 0, 0, 0)), + + /* STRH (register) cccc 000x x0x0 xxxx xxxx xxxx 1011 xxxx */ + DECODE_EMULATEX (0x0e5000f0, 0x000000b0, PROBES_STORE_EXTRA, + REGS(NOPCWB, NOPC, 0, 0, NOPC)), + + /* LDRH (register) cccc 000x x0x1 xxxx xxxx xxxx 1011 xxxx */ + /* LDRSB (register) cccc 000x x0x1 xxxx xxxx xxxx 1101 xxxx */ + /* LDRSH (register) cccc 000x x0x1 xxxx xxxx xxxx 1111 xxxx */ + DECODE_EMULATEX (0x0e500090, 0x00100090, PROBES_LOAD_EXTRA, + REGS(NOPCWB, NOPC, 0, 0, NOPC)), + + /* STRH (immediate) cccc 000x x1x0 xxxx xxxx xxxx 1011 xxxx */ + DECODE_EMULATEX (0x0e5000f0, 0x004000b0, PROBES_STORE_EXTRA, + REGS(NOPCWB, NOPC, 0, 0, 0)), + + /* LDRH (immediate) cccc 000x x1x1 xxxx xxxx xxxx 1011 xxxx */ + /* LDRSB (immediate) cccc 000x x1x1 xxxx xxxx xxxx 1101 xxxx */ + /* LDRSH (immediate) cccc 000x x1x1 xxxx xxxx xxxx 1111 xxxx */ + DECODE_EMULATEX (0x0e500090, 0x00500090, PROBES_LOAD_EXTRA, + REGS(NOPCWB, NOPC, 0, 0, 0)), + + DECODE_END +}; + +static const union decode_item arm_cccc_000x_table[] = { + /* Data-processing (register) */ + + /* <op>S PC, ... cccc 000x xxx1 xxxx 1111 xxxx xxxx xxxx */ + DECODE_REJECT (0x0e10f000, 0x0010f000), + + /* MOV IP, SP 1110 0001 1010 0000 1100 0000 0000 1101 */ + DECODE_SIMULATE (0xffffffff, 0xe1a0c00d, PROBES_MOV_IP_SP), + + /* TST (register) cccc 0001 0001 xxxx xxxx xxxx xxx0 xxxx */ + /* TEQ (register) cccc 0001 0011 xxxx xxxx xxxx xxx0 xxxx */ + /* CMP (register) cccc 0001 0101 xxxx xxxx xxxx xxx0 xxxx */ + /* CMN (register) cccc 0001 0111 xxxx xxxx xxxx xxx0 xxxx */ + DECODE_EMULATEX (0x0f900010, 0x01100000, PROBES_DATA_PROCESSING_REG, + REGS(ANY, 0, 0, 0, ANY)), + + /* MOV (register) cccc 0001 101x xxxx xxxx xxxx xxx0 xxxx */ + /* MVN (register) cccc 0001 111x xxxx xxxx xxxx xxx0 xxxx */ + DECODE_EMULATEX (0x0fa00010, 0x01a00000, PROBES_DATA_PROCESSING_REG, + REGS(0, ANY, 0, 0, ANY)), + + /* AND (register) cccc 0000 000x xxxx xxxx xxxx xxx0 xxxx */ + /* EOR (register) cccc 0000 001x xxxx xxxx xxxx xxx0 xxxx */ + /* SUB (register) cccc 0000 010x xxxx xxxx xxxx xxx0 xxxx */ + /* RSB (register) cccc 0000 011x xxxx xxxx xxxx xxx0 xxxx */ + /* ADD (register) cccc 0000 100x xxxx xxxx xxxx xxx0 xxxx */ + /* ADC (register) cccc 0000 101x xxxx xxxx xxxx xxx0 xxxx */ + /* SBC (register) cccc 0000 110x xxxx xxxx xxxx xxx0 xxxx */ + /* RSC (register) cccc 0000 111x xxxx xxxx xxxx xxx0 xxxx */ + /* ORR (register) cccc 0001 100x xxxx xxxx xxxx xxx0 xxxx */ + /* BIC (register) cccc 0001 110x xxxx xxxx xxxx xxx0 xxxx */ + DECODE_EMULATEX (0x0e000010, 0x00000000, PROBES_DATA_PROCESSING_REG, + REGS(ANY, ANY, 0, 0, ANY)), + + /* TST (reg-shift reg) cccc 0001 0001 xxxx xxxx xxxx 0xx1 xxxx */ + /* TEQ (reg-shift reg) cccc 0001 0011 xxxx xxxx xxxx 0xx1 xxxx */ + /* CMP (reg-shift reg) cccc 0001 0101 xxxx xxxx xxxx 0xx1 xxxx */ + /* CMN (reg-shift reg) cccc 0001 0111 xxxx xxxx xxxx 0xx1 xxxx */ + DECODE_EMULATEX (0x0f900090, 0x01100010, PROBES_DATA_PROCESSING_REG, + REGS(NOPC, 0, NOPC, 0, NOPC)), + + /* MOV (reg-shift reg) cccc 0001 101x xxxx xxxx xxxx 0xx1 xxxx */ + /* MVN (reg-shift reg) cccc 0001 111x xxxx xxxx xxxx 0xx1 xxxx */ + DECODE_EMULATEX (0x0fa00090, 0x01a00010, PROBES_DATA_PROCESSING_REG, + REGS(0, NOPC, NOPC, 0, NOPC)), + + /* AND (reg-shift reg) cccc 0000 000x xxxx xxxx xxxx 0xx1 xxxx */ + /* EOR (reg-shift reg) cccc 0000 001x xxxx xxxx xxxx 0xx1 xxxx */ + /* SUB (reg-shift reg) cccc 0000 010x xxxx xxxx xxxx 0xx1 xxxx */ + /* RSB (reg-shift reg) cccc 0000 011x xxxx xxxx xxxx 0xx1 xxxx */ + /* ADD (reg-shift reg) cccc 0000 100x xxxx xxxx xxxx 0xx1 xxxx */ + /* ADC (reg-shift reg) cccc 0000 101x xxxx xxxx xxxx 0xx1 xxxx */ + /* SBC (reg-shift reg) cccc 0000 110x xxxx xxxx xxxx 0xx1 xxxx */ + /* RSC (reg-shift reg) cccc 0000 111x xxxx xxxx xxxx 0xx1 xxxx */ + /* ORR (reg-shift reg) cccc 0001 100x xxxx xxxx xxxx 0xx1 xxxx */ + /* BIC (reg-shift reg) cccc 0001 110x xxxx xxxx xxxx 0xx1 xxxx */ + DECODE_EMULATEX (0x0e000090, 0x00000010, PROBES_DATA_PROCESSING_REG, + REGS(NOPC, NOPC, NOPC, 0, NOPC)), + + DECODE_END +}; + +static const union decode_item arm_cccc_001x_table[] = { + /* Data-processing (immediate) */ + + /* MOVW cccc 0011 0000 xxxx xxxx xxxx xxxx xxxx */ + /* MOVT cccc 0011 0100 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0x0fb00000, 0x03000000, PROBES_DATA_PROCESSING_IMM, + REGS(0, NOPC, 0, 0, 0)), + + /* YIELD cccc 0011 0010 0000 xxxx xxxx 0000 0001 */ + DECODE_OR (0x0fff00ff, 0x03200001), + /* SEV cccc 0011 0010 0000 xxxx xxxx 0000 0100 */ + DECODE_EMULATE (0x0fff00ff, 0x03200004, PROBES_EMULATE_NONE), + /* NOP cccc 0011 0010 0000 xxxx xxxx 0000 0000 */ + /* WFE cccc 0011 0010 0000 xxxx xxxx 0000 0010 */ + /* WFI cccc 0011 0010 0000 xxxx xxxx 0000 0011 */ + DECODE_SIMULATE (0x0fff00fc, 0x03200000, PROBES_SIMULATE_NOP), + /* DBG cccc 0011 0010 0000 xxxx xxxx ffff xxxx */ + /* unallocated hints cccc 0011 0010 0000 xxxx xxxx xxxx xxxx */ + /* MSR (immediate) cccc 0011 0x10 xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0x0fb00000, 0x03200000), + + /* <op>S PC, ... cccc 001x xxx1 xxxx 1111 xxxx xxxx xxxx */ + DECODE_REJECT (0x0e10f000, 0x0210f000), + + /* TST (immediate) cccc 0011 0001 xxxx xxxx xxxx xxxx xxxx */ + /* TEQ (immediate) cccc 0011 0011 xxxx xxxx xxxx xxxx xxxx */ + /* CMP (immediate) cccc 0011 0101 xxxx xxxx xxxx xxxx xxxx */ + /* CMN (immediate) cccc 0011 0111 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0x0f900000, 0x03100000, PROBES_DATA_PROCESSING_IMM, + REGS(ANY, 0, 0, 0, 0)), + + /* MOV (immediate) cccc 0011 101x xxxx xxxx xxxx xxxx xxxx */ + /* MVN (immediate) cccc 0011 111x xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0x0fa00000, 0x03a00000, PROBES_DATA_PROCESSING_IMM, + REGS(0, ANY, 0, 0, 0)), + + /* AND (immediate) cccc 0010 000x xxxx xxxx xxxx xxxx xxxx */ + /* EOR (immediate) cccc 0010 001x xxxx xxxx xxxx xxxx xxxx */ + /* SUB (immediate) cccc 0010 010x xxxx xxxx xxxx xxxx xxxx */ + /* RSB (immediate) cccc 0010 011x xxxx xxxx xxxx xxxx xxxx */ + /* ADD (immediate) cccc 0010 100x xxxx xxxx xxxx xxxx xxxx */ + /* ADC (immediate) cccc 0010 101x xxxx xxxx xxxx xxxx xxxx */ + /* SBC (immediate) cccc 0010 110x xxxx xxxx xxxx xxxx xxxx */ + /* RSC (immediate) cccc 0010 111x xxxx xxxx xxxx xxxx xxxx */ + /* ORR (immediate) cccc 0011 100x xxxx xxxx xxxx xxxx xxxx */ + /* BIC (immediate) cccc 0011 110x xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0x0e000000, 0x02000000, PROBES_DATA_PROCESSING_IMM, + REGS(ANY, ANY, 0, 0, 0)), + + DECODE_END +}; + +static const union decode_item arm_cccc_0110_____xxx1_table[] = { + /* Media instructions */ + + /* SEL cccc 0110 1000 xxxx xxxx xxxx 1011 xxxx */ + DECODE_EMULATEX (0x0ff000f0, 0x068000b0, PROBES_SATURATE, + REGS(NOPC, NOPC, 0, 0, NOPC)), + + /* SSAT cccc 0110 101x xxxx xxxx xxxx xx01 xxxx */ + /* USAT cccc 0110 111x xxxx xxxx xxxx xx01 xxxx */ + DECODE_OR(0x0fa00030, 0x06a00010), + /* SSAT16 cccc 0110 1010 xxxx xxxx xxxx 0011 xxxx */ + /* USAT16 cccc 0110 1110 xxxx xxxx xxxx 0011 xxxx */ + DECODE_EMULATEX (0x0fb000f0, 0x06a00030, PROBES_SATURATE, + REGS(0, NOPC, 0, 0, NOPC)), + + /* REV cccc 0110 1011 xxxx xxxx xxxx 0011 xxxx */ + /* REV16 cccc 0110 1011 xxxx xxxx xxxx 1011 xxxx */ + /* RBIT cccc 0110 1111 xxxx xxxx xxxx 0011 xxxx */ + /* REVSH cccc 0110 1111 xxxx xxxx xxxx 1011 xxxx */ + DECODE_EMULATEX (0x0fb00070, 0x06b00030, PROBES_REV, + REGS(0, NOPC, 0, 0, NOPC)), + + /* ??? cccc 0110 0x00 xxxx xxxx xxxx xxx1 xxxx */ + DECODE_REJECT (0x0fb00010, 0x06000010), + /* ??? cccc 0110 0xxx xxxx xxxx xxxx 1011 xxxx */ + DECODE_REJECT (0x0f8000f0, 0x060000b0), + /* ??? cccc 0110 0xxx xxxx xxxx xxxx 1101 xxxx */ + DECODE_REJECT (0x0f8000f0, 0x060000d0), + /* SADD16 cccc 0110 0001 xxxx xxxx xxxx 0001 xxxx */ + /* SADDSUBX cccc 0110 0001 xxxx xxxx xxxx 0011 xxxx */ + /* SSUBADDX cccc 0110 0001 xxxx xxxx xxxx 0101 xxxx */ + /* SSUB16 cccc 0110 0001 xxxx xxxx xxxx 0111 xxxx */ + /* SADD8 cccc 0110 0001 xxxx xxxx xxxx 1001 xxxx */ + /* SSUB8 cccc 0110 0001 xxxx xxxx xxxx 1111 xxxx */ + /* QADD16 cccc 0110 0010 xxxx xxxx xxxx 0001 xxxx */ + /* QADDSUBX cccc 0110 0010 xxxx xxxx xxxx 0011 xxxx */ + /* QSUBADDX cccc 0110 0010 xxxx xxxx xxxx 0101 xxxx */ + /* QSUB16 cccc 0110 0010 xxxx xxxx xxxx 0111 xxxx */ + /* QADD8 cccc 0110 0010 xxxx xxxx xxxx 1001 xxxx */ + /* QSUB8 cccc 0110 0010 xxxx xxxx xxxx 1111 xxxx */ + /* SHADD16 cccc 0110 0011 xxxx xxxx xxxx 0001 xxxx */ + /* SHADDSUBX cccc 0110 0011 xxxx xxxx xxxx 0011 xxxx */ + /* SHSUBADDX cccc 0110 0011 xxxx xxxx xxxx 0101 xxxx */ + /* SHSUB16 cccc 0110 0011 xxxx xxxx xxxx 0111 xxxx */ + /* SHADD8 cccc 0110 0011 xxxx xxxx xxxx 1001 xxxx */ + /* SHSUB8 cccc 0110 0011 xxxx xxxx xxxx 1111 xxxx */ + /* UADD16 cccc 0110 0101 xxxx xxxx xxxx 0001 xxxx */ + /* UADDSUBX cccc 0110 0101 xxxx xxxx xxxx 0011 xxxx */ + /* USUBADDX cccc 0110 0101 xxxx xxxx xxxx 0101 xxxx */ + /* USUB16 cccc 0110 0101 xxxx xxxx xxxx 0111 xxxx */ + /* UADD8 cccc 0110 0101 xxxx xxxx xxxx 1001 xxxx */ + /* USUB8 cccc 0110 0101 xxxx xxxx xxxx 1111 xxxx */ + /* UQADD16 cccc 0110 0110 xxxx xxxx xxxx 0001 xxxx */ + /* UQADDSUBX cccc 0110 0110 xxxx xxxx xxxx 0011 xxxx */ + /* UQSUBADDX cccc 0110 0110 xxxx xxxx xxxx 0101 xxxx */ + /* UQSUB16 cccc 0110 0110 xxxx xxxx xxxx 0111 xxxx */ + /* UQADD8 cccc 0110 0110 xxxx xxxx xxxx 1001 xxxx */ + /* UQSUB8 cccc 0110 0110 xxxx xxxx xxxx 1111 xxxx */ + /* UHADD16 cccc 0110 0111 xxxx xxxx xxxx 0001 xxxx */ + /* UHADDSUBX cccc 0110 0111 xxxx xxxx xxxx 0011 xxxx */ + /* UHSUBADDX cccc 0110 0111 xxxx xxxx xxxx 0101 xxxx */ + /* UHSUB16 cccc 0110 0111 xxxx xxxx xxxx 0111 xxxx */ + /* UHADD8 cccc 0110 0111 xxxx xxxx xxxx 1001 xxxx */ + /* UHSUB8 cccc 0110 0111 xxxx xxxx xxxx 1111 xxxx */ + DECODE_EMULATEX (0x0f800010, 0x06000010, PROBES_MMI, + REGS(NOPC, NOPC, 0, 0, NOPC)), + + /* PKHBT cccc 0110 1000 xxxx xxxx xxxx x001 xxxx */ + /* PKHTB cccc 0110 1000 xxxx xxxx xxxx x101 xxxx */ + DECODE_EMULATEX (0x0ff00030, 0x06800010, PROBES_PACK, + REGS(NOPC, NOPC, 0, 0, NOPC)), + + /* ??? cccc 0110 1001 xxxx xxxx xxxx 0111 xxxx */ + /* ??? cccc 0110 1101 xxxx xxxx xxxx 0111 xxxx */ + DECODE_REJECT (0x0fb000f0, 0x06900070), + + /* SXTB16 cccc 0110 1000 1111 xxxx xxxx 0111 xxxx */ + /* SXTB cccc 0110 1010 1111 xxxx xxxx 0111 xxxx */ + /* SXTH cccc 0110 1011 1111 xxxx xxxx 0111 xxxx */ + /* UXTB16 cccc 0110 1100 1111 xxxx xxxx 0111 xxxx */ + /* UXTB cccc 0110 1110 1111 xxxx xxxx 0111 xxxx */ + /* UXTH cccc 0110 1111 1111 xxxx xxxx 0111 xxxx */ + DECODE_EMULATEX (0x0f8f00f0, 0x068f0070, PROBES_EXTEND, + REGS(0, NOPC, 0, 0, NOPC)), + + /* SXTAB16 cccc 0110 1000 xxxx xxxx xxxx 0111 xxxx */ + /* SXTAB cccc 0110 1010 xxxx xxxx xxxx 0111 xxxx */ + /* SXTAH cccc 0110 1011 xxxx xxxx xxxx 0111 xxxx */ + /* UXTAB16 cccc 0110 1100 xxxx xxxx xxxx 0111 xxxx */ + /* UXTAB cccc 0110 1110 xxxx xxxx xxxx 0111 xxxx */ + /* UXTAH cccc 0110 1111 xxxx xxxx xxxx 0111 xxxx */ + DECODE_EMULATEX (0x0f8000f0, 0x06800070, PROBES_EXTEND_ADD, + REGS(NOPCX, NOPC, 0, 0, NOPC)), + + DECODE_END +}; + +static const union decode_item arm_cccc_0111_____xxx1_table[] = { + /* Media instructions */ + + /* UNDEFINED cccc 0111 1111 xxxx xxxx xxxx 1111 xxxx */ + DECODE_REJECT (0x0ff000f0, 0x07f000f0), + + /* SMLALD cccc 0111 0100 xxxx xxxx xxxx 00x1 xxxx */ + /* SMLSLD cccc 0111 0100 xxxx xxxx xxxx 01x1 xxxx */ + DECODE_EMULATEX (0x0ff00090, 0x07400010, PROBES_MUL_ADD_LONG, + REGS(NOPC, NOPC, NOPC, 0, NOPC)), + + /* SMUAD cccc 0111 0000 xxxx 1111 xxxx 00x1 xxxx */ + /* SMUSD cccc 0111 0000 xxxx 1111 xxxx 01x1 xxxx */ + DECODE_OR (0x0ff0f090, 0x0700f010), + /* SMMUL cccc 0111 0101 xxxx 1111 xxxx 00x1 xxxx */ + DECODE_OR (0x0ff0f0d0, 0x0750f010), + /* USAD8 cccc 0111 1000 xxxx 1111 xxxx 0001 xxxx */ + DECODE_EMULATEX (0x0ff0f0f0, 0x0780f010, PROBES_MUL_ADD, + REGS(NOPC, 0, NOPC, 0, NOPC)), + + /* SMLAD cccc 0111 0000 xxxx xxxx xxxx 00x1 xxxx */ + /* SMLSD cccc 0111 0000 xxxx xxxx xxxx 01x1 xxxx */ + DECODE_OR (0x0ff00090, 0x07000010), + /* SMMLA cccc 0111 0101 xxxx xxxx xxxx 00x1 xxxx */ + DECODE_OR (0x0ff000d0, 0x07500010), + /* USADA8 cccc 0111 1000 xxxx xxxx xxxx 0001 xxxx */ + DECODE_EMULATEX (0x0ff000f0, 0x07800010, PROBES_MUL_ADD, + REGS(NOPC, NOPCX, NOPC, 0, NOPC)), + + /* SMMLS cccc 0111 0101 xxxx xxxx xxxx 11x1 xxxx */ + DECODE_EMULATEX (0x0ff000d0, 0x075000d0, PROBES_MUL_ADD, + REGS(NOPC, NOPC, NOPC, 0, NOPC)), + + /* SBFX cccc 0111 101x xxxx xxxx xxxx x101 xxxx */ + /* UBFX cccc 0111 111x xxxx xxxx xxxx x101 xxxx */ + DECODE_EMULATEX (0x0fa00070, 0x07a00050, PROBES_BITFIELD, + REGS(0, NOPC, 0, 0, NOPC)), + + /* BFC cccc 0111 110x xxxx xxxx xxxx x001 1111 */ + DECODE_EMULATEX (0x0fe0007f, 0x07c0001f, PROBES_BITFIELD, + REGS(0, NOPC, 0, 0, 0)), + + /* BFI cccc 0111 110x xxxx xxxx xxxx x001 xxxx */ + DECODE_EMULATEX (0x0fe00070, 0x07c00010, PROBES_BITFIELD, + REGS(0, NOPC, 0, 0, NOPCX)), + + DECODE_END +}; + +static const union decode_item arm_cccc_01xx_table[] = { + /* Load/store word and unsigned byte */ + + /* LDRB/STRB pc,[...] cccc 01xx x0xx xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0x0c40f000, 0x0440f000), + + /* STRT cccc 01x0 x010 xxxx xxxx xxxx xxxx xxxx */ + /* LDRT cccc 01x0 x011 xxxx xxxx xxxx xxxx xxxx */ + /* STRBT cccc 01x0 x110 xxxx xxxx xxxx xxxx xxxx */ + /* LDRBT cccc 01x0 x111 xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0x0d200000, 0x04200000), + + /* STR (immediate) cccc 010x x0x0 xxxx xxxx xxxx xxxx xxxx */ + /* STRB (immediate) cccc 010x x1x0 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0x0e100000, 0x04000000, PROBES_STORE, + REGS(NOPCWB, ANY, 0, 0, 0)), + + /* LDR (immediate) cccc 010x x0x1 xxxx xxxx xxxx xxxx xxxx */ + /* LDRB (immediate) cccc 010x x1x1 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0x0e100000, 0x04100000, PROBES_LOAD, + REGS(NOPCWB, ANY, 0, 0, 0)), + + /* STR (register) cccc 011x x0x0 xxxx xxxx xxxx xxxx xxxx */ + /* STRB (register) cccc 011x x1x0 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0x0e100000, 0x06000000, PROBES_STORE, + REGS(NOPCWB, ANY, 0, 0, NOPC)), + + /* LDR (register) cccc 011x x0x1 xxxx xxxx xxxx xxxx xxxx */ + /* LDRB (register) cccc 011x x1x1 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0x0e100000, 0x06100000, PROBES_LOAD, + REGS(NOPCWB, ANY, 0, 0, NOPC)), + + DECODE_END +}; + +static const union decode_item arm_cccc_100x_table[] = { + /* Block data transfer instructions */ + + /* LDM cccc 100x x0x1 xxxx xxxx xxxx xxxx xxxx */ + /* STM cccc 100x x0x0 xxxx xxxx xxxx xxxx xxxx */ + DECODE_CUSTOM (0x0e400000, 0x08000000, PROBES_LDMSTM), + + /* STM (user registers) cccc 100x x1x0 xxxx xxxx xxxx xxxx xxxx */ + /* LDM (user registers) cccc 100x x1x1 xxxx 0xxx xxxx xxxx xxxx */ + /* LDM (exception ret) cccc 100x x1x1 xxxx 1xxx xxxx xxxx xxxx */ + DECODE_END +}; + +const union decode_item probes_decode_arm_table[] = { + /* + * Unconditional instructions + * 1111 xxxx xxxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xf0000000, 0xf0000000, arm_1111_table), + + /* + * Miscellaneous instructions + * cccc 0001 0xx0 xxxx xxxx xxxx 0xxx xxxx + */ + DECODE_TABLE (0x0f900080, 0x01000000, arm_cccc_0001_0xx0____0xxx_table), + + /* + * Halfword multiply and multiply-accumulate + * cccc 0001 0xx0 xxxx xxxx xxxx 1xx0 xxxx + */ + DECODE_TABLE (0x0f900090, 0x01000080, arm_cccc_0001_0xx0____1xx0_table), + + /* + * Multiply and multiply-accumulate + * cccc 0000 xxxx xxxx xxxx xxxx 1001 xxxx + */ + DECODE_TABLE (0x0f0000f0, 0x00000090, arm_cccc_0000_____1001_table), + + /* + * Synchronization primitives + * cccc 0001 xxxx xxxx xxxx xxxx 1001 xxxx + */ + DECODE_TABLE (0x0f0000f0, 0x01000090, arm_cccc_0001_____1001_table), + + /* + * Extra load/store instructions + * cccc 000x xxxx xxxx xxxx xxxx 1xx1 xxxx + */ + DECODE_TABLE (0x0e000090, 0x00000090, arm_cccc_000x_____1xx1_table), + + /* + * Data-processing (register) + * cccc 000x xxxx xxxx xxxx xxxx xxx0 xxxx + * Data-processing (register-shifted register) + * cccc 000x xxxx xxxx xxxx xxxx 0xx1 xxxx + */ + DECODE_TABLE (0x0e000000, 0x00000000, arm_cccc_000x_table), + + /* + * Data-processing (immediate) + * cccc 001x xxxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0x0e000000, 0x02000000, arm_cccc_001x_table), + + /* + * Media instructions + * cccc 011x xxxx xxxx xxxx xxxx xxx1 xxxx + */ + DECODE_TABLE (0x0f000010, 0x06000010, arm_cccc_0110_____xxx1_table), + DECODE_TABLE (0x0f000010, 0x07000010, arm_cccc_0111_____xxx1_table), + + /* + * Load/store word and unsigned byte + * cccc 01xx xxxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0x0c000000, 0x04000000, arm_cccc_01xx_table), + + /* + * Block data transfer instructions + * cccc 100x xxxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0x0e000000, 0x08000000, arm_cccc_100x_table), + + /* B cccc 1010 xxxx xxxx xxxx xxxx xxxx xxxx */ + /* BL cccc 1011 xxxx xxxx xxxx xxxx xxxx xxxx */ + DECODE_SIMULATE (0x0e000000, 0x0a000000, PROBES_BRANCH), + + /* + * Supervisor Call, and coprocessor instructions + */ + + /* MCRR cccc 1100 0100 xxxx xxxx xxxx xxxx xxxx */ + /* MRRC cccc 1100 0101 xxxx xxxx xxxx xxxx xxxx */ + /* LDC cccc 110x xxx1 xxxx xxxx xxxx xxxx xxxx */ + /* STC cccc 110x xxx0 xxxx xxxx xxxx xxxx xxxx */ + /* CDP cccc 1110 xxxx xxxx xxxx xxxx xxx0 xxxx */ + /* MCR cccc 1110 xxx0 xxxx xxxx xxxx xxx1 xxxx */ + /* MRC cccc 1110 xxx1 xxxx xxxx xxxx xxx1 xxxx */ + /* SVC cccc 1111 xxxx xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0x0c000000, 0x0c000000), + + DECODE_END +}; +#ifdef CONFIG_ARM_KPROBES_TEST_MODULE +EXPORT_SYMBOL_GPL(probes_decode_arm_table); +#endif + +static void __kprobes arm_singlestep(probes_opcode_t insn, + struct arch_probes_insn *asi, struct pt_regs *regs) +{ + regs->ARM_pc += 4; + asi->insn_handler(insn, asi, regs); +} + +/* Return: + * INSN_REJECTED If instruction is one not allowed to kprobe, + * INSN_GOOD If instruction is supported and uses instruction slot, + * INSN_GOOD_NO_SLOT If instruction is supported but doesn't use its slot. + * + * For instructions we don't want to kprobe (INSN_REJECTED return result): + * These are generally ones that modify the processor state making + * them "hard" to simulate such as switches processor modes or + * make accesses in alternate modes. Any of these could be simulated + * if the work was put into it, but low return considering they + * should also be very rare. + */ +enum probes_insn __kprobes +arm_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + bool emulate, const union decode_action *actions) +{ + asi->insn_singlestep = arm_singlestep; + asi->insn_check_cc = probes_condition_checks[insn>>28]; + return probes_decode_insn(insn, asi, probes_decode_arm_table, false, + emulate, actions); +} diff --git a/arch/arm/kernel/probes-arm.h b/arch/arm/kernel/probes-arm.h new file mode 100644 index 00000000000..ace6572f6e2 --- /dev/null +++ b/arch/arm/kernel/probes-arm.h @@ -0,0 +1,73 @@ +/* + * arch/arm/kernel/probes-arm.h + * + * Copyright 2013 Linaro Ltd. + * Written by: David A. Long + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#ifndef _ARM_KERNEL_PROBES_ARM_H +#define _ARM_KERNEL_PROBES_ARM_H + +enum probes_arm_action { + PROBES_EMULATE_NONE, + PROBES_SIMULATE_NOP, + PROBES_PRELOAD_IMM, + PROBES_PRELOAD_REG, + PROBES_BRANCH_IMM, + PROBES_BRANCH_REG, + PROBES_MRS, + PROBES_CLZ, + PROBES_SATURATING_ARITHMETIC, + PROBES_MUL1, + PROBES_MUL2, + PROBES_SWP, + PROBES_LDRSTRD, + PROBES_LOAD, + PROBES_STORE, + PROBES_LOAD_EXTRA, + PROBES_STORE_EXTRA, + PROBES_MOV_IP_SP, + PROBES_DATA_PROCESSING_REG, + PROBES_DATA_PROCESSING_IMM, + PROBES_MOV_HALFWORD, + PROBES_SEV, + PROBES_WFE, + PROBES_SATURATE, + PROBES_REV, + PROBES_MMI, + PROBES_PACK, + PROBES_EXTEND, + PROBES_EXTEND_ADD, + PROBES_MUL_ADD_LONG, + PROBES_MUL_ADD, + PROBES_BITFIELD, + PROBES_BRANCH, + PROBES_LDMSTM, + NUM_PROBES_ARM_ACTIONS +}; + +void __kprobes simulate_bbl(probes_opcode_t opcode, + struct arch_probes_insn *asi, struct pt_regs *regs); +void __kprobes simulate_blx1(probes_opcode_t opcode, + struct arch_probes_insn *asi, struct pt_regs *regs); +void __kprobes simulate_blx2bx(probes_opcode_t opcode, + struct arch_probes_insn *asi, struct pt_regs *regs); +void __kprobes simulate_mrs(probes_opcode_t opcode, + struct arch_probes_insn *asi, struct pt_regs *regs); +void __kprobes simulate_mov_ipsp(probes_opcode_t opcode, + struct arch_probes_insn *asi, struct pt_regs *regs); + +extern const union decode_item probes_decode_arm_table[]; + +enum probes_insn arm_probes_decode_insn(probes_opcode_t, + struct arch_probes_insn *, bool emulate, + const union decode_action *actions); + +#endif diff --git a/arch/arm/kernel/probes-thumb.c b/arch/arm/kernel/probes-thumb.c new file mode 100644 index 00000000000..4131351e812 --- /dev/null +++ b/arch/arm/kernel/probes-thumb.c @@ -0,0 +1,882 @@ +/* + * arch/arm/kernel/probes-thumb.c + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/stddef.h> +#include <linux/kernel.h> +#include <linux/module.h> + +#include "probes.h" +#include "probes-thumb.h" + + +static const union decode_item t32_table_1110_100x_x0xx[] = { + /* Load/store multiple instructions */ + + /* Rn is PC 1110 100x x0xx 1111 xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfe4f0000, 0xe80f0000), + + /* SRS 1110 1000 00x0 xxxx xxxx xxxx xxxx xxxx */ + /* RFE 1110 1000 00x1 xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xffc00000, 0xe8000000), + /* SRS 1110 1001 10x0 xxxx xxxx xxxx xxxx xxxx */ + /* RFE 1110 1001 10x1 xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xffc00000, 0xe9800000), + + /* STM Rn, {...pc} 1110 100x x0x0 xxxx 1xxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfe508000, 0xe8008000), + /* LDM Rn, {...lr,pc} 1110 100x x0x1 xxxx 11xx xxxx xxxx xxxx */ + DECODE_REJECT (0xfe50c000, 0xe810c000), + /* LDM/STM Rn, {...sp} 1110 100x x0xx xxxx xx1x xxxx xxxx xxxx */ + DECODE_REJECT (0xfe402000, 0xe8002000), + + /* STMIA 1110 1000 10x0 xxxx xxxx xxxx xxxx xxxx */ + /* LDMIA 1110 1000 10x1 xxxx xxxx xxxx xxxx xxxx */ + /* STMDB 1110 1001 00x0 xxxx xxxx xxxx xxxx xxxx */ + /* LDMDB 1110 1001 00x1 xxxx xxxx xxxx xxxx xxxx */ + DECODE_CUSTOM (0xfe400000, 0xe8000000, PROBES_T32_LDMSTM), + + DECODE_END +}; + +static const union decode_item t32_table_1110_100x_x1xx[] = { + /* Load/store dual, load/store exclusive, table branch */ + + /* STRD (immediate) 1110 1000 x110 xxxx xxxx xxxx xxxx xxxx */ + /* LDRD (immediate) 1110 1000 x111 xxxx xxxx xxxx xxxx xxxx */ + DECODE_OR (0xff600000, 0xe8600000), + /* STRD (immediate) 1110 1001 x1x0 xxxx xxxx xxxx xxxx xxxx */ + /* LDRD (immediate) 1110 1001 x1x1 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xff400000, 0xe9400000, PROBES_T32_LDRDSTRD, + REGS(NOPCWB, NOSPPC, NOSPPC, 0, 0)), + + /* TBB 1110 1000 1101 xxxx xxxx xxxx 0000 xxxx */ + /* TBH 1110 1000 1101 xxxx xxxx xxxx 0001 xxxx */ + DECODE_SIMULATEX(0xfff000e0, 0xe8d00000, PROBES_T32_TABLE_BRANCH, + REGS(NOSP, 0, 0, 0, NOSPPC)), + + /* STREX 1110 1000 0100 xxxx xxxx xxxx xxxx xxxx */ + /* LDREX 1110 1000 0101 xxxx xxxx xxxx xxxx xxxx */ + /* STREXB 1110 1000 1100 xxxx xxxx xxxx 0100 xxxx */ + /* STREXH 1110 1000 1100 xxxx xxxx xxxx 0101 xxxx */ + /* STREXD 1110 1000 1100 xxxx xxxx xxxx 0111 xxxx */ + /* LDREXB 1110 1000 1101 xxxx xxxx xxxx 0100 xxxx */ + /* LDREXH 1110 1000 1101 xxxx xxxx xxxx 0101 xxxx */ + /* LDREXD 1110 1000 1101 xxxx xxxx xxxx 0111 xxxx */ + /* And unallocated instructions... */ + DECODE_END +}; + +static const union decode_item t32_table_1110_101x[] = { + /* Data-processing (shifted register) */ + + /* TST 1110 1010 0001 xxxx xxxx 1111 xxxx xxxx */ + /* TEQ 1110 1010 1001 xxxx xxxx 1111 xxxx xxxx */ + DECODE_EMULATEX (0xff700f00, 0xea100f00, PROBES_T32_TST, + REGS(NOSPPC, 0, 0, 0, NOSPPC)), + + /* CMN 1110 1011 0001 xxxx xxxx 1111 xxxx xxxx */ + DECODE_OR (0xfff00f00, 0xeb100f00), + /* CMP 1110 1011 1011 xxxx xxxx 1111 xxxx xxxx */ + DECODE_EMULATEX (0xfff00f00, 0xebb00f00, PROBES_T32_TST, + REGS(NOPC, 0, 0, 0, NOSPPC)), + + /* MOV 1110 1010 010x 1111 xxxx xxxx xxxx xxxx */ + /* MVN 1110 1010 011x 1111 xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xffcf0000, 0xea4f0000, PROBES_T32_MOV, + REGS(0, 0, NOSPPC, 0, NOSPPC)), + + /* ??? 1110 1010 101x xxxx xxxx xxxx xxxx xxxx */ + /* ??? 1110 1010 111x xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xffa00000, 0xeaa00000), + /* ??? 1110 1011 001x xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xffe00000, 0xeb200000), + /* ??? 1110 1011 100x xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xffe00000, 0xeb800000), + /* ??? 1110 1011 111x xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xffe00000, 0xebe00000), + + /* ADD/SUB SP, SP, Rm, LSL #0..3 */ + /* 1110 1011 x0xx 1101 x000 1101 xx00 xxxx */ + DECODE_EMULATEX (0xff4f7f30, 0xeb0d0d00, PROBES_T32_ADDSUB, + REGS(SP, 0, SP, 0, NOSPPC)), + + /* ADD/SUB SP, SP, Rm, shift */ + /* 1110 1011 x0xx 1101 xxxx 1101 xxxx xxxx */ + DECODE_REJECT (0xff4f0f00, 0xeb0d0d00), + + /* ADD/SUB Rd, SP, Rm, shift */ + /* 1110 1011 x0xx 1101 xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xff4f0000, 0xeb0d0000, PROBES_T32_ADDSUB, + REGS(SP, 0, NOPC, 0, NOSPPC)), + + /* AND 1110 1010 000x xxxx xxxx xxxx xxxx xxxx */ + /* BIC 1110 1010 001x xxxx xxxx xxxx xxxx xxxx */ + /* ORR 1110 1010 010x xxxx xxxx xxxx xxxx xxxx */ + /* ORN 1110 1010 011x xxxx xxxx xxxx xxxx xxxx */ + /* EOR 1110 1010 100x xxxx xxxx xxxx xxxx xxxx */ + /* PKH 1110 1010 110x xxxx xxxx xxxx xxxx xxxx */ + /* ADD 1110 1011 000x xxxx xxxx xxxx xxxx xxxx */ + /* ADC 1110 1011 010x xxxx xxxx xxxx xxxx xxxx */ + /* SBC 1110 1011 011x xxxx xxxx xxxx xxxx xxxx */ + /* SUB 1110 1011 101x xxxx xxxx xxxx xxxx xxxx */ + /* RSB 1110 1011 110x xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfe000000, 0xea000000, PROBES_T32_LOGICAL, + REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)), + + DECODE_END +}; + +static const union decode_item t32_table_1111_0x0x___0[] = { + /* Data-processing (modified immediate) */ + + /* TST 1111 0x00 0001 xxxx 0xxx 1111 xxxx xxxx */ + /* TEQ 1111 0x00 1001 xxxx 0xxx 1111 xxxx xxxx */ + DECODE_EMULATEX (0xfb708f00, 0xf0100f00, PROBES_T32_TST, + REGS(NOSPPC, 0, 0, 0, 0)), + + /* CMN 1111 0x01 0001 xxxx 0xxx 1111 xxxx xxxx */ + DECODE_OR (0xfbf08f00, 0xf1100f00), + /* CMP 1111 0x01 1011 xxxx 0xxx 1111 xxxx xxxx */ + DECODE_EMULATEX (0xfbf08f00, 0xf1b00f00, PROBES_T32_CMP, + REGS(NOPC, 0, 0, 0, 0)), + + /* MOV 1111 0x00 010x 1111 0xxx xxxx xxxx xxxx */ + /* MVN 1111 0x00 011x 1111 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfbcf8000, 0xf04f0000, PROBES_T32_MOV, + REGS(0, 0, NOSPPC, 0, 0)), + + /* ??? 1111 0x00 101x xxxx 0xxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfbe08000, 0xf0a00000), + /* ??? 1111 0x00 110x xxxx 0xxx xxxx xxxx xxxx */ + /* ??? 1111 0x00 111x xxxx 0xxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfbc08000, 0xf0c00000), + /* ??? 1111 0x01 001x xxxx 0xxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfbe08000, 0xf1200000), + /* ??? 1111 0x01 100x xxxx 0xxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfbe08000, 0xf1800000), + /* ??? 1111 0x01 111x xxxx 0xxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfbe08000, 0xf1e00000), + + /* ADD Rd, SP, #imm 1111 0x01 000x 1101 0xxx xxxx xxxx xxxx */ + /* SUB Rd, SP, #imm 1111 0x01 101x 1101 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfb4f8000, 0xf10d0000, PROBES_T32_ADDSUB, + REGS(SP, 0, NOPC, 0, 0)), + + /* AND 1111 0x00 000x xxxx 0xxx xxxx xxxx xxxx */ + /* BIC 1111 0x00 001x xxxx 0xxx xxxx xxxx xxxx */ + /* ORR 1111 0x00 010x xxxx 0xxx xxxx xxxx xxxx */ + /* ORN 1111 0x00 011x xxxx 0xxx xxxx xxxx xxxx */ + /* EOR 1111 0x00 100x xxxx 0xxx xxxx xxxx xxxx */ + /* ADD 1111 0x01 000x xxxx 0xxx xxxx xxxx xxxx */ + /* ADC 1111 0x01 010x xxxx 0xxx xxxx xxxx xxxx */ + /* SBC 1111 0x01 011x xxxx 0xxx xxxx xxxx xxxx */ + /* SUB 1111 0x01 101x xxxx 0xxx xxxx xxxx xxxx */ + /* RSB 1111 0x01 110x xxxx 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfa008000, 0xf0000000, PROBES_T32_LOGICAL, + REGS(NOSPPC, 0, NOSPPC, 0, 0)), + + DECODE_END +}; + +static const union decode_item t32_table_1111_0x1x___0[] = { + /* Data-processing (plain binary immediate) */ + + /* ADDW Rd, PC, #imm 1111 0x10 0000 1111 0xxx xxxx xxxx xxxx */ + DECODE_OR (0xfbff8000, 0xf20f0000), + /* SUBW Rd, PC, #imm 1111 0x10 1010 1111 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfbff8000, 0xf2af0000, PROBES_T32_ADDWSUBW_PC, + REGS(PC, 0, NOSPPC, 0, 0)), + + /* ADDW SP, SP, #imm 1111 0x10 0000 1101 0xxx 1101 xxxx xxxx */ + DECODE_OR (0xfbff8f00, 0xf20d0d00), + /* SUBW SP, SP, #imm 1111 0x10 1010 1101 0xxx 1101 xxxx xxxx */ + DECODE_EMULATEX (0xfbff8f00, 0xf2ad0d00, PROBES_T32_ADDWSUBW, + REGS(SP, 0, SP, 0, 0)), + + /* ADDW 1111 0x10 0000 xxxx 0xxx xxxx xxxx xxxx */ + DECODE_OR (0xfbf08000, 0xf2000000), + /* SUBW 1111 0x10 1010 xxxx 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfbf08000, 0xf2a00000, PROBES_T32_ADDWSUBW, + REGS(NOPCX, 0, NOSPPC, 0, 0)), + + /* MOVW 1111 0x10 0100 xxxx 0xxx xxxx xxxx xxxx */ + /* MOVT 1111 0x10 1100 xxxx 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfb708000, 0xf2400000, PROBES_T32_MOVW, + REGS(0, 0, NOSPPC, 0, 0)), + + /* SSAT16 1111 0x11 0010 xxxx 0000 xxxx 00xx xxxx */ + /* SSAT 1111 0x11 00x0 xxxx 0xxx xxxx xxxx xxxx */ + /* USAT16 1111 0x11 1010 xxxx 0000 xxxx 00xx xxxx */ + /* USAT 1111 0x11 10x0 xxxx 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfb508000, 0xf3000000, PROBES_T32_SAT, + REGS(NOSPPC, 0, NOSPPC, 0, 0)), + + /* SFBX 1111 0x11 0100 xxxx 0xxx xxxx xxxx xxxx */ + /* UFBX 1111 0x11 1100 xxxx 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfb708000, 0xf3400000, PROBES_T32_BITFIELD, + REGS(NOSPPC, 0, NOSPPC, 0, 0)), + + /* BFC 1111 0x11 0110 1111 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfbff8000, 0xf36f0000, PROBES_T32_BITFIELD, + REGS(0, 0, NOSPPC, 0, 0)), + + /* BFI 1111 0x11 0110 xxxx 0xxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfbf08000, 0xf3600000, PROBES_T32_BITFIELD, + REGS(NOSPPCX, 0, NOSPPC, 0, 0)), + + DECODE_END +}; + +static const union decode_item t32_table_1111_0xxx___1[] = { + /* Branches and miscellaneous control */ + + /* YIELD 1111 0011 1010 xxxx 10x0 x000 0000 0001 */ + DECODE_OR (0xfff0d7ff, 0xf3a08001), + /* SEV 1111 0011 1010 xxxx 10x0 x000 0000 0100 */ + DECODE_EMULATE (0xfff0d7ff, 0xf3a08004, PROBES_T32_SEV), + /* NOP 1111 0011 1010 xxxx 10x0 x000 0000 0000 */ + /* WFE 1111 0011 1010 xxxx 10x0 x000 0000 0010 */ + /* WFI 1111 0011 1010 xxxx 10x0 x000 0000 0011 */ + DECODE_SIMULATE (0xfff0d7fc, 0xf3a08000, PROBES_T32_WFE), + + /* MRS Rd, CPSR 1111 0011 1110 xxxx 10x0 xxxx xxxx xxxx */ + DECODE_SIMULATEX(0xfff0d000, 0xf3e08000, PROBES_T32_MRS, + REGS(0, 0, NOSPPC, 0, 0)), + + /* + * Unsupported instructions + * 1111 0x11 1xxx xxxx 10x0 xxxx xxxx xxxx + * + * MSR 1111 0011 100x xxxx 10x0 xxxx xxxx xxxx + * DBG hint 1111 0011 1010 xxxx 10x0 x000 1111 xxxx + * Unallocated hints 1111 0011 1010 xxxx 10x0 x000 xxxx xxxx + * CPS 1111 0011 1010 xxxx 10x0 xxxx xxxx xxxx + * CLREX/DSB/DMB/ISB 1111 0011 1011 xxxx 10x0 xxxx xxxx xxxx + * BXJ 1111 0011 1100 xxxx 10x0 xxxx xxxx xxxx + * SUBS PC,LR,#<imm8> 1111 0011 1101 xxxx 10x0 xxxx xxxx xxxx + * MRS Rd, SPSR 1111 0011 1111 xxxx 10x0 xxxx xxxx xxxx + * SMC 1111 0111 1111 xxxx 1000 xxxx xxxx xxxx + * UNDEFINED 1111 0111 1111 xxxx 1010 xxxx xxxx xxxx + * ??? 1111 0111 1xxx xxxx 1010 xxxx xxxx xxxx + */ + DECODE_REJECT (0xfb80d000, 0xf3808000), + + /* Bcc 1111 0xxx xxxx xxxx 10x0 xxxx xxxx xxxx */ + DECODE_CUSTOM (0xf800d000, 0xf0008000, PROBES_T32_BRANCH_COND), + + /* BLX 1111 0xxx xxxx xxxx 11x0 xxxx xxxx xxx0 */ + DECODE_OR (0xf800d001, 0xf000c000), + /* B 1111 0xxx xxxx xxxx 10x1 xxxx xxxx xxxx */ + /* BL 1111 0xxx xxxx xxxx 11x1 xxxx xxxx xxxx */ + DECODE_SIMULATE (0xf8009000, 0xf0009000, PROBES_T32_BRANCH), + + DECODE_END +}; + +static const union decode_item t32_table_1111_100x_x0x1__1111[] = { + /* Memory hints */ + + /* PLD (literal) 1111 1000 x001 1111 1111 xxxx xxxx xxxx */ + /* PLI (literal) 1111 1001 x001 1111 1111 xxxx xxxx xxxx */ + DECODE_SIMULATE (0xfe7ff000, 0xf81ff000, PROBES_T32_PLDI), + + /* PLD{W} (immediate) 1111 1000 10x1 xxxx 1111 xxxx xxxx xxxx */ + DECODE_OR (0xffd0f000, 0xf890f000), + /* PLD{W} (immediate) 1111 1000 00x1 xxxx 1111 1100 xxxx xxxx */ + DECODE_OR (0xffd0ff00, 0xf810fc00), + /* PLI (immediate) 1111 1001 1001 xxxx 1111 xxxx xxxx xxxx */ + DECODE_OR (0xfff0f000, 0xf990f000), + /* PLI (immediate) 1111 1001 0001 xxxx 1111 1100 xxxx xxxx */ + DECODE_SIMULATEX(0xfff0ff00, 0xf910fc00, PROBES_T32_PLDI, + REGS(NOPCX, 0, 0, 0, 0)), + + /* PLD{W} (register) 1111 1000 00x1 xxxx 1111 0000 00xx xxxx */ + DECODE_OR (0xffd0ffc0, 0xf810f000), + /* PLI (register) 1111 1001 0001 xxxx 1111 0000 00xx xxxx */ + DECODE_SIMULATEX(0xfff0ffc0, 0xf910f000, PROBES_T32_PLDI, + REGS(NOPCX, 0, 0, 0, NOSPPC)), + + /* Other unallocated instructions... */ + DECODE_END +}; + +static const union decode_item t32_table_1111_100x[] = { + /* Store/Load single data item */ + + /* ??? 1111 100x x11x xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfe600000, 0xf8600000), + + /* ??? 1111 1001 0101 xxxx xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xfff00000, 0xf9500000), + + /* ??? 1111 100x 0xxx xxxx xxxx 10x0 xxxx xxxx */ + DECODE_REJECT (0xfe800d00, 0xf8000800), + + /* STRBT 1111 1000 0000 xxxx xxxx 1110 xxxx xxxx */ + /* STRHT 1111 1000 0010 xxxx xxxx 1110 xxxx xxxx */ + /* STRT 1111 1000 0100 xxxx xxxx 1110 xxxx xxxx */ + /* LDRBT 1111 1000 0001 xxxx xxxx 1110 xxxx xxxx */ + /* LDRSBT 1111 1001 0001 xxxx xxxx 1110 xxxx xxxx */ + /* LDRHT 1111 1000 0011 xxxx xxxx 1110 xxxx xxxx */ + /* LDRSHT 1111 1001 0011 xxxx xxxx 1110 xxxx xxxx */ + /* LDRT 1111 1000 0101 xxxx xxxx 1110 xxxx xxxx */ + DECODE_REJECT (0xfe800f00, 0xf8000e00), + + /* STR{,B,H} Rn,[PC...] 1111 1000 xxx0 1111 xxxx xxxx xxxx xxxx */ + DECODE_REJECT (0xff1f0000, 0xf80f0000), + + /* STR{,B,H} PC,[Rn...] 1111 1000 xxx0 xxxx 1111 xxxx xxxx xxxx */ + DECODE_REJECT (0xff10f000, 0xf800f000), + + /* LDR (literal) 1111 1000 x101 1111 xxxx xxxx xxxx xxxx */ + DECODE_SIMULATEX(0xff7f0000, 0xf85f0000, PROBES_T32_LDR_LIT, + REGS(PC, ANY, 0, 0, 0)), + + /* STR (immediate) 1111 1000 0100 xxxx xxxx 1xxx xxxx xxxx */ + /* LDR (immediate) 1111 1000 0101 xxxx xxxx 1xxx xxxx xxxx */ + DECODE_OR (0xffe00800, 0xf8400800), + /* STR (immediate) 1111 1000 1100 xxxx xxxx xxxx xxxx xxxx */ + /* LDR (immediate) 1111 1000 1101 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xffe00000, 0xf8c00000, PROBES_T32_LDRSTR, + REGS(NOPCX, ANY, 0, 0, 0)), + + /* STR (register) 1111 1000 0100 xxxx xxxx 0000 00xx xxxx */ + /* LDR (register) 1111 1000 0101 xxxx xxxx 0000 00xx xxxx */ + DECODE_EMULATEX (0xffe00fc0, 0xf8400000, PROBES_T32_LDRSTR, + REGS(NOPCX, ANY, 0, 0, NOSPPC)), + + /* LDRB (literal) 1111 1000 x001 1111 xxxx xxxx xxxx xxxx */ + /* LDRSB (literal) 1111 1001 x001 1111 xxxx xxxx xxxx xxxx */ + /* LDRH (literal) 1111 1000 x011 1111 xxxx xxxx xxxx xxxx */ + /* LDRSH (literal) 1111 1001 x011 1111 xxxx xxxx xxxx xxxx */ + DECODE_SIMULATEX(0xfe5f0000, 0xf81f0000, PROBES_T32_LDR_LIT, + REGS(PC, NOSPPCX, 0, 0, 0)), + + /* STRB (immediate) 1111 1000 0000 xxxx xxxx 1xxx xxxx xxxx */ + /* STRH (immediate) 1111 1000 0010 xxxx xxxx 1xxx xxxx xxxx */ + /* LDRB (immediate) 1111 1000 0001 xxxx xxxx 1xxx xxxx xxxx */ + /* LDRSB (immediate) 1111 1001 0001 xxxx xxxx 1xxx xxxx xxxx */ + /* LDRH (immediate) 1111 1000 0011 xxxx xxxx 1xxx xxxx xxxx */ + /* LDRSH (immediate) 1111 1001 0011 xxxx xxxx 1xxx xxxx xxxx */ + DECODE_OR (0xfec00800, 0xf8000800), + /* STRB (immediate) 1111 1000 1000 xxxx xxxx xxxx xxxx xxxx */ + /* STRH (immediate) 1111 1000 1010 xxxx xxxx xxxx xxxx xxxx */ + /* LDRB (immediate) 1111 1000 1001 xxxx xxxx xxxx xxxx xxxx */ + /* LDRSB (immediate) 1111 1001 1001 xxxx xxxx xxxx xxxx xxxx */ + /* LDRH (immediate) 1111 1000 1011 xxxx xxxx xxxx xxxx xxxx */ + /* LDRSH (immediate) 1111 1001 1011 xxxx xxxx xxxx xxxx xxxx */ + DECODE_EMULATEX (0xfec00000, 0xf8800000, PROBES_T32_LDRSTR, + REGS(NOPCX, NOSPPCX, 0, 0, 0)), + + /* STRB (register) 1111 1000 0000 xxxx xxxx 0000 00xx xxxx */ + /* STRH (register) 1111 1000 0010 xxxx xxxx 0000 00xx xxxx */ + /* LDRB (register) 1111 1000 0001 xxxx xxxx 0000 00xx xxxx */ + /* LDRSB (register) 1111 1001 0001 xxxx xxxx 0000 00xx xxxx */ + /* LDRH (register) 1111 1000 0011 xxxx xxxx 0000 00xx xxxx */ + /* LDRSH (register) 1111 1001 0011 xxxx xxxx 0000 00xx xxxx */ + DECODE_EMULATEX (0xfe800fc0, 0xf8000000, PROBES_T32_LDRSTR, + REGS(NOPCX, NOSPPCX, 0, 0, NOSPPC)), + + /* Other unallocated instructions... */ + DECODE_END +}; + +static const union decode_item t32_table_1111_1010___1111[] = { + /* Data-processing (register) */ + + /* ??? 1111 1010 011x xxxx 1111 xxxx 1xxx xxxx */ + DECODE_REJECT (0xffe0f080, 0xfa60f080), + + /* SXTH 1111 1010 0000 1111 1111 xxxx 1xxx xxxx */ + /* UXTH 1111 1010 0001 1111 1111 xxxx 1xxx xxxx */ + /* SXTB16 1111 1010 0010 1111 1111 xxxx 1xxx xxxx */ + /* UXTB16 1111 1010 0011 1111 1111 xxxx 1xxx xxxx */ + /* SXTB 1111 1010 0100 1111 1111 xxxx 1xxx xxxx */ + /* UXTB 1111 1010 0101 1111 1111 xxxx 1xxx xxxx */ + DECODE_EMULATEX (0xff8ff080, 0xfa0ff080, PROBES_T32_SIGN_EXTEND, + REGS(0, 0, NOSPPC, 0, NOSPPC)), + + + /* ??? 1111 1010 1xxx xxxx 1111 xxxx 0x11 xxxx */ + DECODE_REJECT (0xff80f0b0, 0xfa80f030), + /* ??? 1111 1010 1x11 xxxx 1111 xxxx 0xxx xxxx */ + DECODE_REJECT (0xffb0f080, 0xfab0f000), + + /* SADD16 1111 1010 1001 xxxx 1111 xxxx 0000 xxxx */ + /* SASX 1111 1010 1010 xxxx 1111 xxxx 0000 xxxx */ + /* SSAX 1111 1010 1110 xxxx 1111 xxxx 0000 xxxx */ + /* SSUB16 1111 1010 1101 xxxx 1111 xxxx 0000 xxxx */ + /* SADD8 1111 1010 1000 xxxx 1111 xxxx 0000 xxxx */ + /* SSUB8 1111 1010 1100 xxxx 1111 xxxx 0000 xxxx */ + + /* QADD16 1111 1010 1001 xxxx 1111 xxxx 0001 xxxx */ + /* QASX 1111 1010 1010 xxxx 1111 xxxx 0001 xxxx */ + /* QSAX 1111 1010 1110 xxxx 1111 xxxx 0001 xxxx */ + /* QSUB16 1111 1010 1101 xxxx 1111 xxxx 0001 xxxx */ + /* QADD8 1111 1010 1000 xxxx 1111 xxxx 0001 xxxx */ + /* QSUB8 1111 1010 1100 xxxx 1111 xxxx 0001 xxxx */ + + /* SHADD16 1111 1010 1001 xxxx 1111 xxxx 0010 xxxx */ + /* SHASX 1111 1010 1010 xxxx 1111 xxxx 0010 xxxx */ + /* SHSAX 1111 1010 1110 xxxx 1111 xxxx 0010 xxxx */ + /* SHSUB16 1111 1010 1101 xxxx 1111 xxxx 0010 xxxx */ + /* SHADD8 1111 1010 1000 xxxx 1111 xxxx 0010 xxxx */ + /* SHSUB8 1111 1010 1100 xxxx 1111 xxxx 0010 xxxx */ + + /* UADD16 1111 1010 1001 xxxx 1111 xxxx 0100 xxxx */ + /* UASX 1111 1010 1010 xxxx 1111 xxxx 0100 xxxx */ + /* USAX 1111 1010 1110 xxxx 1111 xxxx 0100 xxxx */ + /* USUB16 1111 1010 1101 xxxx 1111 xxxx 0100 xxxx */ + /* UADD8 1111 1010 1000 xxxx 1111 xxxx 0100 xxxx */ + /* USUB8 1111 1010 1100 xxxx 1111 xxxx 0100 xxxx */ + + /* UQADD16 1111 1010 1001 xxxx 1111 xxxx 0101 xxxx */ + /* UQASX 1111 1010 1010 xxxx 1111 xxxx 0101 xxxx */ + /* UQSAX 1111 1010 1110 xxxx 1111 xxxx 0101 xxxx */ + /* UQSUB16 1111 1010 1101 xxxx 1111 xxxx 0101 xxxx */ + /* UQADD8 1111 1010 1000 xxxx 1111 xxxx 0101 xxxx */ + /* UQSUB8 1111 1010 1100 xxxx 1111 xxxx 0101 xxxx */ + + /* UHADD16 1111 1010 1001 xxxx 1111 xxxx 0110 xxxx */ + /* UHASX 1111 1010 1010 xxxx 1111 xxxx 0110 xxxx */ + /* UHSAX 1111 1010 1110 xxxx 1111 xxxx 0110 xxxx */ + /* UHSUB16 1111 1010 1101 xxxx 1111 xxxx 0110 xxxx */ + /* UHADD8 1111 1010 1000 xxxx 1111 xxxx 0110 xxxx */ + /* UHSUB8 1111 1010 1100 xxxx 1111 xxxx 0110 xxxx */ + DECODE_OR (0xff80f080, 0xfa80f000), + + /* SXTAH 1111 1010 0000 xxxx 1111 xxxx 1xxx xxxx */ + /* UXTAH 1111 1010 0001 xxxx 1111 xxxx 1xxx xxxx */ + /* SXTAB16 1111 1010 0010 xxxx 1111 xxxx 1xxx xxxx */ + /* UXTAB16 1111 1010 0011 xxxx 1111 xxxx 1xxx xxxx */ + /* SXTAB 1111 1010 0100 xxxx 1111 xxxx 1xxx xxxx */ + /* UXTAB 1111 1010 0101 xxxx 1111 xxxx 1xxx xxxx */ + DECODE_OR (0xff80f080, 0xfa00f080), + + /* QADD 1111 1010 1000 xxxx 1111 xxxx 1000 xxxx */ + /* QDADD 1111 1010 1000 xxxx 1111 xxxx 1001 xxxx */ + /* QSUB 1111 1010 1000 xxxx 1111 xxxx 1010 xxxx */ + /* QDSUB 1111 1010 1000 xxxx 1111 xxxx 1011 xxxx */ + DECODE_OR (0xfff0f0c0, 0xfa80f080), + + /* SEL 1111 1010 1010 xxxx 1111 xxxx 1000 xxxx */ + DECODE_OR (0xfff0f0f0, 0xfaa0f080), + + /* LSL 1111 1010 000x xxxx 1111 xxxx 0000 xxxx */ + /* LSR 1111 1010 001x xxxx 1111 xxxx 0000 xxxx */ + /* ASR 1111 1010 010x xxxx 1111 xxxx 0000 xxxx */ + /* ROR 1111 1010 011x xxxx 1111 xxxx 0000 xxxx */ + DECODE_EMULATEX (0xff80f0f0, 0xfa00f000, PROBES_T32_MEDIA, + REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)), + + /* CLZ 1111 1010 1010 xxxx 1111 xxxx 1000 xxxx */ + DECODE_OR (0xfff0f0f0, 0xfab0f080), + + /* REV 1111 1010 1001 xxxx 1111 xxxx 1000 xxxx */ + /* REV16 1111 1010 1001 xxxx 1111 xxxx 1001 xxxx */ + /* RBIT 1111 1010 1001 xxxx 1111 xxxx 1010 xxxx */ + /* REVSH 1111 1010 1001 xxxx 1111 xxxx 1011 xxxx */ + DECODE_EMULATEX (0xfff0f0c0, 0xfa90f080, PROBES_T32_REVERSE, + REGS(NOSPPC, 0, NOSPPC, 0, SAMEAS16)), + + /* Other unallocated instructions... */ + DECODE_END +}; + +static const union decode_item t32_table_1111_1011_0[] = { + /* Multiply, multiply accumulate, and absolute difference */ + + /* ??? 1111 1011 0000 xxxx 1111 xxxx 0001 xxxx */ + DECODE_REJECT (0xfff0f0f0, 0xfb00f010), + /* ??? 1111 1011 0111 xxxx 1111 xxxx 0001 xxxx */ + DECODE_REJECT (0xfff0f0f0, 0xfb70f010), + + /* SMULxy 1111 1011 0001 xxxx 1111 xxxx 00xx xxxx */ + DECODE_OR (0xfff0f0c0, 0xfb10f000), + /* MUL 1111 1011 0000 xxxx 1111 xxxx 0000 xxxx */ + /* SMUAD{X} 1111 1011 0010 xxxx 1111 xxxx 000x xxxx */ + /* SMULWy 1111 1011 0011 xxxx 1111 xxxx 000x xxxx */ + /* SMUSD{X} 1111 1011 0100 xxxx 1111 xxxx 000x xxxx */ + /* SMMUL{R} 1111 1011 0101 xxxx 1111 xxxx 000x xxxx */ + /* USAD8 1111 1011 0111 xxxx 1111 xxxx 0000 xxxx */ + DECODE_EMULATEX (0xff80f0e0, 0xfb00f000, PROBES_T32_MUL_ADD, + REGS(NOSPPC, 0, NOSPPC, 0, NOSPPC)), + + /* ??? 1111 1011 0111 xxxx xxxx xxxx 0001 xxxx */ + DECODE_REJECT (0xfff000f0, 0xfb700010), + + /* SMLAxy 1111 1011 0001 xxxx xxxx xxxx 00xx xxxx */ + DECODE_OR (0xfff000c0, 0xfb100000), + /* MLA 1111 1011 0000 xxxx xxxx xxxx 0000 xxxx */ + /* MLS 1111 1011 0000 xxxx xxxx xxxx 0001 xxxx */ + /* SMLAD{X} 1111 1011 0010 xxxx xxxx xxxx 000x xxxx */ + /* SMLAWy 1111 1011 0011 xxxx xxxx xxxx 000x xxxx */ + /* SMLSD{X} 1111 1011 0100 xxxx xxxx xxxx 000x xxxx */ + /* SMMLA{R} 1111 1011 0101 xxxx xxxx xxxx 000x xxxx */ + /* SMMLS{R} 1111 1011 0110 xxxx xxxx xxxx 000x xxxx */ + /* USADA8 1111 1011 0111 xxxx xxxx xxxx 0000 xxxx */ + DECODE_EMULATEX (0xff8000c0, 0xfb000000, PROBES_T32_MUL_ADD2, + REGS(NOSPPC, NOSPPCX, NOSPPC, 0, NOSPPC)), + + /* Other unallocated instructions... */ + DECODE_END +}; + +static const union decode_item t32_table_1111_1011_1[] = { + /* Long multiply, long multiply accumulate, and divide */ + + /* UMAAL 1111 1011 1110 xxxx xxxx xxxx 0110 xxxx */ + DECODE_OR (0xfff000f0, 0xfbe00060), + /* SMLALxy 1111 1011 1100 xxxx xxxx xxxx 10xx xxxx */ + DECODE_OR (0xfff000c0, 0xfbc00080), + /* SMLALD{X} 1111 1011 1100 xxxx xxxx xxxx 110x xxxx */ + /* SMLSLD{X} 1111 1011 1101 xxxx xxxx xxxx 110x xxxx */ + DECODE_OR (0xffe000e0, 0xfbc000c0), + /* SMULL 1111 1011 1000 xxxx xxxx xxxx 0000 xxxx */ + /* UMULL 1111 1011 1010 xxxx xxxx xxxx 0000 xxxx */ + /* SMLAL 1111 1011 1100 xxxx xxxx xxxx 0000 xxxx */ + /* UMLAL 1111 1011 1110 xxxx xxxx xxxx 0000 xxxx */ + DECODE_EMULATEX (0xff9000f0, 0xfb800000, PROBES_T32_MUL_ADD_LONG, + REGS(NOSPPC, NOSPPC, NOSPPC, 0, NOSPPC)), + + /* SDIV 1111 1011 1001 xxxx xxxx xxxx 1111 xxxx */ + /* UDIV 1111 1011 1011 xxxx xxxx xxxx 1111 xxxx */ + /* Other unallocated instructions... */ + DECODE_END +}; + +const union decode_item probes_decode_thumb32_table[] = { + + /* + * Load/store multiple instructions + * 1110 100x x0xx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xfe400000, 0xe8000000, t32_table_1110_100x_x0xx), + + /* + * Load/store dual, load/store exclusive, table branch + * 1110 100x x1xx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xfe400000, 0xe8400000, t32_table_1110_100x_x1xx), + + /* + * Data-processing (shifted register) + * 1110 101x xxxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xfe000000, 0xea000000, t32_table_1110_101x), + + /* + * Coprocessor instructions + * 1110 11xx xxxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_REJECT (0xfc000000, 0xec000000), + + /* + * Data-processing (modified immediate) + * 1111 0x0x xxxx xxxx 0xxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xfa008000, 0xf0000000, t32_table_1111_0x0x___0), + + /* + * Data-processing (plain binary immediate) + * 1111 0x1x xxxx xxxx 0xxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xfa008000, 0xf2000000, t32_table_1111_0x1x___0), + + /* + * Branches and miscellaneous control + * 1111 0xxx xxxx xxxx 1xxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xf8008000, 0xf0008000, t32_table_1111_0xxx___1), + + /* + * Advanced SIMD element or structure load/store instructions + * 1111 1001 xxx0 xxxx xxxx xxxx xxxx xxxx + */ + DECODE_REJECT (0xff100000, 0xf9000000), + + /* + * Memory hints + * 1111 100x x0x1 xxxx 1111 xxxx xxxx xxxx + */ + DECODE_TABLE (0xfe50f000, 0xf810f000, t32_table_1111_100x_x0x1__1111), + + /* + * Store single data item + * 1111 1000 xxx0 xxxx xxxx xxxx xxxx xxxx + * Load single data items + * 1111 100x xxx1 xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xfe000000, 0xf8000000, t32_table_1111_100x), + + /* + * Data-processing (register) + * 1111 1010 xxxx xxxx 1111 xxxx xxxx xxxx + */ + DECODE_TABLE (0xff00f000, 0xfa00f000, t32_table_1111_1010___1111), + + /* + * Multiply, multiply accumulate, and absolute difference + * 1111 1011 0xxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xff800000, 0xfb000000, t32_table_1111_1011_0), + + /* + * Long multiply, long multiply accumulate, and divide + * 1111 1011 1xxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_TABLE (0xff800000, 0xfb800000, t32_table_1111_1011_1), + + /* + * Coprocessor instructions + * 1111 11xx xxxx xxxx xxxx xxxx xxxx xxxx + */ + DECODE_END +}; +#ifdef CONFIG_ARM_KPROBES_TEST_MODULE +EXPORT_SYMBOL_GPL(probes_decode_thumb32_table); +#endif + +static const union decode_item t16_table_1011[] = { + /* Miscellaneous 16-bit instructions */ + + /* ADD (SP plus immediate) 1011 0000 0xxx xxxx */ + /* SUB (SP minus immediate) 1011 0000 1xxx xxxx */ + DECODE_SIMULATE (0xff00, 0xb000, PROBES_T16_ADD_SP), + + /* CBZ 1011 00x1 xxxx xxxx */ + /* CBNZ 1011 10x1 xxxx xxxx */ + DECODE_SIMULATE (0xf500, 0xb100, PROBES_T16_CBZ), + + /* SXTH 1011 0010 00xx xxxx */ + /* SXTB 1011 0010 01xx xxxx */ + /* UXTH 1011 0010 10xx xxxx */ + /* UXTB 1011 0010 11xx xxxx */ + /* REV 1011 1010 00xx xxxx */ + /* REV16 1011 1010 01xx xxxx */ + /* ??? 1011 1010 10xx xxxx */ + /* REVSH 1011 1010 11xx xxxx */ + DECODE_REJECT (0xffc0, 0xba80), + DECODE_EMULATE (0xf500, 0xb000, PROBES_T16_SIGN_EXTEND), + + /* PUSH 1011 010x xxxx xxxx */ + DECODE_CUSTOM (0xfe00, 0xb400, PROBES_T16_PUSH), + /* POP 1011 110x xxxx xxxx */ + DECODE_CUSTOM (0xfe00, 0xbc00, PROBES_T16_POP), + + /* + * If-Then, and hints + * 1011 1111 xxxx xxxx + */ + + /* YIELD 1011 1111 0001 0000 */ + DECODE_OR (0xffff, 0xbf10), + /* SEV 1011 1111 0100 0000 */ + DECODE_EMULATE (0xffff, 0xbf40, PROBES_T16_SEV), + /* NOP 1011 1111 0000 0000 */ + /* WFE 1011 1111 0010 0000 */ + /* WFI 1011 1111 0011 0000 */ + DECODE_SIMULATE (0xffcf, 0xbf00, PROBES_T16_WFE), + /* Unassigned hints 1011 1111 xxxx 0000 */ + DECODE_REJECT (0xff0f, 0xbf00), + /* IT 1011 1111 xxxx xxxx */ + DECODE_CUSTOM (0xff00, 0xbf00, PROBES_T16_IT), + + /* SETEND 1011 0110 010x xxxx */ + /* CPS 1011 0110 011x xxxx */ + /* BKPT 1011 1110 xxxx xxxx */ + /* And unallocated instructions... */ + DECODE_END +}; + +const union decode_item probes_decode_thumb16_table[] = { + + /* + * Shift (immediate), add, subtract, move, and compare + * 00xx xxxx xxxx xxxx + */ + + /* CMP (immediate) 0010 1xxx xxxx xxxx */ + DECODE_EMULATE (0xf800, 0x2800, PROBES_T16_CMP), + + /* ADD (register) 0001 100x xxxx xxxx */ + /* SUB (register) 0001 101x xxxx xxxx */ + /* LSL (immediate) 0000 0xxx xxxx xxxx */ + /* LSR (immediate) 0000 1xxx xxxx xxxx */ + /* ASR (immediate) 0001 0xxx xxxx xxxx */ + /* ADD (immediate, Thumb) 0001 110x xxxx xxxx */ + /* SUB (immediate, Thumb) 0001 111x xxxx xxxx */ + /* MOV (immediate) 0010 0xxx xxxx xxxx */ + /* ADD (immediate, Thumb) 0011 0xxx xxxx xxxx */ + /* SUB (immediate, Thumb) 0011 1xxx xxxx xxxx */ + DECODE_EMULATE (0xc000, 0x0000, PROBES_T16_ADDSUB), + + /* + * 16-bit Thumb data-processing instructions + * 0100 00xx xxxx xxxx + */ + + /* TST (register) 0100 0010 00xx xxxx */ + DECODE_EMULATE (0xffc0, 0x4200, PROBES_T16_CMP), + /* CMP (register) 0100 0010 10xx xxxx */ + /* CMN (register) 0100 0010 11xx xxxx */ + DECODE_EMULATE (0xff80, 0x4280, PROBES_T16_CMP), + /* AND (register) 0100 0000 00xx xxxx */ + /* EOR (register) 0100 0000 01xx xxxx */ + /* LSL (register) 0100 0000 10xx xxxx */ + /* LSR (register) 0100 0000 11xx xxxx */ + /* ASR (register) 0100 0001 00xx xxxx */ + /* ADC (register) 0100 0001 01xx xxxx */ + /* SBC (register) 0100 0001 10xx xxxx */ + /* ROR (register) 0100 0001 11xx xxxx */ + /* RSB (immediate) 0100 0010 01xx xxxx */ + /* ORR (register) 0100 0011 00xx xxxx */ + /* MUL 0100 0011 00xx xxxx */ + /* BIC (register) 0100 0011 10xx xxxx */ + /* MVN (register) 0100 0011 10xx xxxx */ + DECODE_EMULATE (0xfc00, 0x4000, PROBES_T16_LOGICAL), + + /* + * Special data instructions and branch and exchange + * 0100 01xx xxxx xxxx + */ + + /* BLX pc 0100 0111 1111 1xxx */ + DECODE_REJECT (0xfff8, 0x47f8), + + /* BX (register) 0100 0111 0xxx xxxx */ + /* BLX (register) 0100 0111 1xxx xxxx */ + DECODE_SIMULATE (0xff00, 0x4700, PROBES_T16_BLX), + + /* ADD pc, pc 0100 0100 1111 1111 */ + DECODE_REJECT (0xffff, 0x44ff), + + /* ADD (register) 0100 0100 xxxx xxxx */ + /* CMP (register) 0100 0101 xxxx xxxx */ + /* MOV (register) 0100 0110 xxxx xxxx */ + DECODE_CUSTOM (0xfc00, 0x4400, PROBES_T16_HIREGOPS), + + /* + * Load from Literal Pool + * LDR (literal) 0100 1xxx xxxx xxxx + */ + DECODE_SIMULATE (0xf800, 0x4800, PROBES_T16_LDR_LIT), + + /* + * 16-bit Thumb Load/store instructions + * 0101 xxxx xxxx xxxx + * 011x xxxx xxxx xxxx + * 100x xxxx xxxx xxxx + */ + + /* STR (register) 0101 000x xxxx xxxx */ + /* STRH (register) 0101 001x xxxx xxxx */ + /* STRB (register) 0101 010x xxxx xxxx */ + /* LDRSB (register) 0101 011x xxxx xxxx */ + /* LDR (register) 0101 100x xxxx xxxx */ + /* LDRH (register) 0101 101x xxxx xxxx */ + /* LDRB (register) 0101 110x xxxx xxxx */ + /* LDRSH (register) 0101 111x xxxx xxxx */ + /* STR (immediate, Thumb) 0110 0xxx xxxx xxxx */ + /* LDR (immediate, Thumb) 0110 1xxx xxxx xxxx */ + /* STRB (immediate, Thumb) 0111 0xxx xxxx xxxx */ + /* LDRB (immediate, Thumb) 0111 1xxx xxxx xxxx */ + DECODE_EMULATE (0xc000, 0x4000, PROBES_T16_LDRHSTRH), + /* STRH (immediate, Thumb) 1000 0xxx xxxx xxxx */ + /* LDRH (immediate, Thumb) 1000 1xxx xxxx xxxx */ + DECODE_EMULATE (0xf000, 0x8000, PROBES_T16_LDRHSTRH), + /* STR (immediate, Thumb) 1001 0xxx xxxx xxxx */ + /* LDR (immediate, Thumb) 1001 1xxx xxxx xxxx */ + DECODE_SIMULATE (0xf000, 0x9000, PROBES_T16_LDRSTR), + + /* + * Generate PC-/SP-relative address + * ADR (literal) 1010 0xxx xxxx xxxx + * ADD (SP plus immediate) 1010 1xxx xxxx xxxx + */ + DECODE_SIMULATE (0xf000, 0xa000, PROBES_T16_ADR), + + /* + * Miscellaneous 16-bit instructions + * 1011 xxxx xxxx xxxx + */ + DECODE_TABLE (0xf000, 0xb000, t16_table_1011), + + /* STM 1100 0xxx xxxx xxxx */ + /* LDM 1100 1xxx xxxx xxxx */ + DECODE_EMULATE (0xf000, 0xc000, PROBES_T16_LDMSTM), + + /* + * Conditional branch, and Supervisor Call + */ + + /* Permanently UNDEFINED 1101 1110 xxxx xxxx */ + /* SVC 1101 1111 xxxx xxxx */ + DECODE_REJECT (0xfe00, 0xde00), + + /* Conditional branch 1101 xxxx xxxx xxxx */ + DECODE_CUSTOM (0xf000, 0xd000, PROBES_T16_BRANCH_COND), + + /* + * Unconditional branch + * B 1110 0xxx xxxx xxxx + */ + DECODE_SIMULATE (0xf800, 0xe000, PROBES_T16_BRANCH), + + DECODE_END +}; +#ifdef CONFIG_ARM_KPROBES_TEST_MODULE +EXPORT_SYMBOL_GPL(probes_decode_thumb16_table); +#endif + +static unsigned long __kprobes thumb_check_cc(unsigned long cpsr) +{ + if (unlikely(in_it_block(cpsr))) + return probes_condition_checks[current_cond(cpsr)](cpsr); + return true; +} + +static void __kprobes thumb16_singlestep(probes_opcode_t opcode, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + regs->ARM_pc += 2; + asi->insn_handler(opcode, asi, regs); + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); +} + +static void __kprobes thumb32_singlestep(probes_opcode_t opcode, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + regs->ARM_pc += 4; + asi->insn_handler(opcode, asi, regs); + regs->ARM_cpsr = it_advance(regs->ARM_cpsr); +} + +enum probes_insn __kprobes +thumb16_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + bool emulate, const union decode_action *actions) +{ + asi->insn_singlestep = thumb16_singlestep; + asi->insn_check_cc = thumb_check_cc; + return probes_decode_insn(insn, asi, probes_decode_thumb16_table, true, + emulate, actions); +} + +enum probes_insn __kprobes +thumb32_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + bool emulate, const union decode_action *actions) +{ + asi->insn_singlestep = thumb32_singlestep; + asi->insn_check_cc = thumb_check_cc; + return probes_decode_insn(insn, asi, probes_decode_thumb32_table, true, + emulate, actions); +} diff --git a/arch/arm/kernel/probes-thumb.h b/arch/arm/kernel/probes-thumb.h new file mode 100644 index 00000000000..7c6f6ebe514 --- /dev/null +++ b/arch/arm/kernel/probes-thumb.h @@ -0,0 +1,97 @@ +/* + * arch/arm/kernel/probes-thumb.h + * + * Copyright 2013 Linaro Ltd. + * Written by: David A. Long + * + * The code contained herein is licensed under the GNU General Public + * License. You may obtain a copy of the GNU General Public License + * Version 2 or later at the following locations: + * + * http://www.opensource.org/licenses/gpl-license.html + * http://www.gnu.org/copyleft/gpl.html + */ + +#ifndef _ARM_KERNEL_PROBES_THUMB_H +#define _ARM_KERNEL_PROBES_THUMB_H + +/* + * True if current instruction is in an IT block. + */ +#define in_it_block(cpsr) ((cpsr & 0x06000c00) != 0x00000000) + +/* + * Return the condition code to check for the currently executing instruction. + * This is in ITSTATE<7:4> which is in CPSR<15:12> but is only valid if + * in_it_block returns true. + */ +#define current_cond(cpsr) ((cpsr >> 12) & 0xf) + +enum probes_t32_action { + PROBES_T32_EMULATE_NONE, + PROBES_T32_SIMULATE_NOP, + PROBES_T32_LDMSTM, + PROBES_T32_LDRDSTRD, + PROBES_T32_TABLE_BRANCH, + PROBES_T32_TST, + PROBES_T32_CMP, + PROBES_T32_MOV, + PROBES_T32_ADDSUB, + PROBES_T32_LOGICAL, + PROBES_T32_ADDWSUBW_PC, + PROBES_T32_ADDWSUBW, + PROBES_T32_MOVW, + PROBES_T32_SAT, + PROBES_T32_BITFIELD, + PROBES_T32_SEV, + PROBES_T32_WFE, + PROBES_T32_MRS, + PROBES_T32_BRANCH_COND, + PROBES_T32_BRANCH, + PROBES_T32_PLDI, + PROBES_T32_LDR_LIT, + PROBES_T32_LDRSTR, + PROBES_T32_SIGN_EXTEND, + PROBES_T32_MEDIA, + PROBES_T32_REVERSE, + PROBES_T32_MUL_ADD, + PROBES_T32_MUL_ADD2, + PROBES_T32_MUL_ADD_LONG, + NUM_PROBES_T32_ACTIONS +}; + +enum probes_t16_action { + PROBES_T16_ADD_SP, + PROBES_T16_CBZ, + PROBES_T16_SIGN_EXTEND, + PROBES_T16_PUSH, + PROBES_T16_POP, + PROBES_T16_SEV, + PROBES_T16_WFE, + PROBES_T16_IT, + PROBES_T16_CMP, + PROBES_T16_ADDSUB, + PROBES_T16_LOGICAL, + PROBES_T16_BLX, + PROBES_T16_HIREGOPS, + PROBES_T16_LDR_LIT, + PROBES_T16_LDRHSTRH, + PROBES_T16_LDRSTR, + PROBES_T16_ADR, + PROBES_T16_LDMSTM, + PROBES_T16_BRANCH_COND, + PROBES_T16_BRANCH, + NUM_PROBES_T16_ACTIONS +}; + +extern const union decode_item probes_decode_thumb32_table[]; +extern const union decode_item probes_decode_thumb16_table[]; + +enum probes_insn __kprobes +thumb16_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + bool emulate, const union decode_action *actions); +enum probes_insn __kprobes +thumb32_probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + bool emulate, const union decode_action *actions); + +#endif diff --git a/arch/arm/kernel/probes.c b/arch/arm/kernel/probes.c new file mode 100644 index 00000000000..a8ab540d7e7 --- /dev/null +++ b/arch/arm/kernel/probes.c @@ -0,0 +1,456 @@ +/* + * arch/arm/kernel/probes.c + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * Some contents moved here from arch/arm/include/asm/kprobes-arm.c which is + * Copyright (C) 2006, 2007 Motorola Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <asm/system_info.h> +#include <asm/ptrace.h> +#include <linux/bug.h> + +#include "probes.h" + + +#ifndef find_str_pc_offset + +/* + * For STR and STM instructions, an ARM core may choose to use either + * a +8 or a +12 displacement from the current instruction's address. + * Whichever value is chosen for a given core, it must be the same for + * both instructions and may not change. This function measures it. + */ + +int str_pc_offset; + +void __init find_str_pc_offset(void) +{ + int addr, scratch, ret; + + __asm__ ( + "sub %[ret], pc, #4 \n\t" + "str pc, %[addr] \n\t" + "ldr %[scr], %[addr] \n\t" + "sub %[ret], %[scr], %[ret] \n\t" + : [ret] "=r" (ret), [scr] "=r" (scratch), [addr] "+m" (addr)); + + str_pc_offset = ret; +} + +#endif /* !find_str_pc_offset */ + + +#ifndef test_load_write_pc_interworking + +bool load_write_pc_interworks; + +void __init test_load_write_pc_interworking(void) +{ + int arch = cpu_architecture(); + BUG_ON(arch == CPU_ARCH_UNKNOWN); + load_write_pc_interworks = arch >= CPU_ARCH_ARMv5T; +} + +#endif /* !test_load_write_pc_interworking */ + + +#ifndef test_alu_write_pc_interworking + +bool alu_write_pc_interworks; + +void __init test_alu_write_pc_interworking(void) +{ + int arch = cpu_architecture(); + BUG_ON(arch == CPU_ARCH_UNKNOWN); + alu_write_pc_interworks = arch >= CPU_ARCH_ARMv7; +} + +#endif /* !test_alu_write_pc_interworking */ + + +void __init arm_probes_decode_init(void) +{ + find_str_pc_offset(); + test_load_write_pc_interworking(); + test_alu_write_pc_interworking(); +} + + +static unsigned long __kprobes __check_eq(unsigned long cpsr) +{ + return cpsr & PSR_Z_BIT; +} + +static unsigned long __kprobes __check_ne(unsigned long cpsr) +{ + return (~cpsr) & PSR_Z_BIT; +} + +static unsigned long __kprobes __check_cs(unsigned long cpsr) +{ + return cpsr & PSR_C_BIT; +} + +static unsigned long __kprobes __check_cc(unsigned long cpsr) +{ + return (~cpsr) & PSR_C_BIT; +} + +static unsigned long __kprobes __check_mi(unsigned long cpsr) +{ + return cpsr & PSR_N_BIT; +} + +static unsigned long __kprobes __check_pl(unsigned long cpsr) +{ + return (~cpsr) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_vs(unsigned long cpsr) +{ + return cpsr & PSR_V_BIT; +} + +static unsigned long __kprobes __check_vc(unsigned long cpsr) +{ + return (~cpsr) & PSR_V_BIT; +} + +static unsigned long __kprobes __check_hi(unsigned long cpsr) +{ + cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return cpsr & PSR_C_BIT; +} + +static unsigned long __kprobes __check_ls(unsigned long cpsr) +{ + cpsr &= ~(cpsr >> 1); /* PSR_C_BIT &= ~PSR_Z_BIT */ + return (~cpsr) & PSR_C_BIT; +} + +static unsigned long __kprobes __check_ge(unsigned long cpsr) +{ + cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return (~cpsr) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_lt(unsigned long cpsr) +{ + cpsr ^= (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + return cpsr & PSR_N_BIT; +} + +static unsigned long __kprobes __check_gt(unsigned long cpsr) +{ + unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + temp |= (cpsr << 1); /* PSR_N_BIT |= PSR_Z_BIT */ + return (~temp) & PSR_N_BIT; +} + +static unsigned long __kprobes __check_le(unsigned long cpsr) +{ + unsigned long temp = cpsr ^ (cpsr << 3); /* PSR_N_BIT ^= PSR_V_BIT */ + temp |= (cpsr << 1); /* PSR_N_BIT |= PSR_Z_BIT */ + return temp & PSR_N_BIT; +} + +static unsigned long __kprobes __check_al(unsigned long cpsr) +{ + return true; +} + +probes_check_cc * const probes_condition_checks[16] = { + &__check_eq, &__check_ne, &__check_cs, &__check_cc, + &__check_mi, &__check_pl, &__check_vs, &__check_vc, + &__check_hi, &__check_ls, &__check_ge, &__check_lt, + &__check_gt, &__check_le, &__check_al, &__check_al +}; + + +void __kprobes probes_simulate_nop(probes_opcode_t opcode, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ +} + +void __kprobes probes_emulate_none(probes_opcode_t opcode, + struct arch_probes_insn *asi, + struct pt_regs *regs) +{ + asi->insn_fn(); +} + +/* + * Prepare an instruction slot to receive an instruction for emulating. + * This is done by placing a subroutine return after the location where the + * instruction will be placed. We also modify ARM instructions to be + * unconditional as the condition code will already be checked before any + * emulation handler is called. + */ +static probes_opcode_t __kprobes +prepare_emulated_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + bool thumb) +{ +#ifdef CONFIG_THUMB2_KERNEL + if (thumb) { + u16 *thumb_insn = (u16 *)asi->insn; + /* Thumb bx lr */ + thumb_insn[1] = __opcode_to_mem_thumb16(0x4770); + thumb_insn[2] = __opcode_to_mem_thumb16(0x4770); + return insn; + } + asi->insn[1] = __opcode_to_mem_arm(0xe12fff1e); /* ARM bx lr */ +#else + asi->insn[1] = __opcode_to_mem_arm(0xe1a0f00e); /* mov pc, lr */ +#endif + /* Make an ARM instruction unconditional */ + if (insn < 0xe0000000) + insn = (insn | 0xe0000000) & ~0x10000000; + return insn; +} + +/* + * Write a (probably modified) instruction into the slot previously prepared by + * prepare_emulated_insn + */ +static void __kprobes +set_emulated_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + bool thumb) +{ +#ifdef CONFIG_THUMB2_KERNEL + if (thumb) { + u16 *ip = (u16 *)asi->insn; + if (is_wide_instruction(insn)) + *ip++ = __opcode_to_mem_thumb16(insn >> 16); + *ip++ = __opcode_to_mem_thumb16(insn); + return; + } +#endif + asi->insn[0] = __opcode_to_mem_arm(insn); +} + +/* + * When we modify the register numbers encoded in an instruction to be emulated, + * the new values come from this define. For ARM and 32-bit Thumb instructions + * this gives... + * + * bit position 16 12 8 4 0 + * ---------------+---+---+---+---+---+ + * register r2 r0 r1 -- r3 + */ +#define INSN_NEW_BITS 0x00020103 + +/* Each nibble has same value as that at INSN_NEW_BITS bit 16 */ +#define INSN_SAMEAS16_BITS 0x22222222 + +/* + * Validate and modify each of the registers encoded in an instruction. + * + * Each nibble in regs contains a value from enum decode_reg_type. For each + * non-zero value, the corresponding nibble in pinsn is validated and modified + * according to the type. + */ +static bool __kprobes decode_regs(probes_opcode_t *pinsn, u32 regs, bool modify) +{ + probes_opcode_t insn = *pinsn; + probes_opcode_t mask = 0xf; /* Start at least significant nibble */ + + for (; regs != 0; regs >>= 4, mask <<= 4) { + + probes_opcode_t new_bits = INSN_NEW_BITS; + + switch (regs & 0xf) { + + case REG_TYPE_NONE: + /* Nibble not a register, skip to next */ + continue; + + case REG_TYPE_ANY: + /* Any register is allowed */ + break; + + case REG_TYPE_SAMEAS16: + /* Replace register with same as at bit position 16 */ + new_bits = INSN_SAMEAS16_BITS; + break; + + case REG_TYPE_SP: + /* Only allow SP (R13) */ + if ((insn ^ 0xdddddddd) & mask) + goto reject; + break; + + case REG_TYPE_PC: + /* Only allow PC (R15) */ + if ((insn ^ 0xffffffff) & mask) + goto reject; + break; + + case REG_TYPE_NOSP: + /* Reject SP (R13) */ + if (((insn ^ 0xdddddddd) & mask) == 0) + goto reject; + break; + + case REG_TYPE_NOSPPC: + case REG_TYPE_NOSPPCX: + /* Reject SP and PC (R13 and R15) */ + if (((insn ^ 0xdddddddd) & 0xdddddddd & mask) == 0) + goto reject; + break; + + case REG_TYPE_NOPCWB: + if (!is_writeback(insn)) + break; /* No writeback, so any register is OK */ + /* fall through... */ + case REG_TYPE_NOPC: + case REG_TYPE_NOPCX: + /* Reject PC (R15) */ + if (((insn ^ 0xffffffff) & mask) == 0) + goto reject; + break; + } + + /* Replace value of nibble with new register number... */ + insn &= ~mask; + insn |= new_bits & mask; + } + + if (modify) + *pinsn = insn; + + return true; + +reject: + return false; +} + +static const int decode_struct_sizes[NUM_DECODE_TYPES] = { + [DECODE_TYPE_TABLE] = sizeof(struct decode_table), + [DECODE_TYPE_CUSTOM] = sizeof(struct decode_custom), + [DECODE_TYPE_SIMULATE] = sizeof(struct decode_simulate), + [DECODE_TYPE_EMULATE] = sizeof(struct decode_emulate), + [DECODE_TYPE_OR] = sizeof(struct decode_or), + [DECODE_TYPE_REJECT] = sizeof(struct decode_reject) +}; + +/* + * probes_decode_insn operates on data tables in order to decode an ARM + * architecture instruction onto which a kprobe has been placed. + * + * These instruction decoding tables are a concatenation of entries each + * of which consist of one of the following structs: + * + * decode_table + * decode_custom + * decode_simulate + * decode_emulate + * decode_or + * decode_reject + * + * Each of these starts with a struct decode_header which has the following + * fields: + * + * type_regs + * mask + * value + * + * The least significant DECODE_TYPE_BITS of type_regs contains a value + * from enum decode_type, this indicates which of the decode_* structs + * the entry contains. The value DECODE_TYPE_END indicates the end of the + * table. + * + * When the table is parsed, each entry is checked in turn to see if it + * matches the instruction to be decoded using the test: + * + * (insn & mask) == value + * + * If no match is found before the end of the table is reached then decoding + * fails with INSN_REJECTED. + * + * When a match is found, decode_regs() is called to validate and modify each + * of the registers encoded in the instruction; the data it uses to do this + * is (type_regs >> DECODE_TYPE_BITS). A validation failure will cause decoding + * to fail with INSN_REJECTED. + * + * Once the instruction has passed the above tests, further processing + * depends on the type of the table entry's decode struct. + * + */ +int __kprobes +probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + const union decode_item *table, bool thumb, + bool emulate, const union decode_action *actions) +{ + const struct decode_header *h = (struct decode_header *)table; + const struct decode_header *next; + bool matched = false; + + if (emulate) + insn = prepare_emulated_insn(insn, asi, thumb); + + for (;; h = next) { + enum decode_type type = h->type_regs.bits & DECODE_TYPE_MASK; + u32 regs = h->type_regs.bits >> DECODE_TYPE_BITS; + + if (type == DECODE_TYPE_END) + return INSN_REJECTED; + + next = (struct decode_header *) + ((uintptr_t)h + decode_struct_sizes[type]); + + if (!matched && (insn & h->mask.bits) != h->value.bits) + continue; + + if (!decode_regs(&insn, regs, emulate)) + return INSN_REJECTED; + + switch (type) { + + case DECODE_TYPE_TABLE: { + struct decode_table *d = (struct decode_table *)h; + next = (struct decode_header *)d->table.table; + break; + } + + case DECODE_TYPE_CUSTOM: { + struct decode_custom *d = (struct decode_custom *)h; + return actions[d->decoder.action].decoder(insn, asi, h); + } + + case DECODE_TYPE_SIMULATE: { + struct decode_simulate *d = (struct decode_simulate *)h; + asi->insn_handler = actions[d->handler.action].handler; + return INSN_GOOD_NO_SLOT; + } + + case DECODE_TYPE_EMULATE: { + struct decode_emulate *d = (struct decode_emulate *)h; + + if (!emulate) + return actions[d->handler.action].decoder(insn, + asi, h); + + asi->insn_handler = actions[d->handler.action].handler; + set_emulated_insn(insn, asi, thumb); + return INSN_GOOD; + } + + case DECODE_TYPE_OR: + matched = true; + break; + + case DECODE_TYPE_REJECT: + default: + return INSN_REJECTED; + } + } +} diff --git a/arch/arm/kernel/probes.h b/arch/arm/kernel/probes.h new file mode 100644 index 00000000000..dba9f2466a9 --- /dev/null +++ b/arch/arm/kernel/probes.h @@ -0,0 +1,407 @@ +/* + * arch/arm/kernel/probes.h + * + * Copyright (C) 2011 Jon Medhurst <tixy@yxit.co.uk>. + * + * Some contents moved here from arch/arm/include/asm/kprobes.h which is + * Copyright (C) 2006, 2007 Motorola Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +#ifndef _ARM_KERNEL_PROBES_H +#define _ARM_KERNEL_PROBES_H + +#include <linux/types.h> +#include <linux/stddef.h> +#include <asm/probes.h> + +void __init arm_probes_decode_init(void); + +extern probes_check_cc * const probes_condition_checks[16]; + +#if __LINUX_ARM_ARCH__ >= 7 + +/* str_pc_offset is architecturally defined from ARMv7 onwards */ +#define str_pc_offset 8 +#define find_str_pc_offset() + +#else /* __LINUX_ARM_ARCH__ < 7 */ + +/* We need a run-time check to determine str_pc_offset */ +extern int str_pc_offset; +void __init find_str_pc_offset(void); + +#endif + + +/* + * Update ITSTATE after normal execution of an IT block instruction. + * + * The 8 IT state bits are split into two parts in CPSR: + * ITSTATE<1:0> are in CPSR<26:25> + * ITSTATE<7:2> are in CPSR<15:10> + */ +static inline unsigned long it_advance(unsigned long cpsr) + { + if ((cpsr & 0x06000400) == 0) { + /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */ + cpsr &= ~PSR_IT_MASK; + } else { + /* We need to shift left ITSTATE<4:0> */ + const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */ + unsigned long it = cpsr & mask; + it <<= 1; + it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */ + it &= mask; + cpsr &= ~mask; + cpsr |= it; + } + return cpsr; +} + +static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs) +{ + long cpsr = regs->ARM_cpsr; + if (pcv & 0x1) { + cpsr |= PSR_T_BIT; + pcv &= ~0x1; + } else { + cpsr &= ~PSR_T_BIT; + pcv &= ~0x2; /* Avoid UNPREDICTABLE address allignment */ + } + regs->ARM_cpsr = cpsr; + regs->ARM_pc = pcv; +} + + +#if __LINUX_ARM_ARCH__ >= 6 + +/* Kernels built for >= ARMv6 should never run on <= ARMv5 hardware, so... */ +#define load_write_pc_interworks true +#define test_load_write_pc_interworking() + +#else /* __LINUX_ARM_ARCH__ < 6 */ + +/* We need run-time testing to determine if load_write_pc() should interwork. */ +extern bool load_write_pc_interworks; +void __init test_load_write_pc_interworking(void); + +#endif + +static inline void __kprobes load_write_pc(long pcv, struct pt_regs *regs) +{ + if (load_write_pc_interworks) + bx_write_pc(pcv, regs); + else + regs->ARM_pc = pcv; +} + + +#if __LINUX_ARM_ARCH__ >= 7 + +#define alu_write_pc_interworks true +#define test_alu_write_pc_interworking() + +#elif __LINUX_ARM_ARCH__ <= 5 + +/* Kernels built for <= ARMv5 should never run on >= ARMv6 hardware, so... */ +#define alu_write_pc_interworks false +#define test_alu_write_pc_interworking() + +#else /* __LINUX_ARM_ARCH__ == 6 */ + +/* We could be an ARMv6 binary on ARMv7 hardware so we need a run-time check. */ +extern bool alu_write_pc_interworks; +void __init test_alu_write_pc_interworking(void); + +#endif /* __LINUX_ARM_ARCH__ == 6 */ + +static inline void __kprobes alu_write_pc(long pcv, struct pt_regs *regs) +{ + if (alu_write_pc_interworks) + bx_write_pc(pcv, regs); + else + regs->ARM_pc = pcv; +} + + +/* + * Test if load/store instructions writeback the address register. + * if P (bit 24) == 0 or W (bit 21) == 1 + */ +#define is_writeback(insn) ((insn ^ 0x01000000) & 0x01200000) + +/* + * The following definitions and macros are used to build instruction + * decoding tables for use by probes_decode_insn. + * + * These tables are a concatenation of entries each of which consist of one of + * the decode_* structs. All of the fields in every type of decode structure + * are of the union type decode_item, therefore the entire decode table can be + * viewed as an array of these and declared like: + * + * static const union decode_item table_name[] = {}; + * + * In order to construct each entry in the table, macros are used to + * initialise a number of sequential decode_item values in a layout which + * matches the relevant struct. E.g. DECODE_SIMULATE initialise a struct + * decode_simulate by initialising four decode_item objects like this... + * + * {.bits = _type}, + * {.bits = _mask}, + * {.bits = _value}, + * {.action = _handler}, + * + * Initialising a specified member of the union means that the compiler + * will produce a warning if the argument is of an incorrect type. + * + * Below is a list of each of the macros used to initialise entries and a + * description of the action performed when that entry is matched to an + * instruction. A match is found when (instruction & mask) == value. + * + * DECODE_TABLE(mask, value, table) + * Instruction decoding jumps to parsing the new sub-table 'table'. + * + * DECODE_CUSTOM(mask, value, decoder) + * The value of 'decoder' is used as an index into the array of + * action functions, and the retrieved decoder function is invoked + * to complete decoding of the instruction. + * + * DECODE_SIMULATE(mask, value, handler) + * The probes instruction handler is set to the value found by + * indexing into the action array using the value of 'handler'. This + * will be used to simulate the instruction when the probe is hit. + * Decoding returns with INSN_GOOD_NO_SLOT. + * + * DECODE_EMULATE(mask, value, handler) + * The probes instruction handler is set to the value found by + * indexing into the action array using the value of 'handler'. This + * will be used to emulate the instruction when the probe is hit. The + * modified instruction (see below) is placed in the probes instruction + * slot so it may be called by the emulation code. Decoding returns + * with INSN_GOOD. + * + * DECODE_REJECT(mask, value) + * Instruction decoding fails with INSN_REJECTED + * + * DECODE_OR(mask, value) + * This allows the mask/value test of multiple table entries to be + * logically ORed. Once an 'or' entry is matched the decoding action to + * be performed is that of the next entry which isn't an 'or'. E.g. + * + * DECODE_OR (mask1, value1) + * DECODE_OR (mask2, value2) + * DECODE_SIMULATE (mask3, value3, simulation_handler) + * + * This means that if any of the three mask/value pairs match the + * instruction being decoded, then 'simulation_handler' will be used + * for it. + * + * Both the SIMULATE and EMULATE macros have a second form which take an + * additional 'regs' argument. + * + * DECODE_SIMULATEX(mask, value, handler, regs) + * DECODE_EMULATEX (mask, value, handler, regs) + * + * These are used to specify what kind of CPU register is encoded in each of the + * least significant 5 nibbles of the instruction being decoded. The regs value + * is specified using the REGS macro, this takes any of the REG_TYPE_* values + * from enum decode_reg_type as arguments; only the '*' part of the name is + * given. E.g. + * + * REGS(0, ANY, NOPC, 0, ANY) + * + * This indicates an instruction is encoded like: + * + * bits 19..16 ignore + * bits 15..12 any register allowed here + * bits 11.. 8 any register except PC allowed here + * bits 7.. 4 ignore + * bits 3.. 0 any register allowed here + * + * This register specification is checked after a decode table entry is found to + * match an instruction (through the mask/value test). Any invalid register then + * found in the instruction will cause decoding to fail with INSN_REJECTED. In + * the above example this would happen if bits 11..8 of the instruction were + * 1111, indicating R15 or PC. + * + * As well as checking for legal combinations of registers, this data is also + * used to modify the registers encoded in the instructions so that an + * emulation routines can use it. (See decode_regs() and INSN_NEW_BITS.) + * + * Here is a real example which matches ARM instructions of the form + * "AND <Rd>,<Rn>,<Rm>,<shift> <Rs>" + * + * DECODE_EMULATEX (0x0e000090, 0x00000010, PROBES_DATA_PROCESSING_REG, + * REGS(ANY, ANY, NOPC, 0, ANY)), + * ^ ^ ^ ^ + * Rn Rd Rs Rm + * + * Decoding the instruction "AND R4, R5, R6, ASL R15" will be rejected because + * Rs == R15 + * + * Decoding the instruction "AND R4, R5, R6, ASL R7" will be accepted and the + * instruction will be modified to "AND R0, R2, R3, ASL R1" and then placed into + * the kprobes instruction slot. This can then be called later by the handler + * function emulate_rd12rn16rm0rs8_rwflags (a pointer to which is retrieved from + * the indicated slot in the action array), in order to simulate the instruction. + */ + +enum decode_type { + DECODE_TYPE_END, + DECODE_TYPE_TABLE, + DECODE_TYPE_CUSTOM, + DECODE_TYPE_SIMULATE, + DECODE_TYPE_EMULATE, + DECODE_TYPE_OR, + DECODE_TYPE_REJECT, + NUM_DECODE_TYPES /* Must be last enum */ +}; + +#define DECODE_TYPE_BITS 4 +#define DECODE_TYPE_MASK ((1 << DECODE_TYPE_BITS) - 1) + +enum decode_reg_type { + REG_TYPE_NONE = 0, /* Not a register, ignore */ + REG_TYPE_ANY, /* Any register allowed */ + REG_TYPE_SAMEAS16, /* Register should be same as that at bits 19..16 */ + REG_TYPE_SP, /* Register must be SP */ + REG_TYPE_PC, /* Register must be PC */ + REG_TYPE_NOSP, /* Register must not be SP */ + REG_TYPE_NOSPPC, /* Register must not be SP or PC */ + REG_TYPE_NOPC, /* Register must not be PC */ + REG_TYPE_NOPCWB, /* No PC if load/store write-back flag also set */ + + /* The following types are used when the encoding for PC indicates + * another instruction form. This distiction only matters for test + * case coverage checks. + */ + REG_TYPE_NOPCX, /* Register must not be PC */ + REG_TYPE_NOSPPCX, /* Register must not be SP or PC */ + + /* Alias to allow '0' arg to be used in REGS macro. */ + REG_TYPE_0 = REG_TYPE_NONE +}; + +#define REGS(r16, r12, r8, r4, r0) \ + (((REG_TYPE_##r16) << 16) + \ + ((REG_TYPE_##r12) << 12) + \ + ((REG_TYPE_##r8) << 8) + \ + ((REG_TYPE_##r4) << 4) + \ + (REG_TYPE_##r0)) + +union decode_item { + u32 bits; + const union decode_item *table; + int action; +}; + +struct decode_header; +typedef enum probes_insn (probes_custom_decode_t)(probes_opcode_t, + struct arch_probes_insn *, + const struct decode_header *); + +union decode_action { + probes_insn_handler_t *handler; + probes_custom_decode_t *decoder; +}; + +#define DECODE_END \ + {.bits = DECODE_TYPE_END} + + +struct decode_header { + union decode_item type_regs; + union decode_item mask; + union decode_item value; +}; + +#define DECODE_HEADER(_type, _mask, _value, _regs) \ + {.bits = (_type) | ((_regs) << DECODE_TYPE_BITS)}, \ + {.bits = (_mask)}, \ + {.bits = (_value)} + + +struct decode_table { + struct decode_header header; + union decode_item table; +}; + +#define DECODE_TABLE(_mask, _value, _table) \ + DECODE_HEADER(DECODE_TYPE_TABLE, _mask, _value, 0), \ + {.table = (_table)} + + +struct decode_custom { + struct decode_header header; + union decode_item decoder; +}; + +#define DECODE_CUSTOM(_mask, _value, _decoder) \ + DECODE_HEADER(DECODE_TYPE_CUSTOM, _mask, _value, 0), \ + {.action = (_decoder)} + + +struct decode_simulate { + struct decode_header header; + union decode_item handler; +}; + +#define DECODE_SIMULATEX(_mask, _value, _handler, _regs) \ + DECODE_HEADER(DECODE_TYPE_SIMULATE, _mask, _value, _regs), \ + {.action = (_handler)} + +#define DECODE_SIMULATE(_mask, _value, _handler) \ + DECODE_SIMULATEX(_mask, _value, _handler, 0) + + +struct decode_emulate { + struct decode_header header; + union decode_item handler; +}; + +#define DECODE_EMULATEX(_mask, _value, _handler, _regs) \ + DECODE_HEADER(DECODE_TYPE_EMULATE, _mask, _value, _regs), \ + {.action = (_handler)} + +#define DECODE_EMULATE(_mask, _value, _handler) \ + DECODE_EMULATEX(_mask, _value, _handler, 0) + + +struct decode_or { + struct decode_header header; +}; + +#define DECODE_OR(_mask, _value) \ + DECODE_HEADER(DECODE_TYPE_OR, _mask, _value, 0) + +enum probes_insn { + INSN_REJECTED, + INSN_GOOD, + INSN_GOOD_NO_SLOT +}; + +struct decode_reject { + struct decode_header header; +}; + +#define DECODE_REJECT(_mask, _value) \ + DECODE_HEADER(DECODE_TYPE_REJECT, _mask, _value, 0) + +probes_insn_handler_t probes_simulate_nop; +probes_insn_handler_t probes_emulate_none; + +int __kprobes +probes_decode_insn(probes_opcode_t insn, struct arch_probes_insn *asi, + const union decode_item *table, bool thumb, bool emulate, + const union decode_action *actions); + +#endif diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index e76fcaadce0..81ef686a91c 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -10,7 +10,7 @@ */ #include <stdarg.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> #include <linux/mm.h> @@ -30,14 +30,17 @@ #include <linux/uaccess.h> #include <linux/random.h> #include <linux/hw_breakpoint.h> +#include <linux/leds.h> +#include <linux/reboot.h> #include <asm/cacheflush.h> -#include <asm/leds.h> +#include <asm/idmap.h> #include <asm/processor.h> -#include <asm/system.h> #include <asm/thread_notify.h> #include <asm/stacktrace.h> +#include <asm/system_misc.h> #include <asm/mach/time.h> +#include <asm/tls.h> #ifdef CONFIG_CC_STACKPROTECTOR #include <linux/stackprotector.h> @@ -45,86 +48,74 @@ unsigned long __stack_chk_guard __read_mostly; EXPORT_SYMBOL(__stack_chk_guard); #endif -static const char *processor_modes[] = { +static const char *processor_modes[] __maybe_unused = { "USER_26", "FIQ_26" , "IRQ_26" , "SVC_26" , "UK4_26" , "UK5_26" , "UK6_26" , "UK7_26" , "UK8_26" , "UK9_26" , "UK10_26", "UK11_26", "UK12_26", "UK13_26", "UK14_26", "UK15_26", "USER_32", "FIQ_32" , "IRQ_32" , "SVC_32" , "UK4_32" , "UK5_32" , "UK6_32" , "ABT_32" , "UK8_32" , "UK9_32" , "UK10_32", "UND_32" , "UK12_32", "UK13_32", "UK14_32", "SYS_32" }; -static const char *isa_modes[] = { +static const char *isa_modes[] __maybe_unused = { "ARM" , "Thumb" , "Jazelle", "ThumbEE" }; -extern void setup_mm_for_reboot(char mode); - -static volatile int hlt_counter; +extern void call_with_stack(void (*fn)(void *), void *arg, void *sp); +typedef void (*phys_reset_t)(unsigned long); -#include <mach/system.h> +/* + * A temporary stack to use for CPU reset. This is static so that we + * don't clobber it with the identity mapping. When running with this + * stack, any references to the current task *will not work* so you + * should really do as little as possible before jumping to your reset + * code. + */ +static u64 soft_restart_stack[16]; -void disable_hlt(void) +static void __soft_restart(void *addr) { - hlt_counter++; -} + phys_reset_t phys_reset; -EXPORT_SYMBOL(disable_hlt); + /* Take out a flat memory mapping. */ + setup_mm_for_reboot(); -void enable_hlt(void) -{ - hlt_counter--; -} + /* Clean and invalidate caches */ + flush_cache_all(); -EXPORT_SYMBOL(enable_hlt); + /* Turn off caching */ + cpu_proc_fin(); -static int __init nohlt_setup(char *__unused) -{ - hlt_counter = 1; - return 1; -} + /* Push out any further dirty data, and ensure cache is empty */ + flush_cache_all(); -static int __init hlt_setup(char *__unused) -{ - hlt_counter = 0; - return 1; -} + /* Switch to the identity mapping. */ + phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset); + phys_reset((unsigned long)addr); -__setup("nohlt", nohlt_setup); -__setup("hlt", hlt_setup); + /* Should never get here. */ + BUG(); +} -void arm_machine_restart(char mode, const char *cmd) +void soft_restart(unsigned long addr) { + u64 *stack = soft_restart_stack + ARRAY_SIZE(soft_restart_stack); + /* Disable interrupts first */ - local_irq_disable(); + raw_local_irq_disable(); local_fiq_disable(); - /* - * Tell the mm system that we are going to reboot - - * we may need it to insert some 1:1 mappings so that - * soft boot works. - */ - setup_mm_for_reboot(mode); + /* Disable the L2 if we're the last man standing. */ + if (num_online_cpus() == 1) + outer_disable(); - /* Clean and invalidate caches */ - flush_cache_all(); + /* Change to the new stack and continue with the reset. */ + call_with_stack(__soft_restart, (void *)addr, (void *)stack); - /* Turn off caching */ - cpu_proc_fin(); - - /* Push out any further dirty data, and ensure cache is empty */ - flush_cache_all(); - - /* - * Now call the architecture specific reboot code. - */ - arch_reset(mode, cmd); + /* Should never get here. */ + BUG(); +} - /* - * Whoops - the architecture was unable to reboot. - * Tell the user! - */ - mdelay(1000); - printk("Reboot failed -- System halted\n"); - while (1); +static void null_restart(enum reboot_mode reboot_mode, const char *cmd) +{ } /* @@ -133,122 +124,121 @@ void arm_machine_restart(char mode, const char *cmd) void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); -void (*arm_pm_restart)(char str, const char *cmd) = arm_machine_restart; +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd) = null_restart; EXPORT_SYMBOL_GPL(arm_pm_restart); -static void do_nothing(void *unused) -{ -} - /* - * cpu_idle_wait - Used to ensure that all the CPUs discard old value of - * pm_idle and update to new pm_idle value. Required while changing pm_idle - * handler on SMP systems. - * - * Caller must have changed pm_idle to the new value before the call. Old - * pm_idle value will not be used by any CPU after the return of this function. + * This is our default idle handler. */ -void cpu_idle_wait(void) -{ - smp_mb(); - /* kick all the CPUs so that they exit out of pm_idle */ - smp_call_function(do_nothing, NULL, 1); -} -EXPORT_SYMBOL_GPL(cpu_idle_wait); + +void (*arm_pm_idle)(void); /* - * This is our default idle handler. We need to disable - * interrupts here to ensure we don't miss a wakeup call. + * Called from the core idle loop. */ -static void default_idle(void) + +void arch_cpu_idle(void) { - if (!need_resched()) - arch_idle(); + if (arm_pm_idle) + arm_pm_idle(); + else + cpu_do_idle(); local_irq_enable(); } -void (*pm_idle)(void) = default_idle; -EXPORT_SYMBOL(pm_idle); - -/* - * The idle thread, has rather strange semantics for calling pm_idle, - * but this is what x86 does and we need to do the same, so that - * things like cpuidle get called in the same way. The only difference - * is that we always respect 'hlt_counter' to prevent low power idle. - */ -void cpu_idle(void) +void arch_cpu_idle_prepare(void) { local_fiq_enable(); +} - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_stop_sched_tick(1); - leds_event(led_idle_start); - while (!need_resched()) { -#ifdef CONFIG_HOTPLUG_CPU - if (cpu_is_offline(smp_processor_id())) - cpu_die(); +void arch_cpu_idle_enter(void) +{ + ledtrig_cpu(CPU_LED_IDLE_START); +#ifdef CONFIG_PL310_ERRATA_769419 + wmb(); #endif - - local_irq_disable(); - if (hlt_counter) { - local_irq_enable(); - cpu_relax(); - } else { - stop_critical_timings(); - pm_idle(); - start_critical_timings(); - /* - * This will eventually be removed - pm_idle - * functions should always return with IRQs - * enabled. - */ - WARN_ON(irqs_disabled()); - local_irq_enable(); - } - } - leds_event(led_idle_end); - tick_nohz_restart_sched_tick(); - preempt_enable_no_resched(); - schedule(); - preempt_disable(); - } } -static char reboot_mode = 'h'; - -int __init reboot_setup(char *str) +void arch_cpu_idle_exit(void) { - reboot_mode = str[0]; - return 1; + ledtrig_cpu(CPU_LED_IDLE_END); } -__setup("reboot=", reboot_setup); +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) +{ + cpu_die(); +} +#endif +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. + */ void machine_shutdown(void) { -#ifdef CONFIG_SMP - smp_send_stop(); -#endif + disable_nonboot_cpus(); } +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */ void machine_halt(void) { - machine_shutdown(); + local_irq_disable(); + smp_send_stop(); + + local_irq_disable(); while (1); } +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */ void machine_power_off(void) { - machine_shutdown(); + local_irq_disable(); + smp_send_stop(); + if (pm_power_off) pm_power_off(); } +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. Implementing such co-ordination would be essentially impossible. + */ void machine_restart(char *cmd) { - machine_shutdown(); + local_irq_disable(); + smp_send_stop(); + arm_pm_restart(reboot_mode, cmd); + + /* Give a grace period for failure to restart of 1s */ + mdelay(1000); + + /* Whoops - the platform was unable to reboot. Tell the user! */ + printk("Reboot failed -- System halted\n"); + local_irq_disable(); + while (1); } void __show_regs(struct pt_regs *regs) @@ -256,11 +246,8 @@ void __show_regs(struct pt_regs *regs) unsigned long flags; char buf[64]; - printk("CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + show_regs_print_info(KERN_DEFAULT); + print_symbol("PC is at %s\n", instruction_pointer(regs)); print_symbol("LR is at %s\n", regs->ARM_lr); printk("pc : [<%08lx>] lr : [<%08lx>] psr: %08lx\n" @@ -284,12 +271,17 @@ void __show_regs(struct pt_regs *regs) buf[3] = flags & PSR_V_BIT ? 'V' : 'v'; buf[4] = '\0'; +#ifndef CONFIG_CPU_V7M printk("Flags: %s IRQs o%s FIQs o%s Mode %s ISA %s Segment %s\n", buf, interrupts_enabled(regs) ? "n" : "ff", fast_interrupts_enabled(regs) ? "n" : "ff", processor_modes[processor_mode(regs)], isa_modes[isa_mode(regs)], get_fs() == get_ds() ? "kernel" : "user"); +#else + printk("xPSR: %08lx\n", regs->ARM_cpsr); +#endif + #ifdef CONFIG_CPU_CP15 { unsigned int ctrl; @@ -315,9 +307,8 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); - __backtrace(); + dump_stack(); } ATOMIC_NOTIFIER_HEAD(thread_notify_head); @@ -354,23 +345,34 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); int copy_thread(unsigned long clone_flags, unsigned long stack_start, - unsigned long stk_sz, struct task_struct *p, struct pt_regs *regs) + unsigned long stk_sz, struct task_struct *p) { struct thread_info *thread = task_thread_info(p); struct pt_regs *childregs = task_pt_regs(p); - *childregs = *regs; - childregs->ARM_r0 = 0; - childregs->ARM_sp = stack_start; - memset(&thread->cpu_context, 0, sizeof(struct cpu_context_save)); - thread->cpu_context.sp = (unsigned long)childregs; + + if (likely(!(p->flags & PF_KTHREAD))) { + *childregs = *current_pt_regs(); + childregs->ARM_r0 = 0; + if (stack_start) + childregs->ARM_sp = stack_start; + } else { + memset(childregs, 0, sizeof(struct pt_regs)); + thread->cpu_context.r4 = stk_sz; + thread->cpu_context.r5 = stack_start; + childregs->ARM_cpsr = SVC_MODE; + } thread->cpu_context.pc = (unsigned long)ret_from_fork; + thread->cpu_context.sp = (unsigned long)childregs; clear_ptrace_hw_breakpoint(p); if (clone_flags & CLONE_SETTLS) - thread->tp_value = regs->ARM_r3; + thread->tp_value[0] = childregs->ARM_r3; + thread->tp_value[1] = get_tpuser(); + + thread_notify(THREAD_NOTIFY_COPY, thread); return 0; } @@ -399,66 +401,10 @@ int dump_fpu (struct pt_regs *regs, struct user_fp *fp) } EXPORT_SYMBOL(dump_fpu); -/* - * Shuffle the argument into the correct register before calling the - * thread function. r4 is the thread argument, r5 is the pointer to - * the thread function, and r6 points to the exit function. - */ -extern void kernel_thread_helper(void); -asm( ".pushsection .text\n" -" .align\n" -" .type kernel_thread_helper, #function\n" -"kernel_thread_helper:\n" -#ifdef CONFIG_TRACE_IRQFLAGS -" bl trace_hardirqs_on\n" -#endif -" msr cpsr_c, r7\n" -" mov r0, r4\n" -" mov lr, r6\n" -" mov pc, r5\n" -" .size kernel_thread_helper, . - kernel_thread_helper\n" -" .popsection"); - -#ifdef CONFIG_ARM_UNWIND -extern void kernel_thread_exit(long code); -asm( ".pushsection .text\n" -" .align\n" -" .type kernel_thread_exit, #function\n" -"kernel_thread_exit:\n" -" .fnstart\n" -" .cantunwind\n" -" bl do_exit\n" -" nop\n" -" .fnend\n" -" .size kernel_thread_exit, . - kernel_thread_exit\n" -" .popsection"); -#else -#define kernel_thread_exit do_exit -#endif - -/* - * Create a kernel thread. - */ -pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags) -{ - struct pt_regs regs; - - memset(®s, 0, sizeof(regs)); - - regs.ARM_r4 = (unsigned long)arg; - regs.ARM_r5 = (unsigned long)fn; - regs.ARM_r6 = (unsigned long)kernel_thread_exit; - regs.ARM_r7 = SVC_MODE | PSR_ENDSTATE | PSR_ISETSTATE; - regs.ARM_pc = (unsigned long)kernel_thread_helper; - regs.ARM_cpsr = regs.ARM_r7 | PSR_I_BIT; - - return do_fork(flags|CLONE_VM|CLONE_UNTRACED, 0, ®s, 0, NULL, NULL); -} -EXPORT_SYMBOL(kernel_thread); - unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; + unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; @@ -467,9 +413,11 @@ unsigned long get_wchan(struct task_struct *p) frame.sp = thread_saved_sp(p); frame.lr = 0; /* recovered from the stack */ frame.pc = thread_saved_pc(p); + stack_page = (unsigned long)task_stack_page(p); do { - int ret = unwind_frame(&frame); - if (ret < 0) + if (frame.sp < stack_page || + frame.sp >= stack_page + THREAD_SIZE || + unwind_frame(&frame) < 0) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; @@ -483,23 +431,82 @@ unsigned long arch_randomize_brk(struct mm_struct *mm) return randomize_range(mm->brk, range_end, 0) ? : mm->brk; } +#ifdef CONFIG_MMU +#ifdef CONFIG_KUSER_HELPERS /* * The vectors page is always readable from user space for the - * atomic helpers and the signal restart code. Let's declare a mapping - * for it so it is visible through ptrace and /proc/<pid>/mem. + * atomic helpers. Insert it into the gate_vma so that it is visible + * through ptrace and /proc/<pid>/mem. */ +static struct vm_area_struct gate_vma = { + .vm_start = 0xffff0000, + .vm_end = 0xffff0000 + PAGE_SIZE, + .vm_flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYEXEC, +}; -int vectors_user_mapping(void) +static int __init gate_vma_init(void) { - struct mm_struct *mm = current->mm; - return install_special_mapping(mm, 0xffff0000, PAGE_SIZE, - VM_READ | VM_EXEC | - VM_MAYREAD | VM_MAYEXEC | - VM_ALWAYSDUMP | VM_RESERVED, - NULL); + gate_vma.vm_page_prot = PAGE_READONLY_EXEC; + return 0; +} +arch_initcall(gate_vma_init); + +struct vm_area_struct *get_gate_vma(struct mm_struct *mm) +{ + return &gate_vma; } +int in_gate_area(struct mm_struct *mm, unsigned long addr) +{ + return (addr >= gate_vma.vm_start) && (addr < gate_vma.vm_end); +} + +int in_gate_area_no_mm(unsigned long addr) +{ + return in_gate_area(NULL, addr); +} +#define is_gate_vma(vma) ((vma) == &gate_vma) +#else +#define is_gate_vma(vma) 0 +#endif + const char *arch_vma_name(struct vm_area_struct *vma) { - return (vma->vm_start == 0xffff0000) ? "[vectors]" : NULL; + return is_gate_vma(vma) ? "[vectors]" : + (vma->vm_mm && vma->vm_start == vma->vm_mm->context.sigpage) ? + "[sigpage]" : NULL; +} + +static struct page *signal_page; +extern struct page *get_signal_page(void); + +int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) +{ + struct mm_struct *mm = current->mm; + unsigned long addr; + int ret; + + if (!signal_page) + signal_page = get_signal_page(); + if (!signal_page) + return -ENOMEM; + + down_write(&mm->mmap_sem); + addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); + if (IS_ERR_VALUE(addr)) { + ret = addr; + goto up_fail; + } + + ret = install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC, + &signal_page); + + if (ret == 0) + mm->context.sigpage = addr; + + up_fail: + up_write(&mm->mmap_sem); + return ret; } +#endif diff --git a/arch/arm/kernel/psci.c b/arch/arm/kernel/psci.c new file mode 100644 index 00000000000..f73891b6b73 --- /dev/null +++ b/arch/arm/kernel/psci.c @@ -0,0 +1,332 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#define pr_fmt(fmt) "psci: " fmt + +#include <linux/init.h> +#include <linux/of.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <uapi/linux/psci.h> + +#include <asm/compiler.h> +#include <asm/errno.h> +#include <asm/opcodes-sec.h> +#include <asm/opcodes-virt.h> +#include <asm/psci.h> +#include <asm/system_misc.h> + +struct psci_operations psci_ops; + +static int (*invoke_psci_fn)(u32, u32, u32, u32); +typedef int (*psci_initcall_t)(const struct device_node *); + +enum psci_function { + PSCI_FN_CPU_SUSPEND, + PSCI_FN_CPU_ON, + PSCI_FN_CPU_OFF, + PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, + PSCI_FN_MAX, +}; + +static u32 psci_function_id[PSCI_FN_MAX]; + +static int psci_to_linux_errno(int errno) +{ + switch (errno) { + case PSCI_RET_SUCCESS: + return 0; + case PSCI_RET_NOT_SUPPORTED: + return -EOPNOTSUPP; + case PSCI_RET_INVALID_PARAMS: + return -EINVAL; + case PSCI_RET_DENIED: + return -EPERM; + }; + + return -EINVAL; +} + +static u32 psci_power_state_pack(struct psci_power_state state) +{ + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); +} + +/* + * The following two functions are invoked via the invoke_psci_fn pointer + * and will not be inlined, allowing us to piggyback on the AAPCS. + */ +static noinline int __invoke_psci_fn_hvc(u32 function_id, u32 arg0, u32 arg1, + u32 arg2) +{ + asm volatile( + __asmeq("%0", "r0") + __asmeq("%1", "r1") + __asmeq("%2", "r2") + __asmeq("%3", "r3") + __HVC(0) + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static noinline int __invoke_psci_fn_smc(u32 function_id, u32 arg0, u32 arg1, + u32 arg2) +{ + asm volatile( + __asmeq("%0", "r0") + __asmeq("%1", "r1") + __asmeq("%2", "r2") + __asmeq("%3", "r3") + __SMC(0) + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + +static int psci_cpu_suspend(struct psci_power_state state, + unsigned long entry_point) +{ + int err; + u32 fn, power_state; + + fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_off(struct psci_power_state state) +{ + int err; + u32 fn, power_state; + + fn = psci_function_id[PSCI_FN_CPU_OFF]; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_CPU_ON]; + err = invoke_psci_fn(fn, cpuid, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_migrate(unsigned long cpuid) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE]; + err = invoke_psci_fn(fn, cpuid, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} + +static int psci_migrate_info_type(void) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} + +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; + + pr_info("probing for conduit method from DT.\n"); + + if (of_property_read_string(np, "method", &method)) { + pr_warn("missing \"method\" property\n"); + return -ENXIO; + } + + if (!strcmp("hvc", method)) { + invoke_psci_fn = __invoke_psci_fn_hvc; + } else if (!strcmp("smc", method)) { + invoke_psci_fn = __invoke_psci_fn_smc; + } else { + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; + } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; + goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } + } + + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); + + if (!of_property_read_u32(np, "cpu_suspend", &id)) { + psci_function_id[PSCI_FN_CPU_SUSPEND] = id; + psci_ops.cpu_suspend = psci_cpu_suspend; + } + + if (!of_property_read_u32(np, "cpu_off", &id)) { + psci_function_id[PSCI_FN_CPU_OFF] = id; + psci_ops.cpu_off = psci_cpu_off; + } + + if (!of_property_read_u32(np, "cpu_on", &id)) { + psci_function_id[PSCI_FN_CPU_ON] = id; + psci_ops.cpu_on = psci_cpu_on; + } + + if (!of_property_read_u32(np, "migrate", &id)) { + psci_function_id[PSCI_FN_MIGRATE] = id; + psci_ops.migrate = psci_migrate; + } + +out_put_node: + of_node_put(np); + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); +} diff --git a/arch/arm/kernel/psci_smp.c b/arch/arm/kernel/psci_smp.c new file mode 100644 index 00000000000..28a1db4da70 --- /dev/null +++ b/arch/arm/kernel/psci_smp.c @@ -0,0 +1,115 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2012 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#include <linux/init.h> +#include <linux/smp.h> +#include <linux/of.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h> + +#include <asm/psci.h> +#include <asm/smp_plat.h> + +/* + * psci_smp assumes that the following is true about PSCI: + * + * cpu_suspend Suspend the execution on a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * cpu_off Power down a CPU + * @state we don't currently describe affinity levels, so just pass 0. + * no return on successful call + * + * cpu_on Power up a CPU + * @cpuid cpuid of target CPU, as from MPIDR + * @entry_point the first instruction to be executed on return + * returns 0 success, < 0 on failure + * + * migrate Migrate the context to a different CPU + * @cpuid cpuid of target CPU, as from MPIDR + * returns 0 success, < 0 on failure + * + */ + +extern void secondary_startup(void); + +static int psci_boot_secondary(unsigned int cpu, struct task_struct *idle) +{ + if (psci_ops.cpu_on) + return psci_ops.cpu_on(cpu_logical_map(cpu), + __pa(secondary_startup)); + return -ENODEV; +} + +#ifdef CONFIG_HOTPLUG_CPU +void __ref psci_cpu_die(unsigned int cpu) +{ + const struct psci_power_state ps = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + if (psci_ops.cpu_off) + psci_ops.cpu_off(ps); + + /* We should never return */ + panic("psci: cpu %d failed to shutdown\n", cpu); +} + +int __ref psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make platform_cpu_kill() fail. */ + return 0; +} + +#endif + +bool __init psci_smp_available(void) +{ + /* is cpu_on available at least? */ + return (psci_ops.cpu_on != NULL); +} + +struct smp_operations __initdata psci_smp_ops = { + .smp_boot_secondary = psci_boot_secondary, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_die = psci_cpu_die, + .cpu_kill = psci_cpu_kill, +#endif +}; diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 3e97483abcf..0c27ed6f3f2 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -12,6 +12,7 @@ #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> +#include <linux/elf.h> #include <linux/smp.h> #include <linux/ptrace.h> #include <linux/user.h> @@ -21,12 +22,16 @@ #include <linux/uaccess.h> #include <linux/perf_event.h> #include <linux/hw_breakpoint.h> +#include <linux/regset.h> +#include <linux/audit.h> +#include <linux/tracehook.h> +#include <linux/unistd.h> #include <asm/pgtable.h> -#include <asm/system.h> #include <asm/traps.h> -#include "ptrace.h" +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> #define REG_PC 15 #define REG_PSR 16 @@ -184,389 +189,12 @@ put_user_reg(struct task_struct *task, int offset, long data) return ret; } -static inline int -read_u32(struct task_struct *task, unsigned long addr, u32 *res) -{ - int ret; - - ret = access_process_vm(task, addr, res, sizeof(*res), 0); - - return ret == sizeof(*res) ? 0 : -EIO; -} - -static inline int -read_instr(struct task_struct *task, unsigned long addr, u32 *res) -{ - int ret; - - if (addr & 1) { - u16 val; - ret = access_process_vm(task, addr & ~1, &val, sizeof(val), 0); - ret = ret == sizeof(val) ? 0 : -EIO; - *res = val; - } else { - u32 val; - ret = access_process_vm(task, addr & ~3, &val, sizeof(val), 0); - ret = ret == sizeof(val) ? 0 : -EIO; - *res = val; - } - return ret; -} - -/* - * Get value of register `rn' (in the instruction) - */ -static unsigned long -ptrace_getrn(struct task_struct *child, unsigned long insn) -{ - unsigned int reg = (insn >> 16) & 15; - unsigned long val; - - val = get_user_reg(child, reg); - if (reg == 15) - val += 8; - - return val; -} - -/* - * Get value of operand 2 (in an ALU instruction) - */ -static unsigned long -ptrace_getaluop2(struct task_struct *child, unsigned long insn) -{ - unsigned long val; - int shift; - int type; - - if (insn & 1 << 25) { - val = insn & 255; - shift = (insn >> 8) & 15; - type = 3; - } else { - val = get_user_reg (child, insn & 15); - - if (insn & (1 << 4)) - shift = (int)get_user_reg (child, (insn >> 8) & 15); - else - shift = (insn >> 7) & 31; - - type = (insn >> 5) & 3; - } - - switch (type) { - case 0: val <<= shift; break; - case 1: val >>= shift; break; - case 2: - val = (((signed long)val) >> shift); - break; - case 3: - val = (val >> shift) | (val << (32 - shift)); - break; - } - return val; -} - -/* - * Get value of operand 2 (in a LDR instruction) - */ -static unsigned long -ptrace_getldrop2(struct task_struct *child, unsigned long insn) -{ - unsigned long val; - int shift; - int type; - - val = get_user_reg(child, insn & 15); - shift = (insn >> 7) & 31; - type = (insn >> 5) & 3; - - switch (type) { - case 0: val <<= shift; break; - case 1: val >>= shift; break; - case 2: - val = (((signed long)val) >> shift); - break; - case 3: - val = (val >> shift) | (val << (32 - shift)); - break; - } - return val; -} - -#define OP_MASK 0x01e00000 -#define OP_AND 0x00000000 -#define OP_EOR 0x00200000 -#define OP_SUB 0x00400000 -#define OP_RSB 0x00600000 -#define OP_ADD 0x00800000 -#define OP_ADC 0x00a00000 -#define OP_SBC 0x00c00000 -#define OP_RSC 0x00e00000 -#define OP_ORR 0x01800000 -#define OP_MOV 0x01a00000 -#define OP_BIC 0x01c00000 -#define OP_MVN 0x01e00000 - -static unsigned long -get_branch_address(struct task_struct *child, unsigned long pc, unsigned long insn) -{ - u32 alt = 0; - - switch (insn & 0x0e000000) { - case 0x00000000: - case 0x02000000: { - /* - * data processing - */ - long aluop1, aluop2, ccbit; - - if ((insn & 0x0fffffd0) == 0x012fff10) { - /* - * bx or blx - */ - alt = get_user_reg(child, insn & 15); - break; - } - - - if ((insn & 0xf000) != 0xf000) - break; - - aluop1 = ptrace_getrn(child, insn); - aluop2 = ptrace_getaluop2(child, insn); - ccbit = get_user_reg(child, REG_PSR) & PSR_C_BIT ? 1 : 0; - - switch (insn & OP_MASK) { - case OP_AND: alt = aluop1 & aluop2; break; - case OP_EOR: alt = aluop1 ^ aluop2; break; - case OP_SUB: alt = aluop1 - aluop2; break; - case OP_RSB: alt = aluop2 - aluop1; break; - case OP_ADD: alt = aluop1 + aluop2; break; - case OP_ADC: alt = aluop1 + aluop2 + ccbit; break; - case OP_SBC: alt = aluop1 - aluop2 + ccbit; break; - case OP_RSC: alt = aluop2 - aluop1 + ccbit; break; - case OP_ORR: alt = aluop1 | aluop2; break; - case OP_MOV: alt = aluop2; break; - case OP_BIC: alt = aluop1 & ~aluop2; break; - case OP_MVN: alt = ~aluop2; break; - } - break; - } - - case 0x04000000: - case 0x06000000: - /* - * ldr - */ - if ((insn & 0x0010f000) == 0x0010f000) { - unsigned long base; - - base = ptrace_getrn(child, insn); - if (insn & 1 << 24) { - long aluop2; - - if (insn & 0x02000000) - aluop2 = ptrace_getldrop2(child, insn); - else - aluop2 = insn & 0xfff; - - if (insn & 1 << 23) - base += aluop2; - else - base -= aluop2; - } - read_u32(child, base, &alt); - } - break; - - case 0x08000000: - /* - * ldm - */ - if ((insn & 0x00108000) == 0x00108000) { - unsigned long base; - unsigned int nr_regs; - - if (insn & (1 << 23)) { - nr_regs = hweight16(insn & 65535) << 2; - - if (!(insn & (1 << 24))) - nr_regs -= 4; - } else { - if (insn & (1 << 24)) - nr_regs = -4; - else - nr_regs = 0; - } - - base = ptrace_getrn(child, insn); - - read_u32(child, base + nr_regs, &alt); - break; - } - break; - - case 0x0a000000: { - /* - * bl or b - */ - signed long displ; - /* It's a branch/branch link: instead of trying to - * figure out whether the branch will be taken or not, - * we'll put a breakpoint at both locations. This is - * simpler, more reliable, and probably not a whole lot - * slower than the alternative approach of emulating the - * branch. - */ - displ = (insn & 0x00ffffff) << 8; - displ = (displ >> 6) + 8; - if (displ != 0 && displ != 4) - alt = pc + displ; - } - break; - } - - return alt; -} - -static int -swap_insn(struct task_struct *task, unsigned long addr, - void *old_insn, void *new_insn, int size) -{ - int ret; - - ret = access_process_vm(task, addr, old_insn, size, 0); - if (ret == size) - ret = access_process_vm(task, addr, new_insn, size, 1); - return ret; -} - -static void -add_breakpoint(struct task_struct *task, struct debug_info *dbg, unsigned long addr) -{ - int nr = dbg->nsaved; - - if (nr < 2) { - u32 new_insn = BREAKINST_ARM; - int res; - - res = swap_insn(task, addr, &dbg->bp[nr].insn, &new_insn, 4); - - if (res == 4) { - dbg->bp[nr].address = addr; - dbg->nsaved += 1; - } - } else - printk(KERN_ERR "ptrace: too many breakpoints\n"); -} - -/* - * Clear one breakpoint in the user program. We copy what the hardware - * does and use bit 0 of the address to indicate whether this is a Thumb - * breakpoint or an ARM breakpoint. - */ -static void clear_breakpoint(struct task_struct *task, struct debug_entry *bp) -{ - unsigned long addr = bp->address; - union debug_insn old_insn; - int ret; - - if (addr & 1) { - ret = swap_insn(task, addr & ~1, &old_insn.thumb, - &bp->insn.thumb, 2); - - if (ret != 2 || old_insn.thumb != BREAKINST_THUMB) - printk(KERN_ERR "%s:%d: corrupted Thumb breakpoint at " - "0x%08lx (0x%04x)\n", task->comm, - task_pid_nr(task), addr, old_insn.thumb); - } else { - ret = swap_insn(task, addr & ~3, &old_insn.arm, - &bp->insn.arm, 4); - - if (ret != 4 || old_insn.arm != BREAKINST_ARM) - printk(KERN_ERR "%s:%d: corrupted ARM breakpoint at " - "0x%08lx (0x%08x)\n", task->comm, - task_pid_nr(task), addr, old_insn.arm); - } -} - -void ptrace_set_bpt(struct task_struct *child) -{ - struct pt_regs *regs; - unsigned long pc; - u32 insn; - int res; - - regs = task_pt_regs(child); - pc = instruction_pointer(regs); - - if (thumb_mode(regs)) { - printk(KERN_WARNING "ptrace: can't handle thumb mode\n"); - return; - } - - res = read_instr(child, pc, &insn); - if (!res) { - struct debug_info *dbg = &child->thread.debug; - unsigned long alt; - - dbg->nsaved = 0; - - alt = get_branch_address(child, pc, insn); - if (alt) - add_breakpoint(child, dbg, alt); - - /* - * Note that we ignore the result of setting the above - * breakpoint since it may fail. When it does, this is - * not so much an error, but a forewarning that we may - * be receiving a prefetch abort shortly. - * - * If we don't set this breakpoint here, then we can - * lose control of the thread during single stepping. - */ - if (!alt || predicate(insn) != PREDICATE_ALWAYS) - add_breakpoint(child, dbg, pc + 4); - } -} - -/* - * Ensure no single-step breakpoint is pending. Returns non-zero - * value if child was being single-stepped. - */ -void ptrace_cancel_bpt(struct task_struct *child) -{ - int i, nsaved = child->thread.debug.nsaved; - - child->thread.debug.nsaved = 0; - - if (nsaved > 2) { - printk("ptrace_cancel_bpt: bogus nsaved: %d!\n", nsaved); - nsaved = 2; - } - - for (i = 0; i < nsaved; i++) - clear_breakpoint(child, &child->thread.debug.bp[i]); -} - -void user_disable_single_step(struct task_struct *task) -{ - task->ptrace &= ~PT_SINGLESTEP; - ptrace_cancel_bpt(task); -} - -void user_enable_single_step(struct task_struct *task) -{ - task->ptrace |= PT_SINGLESTEP; -} - /* * Called by kernel/ptrace.c when detaching.. */ void ptrace_disable(struct task_struct *child) { - user_disable_single_step(child); + /* Nothing to do. */ } /* @@ -576,8 +204,6 @@ void ptrace_break(struct task_struct *tsk, struct pt_regs *regs) { siginfo_t info; - ptrace_cancel_bpt(tsk); - info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_BRKPT; @@ -608,34 +234,12 @@ static struct undef_hook thumb_break_hook = { .fn = break_trap, }; -static int thumb2_break_trap(struct pt_regs *regs, unsigned int instr) -{ - unsigned int instr2; - void __user *pc; - - /* Check the second half of the instruction. */ - pc = (void __user *)(instruction_pointer(regs) + 2); - - if (processor_mode(regs) == SVC_MODE) { - instr2 = *(u16 *) pc; - } else { - get_user(instr2, (u16 __user *)pc); - } - - if (instr2 == 0xa000) { - ptrace_break(current, regs); - return 0; - } else { - return 1; - } -} - static struct undef_hook thumb2_break_hook = { - .instr_mask = 0xffff, - .instr_val = 0xf7f0, + .instr_mask = 0xffffffff, + .instr_val = 0xf7f0a000, .cpsr_mask = PSR_T_BIT, .cpsr_val = PSR_T_BIT, - .fn = thumb2_break_trap, + .fn = break_trap, }; static int __init ptrace_break_init(void) @@ -657,7 +261,7 @@ static int ptrace_read_user(struct task_struct *tsk, unsigned long off, { unsigned long tmp; - if (off & 3 || off >= sizeof(struct user)) + if (off & 3) return -EIO; tmp = 0; @@ -669,6 +273,8 @@ static int ptrace_read_user(struct task_struct *tsk, unsigned long off, tmp = tsk->mm->end_code; else if (off < sizeof(struct pt_regs)) tmp = get_user_reg(tsk, off >> 2); + else if (off >= sizeof(struct user)) + return -EIO; return put_user(tmp, ret); } @@ -689,58 +295,6 @@ static int ptrace_write_user(struct task_struct *tsk, unsigned long off, return put_user_reg(tsk, off >> 2, val); } -/* - * Get all user integer registers. - */ -static int ptrace_getregs(struct task_struct *tsk, void __user *uregs) -{ - struct pt_regs *regs = task_pt_regs(tsk); - - return copy_to_user(uregs, regs, sizeof(struct pt_regs)) ? -EFAULT : 0; -} - -/* - * Set all user integer registers. - */ -static int ptrace_setregs(struct task_struct *tsk, void __user *uregs) -{ - struct pt_regs newregs; - int ret; - - ret = -EFAULT; - if (copy_from_user(&newregs, uregs, sizeof(struct pt_regs)) == 0) { - struct pt_regs *regs = task_pt_regs(tsk); - - ret = -EINVAL; - if (valid_user_regs(&newregs)) { - *regs = newregs; - ret = 0; - } - } - - return ret; -} - -/* - * Get the child FPU state. - */ -static int ptrace_getfpregs(struct task_struct *tsk, void __user *ufp) -{ - return copy_to_user(ufp, &task_thread_info(tsk)->fpstate, - sizeof(struct user_fp)) ? -EFAULT : 0; -} - -/* - * Set the child FPU state. - */ -static int ptrace_setfpregs(struct task_struct *tsk, void __user *ufp) -{ - struct thread_info *thread = task_thread_info(tsk); - thread->used_cp[1] = thread->used_cp[2] = 1; - return copy_from_user(&thread->fpstate, ufp, - sizeof(struct user_fp)) ? -EFAULT : 0; -} - #ifdef CONFIG_IWMMXT /* @@ -799,56 +353,6 @@ static int ptrace_setcrunchregs(struct task_struct *tsk, void __user *ufp) } #endif -#ifdef CONFIG_VFP -/* - * Get the child VFP state. - */ -static int ptrace_getvfpregs(struct task_struct *tsk, void __user *data) -{ - struct thread_info *thread = task_thread_info(tsk); - union vfp_state *vfp = &thread->vfpstate; - struct user_vfp __user *ufp = data; - - vfp_sync_hwstate(thread); - - /* copy the floating point registers */ - if (copy_to_user(&ufp->fpregs, &vfp->hard.fpregs, - sizeof(vfp->hard.fpregs))) - return -EFAULT; - - /* copy the status and control register */ - if (put_user(vfp->hard.fpscr, &ufp->fpscr)) - return -EFAULT; - - return 0; -} - -/* - * Set the child VFP state. - */ -static int ptrace_setvfpregs(struct task_struct *tsk, void __user *data) -{ - struct thread_info *thread = task_thread_info(tsk); - union vfp_state *vfp = &thread->vfpstate; - struct user_vfp __user *ufp = data; - - vfp_sync_hwstate(thread); - - /* copy the floating point registers */ - if (copy_from_user(&vfp->hard.fpregs, &ufp->fpregs, - sizeof(vfp->hard.fpregs))) - return -EFAULT; - - /* copy the status and control register */ - if (get_user(vfp->hard.fpscr, &ufp->fpscr)) - return -EFAULT; - - vfp_flush_hwstate(thread); - - return 0; -} -#endif - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Convert a virtual register number into an index for a thread_info @@ -878,7 +382,7 @@ static long ptrace_hbp_idx_to_num(int idx) /* * Handle hitting a HW-breakpoint. */ -static void ptrace_hbptriggered(struct perf_event *bp, int unused, +static void ptrace_hbptriggered(struct perf_event *bp, struct perf_sample_data *data, struct pt_regs *regs) { @@ -961,7 +465,8 @@ static struct perf_event *ptrace_hbp_create(struct task_struct *tsk, int type) attr.bp_type = type; attr.disabled = 1; - return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, tsk); + return register_user_hw_breakpoint(&attr, ptrace_hbptriggered, NULL, + tsk); } static int ptrace_gethbpregs(struct task_struct *tsk, long num, @@ -996,10 +501,10 @@ static int ptrace_gethbpregs(struct task_struct *tsk, long num, while (!(arch_ctrl.len & 0x1)) arch_ctrl.len >>= 1; - if (idx & 0x1) - reg = encode_ctrl_reg(arch_ctrl); - else + if (num & 0x1) reg = bp->attr.bp_addr; + else + reg = encode_ctrl_reg(arch_ctrl); } put: @@ -1060,8 +565,8 @@ static int ptrace_sethbpregs(struct task_struct *tsk, long num, goto out; if ((gen_type & implied_type) != gen_type) { - ret = -EINVAL; - goto out; + ret = -EINVAL; + goto out; } attr.bp_len = gen_len; @@ -1075,6 +580,221 @@ out: } #endif +/* regset get/set implementations */ + +static int gpr_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + struct pt_regs *regs = task_pt_regs(target); + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + regs, + 0, sizeof(*regs)); +} + +static int gpr_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct pt_regs newregs; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &newregs, + 0, sizeof(newregs)); + if (ret) + return ret; + + if (!valid_user_regs(&newregs)) + return -EINVAL; + + *task_pt_regs(target) = newregs; + return 0; +} + +static int fpa_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &task_thread_info(target)->fpstate, + 0, sizeof(struct user_fp)); +} + +static int fpa_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + struct thread_info *thread = task_thread_info(target); + + thread->used_cp[1] = thread->used_cp[2] = 1; + + return user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &thread->fpstate, + 0, sizeof(struct user_fp)); +} + +#ifdef CONFIG_VFP +/* + * VFP register get/set implementations. + * + * With respect to the kernel, struct user_fp is divided into three chunks: + * 16 or 32 real VFP registers (d0-d15 or d0-31) + * These are transferred to/from the real registers in the task's + * vfp_hard_struct. The number of registers depends on the kernel + * configuration. + * + * 16 or 0 fake VFP registers (d16-d31 or empty) + * i.e., the user_vfp structure has space for 32 registers even if + * the kernel doesn't have them all. + * + * vfp_get() reads this chunk as zero where applicable + * vfp_set() ignores this chunk + * + * 1 word for the FPSCR + * + * The bounds-checking logic built into user_regset_copyout and friends + * means that we can make a simple sequence of calls to map the relevant data + * to/from the specified slice of the user regset structure. + */ +static int vfp_get(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + void *kbuf, void __user *ubuf) +{ + int ret; + struct thread_info *thread = task_thread_info(target); + struct vfp_hard_struct const *vfp = &thread->vfpstate.hard; + const size_t user_fpregs_offset = offsetof(struct user_vfp, fpregs); + const size_t user_fpscr_offset = offsetof(struct user_vfp, fpscr); + + vfp_sync_hwstate(thread); + + ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &vfp->fpregs, + user_fpregs_offset, + user_fpregs_offset + sizeof(vfp->fpregs)); + if (ret) + return ret; + + ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf, + user_fpregs_offset + sizeof(vfp->fpregs), + user_fpscr_offset); + if (ret) + return ret; + + return user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &vfp->fpscr, + user_fpscr_offset, + user_fpscr_offset + sizeof(vfp->fpscr)); +} + +/* + * For vfp_set() a read-modify-write is done on the VFP registers, + * in order to avoid writing back a half-modified set of registers on + * failure. + */ +static int vfp_set(struct task_struct *target, + const struct user_regset *regset, + unsigned int pos, unsigned int count, + const void *kbuf, const void __user *ubuf) +{ + int ret; + struct thread_info *thread = task_thread_info(target); + struct vfp_hard_struct new_vfp; + const size_t user_fpregs_offset = offsetof(struct user_vfp, fpregs); + const size_t user_fpscr_offset = offsetof(struct user_vfp, fpscr); + + vfp_sync_hwstate(thread); + new_vfp = thread->vfpstate.hard; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_vfp.fpregs, + user_fpregs_offset, + user_fpregs_offset + sizeof(new_vfp.fpregs)); + if (ret) + return ret; + + ret = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + user_fpregs_offset + sizeof(new_vfp.fpregs), + user_fpscr_offset); + if (ret) + return ret; + + ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, + &new_vfp.fpscr, + user_fpscr_offset, + user_fpscr_offset + sizeof(new_vfp.fpscr)); + if (ret) + return ret; + + vfp_flush_hwstate(thread); + thread->vfpstate.hard = new_vfp; + + return 0; +} +#endif /* CONFIG_VFP */ + +enum arm_regset { + REGSET_GPR, + REGSET_FPR, +#ifdef CONFIG_VFP + REGSET_VFP, +#endif +}; + +static const struct user_regset arm_regsets[] = { + [REGSET_GPR] = { + .core_note_type = NT_PRSTATUS, + .n = ELF_NGREG, + .size = sizeof(u32), + .align = sizeof(u32), + .get = gpr_get, + .set = gpr_set + }, + [REGSET_FPR] = { + /* + * For the FPA regs in fpstate, the real fields are a mixture + * of sizes, so pretend that the registers are word-sized: + */ + .core_note_type = NT_PRFPREG, + .n = sizeof(struct user_fp) / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = fpa_get, + .set = fpa_set + }, +#ifdef CONFIG_VFP + [REGSET_VFP] = { + /* + * Pretend that the VFP regs are word-sized, since the FPSCR is + * a single word dangling at the end of struct user_vfp: + */ + .core_note_type = NT_ARM_VFP, + .n = ARM_VFPREGS_SIZE / sizeof(u32), + .size = sizeof(u32), + .align = sizeof(u32), + .get = vfp_get, + .set = vfp_set + }, +#endif /* CONFIG_VFP */ +}; + +static const struct user_regset_view user_arm_view = { + .name = "arm", .e_machine = ELF_ARCH, .ei_osabi = ELF_OSABI, + .regsets = arm_regsets, .n = ARRAY_SIZE(arm_regsets) +}; + +const struct user_regset_view *task_user_regset_view(struct task_struct *task) +{ + return &user_arm_view; +} + long arch_ptrace(struct task_struct *child, long request, unsigned long addr, unsigned long data) { @@ -1091,19 +811,31 @@ long arch_ptrace(struct task_struct *child, long request, break; case PTRACE_GETREGS: - ret = ptrace_getregs(child, datap); + ret = copy_regset_to_user(child, + &user_arm_view, REGSET_GPR, + 0, sizeof(struct pt_regs), + datap); break; case PTRACE_SETREGS: - ret = ptrace_setregs(child, datap); + ret = copy_regset_from_user(child, + &user_arm_view, REGSET_GPR, + 0, sizeof(struct pt_regs), + datap); break; case PTRACE_GETFPREGS: - ret = ptrace_getfpregs(child, datap); + ret = copy_regset_to_user(child, + &user_arm_view, REGSET_FPR, + 0, sizeof(union fp_state), + datap); break; - + case PTRACE_SETFPREGS: - ret = ptrace_setfpregs(child, datap); + ret = copy_regset_from_user(child, + &user_arm_view, REGSET_FPR, + 0, sizeof(union fp_state), + datap); break; #ifdef CONFIG_IWMMXT @@ -1117,7 +849,7 @@ long arch_ptrace(struct task_struct *child, long request, #endif case PTRACE_GET_THREAD_AREA: - ret = put_user(task_thread_info(child)->tp_value, + ret = put_user(task_thread_info(child)->tp_value[0], datap); break; @@ -1138,11 +870,17 @@ long arch_ptrace(struct task_struct *child, long request, #ifdef CONFIG_VFP case PTRACE_GETVFPREGS: - ret = ptrace_getvfpregs(child, datap); + ret = copy_regset_to_user(child, + &user_arm_view, REGSET_VFP, + 0, ARM_VFPREGS_SIZE, + datap); break; case PTRACE_SETVFPREGS: - ret = ptrace_setvfpregs(child, datap); + ret = copy_regset_from_user(child, + &user_arm_view, REGSET_VFP, + 0, ARM_VFPREGS_SIZE, + datap); break; #endif @@ -1165,38 +903,70 @@ long arch_ptrace(struct task_struct *child, long request, return ret; } -asmlinkage int syscall_trace(int why, struct pt_regs *regs, int scno) +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; + +static void tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) { unsigned long ip; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return scno; - if (!(current->ptrace & PT_PTRACED)) - return scno; - /* - * Save IP. IP is used to denote syscall entry/exit: - * IP = 0 -> entry, = 1 -> exit + * IP is used to denote syscall entry/exit: + * IP = 0 -> entry, =1 -> exit */ ip = regs->ARM_ip; - regs->ARM_ip = why; + regs->ARM_ip = dir; + if (dir == PTRACE_SYSCALL_EXIT) + tracehook_report_syscall_exit(regs, 0); + else if (tracehook_report_syscall_entry(regs)) + current_thread_info()->syscall = -1; + + regs->ARM_ip = ip; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) +{ current_thread_info()->syscall = scno; - /* the 0x80 provides a way for the tracing parent to distinguish - between a syscall stop and SIGTRAP delivery */ - ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) - ? 0x80 : 0)); + /* Do the secure computing check first; failures should be fast. */ + if (secure_computing(scno) == -1) + return -1; + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + scno = current_thread_info()->syscall; + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, scno); + + audit_syscall_entry(AUDIT_ARCH_ARM, scno, regs->ARM_r0, regs->ARM_r1, + regs->ARM_r2, regs->ARM_r3); + + return scno; +} + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ /* - * this isn't the same as continuing with a signal, but it will do - * for normal use. strace only continues with a signal if the - * stopping signal is not SIGTRAP. -brl + * Audit the syscall before anything else, as a debugger may + * come in and change the current registers. */ - if (current->exit_code) { - send_sig(current->exit_code, current, 1); - current->exit_code = 0; - } - regs->ARM_ip = ip; + audit_syscall_exit(regs); + + /* + * Note that we haven't updated the ->syscall field for the + * current thread. This isn't a problem because it will have + * been set on syscall entry and there hasn't been an opportunity + * for a PTRACE_SET_SYSCALL since then. + */ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs_return_value(regs)); - return current_thread_info()->syscall; + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); } diff --git a/arch/arm/kernel/ptrace.h b/arch/arm/kernel/ptrace.h deleted file mode 100644 index 3926605b82e..00000000000 --- a/arch/arm/kernel/ptrace.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * linux/arch/arm/kernel/ptrace.h - * - * Copyright (C) 2000-2003 Russell King - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/ptrace.h> - -extern void ptrace_cancel_bpt(struct task_struct *); -extern void ptrace_set_bpt(struct task_struct *); -extern void ptrace_break(struct task_struct *, struct pt_regs *); - -/* - * Send SIGTRAP if we're single-stepping - */ -static inline void single_step_trap(struct task_struct *task) -{ - if (task->ptrace & PT_SINGLESTEP) { - ptrace_cancel_bpt(task); - send_sig(SIGTRAP, task, 1); - } -} - -static inline void single_step_clear(struct task_struct *task) -{ - if (task->ptrace & PT_SINGLESTEP) - ptrace_cancel_bpt(task); -} - -static inline void single_step_set(struct task_struct *task) -{ - if (task->ptrace & PT_SINGLESTEP) - ptrace_set_bpt(task); -} diff --git a/arch/arm/kernel/relocate_kernel.S b/arch/arm/kernel/relocate_kernel.S index fd26f8d6515..95858966d84 100644 --- a/arch/arm/kernel/relocate_kernel.S +++ b/arch/arm/kernel/relocate_kernel.S @@ -2,10 +2,12 @@ * relocate_kernel.S - put the kernel image in place to boot */ +#include <linux/linkage.h> #include <asm/kexec.h> - .globl relocate_new_kernel -relocate_new_kernel: + .align 3 /* not needed for this code, but keeps fncpy() happy */ + +ENTRY(relocate_new_kernel) ldr r0,kexec_indirection_page ldr r1,kexec_start_address @@ -57,7 +59,10 @@ relocate_new_kernel: mov r0,#0 ldr r1,kexec_mach_type ldr r2,kexec_boot_atags - mov pc,lr + ARM( mov pc, lr ) + THUMB( bx lr ) + + .align .globl kexec_start_address kexec_start_address: @@ -76,6 +81,8 @@ kexec_mach_type: kexec_boot_atags: .long 0x0 +ENDPROC(relocate_new_kernel) + relocate_new_kernel_end: .globl relocate_new_kernel_size diff --git a/arch/arm/kernel/return_address.c b/arch/arm/kernel/return_address.c index df246da4cec..fafedd86885 100644 --- a/arch/arm/kernel/return_address.c +++ b/arch/arm/kernel/return_address.c @@ -8,7 +8,8 @@ * under the terms of the GNU General Public License version 2 as published by * the Free Software Foundation. */ -#include <linux/module.h> +#include <linux/export.h> +#include <linux/ftrace.h> #if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) #include <linux/sched.h> @@ -25,7 +26,7 @@ static int save_return_addr(struct stackframe *frame, void *d) struct return_address_data *data = d; if (!data->level) { - data->addr = (void *)frame->lr; + data->addr = (void *)frame->pc; return 1; } else { @@ -40,7 +41,8 @@ void *return_address(unsigned int level) struct stackframe frame; register unsigned long current_sp asm ("sp"); - data.level = level + 1; + data.level = level + 2; + data.addr = NULL; frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_sp; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 336f14e0e5c..8a16ee5d8a9 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -7,7 +7,7 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/kernel.h> #include <linux/stddef.h> #include <linux/ioport.h> @@ -18,22 +18,26 @@ #include <linux/bootmem.h> #include <linux/seq_file.h> #include <linux/screen_info.h> +#include <linux/of_platform.h> #include <linux/init.h> #include <linux/kexec.h> -#include <linux/crash_dump.h> -#include <linux/root_dev.h> +#include <linux/of_fdt.h> #include <linux/cpu.h> #include <linux/interrupt.h> #include <linux/smp.h> -#include <linux/fs.h> #include <linux/proc_fs.h> #include <linux/memblock.h> +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/sort.h> #include <asm/unified.h> +#include <asm/cp15.h> #include <asm/cpu.h> #include <asm/cputype.h> #include <asm/elf.h> #include <asm/procinfo.h> +#include <asm/psci.h> #include <asm/sections.h> #include <asm/setup.h> #include <asm/smp_plat.h> @@ -42,21 +46,19 @@ #include <asm/cachetype.h> #include <asm/tlbflush.h> +#include <asm/prom.h> #include <asm/mach/arch.h> #include <asm/mach/irq.h> #include <asm/mach/time.h> +#include <asm/system_info.h> +#include <asm/system_misc.h> #include <asm/traps.h> #include <asm/unwind.h> +#include <asm/memblock.h> +#include <asm/virt.h> -#if defined(CONFIG_DEPRECATED_PARAM_STRUCT) -#include "compat.h" -#endif #include "atags.h" -#include "tcm.h" -#ifndef MEM_SIZE -#define MEM_SIZE (16*1024*1024) -#endif #if defined(CONFIG_FPE_NWFPE) || defined(CONFIG_FPE_FASTFPE) char fpe_type[8]; @@ -70,14 +72,19 @@ static int __init fpe_setup(char *line) __setup("fpe=", fpe_setup); #endif -extern void paging_init(struct machine_desc *desc); -extern void reboot_setup(char *str); +extern void init_default_cache_policy(unsigned long); +extern void paging_init(const struct machine_desc *desc); +extern void early_paging_init(const struct machine_desc *, + struct proc_info_list *); +extern void sanity_check_meminfo(void); +extern enum reboot_mode reboot_mode; +extern void setup_dma_zone(const struct machine_desc *desc); unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int __machine_arch_type; +unsigned int __machine_arch_type __read_mostly; EXPORT_SYMBOL(__machine_arch_type); -unsigned int cacheid; +unsigned int cacheid __read_mostly; EXPORT_SYMBOL(cacheid); unsigned int __atags_pointer __initdata; @@ -91,34 +98,46 @@ EXPORT_SYMBOL(system_serial_low); unsigned int system_serial_high; EXPORT_SYMBOL(system_serial_high); -unsigned int elf_hwcap; +unsigned int elf_hwcap __read_mostly; EXPORT_SYMBOL(elf_hwcap); +unsigned int elf_hwcap2 __read_mostly; +EXPORT_SYMBOL(elf_hwcap2); + #ifdef MULTI_CPU -struct processor processor; +struct processor processor __read_mostly; #endif #ifdef MULTI_TLB -struct cpu_tlb_fns cpu_tlb; +struct cpu_tlb_fns cpu_tlb __read_mostly; #endif #ifdef MULTI_USER -struct cpu_user_fns cpu_user; +struct cpu_user_fns cpu_user __read_mostly; #endif #ifdef MULTI_CACHE -struct cpu_cache_fns cpu_cache; +struct cpu_cache_fns cpu_cache __read_mostly; #endif #ifdef CONFIG_OUTER_CACHE -struct outer_cache_fns outer_cache; +struct outer_cache_fns outer_cache __read_mostly; EXPORT_SYMBOL(outer_cache); #endif +/* + * Cached cpu_architecture() result for use by assembler code. + * C code should use the cpu_architecture() function instead of accessing this + * variable directly. + */ +int __cpu_architecture __read_mostly = CPU_ARCH_UNKNOWN; + struct stack { u32 irq[3]; u32 abt[3]; u32 und[3]; } ____cacheline_aligned; +#ifndef CONFIG_CPU_V7M static struct stack stacks[NR_CPUS]; +#endif char elf_platform[ELF_PLATFORM_SIZE]; EXPORT_SYMBOL(elf_platform); @@ -126,8 +145,8 @@ EXPORT_SYMBOL(elf_platform); static const char *cpu_name; static const char *machine_name; static char __initdata cmd_line[COMMAND_LINE_SIZE]; +const struct machine_desc *machine_desc __initdata; -static char default_command_line[COMMAND_LINE_SIZE] __initdata = CONFIG_CMDLINE; static union { char c[4]; unsigned long l; } endian_test __initdata = { { 'l', '?', '?', 'b' } }; #define ENDIANNESS ((char)endian_test.l) @@ -144,7 +163,7 @@ static struct resource mem_res[] = { .flags = IORESOURCE_MEM }, { - .name = "Kernel text", + .name = "Kernel code", .start = 0, .end = 0, .flags = IORESOURCE_MEM @@ -197,7 +216,7 @@ static const char *proc_arch[] = { "5TEJ", "6TEJ", "7", - "?(11)", + "7M", "?(12)", "?(13)", "?(14)", @@ -206,7 +225,13 @@ static const char *proc_arch[] = { "?(17)", }; -int cpu_architecture(void) +#ifdef CONFIG_CPU_V7M +static int __get_cpu_architecture(void) +{ + return CPU_ARCH_ARMv7M; +} +#else +static int __get_cpu_architecture(void) { int cpu_arch; @@ -225,8 +250,8 @@ int cpu_architecture(void) * Register 0 and check for VMSAv7 or PMSAv7 */ asm("mrc p15, 0, %0, c0, c1, 4" : "=r" (mmfr0)); - if ((mmfr0 & 0x0000000f) == 0x00000003 || - (mmfr0 & 0x000000f0) == 0x00000030) + if ((mmfr0 & 0x0000000f) >= 0x00000003 || + (mmfr0 & 0x000000f0) >= 0x00000030) cpu_arch = CPU_ARCH_ARMv7; else if ((mmfr0 & 0x0000000f) == 0x00000002 || (mmfr0 & 0x000000f0) == 0x00000020) @@ -238,12 +263,24 @@ int cpu_architecture(void) return cpu_arch; } +#endif + +int __pure cpu_architecture(void) +{ + BUG_ON(__cpu_architecture == CPU_ARCH_UNKNOWN); + + return __cpu_architecture; +} static int cpu_has_aliasing_icache(unsigned int arch) { int aliasing_icache; unsigned int id_reg, num_sets, line_size; + /* PIPT caches never alias. */ + if (icache_is_pipt()) + return 0; + /* arch specifies the register format */ switch (arch) { case CPU_ARCH_ARMv7: @@ -270,35 +307,45 @@ static int cpu_has_aliasing_icache(unsigned int arch) static void __init cacheid_init(void) { - unsigned int cachetype = read_cpuid_cachetype(); unsigned int arch = cpu_architecture(); - if (arch >= CPU_ARCH_ARMv6) { + if (arch == CPU_ARCH_ARMv7M) { + cacheid = 0; + } else if (arch >= CPU_ARCH_ARMv6) { + unsigned int cachetype = read_cpuid_cachetype(); if ((cachetype & (7 << 29)) == 4 << 29) { /* ARMv7 register format */ + arch = CPU_ARCH_ARMv7; cacheid = CACHEID_VIPT_NONALIASING; - if ((cachetype & (3 << 14)) == 1 << 14) + switch (cachetype & (3 << 14)) { + case (1 << 14): cacheid |= CACHEID_ASID_TAGGED; - else if (cpu_has_aliasing_icache(CPU_ARCH_ARMv7)) - cacheid |= CACHEID_VIPT_I_ALIASING; - } else if (cachetype & (1 << 23)) { - cacheid = CACHEID_VIPT_ALIASING; + break; + case (3 << 14): + cacheid |= CACHEID_PIPT; + break; + } } else { - cacheid = CACHEID_VIPT_NONALIASING; - if (cpu_has_aliasing_icache(CPU_ARCH_ARMv6)) - cacheid |= CACHEID_VIPT_I_ALIASING; + arch = CPU_ARCH_ARMv6; + if (cachetype & (1 << 23)) + cacheid = CACHEID_VIPT_ALIASING; + else + cacheid = CACHEID_VIPT_NONALIASING; } + if (cpu_has_aliasing_icache(arch)) + cacheid |= CACHEID_VIPT_I_ALIASING; } else { cacheid = CACHEID_VIVT; } - printk("CPU: %s data cache, %s instruction cache\n", + pr_info("CPU: %s data cache, %s instruction cache\n", cache_is_vivt() ? "VIVT" : cache_is_vipt_aliasing() ? "VIPT aliasing" : - cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown", + cache_is_vipt_nonaliasing() ? "PIPT / VIPT nonaliasing" : "unknown", cache_is_vivt() ? "VIVT" : icache_is_vivt_asid_tagged() ? "VIVT ASID tagged" : icache_is_vipt_aliasing() ? "VIPT aliasing" : + icache_is_pipt() ? "PIPT" : cache_is_vipt_nonaliasing() ? "VIPT nonaliasing" : "unknown"); } @@ -307,69 +354,58 @@ static void __init cacheid_init(void) * already provide the required functionality. */ extern struct proc_info_list *lookup_processor_type(unsigned int); -extern struct machine_desc *lookup_machine_type(unsigned int); -static void __init feat_v6_fixup(void) +void __init early_print(const char *str, ...) { - int id = read_cpuid_id(); + extern void printascii(const char *); + char buf[256]; + va_list ap; - if ((id & 0xff0f0000) != 0x41070000) - return; + va_start(ap, str); + vsnprintf(buf, sizeof(buf), str, ap); + va_end(ap); - /* - * HWCAP_TLS is available only on 1136 r1p0 and later, - * see also kuser_get_tls_init. - */ - if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0)) - elf_hwcap &= ~HWCAP_TLS; +#ifdef CONFIG_DEBUG_LL + printascii(buf); +#endif + printk("%s", buf); } -static void __init setup_processor(void) +static void __init cpuid_init_hwcaps(void) { - struct proc_info_list *list; + unsigned int divide_instrs, vmsa; - /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc-*.S - */ - list = lookup_processor_type(read_cpuid_id()); - if (!list) { - printk("CPU configuration botched (ID %08x), unable " - "to continue.\n", read_cpuid_id()); - while (1); - } + if (cpu_architecture() < CPU_ARCH_ARMv7) + return; - cpu_name = list->cpu_name; + divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; -#ifdef MULTI_CPU - processor = *list->proc; -#endif -#ifdef MULTI_TLB - cpu_tlb = *list->tlb; -#endif -#ifdef MULTI_USER - cpu_user = *list->user; -#endif -#ifdef MULTI_CACHE - cpu_cache = *list->cache; -#endif + switch (divide_instrs) { + case 2: + elf_hwcap |= HWCAP_IDIVA; + case 1: + elf_hwcap |= HWCAP_IDIVT; + } - printk("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15, - proc_arch[cpu_architecture()], cr_alignment); + /* LPAE implies atomic ldrd/strd instructions */ + vmsa = (read_cpuid_ext(CPUID_EXT_MMFR0) & 0xf) >> 0; + if (vmsa >= 5) + elf_hwcap |= HWCAP_LPAE; +} - sprintf(init_utsname()->machine, "%s%c", list->arch_name, ENDIANNESS); - sprintf(elf_platform, "%s%c", list->elf_name, ENDIANNESS); - elf_hwcap = list->elf_hwcap; -#ifndef CONFIG_ARM_THUMB - elf_hwcap &= ~HWCAP_THUMB; -#endif +static void __init feat_v6_fixup(void) +{ + int id = read_cpuid_id(); - feat_v6_fixup(); + if ((id & 0xff0f0000) != 0x41070000) + return; - cacheid_init(); - cpu_proc_init(); + /* + * HWCAP_TLS is available only on 1136 r1p0 and later, + * see also kuser_get_tls_init. + */ + if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0)) + elf_hwcap &= ~HWCAP_TLS; } /* @@ -377,17 +413,26 @@ static void __init setup_processor(void) * * cpu_init sets up the per-CPU stacks. */ -void cpu_init(void) +void notrace cpu_init(void) { +#ifndef CONFIG_CPU_V7M unsigned int cpu = smp_processor_id(); struct stack *stk = &stacks[cpu]; if (cpu >= NR_CPUS) { - printk(KERN_CRIT "CPU%u: bad primary CPU number\n", cpu); + pr_crit("CPU%u: bad primary CPU number\n", cpu); BUG(); } /* + * This only works on resume and secondary cores. For booting on the + * boot cpu, smp_prepare_boot_cpu is called after percpu area setup. + */ + set_my_cpu_offset(per_cpu_offset(cpu)); + + cpu_proc_init(); + + /* * Define the placement constraint for the inline asm directive below. * In Thumb-2, msr with an immediate value is not allowed. */ @@ -421,53 +466,224 @@ void cpu_init(void) "I" (offsetof(struct stack, und[0])), PLC (PSR_F_BIT | PSR_I_BIT | SVC_MODE) : "r14"); +#endif +} + +u32 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = MPIDR_INVALID }; + +void __init smp_setup_processor_id(void) +{ + int i; + u32 mpidr = is_smp() ? read_cpuid_mpidr() & MPIDR_HWID_BITMASK : 0; + u32 cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0); + + cpu_logical_map(0) = cpu; + for (i = 1; i < nr_cpu_ids; ++i) + cpu_logical_map(i) = i == cpu ? 0 : i; + + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); + + pr_info("Booting Linux on physical CPU 0x%x\n", mpidr); +} + +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity; + u32 fs[3], bits[3], ls, mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits 0x%x\n", mask); + /* + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. + */ + for (i = 0; i < 3; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } + /* + * An index can be created from the MPIDR by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 24 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR[7:0] = {0x2, 0x80}. + */ + mpidr_hash.shift_aff[0] = fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_BITS + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = 2*MPIDR_LEVEL_BITS + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] mask[0x%x] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + sync_cache_w(&mpidr_hash); } +#endif -static struct machine_desc * __init setup_machine(unsigned int nr) +static void __init setup_processor(void) { - struct machine_desc *list; + struct proc_info_list *list; /* - * locate machine in the list of supported machines. + * locate processor in the list of supported processor + * types. The linker builds this table for us from the + * entries in arch/arm/mm/proc-*.S */ - list = lookup_machine_type(nr); + list = lookup_processor_type(read_cpuid_id()); if (!list) { - printk("Machine configuration botched (nr %d), unable " - "to continue.\n", nr); + pr_err("CPU configuration botched (ID %08x), unable to continue.\n", + read_cpuid_id()); while (1); } - printk("Machine: %s\n", list->name); + cpu_name = list->cpu_name; + __cpu_architecture = __get_cpu_architecture(); + +#ifdef MULTI_CPU + processor = *list->proc; +#endif +#ifdef MULTI_TLB + cpu_tlb = *list->tlb; +#endif +#ifdef MULTI_USER + cpu_user = *list->user; +#endif +#ifdef MULTI_CACHE + cpu_cache = *list->cache; +#endif + + pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", + cpu_name, read_cpuid_id(), read_cpuid_id() & 15, + proc_arch[cpu_architecture()], get_cr()); + + snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c", + list->arch_name, ENDIANNESS); + snprintf(elf_platform, ELF_PLATFORM_SIZE, "%s%c", + list->elf_name, ENDIANNESS); + elf_hwcap = list->elf_hwcap; + + cpuid_init_hwcaps(); + +#ifndef CONFIG_ARM_THUMB + elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); +#endif +#ifdef CONFIG_MMU + init_default_cache_policy(list->__cpu_mm_mmu_flags); +#endif + erratum_a15_798181_init(); + + feat_v6_fixup(); - return list; + cacheid_init(); + cpu_init(); } -static int __init arm_add_memory(unsigned long start, unsigned long size) +void __init dump_machine_table(void) { - struct membank *bank = &meminfo.bank[meminfo.nr_banks]; + const struct machine_desc *p; - if (meminfo.nr_banks >= NR_BANKS) { - printk(KERN_CRIT "NR_BANKS too low, " - "ignoring memory at %#lx\n", start); - return -EINVAL; - } + early_print("Available machine support:\n\nID (hex)\tNAME\n"); + for_each_machine_desc(p) + early_print("%08x\t%s\n", p->nr, p->name); + + early_print("\nPlease check your kernel config and/or bootloader.\n"); + + while (true) + /* can't use cpu_relax() here as it may require MMU setup */; +} + +int __init arm_add_memory(u64 start, u64 size) +{ + u64 aligned_start; /* * Ensure that start/size are aligned to a page boundary. * Size is appropriately rounded down, start is rounded up. */ size -= start & ~PAGE_MASK; - bank->start = PAGE_ALIGN(start); - bank->size = size & PAGE_MASK; + aligned_start = PAGE_ALIGN(start); + +#ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT + if (aligned_start > ULONG_MAX) { + pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n", + (long long)start); + return -EINVAL; + } + + if (aligned_start + size > ULONG_MAX) { + pr_crit("Truncating memory at 0x%08llx to fit in 32-bit physical address space\n", + (long long)start); + /* + * To ensure bank->start + bank->size is representable in + * 32 bits, we use ULONG_MAX as the upper limit rather than 4GB. + * This means we lose a page after masking. + */ + size = ULONG_MAX - aligned_start; + } +#endif + + if (aligned_start < PHYS_OFFSET) { + if (aligned_start + size <= PHYS_OFFSET) { + pr_info("Ignoring memory below PHYS_OFFSET: 0x%08llx-0x%08llx\n", + aligned_start, aligned_start + size); + return -EINVAL; + } + + pr_info("Ignoring memory below PHYS_OFFSET: 0x%08llx-0x%08llx\n", + aligned_start, (u64)PHYS_OFFSET); + + size -= PHYS_OFFSET - aligned_start; + aligned_start = PHYS_OFFSET; + } + + start = aligned_start; + size = size & ~(phys_addr_t)(PAGE_SIZE - 1); /* * Check whether this memory region has non-zero size or * invalid node number. */ - if (bank->size == 0) + if (size == 0) return -EINVAL; - meminfo.nr_banks++; + memblock_add(start, size); return 0; } @@ -475,10 +691,12 @@ static int __init arm_add_memory(unsigned long start, unsigned long size) * Pick out the memory size. We look for mem=size@start, * where start and size are "size[KkMm]" */ + static int __init early_mem(char *p) { static int usermem __initdata = 0; - unsigned long size, start; + u64 size; + u64 start; char *endp; /* @@ -488,7 +706,8 @@ static int __init early_mem(char *p) */ if (usermem == 0) { usermem = 1; - meminfo.nr_banks = 0; + memblock_remove(memblock_start_of_DRAM(), + memblock_end_of_DRAM() - memblock_start_of_DRAM()); } start = PHYS_OFFSET; @@ -502,40 +721,21 @@ static int __init early_mem(char *p) } early_param("mem", early_mem); -static void __init -setup_ramdisk(int doload, int prompt, int image_start, unsigned int rd_sz) -{ -#ifdef CONFIG_BLK_DEV_RAM - extern int rd_size, rd_image_start, rd_prompt, rd_doload; - - rd_image_start = image_start; - rd_prompt = prompt; - rd_doload = doload; - - if (rd_sz) - rd_size = rd_sz; -#endif -} - -static void __init -request_standard_resources(struct meminfo *mi, struct machine_desc *mdesc) +static void __init request_standard_resources(const struct machine_desc *mdesc) { + struct memblock_region *region; struct resource *res; - int i; kernel_code.start = virt_to_phys(_text); kernel_code.end = virt_to_phys(_etext - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); - for (i = 0; i < mi->nr_banks; i++) { - if (mi->bank[i].size == 0) - continue; - - res = alloc_bootmem_low(sizeof(*res)); + for_each_memblock(memory, region) { + res = memblock_virt_alloc(sizeof(*res), 0); res->name = "System RAM"; - res->start = mi->bank[i].start; - res->end = mi->bank[i].start + mi->bank[i].size - 1; + res->start = __pfn_to_phys(memblock_region_memory_base_pfn(region)); + res->end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(&iomem_resource, res); @@ -566,35 +766,6 @@ request_standard_resources(struct meminfo *mi, struct machine_desc *mdesc) request_resource(&ioport_resource, &lp2); } -/* - * Tag parsing. - * - * This is the new way of passing data to the kernel at boot time. Rather - * than passing a fixed inflexible structure to the kernel, we pass a list - * of variable-sized tags to the kernel. The first tag must be a ATAG_CORE - * tag for the list to be recognised (to distinguish the tagged list from - * a param_struct). The list is terminated with a zero-length tag (this tag - * is not parsed in any way). - */ -static int __init parse_tag_core(const struct tag *tag) -{ - if (tag->hdr.size > 2) { - if ((tag->u.core.flags & 1) == 0) - root_mountflags &= ~MS_RDONLY; - ROOT_DEV = old_decode_dev(tag->u.core.rootdev); - } - return 0; -} - -__tagtable(ATAG_CORE, parse_tag_core); - -static int __init parse_tag_mem32(const struct tag *tag) -{ - return arm_add_memory(tag->u.mem.start, tag->u.mem.size); -} - -__tagtable(ATAG_MEM, parse_tag_mem32); - #if defined(CONFIG_VGA_CONSOLE) || defined(CONFIG_DUMMY_CONSOLE) struct screen_info screen_info = { .orig_video_lines = 30, @@ -604,120 +775,34 @@ struct screen_info screen_info = { .orig_video_isVGA = 1, .orig_video_points = 8 }; - -static int __init parse_tag_videotext(const struct tag *tag) -{ - screen_info.orig_x = tag->u.videotext.x; - screen_info.orig_y = tag->u.videotext.y; - screen_info.orig_video_page = tag->u.videotext.video_page; - screen_info.orig_video_mode = tag->u.videotext.video_mode; - screen_info.orig_video_cols = tag->u.videotext.video_cols; - screen_info.orig_video_ega_bx = tag->u.videotext.video_ega_bx; - screen_info.orig_video_lines = tag->u.videotext.video_lines; - screen_info.orig_video_isVGA = tag->u.videotext.video_isvga; - screen_info.orig_video_points = tag->u.videotext.video_points; - return 0; -} - -__tagtable(ATAG_VIDEOTEXT, parse_tag_videotext); #endif -static int __init parse_tag_ramdisk(const struct tag *tag) -{ - setup_ramdisk((tag->u.ramdisk.flags & 1) == 0, - (tag->u.ramdisk.flags & 2) == 0, - tag->u.ramdisk.start, tag->u.ramdisk.size); - return 0; -} - -__tagtable(ATAG_RAMDISK, parse_tag_ramdisk); - -static int __init parse_tag_serialnr(const struct tag *tag) -{ - system_serial_low = tag->u.serialnr.low; - system_serial_high = tag->u.serialnr.high; - return 0; -} - -__tagtable(ATAG_SERIAL, parse_tag_serialnr); - -static int __init parse_tag_revision(const struct tag *tag) -{ - system_rev = tag->u.revision.rev; - return 0; -} - -__tagtable(ATAG_REVISION, parse_tag_revision); - -#ifndef CONFIG_CMDLINE_FORCE -static int __init parse_tag_cmdline(const struct tag *tag) +static int __init customize_machine(void) { - strlcpy(default_command_line, tag->u.cmdline.cmdline, COMMAND_LINE_SIZE); + /* + * customizes platform devices, or adds new ones + * On DT based machines, we fall back to populating the + * machine from the device tree, if no callback is provided, + * otherwise we would always need an init_machine callback. + */ + if (machine_desc->init_machine) + machine_desc->init_machine(); +#ifdef CONFIG_OF + else + of_platform_populate(NULL, of_default_bus_match_table, + NULL, NULL); +#endif return 0; } +arch_initcall(customize_machine); -__tagtable(ATAG_CMDLINE, parse_tag_cmdline); -#endif /* CONFIG_CMDLINE_FORCE */ - -/* - * Scan the tag table for this tag, and call its parse function. - * The tag table is built by the linker from all the __tagtable - * declarations. - */ -static int __init parse_tag(const struct tag *tag) -{ - extern struct tagtable __tagtable_begin, __tagtable_end; - struct tagtable *t; - - for (t = &__tagtable_begin; t < &__tagtable_end; t++) - if (tag->hdr.tag == t->tag) { - t->parse(tag); - break; - } - - return t < &__tagtable_end; -} - -/* - * Parse all tags in the list, checking both the global and architecture - * specific tag tables. - */ -static void __init parse_tags(const struct tag *t) -{ - for (; t->hdr.size; t = tag_next(t)) - if (!parse_tag(t)) - printk(KERN_WARNING - "Ignoring unrecognised tag 0x%08x\n", - t->hdr.tag); -} - -/* - * This holds our defaults. - */ -static struct init_tags { - struct tag_header hdr1; - struct tag_core core; - struct tag_header hdr2; - struct tag_mem32 mem; - struct tag_header hdr3; -} init_tags __initdata = { - { tag_size(tag_core), ATAG_CORE }, - { 1, PAGE_SIZE, 0xff }, - { tag_size(tag_mem32), ATAG_MEM }, - { MEM_SIZE, PHYS_OFFSET }, - { 0, ATAG_NONE } -}; - -static void (*init_machine)(void) __initdata; - -static int __init customize_machine(void) +static int __init init_machine_late(void) { - /* customizes platform devices, or adds new ones */ - if (init_machine) - init_machine(); + if (machine_desc->init_late) + machine_desc->init_late(); return 0; } -arch_initcall(customize_machine); +late_initcall(init_machine_late); #ifdef CONFIG_KEXEC static inline unsigned long long get_total_mem(void) @@ -747,18 +832,17 @@ static void __init reserve_crashkernel(void) if (ret) return; - ret = reserve_bootmem(crash_base, crash_size, BOOTMEM_EXCLUSIVE); + ret = memblock_reserve(crash_base, crash_size); if (ret < 0) { - printk(KERN_WARNING "crashkernel reservation failed - " - "memory is in use (0x%lx)\n", (unsigned long)crash_base); + pr_warn("crashkernel reservation failed - memory is in use (0x%lx)\n", + (unsigned long)crash_base); return; } - printk(KERN_INFO "Reserving %ldMB of memory at %ldMB " - "for crashkernel (System RAM: %ldMB)\n", - (unsigned long)(crash_size >> 20), - (unsigned long)(crash_base >> 20), - (unsigned long)(total_mem >> 20)); + pr_info("Reserving %ldMB of memory at %ldMB for crashkernel (System RAM: %ldMB)\n", + (unsigned long)(crash_size >> 20), + (unsigned long)(crash_base >> 20), + (unsigned long)(total_mem >> 20)); crashk_res.start = crash_base; crashk_res.end = crash_base + crash_size - 1; @@ -768,113 +852,84 @@ static void __init reserve_crashkernel(void) static inline void reserve_crashkernel(void) {} #endif /* CONFIG_KEXEC */ -/* - * Note: elfcorehdr_addr is not just limited to vmcore. It is also used by - * is_kdump_kernel() to determine if we are booting after a panic. Hence - * ifdef it under CONFIG_CRASH_DUMP and not CONFIG_PROC_VMCORE. - */ - -#ifdef CONFIG_CRASH_DUMP -/* - * elfcorehdr= specifies the location of elf core header stored by the crashed - * kernel. This option will be passed by kexec loader to the capture kernel. - */ -static int __init setup_elfcorehdr(char *arg) -{ - char *end; - - if (!arg) - return -EINVAL; - - elfcorehdr_addr = memparse(arg, &end); - return end > arg ? 0 : -EINVAL; -} -early_param("elfcorehdr", setup_elfcorehdr); -#endif /* CONFIG_CRASH_DUMP */ - -static void __init squash_mem_tags(struct tag *tag) +void __init hyp_mode_check(void) { - for (; tag->hdr.size; tag = tag_next(tag)) - if (tag->hdr.tag == ATAG_MEM) - tag->hdr.tag = ATAG_NONE; +#ifdef CONFIG_ARM_VIRT_EXT + sync_boot_mode(); + + if (is_hyp_mode_available()) { + pr_info("CPU: All CPU(s) started in HYP mode.\n"); + pr_info("CPU: Virtualization extensions available.\n"); + } else if (is_hyp_mode_mismatched()) { + pr_warn("CPU: WARNING: CPU(s) started in wrong/inconsistent modes (primary CPU mode 0x%x)\n", + __boot_cpu_mode & MODE_MASK); + pr_warn("CPU: This may indicate a broken bootloader or firmware.\n"); + } else + pr_info("CPU: All CPU(s) started in SVC mode.\n"); +#endif } void __init setup_arch(char **cmdline_p) { - struct tag *tags = (struct tag *)&init_tags; - struct machine_desc *mdesc; - char *from = default_command_line; - - unwind_init(); + const struct machine_desc *mdesc; setup_processor(); - mdesc = setup_machine(machine_arch_type); + mdesc = setup_machine_fdt(__atags_pointer); + if (!mdesc) + mdesc = setup_machine_tags(__atags_pointer, __machine_arch_type); + machine_desc = mdesc; machine_name = mdesc->name; - if (mdesc->soft_reboot) - reboot_setup("s"); - - if (__atags_pointer) - tags = phys_to_virt(__atags_pointer); - else if (mdesc->boot_params) - tags = phys_to_virt(mdesc->boot_params); - -#if defined(CONFIG_DEPRECATED_PARAM_STRUCT) - /* - * If we have the old style parameters, convert them to - * a tag list. - */ - if (tags->hdr.tag != ATAG_CORE) - convert_to_tag_list(tags); -#endif - if (tags->hdr.tag != ATAG_CORE) - tags = (struct tag *)&init_tags; - - if (mdesc->fixup) - mdesc->fixup(mdesc, tags, &from, &meminfo); - - if (tags->hdr.tag == ATAG_CORE) { - if (meminfo.nr_banks != 0) - squash_mem_tags(tags); - save_atags(tags); - parse_tags(tags); - } + if (mdesc->reboot_mode != REBOOT_HARD) + reboot_mode = mdesc->reboot_mode; init_mm.start_code = (unsigned long) _text; init_mm.end_code = (unsigned long) _etext; init_mm.end_data = (unsigned long) _edata; init_mm.brk = (unsigned long) _end; - /* parse_early_param needs a boot_command_line */ - strlcpy(boot_command_line, from, COMMAND_LINE_SIZE); - /* populate cmd_line too for later use, preserving boot_command_line */ strlcpy(cmd_line, boot_command_line, COMMAND_LINE_SIZE); *cmdline_p = cmd_line; parse_early_param(); - arm_memblock_init(&meminfo, mdesc); + early_paging_init(mdesc, lookup_processor_type(read_cpuid_id())); + setup_dma_zone(mdesc); + sanity_check_meminfo(); + arm_memblock_init(mdesc); paging_init(mdesc); - request_standard_resources(&meminfo, mdesc); + request_standard_resources(mdesc); + + if (mdesc->restart) + arm_pm_restart = mdesc->restart; + unflatten_device_tree(); + + arm_dt_init_cpu_maps(); + psci_init(); #ifdef CONFIG_SMP - if (is_smp()) + if (is_smp()) { + if (!mdesc->smp_init || !mdesc->smp_init()) { + if (psci_smp_available()) + smp_set_ops(&psci_smp_ops); + else if (mdesc->smp) + smp_set_ops(mdesc->smp); + } smp_init_cpus(); + smp_build_mpidr_hash(); + } #endif - reserve_crashkernel(); - cpu_init(); - tcm_init(); + if (!is_smp()) + hyp_mode_check(); - /* - * Set up various architecture-specific pointers - */ - arch_nr_irqs = mdesc->nr_irqs; - init_arch_irq = mdesc->init_irq; - system_timer = mdesc->timer; - init_machine = mdesc->init_machine; + reserve_crashkernel(); + +#ifdef CONFIG_MULTI_IRQ_HANDLER + handle_arch_irq = mdesc->handle_irq; +#endif #ifdef CONFIG_VT #if defined(CONFIG_VGA_CONSOLE) @@ -883,7 +938,9 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; #endif #endif - early_trap_init(); + + if (mdesc->init_early) + mdesc->init_early(); } @@ -930,17 +987,30 @@ static const char *hwcap_str[] = { "neon", "vfpv3", "vfpv3d16", + "tls", + "vfpv4", + "idiva", + "idivt", + "vfpd32", + "lpae", + "evtstrm", + NULL +}; + +static const char *hwcap2_str[] = { + "aes", + "pmull", + "sha1", + "sha2", + "crc32", NULL }; static int c_show(struct seq_file *m, void *v) { - int i; + int i, j; + u32 cpuid; - seq_printf(m, "Processor\t: %s rev %d (%s)\n", - cpu_name, read_cpuid_id() & 15, elf_platform); - -#if defined(CONFIG_SMP) for_each_online_cpu(i) { /* * glibc reads /proc/cpuinfo to determine the number of @@ -948,45 +1018,43 @@ static int c_show(struct seq_file *m, void *v) * "processor". Give glibc what it expects. */ seq_printf(m, "processor\t: %d\n", i); - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", - per_cpu(cpu_data, i).loops_per_jiffy / (500000UL/HZ), - (per_cpu(cpu_data, i).loops_per_jiffy / (5000UL/HZ)) % 100); - } -#else /* CONFIG_SMP */ - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n", - loops_per_jiffy / (500000/HZ), - (loops_per_jiffy / (5000/HZ)) % 100); -#endif + cpuid = is_smp() ? per_cpu(cpu_data, i).cpuid : read_cpuid_id(); + seq_printf(m, "model name\t: %s rev %d (%s)\n", + cpu_name, cpuid & 15, elf_platform); - /* dump out the processor features */ - seq_puts(m, "Features\t: "); + /* dump out the processor features */ + seq_puts(m, "Features\t: "); - for (i = 0; hwcap_str[i]; i++) - if (elf_hwcap & (1 << i)) - seq_printf(m, "%s ", hwcap_str[i]); + for (j = 0; hwcap_str[j]; j++) + if (elf_hwcap & (1 << j)) + seq_printf(m, "%s ", hwcap_str[j]); - seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: %s\n", proc_arch[cpu_architecture()]); + for (j = 0; hwcap2_str[j]; j++) + if (elf_hwcap2 & (1 << j)) + seq_printf(m, "%s ", hwcap2_str[j]); - if ((read_cpuid_id() & 0x0008f000) == 0x00000000) { - /* pre-ARM7 */ - seq_printf(m, "CPU part\t: %07x\n", read_cpuid_id() >> 4); - } else { - if ((read_cpuid_id() & 0x0008f000) == 0x00007000) { - /* ARM7 */ - seq_printf(m, "CPU variant\t: 0x%02x\n", - (read_cpuid_id() >> 16) & 127); + seq_printf(m, "\nCPU implementer\t: 0x%02x\n", cpuid >> 24); + seq_printf(m, "CPU architecture: %s\n", + proc_arch[cpu_architecture()]); + + if ((cpuid & 0x0008f000) == 0x00000000) { + /* pre-ARM7 */ + seq_printf(m, "CPU part\t: %07x\n", cpuid >> 4); } else { - /* post-ARM7 */ - seq_printf(m, "CPU variant\t: 0x%x\n", - (read_cpuid_id() >> 20) & 15); + if ((cpuid & 0x0008f000) == 0x00007000) { + /* ARM7 */ + seq_printf(m, "CPU variant\t: 0x%02x\n", + (cpuid >> 16) & 127); + } else { + /* post-ARM7 */ + seq_printf(m, "CPU variant\t: 0x%x\n", + (cpuid >> 20) & 15); + } + seq_printf(m, "CPU part\t: 0x%03x\n", + (cpuid >> 4) & 0xfff); } - seq_printf(m, "CPU part\t: 0x%03x\n", - (read_cpuid_id() >> 4) & 0xfff); + seq_printf(m, "CPU revision\t: %d\n\n", cpuid & 15); } - seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); - - seq_puts(m, "\n"); seq_printf(m, "Hardware\t: %s\n", machine_name); seq_printf(m, "Revision\t: %04x\n", system_rev); diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index 907d5a620bc..bd198343720 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -8,109 +8,23 @@ * published by the Free Software Foundation. */ #include <linux/errno.h> +#include <linux/random.h> #include <linux/signal.h> #include <linux/personality.h> -#include <linux/freezer.h> #include <linux/uaccess.h> #include <linux/tracehook.h> +#include <linux/uprobes.h> #include <asm/elf.h> #include <asm/cacheflush.h> +#include <asm/traps.h> #include <asm/ucontext.h> #include <asm/unistd.h> #include <asm/vfp.h> -#include "ptrace.h" -#include "signal.h" +extern const unsigned long sigreturn_codes[7]; -#define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) - -/* - * For ARM syscalls, we encode the syscall number into the instruction. - */ -#define SWI_SYS_SIGRETURN (0xef000000|(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE)) -#define SWI_SYS_RT_SIGRETURN (0xef000000|(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE)) -#define SWI_SYS_RESTART (0xef000000|__NR_restart_syscall|__NR_OABI_SYSCALL_BASE) - -/* - * With EABI, the syscall number has to be loaded into r7. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | (__NR_sigreturn - __NR_SYSCALL_BASE)) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) - -/* - * For Thumb syscalls, we pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SWI_THUMB_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_sigreturn - __NR_SYSCALL_BASE)) -#define SWI_THUMB_RT_SIGRETURN (0xdf00 << 16 | 0x2700 | (__NR_rt_sigreturn - __NR_SYSCALL_BASE)) - -const unsigned long sigreturn_codes[7] = { - MOV_R7_NR_SIGRETURN, SWI_SYS_SIGRETURN, SWI_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SWI_SYS_RT_SIGRETURN, SWI_THUMB_RT_SIGRETURN, -}; - -/* - * Either we support OABI only, or we have EABI with the OABI - * compat layer enabled. In the later case we don't know if - * user space is EABI or not, and if not we must not clobber r7. - * Always using the OABI syscall solves that issue and works for - * all those cases. - */ -const unsigned long syscall_restart_code[2] = { - SWI_SYS_RESTART, /* swi __NR_restart_syscall */ - 0xe49df004, /* ldr pc, [sp], #4 */ -}; - -/* - * atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int sys_sigsuspend(int restart, unsigned long oldmask, old_sigset_t mask) -{ - mask &= _BLOCKABLE; - spin_lock_irq(¤t->sighand->siglock); - current->saved_sigmask = current->blocked; - siginitset(¤t->blocked, mask); - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - - current->state = TASK_INTERRUPTIBLE; - schedule(); - set_restore_sigmask(); - return -ERESTARTNOHAND; -} - -asmlinkage int -sys_sigaction(int sig, const struct old_sigaction __user *act, - struct old_sigaction __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - if (act) { - old_sigset_t mask; - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(new_ka.sa.sa_handler, &act->sa_handler) || - __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) - return -EFAULT; - __get_user(new_ka.sa.sa_flags, &act->sa_flags); - __get_user(mask, &act->sa_mask); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || - __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) - return -EFAULT; - __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); - } - - return ret; -} +static unsigned long signal_return_offset; #ifdef CONFIG_CRUNCH static int preserve_crunch_context(struct crunch_sigframe __user *frame) @@ -180,44 +94,23 @@ static int restore_iwmmxt_context(struct iwmmxt_sigframe *frame) static int preserve_vfp_context(struct vfp_sigframe __user *frame) { - struct thread_info *thread = current_thread_info(); - struct vfp_hard_struct *h = &thread->vfpstate.hard; const unsigned long magic = VFP_MAGIC; const unsigned long size = VFP_STORAGE_SIZE; int err = 0; - vfp_sync_hwstate(thread); __put_user_error(magic, &frame->magic, err); __put_user_error(size, &frame->size, err); - /* - * Copy the floating point registers. There can be unused - * registers see asm/hwcap.h for details. - */ - err |= __copy_to_user(&frame->ufp.fpregs, &h->fpregs, - sizeof(h->fpregs)); - /* - * Copy the status and control register. - */ - __put_user_error(h->fpscr, &frame->ufp.fpscr, err); - - /* - * Copy the exception registers. - */ - __put_user_error(h->fpexc, &frame->ufp_exc.fpexc, err); - __put_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); - __put_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); + if (err) + return -EFAULT; - return err ? -EFAULT : 0; + return vfp_preserve_user_clear_hwstate(&frame->ufp, &frame->ufp_exc); } static int restore_vfp_context(struct vfp_sigframe __user *frame) { - struct thread_info *thread = current_thread_info(); - struct vfp_hard_struct *h = &thread->vfpstate.hard; unsigned long magic; unsigned long size; - unsigned long fpexc; int err = 0; __get_user_error(magic, &frame->magic, err); @@ -228,34 +121,7 @@ static int restore_vfp_context(struct vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the floating point registers. There can be unused - * registers see asm/hwcap.h for details. - */ - err |= __copy_from_user(&h->fpregs, &frame->ufp.fpregs, - sizeof(h->fpregs)); - /* - * Copy the status and control register. - */ - __get_user_error(h->fpscr, &frame->ufp.fpscr, err); - - /* - * Sanitise and restore the exception registers. - */ - __get_user_error(fpexc, &frame->ufp_exc.fpexc, err); - /* Ensure the VFP is enabled. */ - fpexc |= FPEXC_EN; - /* Ensure FPINST2 is invalid and the exception flag is cleared. */ - fpexc &= ~(FPEXC_EX | FPEXC_FP2V); - h->fpexc = fpexc; - - __get_user_error(h->fpinst, &frame->ufp_exc.fpinst, err); - __get_user_error(h->fpinst2, &frame->ufp_exc.fpinst2, err); - - if (!err) - vfp_flush_hwstate(thread); - - return err ? -EFAULT : 0; + return vfp_restore_user_hwstate(&frame->ufp, &frame->ufp_exc); } #endif @@ -280,13 +146,8 @@ static int restore_sigframe(struct pt_regs *regs, struct sigframe __user *sf) int err; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); - if (err == 0) { - sigdelsetmask(&set, ~_BLOCKABLE); - spin_lock_irq(¤t->sighand->siglock); - current->blocked = set; - recalc_sigpending(); - spin_unlock_irq(¤t->sighand->siglock); - } + if (err == 0) + set_current_blocked(&set); __get_user_error(regs->ARM_r0, &sf->uc.uc_mcontext.arm_r0, err); __get_user_error(regs->ARM_r1, &sf->uc.uc_mcontext.arm_r1, err); @@ -348,8 +209,6 @@ asmlinkage int sys_sigreturn(struct pt_regs *regs) if (restore_sigframe(regs, frame)) goto badframe; - single_step_trap(current); - return regs->ARM_r0; badframe: @@ -380,11 +239,9 @@ asmlinkage int sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigframe(regs, &frame->sig)) goto badframe; - if (do_sigaltstack(&frame->sig.uc.uc_stack, NULL, regs->ARM_sp) == -EFAULT) + if (restore_altstack(&frame->sig.uc.uc_stack)) goto badframe; - single_step_trap(current); - return regs->ARM_r0; badframe: @@ -442,18 +299,12 @@ setup_sigframe(struct sigframe __user *sf, struct pt_regs *regs, sigset_t *set) } static inline void __user * -get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize) +get_sigframe(struct ksignal *ksig, struct pt_regs *regs, int framesize) { - unsigned long sp = regs->ARM_sp; + unsigned long sp = sigsp(regs->ARM_sp, ksig); void __user *frame; /* - * This is the X/Open sanctioned signal stack switching. - */ - if ((ka->sa.sa_flags & SA_ONSTACK) && !sas_ss_flags(sp)) - sp = current->sas_ss_sp + current->sas_ss_size; - - /* * ATPCS B01 mandates 8-byte alignment */ frame = (void __user *)((sp - framesize) & ~7); @@ -467,19 +318,32 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, int framesize) return frame; } +/* + * translate the signal + */ +static inline int map_sig(int sig) +{ + struct thread_info *thread = current_thread_info(); + if (sig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) + sig = thread->exec_domain->signal_invmap[sig]; + return sig; +} + static int -setup_return(struct pt_regs *regs, struct k_sigaction *ka, - unsigned long __user *rc, void __user *frame, int usig) +setup_return(struct pt_regs *regs, struct ksignal *ksig, + unsigned long __user *rc, void __user *frame) { - unsigned long handler = (unsigned long)ka->sa.sa_handler; + unsigned long handler = (unsigned long)ksig->ka.sa.sa_handler; unsigned long retcode; int thumb = 0; - unsigned long cpsr = regs->ARM_cpsr & ~PSR_f; + unsigned long cpsr = regs->ARM_cpsr & ~(PSR_f | PSR_E_BIT); + + cpsr |= PSR_ENDSTATE; /* * Maybe we need to deliver a 32-bit signal to a 26-bit task. */ - if (ka->sa.sa_flags & SA_THIRTYTWO) + if (ksig->ka.sa.sa_flags & SA_THIRTYTWO) cpsr = (cpsr & ~MODE_MASK) | USR_MODE; #ifdef CONFIG_ARM_THUMB @@ -490,36 +354,53 @@ setup_return(struct pt_regs *regs, struct k_sigaction *ka, */ thumb = handler & 1; - if (thumb) { - cpsr |= PSR_T_BIT; #if __LINUX_ARM_ARCH__ >= 7 - /* clear the If-Then Thumb-2 execution state */ - cpsr &= ~PSR_IT_MASK; + /* + * Clear the If-Then Thumb-2 execution state + * ARM spec requires this to be all 000s in ARM mode + * Snapdragon S4/Krait misbehaves on a Thumb=>ARM + * signal transition without this. + */ + cpsr &= ~PSR_IT_MASK; #endif + + if (thumb) { + cpsr |= PSR_T_BIT; } else cpsr &= ~PSR_T_BIT; } #endif - if (ka->sa.sa_flags & SA_RESTORER) { - retcode = (unsigned long)ka->sa.sa_restorer; + if (ksig->ka.sa.sa_flags & SA_RESTORER) { + retcode = (unsigned long)ksig->ka.sa.sa_restorer; } else { unsigned int idx = thumb << 1; - if (ka->sa.sa_flags & SA_SIGINFO) + if (ksig->ka.sa.sa_flags & SA_SIGINFO) idx += 3; + /* + * Put the sigreturn code on the stack no matter which return + * mechanism we use in order to remain ABI compliant + */ if (__put_user(sigreturn_codes[idx], rc) || __put_user(sigreturn_codes[idx+1], rc+1)) return 1; +#ifdef CONFIG_MMU if (cpsr & MODE32_BIT) { + struct mm_struct *mm = current->mm; + /* - * 32-bit code can use the new high-page - * signal return code support. + * 32-bit code can use the signal return page + * except when the MPU has protected the vectors + * page from PL0 */ - retcode = KERN_SIGRETURN_CODE + (idx << 2) + thumb; - } else { + retcode = mm->context.sigpage + signal_return_offset + + (idx << 2) + thumb; + } else +#endif + { /* * Ensure that the instruction cache sees * the return code written onto the stack. @@ -531,7 +412,7 @@ setup_return(struct pt_regs *regs, struct k_sigaction *ka, } } - regs->ARM_r0 = usig; + regs->ARM_r0 = map_sig(ksig->sig); regs->ARM_sp = (unsigned long)frame; regs->ARM_lr = retcode; regs->ARM_pc = handler; @@ -541,9 +422,9 @@ setup_return(struct pt_regs *regs, struct k_sigaction *ka, } static int -setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *regs) +setup_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct sigframe __user *frame = get_sigframe(ka, regs, sizeof(*frame)); + struct sigframe __user *frame = get_sigframe(ksig, regs, sizeof(*frame)); int err = 0; if (!frame) @@ -556,36 +437,29 @@ setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, struct pt_regs *reg err |= setup_sigframe(frame, regs, set); if (err == 0) - err = setup_return(regs, ka, frame->retcode, frame, usig); + err = setup_return(regs, ksig, frame->retcode, frame); return err; } static int -setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, - sigset_t *set, struct pt_regs *regs) +setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { - struct rt_sigframe __user *frame = get_sigframe(ka, regs, sizeof(*frame)); - stack_t stack; + struct rt_sigframe __user *frame = get_sigframe(ksig, regs, sizeof(*frame)); int err = 0; if (!frame) return 1; - err |= copy_siginfo_to_user(&frame->info, info); + err |= copy_siginfo_to_user(&frame->info, &ksig->info); __put_user_error(0, &frame->sig.uc.uc_flags, err); __put_user_error(NULL, &frame->sig.uc.uc_link, err); - memset(&stack, 0, sizeof(stack)); - stack.ss_sp = (void __user *)current->sas_ss_sp; - stack.ss_flags = sas_ss_flags(regs->ARM_sp); - stack.ss_size = current->sas_ss_size; - err |= __copy_to_user(&frame->sig.uc.uc_stack, &stack, sizeof(stack)); - + err |= __save_altstack(&frame->sig.uc.uc_stack, regs->ARM_sp); err |= setup_sigframe(&frame->sig, regs, set); if (err == 0) - err = setup_return(regs, ka, frame->sig.retcode, frame, usig); + err = setup_return(regs, ksig, frame->sig.retcode, frame); if (err == 0) { /* @@ -600,81 +474,28 @@ setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, return err; } -static inline void setup_syscall_restart(struct pt_regs *regs) -{ - regs->ARM_r0 = regs->ARM_ORIG_r0; - regs->ARM_pc -= thumb_mode(regs) ? 2 : 4; -} - /* * OK, we're invoking a handler */ -static int -handle_signal(unsigned long sig, struct k_sigaction *ka, - siginfo_t *info, sigset_t *oldset, - struct pt_regs * regs, int syscall) +static void handle_signal(struct ksignal *ksig, struct pt_regs *regs) { - struct thread_info *thread = current_thread_info(); - struct task_struct *tsk = current; - int usig = sig; + sigset_t *oldset = sigmask_to_save(); int ret; /* - * If we were from a system call, check for system call restarting... - */ - if (syscall) { - switch (regs->ARM_r0) { - case -ERESTART_RESTARTBLOCK: - case -ERESTARTNOHAND: - regs->ARM_r0 = -EINTR; - break; - case -ERESTARTSYS: - if (!(ka->sa.sa_flags & SA_RESTART)) { - regs->ARM_r0 = -EINTR; - break; - } - /* fallthrough */ - case -ERESTARTNOINTR: - setup_syscall_restart(regs); - } - } - - /* - * translate the signal - */ - if (usig < 32 && thread->exec_domain && thread->exec_domain->signal_invmap) - usig = thread->exec_domain->signal_invmap[usig]; - - /* * Set up the stack frame */ - if (ka->sa.sa_flags & SA_SIGINFO) - ret = setup_rt_frame(usig, ka, info, oldset, regs); + if (ksig->ka.sa.sa_flags & SA_SIGINFO) + ret = setup_rt_frame(ksig, oldset, regs); else - ret = setup_frame(usig, ka, oldset, regs); + ret = setup_frame(ksig, oldset, regs); /* * Check that the resulting registers are actually sane. */ ret |= !valid_user_regs(regs); - if (ret != 0) { - force_sigsegv(sig, tsk); - return ret; - } - - /* - * Block the signal if we were successful. - */ - spin_lock_irq(&tsk->sighand->siglock); - sigorsets(&tsk->blocked, &tsk->blocked, - &ka->sa.sa_mask); - if (!(ka->sa.sa_flags & SA_NODEFER)) - sigaddset(&tsk->blocked, sig); - recalc_sigpending(); - spin_unlock_irq(&tsk->sighand->siglock); - - return 0; + signal_setup_done(ret, ksig, 0); } /* @@ -686,103 +507,130 @@ handle_signal(unsigned long sig, struct k_sigaction *ka, * the kernel can handle, and then we build all the user-level signal handling * stack-frames in one go after that. */ -static void do_signal(struct pt_regs *regs, int syscall) +static int do_signal(struct pt_regs *regs, int syscall) { - struct k_sigaction ka; - siginfo_t info; - int signr; + unsigned int retval = 0, continue_addr = 0, restart_addr = 0; + struct ksignal ksig; + int restart = 0; /* - * We want the common case to go fast, which - * is why we may in certain cases get here from - * kernel mode. Just return without doing anything - * if so. + * If we were from a system call, check for system call restarting... */ - if (!user_mode(regs)) - return; - - if (try_to_freeze()) - goto no_signal; - - single_step_clear(current); - - signr = get_signal_to_deliver(&info, &ka, regs, NULL); - if (signr > 0) { - sigset_t *oldset; + if (syscall) { + continue_addr = regs->ARM_pc; + restart_addr = continue_addr - (thumb_mode(regs) ? 2 : 4); + retval = regs->ARM_r0; - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - oldset = ¤t->saved_sigmask; - else - oldset = ¤t->blocked; - if (handle_signal(signr, &ka, &info, oldset, regs, syscall) == 0) { - /* - * A signal was successfully delivered; the saved - * sigmask will have been stored in the signal frame, - * and will be restored by sigreturn, so we can simply - * clear the TIF_RESTORE_SIGMASK flag. - */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) - clear_thread_flag(TIF_RESTORE_SIGMASK); + /* + * Prepare for system call restart. We do this here so that a + * debugger will see the already changed PSW. + */ + switch (retval) { + case -ERESTART_RESTARTBLOCK: + restart -= 2; + case -ERESTARTNOHAND: + case -ERESTARTSYS: + case -ERESTARTNOINTR: + restart++; + regs->ARM_r0 = regs->ARM_ORIG_r0; + regs->ARM_pc = restart_addr; + break; } - single_step_set(current); - return; } - no_signal: /* - * No signal to deliver to the process - restart the syscall. + * Get the signal to deliver. When running under ptrace, at this + * point the debugger may change all our registers ... */ - if (syscall) { - if (regs->ARM_r0 == -ERESTART_RESTARTBLOCK) { - if (thumb_mode(regs)) { - regs->ARM_r7 = __NR_restart_syscall - __NR_SYSCALL_BASE; - regs->ARM_pc -= 2; - } else { -#if defined(CONFIG_AEABI) && !defined(CONFIG_OABI_COMPAT) - regs->ARM_r7 = __NR_restart_syscall; - regs->ARM_pc -= 4; -#else - u32 __user *usp; - - regs->ARM_sp -= 4; - usp = (u32 __user *)regs->ARM_sp; - - if (put_user(regs->ARM_pc, usp) == 0) { - regs->ARM_pc = KERN_RESTART_CODE; - } else { - regs->ARM_sp += 4; - force_sigsegv(0, current); - } -#endif + /* + * Depending on the signal settings we may need to revert the + * decision to restart the system call. But skip this if a + * debugger has chosen to restart at a different PC. + */ + if (get_signal(&ksig)) { + /* handler */ + if (unlikely(restart) && regs->ARM_pc == restart_addr) { + if (retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK + || (retval == -ERESTARTSYS + && !(ksig.ka.sa.sa_flags & SA_RESTART))) { + regs->ARM_r0 = -EINTR; + regs->ARM_pc = continue_addr; } } - if (regs->ARM_r0 == -ERESTARTNOHAND || - regs->ARM_r0 == -ERESTARTSYS || - regs->ARM_r0 == -ERESTARTNOINTR) { - setup_syscall_restart(regs); + handle_signal(&ksig, regs); + } else { + /* no handler */ + restore_saved_sigmask(); + if (unlikely(restart) && regs->ARM_pc == restart_addr) { + regs->ARM_pc = continue_addr; + return restart; } + } + return 0; +} - /* If there's no signal to deliver, we just put the saved sigmask - * back. - */ - if (test_thread_flag(TIF_RESTORE_SIGMASK)) { - clear_thread_flag(TIF_RESTORE_SIGMASK); - sigprocmask(SIG_SETMASK, ¤t->saved_sigmask, NULL); +asmlinkage int +do_work_pending(struct pt_regs *regs, unsigned int thread_flags, int syscall) +{ + do { + if (likely(thread_flags & _TIF_NEED_RESCHED)) { + schedule(); + } else { + if (unlikely(!user_mode(regs))) + return 0; + local_irq_enable(); + if (thread_flags & _TIF_SIGPENDING) { + int restart = do_signal(regs, syscall); + if (unlikely(restart)) { + /* + * Restart without handlers. + * Deal with it without leaving + * the kernel space. + */ + return restart; + } + syscall = 0; + } else if (thread_flags & _TIF_UPROBE) { + clear_thread_flag(TIF_UPROBE); + uprobe_notify_resume(regs); + } else { + clear_thread_flag(TIF_NOTIFY_RESUME); + tracehook_notify_resume(regs); + } } - } - single_step_set(current); + local_irq_disable(); + thread_flags = current_thread_info()->flags; + } while (thread_flags & _TIF_WORK_MASK); + return 0; } -asmlinkage void -do_notify_resume(struct pt_regs *regs, unsigned int thread_flags, int syscall) +struct page *get_signal_page(void) { - if (thread_flags & _TIF_SIGPENDING) - do_signal(regs, syscall); - - if (thread_flags & _TIF_NOTIFY_RESUME) { - clear_thread_flag(TIF_NOTIFY_RESUME); - tracehook_notify_resume(regs); - if (current->replacement_session_keyring) - key_replace_session_keyring(); - } + unsigned long ptr; + unsigned offset; + struct page *page; + void *addr; + + page = alloc_pages(GFP_KERNEL, 0); + + if (!page) + return NULL; + + addr = page_address(page); + + /* Give the signal return code some randomness */ + offset = 0x200 + (get_random_int() & 0x7fc); + signal_return_offset = offset; + + /* + * Copy signal return handlers into the vector page, and + * set sigreturn to be a pointer to these. + */ + memcpy(addr + offset, sigreturn_codes, sizeof(sigreturn_codes)); + + ptr = (unsigned long)addr + offset; + flush_icache_range(ptr, ptr + sizeof(sigreturn_codes)); + + return page; } diff --git a/arch/arm/kernel/signal.h b/arch/arm/kernel/signal.h deleted file mode 100644 index 6fcfe8398aa..00000000000 --- a/arch/arm/kernel/signal.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * linux/arch/arm/kernel/signal.h - * - * Copyright (C) 2005-2009 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define KERN_SIGRETURN_CODE (CONFIG_VECTORS_BASE + 0x00000500) -#define KERN_RESTART_CODE (KERN_SIGRETURN_CODE + sizeof(sigreturn_codes)) - -extern const unsigned long sigreturn_codes[7]; -extern const unsigned long syscall_restart_code[2]; diff --git a/arch/arm/kernel/sigreturn_codes.S b/arch/arm/kernel/sigreturn_codes.S new file mode 100644 index 00000000000..b84d0cb1368 --- /dev/null +++ b/arch/arm/kernel/sigreturn_codes.S @@ -0,0 +1,102 @@ +/* + * sigreturn_codes.S - code sinpets for sigreturn syscalls + * + * Created by: Victor Kamensky, 2013-08-13 + * Copyright: (C) 2013 Linaro Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <asm/unistd.h> + +/* + * For ARM syscalls, we encode the syscall number into the instruction. + * With EABI, the syscall number has to be loaded into r7. As result + * ARM syscall sequence snippet will have move and svc in .arm encoding + * + * For Thumb syscalls, we pass the syscall number via r7. We therefore + * need two 16-bit instructions in .thumb encoding + * + * Please note sigreturn_codes code are not executed in place. Instead + * they just copied by kernel into appropriate places. Code inside of + * arch/arm/kernel/signal.c is very sensitive to layout of these code + * snippets. + */ + +/* + * In CPU_THUMBONLY case kernel arm opcodes are not allowed. + * Note in this case codes skips those instructions but it uses .org + * directive to keep correct layout of sigreturn_codes array. + */ +#ifndef CONFIG_CPU_THUMBONLY +#define ARM_OK(code...) code +#else +#define ARM_OK(code...) +#endif + + .macro arm_slot n + .org sigreturn_codes + 12 * (\n) +ARM_OK( .arm ) + .endm + + .macro thumb_slot n + .org sigreturn_codes + 12 * (\n) + 8 + .thumb + .endm + +#if __LINUX_ARM_ARCH__ <= 4 + /* + * Note we manually set minimally required arch that supports + * required thumb opcodes for early arch versions. It is OK + * for this file to be used in combination with other + * lower arch variants, since these code snippets are only + * used as input data. + */ + .arch armv4t +#endif + + .section .rodata + .global sigreturn_codes + .type sigreturn_codes, #object + + .align + +sigreturn_codes: + + /* ARM sigreturn syscall code snippet */ + arm_slot 0 +ARM_OK( mov r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) ) +ARM_OK( swi #(__NR_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) + + /* Thumb sigreturn syscall code snippet */ + thumb_slot 0 + movs r7, #(__NR_sigreturn - __NR_SYSCALL_BASE) + swi #0 + + /* ARM sigreturn_rt syscall code snippet */ + arm_slot 1 +ARM_OK( mov r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) ) +ARM_OK( swi #(__NR_rt_sigreturn)|(__NR_OABI_SYSCALL_BASE) ) + + /* Thumb sigreturn_rt syscall code snippet */ + thumb_slot 1 + movs r7, #(__NR_rt_sigreturn - __NR_SYSCALL_BASE) + swi #0 + + /* + * Note on addtional space: setup_return in signal.c + * algorithm uses two words copy regardless whether + * it is thumb case or not, so we need additional + * word after real last entry. + */ + arm_slot 2 + .space 4 + + .size sigreturn_codes, . - sigreturn_codes diff --git a/arch/arm/kernel/sleep.S b/arch/arm/kernel/sleep.S new file mode 100644 index 00000000000..1b880db2a03 --- /dev/null +++ b/arch/arm/kernel/sleep.S @@ -0,0 +1,165 @@ +#include <linux/linkage.h> +#include <linux/threads.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> +#include <asm/glue-cache.h> +#include <asm/glue-proc.h> + .text + +/* + * Implementation of MPIDR hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @mpidr: register containing MPIDR value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 mpidr, u32 mask) { + * u32 aff0, aff1, aff2; + * u32 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2); + *} + * Input registers: rs0, rs1, rs2, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = r1 and dst = r1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, mpidr, mask + and \mpidr, \mpidr, \mask @ mask out MPIDR bits + and \dst, \mpidr, #0xff @ mask=aff0 + ARM( mov \dst, \dst, lsr \rs0 ) @ dst=aff0>>rs0 + THUMB( lsr \dst, \dst, \rs0 ) + and \mask, \mpidr, #0xff00 @ mask = aff1 + ARM( orr \dst, \dst, \mask, lsr \rs1 ) @ dst|=(aff1>>rs1) + THUMB( lsr \mask, \mask, \rs1 ) + THUMB( orr \dst, \dst, \mask ) + and \mask, \mpidr, #0xff0000 @ mask = aff2 + ARM( orr \dst, \dst, \mask, lsr \rs2 ) @ dst|=(aff2>>rs2) + THUMB( lsr \mask, \mask, \rs2 ) + THUMB( orr \dst, \dst, \mask ) + .endm + +/* + * Save CPU state for a suspend. This saves the CPU general purpose + * registers, and allocates space on the kernel stack to save the CPU + * specific registers and some other data for resume. + * r0 = suspend function arg0 + * r1 = suspend function + * r2 = MPIDR value the resuming CPU will use + */ +ENTRY(__cpu_suspend) + stmfd sp!, {r4 - r11, lr} +#ifdef MULTI_CPU + ldr r10, =processor + ldr r4, [r10, #CPU_SLEEP_SIZE] @ size of CPU sleep state +#else + ldr r4, =cpu_suspend_size +#endif + mov r5, sp @ current virtual SP + add r4, r4, #12 @ Space for pgd, virt sp, phys resume fn + sub sp, sp, r4 @ allocate CPU state on stack + ldr r3, =sleep_save_sp + stmfd sp!, {r0, r1} @ save suspend func arg and pointer + ldr r3, [r3, #SLEEP_SAVE_SP_VIRT] + ALT_SMP(ldr r0, =mpidr_hash) + ALT_UP_B(1f) + /* This ldmia relies on the memory layout of the mpidr_hash struct */ + ldmia r0, {r1, r6-r8} @ r1 = mpidr mask (r6,r7,r8) = l[0,1,2] shifts + compute_mpidr_hash r0, r6, r7, r8, r2, r1 + add r3, r3, r0, lsl #2 +1: mov r2, r5 @ virtual SP + mov r1, r4 @ size of save block + add r0, sp, #8 @ pointer to save block + bl __cpu_suspend_save + adr lr, BSYM(cpu_suspend_abort) + ldmfd sp!, {r0, pc} @ call suspend fn +ENDPROC(__cpu_suspend) + .ltorg + +cpu_suspend_abort: + ldmia sp!, {r1 - r3} @ pop phys pgd, virt SP, phys resume fn + teq r0, #0 + moveq r0, #1 @ force non-zero value + mov sp, r2 + ldmfd sp!, {r4 - r11, pc} +ENDPROC(cpu_suspend_abort) + +/* + * r0 = control register value + */ + .align 5 + .pushsection .idmap.text,"ax" +ENTRY(cpu_resume_mmu) + ldr r3, =cpu_resume_after_mmu + instr_sync + mcr p15, 0, r0, c1, c0, 0 @ turn on MMU, I-cache, etc + mrc p15, 0, r0, c0, c0, 0 @ read id reg + instr_sync + mov r0, r0 + mov r0, r0 + mov pc, r3 @ jump to virtual address +ENDPROC(cpu_resume_mmu) + .popsection +cpu_resume_after_mmu: + bl cpu_init @ restore the und/abt/irq banked regs + mov r0, #0 @ return zero on success + ldmfd sp!, {r4 - r11, pc} +ENDPROC(cpu_resume_after_mmu) + +/* + * Note: Yes, part of the following code is located into the .data section. + * This is to allow sleep_save_sp to be accessed with a relative load + * while we can't rely on any MMU translation. We could have put + * sleep_save_sp in the .text section as well, but some setups might + * insist on it to be truly read-only. + */ + .data + .align +ENTRY(cpu_resume) +ARM_BE8(setend be) @ ensure we are in BE mode +#ifdef CONFIG_ARM_VIRT_EXT + bl __hyp_stub_install_secondary +#endif + safe_svcmode_maskall r1 + mov r1, #0 + ALT_SMP(mrc p15, 0, r0, c0, c0, 5) + ALT_UP_B(1f) + adr r2, mpidr_hash_ptr + ldr r3, [r2] + add r2, r2, r3 @ r2 = struct mpidr_hash phys address + /* + * This ldmia relies on the memory layout of the mpidr_hash + * struct mpidr_hash. + */ + ldmia r2, { r3-r6 } @ r3 = mpidr mask (r4,r5,r6) = l[0,1,2] shifts + compute_mpidr_hash r1, r4, r5, r6, r0, r3 +1: + adr r0, _sleep_save_sp + ldr r0, [r0, #SLEEP_SAVE_SP_PHYS] + ldr r0, [r0, r1, lsl #2] + + @ load phys pgd, stack, resume fn + ARM( ldmia r0!, {r1, sp, pc} ) +THUMB( ldmia r0!, {r1, r2, r3} ) +THUMB( mov sp, r2 ) +THUMB( bx r3 ) +ENDPROC(cpu_resume) + + .align 2 +mpidr_hash_ptr: + .long mpidr_hash - . @ mpidr_hash struct offset + + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) +_sleep_save_sp: + .space SLEEP_SAVE_SP_SZ @ struct sleep_save_sp diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 8c195959025..7c4fada440f 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -19,16 +19,22 @@ #include <linux/mm.h> #include <linux/err.h> #include <linux/cpu.h> -#include <linux/smp.h> #include <linux/seq_file.h> #include <linux/irq.h> #include <linux/percpu.h> #include <linux/clockchips.h> +#include <linux/completion.h> +#include <linux/cpufreq.h> +#include <linux/irq_work.h> -#include <asm/atomic.h> +#include <linux/atomic.h> +#include <asm/smp.h> #include <asm/cacheflush.h> #include <asm/cpu.h> #include <asm/cputype.h> +#include <asm/exception.h> +#include <asm/idmap.h> +#include <asm/topology.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> @@ -36,8 +42,10 @@ #include <asm/sections.h> #include <asm/tlbflush.h> #include <asm/ptrace.h> -#include <asm/localtimer.h> #include <asm/smp_plat.h> +#include <asm/virt.h> +#include <asm/mach/arch.h> +#include <asm/mpu.h> /* * as from 2.5, kernels no longer have an init_tasks structure @@ -47,172 +55,133 @@ struct secondary_data secondary_data; /* - * structures for inter-processor calls - * - A collection of single bit ipi messages. + * control for which core is the next to come out of the secondary + * boot "holding pen" */ -struct ipi_data { - spinlock_t lock; - unsigned long ipi_count; - unsigned long bits; -}; - -static DEFINE_PER_CPU(struct ipi_data, ipi_data) = { - .lock = SPIN_LOCK_UNLOCKED, -}; +volatile int pen_release = -1; enum ipi_msg_type { + IPI_WAKEUP, IPI_TIMER, IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_IRQ_WORK, + IPI_COMPLETION, }; -static inline void identity_mapping_add(pgd_t *pgd, unsigned long start, - unsigned long end) -{ - unsigned long addr, prot; - pmd_t *pmd; - - prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE; - if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale()) - prot |= PMD_BIT4; +static DECLARE_COMPLETION(cpu_running); - for (addr = start & PGDIR_MASK; addr < end;) { - pmd = pmd_offset(pgd + pgd_index(addr), addr); - pmd[0] = __pmd(addr | prot); - addr += SECTION_SIZE; - pmd[1] = __pmd(addr | prot); - addr += SECTION_SIZE; - flush_pmd_entry(pmd); - outer_clean_range(__pa(pmd), __pa(pmd + 1)); - } -} +static struct smp_operations smp_ops; -static inline void identity_mapping_del(pgd_t *pgd, unsigned long start, - unsigned long end) +void __init smp_set_ops(struct smp_operations *ops) { - unsigned long addr; - pmd_t *pmd; + if (ops) + smp_ops = *ops; +}; - for (addr = start & PGDIR_MASK; addr < end; addr += PGDIR_SIZE) { - pmd = pmd_offset(pgd + pgd_index(addr), addr); - pmd[0] = __pmd(0); - pmd[1] = __pmd(0); - clean_pmd_entry(pmd); - outer_clean_range(__pa(pmd), __pa(pmd + 1)); - } +static unsigned long get_arch_pgd(pgd_t *pgd) +{ + phys_addr_t pgdir = virt_to_idmap(pgd); + BUG_ON(pgdir & ARCH_PGD_MASK); + return pgdir >> ARCH_PGD_SHIFT; } -int __cpuinit __cpu_up(unsigned int cpu) +int __cpu_up(unsigned int cpu, struct task_struct *idle) { - struct cpuinfo_arm *ci = &per_cpu(cpu_data, cpu); - struct task_struct *idle = ci->idle; - pgd_t *pgd; int ret; /* - * Spawn a new process manually, if not already done. - * Grab a pointer to its task struct so we can mess with it - */ - if (!idle) { - idle = fork_idle(cpu); - if (IS_ERR(idle)) { - printk(KERN_ERR "CPU%u: fork() failed\n", cpu); - return PTR_ERR(idle); - } - ci->idle = idle; - } else { - /* - * Since this idle thread is being re-used, call - * init_idle() to reinitialize the thread structure. - */ - init_idle(idle, cpu); - } - - /* - * Allocate initial page tables to allow the new CPU to - * enable the MMU safely. This essentially means a set - * of our "standard" page tables, with the addition of - * a 1:1 mapping for the physical address of the kernel. - */ - pgd = pgd_alloc(&init_mm); - if (!pgd) - return -ENOMEM; - - if (PHYS_OFFSET != PAGE_OFFSET) { -#ifndef CONFIG_HOTPLUG_CPU - identity_mapping_add(pgd, __pa(__init_begin), __pa(__init_end)); -#endif - identity_mapping_add(pgd, __pa(_stext), __pa(_etext)); - identity_mapping_add(pgd, __pa(_sdata), __pa(_edata)); - } - - /* * We need to tell the secondary core where to find * its stack and the page tables. */ secondary_data.stack = task_stack_page(idle) + THREAD_START_SP; - secondary_data.pgdir = virt_to_phys(pgd); - __cpuc_flush_dcache_area(&secondary_data, sizeof(secondary_data)); - outer_clean_range(__pa(&secondary_data), __pa(&secondary_data + 1)); +#ifdef CONFIG_ARM_MPU + secondary_data.mpu_rgn_szr = mpu_rgn_info.rgns[MPU_RAM_REGION].drsr; +#endif + +#ifdef CONFIG_MMU + secondary_data.pgdir = get_arch_pgd(idmap_pgd); + secondary_data.swapper_pg_dir = get_arch_pgd(swapper_pg_dir); +#endif + sync_cache_w(&secondary_data); /* * Now bring the CPU into our world. */ ret = boot_secondary(cpu, idle); if (ret == 0) { - unsigned long timeout; - /* * CPU was successfully started, wait for it * to come online or time out. */ - timeout = jiffies + HZ; - while (time_before(jiffies, timeout)) { - if (cpu_online(cpu)) - break; - - udelay(10); - barrier(); - } + wait_for_completion_timeout(&cpu_running, + msecs_to_jiffies(1000)); - if (!cpu_online(cpu)) + if (!cpu_online(cpu)) { + pr_crit("CPU%u: failed to come online\n", cpu); ret = -EIO; + } + } else { + pr_err("CPU%u: failed to boot: %d\n", cpu, ret); } - secondary_data.stack = NULL; - secondary_data.pgdir = 0; - if (PHYS_OFFSET != PAGE_OFFSET) { -#ifndef CONFIG_HOTPLUG_CPU - identity_mapping_del(pgd, __pa(__init_begin), __pa(__init_end)); -#endif - identity_mapping_del(pgd, __pa(_stext), __pa(_etext)); - identity_mapping_del(pgd, __pa(_sdata), __pa(_edata)); - } + memset(&secondary_data, 0, sizeof(secondary_data)); + return ret; +} - pgd_free(&init_mm, pgd); +/* platform specific SMP operations */ +void __init smp_init_cpus(void) +{ + if (smp_ops.smp_init_cpus) + smp_ops.smp_init_cpus(); +} - if (ret) { - printk(KERN_CRIT "CPU%u: processor failed to boot\n", cpu); +int boot_secondary(unsigned int cpu, struct task_struct *idle) +{ + if (smp_ops.smp_boot_secondary) + return smp_ops.smp_boot_secondary(cpu, idle); + return -ENOSYS; +} - /* - * FIXME: We need to clean up the new idle thread. --rmk - */ - } +int platform_can_cpu_hotplug(void) +{ +#ifdef CONFIG_HOTPLUG_CPU + if (smp_ops.cpu_kill) + return 1; +#endif - return ret; + return 0; } #ifdef CONFIG_HOTPLUG_CPU +static int platform_cpu_kill(unsigned int cpu) +{ + if (smp_ops.cpu_kill) + return smp_ops.cpu_kill(cpu); + return 1; +} + +static int platform_cpu_disable(unsigned int cpu) +{ + if (smp_ops.cpu_disable) + return smp_ops.cpu_disable(cpu); + + /* + * By default, allow disabling all CPUs except the first one, + * since this is special on a lot of platforms, e.g. because + * of clock tick interrupts. + */ + return cpu == 0 ? -EPERM : 0; +} /* * __cpu_disable runs on the processor to be shutdown. */ int __cpu_disable(void) { unsigned int cpu = smp_processor_id(); - struct task_struct *p; int ret; ret = platform_cpu_disable(cpu); @@ -231,33 +200,41 @@ int __cpu_disable(void) migrate_irqs(); /* - * Stop the local timer for this CPU. - */ - local_timer_stop(); - - /* * Flush user cache and TLB mappings, and then remove this CPU * from the vm mask set of all processes. + * + * Caches are flushed to the Level of Unification Inner Shareable + * to write-back dirty lines to unified caches shared by all CPUs. */ - flush_cache_all(); + flush_cache_louis(); local_flush_tlb_all(); - read_lock(&tasklist_lock); - for_each_process(p) { - if (p->mm) - cpumask_clear_cpu(cpu, mm_cpumask(p->mm)); - } - read_unlock(&tasklist_lock); + clear_tasks_mm_cpumask(cpu); return 0; } +static DECLARE_COMPLETION(cpu_died); + /* * called on the thread which is asking for a CPU to be shutdown - * waits until shutdown has completed, or it is timed out. */ void __cpu_die(unsigned int cpu) { + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + pr_err("CPU%u: cpu didn't die\n", cpu); + return; + } + printk(KERN_NOTICE "CPU%u: shutdown\n", cpu); + + /* + * platform_cpu_kill() is generally expected to do the powering off + * and/or cutting of clocks to the dying CPU. Optionally, this may + * be done by the CPU which is dying in preference to supporting + * this call, but that means there is _no_ synchronisation between + * the requesting CPU and the dying CPU actually losing power. + */ if (!platform_cpu_kill(cpu)) printk("CPU%u: unable to kill\n", cpu); } @@ -274,14 +251,50 @@ void __ref cpu_die(void) { unsigned int cpu = smp_processor_id(); - local_irq_disable(); idle_task_exit(); + local_irq_disable(); + + /* + * Flush the data out of the L1 cache for this CPU. This must be + * before the completion to ensure that data is safely written out + * before platform_cpu_kill() gets called - which may disable + * *this* CPU and power down its cache. + */ + flush_cache_louis(); + + /* + * Tell __cpu_die() that this CPU is now safe to dispose of. Once + * this returns, power and/or clocks can be removed at any point + * from this CPU and its cache by platform_cpu_kill(). + */ + complete(&cpu_died); + + /* + * Ensure that the cache lines associated with that completion are + * written out. This covers the case where _this_ CPU is doing the + * powering down, to ensure that the completion is visible to the + * CPU waiting for this one. + */ + flush_cache_louis(); + /* - * actual CPU shutdown procedure is at least platform (if not - * CPU) specific + * The actual CPU shutdown procedure is at least platform (if not + * CPU) specific. This may remove power, or it may simply spin. + * + * Platforms are generally expected *NOT* to return from this call, + * although there are some which do because they have no way to + * power down the CPU. These platforms are the _only_ reason we + * have a return path which uses the fragment of assembly below. + * + * The return path should not be used for platforms which can + * power off the CPU. */ - platform_cpu_die(cpu); + if (smp_ops.cpu_die) + smp_ops.cpu_die(cpu); + + pr_warn("CPU%u: smp_ops.cpu_die() returned, trying to resuscitate\n", + cpu); /* * Do not return to the idle loop - jump back to the secondary @@ -289,6 +302,7 @@ void __ref cpu_die(void) * to be repeated to undo the effects of taking the CPU offline. */ __asm__("mov sp, %0\n" + " mov fp, #0\n" " b secondary_start_kernel" : : "r" (task_stack_page(current) + THREAD_SIZE - 8)); @@ -296,222 +310,203 @@ void __ref cpu_die(void) #endif /* CONFIG_HOTPLUG_CPU */ /* + * Called by both boot and secondaries to move global data into + * per-processor storage. + */ +static void smp_store_cpu_info(unsigned int cpuid) +{ + struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); + + cpu_info->loops_per_jiffy = loops_per_jiffy; + cpu_info->cpuid = read_cpuid_id(); + + store_cpu_topology(cpuid); +} + +/* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ -asmlinkage void __cpuinit secondary_start_kernel(void) +asmlinkage void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; - unsigned int cpu = smp_processor_id(); + unsigned int cpu; - printk("CPU%u: Booted secondary processor\n", cpu); + /* + * The identity mapping is uncached (strongly ordered), so + * switch away from it before attempting any exclusive accesses. + */ + cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); + enter_lazy_tlb(mm, current); + local_flush_tlb_all(); /* * All kernel threads share the same mm context; grab a * reference and switch to it. */ - atomic_inc(&mm->mm_users); + cpu = smp_processor_id(); atomic_inc(&mm->mm_count); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); - cpu_switch_mm(mm->pgd, mm); - enter_lazy_tlb(mm, current); - local_flush_tlb_all(); cpu_init(); + + printk("CPU%u: Booted secondary processor\n", cpu); + preempt_disable(); + trace_hardirqs_off(); /* * Give the platform a chance to do its own initialisation. */ - platform_secondary_init(cpu); + if (smp_ops.smp_secondary_init) + smp_ops.smp_secondary_init(cpu); - /* - * Enable local interrupts. - */ notify_cpu_starting(cpu); - local_irq_enable(); - local_fiq_enable(); - - /* - * Setup the percpu timer for this CPU. - */ - percpu_timer_setup(); calibrate_delay(); smp_store_cpu_info(cpu); /* - * OK, now it's safe to let the boot CPU continue + * OK, now it's safe to let the boot CPU continue. Wait for + * the CPU migration code to notice that the CPU is online + * before we continue - which happens after __cpu_up returns. */ set_cpu_online(cpu, true); + complete(&cpu_running); + + local_irq_enable(); + local_fiq_enable(); /* * OK, it's off to the idle thread for us */ - cpu_idle(); -} - -/* - * Called by both boot and secondaries to move global data into - * per-processor storage. - */ -void __cpuinit smp_store_cpu_info(unsigned int cpuid) -{ - struct cpuinfo_arm *cpu_info = &per_cpu(cpu_data, cpuid); - - cpu_info->loops_per_jiffy = loops_per_jiffy; + cpu_startup_entry(CPUHP_ONLINE); } void __init smp_cpus_done(unsigned int max_cpus) { - int cpu; - unsigned long bogosum = 0; - - for_each_online_cpu(cpu) - bogosum += per_cpu(cpu_data, cpu).loops_per_jiffy; + printk(KERN_INFO "SMP: Total of %d processors activated.\n", + num_online_cpus()); - printk(KERN_INFO "SMP: Total of %d processors activated " - "(%lu.%02lu BogoMIPS).\n", - num_online_cpus(), - bogosum / (500000/HZ), - (bogosum / (5000/HZ)) % 100); + hyp_mode_check(); } void __init smp_prepare_boot_cpu(void) { - unsigned int cpu = smp_processor_id(); - - per_cpu(cpu_data, cpu).idle = current; + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } -static void send_ipi_message(const struct cpumask *mask, enum ipi_msg_type msg) +void __init smp_prepare_cpus(unsigned int max_cpus) { - unsigned long flags; - unsigned int cpu; + unsigned int ncores = num_possible_cpus(); - local_irq_save(flags); + init_cpu_topology(); - for_each_cpu(cpu, mask) { - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); - - spin_lock(&ipi->lock); - ipi->bits |= 1 << msg; - spin_unlock(&ipi->lock); - } + smp_store_cpu_info(smp_processor_id()); /* - * Call the platform specific cross-CPU call function. + * are we trying to boot more cores than exist? */ - smp_cross_call(mask); + if (max_cpus > ncores) + max_cpus = ncores; + if (ncores > 1 && max_cpus) { + /* + * Initialise the present map, which describes the set of CPUs + * actually populated at the present time. A platform should + * re-initialize the map in the platforms smp_prepare_cpus() + * if present != possible (e.g. physical hotplug). + */ + init_cpu_present(cpu_possible_mask); - local_irq_restore(flags); + /* + * Initialise the SCU if there are more than one CPU + * and let them know where to start. + */ + if (smp_ops.smp_prepare_cpus) + smp_ops.smp_prepare_cpus(max_cpus); + } } -void arch_send_call_function_ipi_mask(const struct cpumask *mask) +static void (*smp_cross_call)(const struct cpumask *, unsigned int); + +void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int)) { - send_ipi_message(mask, IPI_CALL_FUNC); + if (!smp_cross_call) + smp_cross_call = fn; } -void arch_send_call_function_single_ipi(int cpu) +void arch_send_call_function_ipi_mask(const struct cpumask *mask) { - send_ipi_message(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); + smp_cross_call(mask, IPI_CALL_FUNC); } -void show_ipi_list(struct seq_file *p) +void arch_send_wakeup_ipi_mask(const struct cpumask *mask) { - unsigned int cpu; - - seq_puts(p, "IPI:"); - - for_each_present_cpu(cpu) - seq_printf(p, " %10lu", per_cpu(ipi_data, cpu).ipi_count); - - seq_putc(p, '\n'); + smp_cross_call(mask, IPI_WAKEUP); } -void show_local_irqs(struct seq_file *p) +void arch_send_call_function_single_ipi(int cpu) { - unsigned int cpu; - - seq_printf(p, "LOC: "); - - for_each_present_cpu(cpu) - seq_printf(p, "%10u ", irq_stat[cpu].local_timer_irqs); - - seq_putc(p, '\n'); + smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } -/* - * Timer (local or broadcast) support - */ -static DEFINE_PER_CPU(struct clock_event_device, percpu_clockevent); - -static void ipi_timer(void) +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) { - struct clock_event_device *evt = &__get_cpu_var(percpu_clockevent); - irq_enter(); - evt->event_handler(evt); - irq_exit(); + if (is_smp()) + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); } +#endif + +static const char *ipi_types[NR_IPI] = { +#define S(x,s) [x] = s + S(IPI_WAKEUP, "CPU wakeup interrupts"), + S(IPI_TIMER, "Timer broadcast interrupts"), + S(IPI_RESCHEDULE, "Rescheduling interrupts"), + S(IPI_CALL_FUNC, "Function call interrupts"), + S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), + S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_IRQ_WORK, "IRQ work interrupts"), + S(IPI_COMPLETION, "completion interrupts"), +}; -#ifdef CONFIG_LOCAL_TIMERS -asmlinkage void __exception do_local_timer(struct pt_regs *regs) +void show_ipi_list(struct seq_file *p, int prec) { - struct pt_regs *old_regs = set_irq_regs(regs); - int cpu = smp_processor_id(); + unsigned int cpu, i; - if (local_timer_ack()) { - irq_stat[cpu].local_timer_irqs++; - ipi_timer(); - } + for (i = 0; i < NR_IPI; i++) { + seq_printf(p, "%*s%u: ", prec - 1, "IPI", i); - set_irq_regs(old_regs); -} -#endif + for_each_online_cpu(cpu) + seq_printf(p, "%10u ", + __get_irq_stat(cpu, ipi_irqs[i])); -#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST -static void smp_timer_broadcast(const struct cpumask *mask) -{ - send_ipi_message(mask, IPI_TIMER); + seq_printf(p, " %s\n", ipi_types[i]); + } } -#else -#define smp_timer_broadcast NULL -#endif -#ifndef CONFIG_LOCAL_TIMERS -static void broadcast_timer_set_mode(enum clock_event_mode mode, - struct clock_event_device *evt) +u64 smp_irq_stat_cpu(unsigned int cpu) { -} + u64 sum = 0; + int i; -static void local_timer_setup(struct clock_event_device *evt) -{ - evt->name = "dummy_timer"; - evt->features = CLOCK_EVT_FEAT_ONESHOT | - CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_DUMMY; - evt->rating = 400; - evt->mult = 1; - evt->set_mode = broadcast_timer_set_mode; + for (i = 0; i < NR_IPI; i++) + sum += __get_irq_stat(cpu, ipi_irqs[i]); - clockevents_register_device(evt); + return sum; } -#endif -void __cpuinit percpu_timer_setup(void) +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) { - unsigned int cpu = smp_processor_id(); - struct clock_event_device *evt = &per_cpu(percpu_clockevent, cpu); - - evt->cpumask = cpumask_of(cpu); - evt->broadcast = smp_timer_broadcast; - - local_timer_setup(evt); + smp_cross_call(mask, IPI_TIMER); } +#endif -static DEFINE_SPINLOCK(stop_lock); +static DEFINE_RAW_SPINLOCK(stop_lock); /* * ipi_cpu_stop - handle IPI from smp_send_stop() @@ -520,10 +515,10 @@ static void ipi_cpu_stop(unsigned int cpu) { if (system_state == SYSTEM_BOOTING || system_state == SYSTEM_RUNNING) { - spin_lock(&stop_lock); + raw_spin_lock(&stop_lock); printk(KERN_CRIT "CPU%u: stopping\n", cpu); dump_stack(); - spin_unlock(&stop_lock); + raw_spin_unlock(&stop_lock); } set_cpu_online(cpu, false); @@ -535,218 +530,171 @@ static void ipi_cpu_stop(unsigned int cpu) cpu_relax(); } +static DEFINE_PER_CPU(struct completion *, cpu_completion); + +int register_ipi_completion(struct completion *completion, int cpu) +{ + per_cpu(cpu_completion, cpu) = completion; + return IPI_COMPLETION; +} + +static void ipi_complete(unsigned int cpu) +{ + complete(per_cpu(cpu_completion, cpu)); +} + /* * Main handler for inter-processor interrupts - * - * For ARM, the ipimask now only identifies a single - * category of IPI (Bit 1 IPIs have been replaced by a - * different mechanism): - * - * Bit 0 - Inter-processor function call */ -asmlinkage void __exception do_IPI(struct pt_regs *regs) +asmlinkage void __exception_irq_entry do_IPI(int ipinr, struct pt_regs *regs) +{ + handle_IPI(ipinr, regs); +} + +void handle_IPI(int ipinr, struct pt_regs *regs) { unsigned int cpu = smp_processor_id(); - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); struct pt_regs *old_regs = set_irq_regs(regs); - ipi->ipi_count++; - - for (;;) { - unsigned long msgs; - - spin_lock(&ipi->lock); - msgs = ipi->bits; - ipi->bits = 0; - spin_unlock(&ipi->lock); - - if (!msgs) - break; - - do { - unsigned nextmsg; - - nextmsg = msgs & -msgs; - msgs &= ~nextmsg; - nextmsg = ffz(~nextmsg); - - switch (nextmsg) { - case IPI_TIMER: - ipi_timer(); - break; + if (ipinr < NR_IPI) + __inc_irq_stat(cpu, ipi_irqs[ipinr]); - case IPI_RESCHEDULE: - /* - * nothing more to do - eveything is - * done on the interrupt return path - */ - break; + switch (ipinr) { + case IPI_WAKEUP: + break; - case IPI_CALL_FUNC: - generic_smp_call_function_interrupt(); - break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + case IPI_TIMER: + irq_enter(); + tick_receive_broadcast(); + irq_exit(); + break; +#endif - case IPI_CALL_FUNC_SINGLE: - generic_smp_call_function_single_interrupt(); - break; + case IPI_RESCHEDULE: + scheduler_ipi(); + break; + + case IPI_CALL_FUNC: + irq_enter(); + generic_smp_call_function_interrupt(); + irq_exit(); + break; + + case IPI_CALL_FUNC_SINGLE: + irq_enter(); + generic_smp_call_function_single_interrupt(); + irq_exit(); + break; + + case IPI_CPU_STOP: + irq_enter(); + ipi_cpu_stop(cpu); + irq_exit(); + break; + +#ifdef CONFIG_IRQ_WORK + case IPI_IRQ_WORK: + irq_enter(); + irq_work_run(); + irq_exit(); + break; +#endif - case IPI_CPU_STOP: - ipi_cpu_stop(cpu); - break; + case IPI_COMPLETION: + irq_enter(); + ipi_complete(cpu); + irq_exit(); + break; - default: - printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", - cpu, nextmsg); - break; - } - } while (msgs); + default: + printk(KERN_CRIT "CPU%u: Unknown IPI message 0x%x\n", + cpu, ipinr); + break; } - set_irq_regs(old_regs); } void smp_send_reschedule(int cpu) { - send_ipi_message(cpumask_of(cpu), IPI_RESCHEDULE); + smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } void smp_send_stop(void) { - cpumask_t mask = cpu_online_map; - cpu_clear(smp_processor_id(), mask); - if (!cpus_empty(mask)) - send_ipi_message(&mask, IPI_CPU_STOP); -} - -/* - * not supported here - */ -int setup_profiling_timer(unsigned int multiplier) -{ - return -EINVAL; -} + unsigned long timeout; + struct cpumask mask; -static void -on_each_cpu_mask(void (*func)(void *), void *info, int wait, - const struct cpumask *mask) -{ - preempt_disable(); + cpumask_copy(&mask, cpu_online_mask); + cpumask_clear_cpu(smp_processor_id(), &mask); + if (!cpumask_empty(&mask)) + smp_cross_call(&mask, IPI_CPU_STOP); - smp_call_function_many(mask, func, info, wait); - if (cpumask_test_cpu(smp_processor_id(), mask)) - func(info); + /* Wait up to one second for other CPUs to stop */ + timeout = USEC_PER_SEC; + while (num_online_cpus() > 1 && timeout--) + udelay(1); - preempt_enable(); + if (num_online_cpus() > 1) + pr_warning("SMP: failed to stop secondary CPUs\n"); } -/**********************************************************************/ - /* - * TLB operations + * not supported here */ -struct tlb_args { - struct vm_area_struct *ta_vma; - unsigned long ta_start; - unsigned long ta_end; -}; - -static inline void ipi_flush_tlb_all(void *ignored) -{ - local_flush_tlb_all(); -} - -static inline void ipi_flush_tlb_mm(void *arg) -{ - struct mm_struct *mm = (struct mm_struct *)arg; - - local_flush_tlb_mm(mm); -} - -static inline void ipi_flush_tlb_page(void *arg) -{ - struct tlb_args *ta = (struct tlb_args *)arg; - - local_flush_tlb_page(ta->ta_vma, ta->ta_start); -} - -static inline void ipi_flush_tlb_kernel_page(void *arg) +int setup_profiling_timer(unsigned int multiplier) { - struct tlb_args *ta = (struct tlb_args *)arg; - - local_flush_tlb_kernel_page(ta->ta_start); + return -EINVAL; } -static inline void ipi_flush_tlb_range(void *arg) -{ - struct tlb_args *ta = (struct tlb_args *)arg; +#ifdef CONFIG_CPU_FREQ - local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); -} +static DEFINE_PER_CPU(unsigned long, l_p_j_ref); +static DEFINE_PER_CPU(unsigned long, l_p_j_ref_freq); +static unsigned long global_l_p_j_ref; +static unsigned long global_l_p_j_ref_freq; -static inline void ipi_flush_tlb_kernel_range(void *arg) +static int cpufreq_callback(struct notifier_block *nb, + unsigned long val, void *data) { - struct tlb_args *ta = (struct tlb_args *)arg; - - local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); -} + struct cpufreq_freqs *freq = data; + int cpu = freq->cpu; -void flush_tlb_all(void) -{ - if (tlb_ops_need_broadcast()) - on_each_cpu(ipi_flush_tlb_all, NULL, 1); - else - local_flush_tlb_all(); -} + if (freq->flags & CPUFREQ_CONST_LOOPS) + return NOTIFY_OK; -void flush_tlb_mm(struct mm_struct *mm) -{ - if (tlb_ops_need_broadcast()) - on_each_cpu_mask(ipi_flush_tlb_mm, mm, 1, mm_cpumask(mm)); - else - local_flush_tlb_mm(mm); -} + if (!per_cpu(l_p_j_ref, cpu)) { + per_cpu(l_p_j_ref, cpu) = + per_cpu(cpu_data, cpu).loops_per_jiffy; + per_cpu(l_p_j_ref_freq, cpu) = freq->old; + if (!global_l_p_j_ref) { + global_l_p_j_ref = loops_per_jiffy; + global_l_p_j_ref_freq = freq->old; + } + } -void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) -{ - if (tlb_ops_need_broadcast()) { - struct tlb_args ta; - ta.ta_vma = vma; - ta.ta_start = uaddr; - on_each_cpu_mask(ipi_flush_tlb_page, &ta, 1, mm_cpumask(vma->vm_mm)); - } else - local_flush_tlb_page(vma, uaddr); + if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) || + (val == CPUFREQ_POSTCHANGE && freq->old > freq->new)) { + loops_per_jiffy = cpufreq_scale(global_l_p_j_ref, + global_l_p_j_ref_freq, + freq->new); + per_cpu(cpu_data, cpu).loops_per_jiffy = + cpufreq_scale(per_cpu(l_p_j_ref, cpu), + per_cpu(l_p_j_ref_freq, cpu), + freq->new); + } + return NOTIFY_OK; } -void flush_tlb_kernel_page(unsigned long kaddr) -{ - if (tlb_ops_need_broadcast()) { - struct tlb_args ta; - ta.ta_start = kaddr; - on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); - } else - local_flush_tlb_kernel_page(kaddr); -} +static struct notifier_block cpufreq_notifier = { + .notifier_call = cpufreq_callback, +}; -void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static int __init register_cpufreq_notifier(void) { - if (tlb_ops_need_broadcast()) { - struct tlb_args ta; - ta.ta_vma = vma; - ta.ta_start = start; - ta.ta_end = end; - on_each_cpu_mask(ipi_flush_tlb_range, &ta, 1, mm_cpumask(vma->vm_mm)); - } else - local_flush_tlb_range(vma, start, end); + return cpufreq_register_notifier(&cpufreq_notifier, + CPUFREQ_TRANSITION_NOTIFIER); } +core_initcall(register_cpufreq_notifier); -void flush_tlb_kernel_range(unsigned long start, unsigned long end) -{ - if (tlb_ops_need_broadcast()) { - struct tlb_args ta; - ta.ta_start = start; - ta.ta_end = end; - on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); - } else - local_flush_tlb_kernel_range(start, end); -} +#endif diff --git a/arch/arm/kernel/smp_scu.c b/arch/arm/kernel/smp_scu.c index 9ab4149bd98..1aafa0d785e 100644 --- a/arch/arm/kernel/smp_scu.c +++ b/arch/arm/kernel/smp_scu.c @@ -11,8 +11,10 @@ #include <linux/init.h> #include <linux/io.h> +#include <asm/smp_plat.h> #include <asm/smp_scu.h> #include <asm/cacheflush.h> +#include <asm/cputype.h> #define SCU_CTRL 0x00 #define SCU_CONFIG 0x04 @@ -20,29 +22,39 @@ #define SCU_INVALIDATE 0x0c #define SCU_FPGA_REVISION 0x10 +#ifdef CONFIG_SMP /* * Get the number of CPU cores from the SCU configuration */ unsigned int __init scu_get_core_count(void __iomem *scu_base) { - unsigned int ncores = __raw_readl(scu_base + SCU_CONFIG); + unsigned int ncores = readl_relaxed(scu_base + SCU_CONFIG); return (ncores & 0x03) + 1; } /* * Enable the SCU */ -void __init scu_enable(void __iomem *scu_base) +void scu_enable(void __iomem *scu_base) { u32 scu_ctrl; - scu_ctrl = __raw_readl(scu_base + SCU_CTRL); +#ifdef CONFIG_ARM_ERRATA_764369 + /* Cortex-A9 only */ + if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090) { + scu_ctrl = readl_relaxed(scu_base + 0x30); + if (!(scu_ctrl & 1)) + writel_relaxed(scu_ctrl | 0x1, scu_base + 0x30); + } +#endif + + scu_ctrl = readl_relaxed(scu_base + SCU_CTRL); /* already enabled? */ if (scu_ctrl & 1) return; scu_ctrl |= 1; - __raw_writel(scu_ctrl, scu_base + SCU_CTRL); + writel_relaxed(scu_ctrl, scu_base + SCU_CTRL); /* * Ensure that the data accessed by CPU0 before the SCU was @@ -50,3 +62,27 @@ void __init scu_enable(void __iomem *scu_base) */ flush_cache_all(); } +#endif + +/* + * Set the executing CPUs power mode as defined. This will be in + * preparation for it executing a WFI instruction. + * + * This function must be called with preemption disabled, and as it + * has the side effect of disabling coherency, caches must have been + * flushed. Interrupts must also have been disabled. + */ +int scu_power_mode(void __iomem *scu_base, unsigned int mode) +{ + unsigned int val; + int cpu = MPIDR_AFFINITY_LEVEL(cpu_logical_map(smp_processor_id()), 0); + + if (mode > 3 || mode == 1 || cpu > 3) + return -EINVAL; + + val = readb_relaxed(scu_base + SCU_CPU_STATUS + cpu) & ~0x03; + val |= mode; + writeb_relaxed(val, scu_base + SCU_CPU_STATUS + cpu); + + return 0; +} diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c new file mode 100644 index 00000000000..95d063620b7 --- /dev/null +++ b/arch/arm/kernel/smp_tlb.c @@ -0,0 +1,209 @@ +/* + * linux/arch/arm/kernel/smp_tlb.c + * + * Copyright (C) 2002 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/preempt.h> +#include <linux/smp.h> + +#include <asm/smp_plat.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +/**********************************************************************/ + +/* + * TLB operations + */ +struct tlb_args { + struct vm_area_struct *ta_vma; + unsigned long ta_start; + unsigned long ta_end; +}; + +static inline void ipi_flush_tlb_all(void *ignored) +{ + local_flush_tlb_all(); +} + +static inline void ipi_flush_tlb_mm(void *arg) +{ + struct mm_struct *mm = (struct mm_struct *)arg; + + local_flush_tlb_mm(mm); +} + +static inline void ipi_flush_tlb_page(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_page(ta->ta_vma, ta->ta_start); +} + +static inline void ipi_flush_tlb_kernel_page(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_page(ta->ta_start); +} + +static inline void ipi_flush_tlb_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_range(ta->ta_vma, ta->ta_start, ta->ta_end); +} + +static inline void ipi_flush_tlb_kernel_range(void *arg) +{ + struct tlb_args *ta = (struct tlb_args *)arg; + + local_flush_tlb_kernel_range(ta->ta_start, ta->ta_end); +} + +static inline void ipi_flush_bp_all(void *ignored) +{ + local_flush_bp_all(); +} + +#ifdef CONFIG_ARM_ERRATA_798181 +bool (*erratum_a15_798181_handler)(void); + +static bool erratum_a15_798181_partial(void) +{ + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); + dsb(ish); + return false; +} + +static bool erratum_a15_798181_broadcast(void) +{ + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); + dsb(ish); + return true; +} + +void erratum_a15_798181_init(void) +{ + unsigned int midr = read_cpuid_id(); + unsigned int revidr = read_cpuid(CPUID_REVIDR); + + /* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 || + (revidr & 0x210) == 0x210) { + return; + } + if (revidr & 0x10) + erratum_a15_798181_handler = erratum_a15_798181_partial; + else + erratum_a15_798181_handler = erratum_a15_798181_broadcast; +} +#endif + +static void ipi_flush_tlb_a15_erratum(void *arg) +{ + dmb(); +} + +static void broadcast_tlb_a15_erratum(void) +{ + if (!erratum_a15_798181()) + return; + + smp_call_function(ipi_flush_tlb_a15_erratum, NULL, 1); +} + +static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) +{ + int this_cpu; + cpumask_t mask = { CPU_BITS_NONE }; + + if (!erratum_a15_798181()) + return; + + this_cpu = get_cpu(); + a15_erratum_get_cpumask(this_cpu, mm, &mask); + smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); + put_cpu(); +} + +void flush_tlb_all(void) +{ + if (tlb_ops_need_broadcast()) + on_each_cpu(ipi_flush_tlb_all, NULL, 1); + else + __flush_tlb_all(); + broadcast_tlb_a15_erratum(); +} + +void flush_tlb_mm(struct mm_struct *mm) +{ + if (tlb_ops_need_broadcast()) + on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1); + else + __flush_tlb_mm(mm); + broadcast_tlb_mm_a15_erratum(mm); +} + +void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) +{ + if (tlb_ops_need_broadcast()) { + struct tlb_args ta; + ta.ta_vma = vma; + ta.ta_start = uaddr; + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_page, + &ta, 1); + } else + __flush_tlb_page(vma, uaddr); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); +} + +void flush_tlb_kernel_page(unsigned long kaddr) +{ + if (tlb_ops_need_broadcast()) { + struct tlb_args ta; + ta.ta_start = kaddr; + on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); + } else + __flush_tlb_kernel_page(kaddr); + broadcast_tlb_a15_erratum(); +} + +void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if (tlb_ops_need_broadcast()) { + struct tlb_args ta; + ta.ta_vma = vma; + ta.ta_start = start; + ta.ta_end = end; + on_each_cpu_mask(mm_cpumask(vma->vm_mm), ipi_flush_tlb_range, + &ta, 1); + } else + local_flush_tlb_range(vma, start, end); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); +} + +void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if (tlb_ops_need_broadcast()) { + struct tlb_args ta; + ta.ta_start = start; + ta.ta_end = end; + on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); + } else + local_flush_tlb_kernel_range(start, end); + broadcast_tlb_a15_erratum(); +} + +void flush_bp_all(void) +{ + if (tlb_ops_need_broadcast()) + on_each_cpu(ipi_flush_bp_all, NULL, 1); + else + __flush_bp_all(); +} diff --git a/arch/arm/kernel/smp_twd.c b/arch/arm/kernel/smp_twd.c index 35882fbf37f..dfc32130bc4 100644 --- a/arch/arm/kernel/smp_twd.c +++ b/arch/arm/kernel/smp_twd.c @@ -10,21 +10,31 @@ */ #include <linux/init.h> #include <linux/kernel.h> +#include <linux/clk.h> +#include <linux/cpu.h> #include <linux/delay.h> #include <linux/device.h> +#include <linux/err.h> #include <linux/smp.h> #include <linux/jiffies.h> #include <linux/clockchips.h> -#include <linux/irq.h> +#include <linux/interrupt.h> #include <linux/io.h> +#include <linux/of_irq.h> +#include <linux/of_address.h> +#include <asm/smp_plat.h> #include <asm/smp_twd.h> -#include <asm/hardware/gic.h> /* set up by the platform code */ -void __iomem *twd_base; +static void __iomem *twd_base; +static struct clk *twd_clk; static unsigned long twd_timer_rate; +static DEFINE_PER_CPU(bool, percpu_setup_called); + +static struct clock_event_device __percpu *twd_evt; +static int twd_ppi; static void twd_set_mode(enum clock_event_mode mode, struct clock_event_device *clk) @@ -33,9 +43,10 @@ static void twd_set_mode(enum clock_event_mode mode, switch (mode) { case CLOCK_EVT_MODE_PERIODIC: - /* timer load already set up */ ctrl = TWD_TIMER_CONTROL_ENABLE | TWD_TIMER_CONTROL_IT_ENABLE | TWD_TIMER_CONTROL_PERIODIC; + writel_relaxed(DIV_ROUND_CLOSEST(twd_timer_rate, HZ), + twd_base + TWD_TIMER_LOAD); break; case CLOCK_EVT_MODE_ONESHOT: /* period set, and timer enabled in 'next_event' hook */ @@ -47,18 +58,18 @@ static void twd_set_mode(enum clock_event_mode mode, ctrl = 0; } - __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); } static int twd_set_next_event(unsigned long evt, struct clock_event_device *unused) { - unsigned long ctrl = __raw_readl(twd_base + TWD_TIMER_CONTROL); + unsigned long ctrl = readl_relaxed(twd_base + TWD_TIMER_CONTROL); ctrl |= TWD_TIMER_CONTROL_ENABLE; - __raw_writel(evt, twd_base + TWD_TIMER_COUNTER); - __raw_writel(ctrl, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(evt, twd_base + TWD_TIMER_COUNTER); + writel_relaxed(ctrl, twd_base + TWD_TIMER_CONTROL); return 0; } @@ -69,19 +80,118 @@ static int twd_set_next_event(unsigned long evt, * If a local timer interrupt has occurred, acknowledge and return 1. * Otherwise, return 0. */ -int twd_timer_ack(void) +static int twd_timer_ack(void) { - if (__raw_readl(twd_base + TWD_TIMER_INTSTAT)) { - __raw_writel(1, twd_base + TWD_TIMER_INTSTAT); + if (readl_relaxed(twd_base + TWD_TIMER_INTSTAT)) { + writel_relaxed(1, twd_base + TWD_TIMER_INTSTAT); return 1; } return 0; } -static void __cpuinit twd_calibrate_rate(void) +static void twd_timer_stop(void) +{ + struct clock_event_device *clk = __this_cpu_ptr(twd_evt); + + twd_set_mode(CLOCK_EVT_MODE_UNUSED, clk); + disable_percpu_irq(clk->irq); +} + +#ifdef CONFIG_COMMON_CLK + +/* + * Updates clockevent frequency when the cpu frequency changes. + * Called on the cpu that is changing frequency with interrupts disabled. + */ +static void twd_update_frequency(void *new_rate) +{ + twd_timer_rate = *((unsigned long *) new_rate); + + clockevents_update_freq(__this_cpu_ptr(twd_evt), twd_timer_rate); +} + +static int twd_rate_change(struct notifier_block *nb, + unsigned long flags, void *data) { - unsigned long load, count; + struct clk_notifier_data *cnd = data; + + /* + * The twd clock events must be reprogrammed to account for the new + * frequency. The timer is local to a cpu, so cross-call to the + * changing cpu. + */ + if (flags == POST_RATE_CHANGE) + on_each_cpu(twd_update_frequency, + (void *)&cnd->new_rate, 1); + + return NOTIFY_OK; +} + +static struct notifier_block twd_clk_nb = { + .notifier_call = twd_rate_change, +}; + +static int twd_clk_init(void) +{ + if (twd_evt && __this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + return clk_notifier_register(twd_clk, &twd_clk_nb); + + return 0; +} +core_initcall(twd_clk_init); + +#elif defined (CONFIG_CPU_FREQ) + +#include <linux/cpufreq.h> + +/* + * Updates clockevent frequency when the cpu frequency changes. + * Called on the cpu that is changing frequency with interrupts disabled. + */ +static void twd_update_frequency(void *data) +{ + twd_timer_rate = clk_get_rate(twd_clk); + + clockevents_update_freq(__this_cpu_ptr(twd_evt), twd_timer_rate); +} + +static int twd_cpufreq_transition(struct notifier_block *nb, + unsigned long state, void *data) +{ + struct cpufreq_freqs *freqs = data; + + /* + * The twd clock events must be reprogrammed to account for the new + * frequency. The timer is local to a cpu, so cross-call to the + * changing cpu. + */ + if (state == CPUFREQ_POSTCHANGE) + smp_call_function_single(freqs->cpu, twd_update_frequency, + NULL, 1); + + return NOTIFY_OK; +} + +static struct notifier_block twd_cpufreq_nb = { + .notifier_call = twd_cpufreq_transition, +}; + +static int twd_cpufreq_init(void) +{ + if (twd_evt && __this_cpu_ptr(twd_evt) && !IS_ERR(twd_clk)) + return cpufreq_register_notifier(&twd_cpufreq_nb, + CPUFREQ_TRANSITION_NOTIFIER); + + return 0; +} +core_initcall(twd_cpufreq_init); + +#endif + +static void twd_calibrate_rate(void) +{ + unsigned long count; u64 waitjiffies; /* @@ -101,62 +211,204 @@ static void __cpuinit twd_calibrate_rate(void) waitjiffies += 5; /* enable, no interrupt or reload */ - __raw_writel(0x1, twd_base + TWD_TIMER_CONTROL); + writel_relaxed(0x1, twd_base + TWD_TIMER_CONTROL); /* maximum value */ - __raw_writel(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); + writel_relaxed(0xFFFFFFFFU, twd_base + TWD_TIMER_COUNTER); while (get_jiffies_64() < waitjiffies) udelay(10); - count = __raw_readl(twd_base + TWD_TIMER_COUNTER); + count = readl_relaxed(twd_base + TWD_TIMER_COUNTER); twd_timer_rate = (0xFFFFFFFFU - count) * (HZ / 5); printk("%lu.%02luMHz.\n", twd_timer_rate / 1000000, - (twd_timer_rate / 100000) % 100); + (twd_timer_rate / 10000) % 100); + } +} + +static irqreturn_t twd_handler(int irq, void *dev_id) +{ + struct clock_event_device *evt = dev_id; + + if (twd_timer_ack()) { + evt->event_handler(evt); + return IRQ_HANDLED; } - load = twd_timer_rate / HZ; + return IRQ_NONE; +} + +static void twd_get_clock(struct device_node *np) +{ + int err; + + if (np) + twd_clk = of_clk_get(np, 0); + else + twd_clk = clk_get_sys("smp_twd", NULL); + + if (IS_ERR(twd_clk)) { + pr_err("smp_twd: clock not found %d\n", (int) PTR_ERR(twd_clk)); + return; + } + + err = clk_prepare_enable(twd_clk); + if (err) { + pr_err("smp_twd: clock failed to prepare+enable: %d\n", err); + clk_put(twd_clk); + return; + } - __raw_writel(load, twd_base + TWD_TIMER_LOAD); + twd_timer_rate = clk_get_rate(twd_clk); } /* * Setup the local clock events for a CPU. */ -void __cpuinit twd_timer_setup(struct clock_event_device *clk) +static void twd_timer_setup(void) { - unsigned long flags; + struct clock_event_device *clk = __this_cpu_ptr(twd_evt); + int cpu = smp_processor_id(); + + /* + * If the basic setup for this CPU has been done before don't + * bother with the below. + */ + if (per_cpu(percpu_setup_called, cpu)) { + writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); + clockevents_register_device(clk); + enable_percpu_irq(clk->irq, 0); + return; + } + per_cpu(percpu_setup_called, cpu) = true; twd_calibrate_rate(); + /* + * The following is done once per CPU the first time .setup() is + * called. + */ + writel_relaxed(0, twd_base + TWD_TIMER_CONTROL); + clk->name = "local_timer"; clk->features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP; clk->rating = 350; clk->set_mode = twd_set_mode; clk->set_next_event = twd_set_next_event; - clk->shift = 20; - clk->mult = div_sc(twd_timer_rate, NSEC_PER_SEC, clk->shift); - clk->max_delta_ns = clockevent_delta2ns(0xffffffff, clk); - clk->min_delta_ns = clockevent_delta2ns(0xf, clk); + clk->irq = twd_ppi; + clk->cpumask = cpumask_of(cpu); + + clockevents_config_and_register(clk, twd_timer_rate, + 0xf, 0xffffffff); + enable_percpu_irq(clk->irq, 0); +} - /* Make sure our local interrupt controller has this enabled */ - local_irq_save(flags); - irq_to_desc(clk->irq)->status |= IRQ_NOPROBE; - get_irq_chip(clk->irq)->unmask(clk->irq); - local_irq_restore(flags); +static int twd_timer_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_STARTING: + twd_timer_setup(); + break; + case CPU_DYING: + twd_timer_stop(); + break; + } - clockevents_register_device(clk); + return NOTIFY_OK; } -#ifdef CONFIG_HOTPLUG_CPU -/* - * take a local timer down - */ -void twd_timer_stop(void) +static struct notifier_block twd_timer_cpu_nb = { + .notifier_call = twd_timer_cpu_notify, +}; + +static int __init twd_local_timer_common_register(struct device_node *np) { - __raw_writel(0, twd_base + TWD_TIMER_CONTROL); + int err; + + twd_evt = alloc_percpu(struct clock_event_device); + if (!twd_evt) { + err = -ENOMEM; + goto out_free; + } + + err = request_percpu_irq(twd_ppi, twd_handler, "twd", twd_evt); + if (err) { + pr_err("twd: can't register interrupt %d (%d)\n", twd_ppi, err); + goto out_free; + } + + err = register_cpu_notifier(&twd_timer_cpu_nb); + if (err) + goto out_irq; + + twd_get_clock(np); + + /* + * Immediately configure the timer on the boot CPU, unless we need + * jiffies to be incrementing to calibrate the rate in which case + * setup the timer in late_time_init. + */ + if (twd_timer_rate) + twd_timer_setup(); + else + late_time_init = twd_timer_setup; + + return 0; + +out_irq: + free_percpu_irq(twd_ppi, twd_evt); +out_free: + iounmap(twd_base); + twd_base = NULL; + free_percpu(twd_evt); + + return err; +} + +int __init twd_local_timer_register(struct twd_local_timer *tlt) +{ + if (twd_base || twd_evt) + return -EBUSY; + + twd_ppi = tlt->res[1].start; + + twd_base = ioremap(tlt->res[0].start, resource_size(&tlt->res[0])); + if (!twd_base) + return -ENOMEM; + + return twd_local_timer_common_register(NULL); +} + +#ifdef CONFIG_OF +static void __init twd_local_timer_of_register(struct device_node *np) +{ + int err; + + if (!is_smp() || !setup_max_cpus) + return; + + twd_ppi = irq_of_parse_and_map(np, 0); + if (!twd_ppi) { + err = -EINVAL; + goto out; + } + + twd_base = of_iomap(np, 0); + if (!twd_base) { + err = -ENOMEM; + goto out; + } + + err = twd_local_timer_common_register(np); + +out: + WARN(err, "twd_local_timer_of_register failed (%d)\n", err); } +CLOCKSOURCE_OF_DECLARE(arm_twd_a9, "arm,cortex-a9-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE(arm_twd_a5, "arm,cortex-a5-twd-timer", twd_local_timer_of_register); +CLOCKSOURCE_OF_DECLARE(arm_twd_11mp, "arm,arm11mp-twd-timer", twd_local_timer_of_register); #endif diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index c2e112e1a05..f065eb05d25 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -1,8 +1,9 @@ -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/stacktrace.h> #include <asm/stacktrace.h> +#include <asm/traps.h> #if defined(CONFIG_FRAME_POINTER) && !defined(CONFIG_ARM_UNWIND) /* @@ -31,7 +32,7 @@ int notrace unwind_frame(struct stackframe *frame) high = ALIGN(low, THREAD_SIZE); /* check current frame pointer is within bounds */ - if (fp < (low + 12) || fp + 4 >= high) + if (fp < low + 12 || fp > high - 4) return -EINVAL; /* restore the registers from the stack frame */ @@ -61,6 +62,7 @@ EXPORT_SYMBOL(walk_stackframe); #ifdef CONFIG_STACKTRACE struct stack_trace_data { struct stack_trace *trace; + unsigned long last_pc; unsigned int no_sched_functions; unsigned int skip; }; @@ -69,6 +71,7 @@ static int save_trace(struct stackframe *frame, void *d) { struct stack_trace_data *data = d; struct stack_trace *trace = data->trace; + struct pt_regs *regs; unsigned long addr = frame->pc; if (data->no_sched_functions && in_sched_functions(addr)) @@ -80,26 +83,51 @@ static int save_trace(struct stackframe *frame, void *d) trace->entries[trace->nr_entries++] = addr; + if (trace->nr_entries >= trace->max_entries) + return 1; + + /* + * in_exception_text() is designed to test if the PC is one of + * the functions which has an exception stack above it, but + * unfortunately what is in frame->pc is the return LR value, + * not the saved PC value. So, we need to track the previous + * frame PC value when doing this. + */ + addr = data->last_pc; + data->last_pc = frame->pc; + if (!in_exception_text(addr)) + return 0; + + regs = (struct pt_regs *)frame->sp; + + trace->entries[trace->nr_entries++] = regs->ARM_pc; + return trace->nr_entries >= trace->max_entries; } -void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +/* This must be noinline to so that our skip calculation works correctly */ +static noinline void __save_stack_trace(struct task_struct *tsk, + struct stack_trace *trace, unsigned int nosched) { struct stack_trace_data data; struct stackframe frame; data.trace = trace; + data.last_pc = ULONG_MAX; data.skip = trace->skip; + data.no_sched_functions = nosched; if (tsk != current) { #ifdef CONFIG_SMP /* - * What guarantees do we have here that 'tsk' - * is not running on another CPU? + * What guarantees do we have here that 'tsk' is not + * running on another CPU? For now, ignore it as we + * can't guarantee we won't explode. */ - BUG(); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; + return; #else - data.no_sched_functions = 1; frame.fp = thread_saved_fp(tsk); frame.sp = thread_saved_sp(tsk); frame.lr = 0; /* recovered from the stack */ @@ -108,11 +136,12 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) } else { register unsigned long current_sp asm ("sp"); - data.no_sched_functions = 0; + /* We don't want this function nor the caller */ + data.skip += 2; frame.fp = (unsigned long)__builtin_frame_address(0); frame.sp = current_sp; frame.lr = (unsigned long)__builtin_return_address(0); - frame.pc = (unsigned long)save_stack_trace_tsk; + frame.pc = (unsigned long)__save_stack_trace; } walk_stackframe(&frame, save_trace, &data); @@ -120,9 +149,33 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) trace->entries[trace->nr_entries++] = ULONG_MAX; } +void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace) +{ + struct stack_trace_data data; + struct stackframe frame; + + data.trace = trace; + data.skip = trace->skip; + data.no_sched_functions = 0; + + frame.fp = regs->ARM_fp; + frame.sp = regs->ARM_sp; + frame.lr = regs->ARM_lr; + frame.pc = regs->ARM_pc; + + walk_stackframe(&frame, save_trace, &data); + if (trace->nr_entries < trace->max_entries) + trace->entries[trace->nr_entries++] = ULONG_MAX; +} + +void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) +{ + __save_stack_trace(tsk, trace, 1); +} + void save_stack_trace(struct stack_trace *trace) { - save_stack_trace_tsk(current, trace); + __save_stack_trace(current, trace, 0); } EXPORT_SYMBOL_GPL(save_stack_trace); #endif diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c new file mode 100644 index 00000000000..2835d35234c --- /dev/null +++ b/arch/arm/kernel/suspend.c @@ -0,0 +1,106 @@ +#include <linux/init.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/idmap.h> +#include <asm/pgalloc.h> +#include <asm/pgtable.h> +#include <asm/memory.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/tlbflush.h> + +extern int __cpu_suspend(unsigned long, int (*)(unsigned long), u32 cpuid); +extern void cpu_resume_mmu(void); + +#ifdef CONFIG_MMU +/* + * Hide the first two arguments to __cpu_suspend - these are an implementation + * detail which platform code shouldn't have to know about. + */ +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +{ + struct mm_struct *mm = current->active_mm; + u32 __mpidr = cpu_logical_map(smp_processor_id()); + int ret; + + if (!idmap_pgd) + return -EINVAL; + + /* + * Provide a temporary page table with an identity mapping for + * the MMU-enable code, required for resuming. On successful + * resume (indicated by a zero return code), we need to switch + * back to the correct page tables. + */ + ret = __cpu_suspend(arg, fn, __mpidr); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + local_flush_bp_all(); + local_flush_tlb_all(); + } + + return ret; +} +#else +int cpu_suspend(unsigned long arg, int (*fn)(unsigned long)) +{ + u32 __mpidr = cpu_logical_map(smp_processor_id()); + return __cpu_suspend(arg, fn, __mpidr); +} +#define idmap_pgd NULL +#endif + +/* + * This is called by __cpu_suspend() to save the state, and do whatever + * flushing is required to ensure that when the CPU goes to sleep we have + * the necessary data available when the caches are not searched. + */ +void __cpu_suspend_save(u32 *ptr, u32 ptrsz, u32 sp, u32 *save_ptr) +{ + u32 *ctx = ptr; + + *save_ptr = virt_to_phys(ptr); + + /* This must correspond to the LDM in cpu_resume() assembly */ + *ptr++ = virt_to_phys(idmap_pgd); + *ptr++ = sp; + *ptr++ = virt_to_phys(cpu_do_resume); + + cpu_do_suspend(ptr); + + flush_cache_louis(); + + /* + * flush_cache_louis does not guarantee that + * save_ptr and ptr are cleaned to main memory, + * just up to the Level of Unification Inner Shareable. + * Since the context pointer and context itself + * are to be retrieved with the MMU off that + * data must be cleaned from all cache levels + * to main memory using "area" cache primitives. + */ + __cpuc_flush_dcache_area(ctx, ptrsz); + __cpuc_flush_dcache_area(save_ptr, sizeof(*save_ptr)); + + outer_clean_range(*save_ptr, *save_ptr + ptrsz); + outer_clean_range(virt_to_phys(save_ptr), + virt_to_phys(save_ptr) + sizeof(*save_ptr)); +} + +extern struct sleep_save_sp sleep_save_sp; + +static int cpu_suspend_alloc_sp(void) +{ + void *ctx_ptr; + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(u32), GFP_KERNEL); + + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sync_cache_w(&sleep_save_sp); + return 0; +} +early_initcall(cpu_suspend_alloc_sp); diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c new file mode 100644 index 00000000000..b1b89882b11 --- /dev/null +++ b/arch/arm/kernel/swp_emulate.c @@ -0,0 +1,280 @@ +/* + * linux/arch/arm/kernel/swp_emulate.c + * + * Copyright (C) 2009 ARM Limited + * __user_* functions adapted from include/asm/uaccess.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Implements emulation of the SWP/SWPB instructions using load-exclusive and + * store-exclusive for processors that have them disabled (or future ones that + * might not implement them). + * + * Syntax of SWP{B} instruction: SWP{B}<c> <Rt>, <Rt2>, [<Rn>] + * Where: Rt = destination + * Rt2 = source + * Rn = address + */ + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/proc_fs.h> +#include <linux/seq_file.h> +#include <linux/sched.h> +#include <linux/syscalls.h> +#include <linux/perf_event.h> + +#include <asm/opcodes.h> +#include <asm/traps.h> +#include <asm/uaccess.h> + +/* + * Error-checking SWP macros implemented using ldrex{b}/strex{b} + */ +#define __user_swpX_asm(data, addr, res, temp, B) \ + __asm__ __volatile__( \ + " mov %2, %1\n" \ + "0: ldrex"B" %1, [%3]\n" \ + "1: strex"B" %0, %2, [%3]\n" \ + " cmp %0, #0\n" \ + " movne %0, %4\n" \ + "2:\n" \ + " .section .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %0, %5\n" \ + " b 2b\n" \ + " .previous\n" \ + " .section __ex_table,\"a\"\n" \ + " .align 3\n" \ + " .long 0b, 3b\n" \ + " .long 1b, 3b\n" \ + " .previous" \ + : "=&r" (res), "+r" (data), "=&r" (temp) \ + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ + : "cc", "memory") + +#define __user_swp_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "") +#define __user_swpb_asm(data, addr, res, temp) \ + __user_swpX_asm(data, addr, res, temp, "b") + +/* + * Macros/defines for extracting register numbers from instruction. + */ +#define EXTRACT_REG_NUM(instruction, offset) \ + (((instruction) & (0xf << (offset))) >> (offset)) +#define RN_OFFSET 16 +#define RT_OFFSET 12 +#define RT2_OFFSET 0 +/* + * Bit 22 of the instruction encoding distinguishes between + * the SWP and SWPB variants (bit set means SWPB). + */ +#define TYPE_SWPB (1 << 22) + +static unsigned long swpcounter; +static unsigned long swpbcounter; +static unsigned long abtcounter; +static pid_t previous_pid; + +#ifdef CONFIG_PROC_FS +static int proc_status_show(struct seq_file *m, void *v) +{ + seq_printf(m, "Emulated SWP:\t\t%lu\n", swpcounter); + seq_printf(m, "Emulated SWPB:\t\t%lu\n", swpbcounter); + seq_printf(m, "Aborted SWP{B}:\t\t%lu\n", abtcounter); + if (previous_pid != 0) + seq_printf(m, "Last process:\t\t%d\n", previous_pid); + return 0; +} + +static int proc_status_open(struct inode *inode, struct file *file) +{ + return single_open(file, proc_status_show, PDE_DATA(inode)); +} + +static const struct file_operations proc_status_fops = { + .open = proc_status_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +#endif + +/* + * Set up process info to signal segmentation fault - called on access error. + */ +static void set_segfault(struct pt_regs *regs, unsigned long addr) +{ + siginfo_t info; + + down_read(¤t->mm->mmap_sem); + if (find_vma(current->mm, addr) == NULL) + info.si_code = SEGV_MAPERR; + else + info.si_code = SEGV_ACCERR; + up_read(¤t->mm->mmap_sem); + + info.si_signo = SIGSEGV; + info.si_errno = 0; + info.si_addr = (void *) instruction_pointer(regs); + + pr_debug("SWP{B} emulation: access caused memory abort!\n"); + arm_notify_die("Illegal memory access", regs, &info, 0, 0); + + abtcounter++; +} + +static int emulate_swpX(unsigned int address, unsigned int *data, + unsigned int type) +{ + unsigned int res = 0; + + if ((type != TYPE_SWPB) && (address & 0x3)) { + /* SWP to unaligned address not permitted */ + pr_debug("SWP instruction on unaligned pointer!\n"); + return -EFAULT; + } + + while (1) { + unsigned long temp; + + /* + * Barrier required between accessing protected resource and + * releasing a lock for it. Legacy code might not have done + * this, and we cannot determine that this is not the case + * being emulated, so insert always. + */ + smp_mb(); + + if (type == TYPE_SWPB) + __user_swpb_asm(*data, address, res, temp); + else + __user_swp_asm(*data, address, res, temp); + + if (likely(res != -EAGAIN) || signal_pending(current)) + break; + + cond_resched(); + } + + if (res == 0) { + /* + * Barrier also required between acquiring a lock for a + * protected resource and accessing the resource. Inserted for + * same reason as above. + */ + smp_mb(); + + if (type == TYPE_SWPB) + swpbcounter++; + else + swpcounter++; + } + + return res; +} + +/* + * swp_handler logs the id of calling process, dissects the instruction, sanity + * checks the memory location, calls emulate_swpX for the actual operation and + * deals with fixup/error handling before returning + */ +static int swp_handler(struct pt_regs *regs, unsigned int instr) +{ + unsigned int address, destreg, data, type; + unsigned int res = 0; + + perf_sw_event(PERF_COUNT_SW_EMULATION_FAULTS, 1, regs, regs->ARM_pc); + + res = arm_check_condition(instr, regs->ARM_cpsr); + switch (res) { + case ARM_OPCODE_CONDTEST_PASS: + break; + case ARM_OPCODE_CONDTEST_FAIL: + /* Condition failed - return to next instruction */ + regs->ARM_pc += 4; + return 0; + case ARM_OPCODE_CONDTEST_UNCOND: + /* If unconditional encoding - not a SWP, undef */ + return -EFAULT; + default: + return -EINVAL; + } + + if (current->pid != previous_pid) { + pr_debug("\"%s\" (%ld) uses deprecated SWP{B} instruction\n", + current->comm, (unsigned long)current->pid); + previous_pid = current->pid; + } + + address = regs->uregs[EXTRACT_REG_NUM(instr, RN_OFFSET)]; + data = regs->uregs[EXTRACT_REG_NUM(instr, RT2_OFFSET)]; + destreg = EXTRACT_REG_NUM(instr, RT_OFFSET); + + type = instr & TYPE_SWPB; + + pr_debug("addr in r%d->0x%08x, dest is r%d, source in r%d->0x%08x)\n", + EXTRACT_REG_NUM(instr, RN_OFFSET), address, + destreg, EXTRACT_REG_NUM(instr, RT2_OFFSET), data); + + /* Check access in reasonable access range for both SWP and SWPB */ + if (!access_ok(VERIFY_WRITE, (address & ~3), 4)) { + pr_debug("SWP{B} emulation: access to %p not allowed!\n", + (void *)address); + res = -EFAULT; + } else { + res = emulate_swpX(address, &data, type); + } + + if (res == 0) { + /* + * On successful emulation, revert the adjustment to the PC + * made in kernel/traps.c in order to resume execution at the + * instruction following the SWP{B}. + */ + regs->ARM_pc += 4; + regs->uregs[destreg] = data; + } else if (res == -EFAULT) { + /* + * Memory errors do not mean emulation failed. + * Set up signal info to return SEGV, then return OK + */ + set_segfault(regs, address); + } + + return 0; +} + +/* + * Only emulate SWP/SWPB executed in ARM state/User mode. + * The kernel must be SWP free and SWP{B} does not exist in Thumb/ThumbEE. + */ +static struct undef_hook swp_hook = { + .instr_mask = 0x0fb00ff0, + .instr_val = 0x01000090, + .cpsr_mask = MODE_MASK | PSR_T_BIT | PSR_J_BIT, + .cpsr_val = USR_MODE, + .fn = swp_handler +}; + +/* + * Register handler and create status file in /proc/cpu + * Invoked as late_initcall, since not needed before init spawned. + */ +static int __init swp_emulation_init(void) +{ +#ifdef CONFIG_PROC_FS + if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops)) + return -ENOMEM; +#endif /* CONFIG_PROC_FS */ + + printk(KERN_NOTICE "Registering SWP/SWPB emulation handler\n"); + register_undef_hook(&swp_hook); + + return 0; +} + +late_initcall(swp_emulation_init); diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c index 62e7c61d034..3151f5623d0 100644 --- a/arch/arm/kernel/sys_arm.c +++ b/arch/arm/kernel/sys_arm.c @@ -12,7 +12,7 @@ * have a non-standard calling sequence on the Linux/arm * platform. */ -#include <linux/module.h> +#include <linux/export.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/mm.h> @@ -28,100 +28,6 @@ #include <linux/uaccess.h> #include <linux/slab.h> -/* Fork a new task - this creates a new program thread. - * This is called indirectly via a small wrapper - */ -asmlinkage int sys_fork(struct pt_regs *regs) -{ -#ifdef CONFIG_MMU - return do_fork(SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL); -#else - /* can not support in nommu mode */ - return(-EINVAL); -#endif -} - -/* Clone a task - this clones the calling program thread. - * This is called indirectly via a small wrapper - */ -asmlinkage int sys_clone(unsigned long clone_flags, unsigned long newsp, - int __user *parent_tidptr, int tls_val, - int __user *child_tidptr, struct pt_regs *regs) -{ - if (!newsp) - newsp = regs->ARM_sp; - - return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr); -} - -asmlinkage int sys_vfork(struct pt_regs *regs) -{ - return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->ARM_sp, regs, 0, NULL, NULL); -} - -/* sys_execve() executes a new program. - * This is called indirectly via a small wrapper - */ -asmlinkage int sys_execve(const char __user *filenamei, - const char __user *const __user *argv, - const char __user *const __user *envp, struct pt_regs *regs) -{ - int error; - char * filename; - - filename = getname(filenamei); - error = PTR_ERR(filename); - if (IS_ERR(filename)) - goto out; - error = do_execve(filename, argv, envp, regs); - putname(filename); -out: - return error; -} - -int kernel_execve(const char *filename, - const char *const argv[], - const char *const envp[]) -{ - struct pt_regs regs; - int ret; - - memset(®s, 0, sizeof(struct pt_regs)); - ret = do_execve(filename, - (const char __user *const __user *)argv, - (const char __user *const __user *)envp, ®s); - if (ret < 0) - goto out; - - /* - * Save argc to the register structure for userspace. - */ - regs.ARM_r0 = ret; - - /* - * We were successful. We won't be returning to our caller, but - * instead to user space by manipulating the kernel stack. - */ - asm( "add r0, %0, %1\n\t" - "mov r1, %2\n\t" - "mov r2, %3\n\t" - "bl memmove\n\t" /* copy regs to top of stack */ - "mov r8, #0\n\t" /* not a syscall */ - "mov r9, %0\n\t" /* thread structure */ - "mov sp, r0\n\t" /* reposition stack pointer */ - "b ret_to_user" - : - : "r" (current_thread_info()), - "Ir" (THREAD_START_SP - sizeof(regs)), - "r" (®s), - "Ir" (sizeof(regs)) - : "r0", "r1", "r2", "r3", "ip", "lr", "memory"); - - out: - return ret; -} -EXPORT_SYMBOL(kernel_execve); - /* * Since loff_t is a 64 bit type we avoid a lot of ABI hassle * with a different argument ordering. diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 4ad8da15ef2..e90a3148f38 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -124,8 +124,8 @@ static long cp_oldabi_stat64(struct kstat *stat, tmp.__st_ino = stat->ino; tmp.st_mode = stat->mode; tmp.st_nlink = stat->nlink; - tmp.st_uid = stat->uid; - tmp.st_gid = stat->gid; + tmp.st_uid = from_kuid_munged(current_user_ns(), stat->uid); + tmp.st_gid = from_kgid_munged(current_user_ns(), stat->gid); tmp.st_rdev = huge_encode_dev(stat->rdev); tmp.st_size = stat->size; tmp.st_blocks = stat->blocks; @@ -203,6 +203,9 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, int ret; switch (cmd) { + case F_OFD_GETLK: + case F_OFD_SETLK: + case F_OFD_SETLKW: case F_GETLK64: case F_SETLK64: case F_SETLKW64: @@ -311,7 +314,7 @@ asmlinkage long sys_oabi_semtimedop(int semid, long err; int i; - if (nsops < 1) + if (nsops < 1 || nsops > SEMOPM) return -EINVAL; sops = kmalloc(sizeof(*sops) * nsops, GFP_KERNEL); if (!sops) diff --git a/arch/arm/kernel/tcm.c b/arch/arm/kernel/tcm.c index 26685c2f7a4..7a3be1d4d0b 100644 --- a/arch/arm/kernel/tcm.c +++ b/arch/arm/kernel/tcm.c @@ -15,10 +15,12 @@ #include <linux/string.h> /* memcpy */ #include <asm/cputype.h> #include <asm/mach/map.h> -#include <mach/memory.h> -#include "tcm.h" +#include <asm/memory.h> +#include <asm/system_info.h> static struct gen_pool *tcm_pool; +static bool dtcm_present; +static bool itcm_present; /* TCM section definitions from the linker */ extern char __itcm_start, __sitcm_text, __eitcm_text; @@ -50,7 +52,7 @@ static struct map_desc dtcm_iomap[] __initdata = { .virtual = DTCM_OFFSET, .pfn = __phys_to_pfn(DTCM_OFFSET), .length = 0, - .type = MT_MEMORY_DTCM + .type = MT_MEMORY_RW_DTCM } }; @@ -59,7 +61,7 @@ static struct map_desc itcm_iomap[] __initdata = { .virtual = ITCM_OFFSET, .pfn = __phys_to_pfn(ITCM_OFFSET), .length = 0, - .type = MT_MEMORY_ITCM + .type = MT_MEMORY_RWX_ITCM, } }; @@ -90,6 +92,18 @@ void tcm_free(void *addr, size_t len) } EXPORT_SYMBOL(tcm_free); +bool tcm_dtcm_present(void) +{ + return dtcm_present; +} +EXPORT_SYMBOL(tcm_dtcm_present); + +bool tcm_itcm_present(void) +{ + return itcm_present; +} +EXPORT_SYMBOL(tcm_itcm_present); + static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, u32 *offset) { @@ -134,6 +148,10 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, (tcm_region & 1) ? "" : "not "); } + /* Not much fun you can do with a size 0 bank */ + if (tcm_size == 0) + return 0; + /* Force move the TCM bank to where we want it, enable */ tcm_region = *offset | (tcm_region & 0x00000ffeU) | 1; @@ -162,15 +180,39 @@ static int __init setup_tcm_bank(u8 type, u8 bank, u8 banks, */ void __init tcm_init(void) { - u32 tcm_status = read_cpuid_tcmstatus(); - u8 dtcm_banks = (tcm_status >> 16) & 0x03; - u8 itcm_banks = (tcm_status & 0x03); + u32 tcm_status; + u8 dtcm_banks; + u8 itcm_banks; + size_t dtcm_code_sz = &__edtcm_data - &__sdtcm_data; + size_t itcm_code_sz = &__eitcm_text - &__sitcm_text; char *start; char *end; char *ram; int ret; int i; + /* + * Prior to ARMv5 there is no TCM, and trying to read the status + * register will hang the processor. + */ + if (cpu_architecture() < CPU_ARCH_ARMv5) { + if (dtcm_code_sz || itcm_code_sz) + pr_info("CPU TCM: %u bytes of DTCM and %u bytes of " + "ITCM code compiled in, but no TCM present " + "in pre-v5 CPU\n", dtcm_code_sz, itcm_code_sz); + return; + } + + tcm_status = read_cpuid_tcmstatus(); + dtcm_banks = (tcm_status >> 16) & 0x03; + itcm_banks = (tcm_status & 0x03); + + /* Values greater than 2 for D/ITCM banks are "reserved" */ + if (dtcm_banks > 2) + dtcm_banks = 0; + if (itcm_banks > 2) + itcm_banks = 0; + /* Setup DTCM if present */ if (dtcm_banks > 0) { for (i = 0; i < dtcm_banks; i++) { @@ -178,6 +220,13 @@ void __init tcm_init(void) if (ret) return; } + /* This means you compiled more code than fits into DTCM */ + if (dtcm_code_sz > (dtcm_end - DTCM_OFFSET)) { + pr_info("CPU DTCM: %u bytes of code compiled to " + "DTCM but only %lu bytes of DTCM present\n", + dtcm_code_sz, (dtcm_end - DTCM_OFFSET)); + goto no_dtcm; + } dtcm_res.end = dtcm_end - 1; request_resource(&iomem_resource, &dtcm_res); dtcm_iomap[0].length = dtcm_end - DTCM_OFFSET; @@ -186,12 +235,16 @@ void __init tcm_init(void) start = &__sdtcm_data; end = &__edtcm_data; ram = &__dtcm_start; - /* This means you compiled more code than fits into DTCM */ - BUG_ON((end - start) > (dtcm_end - DTCM_OFFSET)); - memcpy(start, ram, (end-start)); - pr_debug("CPU DTCM: copied data from %p - %p\n", start, end); + memcpy(start, ram, dtcm_code_sz); + pr_debug("CPU DTCM: copied data from %p - %p\n", + start, end); + dtcm_present = true; + } else if (dtcm_code_sz) { + pr_info("CPU DTCM: %u bytes of code compiled to DTCM but no " + "DTCM banks present in CPU\n", dtcm_code_sz); } +no_dtcm: /* Setup ITCM if present */ if (itcm_banks > 0) { for (i = 0; i < itcm_banks; i++) { @@ -199,6 +252,13 @@ void __init tcm_init(void) if (ret) return; } + /* This means you compiled more code than fits into ITCM */ + if (itcm_code_sz > (itcm_end - ITCM_OFFSET)) { + pr_info("CPU ITCM: %u bytes of code compiled to " + "ITCM but only %lu bytes of ITCM present\n", + itcm_code_sz, (itcm_end - ITCM_OFFSET)); + return; + } itcm_res.end = itcm_end - 1; request_resource(&iomem_resource, &itcm_res); itcm_iomap[0].length = itcm_end - ITCM_OFFSET; @@ -207,10 +267,13 @@ void __init tcm_init(void) start = &__sitcm_text; end = &__eitcm_text; ram = &__itcm_start; - /* This means you compiled more code than fits into ITCM */ - BUG_ON((end - start) > (itcm_end - ITCM_OFFSET)); - memcpy(start, ram, (end-start)); - pr_debug("CPU ITCM: copied code from %p - %p\n", start, end); + memcpy(start, ram, itcm_code_sz); + pr_debug("CPU ITCM: copied code from %p - %p\n", + start, end); + itcm_present = true; + } else if (itcm_code_sz) { + pr_info("CPU ITCM: %u bytes of code compiled to ITCM but no " + "ITCM banks present in CPU\n", itcm_code_sz); } } @@ -221,7 +284,6 @@ void __init tcm_init(void) */ static int __init setup_tcm_pool(void) { - u32 tcm_status = read_cpuid_tcmstatus(); u32 dtcm_pool_start = (u32) &__edtcm_data; u32 itcm_pool_start = (u32) &__eitcm_text; int ret; @@ -236,7 +298,7 @@ static int __init setup_tcm_pool(void) pr_debug("Setting up TCM memory pool\n"); /* Add the rest of DTCM to the TCM pool */ - if (tcm_status & (0x03 << 16)) { + if (dtcm_present) { if (dtcm_pool_start < dtcm_end) { ret = gen_pool_add(tcm_pool, dtcm_pool_start, dtcm_end - dtcm_pool_start, -1); @@ -253,7 +315,7 @@ static int __init setup_tcm_pool(void) } /* Add the rest of ITCM to the TCM pool */ - if (tcm_status & 0x03) { + if (itcm_present) { if (itcm_pool_start < itcm_end) { ret = gen_pool_add(tcm_pool, itcm_pool_start, itcm_end - itcm_pool_start, -1); diff --git a/arch/arm/kernel/tcm.h b/arch/arm/kernel/tcm.h deleted file mode 100644 index 8015ad434a4..00000000000 --- a/arch/arm/kernel/tcm.h +++ /dev/null @@ -1,17 +0,0 @@ -/* - * Copyright (C) 2008-2009 ST-Ericsson AB - * License terms: GNU General Public License (GPL) version 2 - * TCM memory handling for ARM systems - * - * Author: Linus Walleij <linus.walleij@stericsson.com> - * Author: Rickard Andersson <rickard.andersson@stericsson.com> - */ - -#ifdef CONFIG_HAVE_TCM -void __init tcm_init(void); -#else -/* No TCM support, just blank inlines to be optimized out */ -inline void tcm_init(void) -{ -} -#endif diff --git a/arch/arm/kernel/thumbee.c b/arch/arm/kernel/thumbee.c index 9cb7aaca159..7b8403b7666 100644 --- a/arch/arm/kernel/thumbee.c +++ b/arch/arm/kernel/thumbee.c @@ -20,6 +20,8 @@ #include <linux/kernel.h> #include <linux/init.h> +#include <asm/cputype.h> +#include <asm/system_info.h> #include <asm/thread_notify.h> /* @@ -66,8 +68,7 @@ static int __init thumbee_init(void) if (cpu_arch < CPU_ARCH_ARMv7) return 0; - /* processor feature register 0 */ - asm("mrc p15, 0, %0, c0, c1, 0\n" : "=r" (pfr0)); + pfr0 = read_cpuid_ext(CPUID_EXT_PFR0); if ((pfr0 & 0x0000f000) != 0x00001000) return 0; diff --git a/arch/arm/kernel/time.c b/arch/arm/kernel/time.c index 38c261f9951..829a96d4a17 100644 --- a/arch/arm/kernel/time.c +++ b/arch/arm/kernel/time.c @@ -11,39 +11,32 @@ * This file contains the ARM-specific time handling details: * reading the RTC at bootup, etc... */ -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/interrupt.h> -#include <linux/time.h> +#include <linux/clk-provider.h> +#include <linux/clocksource.h> +#include <linux/errno.h> +#include <linux/export.h> #include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/irq.h> +#include <linux/kernel.h> +#include <linux/profile.h> #include <linux/sched.h> +#include <linux/sched_clock.h> #include <linux/smp.h> +#include <linux/time.h> #include <linux/timex.h> -#include <linux/errno.h> -#include <linux/profile.h> -#include <linux/sysdev.h> #include <linux/timer.h> -#include <linux/irq.h> - -#include <linux/mc146818rtc.h> -#include <asm/leds.h> -#include <asm/thread_info.h> -#include <asm/stacktrace.h> +#include <asm/mach/arch.h> #include <asm/mach/time.h> +#include <asm/stacktrace.h> +#include <asm/thread_info.h> -/* - * Our system timer. - */ -struct sys_timer *system_timer; - -#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) +#if defined(CONFIG_RTC_DRV_CMOS) || defined(CONFIG_RTC_DRV_CMOS_MODULE) || \ + defined(CONFIG_NVRAM) || defined(CONFIG_NVRAM_MODULE) /* this needs a better home */ DEFINE_SPINLOCK(rtc_lock); - -#ifdef CONFIG_RTC_DRV_CMOS_MODULE EXPORT_SYMBOL(rtc_lock); -#endif #endif /* pc-style 'CMOS' RTC support */ /* change this if you have some constant time drift */ @@ -72,31 +65,6 @@ unsigned long profile_pc(struct pt_regs *regs) EXPORT_SYMBOL(profile_pc); #endif -#ifdef CONFIG_ARCH_USES_GETTIMEOFFSET -u32 arch_gettimeoffset(void) -{ - if (system_timer->offset != NULL) - return system_timer->offset() * 1000; - - return 0; -} -#endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */ - -#ifdef CONFIG_LEDS_TIMER -static inline void do_leds(void) -{ - static unsigned int count = HZ/2; - - if (--count == 0) { - count = HZ/2; - leds_event(led_timer); - } -} -#else -#define do_leds() -#endif - - #ifndef CONFIG_GENERIC_CLOCKEVENTS /* * Kernel system timer support. @@ -104,62 +72,57 @@ static inline void do_leds(void) void timer_tick(void) { profile_tick(CPU_PROFILING); - do_leds(); - write_seqlock(&xtime_lock); - do_timer(1); - write_sequnlock(&xtime_lock); + xtime_update(1); #ifndef CONFIG_SMP update_process_times(user_mode(get_irq_regs())); #endif } #endif -#if defined(CONFIG_PM) && !defined(CONFIG_GENERIC_CLOCKEVENTS) -static int timer_suspend(struct sys_device *dev, pm_message_t state) +static void dummy_clock_access(struct timespec *ts) { - struct sys_timer *timer = container_of(dev, struct sys_timer, dev); + ts->tv_sec = 0; + ts->tv_nsec = 0; +} - if (timer->suspend != NULL) - timer->suspend(); +static clock_access_fn __read_persistent_clock = dummy_clock_access; +static clock_access_fn __read_boot_clock = dummy_clock_access;; - return 0; +void read_persistent_clock(struct timespec *ts) +{ + __read_persistent_clock(ts); } -static int timer_resume(struct sys_device *dev) +void read_boot_clock(struct timespec *ts) { - struct sys_timer *timer = container_of(dev, struct sys_timer, dev); - - if (timer->resume != NULL) - timer->resume(); - - return 0; + __read_boot_clock(ts); } -#else -#define timer_suspend NULL -#define timer_resume NULL -#endif -static struct sysdev_class timer_sysclass = { - .name = "timer", - .suspend = timer_suspend, - .resume = timer_resume, -}; - -static int __init timer_init_sysfs(void) +int __init register_persistent_clock(clock_access_fn read_boot, + clock_access_fn read_persistent) { - int ret = sysdev_class_register(&timer_sysclass); - if (ret == 0) { - system_timer->dev.cls = &timer_sysclass; - ret = sysdev_register(&system_timer->dev); + /* Only allow the clockaccess functions to be registered once */ + if (__read_persistent_clock == dummy_clock_access && + __read_boot_clock == dummy_clock_access) { + if (read_boot) + __read_boot_clock = read_boot; + if (read_persistent) + __read_persistent_clock = read_persistent; + + return 0; } - return ret; + return -EINVAL; } -device_initcall(timer_init_sysfs); - void __init time_init(void) { - system_timer->init(); + if (machine_desc->init_time) { + machine_desc->init_time(); + } else { +#ifdef CONFIG_COMMON_CLK + of_clk_init(NULL); +#endif + clocksource_of_init(); + } } - diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c new file mode 100644 index 00000000000..e35d880f977 --- /dev/null +++ b/arch/arm/kernel/topology.c @@ -0,0 +1,318 @@ +/* + * arch/arm/kernel/topology.c + * + * Copyright (C) 2011 Linaro Limited. + * Written by: Vincent Guittot + * + * based on arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/export.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cputype.h> +#include <asm/topology.h> + +/* + * cpu capacity scale management + */ + +/* + * cpu capacity table + * This per cpu data structure describes the relative capacity of each core. + * On a heteregenous system, cores don't have the same computation capacity + * and we reflect that difference in the cpu_capacity field so the scheduler + * can take this difference into account during load balance. A per cpu + * structure is preferred because each CPU updates its own cpu_capacity field + * during the load balance except for idle cores. One idle core is selected + * to run the rebalance_domains for all idle cores and the cpu_capacity can be + * updated during this sequence. + */ +static DEFINE_PER_CPU(unsigned long, cpu_scale); + +unsigned long arch_scale_freq_capacity(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +static void set_capacity_scale(unsigned int cpu, unsigned long capacity) +{ + per_cpu(cpu_scale, cpu) = capacity; +} + +#ifdef CONFIG_OF +struct cpu_efficiency { + const char *compatible; + unsigned long efficiency; +}; + +/* + * Table of relative efficiency of each processors + * The efficiency value must fit in 20bit and the final + * cpu_scale value must be in the range + * 0 < cpu_scale < 3*SCHED_CAPACITY_SCALE/2 + * in order to return at most 1 when DIV_ROUND_CLOSEST + * is used to compute the capacity of a CPU. + * Processors that are not defined in the table, + * use the default SCHED_CAPACITY_SCALE value for cpu_scale. + */ +static const struct cpu_efficiency table_efficiency[] = { + {"arm,cortex-a15", 3891}, + {"arm,cortex-a7", 2048}, + {NULL, }, +}; + +static unsigned long *__cpu_capacity; +#define cpu_capacity(cpu) __cpu_capacity[cpu] + +static unsigned long middle_capacity = 1; + +/* + * Iterate all CPUs' descriptor in DT and compute the efficiency + * (as per table_efficiency). Also calculate a middle efficiency + * as close as possible to (max{eff_i} - min{eff_i}) / 2 + * This is later used to scale the cpu_capacity field such that an + * 'average' CPU is of middle capacity. Also see the comments near + * table_efficiency[] and update_cpu_capacity(). + */ +static void __init parse_dt_topology(void) +{ + const struct cpu_efficiency *cpu_eff; + struct device_node *cn = NULL; + unsigned long min_capacity = ULONG_MAX; + unsigned long max_capacity = 0; + unsigned long capacity = 0; + int cpu = 0; + + __cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity), + GFP_NOWAIT); + + for_each_possible_cpu(cpu) { + const u32 *rate; + int len; + + /* too early to use cpu->of_node */ + cn = of_get_cpu_node(cpu, NULL); + if (!cn) { + pr_err("missing device node for CPU %d\n", cpu); + continue; + } + + for (cpu_eff = table_efficiency; cpu_eff->compatible; cpu_eff++) + if (of_device_is_compatible(cn, cpu_eff->compatible)) + break; + + if (cpu_eff->compatible == NULL) + continue; + + rate = of_get_property(cn, "clock-frequency", &len); + if (!rate || len != 4) { + pr_err("%s missing clock-frequency property\n", + cn->full_name); + continue; + } + + capacity = ((be32_to_cpup(rate)) >> 20) * cpu_eff->efficiency; + + /* Save min capacity of the system */ + if (capacity < min_capacity) + min_capacity = capacity; + + /* Save max capacity of the system */ + if (capacity > max_capacity) + max_capacity = capacity; + + cpu_capacity(cpu) = capacity; + } + + /* If min and max capacities are equals, we bypass the update of the + * cpu_scale because all CPUs have the same capacity. Otherwise, we + * compute a middle_capacity factor that will ensure that the capacity + * of an 'average' CPU of the system will be as close as possible to + * SCHED_CAPACITY_SCALE, which is the default value, but with the + * constraint explained near table_efficiency[]. + */ + if (4*max_capacity < (3*(max_capacity + min_capacity))) + middle_capacity = (min_capacity + max_capacity) + >> (SCHED_CAPACITY_SHIFT+1); + else + middle_capacity = ((max_capacity / 3) + >> (SCHED_CAPACITY_SHIFT-1)) + 1; + +} + +/* + * Look for a customed capacity of a CPU in the cpu_capacity table during the + * boot. The update of all CPUs is in O(n^2) for heteregeneous system but the + * function returns directly for SMP system. + */ +static void update_cpu_capacity(unsigned int cpu) +{ + if (!cpu_capacity(cpu)) + return; + + set_capacity_scale(cpu, cpu_capacity(cpu) / middle_capacity); + + printk(KERN_INFO "CPU%u: update cpu_capacity %lu\n", + cpu, arch_scale_freq_capacity(NULL, cpu)); +} + +#else +static inline void parse_dt_topology(void) {} +static inline void update_cpu_capacity(unsigned int cpuid) {} +#endif + + /* + * cpu topology table + */ +struct cputopo_arm cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +/* + * The current assumption is that we can power gate each core independently. + * This will be superseded by DT binding once available. + */ +const struct cpumask *cpu_corepower_mask(int cpu) +{ + return &cpu_topology[cpu].thread_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cputopo_arm *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->socket_id != cpu_topo->socket_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } + smp_wmb(); +} + +/* + * store_cpu_topology is called at boot when only one cpu is running + * and with the mutex cpu_hotplug.lock locked, when several cpus have booted, + * which prevents simultaneous write access to cpu_topology array + */ +void store_cpu_topology(unsigned int cpuid) +{ + struct cputopo_arm *cpuid_topo = &cpu_topology[cpuid]; + unsigned int mpidr; + + /* If the cpu topology has been already set, just return */ + if (cpuid_topo->core_id != -1) + return; + + mpidr = read_cpuid_mpidr(); + + /* create cpu topology mapping */ + if ((mpidr & MPIDR_SMP_BITMASK) == MPIDR_SMP_VALUE) { + /* + * This is a multiprocessor system + * multiprocessor format & multiprocessor mode field are set + */ + + if (mpidr & MPIDR_MT_BITMASK) { + /* core performance interdependency */ + cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + } else { + /* largely independent cores */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->socket_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + } + } else { + /* + * This is an uniprocessor system + * we are in multiprocessor format but uniprocessor system + * or in the old uniprocessor format + */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = 0; + cpuid_topo->socket_id = -1; + } + + update_siblings_masks(cpuid); + + update_cpu_capacity(cpuid); + + printk(KERN_INFO "CPU%u: thread %d, cpu %d, socket %d, mpidr %x\n", + cpuid, cpu_topology[cpuid].thread_id, + cpu_topology[cpuid].core_id, + cpu_topology[cpuid].socket_id, mpidr); +} + +static inline int cpu_corepower_flags(void) +{ + return SD_SHARE_PKG_RESOURCES | SD_SHARE_POWERDOMAIN; +} + +static struct sched_domain_topology_level arm_topology[] = { +#ifdef CONFIG_SCHED_MC + { cpu_corepower_mask, cpu_corepower_flags, SD_INIT_NAME(GMC) }, + { cpu_coregroup_mask, cpu_core_flags, SD_INIT_NAME(MC) }, +#endif + { cpu_cpu_mask, SD_INIT_NAME(DIE) }, + { NULL, }, +}; + +/* + * init_cpu_topology is called at boot when only one cpu is running + * which prevent simultaneous write access to cpu_topology array + */ +void __init init_cpu_topology(void) +{ + unsigned int cpu; + + /* init core mask and capacity */ + for_each_possible_cpu(cpu) { + struct cputopo_arm *cpu_topo = &(cpu_topology[cpu]); + + cpu_topo->thread_id = -1; + cpu_topo->core_id = -1; + cpu_topo->socket_id = -1; + cpumask_clear(&cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + + set_capacity_scale(cpu, SCHED_CAPACITY_SCALE); + } + smp_wmb(); + + parse_dt_topology(); + + /* Set scheduler topology descriptor */ + set_sched_topology(arm_topology); +} diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 446aee97436..abd2fc06773 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -21,21 +21,30 @@ #include <linux/kdebug.h> #include <linux/module.h> #include <linux/kexec.h> +#include <linux/bug.h> #include <linux/delay.h> #include <linux/init.h> +#include <linux/sched.h> -#include <asm/atomic.h> +#include <linux/atomic.h> #include <asm/cacheflush.h> -#include <asm/system.h> +#include <asm/exception.h> #include <asm/unistd.h> #include <asm/traps.h> #include <asm/unwind.h> #include <asm/tls.h> +#include <asm/system_misc.h> +#include <asm/opcodes.h> + +static const char *handler[]= { + "prefetch abort", + "data abort", + "address exception", + "interrupt", + "undefined instruction", +}; -#include "ptrace.h" -#include "signal.h" - -static const char *handler[]= { "prefetch abort", "data abort", "address exception", "interrupt" }; +void *vectors_page; #ifdef CONFIG_DEBUG_USER unsigned int user_debug; @@ -53,7 +62,7 @@ static void dump_mem(const char *, const char *, unsigned long, unsigned long); void dump_backtrace_entry(unsigned long where, unsigned long from, unsigned long frame) { #ifdef CONFIG_KALLSYMS - printk("[<%08lx>] (%pS) from [<%08lx>] (%pS)\n", where, (void *)where, from, (void *)from); + printk("[<%08lx>] (%ps) from [<%08lx>] (%pS)\n", where, (void *)where, from, (void *)from); #else printk("Function entered at [<%08lx>] from [<%08lx>]\n", where, from); #endif @@ -137,7 +146,7 @@ static void dump_instr(const char *lvl, struct pt_regs *regs) fs = get_fs(); set_fs(KERNEL_DS); - for (i = -4; i < 1; i++) { + for (i = -4; i < 1 + !!thumb; i++) { unsigned int val, bad; if (thumb) @@ -200,13 +209,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) } #endif -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); @@ -223,26 +225,30 @@ void show_stack(struct task_struct *tsk, unsigned long *sp) #else #define S_SMP "" #endif +#ifdef CONFIG_THUMB2_KERNEL +#define S_ISA " THUMB2" +#else +#define S_ISA " ARM" +#endif -static int __die(const char *str, int err, struct thread_info *thread, struct pt_regs *regs) +static int __die(const char *str, int err, struct pt_regs *regs) { - struct task_struct *tsk = thread->task; + struct task_struct *tsk = current; static int die_counter; int ret; - printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP "\n", - str, err, ++die_counter); - sysfs_printk_last_file(); + printk(KERN_EMERG "Internal error: %s: %x [#%d]" S_PREEMPT S_SMP + S_ISA "\n", str, err, ++die_counter); /* trap and error numbers are mostly meaningless on ARM */ ret = notify_die(DIE_OOPS, str, regs, err, tsk->thread.trap_no, SIGSEGV); if (ret == NOTIFY_STOP) - return ret; + return 1; print_modules(); __show_regs(regs); printk(KERN_EMERG "Process %.*s (pid: %d, stack limit = 0x%p)\n", - TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), thread + 1); + TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk)); if (!user_mode(regs) || in_interrupt()) { dump_mem(KERN_EMERG, "Stack: ", regs->ARM_sp, @@ -251,40 +257,77 @@ static int __die(const char *str, int err, struct thread_info *thread, struct pt dump_instr(KERN_EMERG, regs); } - return ret; + return 0; } -DEFINE_SPINLOCK(die_lock); +static arch_spinlock_t die_lock = __ARCH_SPIN_LOCK_UNLOCKED; +static int die_owner = -1; +static unsigned int die_nest_count; -/* - * This function is protected against re-entrancy. - */ -void die(const char *str, struct pt_regs *regs, int err) +static unsigned long oops_begin(void) { - struct thread_info *thread = current_thread_info(); - int ret; + int cpu; + unsigned long flags; oops_enter(); - spin_lock_irq(&die_lock); + /* racy, but better than risking deadlock. */ + raw_local_irq_save(flags); + cpu = smp_processor_id(); + if (!arch_spin_trylock(&die_lock)) { + if (cpu == die_owner) + /* nested oops. should stop eventually */; + else + arch_spin_lock(&die_lock); + } + die_nest_count++; + die_owner = cpu; console_verbose(); bust_spinlocks(1); - ret = __die(str, err, thread, regs); + return flags; +} - if (regs && kexec_should_crash(thread->task)) +static void oops_end(unsigned long flags, struct pt_regs *regs, int signr) +{ + if (regs && kexec_should_crash(current)) crash_kexec(regs); bust_spinlocks(0); - add_taint(TAINT_DIE); - spin_unlock_irq(&die_lock); + die_owner = -1; + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); + die_nest_count--; + if (!die_nest_count) + /* Nest count reaches zero, release the lock. */ + arch_spin_unlock(&die_lock); + raw_local_irq_restore(flags); oops_exit(); if (in_interrupt()) panic("Fatal exception in interrupt"); if (panic_on_oops) panic("Fatal exception"); - if (ret != NOTIFY_STOP) - do_exit(SIGSEGV); + if (signr) + do_exit(signr); +} + +/* + * This function is protected against re-entrancy. + */ +void die(const char *str, struct pt_regs *regs, int err) +{ + enum bug_trap_type bug_type = BUG_TRAP_TYPE_NONE; + unsigned long flags = oops_begin(); + int sig = SIGSEGV; + + if (!user_mode(regs)) + bug_type = report_bug(regs->ARM_pc, regs); + if (bug_type != BUG_TRAP_TYPE_NONE) + str = "Oops - BUG"; + + if (__die(str, err, regs)) + sig = 0; + + oops_end(flags, regs, sig); } void arm_notify_die(const char *str, struct pt_regs *regs, @@ -300,25 +343,45 @@ void arm_notify_die(const char *str, struct pt_regs *regs, } } +#ifdef CONFIG_GENERIC_BUG + +int is_valid_bugaddr(unsigned long pc) +{ +#ifdef CONFIG_THUMB2_KERNEL + u16 bkpt; + u16 insn = __opcode_to_mem_thumb16(BUG_INSTR_VALUE); +#else + u32 bkpt; + u32 insn = __opcode_to_mem_arm(BUG_INSTR_VALUE); +#endif + + if (probe_kernel_address((unsigned *)pc, bkpt)) + return 0; + + return bkpt == insn; +} + +#endif + static LIST_HEAD(undef_hook); -static DEFINE_SPINLOCK(undef_lock); +static DEFINE_RAW_SPINLOCK(undef_lock); void register_undef_hook(struct undef_hook *hook) { unsigned long flags; - spin_lock_irqsave(&undef_lock, flags); + raw_spin_lock_irqsave(&undef_lock, flags); list_add(&hook->node, &undef_hook); - spin_unlock_irqrestore(&undef_lock, flags); + raw_spin_unlock_irqrestore(&undef_lock, flags); } void unregister_undef_hook(struct undef_hook *hook) { unsigned long flags; - spin_lock_irqsave(&undef_lock, flags); + raw_spin_lock_irqsave(&undef_lock, flags); list_del(&hook->node); - spin_unlock_irqrestore(&undef_lock, flags); + raw_spin_unlock_irqrestore(&undef_lock, flags); } static int call_undef_hook(struct pt_regs *regs, unsigned int instr) @@ -327,47 +390,62 @@ static int call_undef_hook(struct pt_regs *regs, unsigned int instr) unsigned long flags; int (*fn)(struct pt_regs *regs, unsigned int instr) = NULL; - spin_lock_irqsave(&undef_lock, flags); + raw_spin_lock_irqsave(&undef_lock, flags); list_for_each_entry(hook, &undef_hook, node) if ((instr & hook->instr_mask) == hook->instr_val && (regs->ARM_cpsr & hook->cpsr_mask) == hook->cpsr_val) fn = hook->fn; - spin_unlock_irqrestore(&undef_lock, flags); + raw_spin_unlock_irqrestore(&undef_lock, flags); return fn ? fn(regs, instr) : 1; } asmlinkage void __exception do_undefinstr(struct pt_regs *regs) { - unsigned int correction = thumb_mode(regs) ? 2 : 4; unsigned int instr; siginfo_t info; void __user *pc; - /* - * According to the ARM ARM, PC is 2 or 4 bytes ahead, - * depending whether we're in Thumb mode or not. - * Correct this offset. - */ - regs->ARM_pc -= correction; - pc = (void __user *)instruction_pointer(regs); if (processor_mode(regs) == SVC_MODE) { - instr = *(u32 *) pc; +#ifdef CONFIG_THUMB2_KERNEL + if (thumb_mode(regs)) { + instr = __mem_to_opcode_thumb16(((u16 *)pc)[0]); + if (is_wide_instruction(instr)) { + u16 inst2; + inst2 = __mem_to_opcode_thumb16(((u16 *)pc)[1]); + instr = __opcode_thumb32_compose(instr, inst2); + } + } else +#endif + instr = __mem_to_opcode_arm(*(u32 *) pc); } else if (thumb_mode(regs)) { - get_user(instr, (u16 __user *)pc); + if (get_user(instr, (u16 __user *)pc)) + goto die_sig; + instr = __mem_to_opcode_thumb16(instr); + if (is_wide_instruction(instr)) { + unsigned int instr2; + if (get_user(instr2, (u16 __user *)pc+1)) + goto die_sig; + instr2 = __mem_to_opcode_thumb16(instr2); + instr = __opcode_thumb32_compose(instr, instr2); + } } else { - get_user(instr, (u32 __user *)pc); + if (get_user(instr, (u32 __user *)pc)) + goto die_sig; + instr = __mem_to_opcode_arm(instr); } if (call_undef_hook(regs, instr) == 0) return; +die_sig: #ifdef CONFIG_DEBUG_USER if (user_debug & UDBG_UNDEFINED) { printk(KERN_INFO "%s (%d): undefined instruction: pc=%p\n", current->comm, task_pid_nr(current), pc); + __show_regs(regs); dump_instr(KERN_INFO, regs); } #endif @@ -408,8 +486,7 @@ static int bad_syscall(int n, struct pt_regs *regs) struct thread_info *thread = current_thread_info(); siginfo_t info; - if (current->personality != PER_LINUX && - current->personality != PER_LINUX_32BIT && + if ((current->personality & PER_MASK) != PER_LINUX && thread->exec_domain->handler) { thread->exec_domain->handler(n, regs); return regs->ARM_r0; @@ -434,26 +511,65 @@ static int bad_syscall(int n, struct pt_regs *regs) return regs->ARM_r0; } -static inline void -do_cache_op(unsigned long start, unsigned long end, int flags) +static long do_cache_op_restart(struct restart_block *); + +static inline int +__do_cache_op(unsigned long start, unsigned long end) +{ + int ret; + + do { + unsigned long chunk = min(PAGE_SIZE, end - start); + + if (signal_pending(current)) { + struct thread_info *ti = current_thread_info(); + + ti->restart_block = (struct restart_block) { + .fn = do_cache_op_restart, + }; + + ti->arm_restart_block = (struct arm_restart_block) { + { + .cache = { + .start = start, + .end = end, + }, + }, + }; + + return -ERESTART_RESTARTBLOCK; + } + + ret = flush_cache_user_range(start, start + chunk); + if (ret) + return ret; + + cond_resched(); + start += chunk; + } while (start < end); + + return 0; +} + +static long do_cache_op_restart(struct restart_block *unused) { - struct mm_struct *mm = current->active_mm; - struct vm_area_struct *vma; + struct arm_restart_block *restart_block; + + restart_block = ¤t_thread_info()->arm_restart_block; + return __do_cache_op(restart_block->cache.start, + restart_block->cache.end); +} +static inline int +do_cache_op(unsigned long start, unsigned long end, int flags) +{ if (end < start || flags) - return; + return -EINVAL; - down_read(&mm->mmap_sem); - vma = find_vma(mm, start); - if (vma && vma->vm_start < end) { - if (start < vma->vm_start) - start = vma->vm_start; - if (end > vma->vm_end) - end = vma->vm_end; + if (!access_ok(VERIFY_READ, start, end - start)) + return -EFAULT; - flush_cache_user_range(vma, start, end); - } - up_read(&mm->mmap_sem); + return __do_cache_op(start, end); } /* @@ -499,8 +615,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) * the specified region). */ case NR(cacheflush): - do_cache_op(regs->ARM_r0, regs->ARM_r1, regs->ARM_r2); - return 0; + return do_cache_op(regs->ARM_r0, regs->ARM_r1, regs->ARM_r2); case NR(usr26): if (!(elf_hwcap & HWCAP_26BIT)) @@ -515,7 +630,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return regs->ARM_r0; case NR(set_tls): - thread->tp_value = regs->ARM_r0; + thread->tp_value[0] = regs->ARM_r0; if (tls_emu) return 0; if (has_tls_reg) { @@ -563,7 +678,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) if (!pmd_present(*pmd)) goto bad_access; pte = pte_offset_map_lock(mm, pmd, addr, &ptl); - if (!pte_present(*pte) || !pte_dirty(*pte)) { + if (!pte_present(*pte) || !pte_write(*pte) || !pte_dirty(*pte)) { pte_unmap_unlock(pte, ptl); goto bad_access; } @@ -633,7 +748,7 @@ static int get_tp_trap(struct pt_regs *regs, unsigned int instr) int reg = (instr >> 12) & 15; if (reg == 15) return 1; - regs->uregs[reg] = current_thread_info()->tp_value; + regs->uregs[reg] = current_thread_info()->tp_value[0]; regs->ARM_pc += 4; return 0; } @@ -691,16 +806,6 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs) arm_notify_die("unknown data abort code", regs, &info, instr, 0); } -void __attribute__((noreturn)) __bug(const char *file, int line) -{ - printk(KERN_CRIT"kernel BUG at %s:%d!\n", file, line); - *(int *)0 = 0; - - /* Avoid "noreturn function does return" */ - for (;;); -} -EXPORT_SYMBOL(__bug); - void __readwrite_bug(const char *fn) { printk("%s called, but not implemented\n", fn); @@ -708,19 +813,19 @@ void __readwrite_bug(const char *fn) } EXPORT_SYMBOL(__readwrite_bug); -void __pte_error(const char *file, int line, unsigned long val) +void __pte_error(const char *file, int line, pte_t pte) { - printk("%s:%d: bad pte %08lx.\n", file, line, val); + printk("%s:%d: bad pte %08llx.\n", file, line, (long long)pte_val(pte)); } -void __pmd_error(const char *file, int line, unsigned long val) +void __pmd_error(const char *file, int line, pmd_t pmd) { - printk("%s:%d: bad pmd %08lx.\n", file, line, val); + printk("%s:%d: bad pmd %08llx.\n", file, line, (long long)pmd_val(pmd)); } -void __pgd_error(const char *file, int line, unsigned long val) +void __pgd_error(const char *file, int line, pgd_t pgd) { - printk("%s:%d: bad pgd %08lx.\n", file, line, val); + printk("%s:%d: bad pgd %08llx.\n", file, line, (long long)pgd_val(pgd)); } asmlinkage void __div0(void) @@ -744,23 +849,45 @@ void __init trap_init(void) return; } -static void __init kuser_get_tls_init(unsigned long vectors) +#ifdef CONFIG_KUSER_HELPERS +static void __init kuser_init(void *vectors) { + extern char __kuser_helper_start[], __kuser_helper_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + + memcpy(vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz); + /* * vectors + 0xfe0 = __kuser_get_tls * vectors + 0xfe8 = hardware TLS instruction at 0xffff0fe8 */ if (tls_emu || has_tls_reg) - memcpy((void *)vectors + 0xfe0, (void *)vectors + 0xfe8, 4); + memcpy(vectors + 0xfe0, vectors + 0xfe8, 4); +} +#else +static inline void __init kuser_init(void *vectors) +{ } +#endif -void __init early_trap_init(void) +void __init early_trap_init(void *vectors_base) { - unsigned long vectors = CONFIG_VECTORS_BASE; +#ifndef CONFIG_CPU_V7M + unsigned long vectors = (unsigned long)vectors_base; extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; - extern char __kuser_helper_start[], __kuser_helper_end[]; - int kuser_sz = __kuser_helper_end - __kuser_helper_start; + unsigned i; + + vectors_page = vectors_base; + + /* + * Poison the vectors page with an undefined instruction. This + * instruction is chosen to be undefined for both ARM and Thumb + * ISAs. The Thumb version is an undefined instruction with a + * branch back to the undefined instruction. + */ + for (i = 0; i < PAGE_SIZE / sizeof(u32); i++) + ((u32 *)vectors_base)[i] = 0xe7fddef1; /* * Copy the vectors, stubs and kuser helpers (in entry-armv.S) @@ -768,23 +895,17 @@ void __init early_trap_init(void) * are visible to the instruction stream. */ memcpy((void *)vectors, __vectors_start, __vectors_end - __vectors_start); - memcpy((void *)vectors + 0x200, __stubs_start, __stubs_end - __stubs_start); - memcpy((void *)vectors + 0x1000 - kuser_sz, __kuser_helper_start, kuser_sz); + memcpy((void *)vectors + 0x1000, __stubs_start, __stubs_end - __stubs_start); - /* - * Do processor specific fixups for the kuser helpers - */ - kuser_get_tls_init(vectors); + kuser_init(vectors_base); + flush_icache_range(vectors, vectors + PAGE_SIZE * 2); + modify_domain(DOMAIN_USER, DOMAIN_CLIENT); +#else /* ifndef CONFIG_CPU_V7M */ /* - * Copy signal return handlers into the vector page, and - * set sigreturn to be a pointer to these. + * on V7-M there is no need to copy the vector table to a dedicated + * memory area. The address is configurable and so a table in the kernel + * image can be used. */ - memcpy((void *)KERN_SIGRETURN_CODE, sigreturn_codes, - sizeof(sigreturn_codes)); - memcpy((void *)KERN_RESTART_CODE, syscall_restart_code, - sizeof(syscall_restart_code)); - - flush_icache_range(vectors, vectors + PAGE_SIZE); - modify_domain(DOMAIN_USER, DOMAIN_CLIENT); +#endif } diff --git a/arch/arm/kernel/unwind.c b/arch/arm/kernel/unwind.c index d2cb0b3c987..e67682f02cb 100644 --- a/arch/arm/kernel/unwind.c +++ b/arch/arm/kernel/unwind.c @@ -31,7 +31,7 @@ #warning Your compiler does not have EABI support. #warning ARM unwind is known to compile only with EABI compilers. #warning Change compiler or disable ARM_UNWIND option. -#elif (__GNUC__ == 4 && __GNUC_MINOR__ <= 2) +#elif (__GNUC__ == 4 && __GNUC_MINOR__ <= 2) && !defined(__clang__) #warning Your compiler is too buggy; it is known to not compile ARM unwind support. #warning Change compiler or disable ARM_UNWIND option. #endif @@ -39,7 +39,7 @@ #include <linux/kernel.h> #include <linux/init.h> -#include <linux/module.h> +#include <linux/export.h> #include <linux/sched.h> #include <linux/slab.h> #include <linux/spinlock.h> @@ -67,7 +67,13 @@ EXPORT_SYMBOL(__aeabi_unwind_cpp_pr2); struct unwind_ctrl_block { unsigned long vrs[16]; /* virtual register set */ - unsigned long *insn; /* pointer to the current instructions word */ + const unsigned long *insn; /* pointer to the current instructions word */ + unsigned long sp_high; /* highest value of sp allowed */ + /* + * 1 : check for stack overflow for each register pop. + * 0 : save overhead if there is plenty of stack remaining. + */ + int check_each_pop; int entries; /* number of entries left to interpret */ int byte; /* current byte number in the instructions word */ }; @@ -83,8 +89,9 @@ enum regs { PC = 15 }; -extern struct unwind_idx __start_unwind_idx[]; -extern struct unwind_idx __stop_unwind_idx[]; +extern const struct unwind_idx __start_unwind_idx[]; +static const struct unwind_idx *__origin_unwind_idx; +extern const struct unwind_idx __stop_unwind_idx[]; static DEFINE_SPINLOCK(unwind_lock); static LIST_HEAD(unwind_tables); @@ -98,45 +105,99 @@ static LIST_HEAD(unwind_tables); }) /* - * Binary search in the unwind index. The entries entries are + * Binary search in the unwind index. The entries are * guaranteed to be sorted in ascending order by the linker. + * + * start = first entry + * origin = first entry with positive offset (or stop if there is no such entry) + * stop - 1 = last entry */ -static struct unwind_idx *search_index(unsigned long addr, - struct unwind_idx *first, - struct unwind_idx *last) +static const struct unwind_idx *search_index(unsigned long addr, + const struct unwind_idx *start, + const struct unwind_idx *origin, + const struct unwind_idx *stop) { - pr_debug("%s(%08lx, %p, %p)\n", __func__, addr, first, last); + unsigned long addr_prel31; + + pr_debug("%s(%08lx, %p, %p, %p)\n", + __func__, addr, start, origin, stop); + + /* + * only search in the section with the matching sign. This way the + * prel31 numbers can be compared as unsigned longs. + */ + if (addr < (unsigned long)start) + /* negative offsets: [start; origin) */ + stop = origin; + else + /* positive offsets: [origin; stop) */ + start = origin; + + /* prel31 for address relavive to start */ + addr_prel31 = (addr - (unsigned long)start) & 0x7fffffff; - if (addr < first->addr) { + while (start < stop - 1) { + const struct unwind_idx *mid = start + ((stop - start) >> 1); + + /* + * As addr_prel31 is relative to start an offset is needed to + * make it relative to mid. + */ + if (addr_prel31 - ((unsigned long)mid - (unsigned long)start) < + mid->addr_offset) + stop = mid; + else { + /* keep addr_prel31 relative to start */ + addr_prel31 -= ((unsigned long)mid - + (unsigned long)start); + start = mid; + } + } + + if (likely(start->addr_offset <= addr_prel31)) + return start; + else { pr_warning("unwind: Unknown symbol address %08lx\n", addr); return NULL; - } else if (addr >= last->addr) - return last; + } +} - while (first < last - 1) { - struct unwind_idx *mid = first + ((last - first + 1) >> 1); +static const struct unwind_idx *unwind_find_origin( + const struct unwind_idx *start, const struct unwind_idx *stop) +{ + pr_debug("%s(%p, %p)\n", __func__, start, stop); + while (start < stop) { + const struct unwind_idx *mid = start + ((stop - start) >> 1); - if (addr < mid->addr) - last = mid; + if (mid->addr_offset >= 0x40000000) + /* negative offset */ + start = mid + 1; else - first = mid; + /* positive offset */ + stop = mid; } - - return first; + pr_debug("%s -> %p\n", __func__, stop); + return stop; } -static struct unwind_idx *unwind_find_idx(unsigned long addr) +static const struct unwind_idx *unwind_find_idx(unsigned long addr) { - struct unwind_idx *idx = NULL; + const struct unwind_idx *idx = NULL; unsigned long flags; pr_debug("%s(%08lx)\n", __func__, addr); - if (core_kernel_text(addr)) + if (core_kernel_text(addr)) { + if (unlikely(!__origin_unwind_idx)) + __origin_unwind_idx = + unwind_find_origin(__start_unwind_idx, + __stop_unwind_idx); + /* main unwind table */ idx = search_index(addr, __start_unwind_idx, - __stop_unwind_idx - 1); - else { + __origin_unwind_idx, + __stop_unwind_idx); + } else { /* module unwind tables */ struct unwind_table *table; @@ -145,7 +206,8 @@ static struct unwind_idx *unwind_find_idx(unsigned long addr) if (addr >= table->begin_addr && addr < table->end_addr) { idx = search_index(addr, table->start, - table->stop - 1); + table->origin, + table->stop); /* Move-to-front to exploit common traces */ list_move(&table->list, &unwind_tables); break; @@ -179,12 +241,85 @@ static unsigned long unwind_get_byte(struct unwind_ctrl_block *ctrl) return ret; } +/* Before poping a register check whether it is feasible or not */ +static int unwind_pop_register(struct unwind_ctrl_block *ctrl, + unsigned long **vsp, unsigned int reg) +{ + if (unlikely(ctrl->check_each_pop)) + if (*vsp >= (unsigned long *)ctrl->sp_high) + return -URC_FAILURE; + + ctrl->vrs[reg] = *(*vsp)++; + return URC_OK; +} + +/* Helper functions to execute the instructions */ +static int unwind_exec_pop_subset_r4_to_r13(struct unwind_ctrl_block *ctrl, + unsigned long mask) +{ + unsigned long *vsp = (unsigned long *)ctrl->vrs[SP]; + int load_sp, reg = 4; + + load_sp = mask & (1 << (13 - 4)); + while (mask) { + if (mask & 1) + if (unwind_pop_register(ctrl, &vsp, reg)) + return -URC_FAILURE; + mask >>= 1; + reg++; + } + if (!load_sp) + ctrl->vrs[SP] = (unsigned long)vsp; + + return URC_OK; +} + +static int unwind_exec_pop_r4_to_rN(struct unwind_ctrl_block *ctrl, + unsigned long insn) +{ + unsigned long *vsp = (unsigned long *)ctrl->vrs[SP]; + int reg; + + /* pop R4-R[4+bbb] */ + for (reg = 4; reg <= 4 + (insn & 7); reg++) + if (unwind_pop_register(ctrl, &vsp, reg)) + return -URC_FAILURE; + + if (insn & 0x8) + if (unwind_pop_register(ctrl, &vsp, 14)) + return -URC_FAILURE; + + ctrl->vrs[SP] = (unsigned long)vsp; + + return URC_OK; +} + +static int unwind_exec_pop_subset_r0_to_r3(struct unwind_ctrl_block *ctrl, + unsigned long mask) +{ + unsigned long *vsp = (unsigned long *)ctrl->vrs[SP]; + int reg = 0; + + /* pop R0-R3 according to mask */ + while (mask) { + if (mask & 1) + if (unwind_pop_register(ctrl, &vsp, reg)) + return -URC_FAILURE; + mask >>= 1; + reg++; + } + ctrl->vrs[SP] = (unsigned long)vsp; + + return URC_OK; +} + /* * Execute the current unwind instruction. */ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) { unsigned long insn = unwind_get_byte(ctrl); + int ret = URC_OK; pr_debug("%s: insn = %08lx\n", __func__, insn); @@ -194,8 +329,6 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) ctrl->vrs[SP] -= ((insn & 0x3f) << 2) + 4; else if ((insn & 0xf0) == 0x80) { unsigned long mask; - unsigned long *vsp = (unsigned long *)ctrl->vrs[SP]; - int load_sp, reg = 4; insn = (insn << 8) | unwind_get_byte(ctrl); mask = insn & 0x0fff; @@ -205,29 +338,16 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) return -URC_FAILURE; } - /* pop R4-R15 according to mask */ - load_sp = mask & (1 << (13 - 4)); - while (mask) { - if (mask & 1) - ctrl->vrs[reg] = *vsp++; - mask >>= 1; - reg++; - } - if (!load_sp) - ctrl->vrs[SP] = (unsigned long)vsp; + ret = unwind_exec_pop_subset_r4_to_r13(ctrl, mask); + if (ret) + goto error; } else if ((insn & 0xf0) == 0x90 && (insn & 0x0d) != 0x0d) ctrl->vrs[SP] = ctrl->vrs[insn & 0x0f]; else if ((insn & 0xf0) == 0xa0) { - unsigned long *vsp = (unsigned long *)ctrl->vrs[SP]; - int reg; - - /* pop R4-R[4+bbb] */ - for (reg = 4; reg <= 4 + (insn & 7); reg++) - ctrl->vrs[reg] = *vsp++; - if (insn & 0x80) - ctrl->vrs[14] = *vsp++; - ctrl->vrs[SP] = (unsigned long)vsp; + ret = unwind_exec_pop_r4_to_rN(ctrl, insn); + if (ret) + goto error; } else if (insn == 0xb0) { if (ctrl->vrs[PC] == 0) ctrl->vrs[PC] = ctrl->vrs[LR]; @@ -235,8 +355,6 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) ctrl->entries = 0; } else if (insn == 0xb1) { unsigned long mask = unwind_get_byte(ctrl); - unsigned long *vsp = (unsigned long *)ctrl->vrs[SP]; - int reg = 0; if (mask == 0 || mask & 0xf0) { pr_warning("unwind: Spare encoding %04lx\n", @@ -244,14 +362,9 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) return -URC_FAILURE; } - /* pop R0-R3 according to mask */ - while (mask) { - if (mask & 1) - ctrl->vrs[reg] = *vsp++; - mask >>= 1; - reg++; - } - ctrl->vrs[SP] = (unsigned long)vsp; + ret = unwind_exec_pop_subset_r0_to_r3(ctrl, mask); + if (ret) + goto error; } else if (insn == 0xb2) { unsigned long uleb128 = unwind_get_byte(ctrl); @@ -264,7 +377,8 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) pr_debug("%s: fp = %08lx sp = %08lx lr = %08lx pc = %08lx\n", __func__, ctrl->vrs[FP], ctrl->vrs[SP], ctrl->vrs[LR], ctrl->vrs[PC]); - return URC_OK; +error: + return ret; } /* @@ -273,13 +387,13 @@ static int unwind_exec_insn(struct unwind_ctrl_block *ctrl) */ int unwind_frame(struct stackframe *frame) { - unsigned long high, low; - struct unwind_idx *idx; + unsigned long low; + const struct unwind_idx *idx; struct unwind_ctrl_block ctrl; - /* only go to a higher address on the stack */ + /* store the highest address on the stack to avoid crossing it*/ low = frame->sp; - high = ALIGN(low, THREAD_SIZE); + ctrl.sp_high = ALIGN(low, THREAD_SIZE); pr_debug("%s(pc = %08lx lr = %08lx sp = %08lx)\n", __func__, frame->pc, frame->lr, frame->sp); @@ -326,11 +440,16 @@ int unwind_frame(struct stackframe *frame) return -URC_FAILURE; } + ctrl.check_each_pop = 0; + while (ctrl.entries > 0) { - int urc = unwind_exec_insn(&ctrl); + int urc; + if ((ctrl.sp_high - ctrl.vrs[SP]) < sizeof(ctrl.vrs)) + ctrl.check_each_pop = 1; + urc = unwind_exec_insn(&ctrl); if (urc < 0) return urc; - if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= high) + if (ctrl.vrs[SP] < low || ctrl.vrs[SP] >= ctrl.sp_high) return -URC_FAILURE; } @@ -399,7 +518,6 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, unsigned long text_size) { unsigned long flags; - struct unwind_idx *idx; struct unwind_table *tab = kmalloc(sizeof(*tab), GFP_KERNEL); pr_debug("%s(%08lx, %08lx, %08lx, %08lx)\n", __func__, start, size, @@ -408,15 +526,12 @@ struct unwind_table *unwind_table_add(unsigned long start, unsigned long size, if (!tab) return tab; - tab->start = (struct unwind_idx *)start; - tab->stop = (struct unwind_idx *)(start + size); + tab->start = (const struct unwind_idx *)start; + tab->stop = (const struct unwind_idx *)(start + size); + tab->origin = unwind_find_origin(tab->start, tab->stop); tab->begin_addr = text_addr; tab->end_addr = text_addr + text_size; - /* Convert the symbol addresses to absolute values */ - for (idx = tab->start; idx < tab->stop; idx++) - idx->addr = prel31_to_addr(&idx->addr); - spin_lock_irqsave(&unwind_lock, flags); list_add_tail(&tab->list, &unwind_tables); spin_unlock_irqrestore(&unwind_lock, flags); @@ -437,16 +552,3 @@ void unwind_table_del(struct unwind_table *tab) kfree(tab); } - -int __init unwind_init(void) -{ - struct unwind_idx *idx; - - /* Convert the symbol addresses to absolute values */ - for (idx = __start_unwind_idx; idx < __stop_unwind_idx; idx++) - idx->addr = prel31_to_addr(&idx->addr); - - pr_debug("unwind: ARM stack unwinding initialised\n"); - - return 0; -} diff --git a/arch/arm/kernel/uprobes-arm.c b/arch/arm/kernel/uprobes-arm.c new file mode 100644 index 00000000000..d3b655ff17d --- /dev/null +++ b/arch/arm/kernel/uprobes-arm.c @@ -0,0 +1,234 @@ +/* + * Copyright (C) 2012 Rabin Vincent <rabin at rab.in> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/types.h> +#include <linux/stddef.h> +#include <linux/wait.h> +#include <linux/uprobes.h> +#include <linux/module.h> + +#include "probes.h" +#include "probes-arm.h" +#include "uprobes.h" + +static int uprobes_substitute_pc(unsigned long *pinsn, u32 oregs) +{ + probes_opcode_t insn = __mem_to_opcode_arm(*pinsn); + probes_opcode_t temp; + probes_opcode_t mask; + int freereg; + u32 free = 0xffff; + u32 regs; + + for (regs = oregs; regs; regs >>= 4, insn >>= 4) { + if ((regs & 0xf) == REG_TYPE_NONE) + continue; + + free &= ~(1 << (insn & 0xf)); + } + + /* No PC, no problem */ + if (free & (1 << 15)) + return 15; + + if (!free) + return -1; + + /* + * fls instead of ffs ensures that for "ldrd r0, r1, [pc]" we would + * pick LR instead of R1. + */ + freereg = free = fls(free) - 1; + + temp = __mem_to_opcode_arm(*pinsn); + insn = temp; + regs = oregs; + mask = 0xf; + + for (; regs; regs >>= 4, mask <<= 4, free <<= 4, temp >>= 4) { + if ((regs & 0xf) == REG_TYPE_NONE) + continue; + + if ((temp & 0xf) != 15) + continue; + + insn &= ~mask; + insn |= free & mask; + } + + *pinsn = __opcode_to_mem_arm(insn); + return freereg; +} + +static void uprobe_set_pc(struct arch_uprobe *auprobe, + struct arch_uprobe_task *autask, + struct pt_regs *regs) +{ + u32 pcreg = auprobe->pcreg; + + autask->backup = regs->uregs[pcreg]; + regs->uregs[pcreg] = regs->ARM_pc + 8; +} + +static void uprobe_unset_pc(struct arch_uprobe *auprobe, + struct arch_uprobe_task *autask, + struct pt_regs *regs) +{ + /* PC will be taken care of by common code */ + regs->uregs[auprobe->pcreg] = autask->backup; +} + +static void uprobe_aluwrite_pc(struct arch_uprobe *auprobe, + struct arch_uprobe_task *autask, + struct pt_regs *regs) +{ + u32 pcreg = auprobe->pcreg; + + alu_write_pc(regs->uregs[pcreg], regs); + regs->uregs[pcreg] = autask->backup; +} + +static void uprobe_write_pc(struct arch_uprobe *auprobe, + struct arch_uprobe_task *autask, + struct pt_regs *regs) +{ + u32 pcreg = auprobe->pcreg; + + load_write_pc(regs->uregs[pcreg], regs); + regs->uregs[pcreg] = autask->backup; +} + +enum probes_insn +decode_pc_ro(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + struct arch_uprobe *auprobe = container_of(asi, struct arch_uprobe, + asi); + struct decode_emulate *decode = (struct decode_emulate *) d; + u32 regs = decode->header.type_regs.bits >> DECODE_TYPE_BITS; + int reg; + + reg = uprobes_substitute_pc(&auprobe->ixol[0], regs); + if (reg == 15) + return INSN_GOOD; + + if (reg == -1) + return INSN_REJECTED; + + auprobe->pcreg = reg; + auprobe->prehandler = uprobe_set_pc; + auprobe->posthandler = uprobe_unset_pc; + + return INSN_GOOD; +} + +enum probes_insn +decode_wb_pc(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d, bool alu) +{ + struct arch_uprobe *auprobe = container_of(asi, struct arch_uprobe, + asi); + enum probes_insn ret = decode_pc_ro(insn, asi, d); + + if (((insn >> 12) & 0xf) == 15) + auprobe->posthandler = alu ? uprobe_aluwrite_pc + : uprobe_write_pc; + + return ret; +} + +enum probes_insn +decode_rd12rn16rm0rs8_rwflags(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *d) +{ + return decode_wb_pc(insn, asi, d, true); +} + +enum probes_insn +decode_ldr(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d) +{ + return decode_wb_pc(insn, asi, d, false); +} + +enum probes_insn +uprobe_decode_ldmstm(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *d) +{ + struct arch_uprobe *auprobe = container_of(asi, struct arch_uprobe, + asi); + unsigned reglist = insn & 0xffff; + int rn = (insn >> 16) & 0xf; + int lbit = insn & (1 << 20); + unsigned used = reglist | (1 << rn); + + if (rn == 15) + return INSN_REJECTED; + + if (!(used & (1 << 15))) + return INSN_GOOD; + + if (used & (1 << 14)) + return INSN_REJECTED; + + /* Use LR instead of PC */ + insn ^= 0xc000; + + auprobe->pcreg = 14; + auprobe->ixol[0] = __opcode_to_mem_arm(insn); + + auprobe->prehandler = uprobe_set_pc; + if (lbit) + auprobe->posthandler = uprobe_write_pc; + else + auprobe->posthandler = uprobe_unset_pc; + + return INSN_GOOD; +} + +const union decode_action uprobes_probes_actions[] = { + [PROBES_EMULATE_NONE] = {.handler = probes_simulate_nop}, + [PROBES_SIMULATE_NOP] = {.handler = probes_simulate_nop}, + [PROBES_PRELOAD_IMM] = {.handler = probes_simulate_nop}, + [PROBES_PRELOAD_REG] = {.handler = probes_simulate_nop}, + [PROBES_BRANCH_IMM] = {.handler = simulate_blx1}, + [PROBES_MRS] = {.handler = simulate_mrs}, + [PROBES_BRANCH_REG] = {.handler = simulate_blx2bx}, + [PROBES_CLZ] = {.handler = probes_simulate_nop}, + [PROBES_SATURATING_ARITHMETIC] = {.handler = probes_simulate_nop}, + [PROBES_MUL1] = {.handler = probes_simulate_nop}, + [PROBES_MUL2] = {.handler = probes_simulate_nop}, + [PROBES_SWP] = {.handler = probes_simulate_nop}, + [PROBES_LDRSTRD] = {.decoder = decode_pc_ro}, + [PROBES_LOAD_EXTRA] = {.decoder = decode_pc_ro}, + [PROBES_LOAD] = {.decoder = decode_ldr}, + [PROBES_STORE_EXTRA] = {.decoder = decode_pc_ro}, + [PROBES_STORE] = {.decoder = decode_pc_ro}, + [PROBES_MOV_IP_SP] = {.handler = simulate_mov_ipsp}, + [PROBES_DATA_PROCESSING_REG] = { + .decoder = decode_rd12rn16rm0rs8_rwflags}, + [PROBES_DATA_PROCESSING_IMM] = { + .decoder = decode_rd12rn16rm0rs8_rwflags}, + [PROBES_MOV_HALFWORD] = {.handler = probes_simulate_nop}, + [PROBES_SEV] = {.handler = probes_simulate_nop}, + [PROBES_WFE] = {.handler = probes_simulate_nop}, + [PROBES_SATURATE] = {.handler = probes_simulate_nop}, + [PROBES_REV] = {.handler = probes_simulate_nop}, + [PROBES_MMI] = {.handler = probes_simulate_nop}, + [PROBES_PACK] = {.handler = probes_simulate_nop}, + [PROBES_EXTEND] = {.handler = probes_simulate_nop}, + [PROBES_EXTEND_ADD] = {.handler = probes_simulate_nop}, + [PROBES_MUL_ADD_LONG] = {.handler = probes_simulate_nop}, + [PROBES_MUL_ADD] = {.handler = probes_simulate_nop}, + [PROBES_BITFIELD] = {.handler = probes_simulate_nop}, + [PROBES_BRANCH] = {.handler = simulate_bbl}, + [PROBES_LDMSTM] = {.decoder = uprobe_decode_ldmstm} +}; diff --git a/arch/arm/kernel/uprobes.c b/arch/arm/kernel/uprobes.c new file mode 100644 index 00000000000..56adf9c1fde --- /dev/null +++ b/arch/arm/kernel/uprobes.c @@ -0,0 +1,230 @@ +/* + * Copyright (C) 2012 Rabin Vincent <rabin at rab.in> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/stddef.h> +#include <linux/errno.h> +#include <linux/highmem.h> +#include <linux/sched.h> +#include <linux/uprobes.h> +#include <linux/notifier.h> + +#include <asm/opcodes.h> +#include <asm/traps.h> + +#include "probes.h" +#include "probes-arm.h" +#include "uprobes.h" + +#define UPROBE_TRAP_NR UINT_MAX + +bool is_swbp_insn(uprobe_opcode_t *insn) +{ + return (__mem_to_opcode_arm(*insn) & 0x0fffffff) == + (UPROBE_SWBP_ARM_INSN & 0x0fffffff); +} + +int set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long vaddr) +{ + return uprobe_write_opcode(mm, vaddr, + __opcode_to_mem_arm(auprobe->bpinsn)); +} + +bool arch_uprobe_ignore(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + if (!auprobe->asi.insn_check_cc(regs->ARM_cpsr)) { + regs->ARM_pc += 4; + return true; + } + + return false; +} + +bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + probes_opcode_t opcode; + + if (!auprobe->simulate) + return false; + + opcode = __mem_to_opcode_arm(*(unsigned int *) auprobe->insn); + + auprobe->asi.insn_singlestep(opcode, &auprobe->asi, regs); + + return true; +} + +unsigned long +arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, + struct pt_regs *regs) +{ + unsigned long orig_ret_vaddr; + + orig_ret_vaddr = regs->ARM_lr; + /* Replace the return addr with trampoline addr */ + regs->ARM_lr = trampoline_vaddr; + return orig_ret_vaddr; +} + +int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, + unsigned long addr) +{ + unsigned int insn; + unsigned int bpinsn; + enum probes_insn ret; + + /* Thumb not yet support */ + if (addr & 0x3) + return -EINVAL; + + insn = __mem_to_opcode_arm(*(unsigned int *)auprobe->insn); + auprobe->ixol[0] = __opcode_to_mem_arm(insn); + auprobe->ixol[1] = __opcode_to_mem_arm(UPROBE_SS_ARM_INSN); + + ret = arm_probes_decode_insn(insn, &auprobe->asi, false, + uprobes_probes_actions); + switch (ret) { + case INSN_REJECTED: + return -EINVAL; + + case INSN_GOOD_NO_SLOT: + auprobe->simulate = true; + break; + + case INSN_GOOD: + default: + break; + } + + bpinsn = UPROBE_SWBP_ARM_INSN & 0x0fffffff; + if (insn >= 0xe0000000) + bpinsn |= 0xe0000000; /* Unconditional instruction */ + else + bpinsn |= insn & 0xf0000000; /* Copy condition from insn */ + + auprobe->bpinsn = bpinsn; + + return 0; +} + +void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, + void *src, unsigned long len) +{ + void *xol_page_kaddr = kmap_atomic(page); + void *dst = xol_page_kaddr + (vaddr & ~PAGE_MASK); + + preempt_disable(); + + /* Initialize the slot */ + memcpy(dst, src, len); + + /* flush caches (dcache/icache) */ + flush_uprobe_xol_access(page, vaddr, dst, len); + + preempt_enable(); + + kunmap_atomic(xol_page_kaddr); +} + + +int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + if (auprobe->prehandler) + auprobe->prehandler(auprobe, &utask->autask, regs); + + utask->autask.saved_trap_no = current->thread.trap_no; + current->thread.trap_no = UPROBE_TRAP_NR; + regs->ARM_pc = utask->xol_vaddr; + + return 0; +} + +int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + WARN_ON_ONCE(current->thread.trap_no != UPROBE_TRAP_NR); + + current->thread.trap_no = utask->autask.saved_trap_no; + regs->ARM_pc = utask->vaddr + 4; + + if (auprobe->posthandler) + auprobe->posthandler(auprobe, &utask->autask, regs); + + return 0; +} + +bool arch_uprobe_xol_was_trapped(struct task_struct *t) +{ + if (t->thread.trap_no != UPROBE_TRAP_NR) + return true; + + return false; +} + +void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs) +{ + struct uprobe_task *utask = current->utask; + + current->thread.trap_no = utask->autask.saved_trap_no; + instruction_pointer_set(regs, utask->vaddr); +} + +int arch_uprobe_exception_notify(struct notifier_block *self, + unsigned long val, void *data) +{ + return NOTIFY_DONE; +} + +static int uprobe_trap_handler(struct pt_regs *regs, unsigned int instr) +{ + unsigned long flags; + + local_irq_save(flags); + instr &= 0x0fffffff; + if (instr == (UPROBE_SWBP_ARM_INSN & 0x0fffffff)) + uprobe_pre_sstep_notifier(regs); + else if (instr == (UPROBE_SS_ARM_INSN & 0x0fffffff)) + uprobe_post_sstep_notifier(regs); + local_irq_restore(flags); + + return 0; +} + +unsigned long uprobe_get_swbp_addr(struct pt_regs *regs) +{ + return instruction_pointer(regs); +} + +static struct undef_hook uprobes_arm_break_hook = { + .instr_mask = 0x0fffffff, + .instr_val = (UPROBE_SWBP_ARM_INSN & 0x0fffffff), + .cpsr_mask = MODE_MASK, + .cpsr_val = USR_MODE, + .fn = uprobe_trap_handler, +}; + +static struct undef_hook uprobes_arm_ss_hook = { + .instr_mask = 0x0fffffff, + .instr_val = (UPROBE_SS_ARM_INSN & 0x0fffffff), + .cpsr_mask = MODE_MASK, + .cpsr_val = USR_MODE, + .fn = uprobe_trap_handler, +}; + +static int arch_uprobes_init(void) +{ + register_undef_hook(&uprobes_arm_break_hook); + register_undef_hook(&uprobes_arm_ss_hook); + + return 0; +} +device_initcall(arch_uprobes_init); diff --git a/arch/arm/kernel/uprobes.h b/arch/arm/kernel/uprobes.h new file mode 100644 index 00000000000..1d0c12dfbd0 --- /dev/null +++ b/arch/arm/kernel/uprobes.h @@ -0,0 +1,35 @@ +/* + * Copyright (C) 2012 Rabin Vincent <rabin at rab.in> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __ARM_KERNEL_UPROBES_H +#define __ARM_KERNEL_UPROBES_H + +enum probes_insn uprobe_decode_ldmstm(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *d); + +enum probes_insn decode_ldr(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *d); + +enum probes_insn +decode_rd12rn16rm0rs8_rwflags(probes_opcode_t insn, + struct arch_probes_insn *asi, + const struct decode_header *d); + +enum probes_insn +decode_wb_pc(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d, bool alu); + +enum probes_insn +decode_pc_ro(probes_opcode_t insn, struct arch_probes_insn *asi, + const struct decode_header *d); + +extern const union decode_action uprobes_probes_actions[]; + +#endif diff --git a/arch/arm/kernel/v7m.c b/arch/arm/kernel/v7m.c new file mode 100644 index 00000000000..4d2cba94f5c --- /dev/null +++ b/arch/arm/kernel/v7m.c @@ -0,0 +1,19 @@ +/* + * Copyright (C) 2013 Uwe Kleine-Koenig for Pengutronix + * + * This program is free software; you can redistribute it and/or modify it under + * the terms of the GNU General Public License version 2 as published by the + * Free Software Foundation. + */ +#include <linux/io.h> +#include <linux/reboot.h> +#include <asm/barrier.h> +#include <asm/v7m.h> + +void armv7m_restart(enum reboot_mode mode, const char *cmd) +{ + dsb(); + __raw_writel(V7M_SCB_AIRCR_VECTKEY | V7M_SCB_AIRCR_SYSRESETREQ, + BASEADDR_V7M_SCB + V7M_SCB_AIRCR); + dsb(); +} diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index cead8893b46..7bcee5c9b60 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -4,15 +4,27 @@ */ #include <asm-generic/vmlinux.lds.h> +#include <asm/cache.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/page.h> #define PROC_INFO \ + . = ALIGN(4); \ VMLINUX_SYMBOL(__proc_info_begin) = .; \ *(.proc.info.init) \ VMLINUX_SYMBOL(__proc_info_end) = .; +#define IDMAP_TEXT \ + ALIGN_FUNCTION(); \ + VMLINUX_SYMBOL(__idmap_text_start) = .; \ + *(.idmap.text) \ + VMLINUX_SYMBOL(__idmap_text_end) = .; \ + . = ALIGN(32); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; + #ifdef CONFIG_HOTPLUG_CPU #define ARM_CPU_DISCARD(x) #define ARM_CPU_KEEP(x) x @@ -21,6 +33,15 @@ #define ARM_CPU_KEEP(x) #endif +#if (defined(CONFIG_SMP_ON_UP) && !defined(CONFIG_DEBUG_SPINLOCK)) || \ + defined(CONFIG_GENERIC_BUG) +#define ARM_EXIT_KEEP(x) x +#define ARM_EXIT_DISCARD(x) +#else +#define ARM_EXIT_KEEP(x) +#define ARM_EXIT_DISCARD(x) x +#endif + OUTPUT_ARCH(arm) ENTRY(stext) @@ -32,52 +53,12 @@ jiffies = jiffies_64 + 4; SECTIONS { -#ifdef CONFIG_XIP_KERNEL - . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); -#else - . = PAGE_OFFSET + TEXT_OFFSET; -#endif - - .init : { /* Init code and data */ - _stext = .; - _sinittext = .; - HEAD_TEXT - INIT_TEXT - _einittext = .; - ARM_CPU_DISCARD(PROC_INFO) - __arch_info_begin = .; - *(.arch.info.init) - __arch_info_end = .; - __tagtable_begin = .; - *(.taglist.init) - __tagtable_end = .; -#ifdef CONFIG_SMP_ON_UP - __smpalt_begin = .; - *(.alt.smp.init) - __smpalt_end = .; -#endif - - INIT_SETUP(16) - - INIT_CALLS - CON_INITCALL - SECURITY_INITCALL - INIT_RAM_FS - -#ifndef CONFIG_XIP_KERNEL - __init_begin = _stext; - INIT_DATA -#endif - } - - PERCPU(PAGE_SIZE) - -#ifndef CONFIG_XIP_KERNEL - . = ALIGN(PAGE_SIZE); - __init_end = .; -#endif - /* + * XXX: The linker does not define how output sections are + * assigned to input sections when there are multiple statements + * matching the same input section name. There is no documented + * order of matching. + * * unwind exit sections must be discarded before the rest of the * unwind sections get included. */ @@ -86,31 +67,44 @@ SECTIONS *(.ARM.extab.exit.text) ARM_CPU_DISCARD(*(.ARM.exidx.cpuexit.text)) ARM_CPU_DISCARD(*(.ARM.extab.cpuexit.text)) -#ifndef CONFIG_HOTPLUG - *(.ARM.exidx.devexit.text) - *(.ARM.extab.devexit.text) -#endif + ARM_EXIT_DISCARD(EXIT_TEXT) + ARM_EXIT_DISCARD(EXIT_DATA) + EXIT_CALL #ifndef CONFIG_MMU *(.fixup) *(__ex_table) #endif +#ifndef CONFIG_SMP_ON_UP + *(.alt.smp.init) +#endif + *(.discard) + *(.discard.*) } +#ifdef CONFIG_XIP_KERNEL + . = XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR); +#else + . = PAGE_OFFSET + TEXT_OFFSET; +#endif + .head.text : { + _text = .; + HEAD_TEXT + } .text : { /* Real text segment */ - _text = .; /* Text and read-only data */ + _stext = .; /* Text and read-only data */ __exception_text_start = .; *(.exception.text) __exception_text_end = .; + IRQENTRY_TEXT TEXT_TEXT SCHED_TEXT LOCK_TEXT KPROBES_TEXT + IDMAP_TEXT #ifdef CONFIG_MMU *(.fixup) #endif *(.gnu.warning) - *(.rodata) - *(.rodata.*) *(.glue_7) *(.glue_7t) . = ALIGN(4); @@ -120,6 +114,15 @@ SECTIONS RO_DATA(PAGE_SIZE) + . = ALIGN(4); + __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) { + __start___ex_table = .; +#ifdef CONFIG_MMU + *(__ex_table) +#endif + __stop___ex_table = .; + } + #ifdef CONFIG_ARM_UNWIND /* * Stack unwinding tables @@ -137,12 +140,86 @@ SECTIONS } #endif + NOTES + _etext = .; /* End of text and rodata section */ +#ifndef CONFIG_XIP_KERNEL + . = ALIGN(PAGE_SIZE); + __init_begin = .; +#endif + /* + * The vectors and stubs are relocatable code, and the + * only thing that matters is their relative offsets + */ + __vectors_start = .; + .vectors 0 : AT(__vectors_start) { + *(.vectors) + } + . = __vectors_start + SIZEOF(.vectors); + __vectors_end = .; + + __stubs_start = .; + .stubs 0x1000 : AT(__stubs_start) { + *(.stubs) + } + . = __stubs_start + SIZEOF(.stubs); + __stubs_end = .; + + INIT_TEXT_SECTION(8) + .exit.text : { + ARM_EXIT_KEEP(EXIT_TEXT) + } + .init.proc.info : { + ARM_CPU_DISCARD(PROC_INFO) + } + .init.arch.info : { + __arch_info_begin = .; + *(.arch.info.init) + __arch_info_end = .; + } + .init.tagtable : { + __tagtable_begin = .; + *(.taglist.init) + __tagtable_end = .; + } +#ifdef CONFIG_SMP_ON_UP + .init.smpalt : { + __smpalt_begin = .; + *(.alt.smp.init) + __smpalt_end = .; + } +#endif + .init.pv_table : { + __pv_table_begin = .; + *(.pv_table) + __pv_table_end = .; + } + .init.data : { +#ifndef CONFIG_XIP_KERNEL + INIT_DATA +#endif + INIT_SETUP(16) + INIT_CALLS + CON_INITCALL + SECURITY_INITCALL + INIT_RAM_FS + } +#ifndef CONFIG_XIP_KERNEL + .exit.data : { + ARM_EXIT_KEEP(EXIT_DATA) + } +#endif + +#ifdef CONFIG_SMP + PERCPU_SECTION(L1_CACHE_BYTES) +#endif + #ifdef CONFIG_XIP_KERNEL __data_loc = ALIGN(4); /* location in binary */ . = PAGE_OFFSET + TEXT_OFFSET; #else + __init_end = .; . = ALIGN(THREAD_SIZE); __data_loc = .; #endif @@ -161,22 +238,14 @@ SECTIONS . = ALIGN(PAGE_SIZE); __init_begin = .; INIT_DATA + ARM_EXIT_KEEP(EXIT_DATA) . = ALIGN(PAGE_SIZE); __init_end = .; #endif NOSAVE_DATA - CACHELINE_ALIGNED_DATA(32) - - /* - * The exception fixup table (might need resorting at runtime) - */ - . = ALIGN(32); - __start___ex_table = .; -#ifdef CONFIG_MMU - *(__ex_table) -#endif - __stop___ex_table = .; + CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES) + READ_MOSTLY_DATA(L1_CACHE_BYTES) /* * and the usual data section @@ -250,15 +319,6 @@ SECTIONS STABS_DEBUG .comment 0 : { *(.comment) } - - /* Default discards */ - DISCARDS - -#ifndef CONFIG_SMP_ON_UP - /DISCARD/ : { - *(.alt.smp.init) - } -#endif } /* @@ -268,3 +328,8 @@ SECTIONS */ ASSERT((__proc_info_end - __proc_info_begin), "missing CPU support") ASSERT((__arch_info_end - __arch_info_begin), "no machine record defined") +/* + * The HYP init code can't be more than a page long. + * The above comment applies as well. + */ +ASSERT(((__hyp_idmap_text_end - __hyp_idmap_text_start) <= PAGE_SIZE), "HYP init code too big") diff --git a/arch/arm/kernel/xscale-cp0.c b/arch/arm/kernel/xscale-cp0.c index 1796157e3dd..e42adc6bcdb 100644 --- a/arch/arm/kernel/xscale-cp0.c +++ b/arch/arm/kernel/xscale-cp0.c @@ -8,7 +8,6 @@ * published by the Free Software Foundation. */ -#include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/signal.h> |
