Diffstat (limited to 'arch/arm/kernel')
-rw-r--r-- | arch/arm/kernel/Makefile          |    2
-rw-r--r-- | arch/arm/kernel/atags.c           |   86
-rw-r--r-- | arch/arm/kernel/atags.h           |    5
-rw-r--r-- | arch/arm/kernel/calls.S           |    2
-rw-r--r-- | arch/arm/kernel/dma-isa.c         |    2
-rw-r--r-- | arch/arm/kernel/entry-armv.S      |  159
-rw-r--r-- | arch/arm/kernel/entry-common.S    |    2
-rw-r--r-- | arch/arm/kernel/kprobes-decode.c  | 1529
-rw-r--r-- | arch/arm/kernel/kprobes.c         |  447
-rw-r--r-- | arch/arm/kernel/machine_kexec.c   |    2
-rw-r--r-- | arch/arm/kernel/relocate_kernel.S |   30
-rw-r--r-- | arch/arm/kernel/setup.c           |   33
-rw-r--r-- | arch/arm/kernel/smp.c             |   41
-rw-r--r-- | arch/arm/kernel/time.c            |   17
-rw-r--r-- | arch/arm/kernel/traps.c           |   26
-rw-r--r-- | arch/arm/kernel/vmlinux.lds.S     |   11
16 files changed, 2253 insertions(+), 141 deletions(-)
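
Among other changes, this series adds arch/arm/kernel/atags.c, which saves the boot ATAGs at startup and exports them read-only as /proc/atags (see the patch below). As a rough illustration of how that exported buffer can be consumed, here is a minimal userspace sketch (not part of the patch) that walks the tag list; it assumes the standard ATAG header layout: a 32-bit size counted in words, followed by a 32-bit tag id, with a zero size terminating the list.

/* Hypothetical example: dump the tag list exported via /proc/atags.
 * Assumes the standard ATAG header: hdr.size counts 32-bit words
 * (including the header itself), hdr.tag is the tag id, and a zero
 * size ends the list. */
#include <stdio.h>
#include <stdint.h>

struct tag_header {
	uint32_t size;	/* tag length in 32-bit words, including this header */
	uint32_t tag;	/* tag id, e.g. 0x54410001 for ATAG_CORE */
};

int main(void)
{
	uint32_t buf[2048];
	size_t words, off = 0;
	FILE *f = fopen("/proc/atags", "rb");

	if (!f) {
		perror("/proc/atags");
		return 1;
	}
	words = fread(buf, sizeof(buf[0]), sizeof(buf) / sizeof(buf[0]), f);
	fclose(f);

	while (off + 2 <= words) {
		struct tag_header *hdr = (struct tag_header *)&buf[off];

		if (hdr->size == 0)	/* ATAG_NONE terminates the list */
			break;
		printf("tag 0x%08x, %u words\n",
		       (unsigned)hdr->tag, (unsigned)hdr->size);
		off += hdr->size;
	}
	return 0;
}
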
diff --git a/arch/arm/kernel/Makefile b/arch/arm/kernel/Makefile index 593b56509f4..00d44c6fbfe 100644 --- a/arch/arm/kernel/Makefile +++ b/arch/arm/kernel/Makefile @@ -19,6 +19,8 @@ obj-$(CONFIG_ISA_DMA) += dma-isa.o obj-$(CONFIG_PCI) += bios32.o isa.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_KEXEC) += machine_kexec.o relocate_kernel.o +obj-$(CONFIG_KPROBES) += kprobes.o kprobes-decode.o +obj-$(CONFIG_ATAGS_PROC) += atags.o obj-$(CONFIG_OABI_COMPAT) += sys_oabi-compat.o obj-$(CONFIG_CRUNCH) += crunch.o crunch-bits.o diff --git a/arch/arm/kernel/atags.c b/arch/arm/kernel/atags.c new file mode 100644 index 00000000000..e2e934c3808 --- /dev/null +++ b/arch/arm/kernel/atags.c @@ -0,0 +1,86 @@ +#include <linux/slab.h> +#include <linux/kexec.h> +#include <linux/proc_fs.h> +#include <asm/setup.h> +#include <asm/types.h> +#include <asm/page.h> + +struct buffer { + size_t size; + char *data; +}; +static struct buffer tags_buffer; + +static int +read_buffer(char* page, char** start, off_t off, int count, + int* eof, void* data) +{ + struct buffer *buffer = (struct buffer *)data; + + if (off >= buffer->size) { + *eof = 1; + return 0; + } + + count = min((int) (buffer->size - off), count); + + memcpy(page, &buffer->data[off], count); + + return count; +} + + +static int +create_proc_entries(void) +{ + struct proc_dir_entry* tags_entry; + + tags_entry = create_proc_read_entry("atags", 0400, &proc_root, read_buffer, &tags_buffer); + if (!tags_entry) + return -ENOMEM; + + return 0; +} + + +static char __initdata atags_copy_buf[KEXEC_BOOT_PARAMS_SIZE]; +static char __initdata *atags_copy; + +void __init save_atags(const struct tag *tags) +{ + atags_copy = atags_copy_buf; + memcpy(atags_copy, tags, KEXEC_BOOT_PARAMS_SIZE); +} + + +static int __init init_atags_procfs(void) +{ + struct tag *tag; + int error; + + if (!atags_copy) { + printk(KERN_WARNING "Exporting ATAGs: No saved tags found\n"); + return -EIO; + } + + for (tag = (struct tag *) atags_copy; tag->hdr.size; tag = tag_next(tag)) + ; + + tags_buffer.size = ((char *) tag - atags_copy) + sizeof(tag->hdr); + tags_buffer.data = kmalloc(tags_buffer.size, GFP_KERNEL); + if (tags_buffer.data == NULL) + return -ENOMEM; + memcpy(tags_buffer.data, atags_copy, tags_buffer.size); + + error = create_proc_entries(); + if (error) { + printk(KERN_ERR "Exporting ATAGs: not enough memory\n"); + kfree(tags_buffer.data); + tags_buffer.size = 0; + tags_buffer.data = NULL; + } + + return error; +} + +arch_initcall(init_atags_procfs); diff --git a/arch/arm/kernel/atags.h b/arch/arm/kernel/atags.h new file mode 100644 index 00000000000..e5f028d214a --- /dev/null +++ b/arch/arm/kernel/atags.h @@ -0,0 +1,5 @@ +#ifdef CONFIG_ATAGS_PROC +extern void save_atags(struct tag *tags); +#else +static inline void save_atags(struct tag *tags) { } +#endif diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index cecf658e362..283e14fff99 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -359,7 +359,7 @@ CALL(sys_kexec_load) CALL(sys_utimensat) CALL(sys_signalfd) -/* 350 */ CALL(sys_timerfd) +/* 350 */ CALL(sys_ni_syscall) CALL(sys_eventfd) CALL(sys_fallocate) #ifndef syscalls_counted diff --git a/arch/arm/kernel/dma-isa.c b/arch/arm/kernel/dma-isa.c index 0a3e9ad297d..2f080a35a2d 100644 --- a/arch/arm/kernel/dma-isa.c +++ b/arch/arm/kernel/dma-isa.c @@ -216,7 +216,7 @@ void __init isa_init_dma(dma_t *dma) request_dma(DMA_ISA_CASCADE, "cascade"); - for (i = 0; i < sizeof(dma_resources) / sizeof(dma_resources[0]); i++) + for (i = 0; i < 
ARRAY_SIZE(dma_resources); i++) request_resource(&ioport_resource, dma_resources + i); } } diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index d645897652c..a46d5b45676 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -11,8 +11,8 @@ * * Low-level vector interface routines * - * Note: there is a StrongARM bug in the STMIA rn, {regs}^ instruction that causes - * it to save wrong values... Be aware! + * Note: there is a StrongARM bug in the STMIA rn, {regs}^ instruction + * that causes it to save wrong values... Be aware! */ #include <asm/memory.h> @@ -58,6 +58,12 @@ .endm +#ifdef CONFIG_KPROBES + .section .kprobes.text,"ax",%progbits +#else + .text +#endif + /* * Invalid mode handlers */ @@ -112,8 +118,8 @@ common_invalid: #define SPFIX(code...) #endif - .macro svc_entry - sub sp, sp, #S_FRAME_SIZE + .macro svc_entry, stack_hole=0 + sub sp, sp, #(S_FRAME_SIZE + \stack_hole) SPFIX( tst sp, #4 ) SPFIX( bicne sp, sp, #4 ) stmib sp, {r1 - r12} @@ -121,7 +127,7 @@ common_invalid: ldmia r0, {r1 - r3} add r5, sp, #S_SP @ here for interlock avoidance mov r4, #-1 @ "" "" "" "" - add r0, sp, #S_FRAME_SIZE @ "" "" "" "" + add r0, sp, #(S_FRAME_SIZE + \stack_hole) SPFIX( addne r0, r0, #4 ) str r1, [sp] @ save the "real" r0 copied @ from the exception stack @@ -242,7 +248,14 @@ svc_preempt: .align 5 __und_svc: +#ifdef CONFIG_KPROBES + @ If a kprobe is about to simulate a "stmdb sp..." instruction, + @ it obviously needs free stack space which then will belong to + @ the saved context. + svc_entry 64 +#else svc_entry +#endif @ @ call emulation code, which returns using r9 if it has emulated @@ -339,16 +352,6 @@ __pabt_svc: str r1, [sp] @ save the "real" r0 copied @ from the exception stack -#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) -#ifndef CONFIG_MMU -#warning "NPTL on non MMU needs fixing" -#else - @ make sure our user space atomic helper is aborted - cmp r2, #TASK_SIZE - bichs r3, r3, #PSR_Z_BIT -#endif -#endif - @ @ We are now ready to fill in the remaining blanks on the stack: @ @@ -372,9 +375,25 @@ __pabt_svc: zero_fp .endm + .macro kuser_cmpxchg_check +#if __LINUX_ARM_ARCH__ < 6 && !defined(CONFIG_NEEDS_SYSCALL_FOR_CMPXCHG) +#ifndef CONFIG_MMU +#warning "NPTL on non MMU needs fixing" +#else + @ Make sure our user space atomic helper is restarted + @ if it was interrupted in a critical region. Here we + @ perform a quick test inline since it should be false + @ 99.9999% of the time. The rest is done out of line. + cmp r2, #TASK_SIZE + blhs kuser_cmpxchg_fixup +#endif +#endif + .endm + .align 5 __dabt_usr: usr_entry + kuser_cmpxchg_check @ @ Call the processor-specific abort handler: @@ -404,6 +423,7 @@ __dabt_usr: .align 5 __irq_usr: usr_entry + kuser_cmpxchg_check #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off @@ -446,9 +466,9 @@ __und_usr: @ @ r0 - instruction @ -1: ldrt r0, [r4] adr r9, ret_from_exception adr lr, __und_usr_unknown +1: ldrt r0, [r4] @ @ fallthrough to call_fpe @ @@ -473,6 +493,13 @@ __und_usr: * co-processor instructions. However, we have to watch out * for the ARM6/ARM7 SWI bug. * + * NEON is a special case that has to be handled here. Not all + * NEON instructions are co-processor instructions, so we have + * to make a special case of checking for them. Plus, there's + * five groups of them, so we have a table of mask/opcode pairs + * to check against, and if any match then we branch off into the + * NEON handler code. 
+ * * Emulators may wish to make use of the following registers: * r0 = instruction opcode. * r2 = PC+4 @@ -481,6 +508,23 @@ __und_usr: * lr = unrecognised instruction return address */ call_fpe: +#ifdef CONFIG_NEON + adr r6, .LCneon_opcodes +2: + ldr r7, [r6], #4 @ mask value + cmp r7, #0 @ end mask? + beq 1f + and r8, r0, r7 + ldr r7, [r6], #4 @ opcode bits matching in mask + cmp r8, r7 @ NEON instruction? + bne 2b + get_thread_info r10 + mov r7, #1 + strb r7, [r10, #TI_USED_CP + 10] @ mark CP#10 as used + strb r7, [r10, #TI_USED_CP + 11] @ mark CP#11 as used + b do_vfp @ let VFP handler handle this +1: +#endif tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27 #if defined(CONFIG_CPU_ARM610) || defined(CONFIG_CPU_ARM710) and r8, r0, #0x0f000000 @ mask out op-code bits @@ -530,6 +574,20 @@ call_fpe: mov pc, lr @ CP#14 (Debug) mov pc, lr @ CP#15 (Control) +#ifdef CONFIG_NEON + .align 6 + +.LCneon_opcodes: + .word 0xfe000000 @ mask + .word 0xf2000000 @ opcode + + .word 0xff100000 @ mask + .word 0xf4000000 @ opcode + + .word 0x00000000 @ mask + .word 0x00000000 @ opcode +#endif + do_fpe: enable_irq ldr r4, .LCfp @@ -548,7 +606,7 @@ do_fpe: .data ENTRY(fp_enter) .word no_fp - .text + .previous no_fp: mov pc, lr @@ -669,7 +727,7 @@ __kuser_helper_start: * * Clobbered: * - * the Z flag might be lost + * none * * Definition and user space usage example: * @@ -730,9 +788,6 @@ __kuser_memory_barrier: @ 0xffff0fa0 * * - This routine already includes memory barriers as needed. * - * - A failure might be transient, i.e. it is possible, although unlikely, - * that "failure" be returned even if *ptr == oldval. - * * For example, a user space atomic_add implementation could look like this: * * #define atomic_add(ptr, val) \ @@ -769,46 +824,62 @@ __kuser_cmpxchg: @ 0xffff0fc0 #elif __LINUX_ARM_ARCH__ < 6 +#ifdef CONFIG_MMU + /* - * Theory of operation: - * - * We set the Z flag before loading oldval. If ever an exception - * occurs we can not be sure the loaded value will still be the same - * when the exception returns, therefore the user exception handler - * will clear the Z flag whenever the interrupted user code was - * actually from the kernel address space (see the usr_entry macro). - * - * The post-increment on the str is used to prevent a race with an - * exception happening just after the str instruction which would - * clear the Z flag although the exchange was done. + * The only thing that can break atomicity in this cmpxchg + * implementation is either an IRQ or a data abort exception + * causing another process/thread to be scheduled in the middle + * of the critical sequence. To prevent this, code is added to + * the IRQ and data abort exception handlers to set the pc back + * to the beginning of the critical section if it is found to be + * within that critical section (see kuser_cmpxchg_fixup). */ -#ifdef CONFIG_MMU - teq ip, ip @ set Z flag - ldr ip, [r2] @ load current val - add r3, r2, #1 @ prepare store ptr - teqeq ip, r0 @ compare with oldval if still allowed - streq r1, [r3, #-1]! @ store newval if still allowed - subs r0, r2, r3 @ if r2 == r3 the str occured +1: ldr r3, [r2] @ load current val + subs r3, r3, r0 @ compare with oldval +2: streq r1, [r2] @ store newval if eq + rsbs r0, r3, #0 @ set return val and C flag + usr_ret lr + + .text +kuser_cmpxchg_fixup: + @ Called from kuser_cmpxchg_check macro. + @ r2 = address of interrupted insn (must be preserved). + @ sp = saved regs. r7 and r8 are clobbered. + @ 1b = first critical insn, 2b = last critical insn. 
+ @ If r2 >= 1b and r2 <= 2b then saved pc_usr is set to 1b. + mov r7, #0xffff0fff + sub r7, r7, #(0xffff0fff - (0xffff0fc0 + (1b - __kuser_cmpxchg))) + subs r8, r2, r7 + rsbcss r8, r8, #(2b - 1b) + strcs r7, [sp, #S_PC] + mov pc, lr + .previous + #else #warning "NPTL on non MMU needs fixing" mov r0, #-1 adds r0, r0, #0 -#endif usr_ret lr +#endif #else #ifdef CONFIG_SMP mcr p15, 0, r0, c7, c10, 5 @ dmb #endif - ldrex r3, [r2] +1: ldrex r3, [r2] subs r3, r3, r0 strexeq r3, r1, [r2] + teqeq r3, #1 + beq 1b rsbs r0, r3, #0 + /* beware -- each __kuser slot must be 8 instructions max */ #ifdef CONFIG_SMP - mcr p15, 0, r0, c7, c10, 5 @ dmb -#endif + b __kuser_memory_barrier +#else usr_ret lr +#endif #endif @@ -829,7 +900,7 @@ __kuser_cmpxchg: @ 0xffff0fc0 * * Clobbered: * - * the Z flag might be lost + * none * * Definition and user space usage example: * diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 33e6cc2ffd3..6c90c50a9ee 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -72,7 +72,7 @@ no_work_pending: ldr r1, [sp, #S_PSR] @ get calling cpsr ldr lr, [sp, #S_PC]! @ get pc msr spsr_cxsf, r1 @ save in spsr_svc - ldmdb sp, {r0 - lr}^ @ get calling r1 - lr + ldmdb sp, {r0 - lr}^ @ get calling r0 - lr mov r0, r0 add sp, sp, #S_FRAME_SIZE - S_PC movs pc, lr @ return & move spsr_svc into cpsr diff --git a/arch/arm/kernel/kprobes-decode.c b/arch/arm/kernel/kprobes-decode.c new file mode 100644 index 00000000000..d51bc8b6055 --- /dev/null +++ b/arch/arm/kernel/kprobes-decode.c @@ -0,0 +1,1529 @@ +/* + * arch/arm/kernel/kprobes-decode.c + * + * Copyright (C) 2006, 2007 Motorola Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ + +/* + * We do not have hardware single-stepping on ARM, This + * effort is further complicated by the ARM not having a + * "next PC" register. Instructions that change the PC + * can't be safely single-stepped in a MP environment, so + * we have a lot of work to do: + * + * In the prepare phase: + * *) If it is an instruction that does anything + * with the CPU mode, we reject it for a kprobe. + * (This is out of laziness rather than need. The + * instructions could be simulated.) + * + * *) Otherwise, decode the instruction rewriting its + * registers to take fixed, ordered registers and + * setting a handler for it to run the instruction. + * + * In the execution phase by an instruction's handler: + * + * *) If the PC is written to by the instruction, the + * instruction must be fully simulated in software. + * If it is a conditional instruction, the handler + * will use insn[0] to copy its condition code to + * set r0 to 1 and insn[1] to "mov pc, lr" to return. + * + * *) Otherwise, a modified form of the instruction is + * directly executed. Its handler calls the + * instruction in insn[0]. In insn[1] is a + * "mov pc, lr" to return. + * + * Before calling, load up the reordered registers + * from the original instruction's registers. If one + * of the original input registers is the PC, compute + * and adjust the appropriate input register. 
+ * + * After call completes, copy the output registers to + * the original instruction's original registers. + * + * We don't use a real breakpoint instruction since that + * would have us in the kernel go from SVC mode to SVC + * mode losing the link register. Instead we use an + * undefined instruction. To simplify processing, the + * undefined instruction used for kprobes must be reserved + * exclusively for kprobes use. + * + * TODO: ifdef out some instruction decoding based on architecture. + */ + +#include <linux/kernel.h> +#include <linux/kprobes.h> + +#define sign_extend(x, signbit) ((x) | (0 - ((x) & (1 << (signbit))))) + +#define branch_displacement(insn) sign_extend(((insn) & 0xffffff) << 2, 25) + +#define PSR_fs (PSR_f|PSR_s) + +#define KPROBE_RETURN_INSTRUCTION 0xe1a0f00e /* mov pc, lr */ +#define SET_R0_TRUE_INSTRUCTION 0xe3a00001 /* mov r0, #1 */ + +#define truecc_insn(insn) (((insn) & 0xf0000000) | \ + (SET_R0_TRUE_INSTRUCTION & 0x0fffffff)) + +typedef long (insn_0arg_fn_t)(void); +typedef long (insn_1arg_fn_t)(long); +typedef long (insn_2arg_fn_t)(long, long); +typedef long (insn_3arg_fn_t)(long, long, long); +typedef long (insn_4arg_fn_t)(long, long, long, long); +typedef long long (insn_llret_0arg_fn_t)(void); +typedef long long (insn_llret_3arg_fn_t)(long, long, long); +typedef long long (insn_llret_4arg_fn_t)(long, long, long, long); + +union reg_pair { + long long dr; +#ifdef __LITTLE_ENDIAN + struct { long r0, r1; }; +#else + struct { long r1, r0; }; +#endif +}; + +/* + * For STR and STM instructions, an ARM core may choose to use either + * a +8 or a +12 displacement from the current instruction's address. + * Whichever value is chosen for a given core, it must be the same for + * both instructions and may not change. This function measures it. + */ + +static int str_pc_offset; + +static void __init find_str_pc_offset(void) +{ + int addr, scratch, ret; + + __asm__ ( + "sub %[ret], pc, #4 \n\t" + "str pc, %[addr] \n\t" + "ldr %[scr], %[addr] \n\t" + "sub %[ret], %[scr], %[ret] \n\t" + : [ret] "=r" (ret), [scr] "=r" (scratch), [addr] "+m" (addr)); + + str_pc_offset = ret; +} + +/* + * The insnslot_?arg_r[w]flags() functions below are to keep the + * msr -> *fn -> mrs instruction sequences indivisible so that + * the state of the CPSR flags aren't inadvertently modified + * just before or just after the call. 
+ */ + +static inline long __kprobes +insnslot_0arg_rflags(long cpsr, insn_0arg_fn_t *fn) +{ + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long long __kprobes +insnslot_llret_0arg_rflags(long cpsr, insn_llret_0arg_fn_t *fn) +{ + register long ret0 asm("r0"); + register long ret1 asm("r1"); + union reg_pair fnr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret0), "=r" (ret1) + : [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + fnr.r0 = ret0; + fnr.r1 = ret1; + return fnr.dr; +} + +static inline long __kprobes +insnslot_1arg_rflags(long r0, long cpsr, insn_1arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : "0" (rr0), [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long __kprobes +insnslot_2arg_rflags(long r0, long r1, long cpsr, insn_2arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : "0" (rr0), "r" (rr1), + [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long __kprobes +insnslot_3arg_rflags(long r0, long r1, long r2, long cpsr, insn_3arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : "0" (rr0), "r" (rr1), "r" (rr2), + [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long long __kprobes +insnslot_llret_3arg_rflags(long r0, long r1, long r2, long cpsr, + insn_llret_3arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long ret0 asm("r0"); + register long ret1 asm("r1"); + union reg_pair fnr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret0), "=r" (ret1) + : "0" (rr0), "r" (rr1), "r" (rr2), + [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + fnr.r0 = ret0; + fnr.r1 = ret1; + return fnr.dr; +} + +static inline long __kprobes +insnslot_4arg_rflags(long r0, long r1, long r2, long r3, long cpsr, + insn_4arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long rr3 asm("r3") = r3; + register long ret asm("r0"); + + __asm__ __volatile__ ( + "msr cpsr_fs, %[cpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + : "=r" (ret) + : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), + [cpsr] "r" (cpsr), [fn] "r" (fn) + : "lr", "cc" + ); + return ret; +} + +static inline long __kprobes +insnslot_1arg_rwflags(long r0, long *cpsr, insn_1arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long ret asm("r0"); + long oldcpsr = *cpsr; + long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret), [newcpsr] "=r" (newcpsr) + : "0" (rr0), [oldcpsr] 
"r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + return ret; +} + +static inline long __kprobes +insnslot_2arg_rwflags(long r0, long r1, long *cpsr, insn_2arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long ret asm("r0"); + long oldcpsr = *cpsr; + long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret), [newcpsr] "=r" (newcpsr) + : "0" (rr0), "r" (rr1), [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + return ret; +} + +static inline long __kprobes +insnslot_3arg_rwflags(long r0, long r1, long r2, long *cpsr, + insn_3arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long ret asm("r0"); + long oldcpsr = *cpsr; + long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret), [newcpsr] "=r" (newcpsr) + : "0" (rr0), "r" (rr1), "r" (rr2), + [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + return ret; +} + +static inline long __kprobes +insnslot_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr, + insn_4arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long rr3 asm("r3") = r3; + register long ret asm("r0"); + long oldcpsr = *cpsr; + long newcpsr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret), [newcpsr] "=r" (newcpsr) + : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), + [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + return ret; +} + +static inline long long __kprobes +insnslot_llret_4arg_rwflags(long r0, long r1, long r2, long r3, long *cpsr, + insn_llret_4arg_fn_t *fn) +{ + register long rr0 asm("r0") = r0; + register long rr1 asm("r1") = r1; + register long rr2 asm("r2") = r2; + register long rr3 asm("r3") = r3; + register long ret0 asm("r0"); + register long ret1 asm("r1"); + long oldcpsr = *cpsr; + long newcpsr; + union reg_pair fnr; + + __asm__ __volatile__ ( + "msr cpsr_fs, %[oldcpsr] \n\t" + "mov lr, pc \n\t" + "mov pc, %[fn] \n\t" + "mrs %[newcpsr], cpsr \n\t" + : "=r" (ret0), "=r" (ret1), [newcpsr] "=r" (newcpsr) + : "0" (rr0), "r" (rr1), "r" (rr2), "r" (rr3), + [oldcpsr] "r" (oldcpsr), [fn] "r" (fn) + : "lr", "cc" + ); + *cpsr = (oldcpsr & ~PSR_fs) | (newcpsr & PSR_fs); + fnr.r0 = ret0; + fnr.r1 = ret1; + return fnr.dr; +} + +/* + * To avoid the complications of mimicing single-stepping on a + * processor without a Next-PC or a single-step mode, and to + * avoid having to deal with the side-effects of boosting, we + * simulate or emulate (almost) all ARM instructions. + * + * "Simulation" is where the instruction's behavior is duplicated in + * C code. "Emulation" is where the original instruction is rewritten + * and executed, often by altering its registers. + * + * By having all behavior of the kprobe'd instruction completed before + * returning from the kprobe_handler(), all locks (scheduler and + * interrupt) can safely be released. 
There is no need for secondary + * breakpoints, no race with MP or preemptable kernels, nor having to + * clean up resources counts at a later time impacting overall system + * performance. By rewriting the instruction, only the minimum registers + * need to be loaded and saved back optimizing performance. + * + * Calling the insnslot_*_rwflags version of a function doesn't hurt + * anything even when the CPSR flags aren't updated by the + * instruction. It's just a little slower in return for saving + * a little space by not having a duplicate function that doesn't + * update the flags. (The same optimization can be said for + * instructions that do or don't perform register writeback) + * Also, instructions can either read the flags, only write the + * flags, or read and write the flags. To save combinations + * rather than for sheer performance, flag functions just assume + * read and write of flags. + */ + +static void __kprobes simulate_bbl(struct kprobe *p, struct pt_regs *regs) +{ + insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + long iaddr = (long)p->addr; + int disp = branch_displacement(insn); + + if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) + return; + + if (insn & (1 << 24)) + regs->ARM_lr = iaddr + 4; + + regs->ARM_pc = iaddr + 8 + disp; +} + +static void __kprobes simulate_blx1(struct kprobe *p, struct pt_regs *regs) +{ + kprobe_opcode_t insn = p->opcode; + long iaddr = (long)p->addr; + int disp = branch_displacement(insn); + + regs->ARM_lr = iaddr + 4; + regs->ARM_pc = iaddr + 8 + disp + ((insn >> 23) & 0x2); + regs->ARM_cpsr |= PSR_T_BIT; +} + +static void __kprobes simulate_blx2bx(struct kprobe *p, struct pt_regs *regs) +{ + insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + int rm = insn & 0xf; + long rmv = regs->uregs[rm]; + + if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) + return; + + if (insn & (1 << 5)) + regs->ARM_lr = (long)p->addr + 4; + + regs->ARM_pc = rmv & ~0x1; + regs->ARM_cpsr &= ~PSR_T_BIT; + if (rmv & 0x1) + regs->ARM_cpsr |= PSR_T_BIT; +} + +static void __kprobes simulate_ldm1stm1(struct kprobe *p, struct pt_regs *regs) +{ + insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + int rn = (insn >> 16) & 0xf; + int lbit = insn & (1 << 20); + int wbit = insn & (1 << 21); + int ubit = insn & (1 << 23); + int pbit = insn & (1 << 24); + long *addr = (long *)regs->uregs[rn]; + int reg_bit_vector; + int reg_count; + + if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) + return; + + reg_count = 0; + reg_bit_vector = insn & 0xffff; + while (reg_bit_vector) { + reg_bit_vector &= (reg_bit_vector - 1); + ++reg_count; + } + + if (!ubit) + addr -= reg_count; + addr += (!pbit ^ !ubit); + + reg_bit_vector = insn & 0xffff; + while (reg_bit_vector) { + int reg = __ffs(reg_bit_vector); + reg_bit_vector &= (reg_bit_vector - 1); + if (lbit) + regs->uregs[reg] = *addr++; + else + *addr++ = regs->uregs[reg]; + } + + if (wbit) { + if (!ubit) + addr -= reg_count; + addr -= (!pbit ^ !ubit); + regs->uregs[rn] = (long)addr; + } +} + +static void __kprobes simulate_stm1_pc(struct kprobe *p, struct pt_regs *regs) +{ + insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; + + if (!insnslot_1arg_rflags(0, regs->ARM_cpsr, i_fn)) + return; + + regs->ARM_pc = (long)p->addr + str_pc_offset; + simulate_ldm1stm1(p, regs); + regs->ARM_pc = (long)p->addr + 4; +} + +static void __kprobes simulate_mov_ipsp(struct kprobe *p, struct pt_regs 
*regs) +{ + regs->uregs[12] = regs->uregs[13]; +} + +static void __kprobes emulate_ldcstc(struct kprobe *p, struct pt_regs *regs) +{ + insn_1arg_fn_t *i_fn = (insn_1arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + int rn = (insn >> 16) & 0xf; + long rnv = regs->uregs[rn]; + + /* Save Rn in case of writeback. */ + regs->uregs[rn] = insnslot_1arg_rflags(rnv, regs->ARM_cpsr, i_fn); +} + +static void __kprobes emulate_ldrd(struct kprobe *p, struct pt_regs *regs) +{ + insn_2arg_fn_t *i_fn = (insn_2arg_fn_t *)&p->ainsn.insn[0]; + kprobe_opcode_t insn = p->opcode; + int rd = (insn >> 12) & 0xf; + int rn = (insn >> 16) & 0xf; + int rm = insn & 0xf; /* rm may be invalid, don't care. */ + + /* Not following the C calling convention here, so need asm(). */ + __asm__ __volatile__ ( + "ldr r0, %[rn] \n\t" + "ldr r1, %[rm] \n\t" + "msr cpsr_fs, %[cpsr]\n\t" + "mov lr, pc \n\t" + "mov pc, %[i_fn] \n\t" + "str r0, %[rn] \n\t" /* in case of writeback */ + "str r2, %[rd0] \n\t" + "str r3, %[rd1] \n\t" + : [rn] "+m" (regs->uregs[rn]), + [rd0] "=m" (regs->uregs[rd]), + [rd1] "=m" (regs->uregs[rd+1]) + : [rm] "m" (regs->uregs[rm]), + [cpsr] "r" (regs->ARM_cpsr), + [i_fn] "r" (i_fn) + : "r0", "r1", "r2", "r3", "lr", "cc" + ); +} + +static void __kprobes emulate_strd(struct |