diff options
Diffstat (limited to 'arch/arm64/kernel')
45 files changed, 5277 insertions, 1223 deletions
diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 74239c31e25..cdaedad3afe 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -4,19 +4,31 @@ CPPFLAGS_vmlinux.lds := -DTEXT_OFFSET=$(TEXT_OFFSET) AFLAGS_head.o := -DTEXT_OFFSET=$(TEXT_OFFSET) +CFLAGS_efi-stub.o := -DTEXT_OFFSET=$(TEXT_OFFSET) \ + -I$(src)/../../../scripts/dtc/libfdt + +CFLAGS_REMOVE_ftrace.o = -pg +CFLAGS_REMOVE_insn.o = -pg +CFLAGS_REMOVE_return_address.o = -pg # Object file lists. arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o + hyp-stub.o psci.o cpu_ops.o insn.o return_address.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o +arm64-obj-$(CONFIG_FUNCTION_TRACER) += ftrace.o entry-ftrace.o arm64-obj-$(CONFIG_MODULES) += arm64ksyms.o module.o -arm64-obj-$(CONFIG_SMP) += smp.o +arm64-obj-$(CONFIG_SMP) += smp.o smp_spin_table.o topology.o +arm64-obj-$(CONFIG_PERF_EVENTS) += perf_regs.o arm64-obj-$(CONFIG_HW_PERF_EVENTS) += perf_event.o -arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT)+= hw_breakpoint.o +arm64-obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o +arm64-obj-$(CONFIG_ARM64_CPU_SUSPEND) += sleep.o suspend.o +arm64-obj-$(CONFIG_JUMP_LABEL) += jump_label.o +arm64-obj-$(CONFIG_KGDB) += kgdb.o +arm64-obj-$(CONFIG_EFI) += efi.o efi-stub.o efi-entry.o obj-y += $(arm64-obj-y) vdso/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index cef3925eaf6..a85843ddbde 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -29,18 +29,39 @@ #include <asm/checksum.h> - /* user mem (segment) */ -EXPORT_SYMBOL(__strnlen_user); -EXPORT_SYMBOL(__strncpy_from_user); - EXPORT_SYMBOL(copy_page); +EXPORT_SYMBOL(clear_page); + /* user mem (segment) */ EXPORT_SYMBOL(__copy_from_user); EXPORT_SYMBOL(__copy_to_user); EXPORT_SYMBOL(__clear_user); - - /* bitops */ -EXPORT_SYMBOL(__atomic_hash); +EXPORT_SYMBOL(__copy_in_user); /* physical memory */ EXPORT_SYMBOL(memstart_addr); + + /* string / mem functions */ +EXPORT_SYMBOL(strchr); +EXPORT_SYMBOL(strrchr); +EXPORT_SYMBOL(strcmp); +EXPORT_SYMBOL(strncmp); +EXPORT_SYMBOL(strlen); +EXPORT_SYMBOL(strnlen); +EXPORT_SYMBOL(memset); +EXPORT_SYMBOL(memcpy); +EXPORT_SYMBOL(memmove); +EXPORT_SYMBOL(memchr); +EXPORT_SYMBOL(memcmp); + + /* atomic bitops */ +EXPORT_SYMBOL(set_bit); +EXPORT_SYMBOL(test_and_set_bit); +EXPORT_SYMBOL(clear_bit); +EXPORT_SYMBOL(test_and_clear_bit); +EXPORT_SYMBOL(change_bit); +EXPORT_SYMBOL(test_and_change_bit); + +#ifdef CONFIG_FUNCTION_TRACER +EXPORT_SYMBOL(_mcount); +#endif diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index a2a4d810bea..646f888387c 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -21,9 +21,12 @@ #include <linux/sched.h> #include <linux/mm.h> #include <linux/dma-mapping.h> +#include <linux/kvm_host.h> #include <asm/thread_info.h> #include <asm/memory.h> #include <asm/cputable.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> #include <asm/vdso_datapage.h> #include <linux/kbuild.h> @@ -104,5 +107,47 @@ int main(void) BLANK(); DEFINE(TZ_MINWEST, offsetof(struct timezone, tz_minuteswest)); DEFINE(TZ_DSTTIME, offsetof(struct timezone, tz_dsttime)); + BLANK(); +#ifdef CONFIG_KVM_ARM_HOST + DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); + DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); + DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); + DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); + DEFINE(CPU_SP_EL1, offsetof(struct kvm_regs, sp_el1)); + DEFINE(CPU_ELR_EL1, offsetof(struct kvm_regs, elr_el1)); + DEFINE(CPU_SPSR, offsetof(struct kvm_regs, spsr)); + DEFINE(CPU_SYSREGS, offsetof(struct kvm_cpu_context, sys_regs)); + DEFINE(VCPU_ESR_EL2, offsetof(struct kvm_vcpu, arch.fault.esr_el2)); + DEFINE(VCPU_FAR_EL2, offsetof(struct kvm_vcpu, arch.fault.far_el2)); + DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, arch.fault.hpfar_el2)); + DEFINE(VCPU_HCR_EL2, offsetof(struct kvm_vcpu, arch.hcr_el2)); + DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines)); + DEFINE(VCPU_HOST_CONTEXT, offsetof(struct kvm_vcpu, arch.host_cpu_context)); + DEFINE(VCPU_TIMER_CNTV_CTL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_ctl)); + DEFINE(VCPU_TIMER_CNTV_CVAL, offsetof(struct kvm_vcpu, arch.timer_cpu.cntv_cval)); + DEFINE(KVM_TIMER_CNTVOFF, offsetof(struct kvm, arch.timer.cntvoff)); + DEFINE(KVM_TIMER_ENABLED, offsetof(struct kvm, arch.timer.enabled)); + DEFINE(VCPU_KVM, offsetof(struct kvm_vcpu, kvm)); + DEFINE(VCPU_VGIC_CPU, offsetof(struct kvm_vcpu, arch.vgic_cpu)); + DEFINE(VGIC_CPU_HCR, offsetof(struct vgic_cpu, vgic_hcr)); + DEFINE(VGIC_CPU_VMCR, offsetof(struct vgic_cpu, vgic_vmcr)); + DEFINE(VGIC_CPU_MISR, offsetof(struct vgic_cpu, vgic_misr)); + DEFINE(VGIC_CPU_EISR, offsetof(struct vgic_cpu, vgic_eisr)); + DEFINE(VGIC_CPU_ELRSR, offsetof(struct vgic_cpu, vgic_elrsr)); + DEFINE(VGIC_CPU_APR, offsetof(struct vgic_cpu, vgic_apr)); + DEFINE(VGIC_CPU_LR, offsetof(struct vgic_cpu, vgic_lr)); + DEFINE(VGIC_CPU_NR_LR, offsetof(struct vgic_cpu, nr_lr)); + DEFINE(KVM_VTTBR, offsetof(struct kvm, arch.vttbr)); + DEFINE(KVM_VGIC_VCTRL, offsetof(struct kvm, arch.vgic.vctrl_base)); +#endif +#ifdef CONFIG_ARM64_CPU_SUSPEND + DEFINE(CPU_SUSPEND_SZ, sizeof(struct cpu_suspend_ctx)); + DEFINE(CPU_CTX_SP, offsetof(struct cpu_suspend_ctx, sp)); + DEFINE(MPIDR_HASH_MASK, offsetof(struct mpidr_hash, mask)); + DEFINE(MPIDR_HASH_SHIFTS, offsetof(struct mpidr_hash, shift_aff)); + DEFINE(SLEEP_SAVE_SP_SZ, sizeof(struct sleep_save_sp)); + DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys)); + DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash)); +#endif return 0; } diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c new file mode 100644 index 00000000000..d62d12fb36c --- /dev/null +++ b/arch/arm64/kernel/cpu_ops.c @@ -0,0 +1,87 @@ +/* + * CPU kernel entry/exit control + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <asm/cpu_ops.h> +#include <asm/smp_plat.h> +#include <linux/errno.h> +#include <linux/of.h> +#include <linux/string.h> + +extern const struct cpu_operations smp_spin_table_ops; +extern const struct cpu_operations cpu_psci_ops; + +const struct cpu_operations *cpu_ops[NR_CPUS]; + +static const struct cpu_operations *supported_cpu_ops[] __initconst = { +#ifdef CONFIG_SMP + &smp_spin_table_ops, + &cpu_psci_ops, +#endif + NULL, +}; + +static const struct cpu_operations * __init cpu_get_ops(const char *name) +{ + const struct cpu_operations **ops = supported_cpu_ops; + + while (*ops) { + if (!strcmp(name, (*ops)->name)) + return *ops; + + ops++; + } + + return NULL; +} + +/* + * Read a cpu's enable method from the device tree and record it in cpu_ops. + */ +int __init cpu_read_ops(struct device_node *dn, int cpu) +{ + const char *enable_method = of_get_property(dn, "enable-method", NULL); + if (!enable_method) { + /* + * The boot CPU may not have an enable method (e.g. when + * spin-table is used for secondaries). Don't warn spuriously. + */ + if (cpu != 0) + pr_err("%s: missing enable-method property\n", + dn->full_name); + return -ENOENT; + } + + cpu_ops[cpu] = cpu_get_ops(enable_method); + if (!cpu_ops[cpu]) { + pr_warn("%s: unsupported enable-method property: %s\n", + dn->full_name, enable_method); + return -EOPNOTSUPP; + } + + return 0; +} + +void __init cpu_read_bootcpu_ops(void) +{ + struct device_node *dn = of_get_cpu_node(0, NULL); + if (!dn) { + pr_err("Failed to find device node for boot cpu\n"); + return; + } + cpu_read_ops(dn, 0); +} diff --git a/arch/arm64/kernel/cputable.c b/arch/arm64/kernel/cputable.c index 63cfc4a43f4..fd3993cb060 100644 --- a/arch/arm64/kernel/cputable.c +++ b/arch/arm64/kernel/cputable.c @@ -22,7 +22,7 @@ extern unsigned long __cpu_setup(void); -struct cpu_info __initdata cpu_table[] = { +struct cpu_info cpu_table[] = { { .cpu_id_val = 0x000f0000, .cpu_id_mask = 0x000f0000, diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 0c3ba9f5137..a7fb874b595 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -24,9 +24,9 @@ #include <linux/init.h> #include <linux/ptrace.h> #include <linux/stat.h> +#include <linux/uaccess.h> #include <asm/debug-monitors.h> -#include <asm/local.h> #include <asm/cputype.h> #include <asm/system_misc.h> @@ -88,8 +88,8 @@ early_param("nodebugmon", early_debug_disable); * Keep track of debug users on each core. * The ref counts are per-cpu so we use a local_t type. */ -static DEFINE_PER_CPU(local_t, mde_ref_count); -static DEFINE_PER_CPU(local_t, kde_ref_count); +static DEFINE_PER_CPU(int, mde_ref_count); +static DEFINE_PER_CPU(int, kde_ref_count); void enable_debug_monitors(enum debug_el el) { @@ -97,11 +97,11 @@ void enable_debug_monitors(enum debug_el el) WARN_ON(preemptible()); - if (local_inc_return(&__get_cpu_var(mde_ref_count)) == 1) + if (this_cpu_inc_return(mde_ref_count) == 1) enable = DBG_MDSCR_MDE; if (el == DBG_ACTIVE_EL1 && - local_inc_return(&__get_cpu_var(kde_ref_count)) == 1) + this_cpu_inc_return(kde_ref_count) == 1) enable |= DBG_MDSCR_KDE; if (enable && debug_enabled) { @@ -117,11 +117,11 @@ void disable_debug_monitors(enum debug_el el) WARN_ON(preemptible()); - if (local_dec_and_test(&__get_cpu_var(mde_ref_count))) + if (this_cpu_dec_return(mde_ref_count) == 0) disable = ~DBG_MDSCR_MDE; if (el == DBG_ACTIVE_EL1 && - local_dec_and_test(&__get_cpu_var(kde_ref_count))) + this_cpu_dec_return(kde_ref_count) == 0) disable &= ~DBG_MDSCR_KDE; if (disable) { @@ -136,13 +136,10 @@ void disable_debug_monitors(enum debug_el el) */ static void clear_os_lock(void *unused) { - asm volatile("msr mdscr_el1, %0" : : "r" (0)); - isb(); asm volatile("msr oslar_el1, %0" : : "r" (0)); - isb(); } -static int __cpuinit os_lock_notify(struct notifier_block *self, +static int os_lock_notify(struct notifier_block *self, unsigned long action, void *data) { int cpu = (unsigned long)data; @@ -151,18 +148,23 @@ static int __cpuinit os_lock_notify(struct notifier_block *self, return NOTIFY_OK; } -static struct notifier_block __cpuinitdata os_lock_nb = { +static struct notifier_block os_lock_nb = { .notifier_call = os_lock_notify, }; -static int __cpuinit debug_monitors_init(void) +static int debug_monitors_init(void) { + cpu_notifier_register_begin(); + /* Clear the OS lock. */ - smp_call_function(clear_os_lock, NULL, 1); - clear_os_lock(NULL); + on_each_cpu(clear_os_lock, NULL, 1); + isb(); + local_dbg_enable(); /* Register hotplug handler. */ - register_cpu_notifier(&os_lock_nb); + __register_cpu_notifier(&os_lock_nb); + + cpu_notifier_register_done(); return 0; } postcore_initcall(debug_monitors_init); @@ -189,6 +191,48 @@ static void clear_regs_spsr_ss(struct pt_regs *regs) regs->pstate = spsr; } +/* EL1 Single Step Handler hooks */ +static LIST_HEAD(step_hook); +static DEFINE_RWLOCK(step_hook_lock); + +void register_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_add(&hook->node, &step_hook); + write_unlock(&step_hook_lock); +} + +void unregister_step_hook(struct step_hook *hook) +{ + write_lock(&step_hook_lock); + list_del(&hook->node); + write_unlock(&step_hook_lock); +} + +/* + * Call registered single step handers + * There is no Syndrome info to check for determining the handler. + * So we call all the registered handlers, until the right handler is + * found which returns zero. + */ +static int call_step_hook(struct pt_regs *regs, unsigned int esr) +{ + struct step_hook *hook; + int retval = DBG_HOOK_ERROR; + + read_lock(&step_hook_lock); + + list_for_each_entry(hook, &step_hook, node) { + retval = hook->fn(regs, esr); + if (retval == DBG_HOOK_HANDLED) + break; + } + + read_unlock(&step_hook_lock); + + return retval; +} + static int single_step_handler(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -216,7 +260,9 @@ static int single_step_handler(unsigned long addr, unsigned int esr, */ user_rewind_single_step(current); } else { - /* TODO: route to KGDB */ + if (call_step_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; + pr_warning("Unexpected kernel single-step exception at EL1\n"); /* * Re-enable stepping since we know that we will be @@ -228,13 +274,117 @@ static int single_step_handler(unsigned long addr, unsigned int esr, return 0; } -static int __init single_step_init(void) +/* + * Breakpoint handler is re-entrant as another breakpoint can + * hit within breakpoint handler, especically in kprobes. + * Use reader/writer locks instead of plain spinlock. + */ +static LIST_HEAD(break_hook); +static DEFINE_RWLOCK(break_hook_lock); + +void register_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_add(&hook->node, &break_hook); + write_unlock(&break_hook_lock); +} + +void unregister_break_hook(struct break_hook *hook) +{ + write_lock(&break_hook_lock); + list_del(&hook->node); + write_unlock(&break_hook_lock); +} + +static int call_break_hook(struct pt_regs *regs, unsigned int esr) +{ + struct break_hook *hook; + int (*fn)(struct pt_regs *regs, unsigned int esr) = NULL; + + read_lock(&break_hook_lock); + list_for_each_entry(hook, &break_hook, node) + if ((esr & hook->esr_mask) == hook->esr_val) + fn = hook->fn; + read_unlock(&break_hook_lock); + + return fn ? fn(regs, esr) : DBG_HOOK_ERROR; +} + +static int brk_handler(unsigned long addr, unsigned int esr, + struct pt_regs *regs) +{ + siginfo_t info; + + if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) + return 0; + + if (!user_mode(regs)) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +int aarch32_break_handler(struct pt_regs *regs) +{ + siginfo_t info; + u32 arm_instr; + u16 thumb_instr; + bool bp = false; + void __user *pc = (void __user *)instruction_pointer(regs); + + if (!compat_user_mode(regs)) + return -EFAULT; + + if (compat_thumb_mode(regs)) { + /* get 16-bit Thumb instruction */ + get_user(thumb_instr, (u16 __user *)pc); + thumb_instr = le16_to_cpu(thumb_instr); + if (thumb_instr == AARCH32_BREAK_THUMB2_LO) { + /* get second half of 32-bit Thumb-2 instruction */ + get_user(thumb_instr, (u16 __user *)(pc + 2)); + thumb_instr = le16_to_cpu(thumb_instr); + bp = thumb_instr == AARCH32_BREAK_THUMB2_HI; + } else { + bp = thumb_instr == AARCH32_BREAK_THUMB; + } + } else { + /* 32-bit ARM instruction */ + get_user(arm_instr, (u32 __user *)pc); + arm_instr = le32_to_cpu(arm_instr); + bp = (arm_instr & ~0xf0000000) == AARCH32_BREAK_ARM; + } + + if (!bp) + return -EFAULT; + + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = pc, + }; + + force_sig_info(SIGTRAP, &info, current); + return 0; +} + +static int __init debug_traps_init(void) { hook_debug_fault_code(DBG_ESR_EVT_HWSS, single_step_handler, SIGTRAP, TRAP_HWBKPT, "single-step handler"); + hook_debug_fault_code(DBG_ESR_EVT_BRK, brk_handler, SIGTRAP, + TRAP_BRKPT, "ptrace BRK handler"); return 0; } -arch_initcall(single_step_init); +arch_initcall(debug_traps_init); /* Re-enable single step for syscall restarting. */ void user_rewind_single_step(struct task_struct *task) diff --git a/arch/arm64/kernel/efi-entry.S b/arch/arm64/kernel/efi-entry.S new file mode 100644 index 00000000000..619b1dd7bcd --- /dev/null +++ b/arch/arm64/kernel/efi-entry.S @@ -0,0 +1,108 @@ +/* + * EFI entry point. + * + * Copyright (C) 2013, 2014 Red Hat, Inc. + * Author: Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/linkage.h> +#include <linux/init.h> + +#include <asm/assembler.h> + +#define EFI_LOAD_ERROR 0x8000000000000001 + + __INIT + + /* + * We arrive here from the EFI boot manager with: + * + * * CPU in little-endian mode + * * MMU on with identity-mapped RAM + * * Icache and Dcache on + * + * We will most likely be running from some place other than where + * we want to be. The kernel image wants to be placed at TEXT_OFFSET + * from start of RAM. + */ +ENTRY(efi_stub_entry) + /* + * Create a stack frame to save FP/LR with extra space + * for image_addr variable passed to efi_entry(). + */ + stp x29, x30, [sp, #-32]! + + /* + * Call efi_entry to do the real work. + * x0 and x1 are already set up by firmware. Current runtime + * address of image is calculated and passed via *image_addr. + * + * unsigned long efi_entry(void *handle, + * efi_system_table_t *sys_table, + * unsigned long *image_addr) ; + */ + adrp x8, _text + add x8, x8, #:lo12:_text + add x2, sp, 16 + str x8, [x2] + bl efi_entry + cmn x0, #1 + b.eq efi_load_fail + + /* + * efi_entry() will have relocated the kernel image if necessary + * and we return here with device tree address in x0 and the kernel + * entry point stored at *image_addr. Save those values in registers + * which are callee preserved. + */ + mov x20, x0 // DTB address + ldr x0, [sp, #16] // relocated _text address + mov x21, x0 + + /* + * Flush dcache covering current runtime addresses + * of kernel text/data. Then flush all of icache. + */ + adrp x1, _text + add x1, x1, #:lo12:_text + adrp x2, _edata + add x2, x2, #:lo12:_edata + sub x1, x2, x1 + + bl __flush_dcache_area + ic ialluis + + /* Turn off Dcache and MMU */ + mrs x0, CurrentEL + cmp x0, #CurrentEL_EL2 + b.ne 1f + mrs x0, sctlr_el2 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el2, x0 + isb + b 2f +1: + mrs x0, sctlr_el1 + bic x0, x0, #1 << 0 // clear SCTLR.M + bic x0, x0, #1 << 2 // clear SCTLR.C + msr sctlr_el1, x0 + isb +2: + /* Jump to kernel entry point */ + mov x0, x20 + mov x1, xzr + mov x2, xzr + mov x3, xzr + br x21 + +efi_load_fail: + mov x0, #EFI_LOAD_ERROR + ldp x29, x30, [sp], #32 + ret + +ENDPROC(efi_stub_entry) diff --git a/arch/arm64/kernel/efi-stub.c b/arch/arm64/kernel/efi-stub.c new file mode 100644 index 00000000000..e786e6cdc40 --- /dev/null +++ b/arch/arm64/kernel/efi-stub.c @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2013, 2014 Linaro Ltd; <roy.franz@linaro.org> + * + * This file implements the EFI boot stub for the arm64 kernel. + * Adapted from ARM version by Mark Salter <msalter@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ +#include <linux/efi.h> +#include <linux/libfdt.h> +#include <asm/sections.h> + +/* + * AArch64 requires the DTB to be 8-byte aligned in the first 512MiB from + * start of kernel and may not cross a 2MiB boundary. We set alignment to + * 2MiB so we know it won't cross a 2MiB boundary. + */ +#define EFI_FDT_ALIGN SZ_2M /* used by allocate_new_fdt_and_exit_boot() */ +#define MAX_FDT_OFFSET SZ_512M + +#define efi_call_early(f, ...) sys_table_arg->boottime->f(__VA_ARGS__) + +static void efi_char16_printk(efi_system_table_t *sys_table_arg, + efi_char16_t *str); + +static efi_status_t efi_open_volume(efi_system_table_t *sys_table, + void *__image, void **__fh); +static efi_status_t efi_file_close(void *handle); + +static efi_status_t +efi_file_read(void *handle, unsigned long *size, void *addr); + +static efi_status_t +efi_file_size(efi_system_table_t *sys_table, void *__fh, + efi_char16_t *filename_16, void **handle, u64 *file_sz); + +/* Include shared EFI stub code */ +#include "../../../drivers/firmware/efi/efi-stub-helper.c" +#include "../../../drivers/firmware/efi/fdt.c" +#include "../../../drivers/firmware/efi/arm-stub.c" + + +static efi_status_t handle_kernel_image(efi_system_table_t *sys_table, + unsigned long *image_addr, + unsigned long *image_size, + unsigned long *reserve_addr, + unsigned long *reserve_size, + unsigned long dram_base, + efi_loaded_image_t *image) +{ + efi_status_t status; + unsigned long kernel_size, kernel_memsize = 0; + + /* Relocate the image, if required. */ + kernel_size = _edata - _text; + if (*image_addr != (dram_base + TEXT_OFFSET)) { + kernel_memsize = kernel_size + (_end - _edata); + status = efi_relocate_kernel(sys_table, image_addr, + kernel_size, kernel_memsize, + dram_base + TEXT_OFFSET, + PAGE_SIZE); + if (status != EFI_SUCCESS) { + pr_efi_err(sys_table, "Failed to relocate kernel\n"); + return status; + } + if (*image_addr != (dram_base + TEXT_OFFSET)) { + pr_efi_err(sys_table, "Failed to alloc kernel memory\n"); + efi_free(sys_table, kernel_memsize, *image_addr); + return EFI_ERROR; + } + *image_size = kernel_memsize; + } + + + return EFI_SUCCESS; +} diff --git a/arch/arm64/kernel/efi.c b/arch/arm64/kernel/efi.c new file mode 100644 index 00000000000..14db1f6e8d7 --- /dev/null +++ b/arch/arm64/kernel/efi.c @@ -0,0 +1,469 @@ +/* + * Extensible Firmware Interface + * + * Based on Extensible Firmware Interface Specification version 2.4 + * + * Copyright (C) 2013, 2014 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/efi.h> +#include <linux/export.h> +#include <linux/memblock.h> +#include <linux/bootmem.h> +#include <linux/of.h> +#include <linux/of_fdt.h> +#include <linux/sched.h> +#include <linux/slab.h> + +#include <asm/cacheflush.h> +#include <asm/efi.h> +#include <asm/tlbflush.h> +#include <asm/mmu_context.h> + +struct efi_memory_map memmap; + +static efi_runtime_services_t *runtime; + +static u64 efi_system_table; + +static int uefi_debug __initdata; +static int __init uefi_debug_setup(char *str) +{ + uefi_debug = 1; + + return 0; +} +early_param("uefi_debug", uefi_debug_setup); + +static int __init is_normal_ram(efi_memory_desc_t *md) +{ + if (md->attribute & EFI_MEMORY_WB) + return 1; + return 0; +} + +static void __init efi_setup_idmap(void) +{ + struct memblock_region *r; + efi_memory_desc_t *md; + u64 paddr, npages, size; + + for_each_memblock(memory, r) + create_id_mapping(r->base, r->size, 0); + + /* map runtime io spaces */ + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME) || is_normal_ram(md)) + continue; + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + create_id_mapping(paddr, size, 1); + } +} + +static int __init uefi_init(void) +{ + efi_char16_t *c16; + char vendor[100] = "unknown"; + int i, retval; + + efi.systab = early_memremap(efi_system_table, + sizeof(efi_system_table_t)); + if (efi.systab == NULL) { + pr_warn("Unable to map EFI system table.\n"); + return -ENOMEM; + } + + set_bit(EFI_BOOT, &efi.flags); + set_bit(EFI_64BIT, &efi.flags); + + /* + * Verify the EFI Table + */ + if (efi.systab->hdr.signature != EFI_SYSTEM_TABLE_SIGNATURE) { + pr_err("System table signature incorrect\n"); + return -EINVAL; + } + if ((efi.systab->hdr.revision >> 16) < 2) + pr_warn("Warning: EFI system table version %d.%02d, expected 2.00 or greater\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff); + + /* Show what we know for posterity */ + c16 = early_memremap(efi.systab->fw_vendor, + sizeof(vendor)); + if (c16) { + for (i = 0; i < (int) sizeof(vendor) - 1 && *c16; ++i) + vendor[i] = c16[i]; + vendor[i] = '\0'; + } + + pr_info("EFI v%u.%.02u by %s\n", + efi.systab->hdr.revision >> 16, + efi.systab->hdr.revision & 0xffff, vendor); + + retval = efi_config_init(NULL); + if (retval == 0) + set_bit(EFI_CONFIG_TABLES, &efi.flags); + + early_memunmap(c16, sizeof(vendor)); + early_memunmap(efi.systab, sizeof(efi_system_table_t)); + + return retval; +} + +static __initdata char memory_type_name[][32] = { + {"Reserved"}, + {"Loader Code"}, + {"Loader Data"}, + {"Boot Code"}, + {"Boot Data"}, + {"Runtime Code"}, + {"Runtime Data"}, + {"Conventional Memory"}, + {"Unusable Memory"}, + {"ACPI Reclaim Memory"}, + {"ACPI Memory NVS"}, + {"Memory Mapped I/O"}, + {"MMIO Port Space"}, + {"PAL Code"}, +}; + +/* + * Return true for RAM regions we want to permanently reserve. + */ +static __init int is_reserve_region(efi_memory_desc_t *md) +{ + if (!is_normal_ram(md)) + return 0; + + if (md->attribute & EFI_MEMORY_RUNTIME) + return 1; + + if (md->type == EFI_ACPI_RECLAIM_MEMORY || + md->type == EFI_RESERVED_TYPE) + return 1; + + return 0; +} + +static __init void reserve_regions(void) +{ + efi_memory_desc_t *md; + u64 paddr, npages, size; + + if (uefi_debug) + pr_info("Processing EFI memory map:\n"); + + for_each_efi_memory_desc(&memmap, md) { + paddr = md->phys_addr; + npages = md->num_pages; + + if (uefi_debug) + pr_info(" 0x%012llx-0x%012llx [%s]", + paddr, paddr + (npages << EFI_PAGE_SHIFT) - 1, + memory_type_name[md->type]); + + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + early_init_dt_add_memory_arch(paddr, size); + + if (is_reserve_region(md) || + md->type == EFI_BOOT_SERVICES_CODE || + md->type == EFI_BOOT_SERVICES_DATA) { + memblock_reserve(paddr, size); + if (uefi_debug) + pr_cont("*"); + } + + if (uefi_debug) + pr_cont("\n"); + } +} + + +static u64 __init free_one_region(u64 start, u64 end) +{ + u64 size = end - start; + + if (uefi_debug) + pr_info(" EFI freeing: 0x%012llx-0x%012llx\n", start, end - 1); + + free_bootmem_late(start, size); + return size; +} + +static u64 __init free_region(u64 start, u64 end) +{ + u64 map_start, map_end, total = 0; + + if (end <= start) + return total; + + map_start = (u64)memmap.phys_map; + map_end = PAGE_ALIGN(map_start + (memmap.map_end - memmap.map)); + map_start &= PAGE_MASK; + + if (start < map_end && end > map_start) { + /* region overlaps UEFI memmap */ + if (start < map_start) + total += free_one_region(start, map_start); + + if (map_end < end) + total += free_one_region(map_end, end); + } else + total += free_one_region(start, end); + + return total; +} + +static void __init free_boot_services(void) +{ + u64 total_freed = 0; + u64 keep_end, free_start, free_end; + efi_memory_desc_t *md; + + /* + * If kernel uses larger pages than UEFI, we have to be careful + * not to inadvertantly free memory we want to keep if there is + * overlap at the kernel page size alignment. We do not want to + * free is_reserve_region() memory nor the UEFI memmap itself. + * + * The memory map is sorted, so we keep track of the end of + * any previous region we want to keep, remember any region + * we want to free and defer freeing it until we encounter + * the next region we want to keep. This way, before freeing + * it, we can clip it as needed to avoid freeing memory we + * want to keep for UEFI. + */ + + keep_end = 0; + free_start = 0; + + for_each_efi_memory_desc(&memmap, md) { + u64 paddr, npages, size; + + if (is_reserve_region(md)) { + /* + * We don't want to free any memory from this region. + */ + if (free_start) { + /* adjust free_end then free region */ + if (free_end > md->phys_addr) + free_end -= PAGE_SIZE; + total_freed += free_region(free_start, free_end); + free_start = 0; + } + keep_end = md->phys_addr + (md->num_pages << EFI_PAGE_SHIFT); + continue; + } + + if (md->type != EFI_BOOT_SERVICES_CODE && + md->type != EFI_BOOT_SERVICES_DATA) { + /* no need to free this region */ + continue; + } + + /* + * We want to free memory from this region. + */ + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (free_start) { + if (paddr <= free_end) + free_end = paddr + size; + else { + total_freed += free_region(free_start, free_end); + free_start = paddr; + free_end = paddr + size; + } + } else { + free_start = paddr; + free_end = paddr + size; + } + if (free_start < keep_end) { + free_start += PAGE_SIZE; + if (free_start >= free_end) + free_start = 0; + } + } + if (free_start) + total_freed += free_region(free_start, free_end); + + if (total_freed) + pr_info("Freed 0x%llx bytes of EFI boot services memory", + total_freed); +} + +void __init efi_init(void) +{ + struct efi_fdt_params params; + + /* Grab UEFI information placed in FDT by stub */ + if (!efi_get_fdt_params(¶ms, uefi_debug)) + return; + + efi_system_table = params.system_table; + + memblock_reserve(params.mmap & PAGE_MASK, + PAGE_ALIGN(params.mmap_size + (params.mmap & ~PAGE_MASK))); + memmap.phys_map = (void *)params.mmap; + memmap.map = early_memremap(params.mmap, params.mmap_size); + memmap.map_end = memmap.map + params.mmap_size; + memmap.desc_size = params.desc_size; + memmap.desc_version = params.desc_ver; + + if (uefi_init() < 0) + return; + + reserve_regions(); +} + +void __init efi_idmap_init(void) +{ + if (!efi_enabled(EFI_BOOT)) + return; + + /* boot time idmap_pg_dir is incomplete, so fill in missing parts */ + efi_setup_idmap(); +} + +static int __init remap_region(efi_memory_desc_t *md, void **new) +{ + u64 paddr, vaddr, npages, size; + + paddr = md->phys_addr; + npages = md->num_pages; + memrange_efi_to_native(&paddr, &npages); + size = npages << PAGE_SHIFT; + + if (is_normal_ram(md)) + vaddr = (__force u64)ioremap_cache(paddr, size); + else + vaddr = (__force u64)ioremap(paddr, size); + + if (!vaddr) { + pr_err("Unable to remap 0x%llx pages @ %p\n", + npages, (void *)paddr); + return 0; + } + + /* adjust for any rounding when EFI and system pagesize differs */ + md->virt_addr = vaddr + (md->phys_addr - paddr); + + if (uefi_debug) + pr_info(" EFI remap 0x%012llx => %p\n", + md->phys_addr, (void *)md->virt_addr); + + memcpy(*new, md, memmap.desc_size); + *new += memmap.desc_size; + + return 1; +} + +/* + * Switch UEFI from an identity map to a kernel virtual map + */ +static int __init arm64_enter_virtual_mode(void) +{ + efi_memory_desc_t *md; + phys_addr_t virtmap_phys; + void *virtmap, *virt_md; + efi_status_t status; + u64 mapsize; + int count = 0; + unsigned long flags; + + if (!efi_enabled(EFI_BOOT)) { + pr_info("EFI services will not be available.\n"); + return -1; + } + + pr_info("Remapping and enabling EFI services.\n"); + + /* replace early memmap mapping with permanent mapping */ + mapsize = memmap.map_end - memmap.map; + early_memunmap(memmap.map, mapsize); + memmap.map = (__force void *)ioremap_cache((phys_addr_t)memmap.phys_map, + mapsize); + memmap.map_end = memmap.map + mapsize; + + efi.memmap = &memmap; + + /* Map the runtime regions */ + virtmap = kmalloc(mapsize, GFP_KERNEL); + if (!virtmap) { + pr_err("Failed to allocate EFI virtual memmap\n"); + return -1; + } + virtmap_phys = virt_to_phys(virtmap); + virt_md = virtmap; + + for_each_efi_memory_desc(&memmap, md) { + if (!(md->attribute & EFI_MEMORY_RUNTIME)) + continue; + if (remap_region(md, &virt_md)) + ++count; + } + + efi.systab = (__force void *)efi_lookup_mapped_addr(efi_system_table); + if (efi.systab) + set_bit(EFI_SYSTEM_TABLES, &efi.flags); + + local_irq_save(flags); + cpu_switch_mm(idmap_pg_dir, &init_mm); + + /* Call SetVirtualAddressMap with the physical address of the map */ + runtime = efi.systab->runtime; + efi.set_virtual_address_map = runtime->set_virtual_address_map; + + status = efi.set_virtual_address_map(count * memmap.desc_size, + memmap.desc_size, + memmap.desc_version, + (efi_memory_desc_t *)virtmap_phys); + cpu_set_reserved_ttbr0(); + flush_tlb_all(); + local_irq_restore(flags); + + kfree(virtmap); + + free_boot_services(); + + if (status != EFI_SUCCESS) { + pr_err("Failed to set EFI virtual address map! [%lx]\n", + status); + return -1; + } + + /* Set up runtime services function pointers */ + runtime = efi.systab->runtime; + efi.get_time = runtime->get_time; + efi.set_time = runtime->set_time; + efi.get_wakeup_time = runtime->get_wakeup_time; + efi.set_wakeup_time = runtime->set_wakeup_time; + efi.get_variable = runtime->get_variable; + efi.get_next_variable = runtime->get_next_variable; + efi.set_variable = runtime->set_variable; + efi.query_variable_info = runtime->query_variable_info; + efi.update_capsule = runtime->update_capsule; + efi.query_capsule_caps = runtime->query_capsule_caps; + efi.get_next_high_mono_count = runtime->get_next_high_mono_count; + efi.reset_system = runtime->reset_system; + + set_bit(EFI_RUNTIME_SERVICES, &efi.flags); + + return 0; +} +early_initcall(arm64_enter_virtual_mode); diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 6a27cd6dbfa..d358ccacfc0 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -41,3 +41,27 @@ ENTRY(fpsimd_load_state) fpsimd_restore x0, 8 ret ENDPROC(fpsimd_load_state) + +#ifdef CONFIG_KERNEL_MODE_NEON + +/* + * Save the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_save_partial_state) + fpsimd_save_partial x0, 1, 8, 9 + ret +ENDPROC(fpsimd_load_partial_state) + +/* + * Load the bottom n FP registers. + * + * x0 - pointer to struct fpsimd_partial_state + */ +ENTRY(fpsimd_load_partial_state) + fpsimd_restore_partial x0, 8, 9 + ret +ENDPROC(fpsimd_load_partial_state) + +#endif diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S new file mode 100644 index 00000000000..aa5f9fcbf9e --- /dev/null +++ b/arch/arm64/kernel/entry-ftrace.S @@ -0,0 +1,218 @@ +/* + * arch/arm64/kernel/entry-ftrace.S + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +/* + * Gcc with -pg will put the following code in the beginning of each function: + * mov x0, x30 + * bl _mcount + * [function's body ...] + * "bl _mcount" may be replaced to "bl ftrace_caller" or NOP if dynamic + * ftrace is enabled. + * + * Please note that x0 as an argument will not be used here because we can + * get lr(x30) of instrumented function at any time by winding up call stack + * as long as the kernel is compiled without -fomit-frame-pointer. + * (or CONFIG_FRAME_POINTER, this is forced on arm64) + * + * stack layout after mcount_enter in _mcount(): + * + * current sp/fp => 0:+-----+ + * in _mcount() | x29 | -> instrumented function's fp + * +-----+ + * | x30 | -> _mcount()'s lr (= instrumented function's pc) + * old sp => +16:+-----+ + * when instrumented | | + * function calls | ... | + * _mcount() | | + * | | + * instrumented => +xx:+-----+ + * function's fp | x29 | -> parent's fp + * +-----+ + * | x30 | -> instrumented function's lr (= parent's pc) + * +-----+ + * | ... | + */ + + .macro mcount_enter + stp x29, x30, [sp, #-16]! + mov x29, sp + .endm + + .macro mcount_exit + ldp x29, x30, [sp], #16 + ret + .endm + + .macro mcount_adjust_addr rd, rn + sub \rd, \rn, #AARCH64_INSN_SIZE + .endm + + /* for instrumented function's parent */ + .macro mcount_get_parent_fp reg + ldr \reg, [x29] + ldr \reg, [\reg] + .endm + + /* for instrumented function */ + .macro mcount_get_pc0 reg + mcount_adjust_addr \reg, x30 + .endm + + .macro mcount_get_pc reg + ldr \reg, [x29, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr reg + ldr \reg, [x29] + ldr \reg, [\reg, #8] + mcount_adjust_addr \reg, \reg + .endm + + .macro mcount_get_lr_addr reg + ldr \reg, [x29] + add \reg, \reg, #8 + .endm + +#ifndef CONFIG_DYNAMIC_FTRACE +/* + * void _mcount(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function makes calls, if enabled, to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(_mcount) +#ifdef CONFIG_HAVE_FUNCTION_TRACE_MCOUNT_TEST + ldr x0, =ftrace_trace_stop + ldr x0, [x0] // if ftrace_trace_stop + ret // return; +#endif + mcount_enter + + ldr x0, =ftrace_trace_function + ldr x2, [x0] + adr x0, ftrace_stub + cmp x0, x2 // if (ftrace_trace_function + b.eq skip_ftrace_call // != ftrace_stub) { + + mcount_get_pc x0 // function's pc + mcount_get_lr x1 // function's lr (= parent's pc) + blr x2 // (*ftrace_trace_function)(pc, lr); + +#ifndef CONFIG_FUNCTION_GRAPH_TRACER +skip_ftrace_call: // return; + mcount_exit // } +#else + mcount_exit // return; + // } +skip_ftrace_call: + ldr x1, =ftrace_graph_return + ldr x2, [x1] // if ((ftrace_graph_return + cmp x0, x2 // != ftrace_stub) + b.ne ftrace_graph_caller + + ldr x1, =ftrace_graph_entry // || (ftrace_graph_entry + ldr x2, [x1] // != ftrace_graph_entry_stub)) + ldr x0, =ftrace_graph_entry_stub + cmp x0, x2 + b.ne ftrace_graph_caller // ftrace_graph_caller(); + + mcount_exit +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +ENDPROC(_mcount) + +#else /* CONFIG_DYNAMIC_FTRACE */ +/* + * _mcount() is used to build the kernel with -pg option, but all the branch + * instructions to _mcount() are replaced to NOP initially at kernel start up, + * and later on, NOP to branch to ftrace_caller() when enabled or branch to + * NOP when disabled per-function base. + */ +ENTRY(_mcount) + ret +ENDPROC(_mcount) + +/* + * void ftrace_caller(unsigned long return_address) + * @return_address: return address to instrumented function + * + * This function is a counterpart of _mcount() in 'static' ftrace, and + * makes calls to: + * - tracer function to probe instrumented function's entry, + * - ftrace_graph_caller to set up an exit hook + */ +ENTRY(ftrace_caller) + mcount_enter + + mcount_get_pc0 x0 // function's pc + mcount_get_lr x1 // function's lr + + .global ftrace_call +ftrace_call: // tracer(pc, lr); + nop // This will be replaced with "bl xxx" + // where xxx can be any kind of tracer. + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER + .global ftrace_graph_call +ftrace_graph_call: // ftrace_graph_caller(); + nop // If enabled, this will be replaced + // "b ftrace_graph_caller" +#endif + + mcount_exit +ENDPROC(ftrace_caller) +#endif /* CONFIG_DYNAMIC_FTRACE */ + +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * void ftrace_graph_caller(void) + * + * Called from _mcount() or ftrace_caller() when function_graph tracer is + * selected. + * This function w/ prepare_ftrace_return() fakes link register's value on + * the call stack in order to intercept instrumented function's return path + * and run return_to_handler() later on its exit. + */ +ENTRY(ftrace_graph_caller) + mcount_get_lr_addr x0 // pointer to function's saved lr + mcount_get_pc x1 // function's pc + mcount_get_parent_fp x2 // parent's fp + bl prepare_ftrace_return // prepare_ftrace_return(&lr, pc, fp) + + mcount_exit +ENDPROC(ftrace_graph_caller) + +/* + * void return_to_handler(void) + * + * Run ftrace_return_to_handler() before going back to parent. + * @fp is checked against the value passed by ftrace_graph_caller() + * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. + */ +ENTRY(return_to_handler) + str x0, [sp, #-16]! + mov x0, x29 // parent's fp + bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); + mov x30, x0 // restore the original return address + ldr x0, [sp], #16 + ret +END(return_to_handler) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9c94f404ded..9ce04ba6bcb 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -24,6 +24,7 @@ #include <asm/assembler.h> #include <asm/asm-offsets.h> #include <asm/errno.h> +#include <asm/esr.h> #include <asm/thread_info.h> #include <asm/unistd.h> #include <asm/unistd32.h> @@ -59,6 +60,9 @@ push x0, x1 .if \el == 0 mrs x21, sp_el0 + get_thread_info tsk // Ensure MDSCR_EL1.SS is clear, + ldr x19, [tsk, #TI_FLAGS] // since we can unmask debug + disable_step_tsk x19, x20 // exceptions when scheduling. .else add x21, sp, #S_FRAME_SIZE .endif @@ -120,7 +124,7 @@ .macro get_thread_info, rd mov \rd, sp - and \rd, \rd, #~((1 << 13) - 1) // top of 8K stack + and \rd, \rd, #~(THREAD_SIZE - 1) // top of stack .endm /* @@ -239,18 +243,18 @@ ENDPROC(el1_error_invalid) el1_sync: kernel_entry 1 mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #26 // exception class - cmp x24, #0x25 // data abort in EL1 + lsr x24, x1, #ESR_EL1_EC_SHIFT // exception class + cmp x24, #ESR_EL1_EC_DABT_EL1 // data abort in EL1 b.eq el1_da - cmp x24, #0x18 // configurable trap + cmp x24, #ESR_EL1_EC_SYS64 // configurable trap b.eq el1_undef - cmp x24, #0x26 // stack alignment exception + cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception b.eq el1_sp_pc - cmp x24, #0x22 // pc alignment exception + cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception b.eq el1_sp_pc - cmp x24, #0x00 // unknown exception in EL1 + cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL1 b.eq el1_undef - cmp x24, #0x30 // debug exception in EL1 + cmp x24, #ESR_EL1_EC_BREAKPT_EL1 // debug exception in EL1 b.ge el1_dbg b el1_inv el1_da: @@ -258,7 +262,7 @@ el1_da: * Data abort handling */ mrs x0, far_el1 - enable_dbg_if_not_stepping x2 + enable_dbg // re-enable interrupts if they were enabled in the aborted context tbnz x23, #7, 1f // PSR_I_BIT enable_irq @@ -274,27 +278,31 @@ el1_sp_pc: * Stack or PC alignment exception handling */ mrs x0, far_el1 - mov x1, x25 + enable_dbg mov x2, sp b do_sp_pc_abort el1_undef: /* * Undefined instruction */ + enable_dbg mov x0, sp b do_undefinstr el1_dbg: /* * Debug exception handling */ + cmp x24, #ESR_EL1_EC_BRK64 // if BRK64 + cinc x24, x24, eq // set bit '0' tbz x24, #0, el1_inv // EL1 only mrs x0, far_el1 mov x2, sp // struct pt_regs bl do_debug_exception - + enable_dbg kernel_exit 1 el1_inv: // TODO: add support for undefined instructions in kernel mode + enable_dbg mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 @@ -304,20 +312,17 @@ ENDPROC(el1_sync) .align 6 el1_irq: kernel_entry 1 - enable_dbg_if_not_stepping x0 + enable_dbg #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif -#ifdef CONFIG_PREEMPT - get_thread_info tsk - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x0, x24, #1 // increment it - str x0, [tsk, #TI_PREEMPT] -#endif + irq_handler + #ifdef CONFIG_PREEMPT - str x24, [tsk, #TI_PREEMPT] // restore preempt count - cbnz x24, 1f // preempt count != 0 + get_thread_info tsk + ldr w24, [tsk, #TI_PREEMPT] // get preempt count + cbnz w24, 1f // preempt count != 0 ldr x0, [tsk, #TI_FLAGS] // get flags tbz x0, #TIF_NEED_RESCHED, 1f // needs rescheduling? bl el1_preempt @@ -332,8 +337,7 @@ ENDPROC(el1_irq) #ifdef CONFIG_PREEMPT el1_preempt: mov x24, lr -1: enable_dbg - bl preempt_schedule_irq // irq en/disable is done inside +1: bl preempt_schedule_irq // irq en/disable is done inside ldr x0, [tsk, #TI_FLAGS] // get new tasks TI_FLAGS tbnz x0, #TIF_NEED_RESCHED, 1b // needs rescheduling? ret x24 @@ -346,27 +350,27 @@ el1_preempt: el0_sync: kernel_entry 0 mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #26 // exception class - cmp x24, #0x15 // SVC in 64-bit state + lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class + cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eq el0_svc - adr lr, ret_from_exception - cmp x24, #0x24 // data abort in EL0 + adr lr, ret_to_user + cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eq el0_da - cmp x24, #0x20 // instruction abort in EL0 + cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 b.eq el0_ia - cmp x24, #0x07 // FP/ASIMD access + cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access b.eq el0_fpsimd_acc - cmp x24, #0x2c // FP/ASIMD exception + cmp x24, #ESR_EL1_EC_FP_EXC64 // FP/ASIMD exception b.eq el0_fpsimd_exc - cmp x24, #0x18 // configurable trap + cmp x24, #ESR_EL1_EC_SYS64 // configurable trap b.eq el0_undef - cmp x24, #0x26 // stack alignment exception + cmp x24, #ESR_EL1_EC_SP_ALIGN // stack alignment exception b.eq el0_sp_pc - cmp x24, #0x22 // pc alignment exception + cmp x24, #ESR_EL1_EC_PC_ALIGN // pc alignment exception b.eq el0_sp_pc - cmp x24, #0x00 // unknown exception in EL0 + cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef - cmp x24, #0x30 // debug exception in EL0 + cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 b.ge el0_dbg b el0_inv @@ -375,21 +379,31 @@ el0_sync: el0_sync_compat: kernel_entry 0, 32 mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #26 // exception class - cmp x24, #0x11 // SVC in 32-bit state + lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class + cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eq el0_svc_compat - adr lr, ret_from_exception - cmp x24, #0x24 // data abort in EL0 + adr lr, ret_to_user + cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eq el0_da - cmp x24, #0x20 // instruction abort in EL0 + cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 b.eq el0_ia - cmp x24, #0x07 // FP/ASIMD access + cmp x24, #ESR_EL1_EC_FP_ASIMD // FP/ASIMD access b.eq el0_fpsimd_acc - cmp x24, #0x28 // FP/ASIMD exception + cmp x24, #ESR_EL1_EC_FP_EXC32 // FP/ASIMD exception b.eq el0_fpsimd_exc - cmp x24, #0x00 // unknown exception in EL0 + cmp x24, #ESR_EL1_EC_UNKNOWN // unknown exception in EL0 b.eq el0_undef - cmp x24, #0x30 // debug exception in EL0 + cmp x24, #ESR_EL1_EC_CP15_32 // CP15 MRC/MCR trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP15_64 // CP15 MRRC/MCRR trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP14_MR // CP14 MRC/MCR trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP14_LS // CP14 LDC/STC trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_CP14_64 // CP14 MRRC/MCRR trap + b.eq el0_undef + cmp x24, #ESR_EL1_EC_BREAKPT_EL0 // debug exception in EL0 b.ge el0_dbg b el0_inv el0_svc_compat: @@ -412,11 +426,9 @@ el0_da: * Data abort handling */ mrs x0, far_el1 - disable_step x1 - isb - enable_dbg + bic x0, x0, #(0xff << 56) // enable interrupts before calling the main handler - enable_irq + enable_dbg_and_irq mov x1, x25 mov x2, sp b do_mem_abort @@ -425,11 +437,8 @@ el0_ia: * Instruction abort handling */ mrs x0, far_el1 - disable_step x1 - isb - enable_dbg // enable interrupts before calling the main handler - enable_irq + enable_dbg_and_irq orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp b do_mem_abort @@ -437,6 +446,7 @@ el0_fpsimd_acc: /* * Floating Point or Advanced SIMD access */ + enable_dbg mov x0, x25 mov x1, sp b do_fpsimd_acc @@ -444,6 +454,7 @@ el0_fpsimd_exc: /* * Floating Point or Advanced SIMD exception */ + enable_dbg mov x0, x25 mov x1, sp b do_fpsimd_exc @@ -452,11 +463,8 @@ el0_sp_pc: * Stack or PC alignment exception handling */ mrs x0, far_el1 - disable_step x1 - isb - enable_dbg // enable interrupts before calling the main handler - enable_irq + enable_dbg_and_irq mov x1, x25 mov x2, sp b do_sp_pc_abort @@ -464,6 +472,8 @@ el0_undef: /* * Undefined instruction */ + // enable interrupts before calling the main handler + enable_dbg_and_irq mov x0, sp b do_undefinstr el0_dbg: @@ -472,11 +482,13 @@ el0_dbg: */ tbnz x24, #0, el0_inv // EL0 only mrs x0, far_el1 - disable_step x1 mov x1, x25 mov x2, sp - b do_debug_exception + bl do_debug_exception + enable_dbg + b ret_to_user el0_inv: + enable_dbg mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 @@ -487,28 +499,13 @@ ENDPROC(el0_sync) el0_irq: kernel_entry 0 el0_irq_naked: - disable_step x1 - isb enable_dbg #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_off #endif - get_thread_info tsk -#ifdef CONFIG_PREEMPT - ldr x24, [tsk, #TI_PREEMPT] // get preempt count - add x23, x24, #1 // increment it - str x23, [tsk, #TI_PREEMPT] -#endif + irq_handler -#ifdef CONFIG_PREEMPT - ldr x0, [tsk, #TI_PREEMPT] - str x24, [tsk, #TI_PREEMPT] - cmp x0, x23 - b.eq 1f - mov x1, #0 - str x1, [x1] // BUG -1: -#endif + #ifdef CONFIG_TRACE_IRQFLAGS bl trace_hardirqs_on #endif @@ -516,14 +513,6 @@ el0_irq_naked: ENDPROC(el0_irq) /* - * This is the return code to user mode for abort handlers - */ -ret_from_exception: - get_thread_info tsk - b ret_to_user -ENDPROC(ret_from_exception) - -/* * Register switch for AArch64. The callee-saved registers need to be saved * and restored. On entry: * x0 = previous task_struct (must be preserved across the switch) @@ -562,10 +551,7 @@ ret_fast_syscall: ldr x1, [tsk, #TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, fast_work_pending - tbz x1, #TIF_SINGLESTEP, fast_exit - disable_dbg - enable_step x2 -fast_exit: + enable_step_tsk x1, x2 kernel_exit 0, ret = 1 /* @@ -575,7 +561,7 @@ fast_work_pending: str x0, [sp, #S_X0] // returned x0 work_pending: tbnz x1, #TIF_NEED_RESCHED, work_resched - /* TIF_SIGPENDING or TIF_NOTIFY_RESUME case */ + /* TIF_SIGPENDING, TIF_NOTIFY_RESUME or TIF_FOREIGN_FPSTATE case */ ldr x2, [sp, #S_PSTATE] mov x0, sp // 'regs' tst x2, #PSR_MODE_MASK // user mode regs? @@ -584,7 +570,6 @@ work_pending: bl do_notify_resume b ret_to_user work_resched: - enable_dbg bl schedule /* @@ -595,9 +580,7 @@ ret_to_user: ldr x1, [tsk, #TI_FLAGS] and x2, x1, #_TIF_WORK_MASK cbnz x2, work_pending - tbz x1, #TIF_SINGLESTEP, no_work_pending - disable_dbg - enable_step x2 + enable_step_tsk x1, x2 no_work_pending: kernel_exit 0, ret = 0 ENDPROC(ret_to_user) @@ -624,14 +607,11 @@ el0_svc: mov sc_nr, #__NR_syscalls el0_svc_naked: // compat entry point stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number - disable_step x16 - isb - enable_dbg - enable_irq + enable_dbg_and_irq - get_thread_info tsk - ldr x16, [tsk, #TI_FLAGS] // check for syscall tracing - tbnz x16, #TIF_SYSCALL_TRACE, __sys_trace // are we tracing syscalls? + ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks + tst x16, #_TIF_SYSCALL_WORK + b.ne __sys_trace adr lr, ret_fast_syscall // return address cmp scno, sc_nr // check upper syscall limit b.hs ni_sys @@ -647,9 +627,8 @@ ENDPROC(el0_svc) * switches, and waiting for our parent to respond. */ __sys_trace: - mov x1, sp - mov w0, #0 // trace entry - bl syscall_trace + mov x0, sp + bl syscall_trace_enter adr lr, __sys_trace_return // return address uxtw scno, w0 // syscall number (possibly new) mov x1, sp // pointer to regs @@ -664,9 +643,8 @@ __sys_trace: __sys_trace_return: str x0, [sp] // save returned x0 - mov x1, sp - mov w0, #1 // trace exit - bl syscall_trace + mov x0, sp + bl syscall_trace_exit b ret_to_user /* @@ -677,10 +655,5 @@ ENTRY(sys_rt_sigreturn_wrapper) b sys_rt_sigreturn ENDPROC(sys_rt_sigreturn_wrapper) -ENTRY(sys_sigaltstack_wrapper) - ldr x2, [sp, #S_SP] - b sys_sigaltstack -ENDPROC(sys_sigaltstack_wrapper) - ENTRY(handle_arch_irq) .quad 0 diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index e8b8357aedb..ad8aebb1cde 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -17,10 +17,12 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/cpu_pm.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/sched.h> #include <linux/signal.h> +#include <linux/hardirq.h> #include <asm/fpsimd.h> #include <asm/cputype.h> @@ -33,6 +35,60 @@ #define FPEXC_IDF (1 << 7) /* + * In order to reduce the number of times the FPSIMD state is needlessly saved + * and restored, we need to keep track of two things: + * (a) for each task, we need to remember which CPU was the last one to have + * the task's FPSIMD state loaded into its FPSIMD registers; + * (b) for each CPU, we need to remember which task's userland FPSIMD state has + * been loaded into its FPSIMD registers most recently, or whether it has + * been used to perform kernel mode NEON in the meantime. + * + * For (a), we add a 'cpu' field to struct fpsimd_state, which gets updated to + * the id of the current CPU everytime the state is loaded onto a CPU. For (b), + * we add the per-cpu variable 'fpsimd_last_state' (below), which contains the + * address of the userland FPSIMD state of the task that was loaded onto the CPU + * the most recently, or NULL if kernel mode NEON has been performed after that. + * + * With this in place, we no longer have to restore the next FPSIMD state right + * when switching between tasks. Instead, we can defer this check to userland + * resume, at which time we verify whether the CPU's fpsimd_last_state and the + * task's fpsimd_state.cpu are still mutually in sync. If this is the case, we + * can omit the FPSIMD restore. + * + * As an optimization, we use the thread_info flag TIF_FOREIGN_FPSTATE to + * indicate whether or not the userland FPSIMD state of the current task is + * present in the registers. The flag is set unless the FPSIMD registers of this + * CPU currently contain the most recent userland FPSIMD state of the current + * task. + * + * For a certain task, the sequence may look something like this: + * - the task gets scheduled in; if both the task's fpsimd_state.cpu field + * contains the id of the current CPU, and the CPU's fpsimd_last_state per-cpu + * variable points to the task's fpsimd_state, the TIF_FOREIGN_FPSTATE flag is + * cleared, otherwise it is set; + * + * - the task returns to userland; if TIF_FOREIGN_FPSTATE is set, the task's + * userland FPSIMD state is copied from memory to the registers, the task's + * fpsimd_state.cpu field is set to the id of the current CPU, the current + * CPU's fpsimd_last_state pointer is set to this task's fpsimd_state and the + * TIF_FOREIGN_FPSTATE flag is cleared; + * + * - the task executes an ordinary syscall; upon return to userland, the + * TIF_FOREIGN_FPSTATE flag will still be cleared, so no FPSIMD state is + * restored; + * + * - the task executes a syscall which executes some NEON instructions; this is + * preceded by a call to kernel_neon_begin(), which copies the task's FPSIMD + * register contents to memory, clears the fpsimd_last_state per-cpu variable + * and sets the TIF_FOREIGN_FPSTATE flag; + * + * - the task gets preempted after kernel_neon_end() is called; as we have not + * returned from the 2nd syscall yet, TIF_FOREIGN_FPSTATE is still set so + * whatever is in the FPSIMD registers is not saved to memory, but discarded. + */ +static DEFINE_PER_CPU(struct fpsimd_state *, fpsimd_last_state); + +/* * Trapped FP/ASIMD access. */ void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) @@ -70,20 +126,176 @@ void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) void fpsimd_thread_switch(struct task_struct *next) { - /* check if not kernel threads */ - if (current->mm) + /* + * Save the current FPSIMD state to memory, but only if whatever is in + * the registers is in fact the most recent userland FPSIMD state of + * 'current'. + */ + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) fpsimd_save_state(¤t->thread.fpsimd_state); - if (next->mm) - fpsimd_load_state(&next->thread.fpsimd_state); + + if (next->mm) { + /* + * If we are switching to a task whose most recent userland + * FPSIMD state is already in the registers of *this* cpu, + * we can skip loading the state from memory. Otherwise, set + * the TIF_FOREIGN_FPSTATE flag so the state will be loaded + * upon the next return to userland. + */ + struct fpsimd_state *st = &next->thread.fpsimd_state; + + if (__this_cpu_read(fpsimd_last_state) == st + && st->cpu == smp_processor_id()) + clear_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + else + set_ti_thread_flag(task_thread_info(next), + TIF_FOREIGN_FPSTATE); + } } void fpsimd_flush_thread(void) { memset(¤t->thread.fpsimd_state, 0, sizeof(struct fpsimd_state)); - fpsimd_load_state(¤t->thread.fpsimd_state); + set_thread_flag(TIF_FOREIGN_FPSTATE); +} + +/* + * Save the userland FPSIMD state of 'current' to memory, but only if the state + * currently held in the registers does in fact belong to 'current' + */ +void fpsimd_preserve_current_state(void) +{ + preempt_disable(); + if (!test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + preempt_enable(); +} + +/* + * Load the userland FPSIMD state of 'current' from memory, but only if the + * FPSIMD state already held in the registers is /not/ the most recent FPSIMD + * state of 'current' + */ +void fpsimd_restore_current_state(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + fpsimd_load_state(st); + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } + preempt_enable(); } /* + * Load an updated userland FPSIMD state for 'current' from memory and set the + * flag that indicates that the FPSIMD register contents are the most recent + * FPSIMD state of 'current' + */ +void fpsimd_update_current_state(struct fpsimd_state *state) +{ + preempt_disable(); + fpsimd_load_state(state); + if (test_and_clear_thread_flag(TIF_FOREIGN_FPSTATE)) { + struct fpsimd_state *st = ¤t->thread.fpsimd_state; + + this_cpu_write(fpsimd_last_state, st); + st->cpu = smp_processor_id(); + } + preempt_enable(); +} + +/* + * Invalidate live CPU copies of task t's FPSIMD state + */ +void fpsimd_flush_task_state(struct task_struct *t) +{ + t->thread.fpsimd_state.cpu = NR_CPUS; +} + +#ifdef CONFIG_KERNEL_MODE_NEON + +static DEFINE_PER_CPU(struct fpsimd_partial_state, hardirq_fpsimdstate); +static DEFINE_PER_CPU(struct fpsimd_partial_state, softirq_fpsimdstate); + +/* + * Kernel-side NEON support functions + */ +void kernel_neon_begin_partial(u32 num_regs) +{ + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); + + BUG_ON(num_regs > 32); + fpsimd_save_partial_state(s, roundup(num_regs, 2)); + } else { + /* + * Save the userland FPSIMD state if we have one and if we + * haven't done so already. Clear fpsimd_last_state to indicate + * that there is no longer userland FPSIMD state in the + * registers. + */ + preempt_disable(); + if (current->mm && + !test_and_set_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + this_cpu_write(fpsimd_last_state, NULL); + } +} +EXPORT_SYMBOL(kernel_neon_begin_partial); + +void kernel_neon_end(void) +{ + if (in_interrupt()) { + struct fpsimd_partial_state *s = this_cpu_ptr( + in_irq() ? &hardirq_fpsimdstate : &softirq_fpsimdstate); + fpsimd_load_partial_state(s); + } else { + preempt_enable(); + } +} +EXPORT_SYMBOL(kernel_neon_end); + +#endif /* CONFIG_KERNEL_MODE_NEON */ + +#ifdef CONFIG_CPU_PM +static int fpsimd_cpu_pm_notifier(struct notifier_block *self, + unsigned long cmd, void *v) +{ + switch (cmd) { + case CPU_PM_ENTER: + if (current->mm && !test_thread_flag(TIF_FOREIGN_FPSTATE)) + fpsimd_save_state(¤t->thread.fpsimd_state); + break; + case CPU_PM_EXIT: + if (current->mm) + set_thread_flag(TIF_FOREIGN_FPSTATE); + break; + case CPU_PM_ENTER_FAILED: + default: + return NOTIFY_DONE; + } + return NOTIFY_OK; +} + +static struct notifier_block fpsimd_cpu_pm_notifier_block = { + .notifier_call = fpsimd_cpu_pm_notifier, +}; + +static void fpsimd_pm_init(void) +{ + cpu_pm_register_notifier(&fpsimd_cpu_pm_notifier_block); +} + +#else +static inline void fpsimd_pm_init(void) { } +#endif /* CONFIG_CPU_PM */ + +/* * FP/SIMD support code initialisation. */ static int __init fpsimd_init(void) @@ -101,6 +313,8 @@ static int __init fpsimd_init(void) else elf_hwcap |= HWCAP_ASIMD; + fpsimd_pm_init(); + return 0; } late_initcall(fpsimd_init); diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c new file mode 100644 index 00000000000..7924d73b647 --- /dev/null +++ b/arch/arm64/kernel/ftrace.c @@ -0,0 +1,176 @@ +/* + * arch/arm64/kernel/ftrace.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/ftrace.h> +#include <linux/swab.h> +#include <linux/uaccess.h> + +#include <asm/cacheflush.h> +#include <asm/ftrace.h> +#include <asm/insn.h> + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Replace a single instruction, which may be a branch or NOP. + * If @validate == true, a replaced instruction is checked against 'old'. + */ +static int ftrace_modify_code(unsigned long pc, u32 old, u32 new, + bool validate) +{ + u32 replaced; + + /* + * Note: + * Due to modules and __init, code can disappear and change, + * we need to protect against faulting as well as code changing. + * We do this by aarch64_insn_*() which use the probe_kernel_*(). + * + * No lock is held here because all the modifications are run + * through stop_machine(). + */ + if (validate) { + if (aarch64_insn_read((void *)pc, &replaced)) + return -EFAULT; + + if (replaced != old) + return -EINVAL; + } + if (aarch64_insn_patch_text_nosync((void *)pc, new)) + return -EPERM; + + return 0; +} + +/* + * Replace tracer function in ftrace_caller() + */ +int ftrace_update_ftrace_func(ftrace_func_t func) +{ + unsigned long pc; + u32 new; + + pc = (unsigned long)&ftrace_call; + new = aarch64_insn_gen_branch_imm(pc, (unsigned long)func, true); + + return ftrace_modify_code(pc, 0, new, false); +} + +/* + * Turn on the call to ftrace_caller() in instrumented function + */ +int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_branch_imm(pc, addr, true); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * Turn off the call to ftrace_caller() in instrumented function + */ +int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, addr, true); + new = aarch64_insn_gen_nop(); + + return ftrace_modify_code(pc, old, new, true); +} + +int __init ftrace_dyn_arch_init(void) +{ + return 0; +} +#endif /* CONFIG_DYNAMIC_FTRACE */ + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +/* + * function_graph tracer expects ftrace_return_to_handler() to be called + * on the way back to parent. For this purpose, this function is called + * in _mcount() or ftrace_caller() to replace return address (*parent) on + * the call stack to return_to_handler. + * + * Note that @frame_pointer is used only for sanity check later. + */ +void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, + unsigned long frame_pointer) +{ + unsigned long return_hooker = (unsigned long)&return_to_handler; + unsigned long old; + struct ftrace_graph_ent trace; + int err; + + if (unlikely(atomic_read(¤t->tracing_graph_pause))) + return; + + /* + * Note: + * No protection against faulting at *parent, which may be seen + * on other archs. It's unlikely on AArch64. + */ + old = *parent; + *parent = return_hooker; + + trace.func = self_addr; + trace.depth = current->curr_ret_stack + 1; + + /* Only trace if the calling function expects to */ + if (!ftrace_graph_entry(&trace)) { + *parent = old; + return; + } + + err = ftrace_push_return_trace(old, self_addr, &trace.depth, + frame_pointer); + if (err == -EBUSY) { + *parent = old; + return; + } +} + +#ifdef CONFIG_DYNAMIC_FTRACE +/* + * Turn on/off the call to ftrace_graph_caller() in ftrace_caller() + * depending on @enable. + */ +static int ftrace_modify_graph_caller(bool enable) +{ + unsigned long pc = (unsigned long)&ftrace_graph_call; + u32 branch, nop; + + branch = aarch64_insn_gen_branch_imm(pc, + (unsigned long)ftrace_graph_caller, false); + nop = aarch64_insn_gen_nop(); + + if (enable) + return ftrace_modify_code(pc, nop, branch, true); + else + return ftrace_modify_code(pc, branch, nop, true); +} + +int ftrace_enable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(true); +} + +int ftrace_disable_ftrace_graph_caller(void) +{ + return ftrace_modify_graph_caller(false); +} +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 368ad1f7c36..a2c1195abb7 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -26,6 +26,8 @@ #include <asm/assembler.h> #include <asm/ptrace.h> #include <asm/asm-offsets.h> +#include <asm/cache.h> +#include <asm/cputype.h> #include <asm/memory.h> #include <asm/thread_info.h> #include <asm/pgtable-hwdef.h> @@ -82,10 +84,8 @@ #ifdef CONFIG_ARM64_64K_PAGES #define MM_MMUFLAGS PTE_ATTRINDX(MT_NORMAL) | PTE_FLAGS -#define IO_MMUFLAGS PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_XN | PTE_FLAGS #else #define MM_MMUFLAGS PMD_ATTRINDX(MT_NORMAL) | PMD_FLAGS -#define IO_MMUFLAGS PMD_ATTRINDX(MT_DEVICE_nGnRE) | PMD_SECT_XN | PMD_FLAGS #endif /* @@ -108,16 +108,137 @@ /* * DO NOT MODIFY. Image header expected by Linux boot-loaders. */ +#ifdef CONFIG_EFI +efi_head: + /* + * This add instruction has no meaningful effect except that + * its opcode forms the magic "MZ" signature required by UEFI. + */ + add x13, x18, #0x16 + b stext +#else b stext // branch to kernel start, magic .long 0 // reserved +#endif .quad TEXT_OFFSET // Image load offset from start of RAM .quad 0 // reserved .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .quad 0 // reserved + .byte 0x41 // Magic number, "ARM\x64" + .byte 0x52 + .byte 0x4d + .byte 0x64 +#ifdef CONFIG_EFI + .long pe_header - efi_head // Offset to the PE header. +#else + .word 0 // reserved +#endif + +#ifdef CONFIG_EFI + .align 3 +pe_header: + .ascii "PE" + .short 0 +coff_header: + .short 0xaa64 // AArch64 + .short 2 // nr_sections + .long 0 // TimeDateStamp + .long 0 // PointerToSymbolTable + .long 1 // NumberOfSymbols + .short section_table - optional_header // SizeOfOptionalHeader + .short 0x206 // Characteristics. + // IMAGE_FILE_DEBUG_STRIPPED | + // IMAGE_FILE_EXECUTABLE_IMAGE | + // IMAGE_FILE_LINE_NUMS_STRIPPED +optional_header: + .short 0x20b // PE32+ format + .byte 0x02 // MajorLinkerVersion + .byte 0x14 // MinorLinkerVersion + .long _edata - stext // SizeOfCode + .long 0 // SizeOfInitializedData + .long 0 // SizeOfUninitializedData + .long efi_stub_entry - efi_head // AddressOfEntryPoint + .long stext - efi_head // BaseOfCode + +extra_header_fields: + .quad 0 // ImageBase + .long 0x20 // SectionAlignment + .long 0x8 // FileAlignment + .short 0 // MajorOperatingSystemVersion + .short 0 // MinorOperatingSystemVersion + .short 0 // MajorImageVersion + .short 0 // MinorImageVersion + .short 0 // MajorSubsystemVersion + .short 0 // MinorSubsystemVersion + .long 0 // Win32VersionValue + + .long _edata - efi_head // SizeOfImage + + // Everything before the kernel image is considered part of the header + .long stext - efi_head // SizeOfHeaders + .long 0 // CheckSum + .short 0xa // Subsystem (EFI application) + .short 0 // DllCharacteristics + .quad 0 // SizeOfStackReserve + .quad 0 // SizeOfStackCommit + .quad 0 // SizeOfHeapReserve + .quad 0 // SizeOfHeapCommit + .long 0 // LoaderFlags + .long 0x6 // NumberOfRvaAndSizes + + .quad 0 // ExportTable + .quad 0 // ImportTable + .quad 0 // ResourceTable + .quad 0 // ExceptionTable + .quad 0 // CertificationTable + .quad 0 // BaseRelocationTable + + // Section table +section_table: + + /* + * The EFI application loader requires a relocation section + * because EFI applications must be relocatable. This is a + * dummy section as far as we are concerned. + */ + .ascii ".reloc" + .byte 0 + .byte 0 // end of 0 padding of section name + .long 0 + .long 0 + .long 0 // SizeOfRawData + .long 0 // PointerToRawData + .long 0 // PointerToRelocations + .long 0 // PointerToLineNumbers + .short 0 // NumberOfRelocations + .short 0 // NumberOfLineNumbers + .long 0x42100040 // Characteristics (section flags) + + + .ascii ".text" + .byte 0 + .byte 0 + .byte 0 // end of 0 padding of section name + .long _edata - stext // VirtualSize + .long stext - efi_head // VirtualAddress + .long _edata - stext // SizeOfRawData + .long stext - efi_head // PointerToRawData + + .long 0 // PointerToRelocations (0 for executables) + .long 0 // PointerToLineNumbers (0 for executables) + .short 0 // NumberOfRelocations (0 for executables) + .short 0 // NumberOfLineNumbers (0 for executables) + .long 0xe0500020 // Characteristics (section flags) + .align 5 +#endif ENTRY(stext) mov x21, x0 // x21=FDT + bl el2_setup // Drop to EL1, w20=cpu_boot_mode bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET - bl el2_setup // Drop to EL1 + bl set_cpu_boot_mode_flag mrs x22, midr_el1 // x22=cpuid mov x0, x22 bl lookup_processor_type @@ -143,21 +264,29 @@ ENDPROC(stext) /* * If we're fortunate enough to boot at EL2, ensure that the world is * sane before dropping to EL1. + * + * Returns either BOOT_CPU_MODE_EL1 or BOOT_CPU_MODE_EL2 in x20 if + * booted in EL1 or EL2 respectively. */ ENTRY(el2_setup) mrs x0, CurrentEL - cmp x0, #PSR_MODE_EL2t - ccmp x0, #PSR_MODE_EL2h, #0x4, ne - ldr x0, =__boot_cpu_mode // Compute __boot_cpu_mode - add x0, x0, x28 - b.eq 1f - str wzr, [x0] // Remember we don't have EL2... + cmp x0, #CurrentEL_EL2 + b.ne 1f + mrs x0, sctlr_el2 +CPU_BE( orr x0, x0, #(1 << 25) ) // Set the EE bit for EL2 +CPU_LE( bic x0, x0, #(1 << 25) ) // Clear the EE bit for EL2 + msr sctlr_el2, x0 + b 2f +1: mrs x0, sctlr_el1 +CPU_BE( orr x0, x0, #(3 << 24) ) // Set the EE and E0E bits for EL1 +CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 + msr sctlr_el1, x0 + mov w20, #BOOT_CPU_MODE_EL1 // This cpu booted in EL1 + isb ret /* Hyp configuration. */ -1: ldr w1, =BOOT_CPU_MODE_EL2 - str w1, [x0, #4] // This CPU has EL2 - mov x0, #(1 << 31) // 64-bit EL1 +2: mov x0, #(1 << 31) // 64-bit EL1 msr hcr_el2, x0 /* Generic timers. */ @@ -174,7 +303,8 @@ ENTRY(el2_setup) /* sctlr_el1 */ mov x0, #0x0800 // Set/clear RES{1,0} bits - movk x0, #0x30d0, lsl #16 +CPU_BE( movk x0, #0x33d0, lsl #16 ) // Set EE and E0E on BE systems +CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr sctlr_el1, x0 /* Coprocessor traps. */ @@ -197,18 +327,36 @@ ENTRY(el2_setup) PSR_MODE_EL1h) msr spsr_el2, x0 msr elr_el2, lr + mov w20, #BOOT_CPU_MODE_EL2 // This CPU booted in EL2 eret ENDPROC(el2_setup) /* + * Sets the __boot_cpu_mode flag depending on the CPU boot mode passed + * in x20. See arch/arm64/include/asm/virt.h for more info. + */ +ENTRY(set_cpu_boot_mode_flag) + ldr x1, =__boot_cpu_mode // Compute __boot_cpu_mode + add x1, x1, x28 + cmp w20, #BOOT_CPU_MODE_EL2 + b.ne 1f + add x1, x1, #4 +1: str w20, [x1] // This CPU has booted in EL1 + dmb sy + dc ivac, x1 // Invalidate potentially stale cache line + ret +ENDPROC(set_cpu_boot_mode_flag) + +/* * We need to find out the CPU boot mode long after boot, so we need to * store it in a writable variable. * * This is not in .bss, because we set it sufficiently early that the boot-time * zeroing of .bss would clobber it. */ - .pushsection .data + .pushsection .data..cacheline_aligned ENTRY(__boot_cpu_mode) + .align L1_CACHE_SHIFT .long BOOT_CPU_MODE_EL2 .long 0 .popsection @@ -218,7 +366,6 @@ ENTRY(__boot_cpu_mode) .quad PAGE_OFFSET #ifdef CONFIG_SMP - .pushsection .smp.pen.text, "ax" .align 3 1: .quad . .quad secondary_holding_pen_release @@ -228,10 +375,12 @@ ENTRY(__boot_cpu_mode) * cores are held until we're ready for them to initialise. */ ENTRY(secondary_holding_pen) - bl __calc_phys_offset // x24=phys offset - bl el2_setup // Drop to EL1 + bl el2_setup // Drop to EL1, w20=cpu_boot_mode + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl set_cpu_boot_mode_flag mrs x0, mpidr_el1 - and x0, x0, #15 // CPU number + ldr x1, =MPIDR_HWID_BITMASK + and x0, x0, x1 adr x1, 1b ldp x2, x3, [x1] sub x1, x1, x2 @@ -242,7 +391,17 @@ pen: ldr x4, [x3] wfe b pen ENDPROC(secondary_holding_pen) - .popsection + + /* + * Secondary entry point that jumps straight into the kernel. Only to + * be used where CPUs are brought online dynamically by the kernel. + */ +ENTRY(secondary_entry) + bl el2_setup // Drop to EL1 + bl __calc_phys_offset // x24=PHYS_OFFSET, x28=PHYS_OFFSET-PAGE_OFFSET + bl set_cpu_boot_mode_flag + b secondary_startup +ENDPROC(secondary_entry) ENTRY(secondary_startup) /* @@ -340,26 +499,18 @@ ENDPROC(__calc_phys_offset) * Preserves: tbl, flags * Corrupts: phys, start, end, pstate */ - .macro create_block_map, tbl, flags, phys, start, end, idmap=0 + .macro create_block_map, tbl, flags, phys, start, end lsr \phys, \phys, #BLOCK_SHIFT - .if \idmap - and \start, \phys, #PTRS_PER_PTE - 1 // table index - .else lsr \start, \start, #BLOCK_SHIFT and \start, \start, #PTRS_PER_PTE - 1 // table index - .endif orr \phys, \flags, \phys, lsl #BLOCK_SHIFT // table entry - .ifnc \start,\end lsr \end, \end, #BLOCK_SHIFT and \end, \end, #PTRS_PER_PTE - 1 // table end index - .endif 9999: str \phys, [\tbl, \start, lsl #3] // store the entry - .ifnc \start,\end add \start, \start, #1 // next entry add \phys, \phys, #BLOCK_SIZE // next block cmp \start, \end b.ls 9999b - .endif .endm /* @@ -368,9 +519,19 @@ ENDPROC(__calc_phys_offset) * - identity mapping to enable the MMU (low address, TTBR0) * - first few MB of the kernel linear mapping to jump to once the MMU has * been enabled, including the FDT blob (TTBR1) + * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + mov x27, lr + + /* + * Invalidate the idmap and swapper page tables to avoid potential + * dirty cache lines being evicted. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range /* * Clear the idmap and swapper page tables. @@ -390,9 +551,13 @@ __create_page_tables: * Create the identity mapping. */ add x0, x25, #PAGE_SIZE // section table address - adr x3, __turn_mmu_on // virtual/physical address + ldr x3, =KERNEL_START + add x3, x3, x28 // __pa(KERNEL_START) create_pgd_entry x25, x0, x3, x5, x6 - create_block_map x0, x7, x3, x5, x5, idmap=1 + ldr x6, =KERNEL_END + mov x5, x3 // __pa(KERNEL_START) + add x6, x6, x28 // __pa(KERNEL_END) + create_block_map x0, x7, x3, x5, x6 /* * Map the kernel image (starting with PHYS_OFFSET). @@ -400,7 +565,7 @@ __create_page_tables: add x0, x26, #PAGE_SIZE // section table address mov x5, #PAGE_OFFSET create_pgd_entry x26, x0, x5, x3, x6 - ldr x6, =KERNEL_END - 1 + ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 @@ -420,6 +585,23 @@ __create_page_tables: sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 1: + /* + * Create the pgd entry for the fixed mappings. + */ + ldr x5, =FIXADDR_TOP // Fixed mapping virtual address + add x0, x26, #2 * PAGE_SIZE // section table address + create_pgd_entry x26, x0, x5, x6, x7 + + /* + * Since the page tables have been populated with non-cacheable + * accesses (MMU disabled), invalidate the idmap and swapper page + * tables again to remove any speculatively loaded cache lines. + */ + mov x0, x25 + add x1, x26, #SWAPPER_DIR_SIZE + bl __inval_cache_range + + mov lr, x27 ret ENDPROC(__create_page_tables) .ltorg @@ -428,8 +610,6 @@ ENDPROC(__create_page_tables) .type __switch_data, %object __switch_data: .quad __mmap_switched - .quad __data_loc // x4 - .quad _data // x5 .quad __bss_start // x6 .quad _end // x7 .quad processor_id // x4 @@ -444,15 +624,7 @@ __switch_data: __mmap_switched: adr x3, __switch_data + 8 - ldp x4, x5, [x3], #16 ldp x6, x7, [x3], #16 - cmp x4, x5 // Copy data segment if needed -1: ccmp x5, x6, #4, ne - b.eq 2f - ldr x16, [x4], #8 - str x16, [x5], #8 - b 1b -2: 1: cmp x6, x7 b.hs 2f str xzr, [x6], #8 // Clear BSS diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 5ab825c59db..df1cf15377b 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -20,13 +20,14 @@ #define pr_fmt(fmt) "hw-breakpoint: " fmt +#include <linux/compat.h> +#include <linux/cpu_pm.h> #include <linux/errno.h> #include <linux/hw_breakpoint.h> #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/smp.h> -#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> @@ -169,94 +170,134 @@ static enum debug_el debug_exception_level(int privilege) } } -/* - * Install a perf counter breakpoint. +enum hw_breakpoint_ops { + HW_BREAKPOINT_INSTALL, + HW_BREAKPOINT_UNINSTALL, + HW_BREAKPOINT_RESTORE +}; + +/** + * hw_breakpoint_slot_setup - Find and setup a perf slot according to + * operations + * + * @slots: pointer to array of slots + * @max_slots: max number of slots + * @bp: perf_event to setup + * @ops: operation to be carried out on the slot + * + * Return: + * slot index on success + * -ENOSPC if no slot is available/matches + * -EINVAL on wrong operations parameter */ -int arch_install_hw_breakpoint(struct perf_event *bp) +static int hw_breakpoint_slot_setup(struct perf_event **slots, int max_slots, + struct perf_event *bp, + enum hw_breakpoint_ops ops) +{ + int i; + struct perf_event **slot; + + for (i = 0; i < max_slots; ++i) { + slot = &slots[i]; + switch (ops) { + case HW_BREAKPOINT_INSTALL: + if (!*slot) { + *slot = bp; + return i; + } + break; + case HW_BREAKPOINT_UNINSTALL: + if (*slot == bp) { + *slot = NULL; + return i; + } + break; + case HW_BREAKPOINT_RESTORE: + if (*slot == bp) + return i; + break; + default: + pr_warn_once("Unhandled hw breakpoint ops %d\n", ops); + return -EINVAL; + } + } + return -ENOSPC; +} + +static int hw_breakpoint_control(struct perf_event *bp, + enum hw_breakpoint_ops ops) { struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; + struct perf_event **slots; struct debug_info *debug_info = ¤t->thread.debug; int i, max_slots, ctrl_reg, val_reg, reg_enable; + enum debug_el dbg_el = debug_exception_level(info->ctrl.privilege); u32 ctrl; if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { /* Breakpoint */ ctrl_reg = AARCH64_DBG_REG_BCR; val_reg = AARCH64_DBG_REG_BVR; - slots = __get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); max_slots = core_num_brps; reg_enable = !debug_info->bps_disabled; } else { /* Watchpoint */ ctrl_reg = AARCH64_DBG_REG_WCR; val_reg = AARCH64_DBG_REG_WVR; - slots = __get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; reg_enable = !debug_info->wps_disabled; } - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (!*slot) { - *slot = bp; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return -ENOSPC; + i = hw_breakpoint_slot_setup(slots, max_slots, bp, ops); - /* Ensure debug monitors are enabled at the correct exception level. */ - enable_debug_monitors(debug_exception_level(info->ctrl.privilege)); + if (WARN_ONCE(i < 0, "Can't find any breakpoint slot")) + return i; - /* Setup the address register. */ - write_wb_reg(val_reg, i, info->address); + switch (ops) { + case HW_BREAKPOINT_INSTALL: + /* + * Ensure debug monitors are enabled at the correct exception + * level. + */ + enable_debug_monitors(dbg_el); + /* Fall through */ + case HW_BREAKPOINT_RESTORE: + /* Setup the address register. */ + write_wb_reg(val_reg, i, info->address); + + /* Setup the control register. */ + ctrl = encode_ctrl_reg(info->ctrl); + write_wb_reg(ctrl_reg, i, + reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + break; + case HW_BREAKPOINT_UNINSTALL: + /* Reset the control register. */ + write_wb_reg(ctrl_reg, i, 0); - /* Setup the control register. */ - ctrl = encode_ctrl_reg(info->ctrl); - write_wb_reg(ctrl_reg, i, reg_enable ? ctrl | 0x1 : ctrl & ~0x1); + /* + * Release the debug monitors for the correct exception + * level. + */ + disable_debug_monitors(dbg_el); + break; + } return 0; } -void arch_uninstall_hw_breakpoint(struct perf_event *bp) +/* + * Install a perf counter breakpoint. + */ +int arch_install_hw_breakpoint(struct perf_event *bp) { - struct arch_hw_breakpoint *info = counter_arch_bp(bp); - struct perf_event **slot, **slots; - int i, max_slots, base; - - if (info->ctrl.type == ARM_BREAKPOINT_EXECUTE) { - /* Breakpoint */ - base = AARCH64_DBG_REG_BCR; - slots = __get_cpu_var(bp_on_reg); - max_slots = core_num_brps; - } else { - /* Watchpoint */ - base = AARCH64_DBG_REG_WCR; - slots = __get_cpu_var(wp_on_reg); - max_slots = core_num_wrps; - } - - /* Remove the breakpoint. */ - for (i = 0; i < max_slots; ++i) { - slot = &slots[i]; - - if (*slot == bp) { - *slot = NULL; - break; - } - } - - if (WARN_ONCE(i == max_slots, "Can't find any breakpoint slot")) - return; - - /* Reset the control register. */ - write_wb_reg(base, i, 0); + return hw_breakpoint_control(bp, HW_BREAKPOINT_INSTALL); +} - /* Release the debug monitors for the correct exception level. */ - disable_debug_monitors(debug_exception_level(info->ctrl.privilege)); +void arch_uninstall_hw_breakpoint(struct perf_event *bp) +{ + hw_breakpoint_control(bp, HW_BREAKPOINT_UNINSTALL); } static int get_hbp_len(u8 hbp_len) @@ -505,11 +546,11 @@ static void toggle_bp_registers(int reg, enum debug_el el, int enable) switch (reg) { case AARCH64_DBG_REG_BCR: - slots = __get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); max_slots = core_num_brps; break; case AARCH64_DBG_REG_WCR: - slots = __get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); max_slots = core_num_wrps; break; default: @@ -546,7 +587,7 @@ static int breakpoint_handler(unsigned long unused, unsigned int esr, struct debug_info *debug_info; struct arch_hw_breakpoint_ctrl ctrl; - slots = (struct perf_event **)__get_cpu_var(bp_on_reg); + slots = this_cpu_ptr(bp_on_reg); addr = instruction_pointer(regs); debug_info = ¤t->thread.debug; @@ -596,7 +637,7 @@ unlock: user_enable_single_step(current); } else { toggle_bp_registers(AARCH64_DBG_REG_BCR, DBG_ACTIVE_EL1, 0); - kernel_step = &__get_cpu_var(stepping_kernel_bp); + kernel_step = this_cpu_ptr(&stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) return 0; @@ -623,7 +664,7 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr, struct arch_hw_breakpoint *info; struct arch_hw_breakpoint_ctrl ctrl; - slots = (struct perf_event **)__get_cpu_var(wp_on_reg); + slots = this_cpu_ptr(wp_on_reg); debug_info = ¤t->thread.debug; for (i = 0; i < core_num_wrps; ++i) { @@ -698,7 +739,7 @@ unlock: user_enable_single_step(current); } else { toggle_bp_registers(AARCH64_DBG_REG_WCR, DBG_ACTIVE_EL1, 0); - kernel_step = &__get_cpu_var(stepping_kernel_bp); + kernel_step = this_cpu_ptr(&stepping_kernel_bp); if (*kernel_step != ARM_KERNEL_STEP_NONE) return 0; @@ -722,7 +763,7 @@ int reinstall_suspended_bps(struct pt_regs *regs) struct debug_info *debug_info = ¤t->thread.debug; int handled_exception = 0, *kernel_step; - kernel_step = &__get_cpu_var(stepping_kernel_bp); + kernel_step = this_cpu_ptr(&stepping_kernel_bp); /* * Called from single-step exception handler. @@ -806,35 +847,61 @@ void hw_breakpoint_thread_switch(struct task_struct *next) /* * CPU initialisation. */ -static void reset_ctrl_regs(void *unused) +static void hw_breakpoint_reset(void *unused) { int i; - - for (i = 0; i < core_num_brps; ++i) { - write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + struct perf_event **slots; + /* + * When a CPU goes through cold-boot, it does not have any installed + * slot, so it is safe to share the same function for restoring and + * resetting breakpoints; when a CPU is hotplugged in, it goes + * through the slots, which are all empty, hence it just resets control + * and value for debug registers. + * When this function is triggered on warm-boot through a CPU PM + * notifier some slots might be initialized; if so they are + * reprogrammed according to the debug slots content. + */ + for (slots = this_cpu_ptr(bp_on_reg), i = 0; i < core_num_brps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_BCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_BVR, i, 0UL); + } } - for (i = 0; i < core_num_wrps; ++i) { - write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); - write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + for (slots = this_cpu_ptr(wp_on_reg), i = 0; i < core_num_wrps; ++i) { + if (slots[i]) { + hw_breakpoint_control(slots[i], HW_BREAKPOINT_RESTORE); + } else { + write_wb_reg(AARCH64_DBG_REG_WCR, i, 0UL); + write_wb_reg(AARCH64_DBG_REG_WVR, i, 0UL); + } } } -static int __cpuinit hw_breakpoint_reset_notify(struct notifier_block *self, +static int hw_breakpoint_reset_notify(struct notifier_block *self, unsigned long action, void *hcpu) { int cpu = (long)hcpu; if (action == CPU_ONLINE) - smp_call_function_single(cpu, reset_ctrl_regs, NULL, 1); + smp_call_function_single(cpu, hw_breakpoint_reset, NULL, 1); return NOTIFY_OK; } -static struct notifier_block __cpuinitdata hw_breakpoint_reset_nb = { +static struct notifier_block hw_breakpoint_reset_nb = { .notifier_call = hw_breakpoint_reset_notify, }; +#ifdef CONFIG_ARM64_CPU_SUSPEND +extern void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)); +#else +static inline void cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ +} +#endif + /* * One-time initialisation. */ @@ -846,12 +913,14 @@ static int __init arch_hw_breakpoint_init(void) pr_info("found %d breakpoint and %d watchpoint registers.\n", core_num_brps, core_num_wrps); + cpu_notifier_register_begin(); + /* * Reset the breakpoint resources. We assume that a halting * debugger will leave the world in a nice state for us. */ - smp_call_function(reset_ctrl_regs, NULL, 1); - reset_ctrl_regs(NULL); + smp_call_function(hw_breakpoint_reset, NULL, 1); + hw_breakpoint_reset(NULL); /* Register debug fault handlers. */ hook_debug_fault_code(DBG_ESR_EVT_HWBP, breakpoint_handler, SIGTRAP, @@ -860,7 +929,12 @@ static int __init arch_hw_breakpoint_init(void) TRAP_HWBKPT, "hw-watchpoint handler"); /* Register hotplug notifier. */ - register_cpu_notifier(&hw_breakpoint_reset_nb); + __register_cpu_notifier(&hw_breakpoint_reset_nb); + + cpu_notifier_register_done(); + + /* Register cpu_suspend hw breakpoint restore hook */ + cpu_suspend_set_dbg_restorer(hw_breakpoint_reset); return 0; } diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c new file mode 100644 index 00000000000..92f36835486 --- /dev/null +++ b/arch/arm64/kernel/insn.c @@ -0,0 +1,304 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/bitops.h> +#include <linux/compiler.h> +#include <linux/kernel.h> +#include <linux/smp.h> +#include <linux/stop_machine.h> +#include <linux/uaccess.h> +#include <asm/cacheflush.h> +#include <asm/insn.h> + +static int aarch64_insn_encoding_class[] = { + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_UNKNOWN, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_DP_IMM, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_BR_SYS, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_REG, + AARCH64_INSN_CLS_LDST, + AARCH64_INSN_CLS_DP_FPSIMD, +}; + +enum aarch64_insn_encoding_class __kprobes aarch64_get_insn_class(u32 insn) +{ + return aarch64_insn_encoding_class[(insn >> 25) & 0xf]; +} + +/* NOP is an alias of HINT */ +bool __kprobes aarch64_insn_is_nop(u32 insn) +{ + if (!aarch64_insn_is_hint(insn)) + return false; + + switch (insn & 0xFE0) { + case AARCH64_INSN_HINT_YIELD: + case AARCH64_INSN_HINT_WFE: + case AARCH64_INSN_HINT_WFI: + case AARCH64_INSN_HINT_SEV: + case AARCH64_INSN_HINT_SEVL: + return false; + default: + return true; + } +} + +/* + * In ARMv8-A, A64 instructions have a fixed length of 32 bits and are always + * little-endian. + */ +int __kprobes aarch64_insn_read(void *addr, u32 *insnp) +{ + int ret; + u32 val; + + ret = probe_kernel_read(&val, addr, AARCH64_INSN_SIZE); + if (!ret) + *insnp = le32_to_cpu(val); + + return ret; +} + +int __kprobes aarch64_insn_write(void *addr, u32 insn) +{ + insn = cpu_to_le32(insn); + return probe_kernel_write(addr, &insn, AARCH64_INSN_SIZE); +} + +static bool __kprobes __aarch64_insn_hotpatch_safe(u32 insn) +{ + if (aarch64_get_insn_class(insn) != AARCH64_INSN_CLS_BR_SYS) + return false; + + return aarch64_insn_is_b(insn) || + aarch64_insn_is_bl(insn) || + aarch64_insn_is_svc(insn) || + aarch64_insn_is_hvc(insn) || + aarch64_insn_is_smc(insn) || + aarch64_insn_is_brk(insn) || + aarch64_insn_is_nop(insn); +} + +/* + * ARM Architecture Reference Manual for ARMv8 Profile-A, Issue A.a + * Section B2.6.5 "Concurrent modification and execution of instructions": + * Concurrent modification and execution of instructions can lead to the + * resulting instruction performing any behavior that can be achieved by + * executing any sequence of instructions that can be executed from the + * same Exception level, except where the instruction before modification + * and the instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, + * or SMC instruction. + */ +bool __kprobes aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn) +{ + return __aarch64_insn_hotpatch_safe(old_insn) && + __aarch64_insn_hotpatch_safe(new_insn); +} + +int __kprobes aarch64_insn_patch_text_nosync(void *addr, u32 insn) +{ + u32 *tp = addr; + int ret; + + /* A64 instructions must be word aligned */ + if ((uintptr_t)tp & 0x3) + return -EINVAL; + + ret = aarch64_insn_write(tp, insn); + if (ret == 0) + flush_icache_range((uintptr_t)tp, + (uintptr_t)tp + AARCH64_INSN_SIZE); + + return ret; +} + +struct aarch64_insn_patch { + void **text_addrs; + u32 *new_insns; + int insn_cnt; + atomic_t cpu_count; +}; + +static int __kprobes aarch64_insn_patch_text_cb(void *arg) +{ + int i, ret = 0; + struct aarch64_insn_patch *pp = arg; + + /* The first CPU becomes master */ + if (atomic_inc_return(&pp->cpu_count) == 1) { + for (i = 0; ret == 0 && i < pp->insn_cnt; i++) + ret = aarch64_insn_patch_text_nosync(pp->text_addrs[i], + pp->new_insns[i]); + /* + * aarch64_insn_patch_text_nosync() calls flush_icache_range(), + * which ends with "dsb; isb" pair guaranteeing global + * visibility. + */ + atomic_set(&pp->cpu_count, -1); + } else { + while (atomic_read(&pp->cpu_count) != -1) + cpu_relax(); + isb(); + } + + return ret; +} + +int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt) +{ + struct aarch64_insn_patch patch = { + .text_addrs = addrs, + .new_insns = insns, + .insn_cnt = cnt, + .cpu_count = ATOMIC_INIT(0), + }; + + if (cnt <= 0) + return -EINVAL; + + return stop_machine(aarch64_insn_patch_text_cb, &patch, + cpu_online_mask); +} + +int __kprobes aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt) +{ + int ret; + u32 insn; + + /* Unsafe to patch multiple instructions without synchronizaiton */ + if (cnt == 1) { + ret = aarch64_insn_read(addrs[0], &insn); + if (ret) + return ret; + + if (aarch64_insn_hotpatch_safe(insn, insns[0])) { + /* + * ARMv8 architecture doesn't guarantee all CPUs see + * the new instruction after returning from function + * aarch64_insn_patch_text_nosync(). So send IPIs to + * all other CPUs to achieve instruction + * synchronization. + */ + ret = aarch64_insn_patch_text_nosync(addrs[0], insns[0]); + kick_all_cpus_sync(); + return ret; + } + } + + return aarch64_insn_patch_text_sync(addrs, insns, cnt); +} + +u32 __kprobes aarch64_insn_encode_immediate(enum aarch64_insn_imm_type type, + u32 insn, u64 imm) +{ + u32 immlo, immhi, lomask, himask, mask; + int shift; + + switch (type) { + case AARCH64_INSN_IMM_ADR: + lomask = 0x3; + himask = 0x7ffff; + immlo = imm & lomask; + imm >>= 2; + immhi = imm & himask; + imm = (immlo << 24) | (immhi); + mask = (lomask << 24) | (himask); + shift = 5; + break; + case AARCH64_INSN_IMM_26: + mask = BIT(26) - 1; + shift = 0; + break; + case AARCH64_INSN_IMM_19: + mask = BIT(19) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_16: + mask = BIT(16) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_14: + mask = BIT(14) - 1; + shift = 5; + break; + case AARCH64_INSN_IMM_12: + mask = BIT(12) - 1; + shift = 10; + break; + case AARCH64_INSN_IMM_9: + mask = BIT(9) - 1; + shift = 12; + break; + default: + pr_err("aarch64_insn_encode_immediate: unknown immediate encoding %d\n", + type); + return 0; + } + + /* Update the immediate field. */ + insn &= ~(mask << shift); + insn |= (imm & mask) << shift; + + return insn; +} + +u32 __kprobes aarch64_insn_gen_branch_imm(unsigned long pc, unsigned long addr, + enum aarch64_insn_branch_type type) +{ + u32 insn; + long offset; + + /* + * PC: A 64-bit Program Counter holding the address of the current + * instruction. A64 instructions must be word-aligned. + */ + BUG_ON((pc & 0x3) || (addr & 0x3)); + + /* + * B/BL support [-128M, 128M) offset + * ARM64 virtual address arrangement guarantees all kernel and module + * texts are within +/-128M. + */ + offset = ((long)addr - (long)pc); + BUG_ON(offset < -SZ_128M || offset >= SZ_128M); + + if (type == AARCH64_INSN_BRANCH_LINK) + insn = aarch64_insn_get_bl_value(); + else + insn = aarch64_insn_get_b_value(); + + return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_26, insn, + offset >> 2); +} + +u32 __kprobes aarch64_insn_gen_hint(enum aarch64_insn_hint_op op) +{ + return aarch64_insn_get_hint_value() | op; +} + +u32 __kprobes aarch64_insn_gen_nop(void) +{ + return aarch64_insn_gen_hint(AARCH64_INSN_HINT_NOP); +} diff --git a/arch/arm64/kernel/irq.c b/arch/arm64/kernel/irq.c index 0373c6609ea..0f08dfd69eb 100644 --- a/arch/arm64/kernel/irq.c +++ b/arch/arm64/kernel/irq.c @@ -25,7 +25,7 @@ #include <linux/irq.h> #include <linux/smp.h> #include <linux/init.h> -#include <linux/of_irq.h> +#include <linux/irqchip.h> #include <linux/seq_file.h> #include <linux/ratelimit.h> @@ -67,18 +67,82 @@ void handle_IRQ(unsigned int irq, struct pt_regs *regs) set_irq_regs(old_regs); } -/* - * Interrupt controllers supported by the kernel. - */ -static const struct of_device_id intctrl_of_match[] __initconst = { - /* IRQ controllers { .compatible, .data } info to go here */ - {} -}; +void __init set_handle_irq(void (*handle_irq)(struct pt_regs *)) +{ + if (handle_arch_irq) + return; + + handle_arch_irq = handle_irq; +} void __init init_IRQ(void) { - of_irq_init(intctrl_of_match); - + irqchip_init(); if (!handle_arch_irq) panic("No interrupt controller found."); } + +#ifdef CONFIG_HOTPLUG_CPU +static bool migrate_one_irq(struct irq_desc *desc) +{ + struct irq_data *d = irq_desc_get_irq_data(desc); + const struct cpumask *affinity = d->affinity; + struct irq_chip *c; + bool ret = false; + + /* + * If this is a per-CPU interrupt, or the affinity does not + * include this CPU, then we have nothing to do. + */ + if (irqd_is_per_cpu(d) || !cpumask_test_cpu(smp_processor_id(), affinity)) + return false; + + if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) + ret = true; + + /* + * when using forced irq_set_affinity we must ensure that the cpu + * being offlined is not present in the affinity mask, it may be + * selected as the target CPU otherwise + */ + affinity = cpu_online_mask; + c = irq_data_get_irq_chip(d); + if (!c->irq_set_affinity) + pr_debug("IRQ%u: unable to set affinity\n", d->irq); + else if (c->irq_set_affinity(d, affinity, true) == IRQ_SET_MASK_OK && ret) + cpumask_copy(d->affinity, affinity); + + return ret; +} + +/* + * The current CPU has been marked offline. Migrate IRQs off this CPU. + * If the affinity settings do not allow other CPUs, force them onto any + * available CPU. + * + * Note: we must iterate over all IRQs, whether they have an attached + * action structure or not, as we need to get chained interrupts too. + */ +void migrate_irqs(void) +{ + unsigned int i; + struct irq_desc *desc; + unsigned long flags; + + local_irq_save(flags); + + for_each_irq_desc(i, desc) { + bool affinity_broken; + + raw_spin_lock(&desc->lock); + affinity_broken = migrate_one_irq(desc); + raw_spin_unlock(&desc->lock); + + if (affinity_broken) + pr_warn_ratelimited("IRQ%u no longer affine to CPU%u\n", + i, smp_processor_id()); + } + + local_irq_restore(flags); +} +#endif /* CONFIG_HOTPLUG_CPU */ diff --git a/arch/arm64/kernel/jump_label.c b/arch/arm64/kernel/jump_label.c new file mode 100644 index 00000000000..263a166291f --- /dev/null +++ b/arch/arm64/kernel/jump_label.c @@ -0,0 +1,58 @@ +/* + * Copyright (C) 2013 Huawei Ltd. + * Author: Jiang Liu <liuj97@gmail.com> + * + * Based on arch/arm/kernel/jump_label.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ +#include <linux/kernel.h> +#include <linux/jump_label.h> +#include <asm/insn.h> + +#ifdef HAVE_JUMP_LABEL + +static void __arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type, + bool is_static) +{ + void *addr = (void *)entry->code; + u32 insn; + + if (type == JUMP_LABEL_ENABLE) { + insn = aarch64_insn_gen_branch_imm(entry->code, + entry->target, + AARCH64_INSN_BRANCH_NOLINK); + } else { + insn = aarch64_insn_gen_nop(); + } + + if (is_static) + aarch64_insn_patch_text_nosync(addr, insn); + else + aarch64_insn_patch_text(&addr, &insn, 1); +} + +void arch_jump_label_transform(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, false); +} + +void arch_jump_label_transform_static(struct jump_entry *entry, + enum jump_label_type type) +{ + __arch_jump_label_transform(entry, type, true); +} + +#endif /* HAVE_JUMP_LABEL */ diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c new file mode 100644 index 00000000000..75c9cf1aafe --- /dev/null +++ b/arch/arm64/kernel/kgdb.c @@ -0,0 +1,336 @@ +/* + * AArch64 KGDB support + * + * Based on arch/arm/kernel/kgdb.c + * + * Copyright (C) 2013 Cavium Inc. + * Author: Vijaya Kumar K <vijaya.kumar@caviumnetworks.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/irq.h> +#include <linux/kdebug.h> +#include <linux/kgdb.h> +#include <asm/traps.h> + +struct dbg_reg_def_t dbg_reg_def[DBG_MAX_REG_NUM] = { + { "x0", 8, offsetof(struct pt_regs, regs[0])}, + { "x1", 8, offsetof(struct pt_regs, regs[1])}, + { "x2", 8, offsetof(struct pt_regs, regs[2])}, + { "x3", 8, offsetof(struct pt_regs, regs[3])}, + { "x4", 8, offsetof(struct pt_regs, regs[4])}, + { "x5", 8, offsetof(struct pt_regs, regs[5])}, + { "x6", 8, offsetof(struct pt_regs, regs[6])}, + { "x7", 8, offsetof(struct pt_regs, regs[7])}, + { "x8", 8, offsetof(struct pt_regs, regs[8])}, + { "x9", 8, offsetof(struct pt_regs, regs[9])}, + { "x10", 8, offsetof(struct pt_regs, regs[10])}, + { "x11", 8, offsetof(struct pt_regs, regs[11])}, + { "x12", 8, offsetof(struct pt_regs, regs[12])}, + { "x13", 8, offsetof(struct pt_regs, regs[13])}, + { "x14", 8, offsetof(struct pt_regs, regs[14])}, + { "x15", 8, offsetof(struct pt_regs, regs[15])}, + { "x16", 8, offsetof(struct pt_regs, regs[16])}, + { "x17", 8, offsetof(struct pt_regs, regs[17])}, + { "x18", 8, offsetof(struct pt_regs, regs[18])}, + { "x19", 8, offsetof(struct pt_regs, regs[19])}, + { "x20", 8, offsetof(struct pt_regs, regs[20])}, + { "x21", 8, offsetof(struct pt_regs, regs[21])}, + { "x22", 8, offsetof(struct pt_regs, regs[22])}, + { "x23", 8, offsetof(struct pt_regs, regs[23])}, + { "x24", 8, offsetof(struct pt_regs, regs[24])}, + { "x25", 8, offsetof(struct pt_regs, regs[25])}, + { "x26", 8, offsetof(struct pt_regs, regs[26])}, + { "x27", 8, offsetof(struct pt_regs, regs[27])}, + { "x28", 8, offsetof(struct pt_regs, regs[28])}, + { "x29", 8, offsetof(struct pt_regs, regs[29])}, + { "x30", 8, offsetof(struct pt_regs, regs[30])}, + { "sp", 8, offsetof(struct pt_regs, sp)}, + { "pc", 8, offsetof(struct pt_regs, pc)}, + { "pstate", 8, offsetof(struct pt_regs, pstate)}, + { "v0", 16, -1 }, + { "v1", 16, -1 }, + { "v2", 16, -1 }, + { "v3", 16, -1 }, + { "v4", 16, -1 }, + { "v5", 16, -1 }, + { "v6", 16, -1 }, + { "v7", 16, -1 }, + { "v8", 16, -1 }, + { "v9", 16, -1 }, + { "v10", 16, -1 }, + { "v11", 16, -1 }, + { "v12", 16, -1 }, + { "v13", 16, -1 }, + { "v14", 16, -1 }, + { "v15", 16, -1 }, + { "v16", 16, -1 }, + { "v17", 16, -1 }, + { "v18", 16, -1 }, + { "v19", 16, -1 }, + { "v20", 16, -1 }, + { "v21", 16, -1 }, + { "v22", 16, -1 }, + { "v23", 16, -1 }, + { "v24", 16, -1 }, + { "v25", 16, -1 }, + { "v26", 16, -1 }, + { "v27", 16, -1 }, + { "v28", 16, -1 }, + { "v29", 16, -1 }, + { "v30", 16, -1 }, + { "v31", 16, -1 }, + { "fpsr", 4, -1 }, + { "fpcr", 4, -1 }, +}; + +char *dbg_get_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return NULL; + + if (dbg_reg_def[regno].offset != -1) + memcpy(mem, (void *)regs + dbg_reg_def[regno].offset, + dbg_reg_def[regno].size); + else + memset(mem, 0, dbg_reg_def[regno].size); + return dbg_reg_def[regno].name; +} + +int dbg_set_reg(int regno, void *mem, struct pt_regs *regs) +{ + if (regno >= DBG_MAX_REG_NUM || regno < 0) + return -EINVAL; + + if (dbg_reg_def[regno].offset != -1) + memcpy((void *)regs + dbg_reg_def[regno].offset, mem, + dbg_reg_def[regno].size); + return 0; +} + +void +sleeping_thread_to_gdb_regs(unsigned long *gdb_regs, struct task_struct *task) +{ + struct pt_regs *thread_regs; + + /* Initialize to zero */ + memset((char *)gdb_regs, 0, NUMREGBYTES); + thread_regs = task_pt_regs(task); + memcpy((void *)gdb_regs, (void *)thread_regs->regs, GP_REG_BYTES); +} + +void kgdb_arch_set_pc(struct pt_regs *regs, unsigned long pc) +{ + regs->pc = pc; +} + +static int compiled_break; + +static void kgdb_arch_update_addr(struct pt_regs *regs, + char *remcom_in_buffer) +{ + unsigned long addr; + char *ptr; + + ptr = &remcom_in_buffer[1]; + if (kgdb_hex2long(&ptr, &addr)) + kgdb_arch_set_pc(regs, addr); + else if (compiled_break == 1) + kgdb_arch_set_pc(regs, regs->pc + 4); + + compiled_break = 0; +} + +int kgdb_arch_handle_exception(int exception_vector, int signo, + int err_code, char *remcom_in_buffer, + char *remcom_out_buffer, + struct pt_regs *linux_regs) +{ + int err; + + switch (remcom_in_buffer[0]) { + case 'D': + case 'k': + /* + * Packet D (Detach), k (kill). No special handling + * is required here. Handle same as c packet. + */ + case 'c': + /* + * Packet c (Continue) to continue executing. + * Set pc to required address. + * Try to read optional parameter and set pc. + * If this was a compiled breakpoint, we need to move + * to the next instruction else we will just breakpoint + * over and over again. + */ + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, -1); + kgdb_single_step = 0; + + /* + * Received continue command, disable single step + */ + if (kernel_active_single_step()) + kernel_disable_single_step(); + + err = 0; + break; + case 's': + /* + * Update step address value with address passed + * with step packet. + * On debug exception return PC is copied to ELR + * So just update PC. + * If no step address is passed, resume from the address + * pointed by PC. Do not update PC + */ + kgdb_arch_update_addr(linux_regs, remcom_in_buffer); + atomic_set(&kgdb_cpu_doing_single_step, raw_smp_processor_id()); + kgdb_single_step = 1; + + /* + * Enable single step handling + */ + if (!kernel_active_single_step()) + kernel_enable_single_step(linux_regs); + err = 0; + break; + default: + err = -1; + } + return err; +} + +static int kgdb_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + +static int kgdb_compiled_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + compiled_break = 1; + kgdb_handle_exception(1, SIGTRAP, 0, regs); + + return 0; +} + +static int kgdb_step_brk_fn(struct pt_regs *regs, unsigned int esr) +{ + kgdb_handle_exception(1, SIGTRAP, 0, regs); + return 0; +} + +static struct break_hook kgdb_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KGDB_DYN_DGB_BRK_IMM), + .fn = kgdb_brk_fn +}; + +static struct break_hook kgdb_compiled_brkpt_hook = { + .esr_mask = 0xffffffff, + .esr_val = DBG_ESR_VAL_BRK(KDBG_COMPILED_DBG_BRK_IMM), + .fn = kgdb_compiled_brk_fn +}; + +static struct step_hook kgdb_step_hook = { + .fn = kgdb_step_brk_fn +}; + +static void kgdb_call_nmi_hook(void *ignored) +{ + kgdb_nmicallback(raw_smp_processor_id(), get_irq_regs()); +} + +void kgdb_roundup_cpus(unsigned long flags) +{ + local_irq_enable(); + smp_call_function(kgdb_call_nmi_hook, NULL, 0); + local_irq_disable(); +} + +static int __kgdb_notify(struct die_args *args, unsigned long cmd) +{ + struct pt_regs *regs = args->regs; + + if (kgdb_handle_exception(1, args->signr, cmd, regs)) + return NOTIFY_DONE; + return NOTIFY_STOP; +} + +static int +kgdb_notify(struct notifier_block *self, unsigned long cmd, void *ptr) +{ + unsigned long flags; + int ret; + + local_irq_save(flags); + ret = __kgdb_notify(ptr, cmd); + local_irq_restore(flags); + + return ret; +} + +static struct notifier_block kgdb_notifier = { + .notifier_call = kgdb_notify, + /* + * Want to be lowest priority + */ + .priority = -INT_MAX, +}; + +/* + * kgdb_arch_init - Perform any architecture specific initalization. + * This function will handle the initalization of any architecture + * specific callbacks. + */ +int kgdb_arch_init(void) +{ + int ret = register_die_notifier(&kgdb_notifier); + + if (ret != 0) + return ret; + + register_break_hook(&kgdb_brkpt_hook); + register_break_hook(&kgdb_compiled_brkpt_hook); + register_step_hook(&kgdb_step_hook); + return 0; +} + +/* + * kgdb_arch_exit - Perform any architecture specific uninitalization. + * This function will handle the uninitalization of any architecture + * specific callbacks, for dynamic registration and unregistration. + */ +void kgdb_arch_exit(void) +{ + unregister_break_hook(&kgdb_brkpt_hook); + unregister_break_hook(&kgdb_compiled_brkpt_hook); + unregister_step_hook(&kgdb_step_hook); + unregister_die_notifier(&kgdb_notifier); +} + +/* + * ARM instructions are always in LE. + * Break instruction is encoded in LE format + */ +struct kgdb_arch arch_kgdb_ops = { + .gdb_bpt_instr = { + KGDB_DYN_BRK_INS_BYTE0, + KGDB_DYN_BRK_INS_BYTE1, + KGDB_DYN_BRK_INS_BYTE2, + KGDB_DYN_BRK_INS_BYTE3, + } +}; diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 8b69ecb1d8b..7787208e8cc 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -27,6 +27,9 @@ * * See Documentation/arm/kernel_user_helpers.txt for formal definitions. */ + +#include <asm/unistd32.h> + .align 5 .globl __kuser_helper_start __kuser_helper_start: @@ -35,33 +38,32 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xf57ff05f // dmb sy .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 - .inst 0x01a23f96 // strexdeq r3, r6, [r2] + .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b - .inst 0xf57ff05f // dmb sy + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr .align 5 __kuser_memory_barrier: // 0xffff0fa0 - .inst 0xf57ff05f // dmb sy + .inst 0xf57ff05b // dmb ish .inst 0xe12fff1e // bx lr .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xf57ff05f // dmb sy .inst 0xe1923f9f // 1: ldrex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 - .inst 0x01823f91 // strexeq r3, r1, [r2] + .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 - .inst 0xeaffffef // b <__kuser_memory_barrier> + .inst 0xe12fff1e // bx lr .align 5 __kuser_get_tls: // 0xffff0fe0 @@ -75,3 +77,42 @@ __kuser_helper_version: // 0xffff0ffc .word ((__kuser_helper_end - __kuser_helper_start) >> 5) .globl __kuser_helper_end __kuser_helper_end: + +/* + * AArch32 sigreturn code + * + * For ARM syscalls, the syscall number has to be loaded into r7. + * We do not support an OABI userspace. + * + * For Thumb syscalls, we also pass the syscall number via r7. We therefore + * need two 16-bit instructions. + */ + .globl __aarch32_sigret_code_start +__aarch32_sigret_code_start: + + /* + * ARM Code + */ + .byte __NR_compat_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_sigreturn, 0x27 // svc #__NR_compat_sigreturn + .byte __NR_compat_sigreturn, 0xdf // mov r7, #__NR_compat_sigreturn + + /* + * ARM code + */ + .byte __NR_compat_rt_sigreturn, 0x70, 0xa0, 0xe3 // mov r7, #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0x00, 0x00, 0xef // svc #__NR_compat_rt_sigreturn + + /* + * Thumb code + */ + .byte __NR_compat_rt_sigreturn, 0x27 // svc #__NR_compat_rt_sigreturn + .byte __NR_compat_rt_sigreturn, 0xdf // mov r7, #__NR_compat_rt_sigreturn + + .globl __aarch32_sigret_code_end +__aarch32_sigret_code_end: diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index ca0e3d55da9..1eb1cc95513 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -25,11 +25,15 @@ #include <linux/mm.h> #include <linux/moduleloader.h> #include <linux/vmalloc.h> +#include <asm/insn.h> + +#define AARCH64_INSN_IMM_MOVNZ AARCH64_INSN_IMM_MAX +#define AARCH64_INSN_IMM_MOVK AARCH64_INSN_IMM_16 void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, - GFP_KERNEL, PAGE_KERNEL_EXEC, -1, + GFP_KERNEL, PAGE_KERNEL_EXEC, NUMA_NO_NODE, __builtin_return_address(0)); } @@ -94,25 +98,18 @@ static int reloc_data(enum aarch64_reloc_op op, void *place, u64 val, int len) return 0; } -enum aarch64_imm_type { - INSN_IMM_MOVNZ, - INSN_IMM_MOVK, - INSN_IMM_ADR, - INSN_IMM_26, - INSN_IMM_19, - INSN_IMM_16, - INSN_IMM_14, - INSN_IMM_12, - INSN_IMM_9, -}; - -static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) +static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, + int lsb, enum aarch64_insn_imm_type imm_type) { - u32 immlo, immhi, lomask, himask, mask; - int shift; + u64 imm, limit = 0; + s64 sval; + u32 insn = le32_to_cpu(*(u32 *)place); + + sval = do_reloc(op, place, val); + sval >>= lsb; + imm = sval & 0xffff; - switch (type) { - case INSN_IMM_MOVNZ: + if (imm_type == AARCH64_INSN_IMM_MOVNZ) { /* * For signed MOVW relocations, we have to manipulate the * instruction encoding depending on whether or not the @@ -131,70 +128,12 @@ static u32 encode_insn_immediate(enum aarch64_imm_type type, u32 insn, u64 imm) */ imm = ~imm; } - case INSN_IMM_MOVK: - mask = BIT(16) - 1; - shift = 5; - break; - case INSN_IMM_ADR: - lomask = 0x3; - himask = 0x7ffff; - immlo = imm & lomask; - imm >>= 2; - immhi = imm & himask; - imm = (immlo << 24) | (immhi); - mask = (lomask << 24) | (himask); - shift = 5; - break; - case INSN_IMM_26: - mask = BIT(26) - 1; - shift = 0; - break; - case INSN_IMM_19: - mask = BIT(19) - 1; - shift = 5; - break; - case INSN_IMM_16: - mask = BIT(16) - 1; - shift = 5; - break; - case INSN_IMM_14: - mask = BIT(14) - 1; - shift = 5; - break; - case INSN_IMM_12: - mask = BIT(12) - 1; - shift = 10; - break; - case INSN_IMM_9: - mask = BIT(9) - 1; - shift = 12; - break; - default: - pr_err("encode_insn_immediate: unknown immediate encoding %d\n", - type); - return 0; + imm_type = AARCH64_INSN_IMM_MOVK; } - /* Update the immediate field. */ - insn &= ~(mask << shift); - insn |= (imm & mask) << shift; - - return insn; -} - -static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, enum aarch64_imm_type imm_type) -{ - u64 imm, limit = 0; - s64 sval; - u32 insn = *(u32 *)place; - - sval = do_reloc(op, place, val); - sval >>= lsb; - imm = sval & 0xffff; - /* Update the instruction with the new encoding. */ - *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); /* Shift out the immediate field. */ sval >>= 16; @@ -203,9 +142,9 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, * For unsigned immediates, the overflow check is straightforward. * For signed immediates, the sign bit is actually the bit past the * most significant bit of the field. - * The INSN_IMM_16 immediate type is unsigned. + * The AARCH64_INSN_IMM_16 immediate type is unsigned. */ - if (imm_type != INSN_IMM_16) { + if (imm_type != AARCH64_INSN_IMM_16) { sval++; limit++; } @@ -218,11 +157,11 @@ static int reloc_insn_movw(enum aarch64_reloc_op op, void *place, u64 val, } static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, - int lsb, int len, enum aarch64_imm_type imm_type) + int lsb, int len, enum aarch64_insn_imm_type imm_type) { u64 imm, imm_mask; s64 sval; - u32 insn = *(u32 *)place; + u32 insn = le32_to_cpu(*(u32 *)place); /* Calculate the relocation value. */ sval = do_reloc(op, place, val); @@ -233,7 +172,8 @@ static int reloc_insn_imm(enum aarch64_reloc_op op, void *place, u64 val, imm = sval & imm_mask; /* Update the instruction's immediate field. */ - *(u32 *)place = encode_insn_immediate(imm_type, insn, imm); + insn = aarch64_insn_encode_immediate(imm_type, insn, imm); + *(u32 *)place = cpu_to_le32(insn); /* * Extract the upper value bits (including the sign bit) and @@ -315,125 +255,125 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, overflow_check = false; case R_AARCH64_MOVW_UABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G1_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G2_NC: overflow_check = false; case R_AARCH64_MOVW_UABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_UABS_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 48, - INSN_IMM_16); + AARCH64_INSN_IMM_16); break; case R_AARCH64_MOVW_SABS_G0: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 0, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_SABS_G1: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 16, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_SABS_G2: ovf = reloc_insn_movw(RELOC_OP_ABS, loc, val, 32, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G0_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G0: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 0, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G1_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G1: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 16, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G2_NC: overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - INSN_IMM_MOVK); + AARCH64_INSN_IMM_MOVK); break; case R_AARCH64_MOVW_PREL_G2: ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 32, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; case R_AARCH64_MOVW_PREL_G3: /* We're using the top bits so we can't overflow. */ overflow_check = false; ovf = reloc_insn_movw(RELOC_OP_PREL, loc, val, 48, - INSN_IMM_MOVNZ); + AARCH64_INSN_IMM_MOVNZ); break; /* Immediate instruction relocations. */ case R_AARCH64_LD_PREL_LO19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - INSN_IMM_19); + AARCH64_INSN_IMM_19); break; case R_AARCH64_ADR_PREL_LO21: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, - INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR); break; case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, - INSN_IMM_ADR); + AARCH64_INSN_IMM_ADR); break; case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 0, 12, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST16_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 1, 11, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST32_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 2, 10, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST64_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 3, 9, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_LDST128_ABS_LO12_NC: overflow_check = false; ovf = reloc_insn_imm(RELOC_OP_ABS, loc, val, 4, 8, - INSN_IMM_12); + AARCH64_INSN_IMM_12); break; case R_AARCH64_TSTBR14: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 14, - INSN_IMM_14); + AARCH64_INSN_IMM_14); break; case R_AARCH64_CONDBR19: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 19, - INSN_IMM_19); + AARCH64_INSN_IMM_19); break; case R_AARCH64_JUMP26: case R_AARCH64_CALL26: ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 2, 26, - INSN_IMM_26); + AARCH64_INSN_IMM_26); break; default: diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index f7073c7b1ca..baf5afb7e6a 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -22,6 +22,7 @@ #include <linux/bitmap.h> #include <linux/interrupt.h> +#include <linux/irq.h> #include <linux/kernel.h> #include <linux/export.h> #include <linux/perf_event.h> @@ -107,7 +108,12 @@ armpmu_map_cache_event(const unsigned (*cache_map) static int armpmu_map_event(const unsigned (*event_map)[PERF_COUNT_HW_MAX], u64 config) { - int mapping = (*event_map)[config]; + int mapping; + + if (config >= PERF_COUNT_HW_MAX) + return -EINVAL; + + mapping = (*event_map)[config]; return mapping == HW_OP_UNSUPPORTED ? -ENOENT : mapping; } @@ -317,7 +323,13 @@ validate_event(struct pmu_hw_events *hw_events, struct hw_perf_event fake_event = event->hw; struct pmu *leader_pmu = event->group_leader->pmu; - if (event->pmu != leader_pmu || event->state <= PERF_EVENT_STATE_OFF) + if (is_software_event(event)) + return 1; + + if (event->pmu != leader_pmu || event->state < PERF_EVENT_STATE_OFF) + return 1; + + if (event->state == PERF_EVENT_STATE_OFF && !event->attr.enable_on_exec) return 1; return armpmu->get_event_idx(hw_events, &fake_event) >= 0; @@ -352,26 +364,53 @@ validate_group(struct perf_event *event) } static void +armpmu_disable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + disable_percpu_irq(irq); +} + +static void armpmu_release_hardware(struct arm_pmu *armpmu) { - int i, irq, irqs; + int irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; irqs = min(pmu_device->num_resources, num_possible_cpus()); + if (!irqs) + return; - for (i = 0; i < irqs; ++i) { - if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) - continue; - irq = platform_get_irq(pmu_device, i); - if (irq >= 0) - free_irq(irq, armpmu); + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) + return; + + if (irq_is_percpu(irq)) { + on_each_cpu(armpmu_disable_percpu_irq, &irq, 1); + free_percpu_irq(irq, &cpu_hw_events); + } else { + for (i = 0; i < irqs; ++i) { + if (!cpumask_test_and_clear_cpu(i, &armpmu->active_irqs)) + continue; + irq = platform_get_irq(pmu_device, i); + if (irq > 0) + free_irq(irq, armpmu); + } } } +static void +armpmu_enable_percpu_irq(void *data) +{ + unsigned int irq = *(unsigned int *)data; + enable_percpu_irq(irq, IRQ_TYPE_NONE); +} + static int armpmu_reserve_hardware(struct arm_pmu *armpmu) { - int i, err, irq, irqs; + int err, irq; + unsigned int i, irqs; struct platform_device *pmu_device = armpmu->plat_device; if (!pmu_device) { @@ -380,39 +419,59 @@ armpmu_reserve_hardware(struct arm_pmu *armpmu) } irqs = min(pmu_device->num_resources, num_possible_cpus()); - if (irqs < 1) { + if (!irqs) { pr_err("no irqs for PMUs defined\n"); return -ENODEV; } - for (i = 0; i < irqs; ++i) { - err = 0; - irq = platform_get_irq(pmu_device, i); - if (irq < 0) - continue; + irq = platform_get_irq(pmu_device, 0); + if (irq <= 0) { + pr_err("failed to get valid irq for PMU device\n"); + return -ENODEV; + } - /* - * If we have a single PMU interrupt that we can't shift, - * assume that we're running on a uniprocessor machine and - * continue. Otherwise, continue without this interrupt. - */ - if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { - pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", - irq, i); - continue; - } + if (irq_is_percpu(irq)) { + err = request_percpu_irq(irq, armpmu->handle_irq, + "arm-pmu", &cpu_hw_events); - err = request_irq(irq, armpmu->handle_irq, - IRQF_NOBALANCING, - "arm-pmu", armpmu); if (err) { - pr_err("unable to request IRQ%d for ARM PMU counters\n", - irq); + pr_err("unable to request percpu IRQ%d for ARM PMU counters\n", + irq); armpmu_release_hardware(armpmu); return err; } - cpumask_set_cpu(i, &armpmu->active_irqs); + on_each_cpu(armpmu_enable_percpu_irq, &irq, 1); + } else { + for (i = 0; i < irqs; ++i) { + err = 0; + irq = platform_get_irq(pmu_device, i); + if (irq <= 0) + continue; + + /* + * If we have a single PMU interrupt that we can't shift, + * assume that we're running on a uniprocessor machine and + * continue. Otherwise, continue without this interrupt. + */ + if (irq_set_affinity(irq, cpumask_of(i)) && irqs > 1) { + pr_warning("unable to set irq affinity (irq=%d, cpu=%u)\n", + irq, i); + continue; + } + + err = request_irq(irq, armpmu->handle_irq, + IRQF_NOBALANCING, + "arm-pmu", armpmu); + if (err) { + pr_err("unable to request IRQ%d for ARM PMU counters\n", + irq); + armpmu_release_hardware(armpmu); + return err; + } + + cpumask_set_cpu(i, &armpmu->active_irqs); + } } return 0; @@ -773,8 +832,8 @@ static const unsigned armv8_pmuv3_perf_cache_map[PERF_COUNT_HW_CACHE_MAX] /* * PMXEVTYPER: Event selection reg */ -#define ARMV8_EVTYPE_MASK 0xc00000ff /* Mask for writable bits */ -#define ARMV8_EVTYPE_EVENT 0xff /* Mask for EVENT bits */ +#define ARMV8_EVTYPE_MASK 0xc80003ff /* Mask for writable bits */ +#define ARMV8_EVTYPE_EVENT 0x3ff /* Mask for EVENT bits */ /* * Event filters for PMUv3 @@ -1033,7 +1092,7 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) */ regs = get_irq_regs(); - cpuc = &__get_cpu_var(cpu_hw_events); + cpuc = this_cpu_ptr(&cpu_hw_events); for (idx = 0; idx < cpu_pmu->num_events; ++idx) { struct perf_event *event = cpuc->events[idx]; struct hw_perf_event *hwc; @@ -1164,7 +1223,8 @@ static void armv8pmu_reset(void *info) static int armv8_pmuv3_map_event(struct perf_event *event) { return map_cpu_event(event, &armv8_pmuv3_perf_map, - &armv8_pmuv3_perf_cache_map, 0xFF); + &armv8_pmuv3_perf_cache_map, + ARMV8_EVTYPE_EVENT); } static struct arm_pmu armv8pmu = { @@ -1246,7 +1306,7 @@ device_initcall(register_pmu_driver); static struct pmu_hw_events *armpmu_get_cpu_events(void) { - return &__get_cpu_var(cpu_hw_events); + return this_cpu_ptr(&cpu_hw_events); } static void __init cpu_pmu_init(struct arm_pmu *armpmu) @@ -1288,8 +1348,8 @@ early_initcall(init_hw_perf_events); * Callchain handling code. */ struct frame_tail { - struct frame_tail __user *fp; - unsigned long lr; + struct frame_tail __user *fp; + unsigned long lr; } __attribute__((packed)); /* @@ -1326,16 +1386,84 @@ user_backtrace(struct frame_tail __user *tail, return buftail.fp; } +#ifdef CONFIG_COMPAT +/* + * The registers we're interested in are at the end of the variable + * length saved register structure. The fp points at the end of this + * structure so the address of this struct is: + * (struct compat_frame_tail *)(xxx->fp)-1 + * + * This code has been adapted from the ARM OProfile support. + */ +struct compat_frame_tail { + compat_uptr_t fp; /* a (struct compat_frame_tail *) in compat mode */ + u32 sp; + u32 lr; +} __attribute__((packed)); + +static struct compat_frame_tail __user * +compat_user_backtrace(struct compat_frame_tail __user *tail, + struct perf_callchain_entry *entry) +{ + struct compat_frame_tail buftail; + unsigned long err; + + /* Also check accessibility of one struct frame_tail beyond */ + if (!access_ok(VERIFY_READ, tail, sizeof(buftail))) + return NULL; + + pagefault_disable(); + err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail)); + pagefault_enable(); + + if (err) + return NULL; + + perf_callchain_store(entry, buftail.lr); + + /* + * Frame pointers should strictly progress back up the stack + * (towards higher addresses). + */ + if (tail + 1 >= (struct compat_frame_tail __user *) + compat_ptr(buftail.fp)) + return NULL; + + return (struct compat_frame_tail __user *)compat_ptr(buftail.fp) - 1; +} +#endif /* CONFIG_COMPAT */ + void perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs) { - struct frame_tail __user *tail; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + + perf_callchain_store(entry, regs->pc); - tail = (struct frame_tail __user *)regs->regs[29]; + if (!compat_user_mode(regs)) { + /* AARCH64 mode */ + struct frame_tail __user *tail; - while (entry->nr < PERF_MAX_STACK_DEPTH && - tail && !((unsigned long)tail & 0xf)) - tail = user_backtrace(tail, entry); + tail = (struct frame_tail __user *)regs->regs[29]; + + while (entry->nr < PERF_MAX_STACK_DEPTH && + tail && !((unsigned long)tail & 0xf)) + tail = user_backtrace(tail, entry); + } else { +#ifdef CONFIG_COMPAT + /* AARCH32 compat mode */ + struct compat_frame_tail __user *tail; + + tail = (struct compat_frame_tail __user *)regs->compat_fp - 1; + + while ((entry->nr < PERF_MAX_STACK_DEPTH) && + tail && !((unsigned long)tail & 0x3)) + tail = compat_user_backtrace(tail, entry); +#endif + } } /* @@ -1355,8 +1483,41 @@ void perf_callchain_kernel(struct perf_callchain_entry *entry, { struct stackframe frame; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* We don't support guest os callchain now */ + return; + } + frame.fp = regs->regs[29]; frame.sp = regs->sp; frame.pc = regs->pc; + walk_stackframe(&frame, callchain_trace, entry); } + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + return perf_guest_cbs->get_guest_ip(); + + return instruction_pointer(regs); +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + if (perf_guest_cbs->is_user_mode()) + misc |= PERF_RECORD_MISC_GUEST_USER; + else + misc |= PERF_RECORD_MISC_GUEST_KERNEL; + } else { + if (user_mode(regs)) + misc |= PERF_RECORD_MISC_USER; + else + misc |= PERF_RECORD_MISC_KERNEL; + } + + return misc; +} diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c new file mode 100644 index 00000000000..422ebd63b61 --- /dev/null +++ b/arch/arm64/kernel/perf_regs.c @@ -0,0 +1,46 @@ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/bug.h> + +#include <asm/compat.h> +#include <asm/perf_regs.h> +#include <asm/ptrace.h> + +u64 perf_reg_value(struct pt_regs *regs, int idx) +{ + if (WARN_ON_ONCE((u32)idx >= PERF_REG_ARM64_MAX)) + return 0; + + /* + * Compat (i.e. 32 bit) mode: + * - PC has been set in the pt_regs struct in kernel_entry, + * - Handle SP and LR here. + */ + if (compat_user_mode(regs)) { + if ((u32)idx == PERF_REG_ARM64_SP) + return regs->compat_sp; + if ((u32)idx == PERF_REG_ARM64_LR) + return regs->compat_lr; + } + + return regs->regs[idx]; +} + +#define REG_RESERVED (~((1ULL << PERF_REG_ARM64_MAX) - 1)) + +int perf_reg_validate(u64 mask) +{ + if (!mask || mask & REG_RESERVED) + return -EINVAL; + + return 0; +} + +u64 perf_reg_abi(struct task_struct *task) +{ + if (is_compat_thread(task_thread_info(task))) + return PERF_SAMPLE_REGS_ABI_32; + else + return PERF_SAMPLE_REGS_ABI_64; +} diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index cb0956bc96e..43b7c34f92c 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -20,6 +20,7 @@ #include <stdarg.h> +#include <linux/compat.h> #include <linux/export.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -45,9 +46,10 @@ #include <asm/compat.h> #include <asm/cacheflush.h> +#include <asm/fpsimd.h> +#include <asm/mmu_context.h> #include <asm/processor.h> #include <asm/stacktrace.h> -#include <asm/fpsimd.h> static void setup_restart(void) { @@ -70,8 +72,17 @@ static void setup_restart(void) void soft_restart(unsigned long addr) { + typedef void (*phys_reset_t)(unsigned long); + phys_reset_t phys_reset; + setup_restart(); - cpu_reset(addr); + + /* Switch to the identity mapping */ + phys_reset = (phys_reset_t)virt_to_phys(cpu_reset); + phys_reset(addr); + + /* Should never get here */ + BUG(); } /* @@ -80,14 +91,13 @@ void soft_restart(unsigned long addr) void (*pm_power_off)(void); EXPORT_SYMBOL_GPL(pm_power_off); -void (*pm_restart)(const char *cmd); -EXPORT_SYMBOL_GPL(pm_restart); - +void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); +EXPORT_SYMBOL_GPL(arm_pm_restart); /* * This is our default idle handler. */ -static void default_idle(void) +void arch_cpu_idle(void) { /* * This should do all the clock switching and wait for interrupt @@ -97,79 +107,73 @@ static void default_idle(void) local_irq_enable(); } -void (*pm_idle)(void) = default_idle; -EXPORT_SYMBOL_GPL(pm_idle); - -/* - * The idle thread, has rather strange semantics for calling pm_idle, - * but this is what x86 does and we need to do the same, so that - * things like cpuidle get called in the same way. The only difference - * is that we always respect 'hlt_counter' to prevent low power idle. - */ -void cpu_idle(void) +#ifdef CONFIG_HOTPLUG_CPU +void arch_cpu_idle_dead(void) { - local_fiq_enable(); - - /* endless idle loop with no priority at all */ - while (1) { - tick_nohz_idle_enter(); - rcu_idle_enter(); - while (!need_resched()) { - /* - * We need to disable interrupts here to ensure - * we don't miss a wakeup call. - */ - local_irq_disable(); - if (!need_resched()) { - stop_critical_timings(); - pm_idle(); - start_critical_timings(); - /* - * pm_idle functions should always return - * with IRQs enabled. - */ - WARN_ON(irqs_disabled()); - } else { - local_irq_enable(); - } - } - rcu_idle_exit(); - tick_nohz_idle_exit(); - schedule_preempt_disabled(); - } + cpu_die(); } +#endif +/* + * Called by kexec, immediately prior to machine_kexec(). + * + * This must completely disable all secondary CPUs; simply causing those CPUs + * to execute e.g. a RAM-based pin loop is not sufficient. This allows the + * kexec'd kernel to use any and all RAM as it sees fit, without having to + * avoid any code or data used by any SW CPU pin loop. The CPU hotplug + * functionality embodied in disable_nonboot_cpus() to achieve this. + */ void machine_shutdown(void) { -#ifdef CONFIG_SMP - smp_send_stop(); -#endif + disable_nonboot_cpus(); } +/* + * Halting simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. + */ void machine_halt(void) { - machine_shutdown(); + local_irq_disable(); + smp_send_stop(); while (1); } +/* + * Power-off simply requires that the secondary CPUs stop performing any + * activity (executing tasks, handling interrupts). smp_send_stop() + * achieves this. When the system power is turned off, it will take all CPUs + * with it. + */ void machine_power_off(void) { - machine_shutdown(); + local_irq_disable(); + smp_send_stop(); if (pm_power_off) pm_power_off(); } +/* + * Restart requires that the secondary CPUs stop performing any activity + * while the primary CPU resets the system. Systems with a single CPU can + * use soft_restart() as their machine descriptor's .restart hook, since that + * will cause the only available CPU to reset. Systems with multiple CPUs must + * provide a HW restart implementation, to ensure that all CPUs reset at once. + * This is required so that any code running after reset on the primary CPU + * doesn't have to co-ordinate with other CPUs to ensure they aren't still + * executing pre-reset code, and using RAM that the primary CPU's code wishes + * to use. Implementing such co-ordination would be essentially impossible. + */ void machine_restart(char *cmd) { - machine_shutdown(); - /* Disable interrupts first */ local_irq_disable(); - local_fiq_disable(); + smp_send_stop(); /* Now call the architecture specific reboot code. */ - if (pm_restart) - pm_restart(cmd); + if (arm_pm_restart) + arm_pm_restart(reboot_mode, cmd); /* * Whoops - the architecture was unable to reboot. @@ -180,19 +184,26 @@ void machine_restart(char *cmd) void __show_regs(struct pt_regs *regs) { - int i; + int i, top_reg; + u64 lr, sp; - printk("CPU: %d %s (%s %.*s)\n", - raw_smp_processor_id(), print_tainted(), - init_utsname()->release, - (int)strcspn(init_utsname()->version, " "), - init_utsname()->version); + if (compat_user_mode(regs)) { + lr = regs->compat_lr; + sp = regs->compat_sp; + top_reg = 12; + } else { + lr = regs->regs[30]; + sp = regs->sp; + top_reg = 29; + } + + show_regs_print_info(KERN_DEFAULT); print_symbol("PC is at %s\n", instruction_pointer(regs)); - print_symbol("LR is at %s\n", regs->regs[30]); + print_symbol("LR is at %s\n", lr); printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", - regs->pc, regs->regs[30], regs->pstate); - printk("sp : %016llx\n", regs->sp); - for (i = 29; i >= 0; i--) { + regs->pc, lr, regs->pstate); + printk("sp : %016llx\n", sp); + for (i = top_reg; i >= 0; i--) { printk("x%-2d: %016llx ", i, regs->regs[i]); if (i % 2 == 0) printk("\n"); @@ -203,7 +214,6 @@ void __show_regs(struct pt_regs *regs) void show_regs(struct pt_regs * regs) { printk("\n"); - printk("Pid: %d, comm: %20s\n", task_pid_nr(current), current->comm); __show_regs(regs); } @@ -226,7 +236,7 @@ void release_thread(struct task_struct *dead_task) int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) { - fpsimd_save_state(¤t->thread.fpsimd_state); + fpsimd_preserve_current_state(); *dst = *src; return 0; } @@ -315,6 +325,13 @@ struct task_struct *__switch_to(struct task_struct *prev, fpsimd_thread_switch(next); tls_thread_switch(next); hw_breakpoint_thread_switch(next); + contextidr_thread_switch(next); + + /* + * Complete any pending TLB or cache maintenance on this CPU in case + * the thread migrates to a different CPU. + */ + dsb(ish); /* the actual thread switch */ last = cpu_switch_to(prev, next); @@ -325,6 +342,7 @@ struct task_struct *__switch_to(struct task_struct *prev, unsigned long get_wchan(struct task_struct *p) { struct stackframe frame; + unsigned long stack_page; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; @@ -332,9 +350,11 @@ unsigned long get_wchan(struct task_struct *p) frame.fp = thread_saved_fp(p); frame.sp = thread_saved_sp(p); frame.pc = thread_saved_pc(p); + stack_page = (unsigned long)task_stack_page(p); do { - int ret = unwind_frame(&frame); - if (ret < 0) + if (frame.sp < stack_page || + frame.sp >= stack_page + THREAD_SIZE || + unwind_frame(&frame)) return 0; if (!in_sched_functions(frame.pc)) return frame.pc; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c new file mode 100644 index 00000000000..9e9798f9117 --- /dev/null +++ b/arch/arm64/kernel/psci.c @@ -0,0 +1,450 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * Copyright (C) 2013 ARM Limited + * + * Author: Will Deacon <will.deacon@arm.com> + */ + +#define pr_fmt(fmt) "psci: " fmt + +#include <linux/init.h> +#include <linux/of.h> +#include <linux/smp.h> +#include <linux/reboot.h> +#include <linux/pm.h> +#include <linux/delay.h> +#include <uapi/linux/psci.h> + +#include <asm/compiler.h> +#include <asm/cpu_ops.h> +#include <asm/errno.h> +#include <asm/psci.h> +#include <asm/smp_plat.h> +#include <asm/system_misc.h> + +#define PSCI_POWER_STATE_TYPE_STANDBY 0 +#define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 + +struct psci_power_state { + u16 id; + u8 type; + u8 affinity_level; +}; + +struct psci_operations { + int (*cpu_suspend)(struct psci_power_state state, + unsigned long entry_point); + int (*cpu_off)(struct psci_power_state state); + int (*cpu_on)(unsigned long cpuid, unsigned long entry_point); + int (*migrate)(unsigned long cpuid); + int (*affinity_info)(unsigned long target_affinity, + unsigned long lowest_affinity_level); + int (*migrate_info_type)(void); +}; + +static struct psci_operations psci_ops; + +static int (*invoke_psci_fn)(u64, u64, u64, u64); +typedef int (*psci_initcall_t)(const struct device_node *); + +enum psci_function { + PSCI_FN_CPU_SUSPEND, + PSCI_FN_CPU_ON, + PSCI_FN_CPU_OFF, + PSCI_FN_MIGRATE, + PSCI_FN_AFFINITY_INFO, + PSCI_FN_MIGRATE_INFO_TYPE, + PSCI_FN_MAX, +}; + +static u32 psci_function_id[PSCI_FN_MAX]; + +static int psci_to_linux_errno(int errno) +{ + switch (errno) { + case PSCI_RET_SUCCESS: + return 0; + case PSCI_RET_NOT_SUPPORTED: + return -EOPNOTSUPP; + case PSCI_RET_INVALID_PARAMS: + return -EINVAL; + case PSCI_RET_DENIED: + return -EPERM; + }; + + return -EINVAL; +} + +static u32 psci_power_state_pack(struct psci_power_state state) +{ + return ((state.id << PSCI_0_2_POWER_STATE_ID_SHIFT) + & PSCI_0_2_POWER_STATE_ID_MASK) | + ((state.type << PSCI_0_2_POWER_STATE_TYPE_SHIFT) + & PSCI_0_2_POWER_STATE_TYPE_MASK) | + ((state.affinity_level << PSCI_0_2_POWER_STATE_AFFL_SHIFT) + & PSCI_0_2_POWER_STATE_AFFL_MASK); +} + +/* + * The following two functions are invoked via the invoke_psci_fn pointer + * and will not be inlined, allowing us to piggyback on the AAPCS. + */ +static noinline int __invoke_psci_fn_hvc(u64 function_id, u64 arg0, u64 arg1, + u64 arg2) +{ + asm volatile( + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "hvc #0\n" + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static noinline int __invoke_psci_fn_smc(u64 function_id, u64 arg0, u64 arg1, + u64 arg2) +{ + asm volatile( + __asmeq("%0", "x0") + __asmeq("%1", "x1") + __asmeq("%2", "x2") + __asmeq("%3", "x3") + "smc #0\n" + : "+r" (function_id) + : "r" (arg0), "r" (arg1), "r" (arg2)); + + return function_id; +} + +static int psci_get_version(void) +{ + int err; + + err = invoke_psci_fn(PSCI_0_2_FN_PSCI_VERSION, 0, 0, 0); + return err; +} + +static int psci_cpu_suspend(struct psci_power_state state, + unsigned long entry_point) +{ + int err; + u32 fn, power_state; + + fn = psci_function_id[PSCI_FN_CPU_SUSPEND]; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_off(struct psci_power_state state) +{ + int err; + u32 fn, power_state; + + fn = psci_function_id[PSCI_FN_CPU_OFF]; + power_state = psci_power_state_pack(state); + err = invoke_psci_fn(fn, power_state, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_cpu_on(unsigned long cpuid, unsigned long entry_point) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_CPU_ON]; + err = invoke_psci_fn(fn, cpuid, entry_point, 0); + return psci_to_linux_errno(err); +} + +static int psci_migrate(unsigned long cpuid) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE]; + err = invoke_psci_fn(fn, cpuid, 0, 0); + return psci_to_linux_errno(err); +} + +static int psci_affinity_info(unsigned long target_affinity, + unsigned long lowest_affinity_level) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_AFFINITY_INFO]; + err = invoke_psci_fn(fn, target_affinity, lowest_affinity_level, 0); + return err; +} + +static int psci_migrate_info_type(void) +{ + int err; + u32 fn; + + fn = psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE]; + err = invoke_psci_fn(fn, 0, 0, 0); + return err; +} + +static int get_set_conduit_method(struct device_node *np) +{ + const char *method; + + pr_info("probing for conduit method from DT.\n"); + + if (of_property_read_string(np, "method", &method)) { + pr_warn("missing \"method\" property\n"); + return -ENXIO; + } + + if (!strcmp("hvc", method)) { + invoke_psci_fn = __invoke_psci_fn_hvc; + } else if (!strcmp("smc", method)) { + invoke_psci_fn = __invoke_psci_fn_smc; + } else { + pr_warn("invalid \"method\" property: %s\n", method); + return -EINVAL; + } + return 0; +} + +static void psci_sys_reset(enum reboot_mode reboot_mode, const char *cmd) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_RESET, 0, 0, 0); +} + +static void psci_sys_poweroff(void) +{ + invoke_psci_fn(PSCI_0_2_FN_SYSTEM_OFF, 0, 0, 0); +} + +/* + * PSCI Function IDs for v0.2+ are well defined so use + * standard values. + */ +static int psci_0_2_init(struct device_node *np) +{ + int err, ver; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + ver = psci_get_version(); + + if (ver == PSCI_RET_NOT_SUPPORTED) { + /* PSCI v0.2 mandates implementation of PSCI_ID_VERSION. */ + pr_err("PSCI firmware does not comply with the v0.2 spec.\n"); + err = -EOPNOTSUPP; + goto out_put_node; + } else { + pr_info("PSCIv%d.%d detected in firmware.\n", + PSCI_VERSION_MAJOR(ver), + PSCI_VERSION_MINOR(ver)); + + if (PSCI_VERSION_MAJOR(ver) == 0 && + PSCI_VERSION_MINOR(ver) < 2) { + err = -EINVAL; + pr_err("Conflicting PSCI version detected.\n"); + goto out_put_node; + } + } + + pr_info("Using standard PSCI v0.2 function IDs\n"); + psci_function_id[PSCI_FN_CPU_SUSPEND] = PSCI_0_2_FN64_CPU_SUSPEND; + psci_ops.cpu_suspend = psci_cpu_suspend; + + psci_function_id[PSCI_FN_CPU_OFF] = PSCI_0_2_FN_CPU_OFF; + psci_ops.cpu_off = psci_cpu_off; + + psci_function_id[PSCI_FN_CPU_ON] = PSCI_0_2_FN64_CPU_ON; + psci_ops.cpu_on = psci_cpu_on; + + psci_function_id[PSCI_FN_MIGRATE] = PSCI_0_2_FN64_MIGRATE; + psci_ops.migrate = psci_migrate; + + psci_function_id[PSCI_FN_AFFINITY_INFO] = PSCI_0_2_FN64_AFFINITY_INFO; + psci_ops.affinity_info = psci_affinity_info; + + psci_function_id[PSCI_FN_MIGRATE_INFO_TYPE] = + PSCI_0_2_FN_MIGRATE_INFO_TYPE; + psci_ops.migrate_info_type = psci_migrate_info_type; + + arm_pm_restart = psci_sys_reset; + + pm_power_off = psci_sys_poweroff; + +out_put_node: + of_node_put(np); + return err; +} + +/* + * PSCI < v0.2 get PSCI Function IDs via DT. + */ +static int psci_0_1_init(struct device_node *np) +{ + u32 id; + int err; + + err = get_set_conduit_method(np); + + if (err) + goto out_put_node; + + pr_info("Using PSCI v0.1 Function IDs from DT\n"); + + if (!of_property_read_u32(np, "cpu_suspend", &id)) { + psci_function_id[PSCI_FN_CPU_SUSPEND] = id; + psci_ops.cpu_suspend = psci_cpu_suspend; + } + + if (!of_property_read_u32(np, "cpu_off", &id)) { + psci_function_id[PSCI_FN_CPU_OFF] = id; + psci_ops.cpu_off = psci_cpu_off; + } + + if (!of_property_read_u32(np, "cpu_on", &id)) { + psci_function_id[PSCI_FN_CPU_ON] = id; + psci_ops.cpu_on = psci_cpu_on; + } + + if (!of_property_read_u32(np, "migrate", &id)) { + psci_function_id[PSCI_FN_MIGRATE] = id; + psci_ops.migrate = psci_migrate; + } + +out_put_node: + of_node_put(np); + return err; +} + +static const struct of_device_id psci_of_match[] __initconst = { + { .compatible = "arm,psci", .data = psci_0_1_init}, + { .compatible = "arm,psci-0.2", .data = psci_0_2_init}, + {}, +}; + +int __init psci_init(void) +{ + struct device_node *np; + const struct of_device_id *matched_np; + psci_initcall_t init_fn; + + np = of_find_matching_node_and_match(NULL, psci_of_match, &matched_np); + + if (!np) + return -ENODEV; + + init_fn = (psci_initcall_t)matched_np->data; + return init_fn(np); +} + +#ifdef CONFIG_SMP + +static int __init cpu_psci_cpu_init(struct device_node *dn, unsigned int cpu) +{ + return 0; +} + +static int __init cpu_psci_cpu_prepare(unsigned int cpu) +{ + if (!psci_ops.cpu_on) { + pr_err("no cpu_on method, not booting CPU%d\n", cpu); + return -ENODEV; + } + + return 0; +} + +static int cpu_psci_cpu_boot(unsigned int cpu) +{ + int err = psci_ops.cpu_on(cpu_logical_map(cpu), __pa(secondary_entry)); + if (err) + pr_err("failed to boot CPU%d (%d)\n", cpu, err); + + return err; +} + +#ifdef CONFIG_HOTPLUG_CPU +static int cpu_psci_cpu_disable(unsigned int cpu) +{ + /* Fail early if we don't have CPU_OFF support */ + if (!psci_ops.cpu_off) + return -EOPNOTSUPP; + return 0; +} + +static void cpu_psci_cpu_die(unsigned int cpu) +{ + int ret; + /* + * There are no known implementations of PSCI actually using the + * power state field, pass a sensible default for now. + */ + struct psci_power_state state = { + .type = PSCI_POWER_STATE_TYPE_POWER_DOWN, + }; + + ret = psci_ops.cpu_off(state); + + pr_crit("unable to power off CPU%u (%d)\n", cpu, ret); +} + +static int cpu_psci_cpu_kill(unsigned int cpu) +{ + int err, i; + + if (!psci_ops.affinity_info) + return 1; + /* + * cpu_kill could race with cpu_die and we can + * potentially end up declaring this cpu undead + * while it is dying. So, try again a few times. + */ + + for (i = 0; i < 10; i++) { + err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); + if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { + pr_info("CPU%d killed.\n", cpu); + return 1; + } + + msleep(10); + pr_info("Retrying again to check for CPU kill\n"); + } + + pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", + cpu, err); + /* Make op_cpu_kill() fail. */ + return 0; +} +#endif + +const struct cpu_operations cpu_psci_ops = { + .name = "psci", + .cpu_init = cpu_psci_cpu_init, + .cpu_prepare = cpu_psci_cpu_prepare, + .cpu_boot = cpu_psci_cpu_boot, +#ifdef CONFIG_HOTPLUG_CPU + .cpu_disable = cpu_psci_cpu_disable, + .cpu_die = cpu_psci_cpu_die, + .cpu_kill = cpu_psci_cpu_kill, +#endif +}; + +#endif diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 6e1e77f1831..9fde010c945 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/mm.h> @@ -41,6 +42,9 @@ #include <asm/traps.h> #include <asm/system_misc.h> +#define CREATE_TRACE_POINTS +#include <trace/events/syscalls.h> + /* * TODO: does not yet catch signals sent when the child dies. * in exit.c or in signal.c. @@ -53,28 +57,6 @@ void ptrace_disable(struct task_struct *child) { } -/* - * Handle hitting a breakpoint. - */ -static int ptrace_break(struct pt_regs *regs) -{ - siginfo_t info = { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); - return 0; -} - -static int arm64_break_trap(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - return ptrace_break(regs); -} - #ifdef CONFIG_HAVE_HW_BREAKPOINT /* * Handle hitting a HW-breakpoint. @@ -236,31 +218,29 @@ static int ptrace_hbp_fill_attr_ctrl(unsigned int note_type, { int err, len, type, disabled = !ctrl.enabled; - if (disabled) { - len = 0; - type = HW_BREAKPOINT_EMPTY; - } else { - err = arch_bp_generic_fields(ctrl, &len, &type); - if (err) - return err; - - switch (note_type) { - case NT_ARM_HW_BREAK: - if ((type & HW_BREAKPOINT_X) != type) - return -EINVAL; - break; - case NT_ARM_HW_WATCH: - if ((type & HW_BREAKPOINT_RW) != type) - return -EINVAL; - break; - default: + attr->disabled = disabled; + if (disabled) + return 0; + + err = arch_bp_generic_fields(ctrl, &len, &type); + if (err) + return err; + + switch (note_type) { + case NT_ARM_HW_BREAK: + if ((type & HW_BREAKPOINT_X) != type) return -EINVAL; - } + break; + case NT_ARM_HW_WATCH: + if ((type & HW_BREAKPOINT_RW) != type) + return -EINVAL; + break; + default: + return -EINVAL; } attr->bp_len = len; attr->bp_type = type; - attr->disabled = disabled; return 0; } @@ -541,6 +521,7 @@ static int fpr_set(struct task_struct *target, const struct user_regset *regset, return ret; target->thread.fpsimd_state.user_fpsimd = newstate; + fpsimd_flush_task_state(target); return ret; } @@ -658,28 +639,32 @@ static int compat_gpr_get(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; switch (idx) { case 15: - reg = (void *)&task_pt_regs(target)->pc; + reg = task_pt_regs(target)->pc; break; case 16: - reg = (void *)&task_pt_regs(target)->pstate; + reg = task_pt_regs(target)->pstate; break; case 17: - reg = (void *)&task_pt_regs(target)->orig_x0; + reg = task_pt_regs(target)->orig_x0; break; default: - reg = (void *)&task_pt_regs(target)->regs[idx]; + reg = task_pt_regs(target)->regs[idx]; } - ret = copy_to_user(ubuf, reg, sizeof(compat_ulong_t)); + if (kbuf) { + memcpy(kbuf, ®, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_to_user(ubuf, ®, sizeof(reg)); + if (ret) + break; - if (ret) - break; - else - ubuf += sizeof(compat_ulong_t); + ubuf += sizeof(reg); + } } return ret; @@ -707,28 +692,33 @@ static int compat_gpr_set(struct task_struct *target, for (i = 0; i < num_regs; ++i) { unsigned int idx = start + i; - void *reg; + compat_ulong_t reg; + + if (kbuf) { + memcpy(®, kbuf, sizeof(reg)); + kbuf += sizeof(reg); + } else { + ret = copy_from_user(®, ubuf, sizeof(reg)); + if (ret) + return ret; + + ubuf += sizeof(reg); + } switch (idx) { case 15: - reg = (void *)&newregs.pc; + newregs.pc = reg; break; case 16: - reg = (void *)&newregs.pstate; + newregs.pstate = reg; break; case 17: - reg = (void *)&newregs.orig_x0; + newregs.orig_x0 = reg; break; default: - reg = (void *)&newregs.regs[idx]; + newregs.regs[idx] = reg; } - ret = copy_from_user(reg, ubuf, sizeof(compat_ulong_t)); - - if (ret) - goto out; - else - ubuf += sizeof(compat_ulong_t); } if (valid_user_regs(&newregs.user_regs)) @@ -736,7 +726,6 @@ static int compat_gpr_set(struct task_struct *target, else ret = -EINVAL; -out: return ret; } @@ -790,6 +779,7 @@ static int compat_vfp_set(struct task_struct *target, uregs->fpcr = fpscr & VFP_FPSCR_CTRL_MASK; } + fpsimd_flush_task_state(target); return ret; } @@ -817,33 +807,6 @@ static const struct user_regset_view user_aarch32_view = { .regsets = aarch32_regsets, .n = ARRAY_SIZE(aarch32_regsets) }; -int aarch32_break_trap(struct pt_regs *regs) -{ - unsigned int instr; - bool bp = false; - void __user *pc = (void __user *)instruction_pointer(regs); - - if (compat_thumb_mode(regs)) { - /* get 16-bit Thumb instruction */ - get_user(instr, (u16 __user *)pc); - if (instr == AARCH32_BREAK_THUMB2_LO) { - /* get second half of 32-bit Thumb-2 instruction */ - get_user(instr, (u16 __user *)(pc + 2)); - bp = instr == AARCH32_BREAK_THUMB2_HI; - } else { - bp = instr == AARCH32_BREAK_THUMB; - } - } else { - /* 32-bit ARM instruction */ - get_user(instr, (u32 __user *)pc); - bp = (instr & ~0xf0000000) == AARCH32_BREAK_ARM; - } - - if (bp) - return ptrace_break(regs); - return 1; -} - static int compat_ptrace_read_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t __user *ret) { @@ -874,6 +837,7 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, compat_ulong_t val) { int ret; + mm_segment_t old_fs = get_fs(); if (off & 3 || off >= COMPAT_USER_SZ) return -EIO; @@ -881,10 +845,13 @@ static int compat_ptrace_write_user(struct task_struct *tsk, compat_ulong_t off, if (off >= sizeof(compat_elf_gregset_t)) return 0; + set_fs(KERNEL_DS); ret = copy_regset_from_user(tsk, &user_aarch32_view, REGSET_COMPAT_GPR, off, sizeof(compat_ulong_t), &val); + set_fs(old_fs); + return ret; } @@ -1111,45 +1078,49 @@ long arch_ptrace(struct task_struct *child, long request, return ptrace_request(child, request, addr, data); } +enum ptrace_syscall_dir { + PTRACE_SYSCALL_ENTER = 0, + PTRACE_SYSCALL_EXIT, +}; -static int __init ptrace_break_init(void) -{ - hook_debug_fault_code(DBG_ESR_EVT_BRK, arm64_break_trap, SIGTRAP, - TRAP_BRKPT, "ptrace BRK handler"); - return 0; -} -core_initcall(ptrace_break_init); - - -asmlinkage int syscall_trace(int dir, struct pt_regs *regs) +static void tracehook_report_syscall(struct pt_regs *regs, + enum ptrace_syscall_dir dir) { + int regno; unsigned long saved_reg; - if (!test_thread_flag(TIF_SYSCALL_TRACE)) - return regs->syscallno; - - if (is_compat_task()) { - /* AArch32 uses ip (r12) for scratch */ - saved_reg = regs->regs[12]; - regs->regs[12] = dir; - } else { - /* - * Save X7. X7 is used to denote syscall entry/exit: - * X7 = 0 -> entry, = 1 -> exit - */ - saved_reg = regs->regs[7]; - regs->regs[7] = dir; - } + /* + * A scratch register (ip(r12) on AArch32, x7 on AArch64) is + * used to denote syscall entry/exit: + */ + regno = (is_compat_task() ? 12 : 7); + saved_reg = regs->regs[regno]; + regs->regs[regno] = dir; - if (dir) + if (dir == PTRACE_SYSCALL_EXIT) tracehook_report_syscall_exit(regs, 0); else if (tracehook_report_syscall_entry(regs)) regs->syscallno = ~0UL; - if (is_compat_task()) - regs->regs[12] = saved_reg; - else - regs->regs[7] = saved_reg; + regs->regs[regno] = saved_reg; +} + +asmlinkage int syscall_trace_enter(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); + + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_enter(regs, regs->syscallno); return regs->syscallno; } + +asmlinkage void syscall_trace_exit(struct pt_regs *regs) +{ + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) + trace_sys_exit(regs, regs_return_value(regs)); + + if (test_thread_flag(TIF_SYSCALL_TRACE)) + tracehook_report_syscall(regs, PTRACE_SYSCALL_EXIT); +} diff --git a/arch/arm64/kernel/return_address.c b/arch/arm64/kernel/return_address.c new file mode 100644 index 00000000000..89102a6ffad --- /dev/null +++ b/arch/arm64/kernel/return_address.c @@ -0,0 +1,55 @@ +/* + * arch/arm64/kernel/return_address.c + * + * Copyright (C) 2013 Linaro Limited + * Author: AKASHI Takahiro <takahiro.akashi@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/export.h> +#include <linux/ftrace.h> + +#include <asm/stacktrace.h> + +struct return_address_data { + unsigned int level; + void *addr; +}; + +static int save_return_addr(struct stackframe *frame, void *d) +{ + struct return_address_data *data = d; + + if (!data->level) { + data->addr = (void *)frame->pc; + return 1; + } else { + --data->level; + return 0; + } +} + +void *return_address(unsigned int level) +{ + struct return_address_data data; + struct stackframe frame; + register unsigned long current_sp asm ("sp"); + + data.level = level + 2; + data.addr = NULL; + + frame.fp = (unsigned long)__builtin_frame_address(0); + frame.sp = current_sp; + frame.pc = (unsigned long)return_address; /* dummy */ + + walk_stackframe(&frame, save_return_addr, &data); + + if (!data.level) + return data.addr; + else + return NULL; +} +EXPORT_SYMBOL_GPL(return_address); diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 7665a9bfdb1..46d1125571f 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -25,6 +25,7 @@ #include <linux/utsname.h> #include <linux/initrd.h> #include <linux/console.h> +#include <linux/cache.h> #include <linux/bootmem.h> #include <linux/seq_file.h> #include <linux/screen_info.h> @@ -32,6 +33,7 @@ #include <linux/kexec.h> #include <linux/crash_dump.h> #include <linux/root_dev.h> +#include <linux/clk-provider.h> #include <linux/cpu.h> #include <linux/interrupt.h> #include <linux/smp.h> @@ -39,23 +41,41 @@ #include <linux/proc_fs.h> #include <linux/memblock.h> #include <linux/of_fdt.h> +#include <linux/of_platform.h> +#include <linux/efi.h> +#include <asm/fixmap.h> #include <asm/cputype.h> #include <asm/elf.h> #include <asm/cputable.h> +#include <asm/cpu_ops.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/smp_plat.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> #include <asm/traps.h> #include <asm/memblock.h> +#include <asm/psci.h> +#include <asm/efi.h> unsigned int processor_id; EXPORT_SYMBOL(processor_id); -unsigned int elf_hwcap __read_mostly; +unsigned long elf_hwcap __read_mostly; EXPORT_SYMBOL_GPL(elf_hwcap); +#ifdef CONFIG_COMPAT +#define COMPAT_ELF_HWCAP_DEFAULT \ + (COMPAT_HWCAP_HALF|COMPAT_HWCAP_THUMB|\ + COMPAT_HWCAP_FAST_MULT|COMPAT_HWCAP_EDSP|\ + COMPAT_HWCAP_TLS|COMPAT_HWCAP_VFP|\ + COMPAT_HWCAP_VFPv3|COMPAT_HWCAP_VFPv4|\ + COMPAT_HWCAP_NEON|COMPAT_HWCAP_IDIV) +unsigned int compat_elf_hwcap __read_mostly = COMPAT_ELF_HWCAP_DEFAULT; +unsigned int compat_elf_hwcap2 __read_mostly; +#endif + static const char *cpu_name; static const char *machine_name; phys_addr_t __fdt_pointer __initdata; @@ -93,15 +113,97 @@ void __init early_print(const char *str, ...) printk("%s", buf); } -static void __init setup_processor(void) +void __init smp_setup_processor_id(void) { - struct cpu_info *cpu_info; + /* + * clear __my_cpu_offset on boot CPU to avoid hang caused by + * using percpu variable early, for example, lockdep will + * access percpu variable inside lock_release + */ + set_my_cpu_offset(0); +} + +bool arch_match_cpu_phys_id(int cpu, u64 phys_id) +{ + return phys_id == cpu_logical_map(cpu); +} +struct mpidr_hash mpidr_hash; +#ifdef CONFIG_SMP +/** + * smp_build_mpidr_hash - Pre-compute shifts required at each affinity + * level in order to build a linear index from an + * MPIDR value. Resulting algorithm is a collision + * free hash carried out through shifting and ORing + */ +static void __init smp_build_mpidr_hash(void) +{ + u32 i, affinity, fs[4], bits[4], ls; + u64 mask = 0; + /* + * Pre-scan the list of MPIDRS and filter out bits that do + * not contribute to affinity levels, ie they never toggle. + */ + for_each_possible_cpu(i) + mask |= (cpu_logical_map(i) ^ cpu_logical_map(0)); + pr_debug("mask of set bits %#llx\n", mask); + /* + * Find and stash the last and first bit set at all affinity levels to + * check how many bits are required to represent them. + */ + for (i = 0; i < 4; i++) { + affinity = MPIDR_AFFINITY_LEVEL(mask, i); + /* + * Find the MSB bit and LSB bits position + * to determine how many bits are required + * to express the affinity level. + */ + ls = fls(affinity); + fs[i] = affinity ? ffs(affinity) - 1 : 0; + bits[i] = ls - fs[i]; + } /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc.S + * An index can be created from the MPIDR_EL1 by isolating the + * significant bits at each affinity level and by shifting + * them in order to compress the 32 bits values space to a + * compressed set of values. This is equivalent to hashing + * the MPIDR_EL1 through shifting and ORing. It is a collision free + * hash though not minimal since some levels might contain a number + * of CPUs that is not an exact power of 2 and their bit + * representation might contain holes, eg MPIDR_EL1[7:0] = {0x2, 0x80}. */ + mpidr_hash.shift_aff[0] = MPIDR_LEVEL_SHIFT(0) + fs[0]; + mpidr_hash.shift_aff[1] = MPIDR_LEVEL_SHIFT(1) + fs[1] - bits[0]; + mpidr_hash.shift_aff[2] = MPIDR_LEVEL_SHIFT(2) + fs[2] - + (bits[1] + bits[0]); + mpidr_hash.shift_aff[3] = MPIDR_LEVEL_SHIFT(3) + + fs[3] - (bits[2] + bits[1] + bits[0]); + mpidr_hash.mask = mask; + mpidr_hash.bits = bits[3] + bits[2] + bits[1] + bits[0]; + pr_debug("MPIDR hash: aff0[%u] aff1[%u] aff2[%u] aff3[%u] mask[%#llx] bits[%u]\n", + mpidr_hash.shift_aff[0], + mpidr_hash.shift_aff[1], + mpidr_hash.shift_aff[2], + mpidr_hash.shift_aff[3], + mpidr_hash.mask, + mpidr_hash.bits); + /* + * 4x is an arbitrary value used to warn on a hash table much bigger + * than expected on most systems. + */ + if (mpidr_hash_size() > 4 * num_possible_cpus()) + pr_warn("Large number of MPIDR hash buckets detected\n"); + __flush_dcache_area(&mpidr_hash, sizeof(struct mpidr_hash)); +} +#endif + +static void __init setup_processor(void) +{ + struct cpu_info *cpu_info; + u64 features, block; + u32 cwg; + int cls; + cpu_info = lookup_processor_type(read_cpuid_id()); if (!cpu_info) { printk("CPU configuration botched (ID %08x), unable to continue.\n", @@ -114,81 +216,99 @@ static void __init setup_processor(void) printk("CPU: %s [%08x] revision %d\n", cpu_name, read_cpuid_id(), read_cpuid_id() & 15); - sprintf(init_utsname()->machine, "aarch64"); + sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; -} -static void __init setup_machine_fdt(phys_addr_t dt_phys) -{ - struct boot_param_header *devtree; - unsigned long dt_root; + /* + * Check for sane CTR_EL0.CWG value. + */ + cwg = cache_type_cwg(); + cls = cache_line_size(); + if (!cwg) + pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", + cls); + if (L1_CACHE_BYTES < cls) + pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", + L1_CACHE_BYTES, cls); - /* Check we have a non-NULL DT pointer */ - if (!dt_phys) { - early_print("\n" - "Error: NULL or invalid device tree blob\n" - "The dtb must be 8-byte aligned and passed in the first 512MB of memory\n" - "\nPlease check your bootloader.\n"); + /* + * ID_AA64ISAR0_EL1 contains 4-bit wide signed feature blocks. + * The blocks we test below represent incremental functionality + * for non-negative values. Negative values are reserved. + */ + features = read_cpuid(ID_AA64ISAR0_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + elf_hwcap |= HWCAP_PMULL; + case 1: + elf_hwcap |= HWCAP_AES; + case 0: + break; + } + } - while (true) - cpu_relax(); + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA1; + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + elf_hwcap |= HWCAP_CRC32; + +#ifdef CONFIG_COMPAT + /* + * ID_ISAR5_EL1 carries similar information as above, but pertaining to + * the Aarch32 32-bit execution state. + */ + features = read_cpuid(ID_ISAR5_EL1); + block = (features >> 4) & 0xf; + if (!(block & 0x8)) { + switch (block) { + default: + case 2: + compat_elf_hwcap2 |= COMPAT_HWCAP2_PMULL; + case 1: + compat_elf_hwcap2 |= COMPAT_HWCAP2_AES; + case 0: + break; + } } - devtree = phys_to_virt(dt_phys); + block = (features >> 8) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA1; - /* Check device tree validity */ - if (be32_to_cpu(devtree->magic) != OF_DT_HEADER) { + block = (features >> 12) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_SHA2; + + block = (features >> 16) & 0xf; + if (block && !(block & 0x8)) + compat_elf_hwcap2 |= COMPAT_HWCAP2_CRC32; +#endif +} + +static void __init setup_machine_fdt(phys_addr_t dt_phys) +{ + if (!dt_phys || !early_init_dt_scan(phys_to_virt(dt_phys))) { early_print("\n" "Error: invalid device tree blob at physical address 0x%p (virtual address 0x%p)\n" - "Expected 0x%x, found 0x%x\n" + "The dtb must be 8-byte aligned and passed in the first 512MB of memory\n" "\nPlease check your bootloader.\n", - dt_phys, devtree, OF_DT_HEADER, - be32_to_cpu(devtree->magic)); + dt_phys, phys_to_virt(dt_phys)); while (true) cpu_relax(); } - initial_boot_params = devtree; - dt_root = of_get_flat_dt_root(); - - machine_name = of_get_flat_dt_prop(dt_root, "model", NULL); - if (!machine_name) - machine_name = of_get_flat_dt_prop(dt_root, "compatible", NULL); - if (!machine_name) - machine_name = "<unknown>"; - pr_info("Machine: %s\n", machine_name); - - /* Retrieve various information from the /chosen node */ - of_scan_flat_dt(early_init_dt_scan_chosen, boot_command_line); - /* Initialize {size,address}-cells info */ - of_scan_flat_dt(early_init_dt_scan_root, NULL); - /* Setup memory, calling early_init_dt_add_memory_arch */ - of_scan_flat_dt(early_init_dt_scan_memory, NULL); -} - -void __init early_init_dt_add_memory_arch(u64 base, u64 size) -{ - base &= PAGE_MASK; - size &= PAGE_MASK; - if (base + size < PHYS_OFFSET) { - pr_warning("Ignoring memory block 0x%llx - 0x%llx\n", - base, base + size); - return; - } - if (base < PHYS_OFFSET) { - pr_warning("Ignoring memory range 0x%llx - 0x%llx\n", - base, PHYS_OFFSET); - size -= PHYS_OFFSET - base; - base = PHYS_OFFSET; - } - memblock_add(base, size); -} - -void * __init early_init_dt_alloc_memory_arch(u64 size, u64 align) -{ - return __va(memblock_alloc(size, align)); + machine_name = of_flat_dt_get_machine_name(); } /* @@ -238,8 +358,15 @@ static void __init request_standard_resources(void) } } +u64 __cpu_logical_map[NR_CPUS] = { [0 ... NR_CPUS-1] = INVALID_HWID }; + void __init setup_arch(char **cmdline_p) { + /* + * Unmask asynchronous aborts early to catch possible system errors. + */ + local_async_enable(); + setup_processor(); setup_machine_fdt(__fdt_pointer); @@ -251,17 +378,27 @@ void __init setup_arch(char **cmdline_p) *cmdline_p = boot_command_line; + early_ioremap_init(); + parse_early_param(); + efi_init(); arm64_memblock_init(); paging_init(); request_standard_resources(); + efi_idmap_init(); + unflatten_device_tree(); + psci_init(); + + cpu_logical_map(0) = read_cpuid_mpidr() & MPIDR_HWID_BITMASK; + cpu_read_bootcpu_ops(); #ifdef CONFIG_SMP smp_init_cpus(); + smp_build_mpidr_hash(); #endif #ifdef CONFIG_VT @@ -273,6 +410,13 @@ void __init setup_arch(char **cmdline_p) #endif } +static int __init arm64_device_init(void) +{ + of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + return 0; +} +arch_initcall_sync(arm64_device_init); + static DEFINE_PER_CPU(struct cpu, cpu_data); static int __init topology_init(void) @@ -292,6 +436,12 @@ subsys_initcall(topology_init); static const char *hwcap_str[] = { "fp", "asimd", + "evtstrm", + "aes", + "pmull", + "sha1", + "sha2", + "crc32", NULL }; @@ -311,9 +461,6 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif - seq_printf(m, "BogoMIPS\t: %lu.%02lu\n\n", - loops_per_jiffy / (500000UL/HZ), - loops_per_jiffy / (5000UL/HZ) % 100); } /* dump out the processor features */ diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index abd756315cb..6357b9c6c90 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/compat.h> #include <linux/errno.h> #include <linux/signal.h> #include <linux/personality.h> @@ -25,7 +26,6 @@ #include <linux/tracehook.h> #include <linux/ratelimit.h> -#include <asm/compat.h> #include <asm/debug-monitors.h> #include <asm/elf.h> #include <asm/cacheflush.h> @@ -51,7 +51,7 @@ static int preserve_fpsimd_context(struct fpsimd_context __user *ctx) int err; /* dump the hardware registers to the fpsimd_state structure */ - fpsimd_save_state(fpsimd); + fpsimd_preserve_current_state(); /* copy the FP and status/control registers */ err = __copy_to_user(ctx->vregs, fpsimd->vregs, sizeof(fpsimd->vregs)); @@ -86,11 +86,8 @@ static int restore_fpsimd_context(struct fpsimd_context __user *ctx) __get_user_error(fpsimd.fpcr, &ctx->fpcr, err); /* load the hardware registers from the fpsimd_state structure */ - if (!err) { - preempt_disable(); - fpsimd_load_state(&fpsimd); - preempt_enable(); - } + if (!err) + fpsimd_update_current_state(&fpsimd); return err ? -EFAULT : 0; } @@ -100,8 +97,7 @@ static int restore_sigframe(struct pt_regs *regs, { sigset_t set; int i, err; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; err = __copy_from_user(&set, &sf->uc.uc_sigmask, sizeof(set)); if (err == 0) @@ -121,8 +117,11 @@ static int restore_sigframe(struct pt_regs *regs, err |= !valid_user_regs(®s->user_regs); - if (err == 0) - err |= restore_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= restore_fpsimd_context(fpsimd_ctx); + } return err; } @@ -149,8 +148,7 @@ asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) if (restore_sigframe(regs, frame)) goto badframe; - if (do_sigaltstack(&frame->uc.uc_stack, - NULL, regs->sp) == -EFAULT) + if (restore_altstack(&frame->uc.uc_stack)) goto badframe; return regs->regs[0]; @@ -164,18 +162,12 @@ badframe: return 0; } -asmlinkage long sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss, - unsigned long sp) -{ - return do_sigaltstack(uss, uoss, sp); -} - static int setup_sigframe(struct rt_sigframe __user *sf, struct pt_regs *regs, sigset_t *set) { int i, err = 0; - struct aux_context __user *aux = - (struct aux_context __user *)sf->uc.uc_mcontext.__reserved; + void *aux = sf->uc.uc_mcontext.__reserved; + struct _aarch64_ctx *end; /* set up the stack frame for unwinding */ __put_user_error(regs->regs[29], &sf->fp, err); @@ -192,12 +184,27 @@ static int setup_sigframe(struct rt_sigframe __user *sf, err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(*set)); - if (err == 0) - err |= preserve_fpsimd_context(&aux->fpsimd); + if (err == 0) { + struct fpsimd_context *fpsimd_ctx = + container_of(aux, struct fpsimd_context, head); + err |= preserve_fpsimd_context(fpsimd_ctx); + aux += sizeof(*fpsimd_ctx); + } + + /* fault information, if valid */ + if (current->thread.fault_code) { + struct esr_context *esr_ctx = + container_of(aux, struct esr_context, head); + __put_user_error(ESR_MAGIC, &esr_ctx->head.magic, err); + __put_user_error(sizeof(*esr_ctx), &esr_ctx->head.size, err); + __put_user_error(current->thread.fault_code, &esr_ctx->esr, err); + aux += sizeof(*esr_ctx); + } /* set the "end" magic */ - __put_user_error(0, &aux->end.magic, err); - __put_user_error(0, &aux->end.size, err); + end = aux; + __put_user_error(0, &end->magic, err); + __put_user_error(0, &end->size, err); return err; } @@ -250,7 +257,6 @@ static int setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { struct rt_sigframe __user *frame; - stack_t stack; int err = 0; frame = get_sigframe(ka, regs); @@ -260,12 +266,7 @@ static int setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, __put_user_error(0, &frame->uc.uc_flags, err); __put_user_error(NULL, &frame->uc.uc_link, err); - memset(&stack, 0, sizeof(stack)); - stack.ss_sp = (void __user *)current->sas_ss_sp; - stack.ss_flags = sas_ss_flags(regs->sp); - stack.ss_size = current->sas_ss_size; - err |= __copy_to_user(&frame->uc.uc_stack, &stack, sizeof(stack)); - + err |= __save_altstack(&frame->uc.uc_stack, regs->sp); err |= setup_sigframe(frame, regs, set); if (err == 0) { setup_return(regs, ka, frame, usig); @@ -429,4 +430,8 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, clear_thread_flag(TIF_NOTIFY_RESUME); tracehook_notify_resume(regs); } + + if (thread_flags & _TIF_FOREIGN_FPSTATE) + fpsimd_restore_current_state(); + } diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index a4db3d22aac..3491c638f17 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -23,31 +23,12 @@ #include <linux/syscalls.h> #include <linux/ratelimit.h> +#include <asm/esr.h> #include <asm/fpsimd.h> #include <asm/signal32.h> #include <asm/uaccess.h> #include <asm/unistd32.h> -struct compat_sigaction { - compat_uptr_t sa_handler; - compat_ulong_t sa_flags; - compat_uptr_t sa_restorer; - compat_sigset_t sa_mask; -}; - -struct compat_old_sigaction { - compat_uptr_t sa_handler; - compat_old_sigset_t sa_mask; - compat_ulong_t sa_flags; - compat_uptr_t sa_restorer; -}; - -typedef struct compat_sigaltstack { - compat_uptr_t ss_sp; - int ss_flags; - compat_size_t ss_size; -} compat_stack_t; - struct compat_sigcontext { /* We always set these two fields to 0 */ compat_ulong_t trap_no; @@ -76,7 +57,7 @@ struct compat_sigcontext { struct compat_ucontext { compat_ulong_t uc_flags; - struct compat_ucontext *uc_link; + compat_uptr_t uc_link; compat_stack_t uc_stack; struct compat_sigcontext uc_mcontext; compat_sigset_t uc_sigmask; @@ -101,6 +82,8 @@ struct compat_vfp_sigframe { #define VFP_MAGIC 0x56465001 #define VFP_STORAGE_SIZE sizeof(struct compat_vfp_sigframe) +#define FSR_WRITE_SHIFT (11) + struct compat_aux_sigframe { struct compat_vfp_sigframe vfp; @@ -120,34 +103,6 @@ struct compat_rt_sigframe { #define _BLOCKABLE (~(sigmask(SIGKILL) | sigmask(SIGSTOP))) -/* - * For ARM syscalls, the syscall number has to be loaded into r7. - * We do not support an OABI userspace. - */ -#define MOV_R7_NR_SIGRETURN (0xe3a07000 | __NR_compat_sigreturn) -#define SVC_SYS_SIGRETURN (0xef000000 | __NR_compat_sigreturn) -#define MOV_R7_NR_RT_SIGRETURN (0xe3a07000 | __NR_compat_rt_sigreturn) -#define SVC_SYS_RT_SIGRETURN (0xef000000 | __NR_compat_rt_sigreturn) - -/* - * For Thumb syscalls, we also pass the syscall number via r7. We therefore - * need two 16-bit instructions. - */ -#define SVC_THUMB_SIGRETURN (((0xdf00 | __NR_compat_sigreturn) << 16) | \ - 0x2700 | __NR_compat_sigreturn) -#define SVC_THUMB_RT_SIGRETURN (((0xdf00 | __NR_compat_rt_sigreturn) << 16) | \ - 0x2700 | __NR_compat_rt_sigreturn) - -const compat_ulong_t aarch32_sigret_code[6] = { - /* - * AArch32 sigreturn code. - * We don't construct an OABI SWI - instead we just set the imm24 field - * to the EABI syscall number so that we create a sane disassembly. - */ - MOV_R7_NR_SIGRETURN, SVC_SYS_SIGRETURN, SVC_THUMB_SIGRETURN, - MOV_R7_NR_RT_SIGRETURN, SVC_SYS_RT_SIGRETURN, SVC_THUMB_RT_SIGRETURN, -}; - static inline int put_sigset_t(compat_sigset_t __user *uset, sigset_t *set) { compat_sigset_t cset; @@ -170,7 +125,7 @@ static inline int get_sigset_t(sigset_t *set, return 0; } -int copy_siginfo_to_user32(compat_siginfo_t __user *to, siginfo_t *from) +int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; @@ -267,7 +222,7 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) * Note that this also saves V16-31, which aren't visible * in AArch32. */ - fpsimd_save_state(fpsimd); + fpsimd_preserve_current_state(); /* Place structure header on the stack */ __put_user_error(magic, &frame->magic, err); @@ -330,136 +285,12 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) * We don't need to touch the exception register, so * reload the hardware state. */ - if (!err) { - preempt_disable(); - fpsimd_load_state(&fpsimd); - preempt_enable(); - } + if (!err) + fpsimd_update_current_state(&fpsimd); return err ? -EFAULT : 0; } -/* - * atomically swap in the new signal mask, and wait for a signal. - */ -asmlinkage int compat_sys_sigsuspend(int restart, compat_ulong_t oldmask, - compat_old_sigset_t mask) -{ - sigset_t blocked; - - siginitset(¤t->blocked, mask); - return sigsuspend(&blocked); -} - -asmlinkage int compat_sys_sigaction(int sig, - const struct compat_old_sigaction __user *act, - struct compat_old_sigaction __user *oact) -{ - struct k_sigaction new_ka, old_ka; - int ret; - compat_old_sigset_t mask; - compat_uptr_t handler, restorer; - - if (act) { - if (!access_ok(VERIFY_READ, act, sizeof(*act)) || - __get_user(handler, &act->sa_handler) || - __get_user(restorer, &act->sa_restorer) || - __get_user(new_ka.sa.sa_flags, &act->sa_flags) || - __get_user(mask, &act->sa_mask)) - return -EFAULT; - - new_ka.sa.sa_handler = compat_ptr(handler); - new_ka.sa.sa_restorer = compat_ptr(restorer); - siginitset(&new_ka.sa.sa_mask, mask); - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - - if (!ret && oact) { - if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || - __put_user(ptr_to_compat(old_ka.sa.sa_handler), - &oact->sa_handler) || - __put_user(ptr_to_compat(old_ka.sa.sa_restorer), - &oact->sa_restorer) || - __put_user(old_ka.sa.sa_flags, &oact->sa_flags) || - __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask)) - return -EFAULT; - } - - return ret; -} - -asmlinkage int compat_sys_rt_sigaction(int sig, - const struct compat_sigaction __user *act, - struct compat_sigaction __user *oact, - compat_size_t sigsetsize) -{ - struct k_sigaction new_ka, old_ka; - int ret; - - /* XXX: Don't preclude handling different sized sigset_t's. */ - if (sigsetsize != sizeof(compat_sigset_t)) - return -EINVAL; - - if (act) { - compat_uptr_t handler, restorer; - - ret = get_user(handler, &act->sa_handler); - new_ka.sa.sa_handler = compat_ptr(handler); - ret |= get_user(restorer, &act->sa_restorer); - new_ka.sa.sa_restorer = compat_ptr(restorer); - ret |= get_sigset_t(&new_ka.sa.sa_mask, &act->sa_mask); - ret |= __get_user(new_ka.sa.sa_flags, &act->sa_flags); - if (ret) - return -EFAULT; - } - - ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); - if (!ret && oact) { - ret = put_user(ptr_to_compat(old_ka.sa.sa_handler), &oact->sa_handler); - ret |= put_sigset_t(&oact->sa_mask, &old_ka.sa.sa_mask); - ret |= __put_user(old_ka.sa.sa_flags, &oact->sa_flags); - } - return ret; -} - -int compat_do_sigaltstack(compat_uptr_t compat_uss, compat_uptr_t compat_uoss, - compat_ulong_t sp) -{ - compat_stack_t __user *newstack = compat_ptr(compat_uss); - compat_stack_t __user *oldstack = compat_ptr(compat_uoss); - compat_uptr_t ss_sp; - int ret; - mm_segment_t old_fs; - stack_t uss, uoss; - - /* Marshall the compat new stack into a stack_t */ - if (newstack) { - if (get_user(ss_sp, &newstack->ss_sp) || - __get_user(uss.ss_flags, &newstack->ss_flags) || - __get_user(uss.ss_size, &newstack->ss_size)) - return -EFAULT; - uss.ss_sp = compat_ptr(ss_sp); - } - - old_fs = get_fs(); - set_fs(KERNEL_DS); - /* The __user pointer casts are valid because of the set_fs() */ - ret = do_sigaltstack( - newstack ? (stack_t __user *) &uss : NULL, - oldstack ? (stack_t __user *) &uoss : NULL, - (unsigned long)sp); - set_fs(old_fs); - - /* Convert the old stack_t into a compat stack. */ - if (!ret && oldstack && - (put_user(ptr_to_compat(uoss.ss_sp), &oldstack->ss_sp) || - __put_user(uoss.ss_flags, &oldstack->ss_flags) || - __put_user(uoss.ss_size, &oldstack->ss_size))) - return -EFAULT; - return ret; -} - static int compat_restore_sigframe(struct pt_regs *regs, struct compat_sigframe __user *sf) { @@ -562,9 +393,7 @@ asmlinkage int compat_sys_rt_sigreturn(struct pt_regs *regs) if (compat_restore_sigframe(regs, &frame->sig)) goto badframe; - if (compat_do_sigaltstack(ptr_to_compat(&frame->sig.uc.uc_stack), - ptr_to_compat((void __user *)NULL), - regs->compat_sp) == -EFAULT) + if (compat_restore_altstack(&frame->sig.uc.uc_stack)) goto badframe; return regs->regs[0]; @@ -617,12 +446,13 @@ static void compat_setup_return(struct pt_regs *regs, struct k_sigaction *ka, /* Check if the handler is written for ARM or Thumb */ thumb = handler & 1; - if (thumb) { + if (thumb) spsr |= COMPAT_PSR_T_BIT; - spsr &= ~COMPAT_PSR_IT_MASK; - } else { + else spsr &= ~COMPAT_PSR_T_BIT; - } + + /* The IT state must be cleared for both ARM and Thumb-2 */ + spsr &= ~COMPAT_PSR_IT_MASK; if (ka->sa.sa_flags & SA_RESTORER) { retcode = ptr_to_compat(ka->sa.sa_restorer); @@ -670,7 +500,9 @@ static int compat_setup_sigframe(struct compat_sigframe __user *sf, __put_user_error(regs->pstate, &sf->uc.uc_mcontext.arm_cpsr, err); __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.trap_no, err); - __put_user_error((compat_ulong_t)0, &sf->uc.uc_mcontext.error_code, err); + /* set the compat FSR WnR */ + __put_user_error(!!(current->thread.fault_code & ESR_EL1_WRITE) << + FSR_WRITE_SHIFT, &sf->uc.uc_mcontext.error_code, err); __put_user_error(current->thread.fault_address, &sf->uc.uc_mcontext.fault_address, err); __put_user_error(set->sig[0], &sf->uc.uc_mcontext.oldmask, err); @@ -692,7 +524,6 @@ int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, sigset_t *set, struct pt_regs *regs) { struct compat_rt_sigframe __user *frame; - compat_stack_t stack; int err = 0; frame = compat_get_sigframe(ka, regs, sizeof(*frame)); @@ -703,13 +534,9 @@ int compat_setup_rt_frame(int usig, struct k_sigaction *ka, siginfo_t *info, err |= copy_siginfo_to_user32(&frame->info, info); __put_user_error(0, &frame->sig.uc.uc_flags, err); - __put_user_error(NULL, &frame->sig.uc.uc_link, err); + __put_user_error(0, &frame->sig.uc.uc_link, err); - memset(&stack, 0, sizeof(stack)); - stack.ss_sp = (compat_uptr_t)current->sas_ss_sp; - stack.ss_flags = sas_ss_flags(regs->compat_sp); - stack.ss_size = current->sas_ss_size; - err |= __copy_to_user(&frame->sig.uc.uc_stack, &stack, sizeof(stack)); + err |= __compat_save_altstack(&frame->sig.uc.uc_stack, regs->compat_sp); err |= compat_setup_sigframe(&frame->sig, regs, set); @@ -742,75 +569,6 @@ int compat_setup_frame(int usig, struct k_sigaction *ka, sigset_t *set, return err; } -/* - * RT signals don't have generic compat wrappers. - * See arch/powerpc/kernel/signal_32.c - */ -asmlinkage int compat_sys_rt_sigprocmask(int how, compat_sigset_t __user *set, - compat_sigset_t __user *oset, - compat_size_t sigsetsize) -{ - sigset_t s; - sigset_t __user *up; - int ret; - mm_segment_t old_fs = get_fs(); - - if (set) { - if (get_sigset_t(&s, set)) - return -EFAULT; - } - - set_fs(KERNEL_DS); - /* This is valid because of the set_fs() */ - up = (sigset_t __user *) &s; - ret = sys_rt_sigprocmask(how, set ? up : NULL, oset ? up : NULL, - sigsetsize); - set_fs(old_fs); - if (ret) - return ret; - if (oset) { - if (put_sigset_t(oset, &s)) - return -EFAULT; - } - return 0; -} - -asmlinkage int compat_sys_rt_sigpending(compat_sigset_t __user *set, - compat_size_t sigsetsize) -{ - sigset_t s; - int ret; - mm_segment_t old_fs = get_fs(); - - set_fs(KERNEL_DS); - /* The __user pointer cast is valid because of the set_fs() */ - ret = sys_rt_sigpending((sigset_t __user *) &s, sigsetsize); - set_fs(old_fs); - if (!ret) { - if (put_sigset_t(set, &s)) - return -EFAULT; - } - return ret; -} - -asmlinkage int compat_sys_rt_sigqueueinfo(int pid, int sig, - compat_siginfo_t __user *uinfo) -{ - siginfo_t info; - int ret; - mm_segment_t old_fs = get_fs(); - - ret = copy_siginfo_from_user32(&info, uinfo); - if (unlikely(ret)) - return ret; - - set_fs (KERNEL_DS); - /* The __user pointer cast is valid because of the set_fs() */ - ret = sys_rt_sigqueueinfo(pid, sig, (siginfo_t __user *) &info); - set_fs (old_fs); - return ret; -} - void compat_setup_restart_syscall(struct pt_regs *regs) { regs->regs[7] = __NR_compat_restart_syscall; diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S new file mode 100644 index 00000000000..b1925729c69 --- /dev/null +++ b/arch/arm64/kernel/sleep.S @@ -0,0 +1,184 @@ +#include <linux/errno.h> +#include <linux/linkage.h> +#include <asm/asm-offsets.h> +#include <asm/assembler.h> + + .text +/* + * Implementation of MPIDR_EL1 hash algorithm through shifting + * and OR'ing. + * + * @dst: register containing hash result + * @rs0: register containing affinity level 0 bit shift + * @rs1: register containing affinity level 1 bit shift + * @rs2: register containing affinity level 2 bit shift + * @rs3: register containing affinity level 3 bit shift + * @mpidr: register containing MPIDR_EL1 value + * @mask: register containing MPIDR mask + * + * Pseudo C-code: + * + *u32 dst; + * + *compute_mpidr_hash(u32 rs0, u32 rs1, u32 rs2, u32 rs3, u64 mpidr, u64 mask) { + * u32 aff0, aff1, aff2, aff3; + * u64 mpidr_masked = mpidr & mask; + * aff0 = mpidr_masked & 0xff; + * aff1 = mpidr_masked & 0xff00; + * aff2 = mpidr_masked & 0xff0000; + * aff2 = mpidr_masked & 0xff00000000; + * dst = (aff0 >> rs0 | aff1 >> rs1 | aff2 >> rs2 | aff3 >> rs3); + *} + * Input registers: rs0, rs1, rs2, rs3, mpidr, mask + * Output register: dst + * Note: input and output registers must be disjoint register sets + (eg: a macro instance with mpidr = x1 and dst = x1 is invalid) + */ + .macro compute_mpidr_hash dst, rs0, rs1, rs2, rs3, mpidr, mask + and \mpidr, \mpidr, \mask // mask out MPIDR bits + and \dst, \mpidr, #0xff // mask=aff0 + lsr \dst ,\dst, \rs0 // dst=aff0>>rs0 + and \mask, \mpidr, #0xff00 // mask = aff1 + lsr \mask ,\mask, \rs1 + orr \dst, \dst, \mask // dst|=(aff1>>rs1) + and \mask, \mpidr, #0xff0000 // mask = aff2 + lsr \mask ,\mask, \rs2 + orr \dst, \dst, \mask // dst|=(aff2>>rs2) + and \mask, \mpidr, #0xff00000000 // mask = aff3 + lsr \mask ,\mask, \rs3 + orr \dst, \dst, \mask // dst|=(aff3>>rs3) + .endm +/* + * Save CPU state for a suspend. This saves callee registers, and allocates + * space on the kernel stack to save the CPU specific registers + some + * other data for resume. + * + * x0 = suspend finisher argument + */ +ENTRY(__cpu_suspend) + stp x29, lr, [sp, #-96]! + stp x19, x20, [sp,#16] + stp x21, x22, [sp,#32] + stp x23, x24, [sp,#48] + stp x25, x26, [sp,#64] + stp x27, x28, [sp,#80] + mov x2, sp + sub sp, sp, #CPU_SUSPEND_SZ // allocate cpu_suspend_ctx + mov x1, sp + /* + * x1 now points to struct cpu_suspend_ctx allocated on the stack + */ + str x2, [x1, #CPU_CTX_SP] + ldr x2, =sleep_save_sp + ldr x2, [x2, #SLEEP_SAVE_SP_VIRT] +#ifdef CONFIG_SMP + mrs x7, mpidr_el1 + ldr x9, =mpidr_hash + ldr x10, [x9, #MPIDR_HASH_MASK] + /* + * Following code relies on the struct mpidr_hash + * members size. + */ + ldp w3, w4, [x9, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x9, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x8, x3, x4, x5, x6, x7, x10 + add x2, x2, x8, lsl #3 +#endif + bl __cpu_suspend_finisher + /* + * Never gets here, unless suspend fails. + * Successful cpu_suspend should return from cpu_resume, returning + * through this code path is considered an error + * If the return value is set to 0 force x0 = -EOPNOTSUPP + * to make sure a proper error condition is propagated + */ + cmp x0, #0 + mov x3, #-EOPNOTSUPP + csel x0, x3, x0, eq + add sp, sp, #CPU_SUSPEND_SZ // rewind stack pointer + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(__cpu_suspend) + .ltorg + +/* + * x0 must contain the sctlr value retrieved from restored context + */ +ENTRY(cpu_resume_mmu) + ldr x3, =cpu_resume_after_mmu + msr sctlr_el1, x0 // restore sctlr_el1 + isb + br x3 // global jump to virtual address +ENDPROC(cpu_resume_mmu) +cpu_resume_after_mmu: + mov x0, #0 // return zero on success + ldp x19, x20, [sp, #16] + ldp x21, x22, [sp, #32] + ldp x23, x24, [sp, #48] + ldp x25, x26, [sp, #64] + ldp x27, x28, [sp, #80] + ldp x29, lr, [sp], #96 + ret +ENDPROC(cpu_resume_after_mmu) + + .data +ENTRY(cpu_resume) + bl el2_setup // if in EL2 drop to EL1 cleanly +#ifdef CONFIG_SMP + mrs x1, mpidr_el1 + adr x4, mpidr_hash_ptr + ldr x5, [x4] + add x8, x4, x5 // x8 = struct mpidr_hash phys address + /* retrieve mpidr_hash members to compute the hash */ + ldr x2, [x8, #MPIDR_HASH_MASK] + ldp w3, w4, [x8, #MPIDR_HASH_SHIFTS] + ldp w5, w6, [x8, #(MPIDR_HASH_SHIFTS + 8)] + compute_mpidr_hash x7, x3, x4, x5, x6, x1, x2 + /* x7 contains hash index, let's use it to grab context pointer */ +#else + mov x7, xzr +#endif + adr x0, sleep_save_sp + ldr x0, [x0, #SLEEP_SAVE_SP_PHYS] + ldr x0, [x0, x7, lsl #3] + /* load sp from context */ + ldr x2, [x0, #CPU_CTX_SP] + adr x1, sleep_idmap_phys + /* load physical address of identity map page table in x1 */ + ldr x1, [x1] + mov sp, x2 + /* + * cpu_do_resume expects x0 to contain context physical address + * pointer and x1 to contain physical address of 1:1 page tables + */ + bl cpu_do_resume // PC relative jump, MMU off + b cpu_resume_mmu // Resume MMU, never returns +ENDPROC(cpu_resume) + + .align 3 +mpidr_hash_ptr: + /* + * offset of mpidr_hash symbol from current location + * used to obtain run-time mpidr_hash address with MMU off + */ + .quad mpidr_hash - . +/* + * physical address of identity mapped page tables + */ + .type sleep_idmap_phys, #object +ENTRY(sleep_idmap_phys) + .quad 0 +/* + * struct sleep_save_sp { + * phys_addr_t *save_ptr_stash; + * phys_addr_t save_ptr_stash_phys; + * }; + */ + .type sleep_save_sp, #object +ENTRY(sleep_save_sp) + .space SLEEP_SAVE_SP_SZ // struct sleep_save_sp diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 538300f2273..40f38f46c8e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -35,14 +35,17 @@ #include <linux/clockchips.h> #include <linux/completion.h> #include <linux/of.h> +#include <linux/irq_work.h> #include <asm/atomic.h> #include <asm/cacheflush.h> #include <asm/cputype.h> +#include <asm/cpu_ops.h> #include <asm/mmu_context.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/processor.h> +#include <asm/smp_plat.h> #include <asm/sections.h> #include <asm/tlbflush.h> #include <asm/ptrace.h> @@ -53,75 +56,31 @@ * where to place its SVC stack */ struct secondary_data secondary_data; -volatile unsigned long secondary_holding_pen_release = -1; enum ipi_msg_type { IPI_RESCHEDULE, IPI_CALL_FUNC, IPI_CALL_FUNC_SINGLE, IPI_CPU_STOP, + IPI_TIMER, + IPI_IRQ_WORK, }; -static DEFINE_RAW_SPINLOCK(boot_lock); - -/* - * Write secondary_holding_pen_release in a way that is guaranteed to be - * visible to all observers, irrespective of whether they're taking part - * in coherency or not. This is necessary for the hotplug code to work - * reliably. - */ -static void __cpuinit write_pen_release(int val) -{ - void *start = (void *)&secondary_holding_pen_release; - unsigned long size = sizeof(secondary_holding_pen_release); - - secondary_holding_pen_release = val; - __flush_dcache_area(start, size); -} - /* * Boot a secondary CPU, and assign it the specified idle task. * This also gives us the initial stack to use for this CPU. */ -static int __cpuinit boot_secondary(unsigned int cpu, struct task_struct *idle) +static int boot_secondary(unsigned int cpu, struct task_struct *idle) { - unsigned long timeout; + if (cpu_ops[cpu]->cpu_boot) + return cpu_ops[cpu]->cpu_boot(cpu); - /* - * Set synchronisation state between this boot processor - * and the secondary one - */ - raw_spin_lock(&boot_lock); - - /* - * Update the pen release flag. - */ - write_pen_release(cpu); - - /* - * Send an event, causing the secondaries to read pen_release. - */ - sev(); - - timeout = jiffies + (1 * HZ); - while (time_before(jiffies, timeout)) { - if (secondary_holding_pen_release == -1UL) - break; - udelay(10); - } - - /* - * Now the secondary core is starting up let it run its - * calibrations, then wait for it to finish - */ - raw_spin_unlock(&boot_lock); - - return secondary_holding_pen_release != -1 ? -ENOSYS : 0; + return -EOPNOTSUPP; } static DECLARE_COMPLETION(cpu_running); -int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) +int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -157,17 +116,20 @@ int __cpuinit __cpu_up(unsigned int cpu, struct task_struct *idle) return ret; } +static void smp_store_cpu_info(unsigned int cpuid) +{ + store_cpu_topology(cpuid); +} + /* * This is the secondary CPU boot entry. We're using this CPUs * idle thread stack, but a set of temporary page tables. */ -asmlinkage void __cpuinit secondary_start_kernel(void) +asmlinkage void secondary_start_kernel(void) { struct mm_struct *mm = &init_mm; unsigned int cpu = smp_processor_id(); - printk("CPU%u: Booted secondary processor\n", cpu); - /* * All kernel threads share the same mm context; grab a * reference and switch to it. @@ -176,6 +138,9 @@ asmlinkage void __cpuinit secondary_start_kernel(void) current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); + printk("CPU%u: Booted secondary processor\n", cpu); + /* * TTBR0 is only used for the identity mapping at this stage. Make it * point to zero page to avoid speculatively fetching new entries. @@ -186,24 +151,15 @@ asmlinkage void __cpuinit secondary_start_kernel(void) preempt_disable(); trace_hardirqs_off(); - /* - * Let the primary processor know we're out of the - * pen, then head off into the C entry point - */ - write_pen_release(-1); + if (cpu_ops[cpu]->cpu_postboot) + cpu_ops[cpu]->cpu_postboot(); /* - * Synchronise with the boot thread. - */ - raw_spin_lock(&boot_lock); - raw_spin_unlock(&boot_lock); - - /* - * Enable local interrupts. + * Enable GIC and timers. */ notify_cpu_starting(cpu); - local_irq_enable(); - local_fiq_enable(); + + smp_store_cpu_info(cpu); /* * OK, now it's safe to let the boot CPU continue. Wait for @@ -213,62 +169,231 @@ asmlinkage void __cpuinit secondary_start_kernel(void) set_cpu_online(cpu, true); complete(&cpu_running); + local_dbg_enable(); + local_irq_enable(); + local_async_enable(); + /* * OK, it's off to the idle thread for us */ - cpu_idle(); + cpu_startup_entry(CPUHP_ONLINE); } -void __init smp_cpus_done(unsigned int max_cpus) +#ifdef CONFIG_HOTPLUG_CPU +static int op_cpu_disable(unsigned int cpu) +{ + /* + * If we don't have a cpu_die method, abort before we reach the point + * of no return. CPU0 may not have an cpu_ops, so test for it. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_die) + return -EOPNOTSUPP; + + /* + * We may need to abort a hot unplug for some other mechanism-specific + * reason. + */ + if (cpu_ops[cpu]->cpu_disable) + return cpu_ops[cpu]->cpu_disable(cpu); + + return 0; +} + +/* + * __cpu_disable runs on the processor to be shutdown. + */ +int __cpu_disable(void) +{ + unsigned int cpu = smp_processor_id(); + int ret; + + ret = op_cpu_disable(cpu); + if (ret) + return ret; + + /* + * Take this CPU offline. Once we clear this, we can't return, + * and we must not schedule until we're ready to give up the cpu. + */ + set_cpu_online(cpu, false); + + /* + * OK - migrate IRQs away from this CPU + */ + migrate_irqs(); + + /* + * Remove this CPU from the vm mask set of all processes. + */ + clear_tasks_mm_cpumask(cpu); + + return 0; +} + +static int op_cpu_kill(unsigned int cpu) { - unsigned long bogosum = loops_per_jiffy * num_online_cpus(); + /* + * If we have no means of synchronising with the dying CPU, then assume + * that it is really dead. We can only wait for an arbitrary length of + * time and hope that it's dead, so let's skip the wait and just hope. + */ + if (!cpu_ops[cpu]->cpu_kill) + return 1; - pr_info("SMP: Total of %d processors activated (%lu.%02lu BogoMIPS).\n", - num_online_cpus(), bogosum / (500000/HZ), - (bogosum / (5000/HZ)) % 100); + return cpu_ops[cpu]->cpu_kill(cpu); +} + +static DECLARE_COMPLETION(cpu_died); + +/* + * called on the thread which is asking for a CPU to be shutdown - + * waits until shutdown has completed, or it is timed out. + */ +void __cpu_die(unsigned int cpu) +{ + if (!wait_for_completion_timeout(&cpu_died, msecs_to_jiffies(5000))) { + pr_crit("CPU%u: cpu didn't die\n", cpu); + return; + } + pr_notice("CPU%u: shutdown\n", cpu); + + /* + * Now that the dying CPU is beyond the point of no return w.r.t. + * in-kernel synchronisation, try to get the firwmare to help us to + * verify that it has really left the kernel before we consider + * clobbering anything it might still be using. + */ + if (!op_cpu_kill(cpu)) + pr_warn("CPU%d may not have shut down cleanly\n", cpu); +} + +/* + * Called from the idle thread for the CPU which has been shutdown. + * + * Note that we disable IRQs here, but do not re-enable them + * before returning to the caller. This is also the behaviour + * of the other hotplug-cpu capable cores, so presumably coming + * out of idle fixes this. + */ +void cpu_die(void) +{ + unsigned int cpu = smp_processor_id(); + + idle_task_exit(); + + local_irq_disable(); + + /* Tell __cpu_die() that this CPU is now safe to dispose of */ + complete(&cpu_died); + + /* + * Actually shutdown the CPU. This must never fail. The specific hotplug + * mechanism must perform all required cache maintenance to ensure that + * no dirty lines are lost in the process of shutting down the CPU. + */ + cpu_ops[cpu]->cpu_die(cpu); + + BUG(); +} +#endif + +void __init smp_cpus_done(unsigned int max_cpus) +{ + pr_info("SMP: Total of %d processors activated.\n", num_online_cpus()); } void __init smp_prepare_boot_cpu(void) { + set_my_cpu_offset(per_cpu_offset(smp_processor_id())); } static void (*smp_cross_call)(const struct cpumask *, unsigned int); -static phys_addr_t cpu_release_addr[NR_CPUS]; /* - * Enumerate the possible CPU set from the device tree. + * Enumerate the possible CPU set from the device tree and build the + * cpu logical map array containing MPIDR values related to logical + * cpus. Assumes that cpu_logical_map(0) has already been initialized. */ void __init smp_init_cpus(void) { - const char *enable_method; struct device_node *dn = NULL; - int cpu = 0; + unsigned int i, cpu = 1; + bool bootcpu_valid = false; while ((dn = of_find_node_by_type(dn, "cpu"))) { - if (cpu >= NR_CPUS) - goto next; + const u32 *cell; + u64 hwid; /* - * We currently support only the "spin-table" enable-method. + * A cpu node with missing "reg" property is + * considered invalid to build a cpu_logical_map + * entry. */ - enable_method = of_get_property(dn, "enable-method", NULL); - if (!enable_method || strcmp(enable_method, "spin-table")) { - pr_err("CPU %d: missing or invalid enable-method property: %s\n", - cpu, enable_method); + cell = of_get_property(dn, "reg", NULL); + if (!cell) { + pr_err("%s: missing reg property\n", dn->full_name); goto next; } + hwid = of_read_number(cell, of_n_addr_cells(dn)); /* - * Determine the address from which the CPU is polling. + * Non affinity bits must be set to 0 in the DT */ - if (of_property_read_u64(dn, "cpu-release-addr", - &cpu_release_addr[cpu])) { - pr_err("CPU %d: missing or invalid cpu-release-addr property\n", - cpu); + if (hwid & ~MPIDR_HWID_BITMASK) { + pr_err("%s: invalid reg property\n", dn->full_name); goto next; } - set_cpu_possible(cpu, true); + /* + * Duplicate MPIDRs are a recipe for disaster. Scan + * all initialized entries and check for + * duplicates. If any is found just ignore the cpu. + * cpu_logical_map was initialized to INVALID_HWID to + * avoid matching valid MPIDR values. + */ + for (i = 1; (i < cpu) && (i < NR_CPUS); i++) { + if (cpu_logical_map(i) == hwid) { + pr_err("%s: duplicate cpu reg properties in the DT\n", + dn->full_name); + goto next; + } + } + + /* + * The numbering scheme requires that the boot CPU + * must be assigned logical id 0. Record it so that + * the logical map built from DT is validated and can + * be used. + */ + if (hwid == cpu_logical_map(0)) { + if (bootcpu_valid) { + pr_err("%s: duplicate boot cpu reg property in DT\n", + dn->full_name); + goto next; + } + + bootcpu_valid = true; + + /* + * cpu_logical_map has already been + * initialized and the boot cpu doesn't need + * the enable-method so continue without + * incrementing cpu. + */ + continue; + } + + if (cpu >= NR_CPUS) + goto next; + + if (cpu_read_ops(dn, cpu) != 0) + goto next; + + if (cpu_ops[cpu]->cpu_init(dn, cpu)) + goto next; + + pr_debug("cpu logical map 0x%llx\n", hwid); + cpu_logical_map(cpu) = hwid; next: cpu++; } @@ -277,13 +402,29 @@ next: if (cpu > NR_CPUS) pr_warning("no. of cores (%d) greater than configured maximum of %d - clipping\n", cpu, NR_CPUS); + + if (!bootcpu_valid) { + pr_err("DT missing boot CPU MPIDR, not enabling secondaries\n"); + return; + } + + /* + * All the cpus that made it to the cpu_logical_map have been + * validated so set them as possible cpus. + */ + for (i = 0; i < NR_CPUS; i++) + if (cpu_logical_map(i) != INVALID_HWID) + set_cpu_possible(i, true); } void __init smp_prepare_cpus(unsigned int max_cpus) { - int cpu; - void **release_addr; - unsigned int ncores = num_possible_cpus(); + int err; + unsigned int cpu, ncores = num_possible_cpus(); + + init_cpu_topology(); + + smp_store_cpu_info(smp_processor_id()); /* * are we trying to boot more cores than exist? @@ -291,30 +432,35 @@ void __init smp_prepare_cpus(unsigned int max_cpus) if (max_cpus > ncores) max_cpus = ncores; + /* Don't bother if we're effectively UP */ + if (max_cpus <= 1) + return; + /* * Initialise the present map (which describes the set of CPUs * actually populated at the present time) and release the * secondaries from the bootloader. + * + * Make sure we online at most (max_cpus - 1) additional CPUs. */ + max_cpus--; for_each_possible_cpu(cpu) { if (max_cpus == 0) break; - if (!cpu_release_addr[cpu]) + if (cpu == smp_processor_id()) + continue; + + if (!cpu_ops[cpu]) continue; - release_addr = __va(cpu_release_addr[cpu]); - release_addr[0] = (void *)__pa(secondary_holding_pen); - __flush_dcache_area(release_addr, sizeof(release_addr[0])); + err = cpu_ops[cpu]->cpu_prepare(cpu); + if (err) + continue; set_cpu_present(cpu, true); max_cpus--; } - - /* - * Send an event to wake up the secondaries. - */ - sev(); } @@ -333,12 +479,22 @@ void arch_send_call_function_single_ipi(int cpu) smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE); } +#ifdef CONFIG_IRQ_WORK +void arch_irq_work_raise(void) +{ + if (smp_cross_call) + smp_cross_call(cpumask_of(smp_processor_id()), IPI_IRQ_WORK); +} +#endif + static const char *ipi_types[NR_IPI] = { #define S(x,s) [x - IPI_RESCHEDULE] = s S(IPI_RESCHEDULE, "Rescheduling interrupts"), S(IPI_CALL_FUNC, "Function call interrupts"), S(IPI_CALL_FUNC_SINGLE, "Single function call interrupts"), S(IPI_CPU_STOP, "CPU stop interrupts"), + S(IPI_TIMER, "Timer broadcast interrupts"), + S(IPI_IRQ_WORK, "IRQ work interrupts"), }; void show_ipi_list(struct seq_file *p, int prec) @@ -348,7 +504,7 @@ void show_ipi_list(struct seq_file *p, int prec) for (i = 0; i < NR_IPI; i++) { seq_printf(p, "%*s%u:%s", prec - 1, "IPI", i + IPI_RESCHEDULE, prec >= 4 ? " " : ""); - for_each_present_cpu(cpu) + for_each_online_cpu(cpu) seq_printf(p, "%10u ", __get_irq_stat(cpu, ipi_irqs[i])); seq_printf(p, " %s\n", ipi_types[i]); @@ -383,7 +539,6 @@ static void ipi_cpu_stop(unsigned int cpu) set_cpu_online(cpu, false); - local_fiq_disable(); local_irq_disable(); while (1) @@ -424,6 +579,22 @@ void handle_IPI(int ipinr, struct pt_regs *regs) irq_exit(); break; +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + case IPI_TIMER: + irq_enter(); + tick_receive_broadcast(); + irq_exit(); + break; +#endif + +#ifdef CONFIG_IRQ_WORK + case IPI_IRQ_WORK: + irq_enter(); + irq_work_run(); + irq_exit(); + break; +#endif + default: pr_crit("CPU%u: Unknown IPI message 0x%x\n", cpu, ipinr); break; @@ -436,6 +607,13 @@ void smp_send_reschedule(int cpu) smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + smp_cross_call(mask, IPI_TIMER); +} +#endif + void smp_send_stop(void) { unsigned long timeout; diff --git a/arch/arm64/kernel/smp_spin_table.c b/arch/arm64/kernel/smp_spin_table.c new file mode 100644 index 00000000000..0347d38eea2 --- /dev/null +++ b/arch/arm64/kernel/smp_spin_table.c @@ -0,0 +1,114 @@ +/* + * Spin Table SMP initialisation + * + * Copyright (C) 2013 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <linux/delay.h> +#include <linux/init.h> +#include <linux/of.h> +#include <linux/smp.h> + +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/cputype.h> +#include <asm/smp_plat.h> + +extern void secondary_holding_pen(void); +volatile unsigned long secondary_holding_pen_release = INVALID_HWID; + +static phys_addr_t cpu_release_addr[NR_CPUS]; + +/* + * Write secondary_holding_pen_release in a way that is guaranteed to be + * visible to all observers, irrespective of whether they're taking part + * in coherency or not. This is necessary for the hotplug code to work + * reliably. + */ +static void write_pen_release(u64 val) +{ + void *start = (void *)&secondary_holding_pen_release; + unsigned long size = sizeof(secondary_holding_pen_release); + + secondary_holding_pen_release = val; + __flush_dcache_area(start, size); +} + + +static int smp_spin_table_cpu_init(struct device_node *dn, unsigned int cpu) +{ + /* + * Determine the address from which the CPU is polling. + */ + if (of_property_read_u64(dn, "cpu-release-addr", + &cpu_release_addr[cpu])) { + pr_err("CPU %d: missing or invalid cpu-release-addr property\n", + cpu); + + return -1; + } + + return 0; +} + +static int smp_spin_table_cpu_prepare(unsigned int cpu) +{ + void **release_addr; + + if (!cpu_release_addr[cpu]) + return -ENODEV; + + release_addr = __va(cpu_release_addr[cpu]); + + /* + * We write the release address as LE regardless of the native + * endianess of the kernel. Therefore, any boot-loaders that + * read this address need to convert this address to the + * boot-loader's endianess before jumping. This is mandated by + * the boot protocol. + */ + release_addr[0] = (void *) cpu_to_le64(__pa(secondary_holding_pen)); + + __flush_dcache_area(release_addr, sizeof(release_addr[0])); + + /* + * Send an event to wake up the secondary CPU. + */ + sev(); + + return 0; +} + +static int smp_spin_table_cpu_boot(unsigned int cpu) +{ + /* + * Update the pen release flag. + */ + write_pen_release(cpu_logical_map(cpu)); + + /* + * Send an event, causing the secondaries to read pen_release. + */ + sev(); + + return 0; +} + +const struct cpu_operations smp_spin_table_ops = { + .name = "spin-table", + .cpu_init = smp_spin_table_cpu_init, + .cpu_prepare = smp_spin_table_cpu_prepare, + .cpu_boot = smp_spin_table_cpu_boot, +}; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index d25459ff57f..55437ba1f5a 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -35,7 +35,7 @@ * ldp x29, x30, [sp] * add sp, sp, #0x10 */ -int unwind_frame(struct stackframe *frame) +int notrace unwind_frame(struct stackframe *frame) { unsigned long high, low; unsigned long fp = frame->fp; @@ -43,12 +43,16 @@ int unwind_frame(struct stackframe *frame) low = frame->sp; high = ALIGN(low, THREAD_SIZE); - if (fp < low || fp > high || fp & 0xf) + if (fp < low || fp > high - 0x18 || fp & 0xf) return -EINVAL; frame->sp = fp + 0x10; frame->fp = *(unsigned long *)(fp); - frame->pc = *(unsigned long *)(fp + 8); + /* + * -4 here because we care about the PC at time of bl, + * not where the return will go. + */ + frame->pc = *(unsigned long *)(fp + 8) - 4; return 0; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c new file mode 100644 index 00000000000..1fa9ce4afd8 --- /dev/null +++ b/arch/arm64/kernel/suspend.c @@ -0,0 +1,140 @@ +#include <linux/percpu.h> +#include <linux/slab.h> +#include <asm/cacheflush.h> +#include <asm/cpu_ops.h> +#include <asm/debug-monitors.h> +#include <asm/pgtable.h> +#include <asm/memory.h> +#include <asm/smp_plat.h> +#include <asm/suspend.h> +#include <asm/tlbflush.h> + +extern int __cpu_suspend(unsigned long); +/* + * This is called by __cpu_suspend() to save the state, and do whatever + * flushing is required to ensure that when the CPU goes to sleep we have + * the necessary data available when the caches are not searched. + * + * @arg: Argument to pass to suspend operations + * @ptr: CPU context virtual address + * @save_ptr: address of the location where the context physical address + * must be saved + */ +int __cpu_suspend_finisher(unsigned long arg, struct cpu_suspend_ctx *ptr, + phys_addr_t *save_ptr) +{ + int cpu = smp_processor_id(); + + *save_ptr = virt_to_phys(ptr); + + cpu_do_suspend(ptr); + /* + * Only flush the context that must be retrieved with the MMU + * off. VA primitives ensure the flush is applied to all + * cache levels so context is pushed to DRAM. + */ + __flush_dcache_area(ptr, sizeof(*ptr)); + __flush_dcache_area(save_ptr, sizeof(*save_ptr)); + + return cpu_ops[cpu]->cpu_suspend(arg); +} + +/* + * This hook is provided so that cpu_suspend code can restore HW + * breakpoints as early as possible in the resume path, before reenabling + * debug exceptions. Code cannot be run from a CPU PM notifier since by the + * time the notifier runs debug exceptions might have been enabled already, + * with HW breakpoints registers content still in an unknown state. + */ +void (*hw_breakpoint_restore)(void *); +void __init cpu_suspend_set_dbg_restorer(void (*hw_bp_restore)(void *)) +{ + /* Prevent multiple restore hook initializations */ + if (WARN_ON(hw_breakpoint_restore)) + return; + hw_breakpoint_restore = hw_bp_restore; +} + +/** + * cpu_suspend + * + * @arg: argument to pass to the finisher function + */ +int cpu_suspend(unsigned long arg) +{ + struct mm_struct *mm = current->active_mm; + int ret, cpu = smp_processor_id(); + unsigned long flags; + + /* + * If cpu_ops have not been registered or suspend + * has not been initialized, cpu_suspend call fails early. + */ + if (!cpu_ops[cpu] || !cpu_ops[cpu]->cpu_suspend) + return -EOPNOTSUPP; + + /* + * From this point debug exceptions are disabled to prevent + * updates to mdscr register (saved and restored along with + * general purpose registers) from kernel debuggers. + */ + local_dbg_save(flags); + + /* + * mm context saved on the stack, it will be restored when + * the cpu comes out of reset through the identity mapped + * page tables, so that the thread address space is properly + * set-up on function return. + */ + ret = __cpu_suspend(arg); + if (ret == 0) { + cpu_switch_mm(mm->pgd, mm); + flush_tlb_all(); + + /* + * Restore per-cpu offset before any kernel + * subsystem relying on it has a chance to run. + */ + set_my_cpu_offset(per_cpu_offset(cpu)); + + /* + * Restore HW breakpoint registers to sane values + * before debug exceptions are possibly reenabled + * through local_dbg_restore. + */ + if (hw_breakpoint_restore) + hw_breakpoint_restore(NULL); + } + + /* + * Restore pstate flags. OS lock and mdscr have been already + * restored, so from this point onwards, debugging is fully + * renabled if it was enabled when core started shutdown. + */ + local_dbg_restore(flags); + + return ret; +} + +extern struct sleep_save_sp sleep_save_sp; +extern phys_addr_t sleep_idmap_phys; + +static int cpu_suspend_init(void) +{ + void *ctx_ptr; + + /* ctx_ptr is an array of physical addresses */ + ctx_ptr = kcalloc(mpidr_hash_size(), sizeof(phys_addr_t), GFP_KERNEL); + + if (WARN_ON(!ctx_ptr)) + return -ENOMEM; + + sleep_save_sp.save_ptr_stash = ctx_ptr; + sleep_save_sp.save_ptr_stash_phys = virt_to_phys(ctx_ptr); + sleep_idmap_phys = virt_to_phys(idmap_pg_dir); + __flush_dcache_area(&sleep_save_sp, sizeof(struct sleep_save_sp)); + __flush_dcache_area(&sleep_idmap_phys, sizeof(sleep_idmap_phys)); + + return 0; +} +early_initcall(cpu_suspend_init); diff --git a/arch/arm64/kernel/sys.c b/arch/arm64/kernel/sys.c index 8292a9b090f..3fa98ff14f0 100644 --- a/arch/arm64/kernel/sys.c +++ b/arch/arm64/kernel/sys.c @@ -40,7 +40,6 @@ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, * Wrappers to pass the pt_regs argument. */ #define sys_rt_sigreturn sys_rt_sigreturn_wrapper -#define sys_sigaltstack sys_sigaltstack_wrapper #include <asm/syscalls.h> diff --git a/arch/arm64/kernel/sys32.S b/arch/arm64/kernel/sys32.S index 7ef59e9245e..423a5b3fc2b 100644 --- a/arch/arm64/kernel/sys32.S +++ b/arch/arm64/kernel/sys32.S @@ -39,11 +39,6 @@ compat_sys_rt_sigreturn_wrapper: b compat_sys_rt_sigreturn ENDPROC(compat_sys_rt_sigreturn_wrapper) -compat_sys_sigaltstack_wrapper: - ldr x2, [sp, #S_COMPAT_SP] - b compat_do_sigaltstack -ENDPROC(compat_sys_sigaltstack_wrapper) - compat_sys_statfs64_wrapper: mov w3, #84 cmp w1, #88 @@ -63,71 +58,52 @@ ENDPROC(compat_sys_fstatfs64_wrapper) * in registers or that take 32-bit parameters which require sign * extension. */ -compat_sys_lseek_wrapper: - sxtw x1, w1 - b sys_lseek -ENDPROC(compat_sys_lseek_wrapper) - compat_sys_pread64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pread64 ENDPROC(compat_sys_pread64_wrapper) compat_sys_pwrite64_wrapper: - orr x3, x4, x5, lsl #32 + regs_to_64 x3, x4, x5 b sys_pwrite64 ENDPROC(compat_sys_pwrite64_wrapper) compat_sys_truncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_truncate ENDPROC(compat_sys_truncate64_wrapper) compat_sys_ftruncate64_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 b sys_ftruncate ENDPROC(compat_sys_ftruncate64_wrapper) compat_sys_readahead_wrapper: - orr x1, x2, x3, lsl #32 + regs_to_64 x1, x2, x3 mov w2, w4 b sys_readahead ENDPROC(compat_sys_readahead_wrapper) -compat_sys_lookup_dcookie: - orr x0, x0, x1, lsl #32 - mov w1, w2 - mov w2, w3 - b sys_lookup_dcookie -ENDPROC(compat_sys_lookup_dcookie) - compat_sys_fadvise64_64_wrapper: mov w6, w1 - orr x1, x2, x3, lsl #32 - orr x2, x4, x5, lsl #32 + regs_to_64 x1, x2, x3 + regs_to_64 x2, x4, x5 mov w3, w6 b sys_fadvise64_64 ENDPROC(compat_sys_fadvise64_64_wrapper) compat_sys_sync_file_range2_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_sync_file_range2 ENDPROC(compat_sys_sync_file_range2_wrapper) compat_sys_fallocate_wrapper: - orr x2, x2, x3, lsl #32 - orr x3, x4, x5, lsl #32 + regs_to_64 x2, x2, x3 + regs_to_64 x3, x4, x5 b sys_fallocate ENDPROC(compat_sys_fallocate_wrapper) -compat_sys_fanotify_mark_wrapper: - orr x2, x2, x3, lsl #32 - mov w3, w4 - mov w4, w5 - b sys_fanotify_mark -ENDPROC(compat_sys_fanotify_mark_wrapper) - #undef __SYSCALL #define __SYSCALL(x, y) .quad y // x diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index 3b4b7258f49..1a7125c3099 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -18,6 +18,7 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ +#include <linux/clockchips.h> #include <linux/export.h> #include <linux/kernel.h> #include <linux/interrupt.h> @@ -31,8 +32,11 @@ #include <linux/syscore_ops.h> #include <linux/timer.h> #include <linux/irq.h> +#include <linux/delay.h> +#include <linux/clocksource.h> +#include <linux/clk-provider.h> -#include <clocksource/arm_generic.h> +#include <clocksource/arm_arch_timer.h> #include <asm/thread_info.h> #include <asm/stacktrace.h> @@ -61,5 +65,17 @@ EXPORT_SYMBOL(profile_pc); void __init time_init(void) { - arm_generic_timer_init(); + u32 arch_timer_rate; + + of_clk_init(NULL); + clocksource_of_init(); + + tick_setup_hrtimer_broadcast(); + + arch_timer_rate = arch_timer_get_rate(); + if (!arch_timer_rate) + panic("Unable to initialise architected timer.\n"); + + /* Calibrate the delay loop directly */ + lpj_fine = arch_timer_rate / HZ; } diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c new file mode 100644 index 00000000000..43514f90591 --- /dev/null +++ b/arch/arm64/kernel/topology.c @@ -0,0 +1,283 @@ +/* + * arch/arm64/kernel/topology.c + * + * Copyright (C) 2011,2013,2014 Linaro Limited. + * + * Based on the arm32 version written by Vincent Guittot in turn based on + * arch/sh/kernel/topology.c + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#include <linux/cpu.h> +#include <linux/cpumask.h> +#include <linux/init.h> +#include <linux/percpu.h> +#include <linux/node.h> +#include <linux/nodemask.h> +#include <linux/of.h> +#include <linux/sched.h> + +#include <asm/topology.h> + +static int __init get_cpu_for_node(struct device_node *node) +{ + struct device_node *cpu_node; + int cpu; + + cpu_node = of_parse_phandle(node, "cpu", 0); + if (!cpu_node) + return -1; + + for_each_possible_cpu(cpu) { + if (of_get_cpu_node(cpu, NULL) == cpu_node) { + of_node_put(cpu_node); + return cpu; + } + } + + pr_crit("Unable to find CPU node for %s\n", cpu_node->full_name); + + of_node_put(cpu_node); + return -1; +} + +static int __init parse_core(struct device_node *core, int cluster_id, + int core_id) +{ + char name[10]; + bool leaf = true; + int i = 0; + int cpu; + struct device_node *t; + + do { + snprintf(name, sizeof(name), "thread%d", i); + t = of_get_child_by_name(core, name); + if (t) { + leaf = false; + cpu = get_cpu_for_node(t); + if (cpu >= 0) { + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + cpu_topology[cpu].thread_id = i; + } else { + pr_err("%s: Can't get CPU for thread\n", + t->full_name); + of_node_put(t); + return -EINVAL; + } + of_node_put(t); + } + i++; + } while (t); + + cpu = get_cpu_for_node(core); + if (cpu >= 0) { + if (!leaf) { + pr_err("%s: Core has both threads and CPU\n", + core->full_name); + return -EINVAL; + } + + cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].core_id = core_id; + } else if (leaf) { + pr_err("%s: Can't get CPU for leaf core\n", core->full_name); + return -EINVAL; + } + + return 0; +} + +static int __init parse_cluster(struct device_node *cluster, int depth) +{ + char name[10]; + bool leaf = true; + bool has_cores = false; + struct device_node *c; + static int cluster_id __initdata; + int core_id = 0; + int i, ret; + + /* + * First check for child clusters; we currently ignore any + * information about the nesting of clusters and present the + * scheduler with a flat list of them. + */ + i = 0; + do { + snprintf(name, sizeof(name), "cluster%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + leaf = false; + ret = parse_cluster(c, depth + 1); + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + /* Now check for cores */ + i = 0; + do { + snprintf(name, sizeof(name), "core%d", i); + c = of_get_child_by_name(cluster, name); + if (c) { + has_cores = true; + + if (depth == 0) { + pr_err("%s: cpu-map children should be clusters\n", + c->full_name); + of_node_put(c); + return -EINVAL; + } + + if (leaf) { + ret = parse_core(c, cluster_id, core_id++); + } else { + pr_err("%s: Non-leaf cluster with core %s\n", + cluster->full_name, name); + ret = -EINVAL; + } + + of_node_put(c); + if (ret != 0) + return ret; + } + i++; + } while (c); + + if (leaf && !has_cores) + pr_warn("%s: empty cluster\n", cluster->full_name); + + if (leaf) + cluster_id++; + + return 0; +} + +static int __init parse_dt_topology(void) +{ + struct device_node *cn, *map; + int ret = 0; + int cpu; + + cn = of_find_node_by_path("/cpus"); + if (!cn) { + pr_err("No CPU information found in DT\n"); + return 0; + } + + /* + * When topology is provided cpu-map is essentially a root + * cluster with restricted subnodes. + */ + map = of_get_child_by_name(cn, "cpu-map"); + if (!map) + goto out; + + ret = parse_cluster(map, 0); + if (ret != 0) + goto out_map; + + /* + * Check that all cores are in the topology; the SMP code will + * only mark cores described in the DT as possible. + */ + for_each_possible_cpu(cpu) { + if (cpu_topology[cpu].cluster_id == -1) { + pr_err("CPU%d: No topology information specified\n", + cpu); + ret = -EINVAL; + } + } + +out_map: + of_node_put(map); +out: + of_node_put(cn); + return ret; +} + +/* + * cpu topology table + */ +struct cpu_topology cpu_topology[NR_CPUS]; +EXPORT_SYMBOL_GPL(cpu_topology); + +const struct cpumask *cpu_coregroup_mask(int cpu) +{ + return &cpu_topology[cpu].core_sibling; +} + +static void update_siblings_masks(unsigned int cpuid) +{ + struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; + int cpu; + + if (cpuid_topo->cluster_id == -1) { + /* + * DT does not contain topology information for this cpu. + */ + pr_debug("CPU%u: No topology information configured\n", cpuid); + return; + } + + /* update core and thread sibling masks */ + for_each_possible_cpu(cpu) { + cpu_topo = &cpu_topology[cpu]; + + if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->core_sibling); + + if (cpuid_topo->core_id != cpu_topo->core_id) + continue; + + cpumask_set_cpu(cpuid, &cpu_topo->thread_sibling); + if (cpu != cpuid) + cpumask_set_cpu(cpu, &cpuid_topo->thread_sibling); + } +} + +void store_cpu_topology(unsigned int cpuid) +{ + update_siblings_masks(cpuid); +} + +static void __init reset_cpu_topology(void) +{ + unsigned int cpu; + + for_each_possible_cpu(cpu) { + struct cpu_topology *cpu_topo = &cpu_topology[cpu]; + + cpu_topo->thread_id = -1; + cpu_topo->core_id = 0; + cpu_topo->cluster_id = -1; + + cpumask_clear(&cpu_topo->core_sibling); + cpumask_set_cpu(cpu, &cpu_topo->core_sibling); + cpumask_clear(&cpu_topo->thread_sibling); + cpumask_set_cpu(cpu, &cpu_topo->thread_sibling); + } +} + +void __init init_cpu_topology(void) +{ + reset_cpu_topology(); + + /* + * Discard anything that was parsed if we hit an error so we + * don't use partial information. + */ + if (parse_dt_topology()) + reset_cpu_topology(); +} diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 3883f842434..c43cfa9b830 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -32,6 +32,7 @@ #include <linux/syscalls.h> #include <asm/atomic.h> +#include <asm/debug-monitors.h> #include <asm/traps.h> #include <asm/stacktrace.h> #include <asm/exception.h> @@ -167,13 +168,6 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) } } -void dump_stack(void) -{ - dump_backtrace(NULL, NULL); -} - -EXPORT_SYMBOL(dump_stack); - void show_stack(struct task_struct *tsk, unsigned long *sp) { dump_backtrace(NULL, tsk); @@ -242,7 +236,7 @@ void die(const char *str, struct pt_regs *regs, int err) crash_kexec(regs); bust_spinlocks(0); - add_taint(TAINT_DIE); + add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE); raw_spin_unlock_irq(&die_lock); oops_exit(); @@ -257,10 +251,13 @@ void die(const char *str, struct pt_regs *regs, int err) void arm64_notify_die(const char *str, struct pt_regs *regs, struct siginfo *info, int err) { - if (user_mode(regs)) + if (user_mode(regs)) { + current->thread.fault_address = 0; + current->thread.fault_code = err; force_sig_info(info->si_signo, info, current); - else + } else { die(str, regs, err); + } } asmlinkage void __exception do_undefinstr(struct pt_regs *regs) @@ -268,13 +265,12 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); -#ifdef CONFIG_COMPAT /* check for AArch32 breakpoint instructions */ - if (compat_user_mode(regs) && aarch32_break_trap(regs) == 0) + if (!aarch32_break_handler(regs)) return; -#endif - if (show_unhandled_signals) { + if (show_unhandled_signals && unhandled_signal(current, SIGILL) && + printk_ratelimit()) { pr_info("%s[%d]: undefined instruction: pc=%p\n", current->comm, task_pid_nr(current), pc); dump_instr(KERN_INFO, regs); @@ -301,7 +297,7 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) } #endif - if (show_unhandled_signals) { + if (show_unhandled_signals && printk_ratelimit()) { pr_info("%s[%d]: syscall %d\n", current->comm, task_pid_nr(current), (int)regs->syscallno); dump_instr("", regs); @@ -317,14 +313,20 @@ asmlinkage long do_ni_syscall(struct pt_regs *regs) */ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) { + siginfo_t info; + void __user *pc = (void __user *)instruction_pointer(regs); console_verbose(); pr_crit("Bad mode in %s handler detected, code 0x%08x\n", handler[reason], esr); + __show_regs(regs); + + info.si_signo = SIGILL; + info.si_errno = 0; + info.si_code = ILL_ILLOPC; + info.si_addr = pc; - die("Oops - bad mode", regs, 0); - local_irq_disable(); - panic("bad mode"); + arm64_notify_die("Oops - bad mode", regs, &info, 0); } void __pte_error(const char *file, int line, unsigned long val) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 6a389dc1bd4..50384fec56c 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -58,7 +58,10 @@ static struct page *vectors_page[1]; static int alloc_vectors_page(void) { extern char __kuser_helper_start[], __kuser_helper_end[]; + extern char __aarch32_sigret_code_start[], __aarch32_sigret_code_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; + int sigret_sz = __aarch32_sigret_code_end - __aarch32_sigret_code_start; unsigned long vpage; vpage = get_zeroed_page(GFP_ATOMIC); @@ -72,7 +75,7 @@ static int alloc_vectors_page(void) /* sigreturn code */ memcpy((void *)vpage + AARCH32_KERN_SIGRET_CODE_OFFSET, - aarch32_sigret_code, sizeof(aarch32_sigret_code)); + __aarch32_sigret_code_start, sigret_sz); flush_icache_range(vpage, vpage + PAGE_SIZE); vectors_page[0] = virt_to_page(vpage); @@ -103,49 +106,31 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) static int __init vdso_init(void) { - struct page *pg; - char *vbase; - int i, ret = 0; + int i; + + if (memcmp(&vdso_start, "\177ELF", 4)) { + pr_err("vDSO is not a valid ELF object!\n"); + return -EINVAL; + } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", vdso_pages + 1, vdso_pages, 1L, &vdso_start); /* Allocate the vDSO pagelist, plus a page for the data. */ - vdso_pagelist = kzalloc(sizeof(struct page *) * (vdso_pages + 1), + vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL); - if (vdso_pagelist == NULL) { - pr_err("Failed to allocate vDSO pagelist!\n"); + if (vdso_pagelist == NULL) return -ENOMEM; - } /* Grab the vDSO code pages. */ - for (i = 0; i < vdso_pages; i++) { - pg = virt_to_page(&vdso_start + i*PAGE_SIZE); - ClearPageReserved(pg); - get_page(pg); - vdso_pagelist[i] = pg; - } - - /* Sanity check the shared object header. */ - vbase = vmap(vdso_pagelist, 1, 0, PAGE_KERNEL); - if (vbase == NULL) { - pr_err("Failed to map vDSO pagelist!\n"); - return -ENOMEM; - } else if (memcmp(vbase, "\177ELF", 4)) { - pr_err("vDSO is not a valid ELF object!\n"); - ret = -EINVAL; - goto unmap; - } + for (i = 0; i < vdso_pages; i++) + vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); /* Grab the vDSO data page. */ - pg = virt_to_page(vdso_data); - get_page(pg); - vdso_pagelist[i] = pg; + vdso_pagelist[i] = virt_to_page(vdso_data); -unmap: - vunmap(vbase); - return ret; + return 0; } arch_initcall(vdso_init); @@ -235,6 +220,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->use_syscall = use_syscall; vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; + vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { vdso_data->cs_cycle_last = tk->clock->cycle_last; @@ -242,8 +229,6 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->cs_mult = tk->mult; vdso_data->cs_shift = tk->shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; } smp_wmb(); diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d8064af42e6..6d20b7d162d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -48,7 +48,7 @@ $(obj-vdso): %.o: %.S # Actual build commands quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index f0a6d10b521..fe652ffd34c 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -103,6 +103,8 @@ ENTRY(__kernel_clock_gettime) bl __do_get_tspec seqcnt_check w9, 1b + mov x30, x2 + cmp w0, #CLOCK_MONOTONIC b.ne 6f @@ -118,6 +120,9 @@ ENTRY(__kernel_clock_gettime) ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 8f + /* xtime_coarse_nsec is already right-shifted */ + mov x12, #0 + /* Get coarse timespec. */ adr vdso_data, _vdso_data 3: seqcnt_acquire @@ -156,7 +161,7 @@ ENTRY(__kernel_clock_gettime) lsr x11, x11, x12 stp x10, x11, [x1, #TSPEC_TV_SEC] mov x0, xzr - ret x2 + ret 7: mov x30, x2 8: /* Syscall fallback. */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 3fae2be8b01..f1e6d5c032e 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -13,10 +13,23 @@ #define ARM_EXIT_DISCARD(x) x OUTPUT_ARCH(aarch64) -ENTRY(stext) +ENTRY(_text) jiffies = jiffies_64; +#define HYPERVISOR_TEXT \ + /* \ + * Force the alignment to be compatible with \ + * the vectors requirements \ + */ \ + . = ALIGN(2048); \ + VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + *(.hyp.idmap.text) \ + VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ + VMLINUX_SYMBOL(__hyp_text_start) = .; \ + *(.hyp.text) \ + VMLINUX_SYMBOL(__hyp_text_end) = .; + SECTIONS { /* @@ -41,7 +54,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - *(.smp.pen.text) __exception_text_start = .; *(.exception.text) __exception_text_end = .; @@ -49,6 +61,7 @@ SECTIONS TEXT_TEXT SCHED_TEXT LOCK_TEXT + HYPERVISOR_TEXT *(.fixup) *(.gnu.warning) . = ALIGN(16); @@ -56,7 +69,8 @@ SECTIONS } RO_DATA(PAGE_SIZE) - + EXCEPTION_TABLE(8) + NOTES _etext = .; /* End of text and rodata section */ . = ALIGN(PAGE_SIZE); @@ -82,45 +96,21 @@ SECTIONS PERCPU_SECTION(64) __init_end = .; - . = ALIGN(THREAD_SIZE); - __data_loc = .; - - .data : AT(__data_loc) { - _data = .; /* address in memory */ - _sdata = .; - - /* - * first, the init task union, aligned - * to an 8192 byte boundary. - */ - INIT_TASK_DATA(THREAD_SIZE) - NOSAVE_DATA - CACHELINE_ALIGNED_DATA(64) - READ_MOSTLY_DATA(64) - - /* - * The exception fixup table (might need resorting at runtime) - */ - . = ALIGN(32); - __start___ex_table = .; - *(__ex_table) - __stop___ex_table = .; - - /* - * and the usual data section - */ - DATA_DATA - CONSTRUCTORS - - _edata = .; - } - _edata_loc = __data_loc + SIZEOF(.data); - NOTES + . = ALIGN(PAGE_SIZE); + _data = .; + _sdata = .; + RW_DATA_SECTION(64, PAGE_SIZE, THREAD_SIZE) + _edata = .; BSS_SECTION(0, 0, 0) _end = .; STABS_DEBUG - .comment 0 : { *(.comment) } } + +/* + * The HYP init code can't be more than a page long. + */ +ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), + "HYP init code too big") |
